diff --git a/packages/scraper/src/scheduler.ts b/packages/scraper/src/scheduler.ts
index 536b7c0..d888524 100644
--- a/packages/scraper/src/scheduler.ts
+++ b/packages/scraper/src/scheduler.ts
@@ -135,80 +135,49 @@ export async function registerSchedules(boss: PgBoss): Promise {
   }
 
   // ══════════════════════════════════════════════════════════════════════
-  // PLAYWRIGHT SCRAPERS — priority competitors every 2h, others every 4h
+  // ALL PRICING SCRAPERS — 24/7, every 2h (even hours), staggered within each group
+  // NOTE: groups share minute slots (e.g. fs, fluxlight and flexoptix all start at :00)
+  // Goal: complete competitor coverage, no gaps, database always fresh
   // ══════════════════════════════════════════════════════════════════════
 
-  // FS.com: every 2h — primary competitor, highest data value
-  await boss.schedule("scrape:pricing:fs", "0 */2 * * *", {}, { retryLimit: 3, expireInSeconds: 5400 });
+  // Playwright scrapers (resource-heavy) — every 2h, 10min apart
+  await boss.schedule("scrape:pricing:fs", "0 */2 * * *", {}, { retryLimit: 3, expireInSeconds: 5400 });
+  await boss.schedule("scrape:pricing:10gtek", "10 */2 * * *", {}, { retryLimit: 2, expireInSeconds: 3600 });
+  await boss.schedule("scrape:pricing:atgbics", "20 */2 * * *", {}, { retryLimit: 2, expireInSeconds: 3600 });
+  await boss.schedule("scrape:pricing:prolabs", "30 */2 * * *", {}, { retryLimit: 2, expireInSeconds: 3600 });
 
-  // 10Gtek: every 2h offset by 20min
-  await boss.schedule("scrape:pricing:10gtek", "20 */2 * * *", {}, { retryLimit: 2, expireInSeconds: 3600 });
+  // Fetch/Cheerio scrapers (lightweight) — every 2h, 5min apart
+  await boss.schedule("scrape:pricing:fluxlight", "0 */2 * * *", {}, { retryLimit: 2, expireInSeconds: 3600 });
+  await boss.schedule("scrape:pricing:gbics", "5 */2 * * *", {}, { retryLimit: 2, expireInSeconds: 3600 });
+  await boss.schedule("scrape:pricing:optcore", "10 */2 * * *", {}, { retryLimit: 2, expireInSeconds: 3600 });
+  await boss.schedule("scrape:pricing:champion-one", "15 */2 * * *", {}, { retryLimit: 2, expireInSeconds: 3600 });
+  await boss.schedule("scrape:pricing:sfpcables", "20 */2 * * *", {}, { retryLimit: 2, expireInSeconds: 3600 });
+  await boss.schedule("scrape:pricing:blueoptics", "25 */2 * * *", {}, { retryLimit: 2, expireInSeconds: 3600 });
+  await boss.schedule("scrape:pricing:fiber24", "30 */2 * * *", {}, { retryLimit: 2, expireInSeconds: 3600 });
+  await boss.schedule("scrape:pricing:tscom", "35 */2 * * *", {}, { retryLimit: 2, expireInSeconds: 3600 });
+  await boss.schedule("scrape:pricing:skylane", "40 */2 * * *", {}, { retryLimit: 2, expireInSeconds: 3600 });
+  await boss.schedule("scrape:pricing:ascentoptics", "45 */2 * * *", {}, { retryLimit: 2, expireInSeconds: 3600 });
+  await boss.schedule("scrape:pricing:gaotek", "50 */2 * * *", {}, { retryLimit: 2, expireInSeconds: 3600 });
 
-  // ATGBICS: every 4h (staggered)
-  await boss.schedule("scrape:pricing:atgbics", "50 0,4,8,12,16,20 * * *", {}, { retryLimit: 2, expireInSeconds: 3600 });
-
-  // ProLabs: every 2h offset by 40min
-  await boss.schedule("scrape:pricing:prolabs", "40 */2 * * *", {}, { retryLimit: 2, expireInSeconds: 3600 });
+  // Form-factor coverage scrapers — every 2h
+  await boss.schedule("scrape:pricing:comms-express", "5 */2 * * *", {}, { retryLimit: 2, expireInSeconds: 5400 });
+  await boss.schedule("scrape:pricing:router-switch", "15 */2 * * *", {}, { retryLimit: 2, expireInSeconds: 5400 });
+  await boss.schedule("scrape:pricing:multimode-inc", "25 */2 * * *", {}, { retryLimit: 2, expireInSeconds: 3600 });
+  await boss.schedule("scrape:pricing:optictransceiver", "35 */2 * * *", {}, { retryLimit: 2, expireInSeconds: 3600 });
+  await boss.schedule("scrape:pricing:wiitek", "45 */2 * * *", {}, { retryLimit: 2, expireInSeconds: 3600 });
 
   // ══════════════════════════════════════════════════════════════════════
-  // FETCH/CHEERIO SCRAPERS — every 4h (lightweight, Pi-friendly)
-  // ══════════════════════════════════════════════════════════════════════
-
-  // Fluxlight: 00:05, 04:05, 08:05, 12:05, 16:05, 20:05
-  await boss.schedule("scrape:pricing:fluxlight", "5 0,4,8,12,16,20 * * *", {}, { retryLimit: 2, expireInSeconds: 3600 });
-
-  // GBICs: 00:15, 04:15, 08:15, 12:15, 16:15, 20:15
-  await boss.schedule("scrape:pricing:gbics", "15 0,4,8,12,16,20 * * *", {}, { retryLimit: 2, expireInSeconds: 3600 });
-
-  // Optcore: 00:30, 04:30, 08:30, 12:30, 16:30, 20:30
-  await boss.schedule("scrape:pricing:optcore", "30 0,4,8,12,16,20 * * *", {}, { retryLimit: 2, expireInSeconds: 3600 });
-
-  // Champion ONE: 00:45, 04:45, 08:45, 12:45, 16:45, 20:45
-  await boss.schedule("scrape:pricing:champion-one", "45 0,4,8,12,16,20 * * *", {}, { retryLimit: 2, expireInSeconds: 3600 });
-
-  // SFPCables: 01:00, 05:00, 09:00, 13:00, 17:00, 21:00
-  await boss.schedule("scrape:pricing:sfpcables", "0 1,5,9,13,17,21 * * *", {}, { retryLimit: 2, expireInSeconds: 3600 });
-
-  // BlueOptics: 01:15, 05:15, 09:15, 13:15, 17:15, 21:15
-  await boss.schedule("scrape:pricing:blueoptics", "15 1,5,9,13,17,21 * * *", {}, { retryLimit: 2, expireInSeconds: 3600 });
-
-  // ShopFiber24: 01:30, 05:30, 09:30, 13:30, 17:30, 21:30
-  await boss.schedule("scrape:pricing:fiber24", "30 1,5,9,13,17,21 * * *", {}, { retryLimit: 2, expireInSeconds: 3600 });
-
-  // T&S Communication: 01:45, 05:45, 09:45, 13:45, 17:45, 21:45
-  await boss.schedule("scrape:pricing:tscom", "45 1,5,9,13,17,21 * * *", {}, { retryLimit: 2, expireInSeconds: 3600 });
-
-  // Skylane: 02:00, 06:00, 10:00, 14:00, 18:00, 22:00
-  await boss.schedule("scrape:pricing:skylane", "0 2,6,10,14,18,22 * * *", {}, { retryLimit: 2, expireInSeconds: 3600 });
-
-  // AscentOptics: 02:15, 06:15, 10:15, 14:15, 18:15, 22:15
-  await boss.schedule("scrape:pricing:ascentoptics", "15 2,6,10,14,18,22 * * *", {}, { retryLimit: 2, expireInSeconds: 3600 });
-
-  // GAO Tek: 02:30, 06:30, 10:30, 14:30, 18:30, 22:30
-  await boss.schedule("scrape:pricing:gaotek", "30 2,6,10,14,18,22 * * *", {}, { retryLimit: 2, expireInSeconds: 3600 });
-
-  // ══════════════════════════════════════════════════════════════════════
-  // CATALOG SCRAPERS — Flexoptix every 2h (primary price source)
+  // FLEXOPTIX CATALOG — every 2h (primary price source)
   // ══════════════════════════════════════════════════════════════════════
 
   await boss.schedule("scrape:pricing:flexoptix", "0 */2 * * *", {}, { retryLimit: 3, expireInSeconds: 3600 });
 
   // ══════════════════════════════════════════════════════════════════════
-  // MANUFACTURER CATALOGS — every 8h (product data, no prices)
+  // MANUFACTURER CATALOGS — every 4h (product data, no prices)
   // ══════════════════════════════════════════════════════════════════════
 
-  await boss.schedule("scrape:catalog:smartoptics", "10 3,11,19 * * *", {}, { retryLimit: 2, expireInSeconds: 3600 });
-  await boss.schedule("scrape:catalog:hubersuhner", "25 3,11,19 * * *", {}, { retryLimit: 2, expireInSeconds: 3600 });
-
-  // ══════════════════════════════════════════════════════════════════════
-  // FORM-FACTOR COVERAGE SCRAPERS — every 8h (CFP, CSFP, SFP-DD, legacy)
-  // ══════════════════════════════════════════════════════════════════════
-
-  await boss.schedule("scrape:pricing:comms-express", "40 2,10,18 * * *", {}, { retryLimit: 2, expireInSeconds: 5400 });
-  await boss.schedule("scrape:pricing:router-switch", "0 3,11,19 * * *", {}, { retryLimit: 2, expireInSeconds: 5400 });
-  await boss.schedule("scrape:pricing:multimode-inc", "20 3,11,19 * * *", {}, { retryLimit: 2, expireInSeconds: 3600 });
-  await boss.schedule("scrape:pricing:optictransceiver", "45 3,11,19 * * *", {}, { retryLimit: 2, expireInSeconds: 3600 });
-  await boss.schedule("scrape:pricing:wiitek", "5 4,12,20 * * *", {}, { retryLimit: 2, expireInSeconds: 3600 });
+  await boss.schedule("scrape:catalog:smartoptics", "10 */4 * * *", {}, { retryLimit: 2, expireInSeconds: 3600 });
+  await boss.schedule("scrape:catalog:hubersuhner", "25 */4 * * *", {}, { retryLimit: 2, expireInSeconds: 3600 });
 
   // ══════════════════════════════════════════════════════════════════════
   // VENDOR LISTS — every 12h