From 2deb8121ba1942e51196da86d8a0b8f6cac8a73b Mon Sep 17 00:00:00 2001 From: Rene Fichtmueller Date: Thu, 9 Apr 2026 20:50:57 +0200 Subject: [PATCH] fix: route Pi-destined scrapers exclusively to Pi worker fleet Remove boss.work() registrations for lightweight fetch/cheerio scrapers from Erik's scheduler. Pis are now the SOLE consumers of these queues: fluxlight, gbics, optcore, champion-one, sfpcables, blueoptics, fiber24, tscom, skylane, ascentoptics, gaotek, smartoptics, hubersuhner, news, market-intel. --- packages/scraper/src/scheduler.ts | 101 ++++-------------------------- 1 file changed, 13 insertions(+), 88 deletions(-) diff --git a/packages/scraper/src/scheduler.ts b/packages/scraper/src/scheduler.ts index c80e8bf..891cd59 100644 --- a/packages/scraper/src/scheduler.ts +++ b/packages/scraper/src/scheduler.ts @@ -266,31 +266,22 @@ export async function registerWorkers(boss: PgBoss): Promise<void> { // Lazy-load all scrapers const { scrapeFs } = await import("./scrapers/fs-com"); const { scrapeCiscoTmg } = await import("./scrapers/cisco-tmg"); - const { scrapeOptcore } = await import("./scrapers/optcore"); + // NOTE: Pi-only scrapers (fluxlight, gbics, optcore, champion-one, sfpcables, + // blueoptics, fiber24, tscom, skylane, ascentoptics, gaotek, smartoptics, + // hubersuhner, news, market-intel) are NOT registered here. + // Pi workers (index-pi.ts) are the SOLE consumers of those queues so that + // all lightweight scraping traffic flows through the Raspberry Pi Starlink nodes. 
const { scrape10Gtek } = await import("./scrapers/tenGtek"); const { scrapeFlexoptixCatalog } = await import("./scrapers/flexoptix-catalog"); const { scrapeFlexoptixVendors } = await import("./scrapers/flexoptix-vendors"); const { seedFlexoptixVendors } = await import("./scrapers/flexoptix-supported-vendors"); - const { scrapeNews } = await import("./scrapers/news"); const { scrapeAtgbics } = await import("./scrapers/atgbics"); const { scrapeProLabs } = await import("./scrapers/prolabs"); - const { scrapeChampionOne } = await import("./scrapers/champion-one"); - const { scrapeFluxlight } = await import("./scrapers/fluxlight"); - const { scrapeGbics } = await import("./scrapers/gbics"); - const { scrapeSfpCables } = await import("./scrapers/sfpcables"); const { scrapeJuniperHct } = await import("./scrapers/juniper-hct"); const { scrapeSonicHcl } = await import("./scrapers/sonic-hcl"); const { scrapeUfiSpace } = await import("./scrapers/ufispace"); const { scrapeEdgecore } = await import("./scrapers/edgecore"); const { scrapeSwitchAssets } = await import("./scrapers/switch-assets"); - const { scrapeBlueOptics } = await import("./scrapers/blueoptics"); - const { scrapeFiber24 } = await import("./scrapers/fiber24"); - const { scrapeTsCom } = await import("./scrapers/tscom"); - const { scrapeSmartOptics } = await import("./scrapers/smartoptics"); - const { scrapeHuberSuhner } = await import("./scrapers/hubersuhner"); - const { scrapeSkylane } = await import("./scrapers/skylane"); - const { scrapeAscentOptics } = await import("./scrapers/ascentoptics"); - const { scrapeGaoTek } = await import("./scrapers/gaotek"); // ── Prediction signal scrapers ──────────────────────────────────────── const { scrapeSecEdgar } = await import("./scrapers/sec-edgar"); const { scrapeGithubSignals } = await import("./scrapers/github-signals"); @@ -322,62 +313,10 @@ export async function registerWorkers(boss: PgBoss): Promise<void> { await withIsolatedStorage("prolabs", scrapeProLabs); }); - // ── 
Fetch/Cheerio scrapers ──────────────────────────────────────────── - - await boss.work("scrape:pricing:fluxlight", async () => { - console.log(`[${new Date().toISOString()}] Running: Fluxlight pricing`); - await withIsolatedStorage("fluxlight", scrapeFluxlight); - }); - - await boss.work("scrape:pricing:gbics", async () => { - console.log(`[${new Date().toISOString()}] Running: GBICs pricing`); - await withIsolatedStorage("gbics", scrapeGbics); - }); - - await boss.work("scrape:pricing:optcore", async () => { - console.log(`[${new Date().toISOString()}] Running: Optcore pricing`); - await withIsolatedStorage("optcore", scrapeOptcore); - }); - - await boss.work("scrape:pricing:champion-one", async () => { - console.log(`[${new Date().toISOString()}] Running: Champion ONE pricing`); - await withIsolatedStorage("champion-one", scrapeChampionOne); - }); - - await boss.work("scrape:pricing:sfpcables", async () => { - console.log(`[${new Date().toISOString()}] Running: SFPCables pricing`); - await withIsolatedStorage("sfpcables", scrapeSfpCables); - }); - - await boss.work("scrape:pricing:blueoptics", async () => { - console.log(`[${new Date().toISOString()}] Running: BlueOptics pricing`); - await withIsolatedStorage("blueoptics", scrapeBlueOptics); - }); - - await boss.work("scrape:pricing:fiber24", async () => { - console.log(`[${new Date().toISOString()}] Running: ShopFiber24 pricing`); - await withIsolatedStorage("fiber24", scrapeFiber24); - }); - - await boss.work("scrape:pricing:tscom", async () => { - console.log(`[${new Date().toISOString()}] Running: T&S Communication pricing`); - await withIsolatedStorage("tscom", scrapeTsCom); - }); - - await boss.work("scrape:pricing:skylane", async () => { - console.log(`[${new Date().toISOString()}] Running: Skylane Optics pricing`); - await withIsolatedStorage("skylane", scrapeSkylane); - }); - - await boss.work("scrape:pricing:ascentoptics", async () => { - console.log(`[${new Date().toISOString()}] Running: AscentOptics 
pricing`); - await withIsolatedStorage("ascentoptics", scrapeAscentOptics); - }); - - await boss.work("scrape:pricing:gaotek", async () => { - console.log(`[${new Date().toISOString()}] Running: GAO Tek pricing`); - await withIsolatedStorage("gaotek", scrapeGaoTek); - }); + // ── Pi-only scrapers: NO boss.work() here ──────────────────────────── + // fluxlight, gbics, optcore, champion-one, sfpcables, blueoptics, fiber24, + // tscom, skylane, ascentoptics, gaotek → handled exclusively by Pi fleet. + // Jobs are dispatched by the cron schedule above; Pi workers consume them. // ── Catalog scrapers ────────────────────────────────────────────────── @@ -386,15 +325,8 @@ export async function registerWorkers(boss: PgBoss): Promise<void> { await scrapeFlexoptixCatalog(); }); - await boss.work("scrape:catalog:smartoptics", async () => { - console.log(`[${new Date().toISOString()}] Running: SmartOptics catalog`); - await withIsolatedStorage("smartoptics", scrapeSmartOptics); - }); - - await boss.work("scrape:catalog:hubersuhner", async () => { - console.log(`[${new Date().toISOString()}] Running: HUBER+SUHNER catalog`); - await withIsolatedStorage("hubersuhner", scrapeHuberSuhner); - }); + // scrape:catalog:smartoptics and scrape:catalog:hubersuhner → Pi-only + // scrape:news and scrape:market-intel → Pi-only (see index-pi.ts) // ── Vendor lists ────────────────────────────────────────────────────── @@ -458,11 +390,7 @@ export async function registerWorkers(boss: PgBoss): Promise<void> { // ── Intelligence & community ────────────────────────────────────────── - await boss.work("scrape:market-intel", async () => { - console.log(`[${new Date().toISOString()}] Running: Market intelligence`); - const { scrapeMarketIntelligence } = await import("./scrapers/market-intelligence"); - await withIsolatedStorage("market-intel", scrapeMarketIntelligence); - }); + // scrape:market-intel → Pi-only await boss.work("scrape:nog-talks", async () => { console.log(`[${new Date().toISOString()}] 
Running: NOG conference talks`); @@ -482,10 +410,7 @@ export async function registerWorkers(boss: PgBoss): Promise<void> { await findAndSeedDatasheetLinks(50); }); - await boss.work("scrape:news", async () => { - console.log(`[${new Date().toISOString()}] Running: News aggregation`); - await scrapeNews(); - }); + // scrape:news → Pi-only await boss.work("scrape:faq", async () => { console.log(`[${new Date().toISOString()}] FAQ scraper — not yet implemented`);