fix: route Pi-destined scrapers exclusively to Pi worker fleet

Remove the boss.work() registrations for the lightweight fetch/cheerio
scrapers from Erik's scheduler. The Pi workers are now the SOLE consumers
of these queues:
- scrape:pricing:{fluxlight, gbics, optcore, champion-one, sfpcables,
  blueoptics, fiber24, tscom, skylane, ascentoptics, gaotek}
- scrape:catalog:{smartoptics, hubersuhner}
- scrape:news
- scrape:market-intel
This commit is contained in:
Rene Fichtmueller 2026-04-09 20:50:57 +02:00
parent 692133f2ea
commit 2deb8121ba

View File

@ -266,31 +266,22 @@ export async function registerWorkers(boss: PgBoss): Promise<void> {
// Lazy-load all scrapers // Lazy-load all scrapers
const { scrapeFs } = await import("./scrapers/fs-com"); const { scrapeFs } = await import("./scrapers/fs-com");
const { scrapeCiscoTmg } = await import("./scrapers/cisco-tmg"); const { scrapeCiscoTmg } = await import("./scrapers/cisco-tmg");
const { scrapeOptcore } = await import("./scrapers/optcore"); // NOTE: Pi-only scrapers (fluxlight, gbics, optcore, champion-one, sfpcables,
// blueoptics, fiber24, tscom, skylane, ascentoptics, gaotek, smartoptics,
// hubersuhner, news, market-intel) are NOT registered here.
// Pi workers (index-pi.ts) are the SOLE consumers of those queues so that
// all lightweight scraping traffic flows through the Raspberry Pi Starlink nodes.
const { scrape10Gtek } = await import("./scrapers/tenGtek"); const { scrape10Gtek } = await import("./scrapers/tenGtek");
const { scrapeFlexoptixCatalog } = await import("./scrapers/flexoptix-catalog"); const { scrapeFlexoptixCatalog } = await import("./scrapers/flexoptix-catalog");
const { scrapeFlexoptixVendors } = await import("./scrapers/flexoptix-vendors"); const { scrapeFlexoptixVendors } = await import("./scrapers/flexoptix-vendors");
const { seedFlexoptixVendors } = await import("./scrapers/flexoptix-supported-vendors"); const { seedFlexoptixVendors } = await import("./scrapers/flexoptix-supported-vendors");
const { scrapeNews } = await import("./scrapers/news");
const { scrapeAtgbics } = await import("./scrapers/atgbics"); const { scrapeAtgbics } = await import("./scrapers/atgbics");
const { scrapeProLabs } = await import("./scrapers/prolabs"); const { scrapeProLabs } = await import("./scrapers/prolabs");
const { scrapeChampionOne } = await import("./scrapers/champion-one");
const { scrapeFluxlight } = await import("./scrapers/fluxlight");
const { scrapeGbics } = await import("./scrapers/gbics");
const { scrapeSfpCables } = await import("./scrapers/sfpcables");
const { scrapeJuniperHct } = await import("./scrapers/juniper-hct"); const { scrapeJuniperHct } = await import("./scrapers/juniper-hct");
const { scrapeSonicHcl } = await import("./scrapers/sonic-hcl"); const { scrapeSonicHcl } = await import("./scrapers/sonic-hcl");
const { scrapeUfiSpace } = await import("./scrapers/ufispace"); const { scrapeUfiSpace } = await import("./scrapers/ufispace");
const { scrapeEdgecore } = await import("./scrapers/edgecore"); const { scrapeEdgecore } = await import("./scrapers/edgecore");
const { scrapeSwitchAssets } = await import("./scrapers/switch-assets"); const { scrapeSwitchAssets } = await import("./scrapers/switch-assets");
const { scrapeBlueOptics } = await import("./scrapers/blueoptics");
const { scrapeFiber24 } = await import("./scrapers/fiber24");
const { scrapeTsCom } = await import("./scrapers/tscom");
const { scrapeSmartOptics } = await import("./scrapers/smartoptics");
const { scrapeHuberSuhner } = await import("./scrapers/hubersuhner");
const { scrapeSkylane } = await import("./scrapers/skylane");
const { scrapeAscentOptics } = await import("./scrapers/ascentoptics");
const { scrapeGaoTek } = await import("./scrapers/gaotek");
// ── Prediction signal scrapers ──────────────────────────────────────── // ── Prediction signal scrapers ────────────────────────────────────────
const { scrapeSecEdgar } = await import("./scrapers/sec-edgar"); const { scrapeSecEdgar } = await import("./scrapers/sec-edgar");
const { scrapeGithubSignals } = await import("./scrapers/github-signals"); const { scrapeGithubSignals } = await import("./scrapers/github-signals");
@ -322,62 +313,10 @@ export async function registerWorkers(boss: PgBoss): Promise<void> {
await withIsolatedStorage("prolabs", scrapeProLabs); await withIsolatedStorage("prolabs", scrapeProLabs);
}); });
// ── Fetch/Cheerio scrapers ──────────────────────────────────────────── // ── Pi-only scrapers: NO boss.work() here ────────────────────────────
// fluxlight, gbics, optcore, champion-one, sfpcables, blueoptics, fiber24,
await boss.work("scrape:pricing:fluxlight", async () => { // tscom, skylane, ascentoptics, gaotek → handled exclusively by Pi fleet.
console.log(`[${new Date().toISOString()}] Running: Fluxlight pricing`); // Jobs are dispatched by the cron schedule above; Pi workers consume them.
await withIsolatedStorage("fluxlight", scrapeFluxlight);
});
await boss.work("scrape:pricing:gbics", async () => {
console.log(`[${new Date().toISOString()}] Running: GBICs pricing`);
await withIsolatedStorage("gbics", scrapeGbics);
});
await boss.work("scrape:pricing:optcore", async () => {
console.log(`[${new Date().toISOString()}] Running: Optcore pricing`);
await withIsolatedStorage("optcore", scrapeOptcore);
});
await boss.work("scrape:pricing:champion-one", async () => {
console.log(`[${new Date().toISOString()}] Running: Champion ONE pricing`);
await withIsolatedStorage("champion-one", scrapeChampionOne);
});
await boss.work("scrape:pricing:sfpcables", async () => {
console.log(`[${new Date().toISOString()}] Running: SFPCables pricing`);
await withIsolatedStorage("sfpcables", scrapeSfpCables);
});
await boss.work("scrape:pricing:blueoptics", async () => {
console.log(`[${new Date().toISOString()}] Running: BlueOptics pricing`);
await withIsolatedStorage("blueoptics", scrapeBlueOptics);
});
await boss.work("scrape:pricing:fiber24", async () => {
console.log(`[${new Date().toISOString()}] Running: ShopFiber24 pricing`);
await withIsolatedStorage("fiber24", scrapeFiber24);
});
await boss.work("scrape:pricing:tscom", async () => {
console.log(`[${new Date().toISOString()}] Running: T&S Communication pricing`);
await withIsolatedStorage("tscom", scrapeTsCom);
});
await boss.work("scrape:pricing:skylane", async () => {
console.log(`[${new Date().toISOString()}] Running: Skylane Optics pricing`);
await withIsolatedStorage("skylane", scrapeSkylane);
});
await boss.work("scrape:pricing:ascentoptics", async () => {
console.log(`[${new Date().toISOString()}] Running: AscentOptics pricing`);
await withIsolatedStorage("ascentoptics", scrapeAscentOptics);
});
await boss.work("scrape:pricing:gaotek", async () => {
console.log(`[${new Date().toISOString()}] Running: GAO Tek pricing`);
await withIsolatedStorage("gaotek", scrapeGaoTek);
});
// ── Catalog scrapers ────────────────────────────────────────────────── // ── Catalog scrapers ──────────────────────────────────────────────────
@ -386,15 +325,8 @@ export async function registerWorkers(boss: PgBoss): Promise<void> {
await scrapeFlexoptixCatalog(); await scrapeFlexoptixCatalog();
}); });
await boss.work("scrape:catalog:smartoptics", async () => { // scrape:catalog:smartoptics and scrape:catalog:hubersuhner → Pi-only
console.log(`[${new Date().toISOString()}] Running: SmartOptics catalog`); // scrape:news and scrape:market-intel → Pi-only (see index-pi.ts)
await withIsolatedStorage("smartoptics", scrapeSmartOptics);
});
await boss.work("scrape:catalog:hubersuhner", async () => {
console.log(`[${new Date().toISOString()}] Running: HUBER+SUHNER catalog`);
await withIsolatedStorage("hubersuhner", scrapeHuberSuhner);
});
// ── Vendor lists ────────────────────────────────────────────────────── // ── Vendor lists ──────────────────────────────────────────────────────
@ -458,11 +390,7 @@ export async function registerWorkers(boss: PgBoss): Promise<void> {
// ── Intelligence & community ────────────────────────────────────────── // ── Intelligence & community ──────────────────────────────────────────
await boss.work("scrape:market-intel", async () => { // scrape:market-intel → Pi-only
console.log(`[${new Date().toISOString()}] Running: Market intelligence`);
const { scrapeMarketIntelligence } = await import("./scrapers/market-intelligence");
await withIsolatedStorage("market-intel", scrapeMarketIntelligence);
});
await boss.work("scrape:nog-talks", async () => { await boss.work("scrape:nog-talks", async () => {
console.log(`[${new Date().toISOString()}] Running: NOG conference talks`); console.log(`[${new Date().toISOString()}] Running: NOG conference talks`);
@ -482,10 +410,7 @@ export async function registerWorkers(boss: PgBoss): Promise<void> {
await findAndSeedDatasheetLinks(50); await findAndSeedDatasheetLinks(50);
}); });
await boss.work("scrape:news", async () => { // scrape:news → Pi-only
console.log(`[${new Date().toISOString()}] Running: News aggregation`);
await scrapeNews();
});
await boss.work("scrape:faq", async () => { await boss.work("scrape:faq", async () => {
console.log(`[${new Date().toISOString()}] FAQ scraper — not yet implemented`); console.log(`[${new Date().toISOString()}] FAQ scraper — not yet implemented`);