From 398dfcced836ea428f123837d9dea04a5b053ab7 Mon Sep 17 00:00:00 2001 From: Rene Fichtmueller Date: Thu, 2 Apr 2026 09:34:05 +0200 Subject: [PATCH] feat: add index-pi.ts with all 44 workers for Pi fleet scraper nodes Complete Pi scraper entry point covering all pricing, catalog, compat, intelligence and prediction signal scrapers. Includes 5 new form-factor coverage scrapers (comms-express, router-switch, multimode-inc, optictransceiver, wiitek). Erik runs only API+DB, all scraping on Pis. --- packages/scraper/src/index-pi.ts | 235 +++++++++++++++++++++++++++++++ 1 file changed, 235 insertions(+) create mode 100644 packages/scraper/src/index-pi.ts diff --git a/packages/scraper/src/index-pi.ts b/packages/scraper/src/index-pi.ts new file mode 100644 index 0000000..e1ae888 --- /dev/null +++ b/packages/scraper/src/index-pi.ts @@ -0,0 +1,235 @@ +/** + * TIP Pi Scraper Node — index-pi.ts + * + * Runs as a pg-boss worker on Raspberry Pi fleet (Pi1/Pi2/Pi3). + * Handles pricing, catalog, compatibility and intelligence scraping. + * Connects to production DB via WireGuard VPN. + * + * Erik (IONOS VPS) runs ONLY: tip-api, tip-mcp, tip-postgres (Docker), tip-qdrant (Docker) + * All scraping work is distributed to the Pi fleet. + */ +import { config } from "dotenv"; +import { join } from "path"; +config({ path: join(__dirname, "..", "..", "..", ".env") }); + +import PgBoss from "pg-boss"; +import { mkdirSync, rmSync } from "fs"; + +const connectionString = `postgres://${process.env.POSTGRES_USER}:${process.env.POSTGRES_PASSWORD}@${process.env.POSTGRES_HOST}:${process.env.POSTGRES_PORT || "5432"}/${process.env.POSTGRES_DB}`; +const PI_NAME = process.env.PI_NAME || "pi"; + +async function withIsolatedStorage(name: string, fn: () => Promise): Promise { + const dir = `/tmp/tip-crawlee-${name}-${Date.now()}`; + mkdirSync(join(dir, "request_queues", "default"), { recursive: true }); + mkdirSync(join(dir, "datasets", "default"), { recursive: true }); + mkdirSync(join(dir, "key_value_stores", "default"), { recursive: true }); + const prev = process.env.CRAWLEE_STORAGE_DIR; + process.env.CRAWLEE_STORAGE_DIR = dir; + try { await fn(); } + finally { + process.env.CRAWLEE_STORAGE_DIR = prev ?? ""; + try { rmSync(dir, { recursive: true, force: true }); } catch {} + } +} + +const QUEUES = [ + // Pricing scrapers + "scrape:pricing:fs", + "scrape:pricing:10gtek", + "scrape:pricing:prolabs", + "scrape:pricing:atgbics", + "scrape:pricing:optcore", + "scrape:pricing:fluxlight", + "scrape:pricing:gbics", + "scrape:pricing:champion-one", + "scrape:pricing:sfpcables", + "scrape:pricing:blueoptics", + "scrape:pricing:fiber24", + "scrape:pricing:tscom", + "scrape:pricing:skylane", + "scrape:pricing:ascentoptics", + "scrape:pricing:gaotek", + // Form-factor coverage scrapers + "scrape:pricing:comms-express", + "scrape:pricing:router-switch", + "scrape:pricing:multimode-inc", + "scrape:pricing:optictransceiver", + "scrape:pricing:wiitek", + // Catalog scrapers + "scrape:pricing:flexoptix", + "scrape:catalog:smartoptics", + "scrape:catalog:hubersuhner", + // Vendor scrapers + "scrape:vendors:flexoptix", + "scrape:vendors:flexoptix-supported", + // Compatibility scrapers + "scrape:compat:cisco", + "scrape:compat:juniper", + "scrape:compat:sonic", + "scrape:compat:ufispace", + "scrape:compat:edgecore", + // Intelligence + "scrape:news", + "scrape:market-intel", + "scrape:community-issues", + "scrape:datasheet-links", + // Switch assets + "scrape:assets:switches", + // eBay enrichment + "enrich:ebay-transceivers", + "enrich:ebay-switches", + // Prediction signals + "scrape:signals:sec-edgar", + "scrape:signals:github", + "scrape:signals:ebay-velocity", + "scrape:signals:ai-clusters", + "scrape:signals:distributor-leads", + "scrape:signals:standards", + // Compute + "compute:abc", + "compute:reorder-signals", + "compute:forecast", +]; + +async function main() { + console.log(`\n=== TIP Pi Scraper Node [${PI_NAME}] ===`); + console.log(`DB: ${process.env.POSTGRES_HOST}:${process.env.POSTGRES_PORT || 5432}`); + console.log(`Queues: ${QUEUES.length} workers\n`); + + const boss = new PgBoss({ + connectionString, + retryLimit: 2, + retryDelay: 60, + expireInSeconds: 3600, + monitorStateIntervalSeconds: 60, + }); + + boss.on("error", (e: Error) => console.error("pg-boss error:", e.message)); + await boss.start(); + + for (const q of QUEUES) { + await boss.createQueue(q).catch(() => {}); + } + + const log = (q: string) => console.log(`[${new Date().toISOString()}] [${PI_NAME}] ${q}`); + + // ── Pricing scrapers ────────────────────────────────────────────────── + const { scrapeFs } = await import("./scrapers/fs"); + const { scrape10GTek } = await import("./scrapers/10gtek"); + const { scrapeProLabs } = await import("./scrapers/prolabs"); + const { scrapeAtgbics } = await import("./scrapers/atgbics"); + const { scrapeOptcore } = await import("./scrapers/optcore"); + const { scrapeFluxlight } = await import("./scrapers/fluxlight"); + const { scrapeGbics } = await import("./scrapers/gbics"); + const { scrapeChampionOne } = await import("./scrapers/champion-one"); + const { scrapeSfpCables } = await import("./scrapers/sfpcables"); + const { scrapeBlueOptics } = await import("./scrapers/blueoptics"); + const { scrapeFiber24 } = await import("./scrapers/fiber24"); + const { scrapeTsCom } = await import("./scrapers/tscom"); + const { scrapeSkylane } = await import("./scrapers/skylane"); + const { scrapeAscentOptics } = await import("./scrapers/ascentoptics"); + const { scrapeGaoTek } = await import("./scrapers/gaotek"); + + // ── Form-factor coverage scrapers ───────────────────────────────────── + const { scrapeCommsExpress } = await import("./scrapers/comms-express"); + const { scrapeRouterSwitch } = await import("./scrapers/router-switch"); + const { scrapeMultimodeInc } = await import("./scrapers/multimode-inc"); + const { scrapeOpticTransceiver } = await import("./scrapers/optictransceiver"); + const { scrapeWiitek } = await import("./scrapers/wiitek"); + + // ── Catalog scrapers ────────────────────────────────────────────────── + const { scrapeFlexoptixCatalog } = await import("./scrapers/flexoptix-catalog"); + const { scrapeSmartOptics } = await import("./scrapers/smartoptics"); + const { scrapeHuberSuhner } = await import("./scrapers/hubersuhner"); + + // ── Vendor scrapers ─────────────────────────────────────────────────── + const { scrapeFlexoptixVendors } = await import("./scrapers/flexoptix-vendors"); + const { seedFlexoptixVendors } = await import("./scrapers/flexoptix-supported-vendors"); + + // ── Compatibility scrapers ──────────────────────────────────────────── + const { scrapeCiscoTmg } = await import("./scrapers/cisco-tmg"); + const { scrapeJuniperHct } = await import("./scrapers/juniper-hct"); + const { scrapeSonicHcl } = await import("./scrapers/sonic-hcl"); + const { scrapeUfiSpace } = await import("./scrapers/ufispace"); + const { scrapeEdgecore } = await import("./scrapers/edgecore"); + + // ── Intelligence scrapers ───────────────────────────────────────────── + const { scrapeNews } = await import("./scrapers/news"); + const { scrapeMarketIntelligence, computeAbcClassification, computeReorderSignals } = await import("./scrapers/market-intelligence"); + const { scrapeAllSwitchIssues, findAndSeedDatasheetLinks } = await import("./scrapers/community-issues"); + const { scrapeSwitchAssets } = await import("./scrapers/switch-assets"); + + // ── eBay enrichment ─────────────────────────────────────────────────── + const { enrichTransceiversFromEbay, enrichSwitchesFromEbay } = await import("./scrapers/ebay-enricher"); + + // ── Prediction signals ──────────────────────────────────────────────── + const { scrapeSecEdgar } = await import("./scrapers/sec-edgar"); + const { scrapeGithubSignals } = await import("./scrapers/github-signals"); + const { scrapeEbayVelocity } = await import("./scrapers/ebay-velocity"); + const { scrapeAiClusters } = await import("./scrapers/ai-clusters"); + const { scrapeDistributorLeads } = await import("./scrapers/distributor-leads"); + const { scrapeStandardsTracker } = await import("./scrapers/standards-tracker"); + const { runForecastEngine } = await import("./scrapers/forecast-engine"); + + // ── Register workers ────────────────────────────────────────────────── + await boss.work("scrape:pricing:fs", async () => { log("fs"); await withIsolatedStorage("fs", scrapeFs); }); + await boss.work("scrape:pricing:10gtek", async () => { log("10gtek"); await withIsolatedStorage("10gtek", scrape10GTek); }); + await boss.work("scrape:pricing:prolabs", async () => { log("prolabs"); await withIsolatedStorage("prolabs", scrapeProLabs); }); + await boss.work("scrape:pricing:atgbics", async () => { log("atgbics"); await withIsolatedStorage("atgbics", scrapeAtgbics); }); + await boss.work("scrape:pricing:optcore", async () => { log("optcore"); await withIsolatedStorage("optcore", scrapeOptcore); }); + await boss.work("scrape:pricing:fluxlight", async () => { log("fluxlight"); await withIsolatedStorage("fluxlight", scrapeFluxlight); }); + await boss.work("scrape:pricing:gbics", async () => { log("gbics"); await withIsolatedStorage("gbics", scrapeGbics); }); + await boss.work("scrape:pricing:champion-one", async () => { log("champion-one"); await withIsolatedStorage("champion-one", scrapeChampionOne); }); + await boss.work("scrape:pricing:sfpcables", async () => { log("sfpcables"); await withIsolatedStorage("sfpcables", scrapeSfpCables); }); + await boss.work("scrape:pricing:blueoptics", async () => { log("blueoptics"); await withIsolatedStorage("blueoptics", scrapeBlueOptics); }); + await boss.work("scrape:pricing:fiber24", async () => { log("fiber24"); await withIsolatedStorage("fiber24", scrapeFiber24); }); + await boss.work("scrape:pricing:tscom", async () => { log("tscom"); await withIsolatedStorage("tscom", scrapeTsCom); }); + await boss.work("scrape:pricing:skylane", async () => { log("skylane"); await withIsolatedStorage("skylane", scrapeSkylane); }); + await boss.work("scrape:pricing:ascentoptics", async () => { log("ascentoptics"); await withIsolatedStorage("ascentoptics", scrapeAscentOptics); }); + await boss.work("scrape:pricing:gaotek", async () => { log("gaotek"); await withIsolatedStorage("gaotek", scrapeGaoTek); }); + + await boss.work("scrape:pricing:comms-express", async () => { log("comms-express"); await scrapeCommsExpress(); }); + await boss.work("scrape:pricing:router-switch", async () => { log("router-switch"); await scrapeRouterSwitch(); }); + await boss.work("scrape:pricing:multimode-inc", async () => { log("multimode-inc"); await scrapeMultimodeInc(); }); + await boss.work("scrape:pricing:optictransceiver", async () => { log("optictransceiver"); await scrapeOpticTransceiver(); }); + await boss.work("scrape:pricing:wiitek", async () => { log("wiitek"); await scrapeWiitek(); }); + + await boss.work("scrape:pricing:flexoptix", async () => { log("flexoptix-catalog"); await scrapeFlexoptixCatalog(); }); + await boss.work("scrape:catalog:smartoptics", async () => { log("smartoptics"); await withIsolatedStorage("smartoptics", scrapeSmartOptics); }); + await boss.work("scrape:catalog:hubersuhner", async () => { log("hubersuhner"); await withIsolatedStorage("hubersuhner", scrapeHuberSuhner); }); + + await boss.work("scrape:vendors:flexoptix", async () => { log("flexoptix-vendors"); await scrapeFlexoptixVendors(); }); + await boss.work("scrape:vendors:flexoptix-supported", async () => { log("flexoptix-supported"); await seedFlexoptixVendors(); }); + + await boss.work("scrape:compat:cisco", async () => { log("cisco-compat"); await withIsolatedStorage("cisco", scrapeCiscoTmg); }); + await boss.work("scrape:compat:juniper", async () => { log("juniper-compat"); await withIsolatedStorage("juniper", scrapeJuniperHct); }); + await boss.work("scrape:compat:sonic", async () => { log("sonic-compat"); await withIsolatedStorage("sonic", scrapeSonicHcl); }); + await boss.work("scrape:compat:ufispace", async () => { log("ufispace"); await withIsolatedStorage("ufispace", scrapeUfiSpace); }); + await boss.work("scrape:compat:edgecore", async () => { log("edgecore"); await withIsolatedStorage("edgecore", scrapeEdgecore); }); + + await boss.work("scrape:news", async () => { log("news"); await scrapeNews(); }); + await boss.work("scrape:market-intel", async () => { log("market-intel"); await withIsolatedStorage("market-intel", scrapeMarketIntelligence); }); + await boss.work("scrape:community-issues", async () => { log("community"); await withIsolatedStorage("community", () => scrapeAllSwitchIssues(30)); }); + await boss.work("scrape:datasheet-links", async () => { log("datasheets"); await findAndSeedDatasheetLinks(50); }); + await boss.work("scrape:assets:switches", async () => { log("switch-assets"); await withIsolatedStorage("switch-assets", () => scrapeSwitchAssets()); }); + + await boss.work("enrich:ebay-transceivers", async () => { log("ebay-transceivers"); await withIsolatedStorage("ebay-transceivers", () => enrichTransceiversFromEbay(100)); }); + await boss.work("enrich:ebay-switches", async () => { log("ebay-switches"); await withIsolatedStorage("ebay-switches", () => enrichSwitchesFromEbay(30)); }); + + await boss.work("scrape:signals:sec-edgar", async () => { log("sec-edgar"); await scrapeSecEdgar(); }); + await boss.work("scrape:signals:github", async () => { log("github-signals"); await scrapeGithubSignals(); }); + await boss.work("scrape:signals:ebay-velocity", async () => { log("ebay-velocity"); await scrapeEbayVelocity(); }); + await boss.work("scrape:signals:ai-clusters", async () => { log("ai-clusters"); await scrapeAiClusters(); }); + await boss.work("scrape:signals:distributor-leads", async () => { log("distributor-leads"); await scrapeDistributorLeads(); }); + await boss.work("scrape:signals:standards", async () => { log("standards"); await scrapeStandardsTracker(); }); + + await boss.work("compute:abc", async () => { log("abc"); await computeAbcClassification(); }); + await boss.work("compute:reorder-signals", async () => { log("reorder"); await computeReorderSignals(); }); + await boss.work("compute:forecast", async () => { log("forecast"); await runForecastEngine(); }); + + console.log(`${QUEUES.length} workers active — running 24/7\n`); + process.on("SIGTERM", async () => { await boss.stop(); process.exit(0); }); + process.on("SIGINT", async () => { await boss.stop(); process.exit(0); }); +} + +main().catch((e) => { console.error("Fatal:", e); process.exit(1); });