/**
 * TIP Pi Scraper Node — index-pi.ts
 *
 * Runs as a pg-boss worker on Raspberry Pi fleet (Pi1/Pi2/Pi3).
 * Handles pricing, catalog, compatibility and intelligence scraping.
 * Connects to production DB via WireGuard VPN.
 *
 * Erik (IONOS VPS) runs ONLY: tip-api, tip-mcp, tip-postgres (Docker), tip-qdrant (Docker)
 * All scraping work is distributed to the Pi fleet.
 */
import { config } from "dotenv";
import { join } from "path";
// Load the .env file three directories above this file before anything reads process.env.
config({ path: join(__dirname, "..", "..", "..", ".env") });

import PgBoss from "pg-boss";
import { mkdirSync, rmSync } from "fs";

// NOTE(review): user and password are interpolated verbatim. A credential containing
// URL-reserved characters (`@`, `:`, `/`, `#`, ...) would produce a malformed URL unless
// it is already percent-encoded in .env — confirm how the secrets are stored.
const connectionString = `postgres://${process.env.POSTGRES_USER}:${process.env.POSTGRES_PASSWORD}@${process.env.POSTGRES_HOST}:${process.env.POSTGRES_PORT || "5432"}/${process.env.POSTGRES_DB}`;

/** Node name used as a tag in log lines; falls back to "pi" when PI_NAME is unset. */
const PI_NAME = process.env.PI_NAME || "pi";

/**
 * Runs `fn` while `CRAWLEE_STORAGE_DIR` points at a fresh directory under /tmp.
 *
 * The directory is pre-populated with `request_queues/default`, `datasets/default`
 * and `key_value_stores/default`, and is removed again once `fn` settles. The
 * environment variable is put back to its previous state afterwards: restored to the
 * old value, or deleted if it was not set before (assigning "" would leave it set).
 *
 * NOTE(review): `process.env` is process-wide. If two jobs using this helper overlap,
 * they overwrite each other's `CRAWLEE_STORAGE_DIR` and the later restore may point at
 * an already-deleted directory — confirm whether handlers can run concurrently.
 *
 * @param name - Short job name embedded in the directory name.
 * @param fn - The work to run; its resolved value is ignored.
 * @throws Whatever `fn` rejects with; cleanup still runs in that case.
 */
async function withIsolatedStorage(name: string, fn: () => Promise<unknown>): Promise<void> {
  const dir = `/tmp/tip-crawlee-${name}-${Date.now()}`;
  for (const store of ["request_queues", "datasets", "key_value_stores"]) {
    mkdirSync(join(dir, store, "default"), { recursive: true });
  }

  const prev = process.env.CRAWLEE_STORAGE_DIR;
  process.env.CRAWLEE_STORAGE_DIR = dir;
  try {
    await fn();
  } finally {
    if (prev === undefined) {
      delete process.env.CRAWLEE_STORAGE_DIR;
    } else {
      process.env.CRAWLEE_STORAGE_DIR = prev;
    }
    // Cleanup is best-effort: a failure here must not mask the job's own outcome.
    try {
      rmSync(dir, { recursive: true, force: true });
    } catch (e: unknown) {
      console.warn(`Could not remove ${dir}:`, e instanceof Error ? e.message : e);
    }
  }
}

/** Every queue this node creates at startup; each one has a worker in `loadWorkers`. */
const QUEUES = [
  // Pricing scrapers
  "scrape:pricing:fs",
  "scrape:pricing:10gtek",
  "scrape:pricing:prolabs",
  "scrape:pricing:atgbics",
  "scrape:pricing:optcore",
  "scrape:pricing:fluxlight",
  "scrape:pricing:gbics",
  "scrape:pricing:champion-one",
  "scrape:pricing:sfpcables",
  "scrape:pricing:blueoptics",
  "scrape:pricing:fiber24",
  "scrape:pricing:tscom",
  "scrape:pricing:skylane",
  "scrape:pricing:ascentoptics",
  "scrape:pricing:gaotek",
  // Form-factor coverage scrapers
  "scrape:pricing:comms-express",
  "scrape:pricing:router-switch",
  "scrape:pricing:multimode-inc",
  "scrape:pricing:optictransceiver",
  "scrape:pricing:wiitek",
  // Catalog scrapers
  "scrape:pricing:flexoptix",
  "scrape:catalog:smartoptics",
  "scrape:catalog:hubersuhner",
  // Vendor scrapers
  "scrape:vendors:flexoptix",
  "scrape:vendors:flexoptix-supported",
  // Compatibility scrapers
  "scrape:compat:cisco",
  "scrape:compat:juniper",
  "scrape:compat:sonic",
  "scrape:compat:ufispace",
  "scrape:compat:edgecore",
  // Intelligence
  "scrape:news",
  "scrape:market-intel",
  "scrape:nog-talks",
  "scrape:community-issues",
  "scrape:datasheet-links",
  // Switch assets
  "scrape:assets:switches",
  // eBay enrichment
  "enrich:ebay-transceivers",
  "enrich:ebay-switches",
  // Prediction signals
  "scrape:signals:sec-edgar",
  "scrape:signals:github",
  "scrape:signals:ebay-velocity",
  "scrape:signals:ai-clusters",
  "scrape:signals:distributor-leads",
  "scrape:signals:standards",
  // Compute
  "compute:abc",
  "compute:reorder-signals",
  "compute:forecast",
];

/** One queue-to-handler binding: the queue name, the tag logged per job, and the work. */
interface WorkerSpec {
  queue: string;
  label: string;
  handle: () => Promise<void>;
}

/** Binds a queue to a job that is invoked directly. */
function plain(queue: string, label: string, job: () => unknown): WorkerSpec {
  return {
    queue,
    label,
    handle: async () => {
      await job();
    },
  };
}

/** Binds a queue to a job that runs inside `withIsolatedStorage` under `storage`. */
function isolated(
  queue: string,
  label: string,
  storage: string,
  job: () => Promise<unknown>,
): WorkerSpec {
  return { queue, label, handle: () => withIsolatedStorage(storage, job) };
}

/**
 * Imports the scraper modules and returns the worker table in registration order.
 *
 * Modules are imported one after another, in the same order as before. Which jobs get
 * isolated storage, and the arguments passed to each scraper, are kept exactly as they
 * were; only the `nog-talks` module is imported lazily, when its job runs.
 */
async function loadWorkers(): Promise<WorkerSpec[]> {
  // ── Pricing scrapers ──────────────────────────────────────────────────
  const { scrapeFs } = await import("./scrapers/fs-com");
  const { scrape10Gtek } = await import("./scrapers/tenGtek");
  const { scrapeProLabs } = await import("./scrapers/prolabs");
  const { scrapeAtgbics } = await import("./scrapers/atgbics");
  const { scrapeOptcore } = await import("./scrapers/optcore");
  const { scrapeFluxlight } = await import("./scrapers/fluxlight");
  const { scrapeGbics } = await import("./scrapers/gbics");
  const { scrapeChampionOne } = await import("./scrapers/champion-one");
  const { scrapeSfpCables } = await import("./scrapers/sfpcables");
  const { scrapeBlueOptics } = await import("./scrapers/blueoptics");
  const { scrapeFiber24 } = await import("./scrapers/fiber24");
  const { scrapeTsCom } = await import("./scrapers/tscom");
  const { scrapeSkylane } = await import("./scrapers/skylane");
  const { scrapeAscentOptics } = await import("./scrapers/ascentoptics");
  const { scrapeGaoTek } = await import("./scrapers/gaotek");

  // ── Form-factor coverage scrapers ─────────────────────────────────────
  const { scrapeCommsExpress } = await import("./scrapers/comms-express");
  const { scrapeRouterSwitch } = await import("./scrapers/router-switch");
  const { scrapeMultimodeInc } = await import("./scrapers/multimode-inc");
  const { scrapeOpticTransceiver } = await import("./scrapers/optictransceiver");
  const { scrapeWiitek } = await import("./scrapers/wiitek");

  // ── Catalog scrapers ──────────────────────────────────────────────────
  const { scrapeFlexoptixCatalog } = await import("./scrapers/flexoptix-catalog");
  const { scrapeSmartOptics } = await import("./scrapers/smartoptics");
  const { scrapeHuberSuhner } = await import("./scrapers/hubersuhner");

  // ── Vendor scrapers ───────────────────────────────────────────────────
  const { scrapeFlexoptixVendors } = await import("./scrapers/flexoptix-vendors");
  const { seedFlexoptixVendors } = await import("./scrapers/flexoptix-supported-vendors");

  // ── Compatibility scrapers ────────────────────────────────────────────
  const { scrapeCiscoTmg } = await import("./scrapers/cisco-tmg");
  const { scrapeJuniperHct } = await import("./scrapers/juniper-hct");
  const { scrapeSonicHcl } = await import("./scrapers/sonic-hcl");
  const { scrapeUfiSpace } = await import("./scrapers/ufispace");
  const { scrapeEdgecore } = await import("./scrapers/edgecore");

  // ── Intelligence scrapers ─────────────────────────────────────────────
  const { scrapeNews } = await import("./scrapers/news");
  const { scrapeMarketIntelligence, computeAbcClassification, computeReorderSignals } =
    await import("./scrapers/market-intelligence");
  const { scrapeAllSwitchIssues, findAndSeedDatasheetLinks } = await import(
    "./scrapers/community-issues"
  );
  const { scrapeSwitchAssets } = await import("./scrapers/switch-assets");

  // ── eBay enrichment ───────────────────────────────────────────────────
  const { enrichTransceiversFromEbay, enrichSwitchesFromEbay } = await import(
    "./scrapers/ebay-enricher"
  );

  // ── Prediction signals ────────────────────────────────────────────────
  const { scrapeSecEdgar } = await import("./scrapers/sec-edgar");
  const { scrapeGithubSignals } = await import("./scrapers/github-signals");
  const { scrapeEbayVelocity } = await import("./scrapers/ebay-velocity");
  const { scrapeAiClusters } = await import("./scrapers/ai-clusters");
  const { scrapeDistributorLeads } = await import("./scrapers/distributor-leads");
  const { scrapeStandardsTracker } = await import("./scrapers/standards-tracker");
  const { runForecastEngine } = await import("./utils/forecast-engine");

  return [
    // Pricing scrapers
    isolated("scrape:pricing:fs", "fs", "fs", scrapeFs),
    isolated("scrape:pricing:10gtek", "10gtek", "10gtek", scrape10Gtek),
    isolated("scrape:pricing:prolabs", "prolabs", "prolabs", scrapeProLabs),
    isolated("scrape:pricing:atgbics", "atgbics", "atgbics", scrapeAtgbics),
    isolated("scrape:pricing:optcore", "optcore", "optcore", scrapeOptcore),
    isolated("scrape:pricing:fluxlight", "fluxlight", "fluxlight", scrapeFluxlight),
    isolated("scrape:pricing:gbics", "gbics", "gbics", scrapeGbics),
    isolated("scrape:pricing:champion-one", "champion-one", "champion-one", scrapeChampionOne),
    isolated("scrape:pricing:sfpcables", "sfpcables", "sfpcables", scrapeSfpCables),
    isolated("scrape:pricing:blueoptics", "blueoptics", "blueoptics", scrapeBlueOptics),
    isolated("scrape:pricing:fiber24", "fiber24", "fiber24", scrapeFiber24),
    isolated("scrape:pricing:tscom", "tscom", "tscom", scrapeTsCom),
    isolated("scrape:pricing:skylane", "skylane", "skylane", scrapeSkylane),
    isolated("scrape:pricing:ascentoptics", "ascentoptics", "ascentoptics", scrapeAscentOptics),
    isolated("scrape:pricing:gaotek", "gaotek", "gaotek", scrapeGaoTek),

    // Form-factor coverage scrapers
    plain("scrape:pricing:comms-express", "comms-express", () => scrapeCommsExpress()),
    plain("scrape:pricing:router-switch", "router-switch", () => scrapeRouterSwitch()),
    plain("scrape:pricing:multimode-inc", "multimode-inc", () => scrapeMultimodeInc()),
    plain("scrape:pricing:optictransceiver", "optictransceiver", () => scrapeOpticTransceiver()),
    plain("scrape:pricing:wiitek", "wiitek", () => scrapeWiitek()),

    // Catalog scrapers (the Flexoptix catalog job is queued under a "pricing" name)
    plain("scrape:pricing:flexoptix", "flexoptix-catalog", () => scrapeFlexoptixCatalog()),
    isolated("scrape:catalog:smartoptics", "smartoptics", "smartoptics", scrapeSmartOptics),
    isolated("scrape:catalog:hubersuhner", "hubersuhner", "hubersuhner", scrapeHuberSuhner),

    // Vendor scrapers
    plain("scrape:vendors:flexoptix", "flexoptix-vendors", () => scrapeFlexoptixVendors()),
    plain("scrape:vendors:flexoptix-supported", "flexoptix-supported", () =>
      seedFlexoptixVendors(),
    ),

    // Compatibility scrapers
    isolated("scrape:compat:cisco", "cisco-compat", "cisco", scrapeCiscoTmg),
    isolated("scrape:compat:juniper", "juniper-compat", "juniper", scrapeJuniperHct),
    isolated("scrape:compat:sonic", "sonic-compat", "sonic", scrapeSonicHcl),
    isolated("scrape:compat:ufispace", "ufispace", "ufispace", scrapeUfiSpace),
    isolated("scrape:compat:edgecore", "edgecore", "edgecore", scrapeEdgecore),

    // Intelligence
    plain("scrape:news", "news", () => scrapeNews()),
    isolated("scrape:market-intel", "market-intel", "market-intel", scrapeMarketIntelligence),
    plain("scrape:nog-talks", "nog-talks", async () => {
      const { scrapeNogTalks } = await import("./scrapers/nog-talks");
      await scrapeNogTalks();
    }),
    isolated("scrape:community-issues", "community", "community", () =>
      scrapeAllSwitchIssues(30),
    ),
    plain("scrape:datasheet-links", "datasheets", () => findAndSeedDatasheetLinks(50)),

    // Switch assets
    isolated("scrape:assets:switches", "switch-assets", "switch-assets", () =>
      scrapeSwitchAssets(),
    ),

    // eBay enrichment
    isolated("enrich:ebay-transceivers", "ebay-transceivers", "ebay-transceivers", () =>
      enrichTransceiversFromEbay(100),
    ),
    isolated("enrich:ebay-switches", "ebay-switches", "ebay-switches", () =>
      enrichSwitchesFromEbay(30),
    ),

    // Prediction signals
    plain("scrape:signals:sec-edgar", "sec-edgar", () => scrapeSecEdgar()),
    plain("scrape:signals:github", "github-signals", () => scrapeGithubSignals()),
    plain("scrape:signals:ebay-velocity", "ebay-velocity", () => scrapeEbayVelocity()),
    plain("scrape:signals:ai-clusters", "ai-clusters", () => scrapeAiClusters()),
    plain("scrape:signals:distributor-leads", "distributor-leads", () =>
      scrapeDistributorLeads(),
    ),
    plain("scrape:signals:standards", "standards", () => scrapeStandardsTracker()),

    // Compute
    plain("compute:abc", "abc", () => computeAbcClassification()),
    plain("compute:reorder-signals", "reorder", () => computeReorderSignals()),
    plain("compute:forecast", "forecast", () => runForecastEngine()),
  ];
}

/**
 * Starts pg-boss, creates every queue in `QUEUES`, registers one worker per entry of
 * the worker table, and installs SIGTERM/SIGINT handlers that stop pg-boss and exit.
 *
 * @throws If pg-boss fails to start, a scraper module fails to import, or a worker
 *   cannot be registered; the caller at the bottom of the file exits with code 1.
 */
async function main(): Promise<void> {
  console.log(`\n=== TIP Pi Scraper Node [${PI_NAME}] ===`);
  console.log(`DB: ${process.env.POSTGRES_HOST}:${process.env.POSTGRES_PORT || 5432}`);
  console.log(`Queues: ${QUEUES.length} workers\n`);

  const boss = new PgBoss({
    connectionString,
    retryLimit: 2,
    retryDelay: 60,
    expireInSeconds: 3600,
    monitorStateIntervalSeconds: 60,
  });

  boss.on("error", (e: Error) => console.error("pg-boss error:", e.message));
  await boss.start();

  // Queue creation stays best-effort so one failure does not abort startup, but the
  // reason is logged instead of being dropped.
  for (const q of QUEUES) {
    try {
      await boss.createQueue(q);
    } catch (e: unknown) {
      console.warn(`createQueue(${q}) failed:`, e instanceof Error ? e.message : e);
    }
  }

  const log = (q: string) => console.log(`[${new Date().toISOString()}] [${PI_NAME}] ${q}`);

  // ── Register workers ──────────────────────────────────────────────────
  const workers = await loadWorkers();
  for (const { queue, label, handle } of workers) {
    // The handler takes no arguments on purpose: the job payload is not used.
    await boss.work(queue, async () => {
      log(label);
      await handle();
    });
  }

  console.log(`${QUEUES.length} workers active — running 24/7\n`);

  // Stop pg-boss before exiting. A failing stop() is logged and reported through the
  // exit code rather than surfacing as an unhandled promise rejection.
  const shutdown = async (signal: string): Promise<void> => {
    try {
      await boss.stop();
    } catch (e: unknown) {
      console.error(`pg-boss stop failed on ${signal}:`, e);
      process.exit(1);
    }
    process.exit(0);
  };
  process.on("SIGTERM", () => {
    void shutdown("SIGTERM");
  });
  process.on("SIGINT", () => {
    void shutdown("SIGINT");
  });
}

main().catch((e) => {
  console.error("Fatal:", e);
  process.exit(1);
});