Rene Fichtmueller 6ccaa03932 feat: add index-pi.ts with all 44 workers for Pi fleet scraper nodes
Complete Pi scraper entry point covering all pricing, catalog, compat,
intelligence and prediction signal scrapers. Includes 5 new form-factor
coverage scrapers (comms-express, router-switch, multimode-inc,
optictransceiver, wiitek). Erik runs only API+DB, all scraping on Pis.
2026-04-02 09:34:05 +02:00

236 lines
15 KiB
TypeScript

/**
* TIP Pi Scraper Node — index-pi.ts
*
* Runs as a pg-boss worker on Raspberry Pi fleet (Pi1/Pi2/Pi3).
* Handles pricing, catalog, compatibility and intelligence scraping.
* Connects to production DB via WireGuard VPN.
*
* Erik (IONOS VPS) runs ONLY: tip-api, tip-mcp, tip-postgres (Docker), tip-qdrant (Docker)
* All scraping work is distributed to the Pi fleet.
*/
import { config } from "dotenv";
import { join } from "path";
config({ path: join(__dirname, "..", "..", "..", ".env") });
import PgBoss from "pg-boss";
import { mkdirSync, rmSync } from "fs";
/**
 * Builds the Postgres connection URI from `POSTGRES_*` variables.
 *
 * User and password are percent-encoded so that credentials containing
 * URI-reserved characters (`@`, `:`, `/`, `#`, `?`, …) cannot corrupt the URI.
 * The values in the environment must therefore be the raw credentials, not
 * pre-encoded ones.
 *
 * Unset variables render as the literal text "undefined", exactly as plain
 * template interpolation would; the port falls back to "5432" when unset or empty.
 *
 * @param env - Environment-like map to read the `POSTGRES_*` values from.
 * @returns A `postgres://user:password@host:port/db` connection string.
 */
function buildConnectionString(env: Record<string, string | undefined>): string {
  const user = encodeURIComponent(`${env.POSTGRES_USER}`);
  const password = encodeURIComponent(`${env.POSTGRES_PASSWORD}`);
  const port = env.POSTGRES_PORT || "5432";
  return `postgres://${user}:${password}@${env.POSTGRES_HOST}:${port}/${env.POSTGRES_DB}`;
}

/** Connection string handed to pg-boss in `main()`. */
const connectionString = buildConnectionString(process.env);

/** Node name used in log lines; defaults to "pi" when `PI_NAME` is unset or empty. */
const PI_NAME = process.env.PI_NAME || "pi";
/**
 * Runs `fn` while `CRAWLEE_STORAGE_DIR` points at a private, throw-away directory.
 *
 * A fresh `/tmp/tip-crawlee-<name>-<timestamp>` directory is created with the
 * `request_queues/default`, `datasets/default` and `key_value_stores/default`
 * sub-directories. After `fn` settles — whether it resolves or rejects — the
 * previous value of `CRAWLEE_STORAGE_DIR` is restored and the directory is removed.
 *
 * NOTE: `process.env` is process-wide. If two calls overlap in time, each one
 * sees (and later restores) whatever value the other had set.
 *
 * @param name - Label embedded in the temporary directory name.
 * @param fn - The work to run with the isolated storage directory in place.
 * @returns Resolves when `fn` resolves; rejects with whatever `fn` rejects with.
 */
async function withIsolatedStorage(name: string, fn: () => Promise<void>): Promise<void> {
  const dir = `/tmp/tip-crawlee-${name}-${Date.now()}`;
  for (const store of ["request_queues", "datasets", "key_value_stores"]) {
    mkdirSync(join(dir, store, "default"), { recursive: true });
  }

  const prev = process.env.CRAWLEE_STORAGE_DIR;
  process.env.CRAWLEE_STORAGE_DIR = dir;
  try {
    await fn();
  } finally {
    // Restore the exact prior state. An unset variable must be deleted:
    // assigning "" would leave it set-but-empty for everything that runs later.
    if (prev === undefined) {
      delete process.env.CRAWLEE_STORAGE_DIR;
    } else {
      process.env.CRAWLEE_STORAGE_DIR = prev;
    }
    try {
      rmSync(dir, { recursive: true, force: true });
    } catch {
      // Best-effort cleanup: a leftover temp directory must not fail the job.
    }
  }
}
// Queue names grouped by category. QUEUES below concatenates the groups in
// this order; main() creates every listed queue before registering workers.

// Pricing scrapers
const PRICING_QUEUES = [
  "scrape:pricing:fs",
  "scrape:pricing:10gtek",
  "scrape:pricing:prolabs",
  "scrape:pricing:atgbics",
  "scrape:pricing:optcore",
  "scrape:pricing:fluxlight",
  "scrape:pricing:gbics",
  "scrape:pricing:champion-one",
  "scrape:pricing:sfpcables",
  "scrape:pricing:blueoptics",
  "scrape:pricing:fiber24",
  "scrape:pricing:tscom",
  "scrape:pricing:skylane",
  "scrape:pricing:ascentoptics",
  "scrape:pricing:gaotek",
];

// Form-factor coverage scrapers
const FORM_FACTOR_QUEUES = [
  "scrape:pricing:comms-express",
  "scrape:pricing:router-switch",
  "scrape:pricing:multimode-inc",
  "scrape:pricing:optictransceiver",
  "scrape:pricing:wiitek",
];

// Catalog scrapers (the Flexoptix catalog queue keeps its "scrape:pricing:" name)
const CATALOG_QUEUES = [
  "scrape:pricing:flexoptix",
  "scrape:catalog:smartoptics",
  "scrape:catalog:hubersuhner",
];

// Vendor scrapers
const VENDOR_QUEUES = [
  "scrape:vendors:flexoptix",
  "scrape:vendors:flexoptix-supported",
];

// Compatibility scrapers
const COMPAT_QUEUES = [
  "scrape:compat:cisco",
  "scrape:compat:juniper",
  "scrape:compat:sonic",
  "scrape:compat:ufispace",
  "scrape:compat:edgecore",
];

// Intelligence
const INTELLIGENCE_QUEUES = [
  "scrape:news",
  "scrape:market-intel",
  "scrape:community-issues",
  "scrape:datasheet-links",
];

// Switch assets
const ASSET_QUEUES = ["scrape:assets:switches"];

// eBay enrichment
const EBAY_QUEUES = [
  "enrich:ebay-transceivers",
  "enrich:ebay-switches",
];

// Prediction signals
const SIGNAL_QUEUES = [
  "scrape:signals:sec-edgar",
  "scrape:signals:github",
  "scrape:signals:ebay-velocity",
  "scrape:signals:ai-clusters",
  "scrape:signals:distributor-leads",
  "scrape:signals:standards",
];

// Compute
const COMPUTE_QUEUES = [
  "compute:abc",
  "compute:reorder-signals",
  "compute:forecast",
];

/** Every queue this node serves, in category order. */
const QUEUES = [
  ...PRICING_QUEUES,
  ...FORM_FACTOR_QUEUES,
  ...CATALOG_QUEUES,
  ...VENDOR_QUEUES,
  ...COMPAT_QUEUES,
  ...INTELLIGENCE_QUEUES,
  ...ASSET_QUEUES,
  ...EBAY_QUEUES,
  ...SIGNAL_QUEUES,
  ...COMPUTE_QUEUES,
];
/**
 * Boots this scraper node.
 *
 * Steps, in order:
 *  1. Print a startup banner (node name, DB host/port, queue count).
 *  2. Start pg-boss and create every queue listed in `QUEUES`.
 *  3. Dynamically import all scraper modules.
 *  4. Register one pg-boss worker per queue; each worker logs its label and
 *     then runs its scraper, inside `withIsolatedStorage` where configured.
 *  5. Install SIGTERM/SIGINT handlers that stop pg-boss and exit.
 *
 * Rejects if pg-boss fails to start, a scraper module fails to import, or a
 * worker registration fails.
 */
async function main(): Promise<void> {
  console.log(`\n=== TIP Pi Scraper Node [${PI_NAME}] ===`);
  console.log(`DB: ${process.env.POSTGRES_HOST}:${process.env.POSTGRES_PORT || 5432}`);
  console.log(`Queues: ${QUEUES.length} workers\n`);

  const boss = new PgBoss({
    connectionString,
    retryLimit: 2,
    retryDelay: 60,
    expireInSeconds: 3600,
    monitorStateIntervalSeconds: 60,
  });
  boss.on("error", (e: Error) => console.error("pg-boss error:", e.message));
  await boss.start();

  // Queue creation stays best-effort (a failure does not abort startup), but
  // the reason is logged so a real problem is not hidden.
  for (const q of QUEUES) {
    try {
      await boss.createQueue(q);
    } catch (err: unknown) {
      console.warn(`createQueue(${q}) failed:`, err instanceof Error ? err.message : err);
    }
  }

  const log = (q: string) => console.log(`[${new Date().toISOString()}] [${PI_NAME}] ${q}`);

  // ── Pricing scrapers ──────────────────────────────────────────────────
  const { scrapeFs } = await import("./scrapers/fs");
  const { scrape10GTek } = await import("./scrapers/10gtek");
  const { scrapeProLabs } = await import("./scrapers/prolabs");
  const { scrapeAtgbics } = await import("./scrapers/atgbics");
  const { scrapeOptcore } = await import("./scrapers/optcore");
  const { scrapeFluxlight } = await import("./scrapers/fluxlight");
  const { scrapeGbics } = await import("./scrapers/gbics");
  const { scrapeChampionOne } = await import("./scrapers/champion-one");
  const { scrapeSfpCables } = await import("./scrapers/sfpcables");
  const { scrapeBlueOptics } = await import("./scrapers/blueoptics");
  const { scrapeFiber24 } = await import("./scrapers/fiber24");
  const { scrapeTsCom } = await import("./scrapers/tscom");
  const { scrapeSkylane } = await import("./scrapers/skylane");
  const { scrapeAscentOptics } = await import("./scrapers/ascentoptics");
  const { scrapeGaoTek } = await import("./scrapers/gaotek");
  // ── Form-factor coverage scrapers ─────────────────────────────────────
  const { scrapeCommsExpress } = await import("./scrapers/comms-express");
  const { scrapeRouterSwitch } = await import("./scrapers/router-switch");
  const { scrapeMultimodeInc } = await import("./scrapers/multimode-inc");
  const { scrapeOpticTransceiver } = await import("./scrapers/optictransceiver");
  const { scrapeWiitek } = await import("./scrapers/wiitek");
  // ── Catalog scrapers ──────────────────────────────────────────────────
  const { scrapeFlexoptixCatalog } = await import("./scrapers/flexoptix-catalog");
  const { scrapeSmartOptics } = await import("./scrapers/smartoptics");
  const { scrapeHuberSuhner } = await import("./scrapers/hubersuhner");
  // ── Vendor scrapers ───────────────────────────────────────────────────
  const { scrapeFlexoptixVendors } = await import("./scrapers/flexoptix-vendors");
  const { seedFlexoptixVendors } = await import("./scrapers/flexoptix-supported-vendors");
  // ── Compatibility scrapers ────────────────────────────────────────────
  const { scrapeCiscoTmg } = await import("./scrapers/cisco-tmg");
  const { scrapeJuniperHct } = await import("./scrapers/juniper-hct");
  const { scrapeSonicHcl } = await import("./scrapers/sonic-hcl");
  const { scrapeUfiSpace } = await import("./scrapers/ufispace");
  const { scrapeEdgecore } = await import("./scrapers/edgecore");
  // ── Intelligence scrapers ─────────────────────────────────────────────
  const { scrapeNews } = await import("./scrapers/news");
  const { scrapeMarketIntelligence, computeAbcClassification, computeReorderSignals } = await import("./scrapers/market-intelligence");
  const { scrapeAllSwitchIssues, findAndSeedDatasheetLinks } = await import("./scrapers/community-issues");
  const { scrapeSwitchAssets } = await import("./scrapers/switch-assets");
  // ── eBay enrichment ───────────────────────────────────────────────────
  const { enrichTransceiversFromEbay, enrichSwitchesFromEbay } = await import("./scrapers/ebay-enricher");
  // ── Prediction signals ────────────────────────────────────────────────
  const { scrapeSecEdgar } = await import("./scrapers/sec-edgar");
  const { scrapeGithubSignals } = await import("./scrapers/github-signals");
  const { scrapeEbayVelocity } = await import("./scrapers/ebay-velocity");
  const { scrapeAiClusters } = await import("./scrapers/ai-clusters");
  const { scrapeDistributorLeads } = await import("./scrapers/distributor-leads");
  const { scrapeStandardsTracker } = await import("./scrapers/standards-tracker");
  const { runForecastEngine } = await import("./scrapers/forecast-engine");

  // ── Worker table ──────────────────────────────────────────────────────
  // One row per queue: the queue name, the label written to the log when a job
  // starts, and the job body itself.
  type WorkerSpec = { queue: string; label: string; run: () => unknown };

  // Wraps a scraper so that it runs inside withIsolatedStorage(storage, …).
  const isolated = (storage: string, fn: () => Promise<void>) => () => withIsolatedStorage(storage, fn);

  const workers: WorkerSpec[] = [
    // Pricing scrapers
    { queue: "scrape:pricing:fs", label: "fs", run: isolated("fs", scrapeFs) },
    { queue: "scrape:pricing:10gtek", label: "10gtek", run: isolated("10gtek", scrape10GTek) },
    { queue: "scrape:pricing:prolabs", label: "prolabs", run: isolated("prolabs", scrapeProLabs) },
    { queue: "scrape:pricing:atgbics", label: "atgbics", run: isolated("atgbics", scrapeAtgbics) },
    { queue: "scrape:pricing:optcore", label: "optcore", run: isolated("optcore", scrapeOptcore) },
    { queue: "scrape:pricing:fluxlight", label: "fluxlight", run: isolated("fluxlight", scrapeFluxlight) },
    { queue: "scrape:pricing:gbics", label: "gbics", run: isolated("gbics", scrapeGbics) },
    { queue: "scrape:pricing:champion-one", label: "champion-one", run: isolated("champion-one", scrapeChampionOne) },
    { queue: "scrape:pricing:sfpcables", label: "sfpcables", run: isolated("sfpcables", scrapeSfpCables) },
    { queue: "scrape:pricing:blueoptics", label: "blueoptics", run: isolated("blueoptics", scrapeBlueOptics) },
    { queue: "scrape:pricing:fiber24", label: "fiber24", run: isolated("fiber24", scrapeFiber24) },
    { queue: "scrape:pricing:tscom", label: "tscom", run: isolated("tscom", scrapeTsCom) },
    { queue: "scrape:pricing:skylane", label: "skylane", run: isolated("skylane", scrapeSkylane) },
    { queue: "scrape:pricing:ascentoptics", label: "ascentoptics", run: isolated("ascentoptics", scrapeAscentOptics) },
    { queue: "scrape:pricing:gaotek", label: "gaotek", run: isolated("gaotek", scrapeGaoTek) },
    // Form-factor coverage scrapers.
    // NOTE(review): unlike the pricing scrapers above, these five run without
    // withIsolatedStorage — confirm that is intentional.
    { queue: "scrape:pricing:comms-express", label: "comms-express", run: () => scrapeCommsExpress() },
    { queue: "scrape:pricing:router-switch", label: "router-switch", run: () => scrapeRouterSwitch() },
    { queue: "scrape:pricing:multimode-inc", label: "multimode-inc", run: () => scrapeMultimodeInc() },
    { queue: "scrape:pricing:optictransceiver", label: "optictransceiver", run: () => scrapeOpticTransceiver() },
    { queue: "scrape:pricing:wiitek", label: "wiitek", run: () => scrapeWiitek() },
    // Catalog scrapers
    { queue: "scrape:pricing:flexoptix", label: "flexoptix-catalog", run: () => scrapeFlexoptixCatalog() },
    { queue: "scrape:catalog:smartoptics", label: "smartoptics", run: isolated("smartoptics", scrapeSmartOptics) },
    { queue: "scrape:catalog:hubersuhner", label: "hubersuhner", run: isolated("hubersuhner", scrapeHuberSuhner) },
    // Vendor scrapers
    { queue: "scrape:vendors:flexoptix", label: "flexoptix-vendors", run: () => scrapeFlexoptixVendors() },
    { queue: "scrape:vendors:flexoptix-supported", label: "flexoptix-supported", run: () => seedFlexoptixVendors() },
    // Compatibility scrapers
    { queue: "scrape:compat:cisco", label: "cisco-compat", run: isolated("cisco", scrapeCiscoTmg) },
    { queue: "scrape:compat:juniper", label: "juniper-compat", run: isolated("juniper", scrapeJuniperHct) },
    { queue: "scrape:compat:sonic", label: "sonic-compat", run: isolated("sonic", scrapeSonicHcl) },
    { queue: "scrape:compat:ufispace", label: "ufispace", run: isolated("ufispace", scrapeUfiSpace) },
    { queue: "scrape:compat:edgecore", label: "edgecore", run: isolated("edgecore", scrapeEdgecore) },
    // Intelligence
    { queue: "scrape:news", label: "news", run: () => scrapeNews() },
    { queue: "scrape:market-intel", label: "market-intel", run: isolated("market-intel", scrapeMarketIntelligence) },
    { queue: "scrape:community-issues", label: "community", run: isolated("community", () => scrapeAllSwitchIssues(30)) },
    { queue: "scrape:datasheet-links", label: "datasheets", run: () => findAndSeedDatasheetLinks(50) },
    // Switch assets
    { queue: "scrape:assets:switches", label: "switch-assets", run: isolated("switch-assets", () => scrapeSwitchAssets()) },
    // eBay enrichment
    { queue: "enrich:ebay-transceivers", label: "ebay-transceivers", run: isolated("ebay-transceivers", () => enrichTransceiversFromEbay(100)) },
    { queue: "enrich:ebay-switches", label: "ebay-switches", run: isolated("ebay-switches", () => enrichSwitchesFromEbay(30)) },
    // Prediction signals
    { queue: "scrape:signals:sec-edgar", label: "sec-edgar", run: () => scrapeSecEdgar() },
    { queue: "scrape:signals:github", label: "github-signals", run: () => scrapeGithubSignals() },
    { queue: "scrape:signals:ebay-velocity", label: "ebay-velocity", run: () => scrapeEbayVelocity() },
    { queue: "scrape:signals:ai-clusters", label: "ai-clusters", run: () => scrapeAiClusters() },
    { queue: "scrape:signals:distributor-leads", label: "distributor-leads", run: () => scrapeDistributorLeads() },
    { queue: "scrape:signals:standards", label: "standards", run: () => scrapeStandardsTracker() },
    // Compute
    { queue: "compute:abc", label: "abc", run: () => computeAbcClassification() },
    { queue: "compute:reorder-signals", label: "reorder", run: () => computeReorderSignals() },
    { queue: "compute:forecast", label: "forecast", run: () => runForecastEngine() },
  ];

  // ── Register workers ──────────────────────────────────────────────────
  for (const { queue, label, run } of workers) {
    await boss.work(queue, async () => {
      log(label);
      await run();
    });
  }

  // The banner and the line below report QUEUES.length as the worker count,
  // so flag any drift between the queue list and the worker table.
  const served = new Set(workers.map((w) => w.queue));
  const declared = new Set(QUEUES);
  const withoutWorker = QUEUES.filter((q) => !served.has(q));
  const withoutQueue = workers.map((w) => w.queue).filter((q) => !declared.has(q));
  if (withoutWorker.length > 0) {
    console.warn(`Queues without a worker: ${withoutWorker.join(", ")}`);
  }
  if (withoutQueue.length > 0) {
    console.warn(`Workers whose queue is not in QUEUES: ${withoutQueue.join(", ")}`);
  }

  console.log(`${QUEUES.length} workers active — running 24/7\n`);

  // Single shutdown path shared by both signals. The guard keeps a repeated
  // signal from calling process.exit() while boss.stop() is still in flight,
  // and a failing stop() is reported with a non-zero exit code.
  let stopping = false;
  const shutdown = async (signal: string): Promise<void> => {
    if (stopping) return;
    stopping = true;
    let exitCode = 0;
    try {
      await boss.stop();
    } catch (err: unknown) {
      console.error(`pg-boss stop failed on ${signal}:`, err);
      exitCode = 1;
    }
    process.exit(exitCode);
  };
  process.on("SIGTERM", () => { void shutdown("SIGTERM"); });
  process.on("SIGINT", () => { void shutdown("SIGINT"); });
}
// Entry point: a rejection from main() is fatal — print it and exit with code 1.
main().catch((err) => {
  console.error("Fatal:", err);
  process.exit(1);
});