/**
 * pg-boss Job Scheduler — 24/7 Continuous Scraping
 *
 * ARCHITECTURE:
 * - Erik (VPS, .82)    : Playwright-heavy scrapers (FS.com, 10Gtek, ATGBICS, ProLabs)
 *                        + all compatibility + eBay + compute + NAS sync
 * - Raspberry Pi Fleet : Lightweight fetch/cheerio scrapers run continuously all day
 *                        (BlueOptics, Fiber24, T&S Com, Fluxlight, GBICs, Optcore,
 *                        Champion ONE, SFPCables, SmartOptics, HUBER+SUHNER, etc.)
 *
 * SCHEDULE OVERVIEW (the SCHEDULES table below is the source of truth):
 * - Pricing scrapers (Playwright and fetch/cheerio): every 2h, staggered within the hour
 * - Flexoptix catalog: every 2h (primary price source)
 * - Manufacturer catalogs: every 4h; OEM seed catalogs: once daily (04:00–05:30)
 * - Vendor lists: every 12h
 * - Compatibility matrices: every 12h (Flexoptix mapping: daily at 09:00)
 * - Switch assets: every 12h; og:image fetch: daily; Playwright image fetch: every 3rd day
 * - eBay enrichment: every 6h
 * - Intelligence/community: every 6h (NOG talks weekly, docs every 12h)
 * - Compute jobs: every 4h; price denorm, forecast and hype cycle: daily
 * - NAS sync: daily at 07:55
 */
import PgBoss from "pg-boss";
import { config } from "dotenv";
import { join } from "path";
import { loadavg } from "os";

// withIsolatedStorage removed — all Crawlee scrapers now use makeCrawleeConfig()
// for instance-level storage isolation. See packages/scraper/src/utils/crawlee-config.ts

/**
 * Load-aware guard — skip heavy scrapers when the server is already busy.
 *
 * Compares the 1-minute load average reported by `os.loadavg()` with `maxLoad`
 * and logs a warning when the threshold is exceeded.
 *
 * @param maxLoad Highest acceptable 1-minute load average. Defaults to 2.5
 *                (50% of 5 vCPUs).
 * @returns `true` when the load is at or below `maxLoad`, `false` otherwise.
 */
function isLoadAcceptable(maxLoad = 2.5): boolean {
  // Default keeps the comparison well-typed under noUncheckedIndexedAccess.
  const [avg1 = 0] = loadavg();
  if (avg1 > maxLoad) {
    console.warn(`[load-guard] 1m load avg ${avg1.toFixed(2)} > ${maxLoad} — deferring heavy scraper`);
    return false;
  }
  return true;
}

// Load the .env file located three directories above this module.
config({ path: join(__dirname, "..", "..", "..", ".env") });

// Postgres connection URL assembled from POSTGRES_* env vars, with fallbacks.
// Values are interpolated verbatim, so credentials containing URL-reserved
// characters (@ : / # ?) must already be percent-encoded in the environment.
// NOTE(review): the fallbacks look like local-development defaults — consider
// failing fast instead of falling back when running in production.
const connectionString = `postgres://${process.env.POSTGRES_USER || "tip"}:${process.env.POSTGRES_PASSWORD || "tip_dev_2026"}@${process.env.POSTGRES_HOST || "localhost"}:${process.env.POSTGRES_PORT || "5433"}/${process.env.POSTGRES_DB || "transceiver_db"}`;

/**
 * Creates and starts the pg-boss instance used by the scheduler and workers.
 *
 * The retry/expiry values passed to the constructor are instance-wide
 * settings; every schedule registered by `registerSchedules` passes its own
 * `retryLimit` and `expireInSeconds`.
 *
 * @returns The started pg-boss instance.
 */
export async function createScheduler(): Promise<PgBoss> {
  const boss = new PgBoss({
    connectionString,
    retryLimit: 3,
    retryDelay: 30,
    retryBackoff: true,
    expireInSeconds: 300,
    monitorStateIntervalSeconds: 60,
  });

  boss.on("error", (error) => console.error("pg-boss error:", error));

  await boss.start();
  console.log("pg-boss scheduler started");
  return boss;
}

/** One cron schedule: [queue name, cron expression, retryLimit, expireInSeconds]. */
type ScheduleSpec = readonly [string, string, number, number];

/**
 * Every scheduled job, in registration order. This table is the single source
 * of truth: queues are created from it, schedules are registered from it, and
 * the logged job count is derived from it, so the three cannot drift apart.
 */
const SCHEDULES: readonly ScheduleSpec[] = [
  // ══ ALL PRICING SCRAPERS — 24/7, every 2h, staggered ════════════════════
  // Goal: complete competitor coverage, no gaps, database always fresh

  // Playwright scrapers (resource-heavy) — every 2h, 10min apart
  ["scrape:pricing:fs", "0 */2 * * *", 3, 5400],
  ["scrape:pricing:10gtek", "10 */2 * * *", 2, 3600],
  ["scrape:pricing:atgbics", "20 */2 * * *", 2, 3600],
  ["scrape:pricing:prolabs", "30 */2 * * *", 2, 3600],

  // Fetch/Cheerio scrapers (lightweight) — every 2h, 5min apart
  ["scrape:pricing:fluxlight", "0 */2 * * *", 2, 3600],
  ["scrape:pricing:gbics", "5 */2 * * *", 2, 3600],
  ["scrape:pricing:optcore", "10 */2 * * *", 2, 3600],
  ["scrape:pricing:champion-one", "15 */2 * * *", 2, 3600],
  ["scrape:pricing:sfpcables", "20 */2 * * *", 2, 3600],
  ["scrape:pricing:blueoptics", "25 */2 * * *", 2, 3600],
  ["scrape:pricing:fiber24", "30 */2 * * *", 2, 3600],
  ["scrape:pricing:tscom", "35 */2 * * *", 2, 3600],
  ["scrape:pricing:skylane", "40 */2 * * *", 2, 3600],
  ["scrape:pricing:ascentoptics", "45 */2 * * *", 2, 3600],
  ["scrape:pricing:gaotek", "50 */2 * * *", 2, 3600],

  // Form-factor coverage scrapers — every 2h
  ["scrape:pricing:comms-express", "5 */2 * * *", 2, 5400],
  ["scrape:pricing:router-switch", "15 */2 * * *", 2, 5400],
  ["scrape:pricing:multimode-inc", "25 */2 * * *", 2, 3600],
  ["scrape:pricing:optictransceiver", "35 */2 * * *", 2, 3600],
  ["scrape:pricing:wiitek", "45 */2 * * *", 2, 3600],

  // Fetch-based scrapers running on Erik — every 2h, end-of-cycle slots
  ["scrape:pricing:naddod", "48 */2 * * *", 2, 3600],
  ["scrape:pricing:qsfptek", "52 */2 * * *", 2, 3600],
  ["scrape:pricing:addon", "55 */2 * * *", 2, 3600],
  ["scrape:pricing:fibermall", "57 */2 * * *", 2, 3600],
  ["scrape:pricing:vcelink", "3 */2 * * *", 2, 3600],
  ["scrape:pricing:opticsbay", "7 */2 * * *", 2, 3600],

  // OEM reference prices via Mouser API — once daily at 03:00 (slow: 2s/PID × 475 PIDs ≈ 16min)
  // Requires MOUSER_API_KEY env var (free at mouser.com/api-hub)
  ["scrape:pricing:mouser-oem", "0 3 * * *", 1, 3600],

  // ══ FLEXOPTIX CATALOG — every 2h (primary price source) ═════════════════
  ["scrape:pricing:flexoptix", "0 */2 * * *", 3, 3600],

  // ══ MANUFACTURER CATALOGS — every 4h (product data, no prices) ══════════
  ["scrape:catalog:smartoptics", "10 */4 * * *", 2, 3600],
  ["scrape:catalog:hubersuhner", "25 */4 * * *", 2, 3600],
  ["scrape:catalog:eoptolink", "40 */4 * * *", 2, 3600],

  // OEM vendor seed catalogs — daily at 04:00+ (stable data, rarely changes)
  ["scrape:catalog:arista-oem", "0 4 * * *", 2, 3600],
  ["scrape:catalog:juniper-oem", "15 4 * * *", 2, 3600],
  ["scrape:catalog:hpe-aruba-oem", "30 4 * * *", 2, 3600],
  ["scrape:catalog:nokia-oem", "45 4 * * *", 2, 3600],
  ["scrape:catalog:huawei-oem", "0 5 * * *", 2, 3600],
  ["scrape:catalog:dell-emc-oem", "15 5 * * *", 2, 3600],
  ["scrape:catalog:extreme-oem", "30 5 * * *", 2, 3600],

  // ══ VENDOR LISTS — every 12h ════════════════════════════════════════════
  ["scrape:vendors:flexoptix", "0 5,17 * * *", 2, 1800],
  ["scrape:vendors:flexoptix-supported", "15 5,17 * * *", 2, 1800],

  // ══ COMPATIBILITY MATRICES — every 12h (Flexoptix mapping: daily) ═══════
  // Flexoptix compatibility — every 24h at 09:00 (after both switch-assets + image-fetcher)
  ["scrape:compat:flexoptix", "0 9 * * *", 1, 7200],
  ["scrape:compat:cisco", "0 6,18 * * *", 2, 3600],
  ["scrape:compat:juniper", "15 6,18 * * *", 2, 3600],
  ["scrape:compat:sonic", "30 6,18 * * *", 2, 3600],
  ["scrape:compat:ufispace", "45 6,18 * * *", 2, 1800],
  ["scrape:compat:edgecore", "55 6,18 * * *", 2, 1800],

  // ══ SWITCH ASSETS — every 12h ═══════════════════════════════════════════
  ["scrape:assets:switches", "30 7,19 * * *", 1, 3600],
  // og:image fetcher: daily at 08:30, one hour after the 07:30 switch-assets run starts
  ["scrape:images:switches", "30 8 * * *", 1, 7200],
  // Playwright image scraper for bot-blocked vendors (Arista/Dell/Edgecore/Fortinet/Extreme)
  // Every 3rd day of the month at 09:00 — Playwright is slower and heavier than plain HTTP
  ["scrape:images:switches:playwright", "0 9 */3 * *", 1, 10800],

  // ══ EBAY ENRICHMENT — every 6h ══════════════════════════════════════════
  ["enrich:ebay-transceivers", "0 0,6,12,18 * * *", 2, 7200],
  ["enrich:ebay-switches", "30 0,6,12,18 * * *", 2, 7200],

  // ══ INTELLIGENCE & COMMUNITY — every 6h unless noted ════════════════════
  ["scrape:market-intel", "0 2,8,14,20 * * *", 2, 3600],
  // NOG conference talks — weekly on Mondays 06:00 UTC
  ["scrape:nog-talks", "0 6 * * 1", 2, 7200],
  ["scrape:community-issues", "30 2,8,14,20 * * *", 1, 3600],
  ["scrape:datasheet-links", "0 3,9,15,21 * * *", 1, 3600],
  ["scrape:news", "20 3,9,15,21 * * *", 2, 1800],
  ["scrape:faq", "40 3,9,15,21 * * *", 2, 3600],
  // Docs — every 12h
  ["scrape:docs", "50 4,16 * * *", 2, 3600],

  // ══ COMPUTE JOBS — every 4h (after pricing waves settle) ════════════════
  ["compute:abc", "50 3,7,11,15,19,23 * * *", 2, 600],
  ["compute:reorder-signals", "55 3,7,11,15,19,23 * * *", 2, 600],
  // Price denorm refresh: daily 05:30 UTC after overnight scraping waves settle
  ["compute:price-denorm", "30 5 * * *", 1, 600],

  // ══ PREDICTION SIGNAL SCRAPERS ══════════════════════════════════════════
  // SEC EDGAR CapEx — weekly Monday 06:00 (filings don't change that fast)
  ["scrape:signals:sec-edgar", "0 6 * * 1", 2, 3600],
  // GitHub signals — weekly Sunday 05:00
  ["scrape:signals:github", "0 5 * * 0", 2, 7200],
  // eBay sold velocity — every 12h (fast-moving market signal)
  ["scrape:signals:ebay-velocity", "0 4,16 * * *", 2, 3600],
  // AI cluster RSS feeds — every 4h (news moves fast)
  ["scrape:signals:ai-clusters", "10 0,4,8,12,16,20 * * *", 2, 1800],
  // Distributor lead times — daily 03:30 (stock changes overnight)
  ["scrape:signals:distributor-leads", "30 3 * * *", 2, 3600],
  // Standards tracker — weekly Wednesday 04:00 (standards move slowly)
  ["scrape:signals:standards", "0 4 * * 3", 1, 3600],

  // ══ FORECAST ENGINE — daily at 08:00 (after all nightly scrapers done) ══
  ["compute:forecast", "0 8 * * *", 2, 600],
  // Hype Cycle Engine runs daily at 04:30 (after Mouser OEM scraper at 03:00)
  ["compute:hype-cycle", "30 4 * * *", 1, 600],

  // ══ NAS SYNC — daily at 07:55 ═══════════════════════════════════════════
  ["sync:nas", "55 7 * * *", 1, 1800],

  // Health check: every 3h — warns if any vendor has no new prices recently
  ["monitor:scraper-health", "17 */3 * * *", 1, 600],

  // Verification reconciliation: nightly at 01:00 UTC
  // Resets competitor_verified/fully_verified for any transceiver that no longer
  // has a real non-Flexoptix price in the last 30 days — prevents stale ★ 100% badges
  ["maintenance:reconcile-verification", "0 1 * * *", 1, 1800],
  // Equivalence matching: nightly at 02:00 UTC (after reconcile)
  ["maintenance:find-equivalences", "0 2 * * *", 1, 3600],
  // Re-research approved equivalences: daily at 03:00 UTC, processes 200 items per run
  ["maintenance:re-research-equivalences", "0 3 * * *", 1, 3600],
];

/**
 * Best-effort queue creation. A failure never aborts registration, but any
 * error that does not look like "queue already exists" is logged instead of
 * being silently discarded, so real problems (connectivity, rejected queue
 * names) stay visible.
 */
async function ensureQueue(boss: PgBoss, queue: string): Promise<void> {
  try {
    await boss.createQueue(queue);
  } catch (error) {
    const message = error instanceof Error ? error.message : String(error);
    if (!/already exists|duplicate key/i.test(message)) {
      console.warn(`[scheduler] createQueue("${queue}") failed: ${message}`);
    }
  }
}

/**
 * Creates every queue named in SCHEDULES and then registers its cron schedule.
 *
 * All queues are created before the first schedule is registered. The queue
 * list is derived from SCHEDULES rather than maintained by hand, so a newly
 * added schedule (such as the OEM seed catalogs) can no longer be registered
 * against a queue that was never created. NOTE(review): pg-boss v10 rejects
 * `schedule()` for an unknown queue — confirm against the installed version.
 *
 * Every job is scheduled with an empty payload (`{}`) and its own
 * `retryLimit` / `expireInSeconds`.
 *
 * @param boss A started pg-boss instance.
 */
export async function registerSchedules(boss: PgBoss): Promise<void> {
  for (const [queue] of SCHEDULES) {
    await ensureQueue(boss, queue);
  }

  for (const [queue, cron, retryLimit, expireInSeconds] of SCHEDULES) {
    await boss.schedule(queue, cron, {}, { retryLimit, expireInSeconds });
  }

  console.log(`All schedules registered — 24/7 continuous scraping (${SCHEDULES.length} jobs)`);
}

// NOTE(review): bare `Promise` needs a type argument (presumably Promise<void>) —
// confirm against this function's return paths.
export async function registerWorkers(boss: PgBoss): Promise {
  // Lazy-load all scrapers
  const { scrapeFs } = await import("./scrapers/fs-com");
  const { scrapeCiscoTmg } = await import("./scrapers/cisco-tmg");
  const { scrapeSmartOptics } = await import("./scrapers/smartoptics");
  const { scrapeHuberSuhner } = await import("./scrapers/hubersuhner");
  const { scrapeMarketIntelligence } = await import("./scrapers/market-intelligence");
  const { scrapeNews } = await import("./scrapers/news");
  const { scrape10Gtek } = await import("./scrapers/tenGtek");
  const { scrapeFlexoptixCatalog } = await import("./scrapers/flexoptix-catalog");
  const { scrapeFlexoptixVendors } = await import("./scrapers/flexoptix-vendors");
  const { seedFlexoptixVendors } = await import("./scrapers/flexoptix-supported-vendors");
  const { scrapeAtgbics } = await
import("./scrapers/atgbics"); const { scrapeProLabs } = await import("./scrapers/prolabs"); const { scrapeJuniperHct } = await import("./scrapers/juniper-hct"); const { scrapeSonicHcl } = await import("./scrapers/sonic-hcl"); const { scrapeUfiSpace } = await import("./scrapers/ufispace"); const { scrapeEdgecore } = await import("./scrapers/edgecore"); const { scrapeSwitchAssets } = await import("./scrapers/switch-assets"); const { fetchSwitchImages } = await import("./scrapers/switch-image-fetcher"); const { fetchSwitchImagesPlaywright } = await import("./scrapers/switch-image-playwright"); const { scrapeFlexoptixCompatibility } = await import("./scrapers/flexoptix-compat"); // ── Prediction signal scrapers ──────────────────────────────────────── const { scrapeSecEdgar } = await import("./scrapers/sec-edgar"); const { scrapeGithubSignals } = await import("./scrapers/github-signals"); const { scrapeEbayVelocity } = await import("./scrapers/ebay-velocity"); const { scrapeAiClusters } = await import("./scrapers/ai-clusters"); const { scrapeDistributorLeads }= await import("./scrapers/distributor-leads"); const { scrapeStandardsTracker }= await import("./scrapers/standards-tracker"); const { runForecastEngine } = await import("./utils/forecast-engine"); // ── Playwright scrapers ─────────────────────────────────────────────── await boss.work("scrape:pricing:fs", async () => { // FS.com uses Playwright + Cloudflare bypass. On datacenter servers the // datacenter IP is blocked by Cloudflare WAF. Set SKIP_FS_SCRAPER=true to // skip on Erik; the Mac launchd cron handles FS.com from a residential IP. 
if (process.env["SKIP_FS_SCRAPER"] === "true") { console.log(`[${new Date().toISOString()}] FS.com pricing: SKIPPED (SKIP_FS_SCRAPER=true)`); return; } console.log(`[${new Date().toISOString()}] Running: FS.com pricing`); await scrapeFs(); }); await boss.work("scrape:pricing:10gtek", async () => { console.log(`[${new Date().toISOString()}] Running: 10Gtek pricing`); await scrape10Gtek(); }); await boss.work("scrape:pricing:atgbics", async () => { console.log(`[${new Date().toISOString()}] Running: ATGBICS pricing`); await scrapeAtgbics(); }); await boss.work("scrape:pricing:prolabs", async () => { console.log(`[${new Date().toISOString()}] Running: ProLabs pricing`); await scrapeProLabs(); }); // ── Lightweight fetch/cheerio scrapers ─────────────────────────────── await boss.work("scrape:pricing:fluxlight", async () => { console.log(`[${new Date().toISOString()}] Running: Fluxlight pricing`); const { scrapeFluxlight } = await import("./scrapers/fluxlight"); await scrapeFluxlight(); }); await boss.work("scrape:pricing:gbics", async () => { console.log(`[${new Date().toISOString()}] Running: GBICS pricing`); const { scrapeGbics } = await import("./scrapers/gbics"); await scrapeGbics(); }); await boss.work("scrape:pricing:optcore", async () => { // Optcore.net WP REST API is blocked by Cloudflare WAF for Erik's datacenter // IP (82.165.222.127). Set SKIP_OPTCORE_SCRAPER=true to suppress the wasted // run. A residential IP (Mac launchd) would be needed to scrape Optcore. 
if (process.env["SKIP_OPTCORE_SCRAPER"] === "true") { console.log(`[${new Date().toISOString()}] Optcore pricing: SKIPPED (SKIP_OPTCORE_SCRAPER=true)`); return; } console.log(`[${new Date().toISOString()}] Running: Optcore pricing`); const { scrapeOptcore } = await import("./scrapers/optcore"); await scrapeOptcore(); }); await boss.work("scrape:pricing:champion-one", async () => { console.log(`[${new Date().toISOString()}] Running: Champion ONE pricing`); const { scrapeChampionOne } = await import("./scrapers/champion-one"); await scrapeChampionOne(); }); await boss.work("scrape:pricing:sfpcables", async () => { console.log(`[${new Date().toISOString()}] Running: SFPcables pricing`); const { scrapeSfpCables } = await import("./scrapers/sfpcables"); await scrapeSfpCables(); }); await boss.work("scrape:pricing:blueoptics", async () => { console.log(`[${new Date().toISOString()}] Running: BlueOptics pricing`); const { scrapeBlueOptics } = await import("./scrapers/blueoptics"); await scrapeBlueOptics(); }); await boss.work("scrape:pricing:fiber24", async () => { console.log(`[${new Date().toISOString()}] Running: Fiber24 pricing`); const { scrapeFiber24 } = await import("./scrapers/fiber24"); await scrapeFiber24(); }); await boss.work("scrape:pricing:tscom", async () => { console.log(`[${new Date().toISOString()}] Running: T&S Communication pricing`); const { scrapeTsCom } = await import("./scrapers/tscom"); await scrapeTsCom(); }); await boss.work("scrape:pricing:skylane", async () => { console.log(`[${new Date().toISOString()}] Running: Skylane pricing`); const { scrapeSkylane } = await import("./scrapers/skylane"); await scrapeSkylane(); }); await boss.work("scrape:pricing:ascentoptics", async () => { console.log(`[${new Date().toISOString()}] Running: Ascent Optics pricing`); const { scrapeAscentOptics } = await import("./scrapers/ascentoptics"); await scrapeAscentOptics(); }); await boss.work("scrape:pricing:gaotek", async () => { console.log(`[${new 
Date().toISOString()}] Running: GAO Tek pricing`); const { scrapeGaoTek } = await import("./scrapers/gaotek"); await scrapeGaoTek(); }); // ── Catalog scrapers ────────────────────────────────────────────────── await boss.work("scrape:pricing:flexoptix", async () => { console.log(`[${new Date().toISOString()}] Running: Flexoptix catalog`); await scrapeFlexoptixCatalog(); }); await boss.work("scrape:catalog:smartoptics", async () => { console.log(`[${new Date().toISOString()}] Running: SmartOptics catalog`); await scrapeSmartOptics(); }); await boss.work("scrape:catalog:hubersuhner", async () => { console.log(`[${new Date().toISOString()}] Running: HUBER+SUHNER catalog`); await scrapeHuberSuhner(); }); await boss.work("scrape:catalog:eoptolink", async () => { console.log(`[${new Date().toISOString()}] Running: Eoptolink OEM catalog`); const { scrapeEoptolink } = await import("./scrapers/eoptolink"); await scrapeEoptolink(); }); await boss.work("scrape:catalog:arista-oem", async () => { console.log(`[${new Date().toISOString()}] Running: Arista OEM catalog seed`); const { scrapeAristaOem } = await import("./scrapers/arista-oem"); await scrapeAristaOem(); }); await boss.work("scrape:catalog:juniper-oem", async () => { console.log(`[${new Date().toISOString()}] Running: Juniper OEM catalog seed`); const { scrapeJuniperOem } = await import("./scrapers/juniper-oem"); await scrapeJuniperOem(); }); await boss.work("scrape:catalog:hpe-aruba-oem", async () => { console.log(`[${new Date().toISOString()}] Running: HPE/Aruba OEM catalog seed`); const { scrapeHpeArubaOem } = await import("./scrapers/hpe-aruba-oem"); await scrapeHpeArubaOem(); }); await boss.work("scrape:catalog:nokia-oem", async () => { console.log(`[${new Date().toISOString()}] Running: Nokia OEM catalog seed`); const { scrapeNokiaOem } = await import("./scrapers/nokia-oem"); await scrapeNokiaOem(); }); await boss.work("scrape:catalog:huawei-oem", async () => { console.log(`[${new Date().toISOString()}] 
Running: Huawei OEM catalog seed`); const { scrapeHuaweiOem } = await import("./scrapers/huawei-oem"); await scrapeHuaweiOem(); }); await boss.work("scrape:catalog:dell-emc-oem", async () => { console.log(`[${new Date().toISOString()}] Running: Dell EMC OEM catalog seed`); const { scrapeDellEmcOem } = await import("./scrapers/dell-emc-oem"); await scrapeDellEmcOem(); }); await boss.work("scrape:catalog:extreme-oem", async () => { console.log(`[${new Date().toISOString()}] Running: Extreme Networks OEM catalog seed`); const { scrapeExtremeOem } = await import("./scrapers/extreme-oem"); await scrapeExtremeOem(); }); // ── Vendor lists ────────────────────────────────────────────────────── await boss.work("scrape:vendors:flexoptix", async () => { console.log(`[${new Date().toISOString()}] Running: Flexoptix vendor list`); await scrapeFlexoptixVendors(); }); await boss.work("scrape:vendors:flexoptix-supported", async () => { console.log(`[${new Date().toISOString()}] Running: Flexoptix supported vendors`); await seedFlexoptixVendors(); }); // ── Compatibility scrapers ──────────────────────────────────────────── await boss.work("scrape:compat:flexoptix", async () => { console.log(`[${new Date().toISOString()}] Running: Flexoptix compatibility mapping`); if (!isLoadAcceptable(2.5)) { console.warn(`[${new Date().toISOString()}] ⚠ Load too high — skipping Flexoptix compat scrape`); return; } await scrapeFlexoptixCompatibility(); }); await boss.work("scrape:compat:cisco", async () => { console.log(`[${new Date().toISOString()}] Running: Cisco TMG compatibility`); await scrapeCiscoTmg(); }); await boss.work("scrape:compat:juniper", async () => { console.log(`[${new Date().toISOString()}] Running: Juniper HCT compatibility`); await scrapeJuniperHct(); }); await boss.work("scrape:compat:sonic", async () => { console.log(`[${new Date().toISOString()}] Running: SONiC HCL compatibility`); await scrapeSonicHcl(); }); await boss.work("scrape:compat:ufispace", async () => { 
console.log(`[${new Date().toISOString()}] Running: Ufispace switch data`);
await scrapeUfiSpace();
});

// Logging shorthand for the handlers below: prints `message` prefixed with the
// current ISO-8601 timestamp (taken when the handler runs, not at registration).
const announce = (message: string): void => {
  console.log(`[${new Date().toISOString()}] ${message}`);
};

await boss.work("scrape:compat:edgecore", async () => {
  announce("Running: Edgecore switch data");
  await scrapeEdgecore();
});

// ── Switch assets ─────────────────────────────────────────────────────
await boss.work("scrape:assets:switches", async () => {
  announce("Running: Switch assets enrichment");
  await scrapeSwitchAssets();
});

// Image fetchers are load-guarded: when the 1-minute load average is above
// the given limit the run is skipped (a warning is logged and the job ends).
await boss.work("scrape:images:switches", async () => {
  announce("Running: Switch og:image fetcher");
  if (isLoadAcceptable(2.5)) {
    await fetchSwitchImages();
    return;
  }
  console.warn(`[${new Date().toISOString()}] ⚠ Load too high — skipping switch image fetch`);
});

await boss.work("scrape:images:switches:playwright", async () => {
  announce("Running: Switch image fetcher (Playwright — bot-blocked vendors)");
  if (isLoadAcceptable(2.0)) {
    await fetchSwitchImagesPlaywright();
    return;
  }
  console.warn(`[${new Date().toISOString()}] ⚠ Load too high — skipping Playwright image fetch`);
});

// ── eBay enrichment ───────────────────────────────────────────────────
// The enricher module is loaded on demand inside each handler.
await boss.work("enrich:ebay-transceivers", async () => {
  announce("Running: eBay transceiver pricing");
  const { enrichTransceiversFromEbay: enrich } = await import("./scrapers/ebay-enricher");
  await enrich(100);
});

await boss.work("enrich:ebay-switches", async () => {
  announce("Running: eBay switch enrichment");
  const { enrichSwitchesFromEbay: enrich } = await import("./scrapers/ebay-enricher");
  await enrich(30);
});

// ── Intelligence & community ──────────────────────────────────────────
await boss.work("scrape:market-intel", async () => {
  announce("Running: Market intelligence");
  await scrapeMarketIntelligence();
});

await boss.work("scrape:nog-talks", async () => {
  announce("Running: NOG conference talks");
  const { scrapeNogTalks: run } = await import("./scrapers/nog-talks");
  await run();
});

await boss.work("scrape:community-issues", async () => {
  announce("Running: Community issues");
  const {
    scrapeAllSwitchIssues: scrapeSwitches,
    scrapeTransceiverCompatIssues: scrapeTransceivers,
  } = await import("./scrapers/community-issues");
  // Sequential on purpose: switch issues first, then transceiver issues.
  await scrapeSwitches(30);
  await scrapeTransceivers(15);
});

await boss.work("scrape:datasheet-links", async () => {
  announce("Running: Datasheet links");
  const { findAndSeedDatasheetLinks: seedLinks } = await import("./scrapers/community-issues");
  await seedLinks(50);
});

await boss.work("scrape:news", async () => {
  announce("Running: News scraper");
  await scrapeNews();
});

// Placeholder workers: they only log, so queued jobs complete without doing work.
await boss.work("scrape:faq", async () => {
  announce("FAQ scraper — not yet implemented");
});

await boss.work("scrape:docs", async () => {
  announce("Docs scraper — not yet implemented");
});

// ── Compute jobs ──────────────────────────────────────────────────────
await boss.work("compute:abc", async () => {
  announce("Computing: ABC classification");
  const { computeAbcClassification: compute } = await import("./scrapers/market-intelligence");
  await compute();
});

await boss.work("compute:reorder-signals", async () => {
  announce("Computing: Reorder signals");
  const { computeReorderSignals: compute } = await import("./scrapers/market-intelligence");
  await compute();
});

// ── NAS sync ──────────────────────────────────────────────────────────
await boss.work("sync:nas", async () => {
  announce("Running: NAS sync to Fearghas");
  const { runNightlyNasSync: sync } = await import("./utils/nas-sync");
  await sync();
});

// ── Prediction signal scrapers ────────────────────────────────────────
await boss.work("scrape:signals:sec-edgar", async () => {
  announce("Running: SEC EDGAR CapEx");
  await scrapeSecEdgar();
});

await boss.work("scrape:signals:github", async () => {
  announce("Running: GitHub tech signals");
  await scrapeGithubSignals();
});

await boss.work("scrape:signals:ebay-velocity", async () => {
  announce("Running: eBay sold velocity");
  await scrapeEbayVelocity();
});

await boss.work("scrape:signals:ai-clusters", async () => {
  announce("Running: AI cluster announcements");
  await scrapeAiClusters();
});

await boss.work("scrape:signals:distributor-leads", async () => {
  announce("Running: Distributor lead times");
  await scrapeDistributorLeads();
});

await boss.work("scrape:signals:standards", async () => {
  announce("Running: Standards tracker");
  await scrapeStandardsTracker();
});

// ── Forecast engine ───────────────────────────────────────────────────
await boss.work("compute:forecast", async () => {
  announce("Running: Forecast engine");
  await runForecastEngine();
});

// ── Hype Cycle Engine (Norton-Bass diffusion model) ───────────────────
await boss.work("compute:hype-cycle", async () => {
  announce("Running: Hype Cycle Engine (Norton-Bass)");
  const { computeHypeCycle: compute } = await import("./utils/hype-cycle-engine");
  await compute();
});

// ── Form-factor coverage scrapers ─────────────────────────────────────
// Each handler loads its scraper module on demand and runs it once.
await boss.work("scrape:pricing:comms-express", async () => {
  announce("Running: Comms-Express pricing");
  const { scrapeCommsExpress: scrape } = await import("./scrapers/comms-express");
  await scrape();
});

await boss.work("scrape:pricing:router-switch", async () => {
  announce("Running: Router-Switch.com pricing");
  const { scrapeRouterSwitch: scrape } = await import("./scrapers/router-switch");
  await scrape();
});

await boss.work("scrape:pricing:multimode-inc", async () => {
  announce("Running: Multimode Inc pricing");
  const { scrapeMultimodeInc: scrape } = await import("./scrapers/multimode-inc");
  await scrape();
});

await boss.work("scrape:pricing:optictransceiver", async () => {
  announce("Running: OpticTransceiver.com pricing");
  const { scrapeOpticTransceiver: scrape } = await import("./scrapers/optictransceiver");
  await scrape();
});

await boss.work("scrape:pricing:wiitek", async () => {
  announce("Running: Wiitek pricing");
  const { scrapeWiitek: scrape } = await import("./scrapers/wiitek");
  await scrape();
});

await boss.work("scrape:pricing:naddod", async () => {
  announce("Running: NADDOD pricing");
  const { scrapeNaddod: scrape } = await import("./scrapers/naddod");
  await scrape();
});

await boss.work("scrape:pricing:qsfptek", async () => {
  announce("Running: QSFPTEK pricing");
  const { scrapeQsfptek: scrape } = await import("./scrapers/qsfptek");
  await scrape();
});

await boss.work("scrape:pricing:addon", async () => {
  announce("Running: AddOn Networks pricing");
  const { scrapeAddonNetworks: scrape } = await import("./scrapers/addon-networks");
  await scrape();
});

await boss.work("scrape:pricing:fibermall", async () => {
  announce("Running: FiberMall pricing");
  const { scrapeFiberMall: scrape } = await import("./scrapers/fibermall");
  await scrape();
});

await boss.work("scrape:pricing:vcelink", async () => {
  announce("Running: Vcelink pricing");
  const { scrapeVcelink: scrape } = await import("./scrapers/vcelink");
  await scrape();
});

await boss.work("scrape:pricing:opticsbay", async () => {
  announce("Running: OpticsBay pricing");
  const { scrapeOpticsBay: scrape } = await import("./scrapers/opticsbay");
  await scrape();
});

// ── Mouser OEM reference prices ────────────────────────────────────────
// Skipped (with a warning) when MOUSER_API_KEY is unset or empty.
await boss.work("scrape:pricing:mouser-oem", async () => {
  announce("Running: Mouser OEM reference prices");
  const hasApiKey = Boolean(process.env["MOUSER_API_KEY"]);
  if (!hasApiKey) {
    console.warn(" [mouser-oem] Skipping — MOUSER_API_KEY not set");
    return;
  }
  const { scrapeMouser: scrape } = await import("./scrapers/mouser");
  await scrape();
});

// ── Price denormalization refresh ─────────────────────────────────────────
// Refreshes street_price_usd / price_verified_eur on the transceivers table from
// the price_observations hypertable. Without this, denormalized prices go stale
// even when scrapers are collecting new observations.
await boss.work("compute:price-denorm", async () => {
  const { pool } = await import("./utils/db");
  const ts = new Date().toISOString();
  console.log(`[${ts}] Running: Price denormalization refresh`);

  // For every transceiver with positive-price observations in the last 180 days,
  // copy the per-currency MAX(price) onto the denormalized columns.
  // COALESCE keeps the existing column value when the window holds observations
  // in only one currency; previously the other column was overwritten with NULL.
  const result = await pool.query(`
    UPDATE transceivers t
    SET
      price_verified_eur = COALESCE(sub.price_eur, t.price_verified_eur),
      street_price_usd = COALESCE(sub.price_usd, t.street_price_usd),
      updated_at = NOW()
    FROM (
      SELECT
        po.transceiver_id,
        MAX(po.price) FILTER (WHERE po.currency = 'EUR') AS price_eur,
        MAX(po.price) FILTER (WHERE po.currency = 'USD') AS price_usd
      FROM price_observations po
      WHERE po.time > NOW() - INTERVAL '180 days'
        AND po.price > 0
      GROUP BY po.transceiver_id
    ) sub
    WHERE t.id = sub.transceiver_id
      AND (sub.price_eur IS NOT NULL OR sub.price_usd IS NOT NULL)
  `);
  console.log(`[price-denorm] refreshed ${result.rowCount} transceivers`);
});

// ── Health monitor ──────────────────────────────────────────────────────
// Classifies each expected vendor as active / stable / warning / critical from
// (a) how recently price observations were written and (b) the state of its
// pg-boss scraper job, then logs a summary. It only logs; it changes no data.
await boss.work("monitor:scraper-health", async () => {
  const { pool } = await import("./utils/db");

  // Vendors we expect to see prices from regularly.
  // Mapped: display name → pg-boss job name (for last-run lookup).
  const EXPECTED_VENDORS: Array<{ name: string; jobName: string }> = [
    { name: "FiberMall", jobName: "scrape:pricing:fibermall" },
    { name: "QSFPTEK", jobName: "scrape:pricing:qsfptek" },
    { name: "Flexoptix", jobName: "scrape:pricing:flexoptix" },
    // FS.COM is skipped on Erik (SKIP_FS_SCRAPER=true) — Mac launchd handles it.
    // Exclude from monitor so stale last_seen doesn't trigger false positives.
    { name: "10Gtek", jobName: "scrape:pricing:10gtek" },
    { name: "ATGBICS", jobName: "scrape:pricing:atgbics" },
    { name: "GBICS", jobName: "scrape:pricing:gbics" },
    { name: "SFPcables", jobName: "scrape:pricing:sfpcables" },
    { name: "NADDOD", jobName: "scrape:pricing:naddod" },
  ];
  const vendorNames = EXPECTED_VENDORS.map((v) => v.name);
  const jobNames = EXPECTED_VENDORS.map((v) => v.jobName);
  const jobNameByVendor = new Map<string, string>();
  for (const v of EXPECTED_VENDORS) jobNameByVendor.set(v.name, v.jobName);

  // Price observation recency per vendor. The LEFT JOIN keeps vendors that have
  // no observations at all (last_seen / hours_since are NULL for those).
  const priceResult = await pool.query(`
    SELECT
      v.name,
      SUM(CASE WHEN po.time > NOW() - INTERVAL '6 hours' THEN 1 ELSE 0 END) AS prices_6h,
      MAX(po.time) AS last_seen,
      EXTRACT(EPOCH FROM (NOW() - MAX(po.time))) / 3600.0 AS hours_since
    FROM vendors v
    LEFT JOIN price_observations po ON po.source_vendor_id = v.id
    WHERE v.name = ANY($1)
    GROUP BY v.name
    ORDER BY last_seen ASC NULLS FIRST
  `, [vendorNames]);

  // Most recent pg-boss job (any state) per vendor scraper created within the
  // last 26h — covers any 2h-scheduled job at least once plus slack for daemon
  // restart delays. Its state is shown in the report so "running but prices
  // stable" can be told apart from "job consistently failing".
  const jobResult = await pool.query(`
    SELECT DISTINCT ON (name)
      name, state, completed_on
    FROM pgboss.job
    WHERE name = ANY($1)
      AND created_on > NOW() - INTERVAL '26 hours'
    ORDER BY name, created_on DESC
  `, [jobNames]);
  const jobMap = new Map<string, { state: string; completed_on: Date | null }>();
  for (const row of jobResult.rows) {
    jobMap.set(row.name as string, {
      state: row.state as string,
      completed_on: row.completed_on as Date | null,
    });
  }

  // Last COMPLETED job per vendor (to know when the job last ran successfully)
  const completedResult = await pool.query(`
    SELECT DISTINCT ON (name)
      name, completed_on AS last_completed
    FROM pgboss.job
    WHERE name = ANY($1)
      AND state = 'completed'
      AND created_on > NOW() - INTERVAL '26 hours'
    ORDER BY name, completed_on DESC
  `, [jobNames]);
  const completedMap = new Map<string, Date>();
  for (const row of completedResult.rows) {
    if (row.last_completed) {
      completedMap.set(row.name as string, new Date(row.last_completed as string));
    }
  }

  // Thresholds for alerting:
  //   CRITICAL (🔴): no job completed in 26h AND (never scraped OR last price > 168h)
  //                  — scraper genuinely not running or consistently failing;
  //                  also used for expected vendors missing from the vendors table
  //   WARNING  (🟡): no job completed in 26h AND last price > 48h
  //                  — scraper may be broken but recently seen
  //   STABLE   (✅): job completed in last 26h AND vendor has historical prices
  //                  — prices unchanged (hash dedup), scraper is healthy
  const CRITICAL_HOURS = 168;
  const WARN_HOURS = 48;

  const critical: string[] = [];
  const warnings: string[] = [];
  const stable: string[] = [];

  // Renders the " | job=…" suffix for a vendor's most recent job.
  const describeJob = (jobName: string | undefined): string => {
    const jobInfo = jobName ? jobMap.get(jobName) : undefined;
    if (!jobInfo) return " | job=not run in 26h";
    const at = jobInfo.completed_on
      ? new Date(jobInfo.completed_on).toISOString().slice(11, 16)
      : "?";
    return ` | job=${jobInfo.state} at ${at}`;
  };

  const reportedVendors = new Set<string>();
  for (const row of priceResult.rows) {
    reportedVendors.add(row.name as string);

    const h = parseFloat(row.hours_since ?? "9999");
    const n = parseInt(row.prices_6h ?? "0", 10);
    if (n > 0) continue; // new prices written → definitely healthy

    const lastStr = row.last_seen
      ? `last price ${h.toFixed(1)}h ago (${new Date(row.last_seen as string).toISOString().slice(0, 16)})`
      : "NEVER scraped";

    const jobName = jobNameByVendor.get(row.name as string);
    const lastCompleted = jobName ? completedMap.get(jobName) : undefined;
    const jobStr = describeJob(jobName);

    // If the job completed successfully in the last 26h AND the vendor has
    // historical prices, prices are just stable (hash dedup) — not an outage.
    const jobRunningOk = lastCompleted !== undefined && Boolean(row.last_seen);
    if (jobRunningOk) {
      stable.push(`✅ ${row.name}: prices stable (${h.toFixed(1)}h unchanged, job OK)${jobStr}`);
      continue;
    }

    if (!row.last_seen || h > CRITICAL_HOURS) {
      critical.push(`🔴 ${row.name}: ${lastStr}${jobStr}`);
    } else if (h > WARN_HOURS) {
      warnings.push(`🟡 ${row.name}: ${lastStr}${jobStr}`);
    } else {
      stable.push(`✅ ${row.name}: prices stable (${h.toFixed(1)}h unchanged)${jobStr}`);
    }
  }

  // An expected vendor with no row in `vendors` (e.g. a name/case mismatch)
  // never shows up in priceResult; without this check it would be silently
  // counted as "active" below.
  for (const vendor of EXPECTED_VENDORS) {
    if (reportedVendors.has(vendor.name)) continue;
    critical.push(`🔴 ${vendor.name}: no matching row in vendors table${describeJob(vendor.jobName)}`);
  }

  if (critical.length > 0 || warnings.length > 0) {
    if (critical.length > 0) {
      console.error("=== 🔴 SCRAPER CRITICAL — vendors with no prices for 7+ days ===");
      for (const p of critical) console.error(p);
    }
    if (warnings.length > 0) {
      console.warn("=== 🟡 SCRAPER WARNING — vendors with stale prices (48h+) ===");
      for (const p of warnings) console.warn(p);
    }
    console.error("=== Check: pm2 logs tip-scraper-daemon ===");
  } else {
    const activeCount = EXPECTED_VENDORS.length - stable.length;
    if (stable.length > 0) {
      console.log(`[monitor] Scraper health OK — ${activeCount} vendors active, ${stable.length} stable (no price changes)`);
      for (const s of stable) console.log(` ${s}`);
    } else {
      console.log(`[monitor] Scraper health OK — all ${EXPECTED_VENDORS.length} vendors active in last 6h`);
    }
  }
});

// ── Verification reconciliation ─────────────────────────────────────────
// Three ordered UPDATEs that keep the verification flags on `transceivers`
// consistent; the order matters because step 2 depends on step 1's resets.
await boss.work("maintenance:reconcile-verification", async () => {
  const { pool } = await import("./utils/db");

  // 1. Reset competitor_verified=false for products with no non-Flexoptix price in last 30 days
  // NOTE(review): this criterion looks only at price_observations on the product itself and
  // ignores approved rows in transceiver_equivalences, so verification granted by
  // maintenance:find-equivalences is cleared here unless direct competitor prices exist —
  // confirm that is intended.
  const resetComp = await pool.query(`
    UPDATE transceivers t
    SET competitor_verified = false, competitor_verified_at = NULL
    WHERE competitor_verified = true
      AND NOT EXISTS (
        SELECT 1 FROM price_observations po
        JOIN vendors v ON po.source_vendor_id = v.id
        WHERE po.transceiver_id = t.id
          AND po.time > NOW() - INTERVAL '30 days'
          AND UPPER(v.name) NOT LIKE '%FLEXOPTIX%'
      )
  `);

  // 2. Reset fully_verified=false for products that lost any of the 4 criteria
  const resetFull = await pool.query(`
    UPDATE transceivers
    SET fully_verified = false, fully_verified_at = NULL
    WHERE fully_verified = true
      AND (competitor_verified = false OR price_verified = false OR image_verified = false OR details_verified = false)
  `);

  // 3. Set fully_verified=true for products that now meet all 4 criteria
  //    (COALESCE keeps an existing fully_verified_at timestamp if one is present)
  const setFull = await pool.query(`
    UPDATE transceivers
    SET fully_verified = true, fully_verified_at = COALESCE(fully_verified_at, NOW())
    WHERE competitor_verified = true
      AND price_verified = true
      AND image_verified = true
      AND details_verified = true
      AND fully_verified = false
  `);

  console.log(
    `[reconcile] competitor_verified reset: ${resetComp.rowCount}, ` +
    `fully_verified cleared: ${resetFull.rowCount}, ` +
    `fully_verified earned: ${setFull.rowCount}`
  );
});

// ── Equivalence matching ────────────────────────────────────────────────
// Matches Flexoptix SKUs to technically equivalent competitor products by specs.
// Confidence scoring (points, divided by 115 and clamped to 0..1):
//   form_factor(25) + speed_gbps(20)  — always awarded; both are SQL pre-filters
//   standard_name(30)                 — case-insensitive exact match
//   wavelength within ±15 nm(20)      — −20 when both are known and further apart
//   fiber_type(10)                    — −15 when both are known and differ
//   reach within ±25%(10)             — −15 when outside tolerance, +5 when both unknown
//   ≥0.73      → auto_approved + competitor_verified=true
//   0.50–0.73  → pending (Manual Review queue in dashboard)
//   <0.50      → skipped
await boss.work("maintenance:find-equivalences", async () => {
  const { pool } = await import("./utils/db");
  const ts = new Date().toISOString();
  console.log(`[${ts}] Running: Equivalence matching`);

  const MAX_SCORE = 115;
  const AUTO_APPROVE_CONFIDENCE = 0.73;
  const MIN_CONFIDENCE = 0.5;

  // Extracts the first 3–4 digit number from a free-text wavelength field.
  // "wavelengths" is text: "1310 nm", "850nm", "1270/1290/1310/1330 nm" etc.
  const extractNm = (w: string | null): number | null => {
    if (!w) return null;
    const m = w.match(/(\d{3,4})/);
    return m ? parseInt(m[1], 10) : null;
  };

  // Find all Flexoptix transceivers that are NOT yet competitor_verified
  const flexResult = await pool.query(`
    SELECT t.id, t.part_number, t.standard_name, t.form_factor, t.speed_gbps,
           t.fiber_type, t.reach_meters, t.wavelengths, t.connector, t.wdm_type, t.coherent
    FROM transceivers t
    JOIN vendors v ON v.id = t.vendor_id
    WHERE UPPER(v.name) LIKE '%FLEXOPTIX%'
      AND t.competitor_verified = false
  `);

  let autoApproved = 0;
  let queued = 0;
  let skipped = 0;
  let manualDecisions = 0; // pairs whose stored status is a human approved/rejected decision
  let newlyVerified = 0; // transceivers whose competitor_verified flag was actually flipped

  for (const fx of flexResult.rows) {
    // Parsed once per Flexoptix product instead of once per candidate.
    const fxNm = extractNm(fx.wavelengths);

    // Find competitor transceivers with recent price observations and matching specs
    const candidates = await pool.query(`
      SELECT t.id AS competitor_id, t.part_number, t.standard_name, t.form_factor,
             t.speed_gbps, t.fiber_type, t.reach_meters, t.wavelengths, t.connector,
             v.name AS vendor_name,
             MAX(po.time) AS last_price,
             COUNT(*) AS price_count
      FROM transceivers t
      JOIN vendors v ON v.id = t.vendor_id
      JOIN price_observations po ON po.transceiver_id = t.id
      WHERE UPPER(v.name) NOT LIKE '%FLEXOPTIX%'
        AND po.time > NOW() - INTERVAL '30 days'
        AND t.form_factor = $1
        AND t.speed_gbps = $2
        AND t.id != $3
      GROUP BY t.id, t.part_number, t.standard_name, t.form_factor, t.speed_gbps,
               t.fiber_type, t.reach_meters, t.wavelengths, t.connector, v.name
    `, [fx.form_factor, fx.speed_gbps, fx.id]);

    for (const cand of candidates.rows) {
      let score = 0;
      const basis: string[] = [];

      // form_factor already matched (pre-filter), award points
      score += 25;
      basis.push("form_factor");

      // speed_gbps already matched (pre-filter)
      score += 20;
      basis.push("speed_gbps");

      // standard_name match (strong signal — e.g. "10GBASE-LR")
      if (
        fx.standard_name &&
        cand.standard_name &&
        fx.standard_name.trim().toUpperCase() === cand.standard_name.trim().toUpperCase()
      ) {
        score += 30;
        basis.push("standard_name");
      }

      // wavelength match — first numeric nm value of each side, within ±15nm
      const candNm = extractNm(cand.wavelengths);
      if (fxNm !== null && candNm !== null) {
        if (Math.abs(fxNm - candNm) <= 15) {
          score += 20;
          basis.push(`wavelength_${fxNm}nm`);
        } else {
          score -= 20; // hard penalize wrong wavelength (1310 vs 1550 = completely different product)
        }
      }

      // fiber_type match (SMF vs MMF — critical)
      if (fx.fiber_type && cand.fiber_type) {
        if (fx.fiber_type.trim().toUpperCase() === cand.fiber_type.trim().toUpperCase()) {
          score += 10;
          basis.push("fiber_type");
        } else {
          score -= 15; // SMF vs MMF = wrong product
        }
      }

      // reach within ±25% of the Flexoptix reach
      if (fx.reach_meters && cand.reach_meters && fx.reach_meters > 0 && cand.reach_meters > 0) {
        const diff = Math.abs(fx.reach_meters - cand.reach_meters);
        const tolerance = Math.max(fx.reach_meters, 1) * 0.25;
        if (diff <= tolerance) {
          score += 10;
          basis.push("reach");
        } else {
          score -= 15; // penalize mismatched reach
        }
      } else if (!fx.reach_meters && !cand.reach_meters) {
        score += 5;
        basis.push("reach_null");
      }

      const confidence = Math.max(0, Math.min(1, score / MAX_SCORE));
      if (confidence < MIN_CONFIDENCE) {
        skipped++;
        continue;
      }

      const notes =
        `${fx.part_number} ↔ ${cand.part_number} (${cand.vendor_name}) | ` +
        `basis: ${basis.join(", ")} | reach: ${fx.reach_meters}m vs ${cand.reach_meters}m | ` +
        `wavelength: ${fx.wavelengths||"?"} vs ${cand.wavelengths||"?"}`;

      // Upsert equivalence candidate. Rows carrying a human decision
      // ('approved' / 'rejected') are left untouched; for all other rows the
      // status is refreshed together with the confidence so the two cannot
      // drift apart (previously a 'pending' row kept that status even when it
      // now scored high enough for auto-approval).
      const status = confidence >= AUTO_APPROVE_CONFIDENCE ? "auto_approved" : "pending";
      const upsert = await pool.query(`
        INSERT INTO transceiver_equivalences
          (flexoptix_id, competitor_id, confidence, match_basis, match_notes, status)
        VALUES ($1, $2, $3, $4, $5, $6)
        ON CONFLICT (flexoptix_id, competitor_id) DO UPDATE SET
          confidence = EXCLUDED.confidence,
          match_basis = EXCLUDED.match_basis,
          match_notes = EXCLUDED.match_notes,
          status = EXCLUDED.status,
          updated_at = NOW()
        WHERE transceiver_equivalences.status NOT IN ('approved', 'rejected')
      `, [fx.id, cand.competitor_id, confidence, basis, notes, status]);

      // rowCount is 0 only when the conflict WHERE clause blocked the update,
      // i.e. the stored row is a human decision.
      const wroteRow = (upsert.rowCount ?? 0) > 0;
      if (!wroteRow) manualDecisions++;

      if (status === "auto_approved") {
        // Set competitor_verified on the Flexoptix transceiver — unless a
        // human explicitly rejected this exact pairing.
        const verified = await pool.query(`
          UPDATE transceivers
          SET competitor_verified = true, competitor_verified_at = NOW()
          WHERE id = $1
            AND competitor_verified = false
            AND NOT EXISTS (
              SELECT 1 FROM transceiver_equivalences eq
              WHERE eq.flexoptix_id = $1
                AND eq.competitor_id = $2
                AND eq.status = 'rejected'
            )
        `, [fx.id, cand.competitor_id]);
        newlyVerified += verified.rowCount ?? 0;
        if (wroteRow) autoApproved++;
      } else if (wroteRow) {
        queued++;
      }
    }
  }

  console.log(
    `[find-equivalences] auto_approved: ${autoApproved}, ` +
    `queued for review: ${queued}, skipped (low confidence): ${skipped}, ` +
    `left to manual decision: ${manualDecisions}`
  );

  // After any transceiver became competitor_verified, rerun fully_verified check
  if (newlyVerified > 0) {
    await pool.query(`
      UPDATE transceivers
      SET fully_verified = true, fully_verified_at = COALESCE(fully_verified_at, NOW())
      WHERE competitor_verified = true
        AND price_verified = true
        AND image_verified = true
        AND details_verified = true
        AND fully_verified = false
    `);
  }
});

// ── Re-research approved equivalences ────────────────────────────────────────
// Processes up to 200 approved / auto_approved equivalences per run whose
// re_research_due_at <= NOW(), oldest due date first.
// The check is price recency only: if the competitor product has a price observation in
// the last 45 days, the approval is confirmed (re_researched_at = NOW(), next check in 30 days).
// Otherwise the equivalence reverts to pending and, unless another approved equivalence
// covers the same Flexoptix transceiver, its competitor_verified / fully_verified flags are
// cleared. Spec confidence is not recomputed here.
await boss.work("maintenance:re-research-equivalences", async () => {
  const { pool } = await import("./utils/db");
  const startedAt = new Date().toISOString();
  console.log(`[${startedAt}] Running: Re-research approved equivalences`);

  // Approved equivalences that are due for a re-check, oldest due date first.
  const DUE_SQL = `
    SELECT eq.id, eq.flexoptix_id, eq.competitor_id, eq.confidence,
           fx.form_factor, fx.speed_gbps, fx.standard_name, fx.fiber_type,
           fx.reach_meters, fx.wavelengths
    FROM transceiver_equivalences eq
    JOIN transceivers fx ON eq.flexoptix_id = fx.id
    WHERE eq.status IN ('approved', 'auto_approved')
      AND eq.re_research_due_at IS NOT NULL
      AND eq.re_research_due_at <= NOW()
    ORDER BY eq.re_research_due_at ASC
    LIMIT 200
  `;

  // Number of price observations for the competitor product in the last 45 days.
  const RECENT_PRICE_SQL = `
    SELECT COUNT(*) AS cnt
    FROM price_observations
    WHERE transceiver_id = $1
      AND time > NOW() - INTERVAL '45 days'
  `;

  // Still valid — confirm and schedule next re-research in 30 days.
  const CONFIRM_SQL = `
    UPDATE transceiver_equivalences
    SET re_researched_at = NOW(),
        re_research_due_at = NOW() + INTERVAL '30 days'
    WHERE id = $1
  `;

  // No recent price — send the equivalence back to pending and append a dated note.
  const REVERT_SQL = `
    UPDATE transceiver_equivalences
    SET status = 'pending',
        re_research_due_at = NULL,
        re_researched_at = NULL,
        match_notes = CONCAT(match_notes, E'\n[Re-research ' || NOW()::date || ': no recent price — reverted to pending]')
    WHERE id = $1
  `;

  // Clear the verification flags unless another approved equivalence ($2 is the
  // one being reverted) still covers the Flexoptix transceiver ($1).
  const UNVERIFY_SQL = `
    UPDATE transceivers
    SET competitor_verified = false, competitor_verified_at = NULL,
        fully_verified = false, fully_verified_at = NULL
    WHERE id = $1
      AND NOT EXISTS (
        SELECT 1 FROM transceiver_equivalences
        WHERE flexoptix_id = $1
          AND status IN ('approved', 'auto_approved')
          AND id != $2
      )
  `;

  const due = await pool.query(DUE_SQL);
  const tally = { confirmed: 0, reverted: 0 };

  // Rows are processed one at a time, in due-date order.
  for (const item of due.rows) {
    const recent = await pool.query(RECENT_PRICE_SQL, [item.competitor_id]);
    const recentCount = parseInt(recent.rows[0].cnt, 10);

    if (recentCount > 0) {
      await pool.query(CONFIRM_SQL, [item.id]);
      tally.confirmed += 1;
      continue;
    }

    // Competitor no longer carries this — revert to pending for manual review,
    // then reset the transceiver flags if nothing else backs them.
    await pool.query(REVERT_SQL, [item.id]);
    await pool.query(UNVERIFY_SQL, [item.flexoptix_id, item.id]);
    tally.reverted += 1;
  }

  console.log(`[re-research] confirmed: ${tally.confirmed}, reverted to pending: ${tally.reverted}, batch size: ${due.rows.length}`);
});

console.log("All workers registered (79 jobs, 24/7 continuous)");
}