Rene Fichtmueller 6b39bb0930 fix: skip Optcore on Erik — Cloudflare blocks datacenter IP
optcore.net blocks Erik's IP (82.165.222.127) via Cloudflare WAF.
WP REST API returns HTML block page instead of JSON → 0 product URLs
→ 0 scraped pages every run. Add SKIP_OPTCORE_SCRAPER guard matching
the existing SKIP_FS_SCRAPER pattern. Set in ecosystem.config.js on
Erik. Residential IP (Mac launchd) would be needed to use this scraper.
2026-04-18 05:41:56 +02:00

1121 lines
57 KiB
TypeScript
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

/**
* pg-boss Job Scheduler — 24/7 Continuous Scraping
*
* ARCHITECTURE:
* - Erik (VPS, .82) : Playwright-heavy scrapers (FS.com, 10Gtek, ATGBICS, ProLabs)
* + all compatibility + eBay + compute + NAS sync
* - Raspberry Pi Fleet : Lightweight fetch/cheerio scrapers run continuously all day
* (BlueOptics, Fiber24, T&S Com, Fluxlight, GBICs, Optcore,
* Champion ONE, SFPCables, SmartOptics, HUBER+SUHNER, etc.)
*
* SCHEDULE PHILOSOPHY:
* - Playwright scrapers: every 2h, staggered 10min apart (resource-heavy, VPS only)
* - Fetch/Cheerio scrapers: every 2h, staggered 5min apart (lightweight, Pi-friendly)
* - Catalog scrapers (Flexoptix): every 2h (fast GraphQL, primary price source)
* - Compatibility matrices: every 12h (rarely change)
* - eBay enrichment: every 6h
* - Intelligence/community: every 6h
* - Compute jobs: after each pricing wave
* - NAS sync: nightly at 07:55
*/
import PgBoss from "pg-boss";
import { config } from "dotenv";
import { join } from "path";
// withIsolatedStorage removed — all Crawlee scrapers now use makeCrawleeConfig()
// for instance-level storage isolation. See packages/scraper/src/utils/crawlee-config.ts
// Load environment variables from the .env file located three directories above
// this file's directory, before any process.env value is read below.
config({ path: join(__dirname, "..", "..", "..", ".env") });
/**
 * Build the Postgres connection URI from POSTGRES_* environment variables.
 *
 * Unset or empty variables fall back to the local development defaults.
 * User, password and database name are percent-encoded so that credentials
 * containing URI-reserved characters (`@`, `/`, `:`, `#`, `%`, ...) still
 * produce a well-formed URI instead of being misparsed as host/path parts.
 *
 * NOTE(review): the fallback password is a hard-coded development default;
 * production deployments should always set POSTGRES_PASSWORD explicitly.
 *
 * @param env - Environment variable map (normally `process.env`).
 * @returns A `postgres://user:password@host:port/database` URI.
 */
function buildConnectionString(env: Record<string, string | undefined>): string {
  // `||` (not `??`) is deliberate: an empty string should also use the default.
  const user = encodeURIComponent(env["POSTGRES_USER"] || "tip");
  const password = encodeURIComponent(env["POSTGRES_PASSWORD"] || "tip_dev_2026");
  const host = env["POSTGRES_HOST"] || "localhost";
  const port = env["POSTGRES_PORT"] || "5433";
  const database = encodeURIComponent(env["POSTGRES_DB"] || "transceiver_db");
  return `postgres://${user}:${password}@${host}:${port}/${database}`;
}

/** Connection URI used by the pg-boss instance created in this module. */
const connectionString = buildConnectionString(process.env);
/**
 * Construct a pg-boss instance against the module's connection string,
 * attach an error logger, start it, and hand it back to the caller.
 *
 * @returns The started pg-boss instance.
 */
export async function createScheduler(): Promise<PgBoss> {
  const schedulerOptions = {
    connectionString,
    retryLimit: 3,
    retryDelay: 30,
    retryBackoff: true,
    expireInSeconds: 300,
    monitorStateIntervalSeconds: 60,
  };
  const scheduler = new PgBoss(schedulerOptions);

  // Log pg-boss "error" events to stderr.
  const logBossError = (error: Error): void => {
    console.error("pg-boss error:", error);
  };
  scheduler.on("error", logBossError);

  await scheduler.start();
  console.log("pg-boss scheduler started");
  return scheduler;
}
/**
 * Create every job queue and register its cron schedule on the given pg-boss
 * instance.
 *
 * A single table drives both steps, so a queue can never be scheduled without
 * being created (or vice versa), and the job count in the final log line is
 * derived from the table instead of being hard-coded.
 *
 * Each table row is `[queue name, cron expression, retryLimit, expireInSeconds]`.
 * Every job is scheduled with an empty data payload.
 *
 * @param boss - pg-boss instance on which queues and schedules are registered.
 */
export async function registerSchedules(boss: PgBoss): Promise<void> {
  const schedules: ReadonlyArray<readonly [string, string, number, number]> = [
    // ════════════════════════════════════════════════════════════════════
    // ALL PRICING SCRAPERS — 24/7, every 2h, staggered
    // Goal: complete competitor coverage, no gaps, database always fresh
    // ════════════════════════════════════════════════════════════════════
    // Playwright scrapers (resource-heavy) — every 2h, 10min apart
    ["scrape:pricing:fs", "0 */2 * * *", 3, 5400],
    ["scrape:pricing:10gtek", "10 */2 * * *", 2, 3600],
    ["scrape:pricing:atgbics", "20 */2 * * *", 2, 3600],
    ["scrape:pricing:prolabs", "30 */2 * * *", 2, 3600],
    // Fetch/Cheerio scrapers (lightweight) — every 2h, 5min apart
    ["scrape:pricing:fluxlight", "0 */2 * * *", 2, 3600],
    ["scrape:pricing:gbics", "5 */2 * * *", 2, 3600],
    ["scrape:pricing:optcore", "10 */2 * * *", 2, 3600],
    ["scrape:pricing:champion-one", "15 */2 * * *", 2, 3600],
    ["scrape:pricing:sfpcables", "20 */2 * * *", 2, 3600],
    ["scrape:pricing:blueoptics", "25 */2 * * *", 2, 3600],
    ["scrape:pricing:fiber24", "30 */2 * * *", 2, 3600],
    ["scrape:pricing:tscom", "35 */2 * * *", 2, 3600],
    ["scrape:pricing:skylane", "40 */2 * * *", 2, 3600],
    ["scrape:pricing:ascentoptics", "45 */2 * * *", 2, 3600],
    ["scrape:pricing:gaotek", "50 */2 * * *", 2, 3600],
    // Form-factor coverage scrapers — every 2h
    ["scrape:pricing:comms-express", "5 */2 * * *", 2, 5400],
    ["scrape:pricing:router-switch", "15 */2 * * *", 2, 5400],
    ["scrape:pricing:multimode-inc", "25 */2 * * *", 2, 3600],
    ["scrape:pricing:optictransceiver", "35 */2 * * *", 2, 3600],
    ["scrape:pricing:wiitek", "45 */2 * * *", 2, 3600],
    // Fetch-based scrapers running on Erik — every 2h, in the remaining free minute slots
    ["scrape:pricing:naddod", "48 */2 * * *", 2, 3600],
    ["scrape:pricing:qsfptek", "52 */2 * * *", 2, 3600],
    ["scrape:pricing:addon", "55 */2 * * *", 2, 3600],
    ["scrape:pricing:fibermall", "57 */2 * * *", 2, 3600],
    ["scrape:pricing:vcelink", "3 */2 * * *", 2, 3600],
    ["scrape:pricing:opticsbay", "7 */2 * * *", 2, 3600],
    // OEM reference prices via Mouser API — once daily at 03:00 (slow: 2s/PID × 475 PIDs ≈ 16min)
    // Requires MOUSER_API_KEY env var (free at mouser.com/api-hub)
    ["scrape:pricing:mouser-oem", "0 3 * * *", 1, 3600],
    // FLEXOPTIX CATALOG — every 2h (primary price source)
    ["scrape:pricing:flexoptix", "0 */2 * * *", 3, 3600],
    // MANUFACTURER CATALOGS — every 4h (product data, no prices)
    ["scrape:catalog:smartoptics", "10 */4 * * *", 2, 3600],
    ["scrape:catalog:hubersuhner", "25 */4 * * *", 2, 3600],
    ["scrape:catalog:eoptolink", "40 */4 * * *", 2, 3600],
    // VENDOR LISTS — every 12h
    ["scrape:vendors:flexoptix", "0 5,17 * * *", 2, 1800],
    ["scrape:vendors:flexoptix-supported", "15 5,17 * * *", 2, 1800],
    // COMPATIBILITY MATRICES — every 12h
    ["scrape:compat:cisco", "0 6,18 * * *", 2, 3600],
    ["scrape:compat:juniper", "15 6,18 * * *", 2, 3600],
    ["scrape:compat:sonic", "30 6,18 * * *", 2, 3600],
    ["scrape:compat:ufispace", "45 6,18 * * *", 2, 1800],
    ["scrape:compat:edgecore", "55 6,18 * * *", 2, 1800],
    // SWITCH ASSETS — every 12h
    ["scrape:assets:switches", "30 7,19 * * *", 1, 3600],
    // EBAY ENRICHMENT — every 6h
    ["enrich:ebay-transceivers", "0 0,6,12,18 * * *", 2, 7200],
    ["enrich:ebay-switches", "30 0,6,12,18 * * *", 2, 7200],
    // INTELLIGENCE & COMMUNITY — every 6h unless noted
    ["scrape:market-intel", "0 2,8,14,20 * * *", 2, 3600],
    // NOG conference talks — weekly on Mondays 06:00
    ["scrape:nog-talks", "0 6 * * 1", 2, 7200],
    ["scrape:community-issues", "30 2,8,14,20 * * *", 1, 3600],
    ["scrape:datasheet-links", "0 3,9,15,21 * * *", 1, 3600],
    ["scrape:news", "20 3,9,15,21 * * *", 2, 1800],
    ["scrape:faq", "40 3,9,15,21 * * *", 2, 3600],
    // Docs — every 12h
    ["scrape:docs", "50 4,16 * * *", 2, 3600],
    // COMPUTE JOBS — every 4h (after pricing waves settle)
    ["compute:abc", "50 3,7,11,15,19,23 * * *", 2, 600],
    ["compute:reorder-signals", "55 3,7,11,15,19,23 * * *", 2, 600],
    // PREDICTION SIGNAL SCRAPERS
    // SEC EDGAR CapEx — weekly Monday 06:00 (filings don't change that fast)
    ["scrape:signals:sec-edgar", "0 6 * * 1", 2, 3600],
    // GitHub signals — weekly Sunday 05:00
    ["scrape:signals:github", "0 5 * * 0", 2, 7200],
    // eBay sold velocity — every 12h (fast-moving market signal)
    ["scrape:signals:ebay-velocity", "0 4,16 * * *", 2, 3600],
    // AI cluster RSS feeds — every 4h (news moves fast)
    ["scrape:signals:ai-clusters", "10 0,4,8,12,16,20 * * *", 2, 1800],
    // Distributor lead times — daily 03:30 (stock changes overnight)
    ["scrape:signals:distributor-leads", "30 3 * * *", 2, 3600],
    // Standards tracker — weekly Wednesday 04:00 (standards move slowly)
    ["scrape:signals:standards", "0 4 * * 3", 1, 3600],
    // FORECAST ENGINE — daily at 08:00 (after all nightly scrapers done)
    ["compute:forecast", "0 8 * * *", 2, 600],
    // Hype Cycle Engine (Norton-Bass) — daily at 04:30 (after Mouser OEM scraper at 03:00)
    ["compute:hype-cycle", "30 4 * * *", 1, 600],
    // NAS SYNC — daily at 07:55
    ["sync:nas", "55 7 * * *", 1, 1800],
    // Health check: every 3h — warns if any vendor has no new prices recently
    ["monitor:scraper-health", "17 */3 * * *", 1, 600],
    // Verification reconciliation: nightly at 01:00 UTC
    // Resets competitor_verified/fully_verified for any transceiver that no longer
    // has a real non-Flexoptix price in the last 30 days — prevents stale ★ 100% badges
    ["maintenance:reconcile-verification", "0 1 * * *", 1, 1800],
    // Equivalence matching: nightly at 02:00 UTC (after reconcile)
    ["maintenance:find-equivalences", "0 2 * * *", 1, 3600],
    // Re-research approved equivalences: daily at 03:00 UTC, processes 200 items per run
    ["maintenance:re-research-equivalences", "0 3 * * *", 1, 3600],
  ];

  // Phase 1: make sure every queue exists before anything is scheduled on it.
  // Failures are ignored on purpose: the queue usually exists already from a
  // previous start, and a genuine problem will surface in phase 2.
  for (const [queue] of schedules) {
    await boss.createQueue(queue).catch(() => { /* already exists */ });
  }

  // Phase 2: register the cron schedules, in table order, with an empty payload.
  for (const [queue, cron, retryLimit, expireInSeconds] of schedules) {
    await boss.schedule(queue, cron, {}, { retryLimit, expireInSeconds });
  }

  console.log(`All schedules registered — 24/7 continuous scraping (${schedules.length} jobs)`);
}
export async function registerWorkers(boss: PgBoss): Promise<void> {
// Lazy-load all scrapers
const { scrapeFs } = await import("./scrapers/fs-com");
const { scrapeCiscoTmg } = await import("./scrapers/cisco-tmg");
const { scrapeSmartOptics } = await import("./scrapers/smartoptics");
const { scrapeHuberSuhner } = await import("./scrapers/hubersuhner");
const { scrapeMarketIntelligence } = await import("./scrapers/market-intelligence");
const { scrapeNews } = await import("./scrapers/news");
const { scrape10Gtek } = await import("./scrapers/tenGtek");
const { scrapeFlexoptixCatalog } = await import("./scrapers/flexoptix-catalog");
const { scrapeFlexoptixVendors } = await import("./scrapers/flexoptix-vendors");
const { seedFlexoptixVendors } = await import("./scrapers/flexoptix-supported-vendors");
const { scrapeAtgbics } = await import("./scrapers/atgbics");
const { scrapeProLabs } = await import("./scrapers/prolabs");
const { scrapeJuniperHct } = await import("./scrapers/juniper-hct");
const { scrapeSonicHcl } = await import("./scrapers/sonic-hcl");
const { scrapeUfiSpace } = await import("./scrapers/ufispace");
const { scrapeEdgecore } = await import("./scrapers/edgecore");
const { scrapeSwitchAssets } = await import("./scrapers/switch-assets");
// ── Prediction signal scrapers ────────────────────────────────────────
const { scrapeSecEdgar } = await import("./scrapers/sec-edgar");
const { scrapeGithubSignals } = await import("./scrapers/github-signals");
const { scrapeEbayVelocity } = await import("./scrapers/ebay-velocity");
const { scrapeAiClusters } = await import("./scrapers/ai-clusters");
const { scrapeDistributorLeads }= await import("./scrapers/distributor-leads");
const { scrapeStandardsTracker }= await import("./scrapers/standards-tracker");
const { runForecastEngine } = await import("./utils/forecast-engine");
// ── Playwright scrapers ───────────────────────────────────────────────
await boss.work("scrape:pricing:fs", async () => {
// FS.com uses Playwright + Cloudflare bypass. On datacenter servers the
// datacenter IP is blocked by Cloudflare WAF. Set SKIP_FS_SCRAPER=true to
// skip on Erik; the Mac launchd cron handles FS.com from a residential IP.
if (process.env["SKIP_FS_SCRAPER"] === "true") {
console.log(`[${new Date().toISOString()}] FS.com pricing: SKIPPED (SKIP_FS_SCRAPER=true)`);
return;
}
console.log(`[${new Date().toISOString()}] Running: FS.com pricing`);
await scrapeFs();
});
await boss.work("scrape:pricing:10gtek", async () => {
console.log(`[${new Date().toISOString()}] Running: 10Gtek pricing`);
await scrape10Gtek();
});
await boss.work("scrape:pricing:atgbics", async () => {
console.log(`[${new Date().toISOString()}] Running: ATGBICS pricing`);
await scrapeAtgbics();
});
await boss.work("scrape:pricing:prolabs", async () => {
console.log(`[${new Date().toISOString()}] Running: ProLabs pricing`);
await scrapeProLabs();
});
// ── Lightweight fetch/cheerio scrapers ───────────────────────────────
await boss.work("scrape:pricing:fluxlight", async () => {
console.log(`[${new Date().toISOString()}] Running: Fluxlight pricing`);
const { scrapeFluxlight } = await import("./scrapers/fluxlight");
await scrapeFluxlight();
});
await boss.work("scrape:pricing:gbics", async () => {
console.log(`[${new Date().toISOString()}] Running: GBICS pricing`);
const { scrapeGbics } = await import("./scrapers/gbics");
await scrapeGbics();
});
await boss.work("scrape:pricing:optcore", async () => {
// Optcore.net WP REST API is blocked by Cloudflare WAF for Erik's datacenter
// IP (82.165.222.127). Set SKIP_OPTCORE_SCRAPER=true to suppress the wasted
// run. A residential IP (Mac launchd) would be needed to scrape Optcore.
if (process.env["SKIP_OPTCORE_SCRAPER"] === "true") {
console.log(`[${new Date().toISOString()}] Optcore pricing: SKIPPED (SKIP_OPTCORE_SCRAPER=true)`);
return;
}
console.log(`[${new Date().toISOString()}] Running: Optcore pricing`);
const { scrapeOptcore } = await import("./scrapers/optcore");
await scrapeOptcore();
});
await boss.work("scrape:pricing:champion-one", async () => {
console.log(`[${new Date().toISOString()}] Running: Champion ONE pricing`);
const { scrapeChampionOne } = await import("./scrapers/champion-one");
await scrapeChampionOne();
});
await boss.work("scrape:pricing:sfpcables", async () => {
console.log(`[${new Date().toISOString()}] Running: SFPcables pricing`);
const { scrapeSfpCables } = await import("./scrapers/sfpcables");
await scrapeSfpCables();
});
await boss.work("scrape:pricing:blueoptics", async () => {
console.log(`[${new Date().toISOString()}] Running: BlueOptics pricing`);
const { scrapeBlueOptics } = await import("./scrapers/blueoptics");
await scrapeBlueOptics();
});
await boss.work("scrape:pricing:fiber24", async () => {
console.log(`[${new Date().toISOString()}] Running: Fiber24 pricing`);
const { scrapeFiber24 } = await import("./scrapers/fiber24");
await scrapeFiber24();
});
await boss.work("scrape:pricing:tscom", async () => {
console.log(`[${new Date().toISOString()}] Running: T&S Communication pricing`);
const { scrapeTsCom } = await import("./scrapers/tscom");
await scrapeTsCom();
});
await boss.work("scrape:pricing:skylane", async () => {
console.log(`[${new Date().toISOString()}] Running: Skylane pricing`);
const { scrapeSkylane } = await import("./scrapers/skylane");
await scrapeSkylane();
});
await boss.work("scrape:pricing:ascentoptics", async () => {
console.log(`[${new Date().toISOString()}] Running: Ascent Optics pricing`);
const { scrapeAscentOptics } = await import("./scrapers/ascentoptics");
await scrapeAscentOptics();
});
await boss.work("scrape:pricing:gaotek", async () => {
console.log(`[${new Date().toISOString()}] Running: GAO Tek pricing`);
const { scrapeGaoTek } = await import("./scrapers/gaotek");
await scrapeGaoTek();
});
// ── Catalog scrapers ──────────────────────────────────────────────────
await boss.work("scrape:pricing:flexoptix", async () => {
console.log(`[${new Date().toISOString()}] Running: Flexoptix catalog`);
await scrapeFlexoptixCatalog();
});
await boss.work("scrape:catalog:smartoptics", async () => {
console.log(`[${new Date().toISOString()}] Running: SmartOptics catalog`);
await scrapeSmartOptics();
});
await boss.work("scrape:catalog:hubersuhner", async () => {
console.log(`[${new Date().toISOString()}] Running: HUBER+SUHNER catalog`);
await scrapeHuberSuhner();
});
await boss.work("scrape:catalog:eoptolink", async () => {
console.log(`[${new Date().toISOString()}] Running: Eoptolink OEM catalog`);
const { scrapeEoptolink } = await import("./scrapers/eoptolink");
await scrapeEoptolink();
});
// ── Vendor lists ──────────────────────────────────────────────────────
await boss.work("scrape:vendors:flexoptix", async () => {
console.log(`[${new Date().toISOString()}] Running: Flexoptix vendor list`);
await scrapeFlexoptixVendors();
});
await boss.work("scrape:vendors:flexoptix-supported", async () => {
console.log(`[${new Date().toISOString()}] Running: Flexoptix supported vendors`);
await seedFlexoptixVendors();
});
// ── Compatibility scrapers ────────────────────────────────────────────
await boss.work("scrape:compat:cisco", async () => {
console.log(`[${new Date().toISOString()}] Running: Cisco TMG compatibility`);
await scrapeCiscoTmg();
});
await boss.work("scrape:compat:juniper", async () => {
console.log(`[${new Date().toISOString()}] Running: Juniper HCT compatibility`);
await scrapeJuniperHct();
});
await boss.work("scrape:compat:sonic", async () => {
console.log(`[${new Date().toISOString()}] Running: SONiC HCL compatibility`);
await scrapeSonicHcl();
});
await boss.work("scrape:compat:ufispace", async () => {
console.log(`[${new Date().toISOString()}] Running: Ufispace switch data`);
await scrapeUfiSpace();
});
await boss.work("scrape:compat:edgecore", async () => {
console.log(`[${new Date().toISOString()}] Running: Edgecore switch data`);
await scrapeEdgecore();
});
// ── Switch assets ─────────────────────────────────────────────────────
await boss.work("scrape:assets:switches", async () => {
console.log(`[${new Date().toISOString()}] Running: Switch assets enrichment`);
await scrapeSwitchAssets();
});
// ── eBay enrichment ───────────────────────────────────────────────────
await boss.work("enrich:ebay-transceivers", async () => {
console.log(`[${new Date().toISOString()}] Running: eBay transceiver pricing`);
const { enrichTransceiversFromEbay } = await import("./scrapers/ebay-enricher");
await enrichTransceiversFromEbay(100);
});
await boss.work("enrich:ebay-switches", async () => {
console.log(`[${new Date().toISOString()}] Running: eBay switch enrichment`);
const { enrichSwitchesFromEbay } = await import("./scrapers/ebay-enricher");
await enrichSwitchesFromEbay(30);
});
// ── Intelligence & community ──────────────────────────────────────────
await boss.work("scrape:market-intel", async () => {
console.log(`[${new Date().toISOString()}] Running: Market intelligence`);
await scrapeMarketIntelligence();
});
await boss.work("scrape:nog-talks", async () => {
console.log(`[${new Date().toISOString()}] Running: NOG conference talks`);
const { scrapeNogTalks } = await import("./scrapers/nog-talks");
await scrapeNogTalks();
});
await boss.work("scrape:community-issues", async () => {
console.log(`[${new Date().toISOString()}] Running: Community issues`);
const { scrapeAllSwitchIssues } = await import("./scrapers/community-issues");
await scrapeAllSwitchIssues(30);
});
await boss.work("scrape:datasheet-links", async () => {
console.log(`[${new Date().toISOString()}] Running: Datasheet links`);
const { findAndSeedDatasheetLinks } = await import("./scrapers/community-issues");
await findAndSeedDatasheetLinks(50);
});
await boss.work("scrape:news", async () => {
console.log(`[${new Date().toISOString()}] Running: News scraper`);
await scrapeNews();
});
await boss.work("scrape:faq", async () => {
console.log(`[${new Date().toISOString()}] FAQ scraper — not yet implemented`);
});
await boss.work("scrape:docs", async () => {
console.log(`[${new Date().toISOString()}] Docs scraper — not yet implemented`);
});
// ── Compute jobs ──────────────────────────────────────────────────────
await boss.work("compute:abc", async () => {
console.log(`[${new Date().toISOString()}] Computing: ABC classification`);
const { computeAbcClassification } = await import("./scrapers/market-intelligence");
await computeAbcClassification();
});
await boss.work("compute:reorder-signals", async () => {
console.log(`[${new Date().toISOString()}] Computing: Reorder signals`);
const { computeReorderSignals } = await import("./scrapers/market-intelligence");
await computeReorderSignals();
});
// ── NAS sync ──────────────────────────────────────────────────────────
await boss.work("sync:nas", async () => {
console.log(`[${new Date().toISOString()}] Running: NAS sync to Fearghas`);
const { runNightlyNasSync } = await import("./utils/nas-sync");
await runNightlyNasSync();
});
// ── Prediction signal scrapers ────────────────────────────────────────
await boss.work("scrape:signals:sec-edgar", async () => {
console.log(`[${new Date().toISOString()}] Running: SEC EDGAR CapEx`);
await scrapeSecEdgar();
});
await boss.work("scrape:signals:github", async () => {
console.log(`[${new Date().toISOString()}] Running: GitHub tech signals`);
await scrapeGithubSignals();
});
await boss.work("scrape:signals:ebay-velocity", async () => {
console.log(`[${new Date().toISOString()}] Running: eBay sold velocity`);
await scrapeEbayVelocity();
});
await boss.work("scrape:signals:ai-clusters", async () => {
console.log(`[${new Date().toISOString()}] Running: AI cluster announcements`);
await scrapeAiClusters();
});
await boss.work("scrape:signals:distributor-leads", async () => {
console.log(`[${new Date().toISOString()}] Running: Distributor lead times`);
await scrapeDistributorLeads();
});
await boss.work("scrape:signals:standards", async () => {
console.log(`[${new Date().toISOString()}] Running: Standards tracker`);
await scrapeStandardsTracker();
});
// ── Forecast engine ───────────────────────────────────────────────────
await boss.work("compute:forecast", async () => {
console.log(`[${new Date().toISOString()}] Running: Forecast engine`);
await runForecastEngine();
});
// ── Hype Cycle Engine (Norton-Bass diffusion model) ───────────────────
await boss.work("compute:hype-cycle", async () => {
console.log(`[${new Date().toISOString()}] Running: Hype Cycle Engine (Norton-Bass)`);
const { computeHypeCycle } = await import("./utils/hype-cycle-engine");
await computeHypeCycle();
});
// ── Form-factor coverage scrapers ─────────────────────────────────────
await boss.work("scrape:pricing:comms-express", async () => {
console.log(`[${new Date().toISOString()}] Running: Comms-Express pricing`);
const { scrapeCommsExpress } = await import("./scrapers/comms-express");
await scrapeCommsExpress();
});
await boss.work("scrape:pricing:router-switch", async () => {
console.log(`[${new Date().toISOString()}] Running: Router-Switch.com pricing`);
const { scrapeRouterSwitch } = await import("./scrapers/router-switch");
await scrapeRouterSwitch();
});
await boss.work("scrape:pricing:multimode-inc", async () => {
console.log(`[${new Date().toISOString()}] Running: Multimode Inc pricing`);
const { scrapeMultimodeInc } = await import("./scrapers/multimode-inc");
await scrapeMultimodeInc();
});
await boss.work("scrape:pricing:optictransceiver", async () => {
console.log(`[${new Date().toISOString()}] Running: OpticTransceiver.com pricing`);
const { scrapeOpticTransceiver } = await import("./scrapers/optictransceiver");
await scrapeOpticTransceiver();
});
await boss.work("scrape:pricing:wiitek", async () => {
console.log(`[${new Date().toISOString()}] Running: Wiitek pricing`);
const { scrapeWiitek } = await import("./scrapers/wiitek");
await scrapeWiitek();
});
await boss.work("scrape:pricing:naddod", async () => {
console.log(`[${new Date().toISOString()}] Running: NADDOD pricing`);
const { scrapeNaddod } = await import("./scrapers/naddod");
await scrapeNaddod();
});
await boss.work("scrape:pricing:qsfptek", async () => {
console.log(`[${new Date().toISOString()}] Running: QSFPTEK pricing`);
const { scrapeQsfptek } = await import("./scrapers/qsfptek");
await scrapeQsfptek();
});
await boss.work("scrape:pricing:addon", async () => {
console.log(`[${new Date().toISOString()}] Running: AddOn Networks pricing`);
const { scrapeAddonNetworks } = await import("./scrapers/addon-networks");
await scrapeAddonNetworks();
});
await boss.work("scrape:pricing:fibermall", async () => {
console.log(`[${new Date().toISOString()}] Running: FiberMall pricing`);
const { scrapeFiberMall } = await import("./scrapers/fibermall");
await scrapeFiberMall();
});
await boss.work("scrape:pricing:vcelink", async () => {
console.log(`[${new Date().toISOString()}] Running: Vcelink pricing`);
const { scrapeVcelink } = await import("./scrapers/vcelink");
await scrapeVcelink();
});
await boss.work("scrape:pricing:opticsbay", async () => {
console.log(`[${new Date().toISOString()}] Running: OpticsBay pricing`);
const { scrapeOpticsBay } = await import("./scrapers/opticsbay");
await scrapeOpticsBay();
});
// ── Mouser OEM reference prices ────────────────────────────────────────
// Runs the Mouser scraper only when MOUSER_API_KEY is present in the environment;
// otherwise the run is logged as skipped and the handler completes normally.
await boss.work("scrape:pricing:mouser-oem", async () => {
  const startedAt = new Date().toISOString();
  console.log(`[${startedAt}] Running: Mouser OEM reference prices`);
  const apiKeyConfigured = Boolean(process.env["MOUSER_API_KEY"]);
  if (apiKeyConfigured) {
    // The scraper module is imported on demand, only when it will actually run.
    const { scrapeMouser: runScraper } = await import("./scrapers/mouser");
    await runScraper();
  } else {
    console.warn(" [mouser-oem] Skipping — MOUSER_API_KEY not set");
  }
});
// ── Health monitor ──────────────────────────────────────────────────────
// Classifies every expected vendor from two signals: how recently price
// observations were written for it, and whether its scrape job completed in the
// last 26h. The outcome is only logged (console.error / console.warn / console.log).
await boss.work("monitor:scraper-health", async () => {
  const { pool } = await import("./utils/db");
  // Vendors we expect to see prices from regularly.
  // Mapped: display name → pg-boss job name (for last-run lookup).
  const EXPECTED_VENDORS: Array<{ name: string; jobName: string }> = [
    { name: "FiberMall", jobName: "scrape:pricing:fibermall" },
    { name: "QSFPTEK", jobName: "scrape:pricing:qsfptek" },
    { name: "Flexoptix", jobName: "scrape:pricing:flexoptix" },
    // FS.COM is skipped on Erik (SKIP_FS_SCRAPER=true) — Mac launchd handles it.
    // Exclude from monitor so stale last_seen doesn't trigger false positives.
    { name: "10Gtek", jobName: "scrape:pricing:10gtek" },
    { name: "ATGBICS", jobName: "scrape:pricing:atgbics" },
    { name: "GBICS", jobName: "scrape:pricing:gbics" },
    { name: "SFPcables", jobName: "scrape:pricing:sfpcables" },
    { name: "NADDOD", jobName: "scrape:pricing:naddod" },
  ];
  const vendorNames = EXPECTED_VENDORS.map((v) => v.name);
  const jobNames = EXPECTED_VENDORS.map((v) => v.jobName);
  // Price observation recency per vendor. Only vendors that exist in the
  // `vendors` table produce a row here; missing ones are handled further below.
  const priceResult = await pool.query(`
SELECT v.name,
SUM(CASE WHEN po.time > NOW() - INTERVAL '6 hours' THEN 1 ELSE 0 END) AS prices_6h,
MAX(po.time) AS last_seen,
EXTRACT(EPOCH FROM (NOW() - MAX(po.time))) / 3600.0 AS hours_since
FROM vendors v
LEFT JOIN price_observations po ON po.source_vendor_id = v.id
WHERE v.name = ANY($1)
GROUP BY v.name
ORDER BY last_seen ASC NULLS FIRST
`, [vendorNames]);
  // Most recent pg-boss job (any state) per vendor scraper, among jobs created in
  // the last 26h. Used only to annotate the report line with the latest job state.
  const jobResult = await pool.query(`
SELECT DISTINCT ON (name) name, state, completed_on
FROM pgboss.job
WHERE name = ANY($1)
AND created_on > NOW() - INTERVAL '26 hours'
ORDER BY name, created_on DESC
`, [jobNames]);
  const jobMap = new Map<string, { state: string; completed_on: Date | null }>();
  for (const row of jobResult.rows) {
    jobMap.set(row.name as string, { state: row.state as string, completed_on: row.completed_on as Date | null });
  }
  // Last COMPLETED job per vendor (to know when the job last ran successfully)
  const completedResult = await pool.query(`
SELECT DISTINCT ON (name) name, completed_on AS last_completed
FROM pgboss.job
WHERE name = ANY($1)
AND state = 'completed'
AND created_on > NOW() - INTERVAL '26 hours'
ORDER BY name, completed_on DESC
`, [jobNames]);
  const completedMap = new Map<string, Date>();
  for (const row of completedResult.rows) {
    if (row.last_completed) completedMap.set(row.name as string, new Date(row.last_completed as string));
  }
  // Thresholds for alerting:
  // CRITICAL (🔴): no job completed in 26h AND last price > 168h (7 days),
  //                or the vendor has never been scraped / is missing from `vendors`
  // WARNING (🟡): no job completed in 26h AND last price > 48h
  // STABLE (✅): job completed in last 26h AND vendor has historical prices
  //              — prices unchanged (hash dedup), scraper is healthy
  const CRITICAL_HOURS = 168;
  const WARN_HOURS = 48;
  const critical: string[] = [];
  const warnings: string[] = [];
  const stable: string[] = [];
  // Formats the " | job=…" suffix appended to every report line.
  const describeJob = (jobName: string | undefined): string => {
    const jobInfo = jobName ? jobMap.get(jobName) : undefined;
    return jobInfo
      ? ` | job=${jobInfo.state} at ${jobInfo.completed_on ? new Date(jobInfo.completed_on).toISOString().slice(11, 16) : "?"}`
      : " | job=not run in 26h";
  };
  // Names that came back from the price query; anything expected but absent from
  // this set has no row in `vendors` at all.
  const vendorsWithRow = new Set<string>();
  for (const row of priceResult.rows) {
    vendorsWithRow.add(row.name as string);
    const h = parseFloat(row.hours_since ?? "9999");
    const n = parseInt(row.prices_6h ?? "0", 10);
    if (n > 0) continue; // new prices written → definitely healthy
    const lastStr = row.last_seen
      ? `last price ${h.toFixed(1)}h ago (${new Date(row.last_seen as string).toISOString().slice(0, 16)})`
      : "NEVER scraped";
    const vendor = EXPECTED_VENDORS.find((v) => v.name === row.name);
    const lastCompleted = vendor ? completedMap.get(vendor.jobName) : undefined;
    const jobStr = describeJob(vendor?.jobName);
    // If the job completed successfully in the last 26h AND the vendor has
    // historical prices, prices are just stable (hash dedup) — not an outage.
    const jobRunningOk = Boolean(lastCompleted) && Boolean(row.last_seen);
    if (jobRunningOk) {
      stable.push(`${row.name}: prices stable (${h.toFixed(1)}h unchanged, job OK)${jobStr}`);
      continue;
    }
    if (!row.last_seen || h > CRITICAL_HOURS) {
      critical.push(`🔴 ${row.name}: ${lastStr}${jobStr}`);
    } else if (h > WARN_HOURS) {
      warnings.push(`🟡 ${row.name}: ${lastStr}${jobStr}`);
    } else {
      stable.push(`${row.name}: prices stable (${h.toFixed(1)}h unchanged)${jobStr}`);
    }
  }
  // An expected vendor with no `vendors` row would otherwise never be classified
  // and would silently be counted as "active" in the summary below.
  for (const vendor of EXPECTED_VENDORS) {
    if (vendorsWithRow.has(vendor.name)) continue;
    critical.push(`🔴 ${vendor.name}: no matching row in vendors table (name mismatch?)${describeJob(vendor.jobName)}`);
  }
  if (critical.length > 0 || warnings.length > 0) {
    if (critical.length > 0) {
      console.error("=== 🔴 SCRAPER CRITICAL — vendors with no prices for 7+ days ===");
      for (const p of critical) console.error(p);
    }
    if (warnings.length > 0) {
      console.warn("=== 🟡 SCRAPER WARNING — vendors with stale prices (48h+) ===");
      for (const p of warnings) console.warn(p);
    }
    console.error("=== Check: pm2 logs tip-scraper-daemon ===");
  } else {
    // Reaching this branch means every expected vendor was either active or stable.
    const activeCount = EXPECTED_VENDORS.length - stable.length;
    if (stable.length > 0) {
      console.log(`[monitor] Scraper health OK — ${activeCount} vendors active, ${stable.length} stable (no price changes)`);
      for (const s of stable) console.log(` ${s}`);
    } else {
      console.log(`[monitor] Scraper health OK — all ${EXPECTED_VENDORS.length} vendors active in last 6h`);
    }
  }
});
// ── Verification reconciliation ─────────────────────────────────────────
// Runs three UPDATEs in sequence to bring the verification flags on `transceivers`
// back in line with each other, then logs how many rows each statement touched.
await boss.work("maintenance:reconcile-verification", async () => {
  const db = (await import("./utils/db")).pool;

  // Step 1 — drop competitor_verified where the transceiver has no price observation
  // from a non-Flexoptix vendor within the last 30 days.
  const { rowCount: competitorResets } = await db.query(`
UPDATE transceivers t
SET competitor_verified = false,
competitor_verified_at = NULL
WHERE competitor_verified = true
AND NOT EXISTS (
SELECT 1 FROM price_observations po
JOIN vendors v ON po.source_vendor_id = v.id
WHERE po.transceiver_id = t.id
AND po.time > NOW() - INTERVAL '30 days'
AND UPPER(v.name) NOT LIKE '%FLEXOPTIX%'
)
`);

  // Step 2 — clear fully_verified on rows where any one of the four component
  // flags (competitor / price / image / details) is false.
  const { rowCount: fullyVerifiedCleared } = await db.query(`
UPDATE transceivers
SET fully_verified = false,
fully_verified_at = NULL
WHERE fully_verified = true
AND (competitor_verified = false OR price_verified = false OR image_verified = false OR details_verified = false)
`);

  // Step 3 — grant fully_verified to rows that satisfy all four flags but are not
  // marked yet; an already-present fully_verified_at timestamp is kept.
  const { rowCount: fullyVerifiedEarned } = await db.query(`
UPDATE transceivers
SET fully_verified = true,
fully_verified_at = COALESCE(fully_verified_at, NOW())
WHERE competitor_verified = true
AND price_verified = true
AND image_verified = true
AND details_verified = true
AND fully_verified = false
`);

  const summaryParts = [
    `competitor_verified reset: ${competitorResets}`,
    `fully_verified cleared: ${fullyVerifiedCleared}`,
    `fully_verified earned: ${fullyVerifiedEarned}`,
  ];
  console.log(`[reconcile] ${summaryParts.join(", ")}`);
});
// ── Equivalence matching ────────────────────────────────────────────────
// Matches Flexoptix SKUs to technically equivalent competitor products by specs.
// Candidates are pre-filtered on identical form_factor and speed_gbps and must have
// a price observation within the last 30 days.
// Confidence = score / 115, clamped to 0..1, where score is the sum of:
//   form_factor (+25) + speed_gbps (+20)          — always awarded (pre-filter)
//   standard_name equal (+30)
//   first wavelength within ±15nm (+20), otherwise (−20)
//   fiber_type equal (+10), otherwise (−15)
//   reach within ±25% (+10), otherwise (−15); both unset (+5)
// ≥0.73      → auto_approved + competitor_verified=true
// 0.50–0.72  → pending (Manual Review queue in dashboard)
// <0.50      → skipped
// Pairs that carry a manual 'rejected' decision are never re-verified by this job.
await boss.work("maintenance:find-equivalences", async () => {
  const { pool } = await import("./utils/db");
  const ts = new Date().toISOString();
  console.log(`[${ts}] Running: Equivalence matching`);

  const MAX_SCORE = 115;
  const MIN_CONFIDENCE = 0.5;
  const AUTO_APPROVE_CONFIDENCE = 0.73;
  const WAVELENGTH_TOLERANCE_NM = 15;
  const REACH_TOLERANCE_RATIO = 0.25;

  // "wavelengths" is text: "1310 nm", "850nm", "1270/1290/1310/1330 nm" etc.
  // Only the first 3–4 digit run is compared.
  const NM_PATTERN = /(\d{3,4})/;
  const extractNm = (w: string | null): number | null => {
    if (!w) return null;
    const m = w.match(NM_PATTERN);
    return m ? parseInt(m[1], 10) : null;
  };

  // Find all Flexoptix transceivers that are NOT yet competitor_verified
  const flexResult = await pool.query(`
SELECT t.id, t.part_number, t.standard_name, t.form_factor,
t.speed_gbps, t.fiber_type, t.reach_meters, t.wavelengths,
t.connector, t.wdm_type, t.coherent
FROM transceivers t
JOIN vendors v ON v.id = t.vendor_id
WHERE UPPER(v.name) LIKE '%FLEXOPTIX%'
AND t.competitor_verified = false
`);
  let autoApproved = 0;
  let queued = 0;
  let skipped = 0;
  let manuallyRejected = 0;
  for (const fx of flexResult.rows) {
    // Find competitor transceivers with recent price observations and matching specs
    const candidates = await pool.query(`
SELECT t.id AS competitor_id, t.part_number, t.standard_name,
t.form_factor, t.speed_gbps, t.fiber_type, t.reach_meters,
t.wavelengths, t.connector, v.name AS vendor_name,
MAX(po.time) AS last_price, COUNT(*) AS price_count
FROM transceivers t
JOIN vendors v ON v.id = t.vendor_id
JOIN price_observations po ON po.transceiver_id = t.id
WHERE UPPER(v.name) NOT LIKE '%FLEXOPTIX%'
AND po.time > NOW() - INTERVAL '30 days'
AND t.form_factor = $1
AND t.speed_gbps = $2
AND t.id != $3
GROUP BY t.id, t.part_number, t.standard_name, t.form_factor,
t.speed_gbps, t.fiber_type, t.reach_meters,
t.wavelengths, t.connector, v.name
`, [fx.form_factor, fx.speed_gbps, fx.id]);
    for (const cand of candidates.rows) {
      let score = 0;
      const basis: string[] = [];
      // form_factor already matched (pre-filter), award points
      score += 25; basis.push("form_factor");
      // speed_gbps already matched (pre-filter)
      score += 20; basis.push("speed_gbps");
      // standard_name match (strong signal — e.g. "10GBASE-LR")
      if (fx.standard_name && cand.standard_name &&
          fx.standard_name.trim().toUpperCase() === cand.standard_name.trim().toUpperCase()) {
        score += 30; basis.push("standard_name");
      }
      // wavelength match — first numeric nm value of each side, compared within ±15nm
      const fxNm = extractNm(fx.wavelengths);
      const candNm = extractNm(cand.wavelengths);
      if (fxNm !== null && candNm !== null) {
        if (Math.abs(fxNm - candNm) <= WAVELENGTH_TOLERANCE_NM) {
          score += 20; basis.push(`wavelength_${fxNm}nm`);
        } else {
          score -= 20; // hard penalize wrong wavelength (1310 vs 1550 = completely different product)
        }
      }
      // fiber_type match (SMF vs MMF — critical)
      if (fx.fiber_type && cand.fiber_type) {
        if (fx.fiber_type.trim().toUpperCase() === cand.fiber_type.trim().toUpperCase()) {
          score += 10; basis.push("fiber_type");
        } else {
          score -= 15; // SMF vs MMF = wrong product
        }
      }
      // reach within ±25% of the Flexoptix reach
      if (fx.reach_meters && cand.reach_meters && fx.reach_meters > 0 && cand.reach_meters > 0) {
        const diff = Math.abs(fx.reach_meters - cand.reach_meters);
        const tolerance = Math.max(fx.reach_meters, 1) * REACH_TOLERANCE_RATIO;
        if (diff <= tolerance) {
          score += 10; basis.push("reach");
        } else {
          score -= 15; // penalize mismatched reach
        }
      } else if (!fx.reach_meters && !cand.reach_meters) {
        score += 5; basis.push("reach_null");
      }
      const confidence = Math.max(0, Math.min(1, score / MAX_SCORE));
      if (confidence < MIN_CONFIDENCE) { skipped++; continue; }
      // Separator added between the two part numbers; without it they ran together.
      const notes = `${fx.part_number} <-> ${cand.part_number} (${cand.vendor_name}) | ` +
        `basis: ${basis.join(", ")} | reach: ${fx.reach_meters}m vs ${cand.reach_meters}m | ` +
        `wavelength: ${fx.wavelengths||"?"} vs ${cand.wavelengths||"?"}`;
      const meetsAutoApprove = confidence >= AUTO_APPROVE_CONFIDENCE;
      // Upsert equivalence candidate. Rows already decided manually ('approved' /
      // 'rejected') are left untouched by the DO UPDATE … WHERE clause, in which
      // case RETURNING yields no row.
      // NOTE(review): on conflict the stored status is not refreshed, so an existing
      // 'pending' row stays 'pending' even when confidence now reaches the
      // auto-approve threshold — confirm whether that is intended.
      const status = meetsAutoApprove ? "auto_approved" : "pending";
      const upsert = await pool.query(`
INSERT INTO transceiver_equivalences
(flexoptix_id, competitor_id, confidence, match_basis, match_notes, status)
VALUES ($1, $2, $3, $4, $5, $6)
ON CONFLICT (flexoptix_id, competitor_id) DO UPDATE SET
confidence = EXCLUDED.confidence,
match_basis = EXCLUDED.match_basis,
match_notes = EXCLUDED.match_notes,
updated_at = NOW()
WHERE transceiver_equivalences.status NOT IN ('approved', 'rejected')
RETURNING status
`, [fx.id, cand.competitor_id, confidence, basis, notes, status]);
      if (upsert.rows.length === 0) {
        // The pair carries a manual decision. A human rejection must win over the
        // score: do not count it and do not flip competitor_verified because of it.
        const existing = await pool.query(`
SELECT status FROM transceiver_equivalences
WHERE flexoptix_id = $1 AND competitor_id = $2
`, [fx.id, cand.competitor_id]);
        if (existing.rows[0]?.status === "rejected") {
          manuallyRejected++;
          continue;
        }
      }
      if (meetsAutoApprove) {
        // Auto-approve: set competitor_verified on the Flexoptix transceiver
        await pool.query(`
UPDATE transceivers
SET competitor_verified = true,
competitor_verified_at = NOW()
WHERE id = $1 AND competitor_verified = false
`, [fx.id]);
        autoApproved++;
      } else {
        queued++;
      }
    }
  }
  console.log(
    `[find-equivalences] auto_approved: ${autoApproved}, ` +
    `queued for review: ${queued}, skipped (low confidence): ${skipped}, ` +
    `skipped (manually rejected): ${manuallyRejected}`
  );
  // After auto-approvals, rerun fully_verified check
  if (autoApproved > 0) {
    await pool.query(`
UPDATE transceivers
SET fully_verified = true,
fully_verified_at = COALESCE(fully_verified_at, NOW())
WHERE competitor_verified = true
AND price_verified = true
AND image_verified = true
AND details_verified = true
AND fully_verified = false
`);
  }
});
// ── Re-research approved equivalences ────────────────────────────────────────
// Each run picks up to 200 approved / auto_approved equivalences whose
// re_research_due_at has passed, oldest due date first.
// The only check performed is whether the competitor transceiver has at least one
// price observation in the last 45 days:
//   yes → approval confirmed (re_researched_at = NOW(), next check in 30 days)
//   no  → equivalence reverted to pending with a note appended to match_notes; the
//         Flexoptix transceiver loses competitor_verified / fully_verified unless
//         another approved equivalence still covers it.
await boss.work("maintenance:re-research-equivalences", async () => {
  const db = (await import("./utils/db")).pool;
  const startedAt = new Date().toISOString();
  console.log(`[${startedAt}] Running: Re-research approved equivalences`);

  const dueBatch = await db.query(`
SELECT eq.id, eq.flexoptix_id, eq.competitor_id, eq.confidence,
fx.form_factor, fx.speed_gbps, fx.standard_name, fx.fiber_type,
fx.reach_meters, fx.wavelengths
FROM transceiver_equivalences eq
JOIN transceivers fx ON eq.flexoptix_id = fx.id
WHERE eq.status IN ('approved', 'auto_approved')
AND eq.re_research_due_at IS NOT NULL
AND eq.re_research_due_at <= NOW()
ORDER BY eq.re_research_due_at ASC
LIMIT 200
`);

  const tally = { confirmed: 0, reverted: 0 };
  for (const equivalence of dueBatch.rows) {
    // Count the competitor's price observations from the last 45 days.
    const recentPrices = await db.query(`
SELECT COUNT(*) AS cnt
FROM price_observations
WHERE transceiver_id = $1 AND time > NOW() - INTERVAL '45 days'
`, [equivalence.competitor_id]);
    const recentPriceCount = parseInt(recentPrices.rows[0].cnt, 10);

    if (recentPriceCount > 0) {
      // Still priced — confirm and push the next re-research out by 30 days.
      await db.query(`
UPDATE transceiver_equivalences
SET re_researched_at = NOW(),
re_research_due_at = NOW() + INTERVAL '30 days'
WHERE id = $1
`, [equivalence.id]);
      tally.confirmed += 1;
      continue;
    }

    // No recent price — hand the equivalence back to manual review.
    await db.query(`
UPDATE transceiver_equivalences
SET status = 'pending', re_research_due_at = NULL, re_researched_at = NULL,
match_notes = CONCAT(match_notes, E'\n[Re-research ' || NOW()::date || ': no recent price — reverted to pending]')
WHERE id = $1
`, [equivalence.id]);
    // Withdraw the verification flags unless a different approved equivalence
    // still exists for the same Flexoptix transceiver.
    await db.query(`
UPDATE transceivers
SET competitor_verified = false, competitor_verified_at = NULL,
fully_verified = false, fully_verified_at = NULL
WHERE id = $1
AND NOT EXISTS (
SELECT 1 FROM transceiver_equivalences
WHERE flexoptix_id = $1
AND status IN ('approved', 'auto_approved')
AND id != $2
)
`, [equivalence.flexoptix_id, equivalence.id]);
    tally.reverted += 1;
  }

  console.log(`[re-research] confirmed: ${tally.confirmed}, reverted to pending: ${tally.reverted}, batch size: ${dueBatch.rows.length}`);
});
console.log("All workers registered (76 jobs, 24/7 continuous)");
}