623 lines
35 KiB
TypeScript

/**
* pg-boss Job Scheduler — 24/7 Continuous Scraping
*
* ARCHITECTURE:
* - Erik (VPS, .82) : Playwright-heavy scrapers (FS.com, 10Gtek, ATGBICS, ProLabs)
* + all compatibility + eBay + compute + NAS sync
* - Raspberry Pi Fleet : Lightweight fetch/cheerio scrapers run continuously all day
* (BlueOptics, Fiber24, T&S Com, Fluxlight, GBICs, Optcore,
* Champion ONE, SFPCables, SmartOptics, HUBER+SUHNER, etc.)
*
* SCHEDULE PHILOSOPHY:
* - Playwright scrapers: every 2h for FS.com, 10Gtek and ProLabs, every 4h for ATGBICS (resource-heavy, VPS only)
* - Fetch/Cheerio scrapers: every 4h (lightweight, Pi-friendly)
* - Catalog scrapers (Flexoptix): every 2h (fast GraphQL, primary price source)
* - Compatibility matrices: every 12h (rarely change)
* - eBay enrichment: every 6h
* - Intelligence/community: every 6h
* - Compute jobs: after each pricing wave
* - NAS sync: nightly at 07:55
*/
import PgBoss from "pg-boss";
import { config } from "dotenv";
import { join } from "path";
import { rmSync, mkdirSync } from "fs";
/**
 * Run a scraper with an isolated Crawlee storage directory to prevent queue collisions.
 *
 * Creates `<baseDir>/storage-<name>` (including the `default` request queue,
 * dataset and key-value store folders), points `CRAWLEE_STORAGE_DIR` at it for
 * the duration of `fn`, then restores the previous environment and removes the
 * directory. Cleanup runs even when `fn` rejects; the rejection is propagated.
 *
 * NOTE: `CRAWLEE_STORAGE_DIR` is process-wide. If two invocations overlap, each
 * sees whichever value was written last, and the one that finishes second
 * restores the first one's (already deleted) directory as the "previous" value.
 * NOTE(review): confirm whether workers in this process can run concurrently.
 *
 * @param name    Suffix used for the storage directory name (`storage-<name>`).
 * @param fn      The scraper to run while the isolated storage is active.
 * @param baseDir Parent directory for the storage folder. Defaults to the
 *                directory three levels above this file.
 */
async function withIsolatedStorage(
  name: string,
  fn: () => Promise<void>,
  baseDir: string = join(__dirname, "..", "..", ".."),
): Promise<void> {
  const dir = join(baseDir, `storage-${name}`);
  // Pre-create Crawlee's internal subdirectory tree to avoid ENOENT races
  for (const store of ["request_queues", "datasets", "key_value_stores"]) {
    mkdirSync(join(dir, store, "default"), { recursive: true });
  }
  const prev = process.env.CRAWLEE_STORAGE_DIR;
  process.env.CRAWLEE_STORAGE_DIR = dir;
  try {
    await fn();
  } finally {
    // Restore the exact prior state: an originally-unset variable must be
    // removed again, not left behind as an empty string.
    if (prev === undefined) {
      delete process.env.CRAWLEE_STORAGE_DIR;
    } else {
      process.env.CRAWLEE_STORAGE_DIR = prev;
    }
    // Best-effort cleanup; a failure to delete must not mask fn's outcome.
    try { rmSync(dir, { recursive: true, force: true }); } catch { /* ignore */ }
  }
}
// Load variables from the .env file located three directories above this module.
// This runs at import time, ahead of the connection-string construction that
// follows, which reads the POSTGRES_* variables from process.env.
config({ path: join(__dirname, "../../..", ".env") });
/**
 * Build the Postgres connection URI from POSTGRES_* variables.
 *
 * User and password are percent-encoded so that characters with a meaning in a
 * URI (`@`, `:`, `/`, `#`, ...) cannot corrupt the connection string. The
 * variables are therefore expected to hold raw, unencoded values. Empty or
 * missing variables fall back to the development defaults.
 *
 * NOTE(review): the fallback password is a hard-coded development credential;
 * consider requiring POSTGRES_PASSWORD outside local development.
 *
 * @param env Variable source; defaults to `process.env`.
 * @returns A `postgres://user:password@host:port/database` URI.
 */
function buildConnectionString(env: Record<string, string | undefined> = process.env): string {
  const user = encodeURIComponent(env.POSTGRES_USER || "tip");
  const password = encodeURIComponent(env.POSTGRES_PASSWORD || "tip_dev_2026");
  const host = env.POSTGRES_HOST || "localhost";
  const port = env.POSTGRES_PORT || "5433";
  const database = env.POSTGRES_DB || "transceiver_db";
  return `postgres://${user}:${password}@${host}:${port}/${database}`;
}
const connectionString = buildConnectionString();
/**
 * Create a pg-boss instance bound to the module's connection string, attach an
 * error logger, start it, and hand the running instance back to the caller.
 *
 * The constructor options set instance-wide retry/expiry values;
 * registerSchedules additionally passes its own retryLimit and
 * expireInSeconds with every schedule.
 *
 * @returns The started PgBoss instance.
 */
export async function createScheduler(): Promise<PgBoss> {
  const scheduler = new PgBoss({
    monitorStateIntervalSeconds: 60,
    expireInSeconds: 300,
    retryBackoff: true,
    retryDelay: 30,
    retryLimit: 3,
    connectionString,
  });

  // Surface pg-boss errors on stderr.
  scheduler.on("error", (err) => {
    console.error("pg-boss error:", err);
  });

  await scheduler.start();
  console.log("pg-boss scheduler started");
  return scheduler;
}
/**
 * Create every job queue and register its cron schedule with pg-boss.
 *
 * A single table is the source of truth: each row yields one `createQueue`
 * call and one `schedule` call, and the count in the final log line is derived
 * from the table, so the queue list, the schedules and the reported total
 * cannot drift apart.
 *
 * All queues are created first, then all schedules are registered in table
 * order. Every schedule is registered with an empty data payload.
 *
 * @param boss A started PgBoss instance.
 */
export async function registerSchedules(boss: PgBoss): Promise<void> {
  /** [queue name, cron expression, retryLimit, expireInSeconds] */
  type ScheduleRow = readonly [queue: string, cron: string, retryLimit: number, expireInSeconds: number];

  const schedules: readonly ScheduleRow[] = [
    // ── PLAYWRIGHT SCRAPERS — priority competitors every 2h, others every 4h ──
    ["scrape:pricing:fs", "0 */2 * * *", 3, 5400], // FS.com: primary competitor
    ["scrape:pricing:10gtek", "20 */2 * * *", 2, 3600], // offset by 20min
    ["scrape:pricing:atgbics", "50 0,4,8,12,16,20 * * *", 2, 3600], // every 4h (staggered)
    ["scrape:pricing:prolabs", "40 */2 * * *", 2, 3600], // offset by 40min

    // ── FETCH/CHEERIO SCRAPERS — every 4h, staggered in 15-minute steps ──────
    ["scrape:pricing:fluxlight", "5 0,4,8,12,16,20 * * *", 2, 3600],
    ["scrape:pricing:gbics", "15 0,4,8,12,16,20 * * *", 2, 3600],
    ["scrape:pricing:optcore", "30 0,4,8,12,16,20 * * *", 2, 3600],
    ["scrape:pricing:champion-one", "45 0,4,8,12,16,20 * * *", 2, 3600],
    ["scrape:pricing:sfpcables", "0 1,5,9,13,17,21 * * *", 2, 3600],
    ["scrape:pricing:blueoptics", "15 1,5,9,13,17,21 * * *", 2, 3600],
    ["scrape:pricing:fiber24", "30 1,5,9,13,17,21 * * *", 2, 3600], // ShopFiber24
    ["scrape:pricing:tscom", "45 1,5,9,13,17,21 * * *", 2, 3600], // T&S Communication
    ["scrape:pricing:skylane", "0 2,6,10,14,18,22 * * *", 2, 3600],
    ["scrape:pricing:ascentoptics", "15 2,6,10,14,18,22 * * *", 2, 3600],
    ["scrape:pricing:gaotek", "30 2,6,10,14,18,22 * * *", 2, 3600],

    // ── CATALOG SCRAPERS — Flexoptix every 2h (primary price source) ─────────
    ["scrape:pricing:flexoptix", "0 */2 * * *", 3, 3600],

    // ── MANUFACTURER CATALOGS — every 8h (product data, no prices) ───────────
    ["scrape:catalog:smartoptics", "10 3,11,19 * * *", 2, 3600],
    ["scrape:catalog:hubersuhner", "25 3,11,19 * * *", 2, 3600],

    // ── FORM-FACTOR COVERAGE SCRAPERS — every 8h (CFP, CSFP, SFP-DD, legacy) ─
    ["scrape:pricing:comms-express", "40 2,10,18 * * *", 2, 5400],
    ["scrape:pricing:router-switch", "0 3,11,19 * * *", 2, 5400],
    ["scrape:pricing:multimode-inc", "20 3,11,19 * * *", 2, 3600],
    ["scrape:pricing:optictransceiver", "45 3,11,19 * * *", 2, 3600],
    ["scrape:pricing:wiitek", "5 4,12,20 * * *", 2, 3600],

    // ── VENDOR LISTS — every 12h ─────────────────────────────────────────────
    ["scrape:vendors:flexoptix", "0 5,17 * * *", 2, 1800],
    ["scrape:vendors:flexoptix-supported", "15 5,17 * * *", 2, 1800],

    // ── COMPATIBILITY MATRICES — every 12h ───────────────────────────────────
    ["scrape:compat:cisco", "0 6,18 * * *", 2, 3600],
    ["scrape:compat:juniper", "15 6,18 * * *", 2, 3600],
    ["scrape:compat:sonic", "30 6,18 * * *", 2, 3600],
    ["scrape:compat:ufispace", "45 6,18 * * *", 2, 1800],
    ["scrape:compat:edgecore", "55 6,18 * * *", 2, 1800],

    // ── SWITCH ASSETS — every 12h ────────────────────────────────────────────
    ["scrape:assets:switches", "30 7,19 * * *", 1, 3600],

    // ── EBAY ENRICHMENT — every 6h ───────────────────────────────────────────
    ["enrich:ebay-transceivers", "0 0,6,12,18 * * *", 2, 7200],
    ["enrich:ebay-switches", "30 0,6,12,18 * * *", 2, 7200],

    // ── INTELLIGENCE & COMMUNITY — every 6h unless noted ─────────────────────
    ["scrape:market-intel", "0 2,8,14,20 * * *", 2, 3600],
    ["scrape:nog-talks", "0 6 * * 1", 2, 7200], // weekly, Mondays 06:00
    ["scrape:community-issues", "30 2,8,14,20 * * *", 1, 3600],
    ["scrape:datasheet-links", "0 3,9,15,21 * * *", 1, 3600],
    ["scrape:news", "20 3,9,15,21 * * *", 2, 1800],
    ["scrape:faq", "40 3,9,15,21 * * *", 2, 3600],
    ["scrape:docs", "50 4,16 * * *", 2, 3600], // every 12h

    // ── COMPUTE JOBS — every 4h (after pricing waves settle) ─────────────────
    ["compute:abc", "50 3,7,11,15,19,23 * * *", 2, 600],
    ["compute:reorder-signals", "55 3,7,11,15,19,23 * * *", 2, 600],

    // ── PREDICTION SIGNAL SCRAPERS ───────────────────────────────────────────
    ["scrape:signals:sec-edgar", "0 6 * * 1", 2, 3600], // weekly, Monday 06:00
    ["scrape:signals:github", "0 5 * * 0", 2, 7200], // weekly, Sunday 05:00
    ["scrape:signals:ebay-velocity", "0 4,16 * * *", 2, 3600], // every 12h
    ["scrape:signals:ai-clusters", "10 0,4,8,12,16,20 * * *", 2, 1800], // every 4h
    ["scrape:signals:distributor-leads", "30 3 * * *", 2, 3600], // daily 03:30
    ["scrape:signals:standards", "0 4 * * 3", 1, 3600], // weekly, Wednesday 04:00

    // ── FORECAST ENGINE — daily at 08:00 ─────────────────────────────────────
    ["compute:forecast", "0 8 * * *", 2, 600],

    // ── NAS SYNC — daily at 07:55 ────────────────────────────────────────────
    ["sync:nas", "55 7 * * *", 1, 1800],
  ];

  // Queues must exist before they can be scheduled. Creation is best-effort:
  // a rejection (e.g. the queue already exists) is ignored.
  // NOTE(review): this also hides unrelated createQueue failures.
  for (const [queue] of schedules) {
    await boss.createQueue(queue).catch(() => { /* already exists */ });
  }

  for (const [queue, cron, retryLimit, expireInSeconds] of schedules) {
    await boss.schedule(queue, cron, {}, { retryLimit, expireInSeconds });
  }

  console.log(`All schedules registered — 24/7 continuous scraping (${schedules.length} jobs)`);
}
/**
 * Attach a worker handler to every job queue.
 *
 * Each handler logs a timestamped line and then runs its task. Tasks passed
 * through `withIsolatedStorage` get a dedicated storage directory for the
 * duration of the run; the others are invoked directly. A task that throws
 * rejects the handler, leaving failure handling to pg-boss.
 *
 * The number reported in the final log line is counted from the registrations
 * actually made (the previous hard-coded "55" did not match the 50 workers
 * registered here).
 *
 * @param boss A started PgBoss instance.
 */
export async function registerWorkers(boss: PgBoss): Promise<void> {
  // Scraper modules loaded when workers are registered (not at module import time)
  const { scrapeFs } = await import("./scrapers/fs-com");
  const { scrapeCiscoTmg } = await import("./scrapers/cisco-tmg");
  const { scrapeOptcore } = await import("./scrapers/optcore");
  const { scrape10Gtek } = await import("./scrapers/tenGtek");
  const { scrapeFlexoptixCatalog } = await import("./scrapers/flexoptix-catalog");
  const { scrapeFlexoptixVendors } = await import("./scrapers/flexoptix-vendors");
  const { seedFlexoptixVendors } = await import("./scrapers/flexoptix-supported-vendors");
  const { scrapeNews } = await import("./scrapers/news");
  const { scrapeAtgbics } = await import("./scrapers/atgbics");
  const { scrapeProLabs } = await import("./scrapers/prolabs");
  const { scrapeChampionOne } = await import("./scrapers/champion-one");
  const { scrapeFluxlight } = await import("./scrapers/fluxlight");
  const { scrapeGbics } = await import("./scrapers/gbics");
  const { scrapeSfpCables } = await import("./scrapers/sfpcables");
  const { scrapeJuniperHct } = await import("./scrapers/juniper-hct");
  const { scrapeSonicHcl } = await import("./scrapers/sonic-hcl");
  const { scrapeUfiSpace } = await import("./scrapers/ufispace");
  const { scrapeEdgecore } = await import("./scrapers/edgecore");
  const { scrapeSwitchAssets } = await import("./scrapers/switch-assets");
  const { scrapeBlueOptics } = await import("./scrapers/blueoptics");
  const { scrapeFiber24 } = await import("./scrapers/fiber24");
  const { scrapeTsCom } = await import("./scrapers/tscom");
  const { scrapeSmartOptics } = await import("./scrapers/smartoptics");
  const { scrapeHuberSuhner } = await import("./scrapers/hubersuhner");
  const { scrapeSkylane } = await import("./scrapers/skylane");
  const { scrapeAscentOptics } = await import("./scrapers/ascentoptics");
  const { scrapeGaoTek } = await import("./scrapers/gaotek");
  // ── Prediction signal scrapers ────────────────────────────────────────
  const { scrapeSecEdgar } = await import("./scrapers/sec-edgar");
  const { scrapeGithubSignals } = await import("./scrapers/github-signals");
  const { scrapeEbayVelocity } = await import("./scrapers/ebay-velocity");
  const { scrapeAiClusters } = await import("./scrapers/ai-clusters");
  const { scrapeDistributorLeads } = await import("./scrapers/distributor-leads");
  const { scrapeStandardsTracker } = await import("./scrapers/standards-tracker");
  const { runForecastEngine } = await import("./utils/forecast-engine");

  let workerCount = 0;

  /** Register one worker: log `message` with a timestamp, then run `task` if given. */
  const handle = async (queue: string, message: string, task?: () => unknown): Promise<void> => {
    await boss.work(queue, async () => {
      console.log(`[${new Date().toISOString()}] ${message}`);
      if (task) await task();
    });
    workerCount += 1;
  };

  /** Wrap a scraper so it runs inside its own storage directory. */
  const isolated = (storageName: string, fn: () => Promise<void>) =>
    () => withIsolatedStorage(storageName, fn);

  // ── Playwright scrapers ───────────────────────────────────────────────
  await handle("scrape:pricing:fs", "Running: FS.com pricing", isolated("fs", scrapeFs));
  await handle("scrape:pricing:10gtek", "Running: 10Gtek pricing", isolated("10gtek", scrape10Gtek));
  await handle("scrape:pricing:atgbics", "Running: ATGBICS pricing", isolated("atgbics", scrapeAtgbics));
  await handle("scrape:pricing:prolabs", "Running: ProLabs pricing", isolated("prolabs", scrapeProLabs));

  // ── Fetch/Cheerio scrapers ────────────────────────────────────────────
  await handle("scrape:pricing:fluxlight", "Running: Fluxlight pricing", isolated("fluxlight", scrapeFluxlight));
  await handle("scrape:pricing:gbics", "Running: GBICs pricing", isolated("gbics", scrapeGbics));
  await handle("scrape:pricing:optcore", "Running: Optcore pricing", isolated("optcore", scrapeOptcore));
  await handle("scrape:pricing:champion-one", "Running: Champion ONE pricing", isolated("champion-one", scrapeChampionOne));
  await handle("scrape:pricing:sfpcables", "Running: SFPCables pricing", isolated("sfpcables", scrapeSfpCables));
  await handle("scrape:pricing:blueoptics", "Running: BlueOptics pricing", isolated("blueoptics", scrapeBlueOptics));
  await handle("scrape:pricing:fiber24", "Running: ShopFiber24 pricing", isolated("fiber24", scrapeFiber24));
  await handle("scrape:pricing:tscom", "Running: T&S Communication pricing", isolated("tscom", scrapeTsCom));
  await handle("scrape:pricing:skylane", "Running: Skylane Optics pricing", isolated("skylane", scrapeSkylane));
  await handle("scrape:pricing:ascentoptics", "Running: AscentOptics pricing", isolated("ascentoptics", scrapeAscentOptics));
  await handle("scrape:pricing:gaotek", "Running: GAO Tek pricing", isolated("gaotek", scrapeGaoTek));

  // ── Catalog scrapers ──────────────────────────────────────────────────
  await handle("scrape:pricing:flexoptix", "Running: Flexoptix catalog", () => scrapeFlexoptixCatalog());
  await handle("scrape:catalog:smartoptics", "Running: SmartOptics catalog", isolated("smartoptics", scrapeSmartOptics));
  await handle("scrape:catalog:hubersuhner", "Running: HUBER+SUHNER catalog", isolated("hubersuhner", scrapeHuberSuhner));

  // ── Vendor lists ──────────────────────────────────────────────────────
  await handle("scrape:vendors:flexoptix", "Running: Flexoptix vendor list", () => scrapeFlexoptixVendors());
  await handle("scrape:vendors:flexoptix-supported", "Running: Flexoptix supported vendors", () => seedFlexoptixVendors());

  // ── Compatibility scrapers ────────────────────────────────────────────
  await handle("scrape:compat:cisco", "Running: Cisco TMG compatibility", isolated("cisco", scrapeCiscoTmg));
  await handle("scrape:compat:juniper", "Running: Juniper HCT compatibility", isolated("juniper", scrapeJuniperHct));
  await handle("scrape:compat:sonic", "Running: SONiC HCL compatibility", isolated("sonic", scrapeSonicHcl));
  await handle("scrape:compat:ufispace", "Running: Ufispace switch data", isolated("ufispace", scrapeUfiSpace));
  await handle("scrape:compat:edgecore", "Running: Edgecore switch data", isolated("edgecore", scrapeEdgecore));

  // ── Switch assets ─────────────────────────────────────────────────────
  await handle("scrape:assets:switches", "Running: Switch assets enrichment", isolated("switch-assets", () => scrapeSwitchAssets()));

  // ── eBay enrichment (module imported on first run of each job) ────────
  await handle("enrich:ebay-transceivers", "Running: eBay transceiver pricing", async () => {
    const { enrichTransceiversFromEbay } = await import("./scrapers/ebay-enricher");
    await withIsolatedStorage("ebay-transceivers", () => enrichTransceiversFromEbay(100));
  });
  await handle("enrich:ebay-switches", "Running: eBay switch enrichment", async () => {
    const { enrichSwitchesFromEbay } = await import("./scrapers/ebay-enricher");
    await withIsolatedStorage("ebay-switches", () => enrichSwitchesFromEbay(30));
  });

  // ── Intelligence & community ──────────────────────────────────────────
  await handle("scrape:market-intel", "Running: Market intelligence", async () => {
    const { scrapeMarketIntelligence } = await import("./scrapers/market-intelligence");
    await withIsolatedStorage("market-intel", scrapeMarketIntelligence);
  });
  await handle("scrape:nog-talks", "Running: NOG conference talks", async () => {
    const { scrapeNogTalks } = await import("./scrapers/nog-talks");
    await scrapeNogTalks();
  });
  await handle("scrape:community-issues", "Running: Community issues", async () => {
    const { scrapeAllSwitchIssues } = await import("./scrapers/community-issues");
    await withIsolatedStorage("community-issues", () => scrapeAllSwitchIssues(30));
  });
  await handle("scrape:datasheet-links", "Running: Datasheet links", async () => {
    const { findAndSeedDatasheetLinks } = await import("./scrapers/community-issues");
    await findAndSeedDatasheetLinks(50);
  });
  await handle("scrape:news", "Running: News aggregation", () => scrapeNews());
  // Placeholders: the queues are scheduled, but the handlers only log.
  await handle("scrape:faq", "FAQ scraper — not yet implemented");
  await handle("scrape:docs", "Docs scraper — not yet implemented");

  // ── Compute jobs ──────────────────────────────────────────────────────
  await handle("compute:abc", "Computing: ABC classification", async () => {
    const { computeAbcClassification } = await import("./scrapers/market-intelligence");
    await computeAbcClassification();
  });
  await handle("compute:reorder-signals", "Computing: Reorder signals", async () => {
    const { computeReorderSignals } = await import("./scrapers/market-intelligence");
    await computeReorderSignals();
  });

  // ── NAS sync ──────────────────────────────────────────────────────────
  await handle("sync:nas", "Running: NAS sync to Fearghas", async () => {
    const { runNightlyNasSync } = await import("./utils/nas-sync");
    await runNightlyNasSync();
  });

  // ── Prediction signal scrapers ────────────────────────────────────────
  await handle("scrape:signals:sec-edgar", "Running: SEC EDGAR CapEx", () => scrapeSecEdgar());
  await handle("scrape:signals:github", "Running: GitHub tech signals", () => scrapeGithubSignals());
  await handle("scrape:signals:ebay-velocity", "Running: eBay sold velocity", () => scrapeEbayVelocity());
  await handle("scrape:signals:ai-clusters", "Running: AI cluster announcements", () => scrapeAiClusters());
  await handle("scrape:signals:distributor-leads", "Running: Distributor lead times", () => scrapeDistributorLeads());
  await handle("scrape:signals:standards", "Running: Standards tracker", () => scrapeStandardsTracker());

  // ── Forecast engine ───────────────────────────────────────────────────
  await handle("compute:forecast", "Running: Forecast engine", () => runForecastEngine());

  // ── Form-factor coverage scrapers ─────────────────────────────────────
  await handle("scrape:pricing:comms-express", "Running: Comms-Express pricing", async () => {
    const { scrapeCommsExpress } = await import("./scrapers/comms-express");
    await scrapeCommsExpress();
  });
  await handle("scrape:pricing:router-switch", "Running: Router-Switch.com pricing", async () => {
    const { scrapeRouterSwitch } = await import("./scrapers/router-switch");
    await scrapeRouterSwitch();
  });
  await handle("scrape:pricing:multimode-inc", "Running: Multimode Inc pricing", async () => {
    const { scrapeMultimodeInc } = await import("./scrapers/multimode-inc");
    await scrapeMultimodeInc();
  });
  await handle("scrape:pricing:optictransceiver", "Running: OpticTransceiver.com pricing", async () => {
    const { scrapeOpticTransceiver } = await import("./scrapers/optictransceiver");
    await scrapeOpticTransceiver();
  });
  await handle("scrape:pricing:wiitek", "Running: Wiitek pricing", async () => {
    const { scrapeWiitek } = await import("./scrapers/wiitek");
    await scrapeWiitek();
  });

  console.log(`All workers registered (${workerCount} jobs, 24/7 continuous)`);
}