623 lines
35 KiB
TypeScript
623 lines
35 KiB
TypeScript
/**
 * pg-boss Job Scheduler — 24/7 Continuous Scraping
 *
 * ARCHITECTURE:
 *   - Erik (VPS, .82)    : Playwright-heavy scrapers (FS.com, 10Gtek, ATGBICS, ProLabs)
 *                          + all compatibility + eBay + compute + NAS sync
 *   - Raspberry Pi Fleet : Lightweight fetch/cheerio scrapers run continuously all day
 *                          (BlueOptics, Fiber24, T&S Com, Fluxlight, GBICs, Optcore,
 *                          Champion ONE, SFPCables, SmartOptics, HUBER+SUHNER, etc.)
 *
 * SCHEDULE PHILOSOPHY:
 *   - Playwright scrapers: every 2h for priority competitors (FS.com, 10Gtek, ProLabs),
 *     every 4h for ATGBICS (resource-heavy, VPS only)
 *   - Fetch/Cheerio scrapers: every 4h (lightweight, Pi-friendly)
 *   - Catalog scrapers (Flexoptix): every 2h (fast GraphQL, primary price source)
 *   - Compatibility matrices: every 12h (rarely change)
 *   - eBay enrichment: every 6h
 *   - Intelligence/community: every 6h
 *   - Compute jobs: every 4h, after each pricing wave
 *   - NAS sync: once a day at 07:55
 */
|
|
import PgBoss from "pg-boss";
|
|
import { config } from "dotenv";
|
|
import { join } from "path";
|
|
import { rmSync, mkdirSync } from "fs";
|
|
|
|
/**
 * Run a scraper with its own Crawlee storage directory to prevent queue collisions.
 *
 * A directory named `storage-<name>` is created under `storageRoot`, published to
 * the callback through the `CRAWLEE_STORAGE_DIR` environment variable, and removed
 * again once the callback settles (whether it resolves or rejects). The environment
 * variable is then put back exactly as it was: restored to its previous value, or
 * removed if it was not set before the call.
 *
 * NOTE(review): `process.env` is process-wide, so two overlapping calls would see
 * each other's `CRAWLEE_STORAGE_DIR`. Confirm that jobs using this helper never run
 * concurrently inside one process.
 *
 * @param name - Suffix used to build the per-scraper directory name.
 * @param fn - The scraper to run while the isolated directory is active.
 * @param storageRoot - Directory in which the storage directory is created.
 *   Defaults to three levels above this module.
 * @returns A promise that settles after `fn` has settled and cleanup was attempted.
 * @throws Whatever `fn` rejects with, or the error raised while creating directories.
 */
async function withIsolatedStorage(
  name: string,
  fn: () => Promise<void>,
  storageRoot: string = join(__dirname, "..", "..", ".."),
): Promise<void> {
  const dir = join(storageRoot, `storage-${name}`);
  const prev = process.env.CRAWLEE_STORAGE_DIR;
  process.env.CRAWLEE_STORAGE_DIR = dir;

  try {
    // Pre-create Crawlee's internal subdirectory tree to avoid ENOENT races.
    // Done inside the try so a partially created tree is still cleaned up.
    for (const store of ["request_queues", "datasets", "key_value_stores"]) {
      mkdirSync(join(dir, store, "default"), { recursive: true });
    }
    await fn();
  } finally {
    // Assigning "" (or undefined, which Node stringifies) would leave the variable
    // set; delete it when it did not exist before so later readers see a clean env.
    if (prev === undefined) {
      delete process.env.CRAWLEE_STORAGE_DIR;
    } else {
      process.env.CRAWLEE_STORAGE_DIR = prev;
    }

    try {
      rmSync(dir, { recursive: true, force: true });
    } catch {
      // Best-effort cleanup: a leftover directory must not fail the job.
    }
  }
}
|
|
|
|
// Load environment variables from the `.env` file located three directories above this module.
const envFilePath = join(__dirname, "..", "..", "..", ".env");
config({ path: envFilePath });
|
|
|
|
/**
 * Build the PostgreSQL connection URI from `POSTGRES_*` settings.
 *
 * User, password and database name are percent-encoded so that values containing
 * URI delimiters (`@`, `:`, `/`, `#`, spaces, ...) cannot corrupt the URI. Values
 * are therefore expected to be supplied raw, not already percent-encoded.
 * Unset or empty settings fall back to the local development defaults.
 *
 * @param env - Map of environment variables to read the settings from.
 * @returns A `postgres://user:password@host:port/database` URI.
 */
function buildConnectionString(env: Record<string, string | undefined>): string {
  const user = encodeURIComponent(env.POSTGRES_USER || "tip");
  // NOTE(review): hard-coded development fallback password — make sure
  // POSTGRES_PASSWORD is always set outside local development.
  const password = encodeURIComponent(env.POSTGRES_PASSWORD || "tip_dev_2026");
  const host = env.POSTGRES_HOST || "localhost";
  const port = env.POSTGRES_PORT || "5433";
  const database = encodeURIComponent(env.POSTGRES_DB || "transceiver_db");

  return `postgres://${user}:${password}@${host}:${port}/${database}`;
}

const connectionString = buildConnectionString(process.env);
|
|
|
|
/**
 * Construct a pg-boss instance against the configured database, attach an error
 * logger, start it, and hand the running instance back to the caller.
 *
 * The retry/expiry values given here are instance-level settings; the schedules
 * registered in `registerSchedules` pass their own `retryLimit` and
 * `expireInSeconds` per job.
 *
 * @returns The started pg-boss instance.
 */
export async function createScheduler(): Promise<PgBoss> {
  const scheduler = new PgBoss({
    connectionString,
    retryLimit: 3,
    retryDelay: 30,
    retryBackoff: true,
    expireInSeconds: 300,
    monitorStateIntervalSeconds: 60,
  });

  // Log errors emitted by pg-boss on its "error" event.
  scheduler.on("error", (err) => {
    console.error("pg-boss error:", err);
  });

  await scheduler.start();
  console.log("pg-boss scheduler started");

  return scheduler;
}
|
|
|
|
/** One recurring job: the queue it runs on, its cron expression and its per-job limits. */
interface JobSchedule {
  queue: string;
  cron: string;
  retryLimit: number;
  expireInSeconds: number;
}

/**
 * Single source of truth for every scheduled job.
 *
 * Queues are created from this table and schedules are registered from it, so a
 * scheduled queue can never be missing from the created-queue list and the job
 * count that gets logged is always accurate.
 */
const JOB_SCHEDULES: readonly JobSchedule[] = [
  // ── Playwright scrapers — priority competitors every 2h, others every 4h ──
  // FS.com: every 2h — primary competitor, highest data value
  { queue: "scrape:pricing:fs", cron: "0 */2 * * *", retryLimit: 3, expireInSeconds: 5400 },
  // 10Gtek: every 2h offset by 20min
  { queue: "scrape:pricing:10gtek", cron: "20 */2 * * *", retryLimit: 2, expireInSeconds: 3600 },
  // ATGBICS: every 4h (staggered)
  { queue: "scrape:pricing:atgbics", cron: "50 0,4,8,12,16,20 * * *", retryLimit: 2, expireInSeconds: 3600 },
  // ProLabs: every 2h offset by 40min
  { queue: "scrape:pricing:prolabs", cron: "40 */2 * * *", retryLimit: 2, expireInSeconds: 3600 },

  // ── Fetch/Cheerio scrapers — every 4h (lightweight, Pi-friendly) ──────────
  // Fluxlight: 00:05, 04:05, 08:05, 12:05, 16:05, 20:05
  { queue: "scrape:pricing:fluxlight", cron: "5 0,4,8,12,16,20 * * *", retryLimit: 2, expireInSeconds: 3600 },
  // GBICs: 00:15, 04:15, 08:15, 12:15, 16:15, 20:15
  { queue: "scrape:pricing:gbics", cron: "15 0,4,8,12,16,20 * * *", retryLimit: 2, expireInSeconds: 3600 },
  // Optcore: 00:30, 04:30, 08:30, 12:30, 16:30, 20:30
  { queue: "scrape:pricing:optcore", cron: "30 0,4,8,12,16,20 * * *", retryLimit: 2, expireInSeconds: 3600 },
  // Champion ONE: 00:45, 04:45, 08:45, 12:45, 16:45, 20:45
  { queue: "scrape:pricing:champion-one", cron: "45 0,4,8,12,16,20 * * *", retryLimit: 2, expireInSeconds: 3600 },
  // SFPCables: 01:00, 05:00, 09:00, 13:00, 17:00, 21:00
  { queue: "scrape:pricing:sfpcables", cron: "0 1,5,9,13,17,21 * * *", retryLimit: 2, expireInSeconds: 3600 },
  // BlueOptics: 01:15, 05:15, 09:15, 13:15, 17:15, 21:15
  { queue: "scrape:pricing:blueoptics", cron: "15 1,5,9,13,17,21 * * *", retryLimit: 2, expireInSeconds: 3600 },
  // ShopFiber24: 01:30, 05:30, 09:30, 13:30, 17:30, 21:30
  { queue: "scrape:pricing:fiber24", cron: "30 1,5,9,13,17,21 * * *", retryLimit: 2, expireInSeconds: 3600 },
  // T&S Communication: 01:45, 05:45, 09:45, 13:45, 17:45, 21:45
  { queue: "scrape:pricing:tscom", cron: "45 1,5,9,13,17,21 * * *", retryLimit: 2, expireInSeconds: 3600 },
  // Skylane: 02:00, 06:00, 10:00, 14:00, 18:00, 22:00
  { queue: "scrape:pricing:skylane", cron: "0 2,6,10,14,18,22 * * *", retryLimit: 2, expireInSeconds: 3600 },
  // AscentOptics: 02:15, 06:15, 10:15, 14:15, 18:15, 22:15
  { queue: "scrape:pricing:ascentoptics", cron: "15 2,6,10,14,18,22 * * *", retryLimit: 2, expireInSeconds: 3600 },
  // GAO Tek: 02:30, 06:30, 10:30, 14:30, 18:30, 22:30
  { queue: "scrape:pricing:gaotek", cron: "30 2,6,10,14,18,22 * * *", retryLimit: 2, expireInSeconds: 3600 },

  // ── Catalog scrapers — Flexoptix every 2h (primary price source) ──────────
  { queue: "scrape:pricing:flexoptix", cron: "0 */2 * * *", retryLimit: 3, expireInSeconds: 3600 },

  // ── Manufacturer catalogs — every 8h (product data, no prices) ────────────
  { queue: "scrape:catalog:smartoptics", cron: "10 3,11,19 * * *", retryLimit: 2, expireInSeconds: 3600 },
  { queue: "scrape:catalog:hubersuhner", cron: "25 3,11,19 * * *", retryLimit: 2, expireInSeconds: 3600 },

  // ── Form-factor coverage scrapers — every 8h (CFP, CSFP, SFP-DD, legacy) ──
  { queue: "scrape:pricing:comms-express", cron: "40 2,10,18 * * *", retryLimit: 2, expireInSeconds: 5400 },
  { queue: "scrape:pricing:router-switch", cron: "0 3,11,19 * * *", retryLimit: 2, expireInSeconds: 5400 },
  { queue: "scrape:pricing:multimode-inc", cron: "20 3,11,19 * * *", retryLimit: 2, expireInSeconds: 3600 },
  { queue: "scrape:pricing:optictransceiver", cron: "45 3,11,19 * * *", retryLimit: 2, expireInSeconds: 3600 },
  { queue: "scrape:pricing:wiitek", cron: "5 4,12,20 * * *", retryLimit: 2, expireInSeconds: 3600 },

  // ── Vendor lists — every 12h ──────────────────────────────────────────────
  { queue: "scrape:vendors:flexoptix", cron: "0 5,17 * * *", retryLimit: 2, expireInSeconds: 1800 },
  { queue: "scrape:vendors:flexoptix-supported", cron: "15 5,17 * * *", retryLimit: 2, expireInSeconds: 1800 },

  // ── Compatibility matrices — every 12h ────────────────────────────────────
  { queue: "scrape:compat:cisco", cron: "0 6,18 * * *", retryLimit: 2, expireInSeconds: 3600 },
  { queue: "scrape:compat:juniper", cron: "15 6,18 * * *", retryLimit: 2, expireInSeconds: 3600 },
  { queue: "scrape:compat:sonic", cron: "30 6,18 * * *", retryLimit: 2, expireInSeconds: 3600 },
  { queue: "scrape:compat:ufispace", cron: "45 6,18 * * *", retryLimit: 2, expireInSeconds: 1800 },
  { queue: "scrape:compat:edgecore", cron: "55 6,18 * * *", retryLimit: 2, expireInSeconds: 1800 },

  // ── Switch assets — every 12h ─────────────────────────────────────────────
  { queue: "scrape:assets:switches", cron: "30 7,19 * * *", retryLimit: 1, expireInSeconds: 3600 },

  // ── eBay enrichment — every 6h ────────────────────────────────────────────
  { queue: "enrich:ebay-transceivers", cron: "0 0,6,12,18 * * *", retryLimit: 2, expireInSeconds: 7200 },
  { queue: "enrich:ebay-switches", cron: "30 0,6,12,18 * * *", retryLimit: 2, expireInSeconds: 7200 },

  // ── Intelligence & community — every 6h unless noted ──────────────────────
  { queue: "scrape:market-intel", cron: "0 2,8,14,20 * * *", retryLimit: 2, expireInSeconds: 3600 },
  // NOG conference talks — weekly on Mondays at 06:00
  { queue: "scrape:nog-talks", cron: "0 6 * * 1", retryLimit: 2, expireInSeconds: 7200 },
  { queue: "scrape:community-issues", cron: "30 2,8,14,20 * * *", retryLimit: 1, expireInSeconds: 3600 },
  { queue: "scrape:datasheet-links", cron: "0 3,9,15,21 * * *", retryLimit: 1, expireInSeconds: 3600 },
  { queue: "scrape:news", cron: "20 3,9,15,21 * * *", retryLimit: 2, expireInSeconds: 1800 },
  { queue: "scrape:faq", cron: "40 3,9,15,21 * * *", retryLimit: 2, expireInSeconds: 3600 },
  // Docs: twice a day (04:50 and 16:50)
  { queue: "scrape:docs", cron: "50 4,16 * * *", retryLimit: 2, expireInSeconds: 3600 },

  // ── Compute jobs — every 4h (after pricing waves settle) ──────────────────
  { queue: "compute:abc", cron: "50 3,7,11,15,19,23 * * *", retryLimit: 2, expireInSeconds: 600 },
  { queue: "compute:reorder-signals", cron: "55 3,7,11,15,19,23 * * *", retryLimit: 2, expireInSeconds: 600 },

  // ── Prediction signal scrapers ────────────────────────────────────────────
  // SEC EDGAR CapEx — weekly Monday 06:00 (filings don't change that fast)
  { queue: "scrape:signals:sec-edgar", cron: "0 6 * * 1", retryLimit: 2, expireInSeconds: 3600 },
  // GitHub signals — weekly Sunday 05:00
  { queue: "scrape:signals:github", cron: "0 5 * * 0", retryLimit: 2, expireInSeconds: 7200 },
  // eBay sold velocity — every 12h (fast-moving market signal)
  { queue: "scrape:signals:ebay-velocity", cron: "0 4,16 * * *", retryLimit: 2, expireInSeconds: 3600 },
  // AI cluster RSS feeds — every 4h (news moves fast)
  { queue: "scrape:signals:ai-clusters", cron: "10 0,4,8,12,16,20 * * *", retryLimit: 2, expireInSeconds: 1800 },
  // Distributor lead times — daily 03:30 (stock changes overnight)
  { queue: "scrape:signals:distributor-leads", cron: "30 3 * * *", retryLimit: 2, expireInSeconds: 3600 },
  // Standards tracker — weekly Wednesday 04:00 (standards move slowly)
  { queue: "scrape:signals:standards", cron: "0 4 * * 3", retryLimit: 1, expireInSeconds: 3600 },

  // ── Forecast engine — daily at 08:00 ──────────────────────────────────────
  { queue: "compute:forecast", cron: "0 8 * * *", retryLimit: 2, expireInSeconds: 600 },

  // ── NAS sync — once a day at 07:55 ────────────────────────────────────────
  { queue: "sync:nas", cron: "55 7 * * *", retryLimit: 1, expireInSeconds: 1800 },
];

/**
 * Create every job queue and register its cron schedule with pg-boss.
 *
 * Queue creation is best-effort: a failure (for example because the queue already
 * exists) is logged as a warning and does not stop registration. A failure while
 * registering a schedule is not caught and rejects the returned promise.
 *
 * @param boss - A started pg-boss instance.
 * @returns A promise that resolves once all schedules are registered.
 */
export async function registerSchedules(boss: PgBoss): Promise<void> {
  for (const { queue } of JOB_SCHEDULES) {
    try {
      await boss.createQueue(queue);
    } catch (error) {
      // Previously swallowed silently, which also hid genuine failures
      // (rejected queue names, connection problems). Keep going, but say so.
      console.warn(`createQueue("${queue}") failed — continuing (queue may already exist):`, error);
    }
  }

  for (const { queue, cron, retryLimit, expireInSeconds } of JOB_SCHEDULES) {
    await boss.schedule(queue, cron, {}, { retryLimit, expireInSeconds });
  }

  console.log(`All schedules registered — 24/7 continuous scraping (${JOB_SCHEDULES.length} jobs)`);
}
|
|
|
|
/**
 * Attach a worker to every job queue.
 *
 * Most scraper modules are loaded up front with dynamic `import()` when this
 * function runs; a few are imported only when their job actually executes. Each
 * worker logs a timestamped line and then runs its job. Jobs wrapped in
 * `withIsolatedStorage` run with a dedicated Crawlee storage directory.
 *
 * A job that rejects makes its worker handler reject as well; nothing is caught here.
 *
 * @param boss - A started pg-boss instance.
 * @returns A promise that resolves once all workers are registered.
 */
export async function registerWorkers(boss: PgBoss): Promise<void> {
  // Scraper modules loaded at registration time
  const { scrapeFs } = await import("./scrapers/fs-com");
  const { scrapeCiscoTmg } = await import("./scrapers/cisco-tmg");
  const { scrapeOptcore } = await import("./scrapers/optcore");
  const { scrape10Gtek } = await import("./scrapers/tenGtek");
  const { scrapeFlexoptixCatalog } = await import("./scrapers/flexoptix-catalog");
  const { scrapeFlexoptixVendors } = await import("./scrapers/flexoptix-vendors");
  const { seedFlexoptixVendors } = await import("./scrapers/flexoptix-supported-vendors");
  const { scrapeNews } = await import("./scrapers/news");
  const { scrapeAtgbics } = await import("./scrapers/atgbics");
  const { scrapeProLabs } = await import("./scrapers/prolabs");
  const { scrapeChampionOne } = await import("./scrapers/champion-one");
  const { scrapeFluxlight } = await import("./scrapers/fluxlight");
  const { scrapeGbics } = await import("./scrapers/gbics");
  const { scrapeSfpCables } = await import("./scrapers/sfpcables");
  const { scrapeJuniperHct } = await import("./scrapers/juniper-hct");
  const { scrapeSonicHcl } = await import("./scrapers/sonic-hcl");
  const { scrapeUfiSpace } = await import("./scrapers/ufispace");
  const { scrapeEdgecore } = await import("./scrapers/edgecore");
  const { scrapeSwitchAssets } = await import("./scrapers/switch-assets");
  const { scrapeBlueOptics } = await import("./scrapers/blueoptics");
  const { scrapeFiber24 } = await import("./scrapers/fiber24");
  const { scrapeTsCom } = await import("./scrapers/tscom");
  const { scrapeSmartOptics } = await import("./scrapers/smartoptics");
  const { scrapeHuberSuhner } = await import("./scrapers/hubersuhner");
  const { scrapeSkylane } = await import("./scrapers/skylane");
  const { scrapeAscentOptics } = await import("./scrapers/ascentoptics");
  const { scrapeGaoTek } = await import("./scrapers/gaotek");
  // ── Prediction signal scrapers ────────────────────────────────────────
  const { scrapeSecEdgar } = await import("./scrapers/sec-edgar");
  const { scrapeGithubSignals } = await import("./scrapers/github-signals");
  const { scrapeEbayVelocity } = await import("./scrapers/ebay-velocity");
  const { scrapeAiClusters } = await import("./scrapers/ai-clusters");
  const { scrapeDistributorLeads } = await import("./scrapers/distributor-leads");
  const { scrapeStandardsTracker } = await import("./scrapers/standards-tracker");
  const { runForecastEngine } = await import("./utils/forecast-engine");

  // Counted as workers are attached so the summary line below cannot drift from
  // the real number of registrations (it used to be a hand-written, stale number).
  let registered = 0;

  /**
   * Attach one worker: log `[timestamp] <message>` when a job arrives, then run it.
   * `run` is omitted for jobs that only log (not yet implemented).
   */
  const register = async (queue: string, message: string, run?: () => unknown): Promise<void> => {
    await boss.work(queue, async () => {
      console.log(`[${new Date().toISOString()}] ${message}`);
      if (run) {
        await run();
      }
    });
    registered += 1;
  };

  // ── Playwright scrapers ───────────────────────────────────────────────
  await register("scrape:pricing:fs", "Running: FS.com pricing", () => withIsolatedStorage("fs", scrapeFs));
  await register("scrape:pricing:10gtek", "Running: 10Gtek pricing", () => withIsolatedStorage("10gtek", scrape10Gtek));
  await register("scrape:pricing:atgbics", "Running: ATGBICS pricing", () => withIsolatedStorage("atgbics", scrapeAtgbics));
  await register("scrape:pricing:prolabs", "Running: ProLabs pricing", () => withIsolatedStorage("prolabs", scrapeProLabs));

  // ── Fetch/Cheerio scrapers ────────────────────────────────────────────
  await register("scrape:pricing:fluxlight", "Running: Fluxlight pricing", () => withIsolatedStorage("fluxlight", scrapeFluxlight));
  await register("scrape:pricing:gbics", "Running: GBICs pricing", () => withIsolatedStorage("gbics", scrapeGbics));
  await register("scrape:pricing:optcore", "Running: Optcore pricing", () => withIsolatedStorage("optcore", scrapeOptcore));
  await register("scrape:pricing:champion-one", "Running: Champion ONE pricing", () => withIsolatedStorage("champion-one", scrapeChampionOne));
  await register("scrape:pricing:sfpcables", "Running: SFPCables pricing", () => withIsolatedStorage("sfpcables", scrapeSfpCables));
  await register("scrape:pricing:blueoptics", "Running: BlueOptics pricing", () => withIsolatedStorage("blueoptics", scrapeBlueOptics));
  await register("scrape:pricing:fiber24", "Running: ShopFiber24 pricing", () => withIsolatedStorage("fiber24", scrapeFiber24));
  await register("scrape:pricing:tscom", "Running: T&S Communication pricing", () => withIsolatedStorage("tscom", scrapeTsCom));
  await register("scrape:pricing:skylane", "Running: Skylane Optics pricing", () => withIsolatedStorage("skylane", scrapeSkylane));
  await register("scrape:pricing:ascentoptics", "Running: AscentOptics pricing", () => withIsolatedStorage("ascentoptics", scrapeAscentOptics));
  await register("scrape:pricing:gaotek", "Running: GAO Tek pricing", () => withIsolatedStorage("gaotek", scrapeGaoTek));

  // ── Catalog scrapers ──────────────────────────────────────────────────
  await register("scrape:pricing:flexoptix", "Running: Flexoptix catalog", () => scrapeFlexoptixCatalog());
  await register("scrape:catalog:smartoptics", "Running: SmartOptics catalog", () => withIsolatedStorage("smartoptics", scrapeSmartOptics));
  await register("scrape:catalog:hubersuhner", "Running: HUBER+SUHNER catalog", () => withIsolatedStorage("hubersuhner", scrapeHuberSuhner));

  // ── Vendor lists ──────────────────────────────────────────────────────
  await register("scrape:vendors:flexoptix", "Running: Flexoptix vendor list", () => scrapeFlexoptixVendors());
  await register("scrape:vendors:flexoptix-supported", "Running: Flexoptix supported vendors", () => seedFlexoptixVendors());

  // ── Compatibility scrapers ────────────────────────────────────────────
  await register("scrape:compat:cisco", "Running: Cisco TMG compatibility", () => withIsolatedStorage("cisco", scrapeCiscoTmg));
  await register("scrape:compat:juniper", "Running: Juniper HCT compatibility", () => withIsolatedStorage("juniper", scrapeJuniperHct));
  await register("scrape:compat:sonic", "Running: SONiC HCL compatibility", () => withIsolatedStorage("sonic", scrapeSonicHcl));
  await register("scrape:compat:ufispace", "Running: Ufispace switch data", () => withIsolatedStorage("ufispace", scrapeUfiSpace));
  await register("scrape:compat:edgecore", "Running: Edgecore switch data", () => withIsolatedStorage("edgecore", scrapeEdgecore));

  // ── Switch assets ─────────────────────────────────────────────────────
  await register("scrape:assets:switches", "Running: Switch assets enrichment", () =>
    withIsolatedStorage("switch-assets", () => scrapeSwitchAssets()),
  );

  // ── eBay enrichment (module imported when the job runs) ───────────────
  await register("enrich:ebay-transceivers", "Running: eBay transceiver pricing", async () => {
    const { enrichTransceiversFromEbay } = await import("./scrapers/ebay-enricher");
    await withIsolatedStorage("ebay-transceivers", () => enrichTransceiversFromEbay(100));
  });
  await register("enrich:ebay-switches", "Running: eBay switch enrichment", async () => {
    const { enrichSwitchesFromEbay } = await import("./scrapers/ebay-enricher");
    await withIsolatedStorage("ebay-switches", () => enrichSwitchesFromEbay(30));
  });

  // ── Intelligence & community ──────────────────────────────────────────
  await register("scrape:market-intel", "Running: Market intelligence", async () => {
    const { scrapeMarketIntelligence } = await import("./scrapers/market-intelligence");
    await withIsolatedStorage("market-intel", scrapeMarketIntelligence);
  });
  await register("scrape:nog-talks", "Running: NOG conference talks", async () => {
    const { scrapeNogTalks } = await import("./scrapers/nog-talks");
    await scrapeNogTalks();
  });
  await register("scrape:community-issues", "Running: Community issues", async () => {
    const { scrapeAllSwitchIssues } = await import("./scrapers/community-issues");
    await withIsolatedStorage("community-issues", () => scrapeAllSwitchIssues(30));
  });
  await register("scrape:datasheet-links", "Running: Datasheet links", async () => {
    const { findAndSeedDatasheetLinks } = await import("./scrapers/community-issues");
    await findAndSeedDatasheetLinks(50);
  });
  await register("scrape:news", "Running: News aggregation", () => scrapeNews());
  // Placeholders: these workers only log until the scrapers exist.
  await register("scrape:faq", "FAQ scraper — not yet implemented");
  await register("scrape:docs", "Docs scraper — not yet implemented");

  // ── Compute jobs ──────────────────────────────────────────────────────
  await register("compute:abc", "Computing: ABC classification", async () => {
    const { computeAbcClassification } = await import("./scrapers/market-intelligence");
    await computeAbcClassification();
  });
  await register("compute:reorder-signals", "Computing: Reorder signals", async () => {
    const { computeReorderSignals } = await import("./scrapers/market-intelligence");
    await computeReorderSignals();
  });

  // ── NAS sync ──────────────────────────────────────────────────────────
  await register("sync:nas", "Running: NAS sync to Fearghas", async () => {
    const { runNightlyNasSync } = await import("./utils/nas-sync");
    await runNightlyNasSync();
  });

  // ── Prediction signal scrapers ────────────────────────────────────────
  await register("scrape:signals:sec-edgar", "Running: SEC EDGAR CapEx", () => scrapeSecEdgar());
  await register("scrape:signals:github", "Running: GitHub tech signals", () => scrapeGithubSignals());
  await register("scrape:signals:ebay-velocity", "Running: eBay sold velocity", () => scrapeEbayVelocity());
  await register("scrape:signals:ai-clusters", "Running: AI cluster announcements", () => scrapeAiClusters());
  await register("scrape:signals:distributor-leads", "Running: Distributor lead times", () => scrapeDistributorLeads());
  await register("scrape:signals:standards", "Running: Standards tracker", () => scrapeStandardsTracker());

  // ── Forecast engine ───────────────────────────────────────────────────
  await register("compute:forecast", "Running: Forecast engine", () => runForecastEngine());

  // ── Form-factor coverage scrapers (modules imported when the job runs) ─
  await register("scrape:pricing:comms-express", "Running: Comms-Express pricing", async () => {
    const { scrapeCommsExpress } = await import("./scrapers/comms-express");
    await scrapeCommsExpress();
  });
  await register("scrape:pricing:router-switch", "Running: Router-Switch.com pricing", async () => {
    const { scrapeRouterSwitch } = await import("./scrapers/router-switch");
    await scrapeRouterSwitch();
  });
  await register("scrape:pricing:multimode-inc", "Running: Multimode Inc pricing", async () => {
    const { scrapeMultimodeInc } = await import("./scrapers/multimode-inc");
    await scrapeMultimodeInc();
  });
  await register("scrape:pricing:optictransceiver", "Running: OpticTransceiver.com pricing", async () => {
    const { scrapeOpticTransceiver } = await import("./scrapers/optictransceiver");
    await scrapeOpticTransceiver();
  });
  await register("scrape:pricing:wiitek", "Running: Wiitek pricing", async () => {
    const { scrapeWiitek } = await import("./scrapers/wiitek");
    await scrapeWiitek();
  });

  console.log(`All workers registered (${registered} jobs, 24/7 continuous)`);
}
|