Rene Fichtmueller 61685f3959 Merge remote-tracking branch 'github/main'
# Conflicts:
#	packages/api/src/llm/fo-blog-pipeline.ts
#	packages/api/src/routes/blog.ts
#	packages/scraper/src/scheduler.ts
#	packages/scraper/src/scrapers/fs-com.ts
#	packages/scraper/src/scrapers/gbics.ts
2026-04-06 18:03:36 +02:00

593 lines
34 KiB
TypeScript

/**
* pg-boss Job Scheduler — 24/7 Continuous Scraping
*
* ARCHITECTURE:
* - Erik (VPS, .82) : Playwright-heavy scrapers (FS.com, 10Gtek, ATGBICS, ProLabs)
* + all compatibility + eBay + compute + NAS sync
* - Raspberry Pi Fleet : Lightweight fetch/cheerio scrapers run continuously all day
* (BlueOptics, Fiber24, T&S Com, Fluxlight, GBICs, Optcore,
* Champion ONE, SFPCables, SmartOptics, HUBER+SUHNER, etc.)
*
* SCHEDULE PHILOSOPHY:
* - Playwright scrapers: every 2h, staggered 10 min apart (resource-heavy, VPS only)
* - Fetch/Cheerio scrapers: every 2h, staggered 5 min apart (lightweight, Pi-friendly)
* - Catalog scrapers (Flexoptix): every 2h (fast GraphQL, primary price source)
* - Compatibility matrices: every 12h (rarely change)
* - eBay enrichment: every 6h
* - Intelligence/community: every 6h
* - Compute jobs: after each pricing wave
* - NAS sync: nightly at 07:55
*/
import PgBoss from "pg-boss";
import { config } from "dotenv";
import { join } from "path";
import { rmSync, mkdirSync } from "fs";
/**
 * Runs a scraper with its own Crawlee storage directory to prevent queue
 * collisions between scrapers.
 *
 * A `storage-<name>` directory (three levels above this file) is created with
 * the `request_queues`, `datasets` and `key_value_stores` default sub-folders,
 * `CRAWLEE_STORAGE_DIR` is pointed at it while `fn` runs, and afterwards —
 * whether `fn` resolved or rejected — the variable is put back to its exact
 * previous state (deleted again if it was unset before) and the directory is
 * removed.
 *
 * NOTE(review): `process.env` is process-wide. If two calls overlap in the same
 * process, the later call overwrites the variable while the earlier scraper is
 * still running, and the restores interleave — confirm whether jobs can overlap.
 *
 * @param name - Suffix of the per-scraper storage directory.
 * @param fn - The scraper to run while the isolated directory is active.
 * @throws Re-throws whatever `fn` rejects with, after cleanup has run.
 */
async function withIsolatedStorage(name: string, fn: () => Promise<void>): Promise<void> {
  const storageDir = join(__dirname, "..", "..", "..", `storage-${name}`);
  // Pre-create Crawlee's internal subdirectory tree to avoid ENOENT races
  for (const store of ["request_queues", "datasets", "key_value_stores"]) {
    mkdirSync(join(storageDir, store, "default"), { recursive: true });
  }
  const previous = process.env.CRAWLEE_STORAGE_DIR;
  process.env.CRAWLEE_STORAGE_DIR = storageDir;
  try {
    await fn();
  } finally {
    // Restore the variable exactly: "set but empty" is not the same as "unset",
    // so an originally-unset variable must be deleted rather than set to "".
    if (previous === undefined) {
      delete process.env.CRAWLEE_STORAGE_DIR;
    } else {
      process.env.CRAWLEE_STORAGE_DIR = previous;
    }
    // Best-effort cleanup, executed on success and on failure alike.
    try {
      rmSync(storageDir, { recursive: true, force: true });
    } catch (cleanupError: unknown) {
      console.warn(`Could not remove isolated storage directory ${storageDir}:`, cleanupError);
    }
  }
}
// Load the .env file located three directories above this file, before any
// POSTGRES_* variable is read below.
config({ path: join(__dirname, "..", "..", "..", ".env") });

// Postgres connection URL handed to pg-boss, assembled from POSTGRES_* variables
// with hard-coded fallbacks for anything unset or empty.
// NOTE(review): user and password are inserted verbatim — presumably values with
// URL-reserved characters must already be percent-encoded; confirm.
const connectionString = [
  "postgres://",
  process.env.POSTGRES_USER || "tip",
  ":",
  process.env.POSTGRES_PASSWORD || "tip_dev_2026",
  "@",
  process.env.POSTGRES_HOST || "localhost",
  ":",
  process.env.POSTGRES_PORT || "5433",
  "/",
  process.env.POSTGRES_DB || "transceiver_db",
].join("");
/**
 * Builds a pg-boss instance on `connectionString`, attaches an error logger
 * and starts it.
 *
 * @returns The started pg-boss instance.
 */
export async function createScheduler(): Promise<PgBoss> {
  // Instance-wide settings; the individual schedules pass their own
  // retryLimit / expireInSeconds alongside each cron entry.
  const bossOptions = {
    connectionString,
    retryLimit: 3,
    retryDelay: 30,
    retryBackoff: true,
    expireInSeconds: 300,
    monitorStateIntervalSeconds: 60,
  };
  const scheduler = new PgBoss(bossOptions);

  const logBossError = (error: unknown): void => {
    console.error("pg-boss error:", error);
  };
  scheduler.on("error", logBossError);

  await scheduler.start();
  console.log("pg-boss scheduler started");
  return scheduler;
}
/**
 * Creates every queue used by this module and registers one cron schedule per
 * queue on the given pg-boss instance.
 *
 * Queues are created first (errors from `createQueue` are ignored on the
 * assumption that the queue already exists), then the schedules are registered
 * one after another in the order of the table below.
 *
 * @param boss - A started pg-boss instance.
 */
export async function registerSchedules(boss: PgBoss): Promise<void> {
  const queueNames: string[] = [
    // Playwright pricing scrapers
    "scrape:pricing:fs",
    "scrape:pricing:10gtek",
    "scrape:pricing:atgbics",
    "scrape:pricing:prolabs",
    // Fetch/Cheerio pricing scrapers
    "scrape:pricing:fluxlight",
    "scrape:pricing:gbics",
    "scrape:pricing:optcore",
    "scrape:pricing:champion-one",
    "scrape:pricing:sfpcables",
    "scrape:pricing:blueoptics",
    "scrape:pricing:fiber24",
    "scrape:pricing:tscom",
    "scrape:pricing:skylane",
    "scrape:pricing:ascentoptics",
    "scrape:pricing:gaotek",
    // Flexoptix catalog (primary price source)
    "scrape:pricing:flexoptix",
    // Manufacturer catalogs (product data, no prices)
    "scrape:catalog:smartoptics",
    "scrape:catalog:hubersuhner",
    // Vendor lists
    "scrape:vendors:flexoptix",
    "scrape:vendors:flexoptix-supported",
    // Compatibility matrices
    "scrape:compat:cisco",
    "scrape:compat:juniper",
    "scrape:compat:sonic",
    "scrape:compat:ufispace",
    "scrape:compat:edgecore",
    // Switch enrichment
    "scrape:assets:switches",
    // eBay enrichment
    "enrich:ebay-transceivers",
    "enrich:ebay-switches",
    // Intelligence & community
    "scrape:market-intel",
    "scrape:nog-talks",
    "scrape:community-issues",
    "scrape:datasheet-links",
    "scrape:news",
    "scrape:faq",
    "scrape:docs",
    // Compute
    "compute:abc",
    "compute:reorder-signals",
    // Form-factor coverage pricing scrapers
    "scrape:pricing:comms-express",
    "scrape:pricing:router-switch",
    "scrape:pricing:multimode-inc",
    "scrape:pricing:optictransceiver",
    "scrape:pricing:wiitek",
    // Prediction signal scrapers
    "scrape:signals:sec-edgar",
    "scrape:signals:github",
    "scrape:signals:ebay-velocity",
    "scrape:signals:ai-clusters",
    "scrape:signals:distributor-leads",
    "scrape:signals:standards",
    // Forecast engine
    "compute:forecast",
    // Sync
    "sync:nas",
  ];

  for (const queueName of queueNames) {
    await boss.createQueue(queueName).catch(() => { /* already exists */ });
  }

  // One row per schedule: [queue, cron expression, retryLimit, expireInSeconds].
  type ScheduleRow = [string, string, number, number];
  const scheduleTable: ScheduleRow[] = [
    // ── Pricing: Playwright scrapers — every 2h, 10 min apart ──────────────
    ["scrape:pricing:fs", "0 */2 * * *", 3, 5400],
    ["scrape:pricing:10gtek", "10 */2 * * *", 2, 3600],
    ["scrape:pricing:atgbics", "20 */2 * * *", 2, 3600],
    ["scrape:pricing:prolabs", "30 */2 * * *", 2, 3600],
    // ── Pricing: Fetch/Cheerio scrapers — every 2h, 5 min apart ────────────
    ["scrape:pricing:fluxlight", "0 */2 * * *", 2, 3600],
    ["scrape:pricing:gbics", "5 */2 * * *", 2, 3600],
    ["scrape:pricing:optcore", "10 */2 * * *", 2, 3600],
    ["scrape:pricing:champion-one", "15 */2 * * *", 2, 3600],
    ["scrape:pricing:sfpcables", "20 */2 * * *", 2, 3600],
    ["scrape:pricing:blueoptics", "25 */2 * * *", 2, 3600],
    ["scrape:pricing:fiber24", "30 */2 * * *", 2, 3600],
    ["scrape:pricing:tscom", "35 */2 * * *", 2, 3600],
    ["scrape:pricing:skylane", "40 */2 * * *", 2, 3600],
    ["scrape:pricing:ascentoptics", "45 */2 * * *", 2, 3600],
    ["scrape:pricing:gaotek", "50 */2 * * *", 2, 3600],
    // ── Pricing: form-factor coverage scrapers — every 2h ──────────────────
    ["scrape:pricing:comms-express", "5 */2 * * *", 2, 5400],
    ["scrape:pricing:router-switch", "15 */2 * * *", 2, 5400],
    ["scrape:pricing:multimode-inc", "25 */2 * * *", 2, 3600],
    ["scrape:pricing:optictransceiver", "35 */2 * * *", 2, 3600],
    ["scrape:pricing:wiitek", "45 */2 * * *", 2, 3600],
    // ── Flexoptix catalog — every 2h (primary price source) ────────────────
    ["scrape:pricing:flexoptix", "0 */2 * * *", 3, 3600],
    // ── Manufacturer catalogs — every 4h (product data, no prices) ─────────
    ["scrape:catalog:smartoptics", "10 */4 * * *", 2, 3600],
    ["scrape:catalog:hubersuhner", "25 */4 * * *", 2, 3600],
    // ── Vendor lists — every 12h ───────────────────────────────────────────
    ["scrape:vendors:flexoptix", "0 5,17 * * *", 2, 1800],
    ["scrape:vendors:flexoptix-supported", "15 5,17 * * *", 2, 1800],
    // ── Compatibility matrices — every 12h ─────────────────────────────────
    ["scrape:compat:cisco", "0 6,18 * * *", 2, 3600],
    ["scrape:compat:juniper", "15 6,18 * * *", 2, 3600],
    ["scrape:compat:sonic", "30 6,18 * * *", 2, 3600],
    ["scrape:compat:ufispace", "45 6,18 * * *", 2, 1800],
    ["scrape:compat:edgecore", "55 6,18 * * *", 2, 1800],
    // ── Switch assets — every 12h ──────────────────────────────────────────
    ["scrape:assets:switches", "30 7,19 * * *", 1, 3600],
    // ── eBay enrichment — every 6h ─────────────────────────────────────────
    ["enrich:ebay-transceivers", "0 0,6,12,18 * * *", 2, 7200],
    ["enrich:ebay-switches", "30 0,6,12,18 * * *", 2, 7200],
    // ── Intelligence & community — every 6h unless noted ───────────────────
    ["scrape:market-intel", "0 2,8,14,20 * * *", 2, 3600],
    // NOG conference talks — weekly, Mondays at 06:00
    ["scrape:nog-talks", "0 6 * * 1", 2, 7200],
    ["scrape:community-issues", "30 2,8,14,20 * * *", 1, 3600],
    ["scrape:datasheet-links", "0 3,9,15,21 * * *", 1, 3600],
    ["scrape:news", "20 3,9,15,21 * * *", 2, 1800],
    ["scrape:faq", "40 3,9,15,21 * * *", 2, 3600],
    // Docs — twice a day (04:50 and 16:50)
    ["scrape:docs", "50 4,16 * * *", 2, 3600],
    // ── Compute jobs — every 4h (after pricing waves settle) ───────────────
    ["compute:abc", "50 3,7,11,15,19,23 * * *", 2, 600],
    ["compute:reorder-signals", "55 3,7,11,15,19,23 * * *", 2, 600],
    // ── Prediction signal scrapers ─────────────────────────────────────────
    // SEC EDGAR CapEx — weekly, Mondays at 06:00
    ["scrape:signals:sec-edgar", "0 6 * * 1", 2, 3600],
    // GitHub signals — weekly, Sundays at 05:00
    ["scrape:signals:github", "0 5 * * 0", 2, 7200],
    // eBay sold velocity — every 12h
    ["scrape:signals:ebay-velocity", "0 4,16 * * *", 2, 3600],
    // AI cluster RSS feeds — every 4h
    ["scrape:signals:ai-clusters", "10 0,4,8,12,16,20 * * *", 2, 1800],
    // Distributor lead times — daily at 03:30
    ["scrape:signals:distributor-leads", "30 3 * * *", 2, 3600],
    // Standards tracker — weekly, Wednesdays at 04:00
    ["scrape:signals:standards", "0 4 * * 3", 1, 3600],
    // ── Forecast engine — daily at 08:00 ───────────────────────────────────
    ["compute:forecast", "0 8 * * *", 2, 600],
    // ── NAS sync — daily at 07:55 ──────────────────────────────────────────
    ["sync:nas", "55 7 * * *", 1, 1800],
  ];

  for (const [queue, cron, retryLimit, expireInSeconds] of scheduleTable) {
    await boss.schedule(queue, cron, {}, { retryLimit, expireInSeconds });
  }

  console.log("All schedules registered — 24/7 continuous scraping (50 jobs)");
}
/**
 * Registers one pg-boss worker per queue.
 *
 * Every handler first logs a timestamped line and then runs its scraper or
 * compute job. Most scrapers are loaded up front when this function is called;
 * a few are imported inside their handler, i.e. only when a job actually runs.
 * Scrapers passed through `isolated(...)` run inside `withIsolatedStorage`;
 * the others are called directly. The handlers for `scrape:faq` and
 * `scrape:docs` only log that they are not implemented yet.
 *
 * The closing log line reports the number of workers actually registered
 * (the previous hard-coded "55" did not match the 50 registrations).
 *
 * @param boss - A started pg-boss instance.
 */
export async function registerWorkers(boss: PgBoss): Promise<void> {
  // Lazy-load all scrapers
  const { scrapeFs } = await import("./scrapers/fs-com");
  const { scrapeCiscoTmg } = await import("./scrapers/cisco-tmg");
  const { scrapeOptcore } = await import("./scrapers/optcore");
  const { scrape10Gtek } = await import("./scrapers/tenGtek");
  const { scrapeFlexoptixCatalog } = await import("./scrapers/flexoptix-catalog");
  const { scrapeFlexoptixVendors } = await import("./scrapers/flexoptix-vendors");
  const { seedFlexoptixVendors } = await import("./scrapers/flexoptix-supported-vendors");
  const { scrapeNews } = await import("./scrapers/news");
  const { scrapeAtgbics } = await import("./scrapers/atgbics");
  const { scrapeProLabs } = await import("./scrapers/prolabs");
  const { scrapeChampionOne } = await import("./scrapers/champion-one");
  const { scrapeFluxlight } = await import("./scrapers/fluxlight");
  const { scrapeGbics } = await import("./scrapers/gbics");
  const { scrapeSfpCables } = await import("./scrapers/sfpcables");
  const { scrapeJuniperHct } = await import("./scrapers/juniper-hct");
  const { scrapeSonicHcl } = await import("./scrapers/sonic-hcl");
  const { scrapeUfiSpace } = await import("./scrapers/ufispace");
  const { scrapeEdgecore } = await import("./scrapers/edgecore");
  const { scrapeSwitchAssets } = await import("./scrapers/switch-assets");
  const { scrapeBlueOptics } = await import("./scrapers/blueoptics");
  const { scrapeFiber24 } = await import("./scrapers/fiber24");
  const { scrapeTsCom } = await import("./scrapers/tscom");
  const { scrapeSmartOptics } = await import("./scrapers/smartoptics");
  const { scrapeHuberSuhner } = await import("./scrapers/hubersuhner");
  const { scrapeSkylane } = await import("./scrapers/skylane");
  const { scrapeAscentOptics } = await import("./scrapers/ascentoptics");
  const { scrapeGaoTek } = await import("./scrapers/gaotek");
  // ── Prediction signal scrapers ────────────────────────────────────────
  const { scrapeSecEdgar } = await import("./scrapers/sec-edgar");
  const { scrapeGithubSignals } = await import("./scrapers/github-signals");
  const { scrapeEbayVelocity } = await import("./scrapers/ebay-velocity");
  const { scrapeAiClusters } = await import("./scrapers/ai-clusters");
  const { scrapeDistributorLeads } = await import("./scrapers/distributor-leads");
  const { scrapeStandardsTracker } = await import("./scrapers/standards-tracker");
  const { runForecastEngine } = await import("./utils/forecast-engine");

  let registeredCount = 0;

  /** Registers a worker that logs `message` with a timestamp, then awaits `run` (if any). */
  const register = async (
    queue: string,
    message: string,
    run?: () => Promise<unknown>,
  ): Promise<void> => {
    await boss.work(queue, async () => {
      console.log(`[${new Date().toISOString()}] ${message}`);
      if (run) {
        await run();
      }
    });
    registeredCount += 1;
  };

  /** Wraps `fn` so that it runs inside its own `storage-<name>` directory. */
  const isolated = (name: string, fn: () => Promise<void>) => (): Promise<void> =>
    withIsolatedStorage(name, fn);

  // ── Playwright scrapers ───────────────────────────────────────────────
  await register("scrape:pricing:fs", "Running: FS.com pricing", isolated("fs", scrapeFs));
  await register("scrape:pricing:10gtek", "Running: 10Gtek pricing", isolated("10gtek", scrape10Gtek));
  await register("scrape:pricing:atgbics", "Running: ATGBICS pricing", isolated("atgbics", scrapeAtgbics));
  await register("scrape:pricing:prolabs", "Running: ProLabs pricing", isolated("prolabs", scrapeProLabs));

  // ── Fetch/Cheerio scrapers ────────────────────────────────────────────
  await register("scrape:pricing:fluxlight", "Running: Fluxlight pricing", isolated("fluxlight", scrapeFluxlight));
  await register("scrape:pricing:gbics", "Running: GBICs pricing", isolated("gbics", scrapeGbics));
  await register("scrape:pricing:optcore", "Running: Optcore pricing", isolated("optcore", scrapeOptcore));
  await register("scrape:pricing:champion-one", "Running: Champion ONE pricing", isolated("champion-one", scrapeChampionOne));
  await register("scrape:pricing:sfpcables", "Running: SFPCables pricing", isolated("sfpcables", scrapeSfpCables));
  await register("scrape:pricing:blueoptics", "Running: BlueOptics pricing", isolated("blueoptics", scrapeBlueOptics));
  await register("scrape:pricing:fiber24", "Running: ShopFiber24 pricing", isolated("fiber24", scrapeFiber24));
  await register("scrape:pricing:tscom", "Running: T&S Communication pricing", isolated("tscom", scrapeTsCom));
  await register("scrape:pricing:skylane", "Running: Skylane Optics pricing", isolated("skylane", scrapeSkylane));
  await register("scrape:pricing:ascentoptics", "Running: AscentOptics pricing", isolated("ascentoptics", scrapeAscentOptics));
  await register("scrape:pricing:gaotek", "Running: GAO Tek pricing", isolated("gaotek", scrapeGaoTek));

  // ── Catalog scrapers ──────────────────────────────────────────────────
  await register("scrape:pricing:flexoptix", "Running: Flexoptix catalog", () => scrapeFlexoptixCatalog());
  await register("scrape:catalog:smartoptics", "Running: SmartOptics catalog", isolated("smartoptics", scrapeSmartOptics));
  await register("scrape:catalog:hubersuhner", "Running: HUBER+SUHNER catalog", isolated("hubersuhner", scrapeHuberSuhner));

  // ── Vendor lists ──────────────────────────────────────────────────────
  await register("scrape:vendors:flexoptix", "Running: Flexoptix vendor list", () => scrapeFlexoptixVendors());
  await register("scrape:vendors:flexoptix-supported", "Running: Flexoptix supported vendors", () => seedFlexoptixVendors());

  // ── Compatibility scrapers ────────────────────────────────────────────
  await register("scrape:compat:cisco", "Running: Cisco TMG compatibility", isolated("cisco", scrapeCiscoTmg));
  await register("scrape:compat:juniper", "Running: Juniper HCT compatibility", isolated("juniper", scrapeJuniperHct));
  await register("scrape:compat:sonic", "Running: SONiC HCL compatibility", isolated("sonic", scrapeSonicHcl));
  await register("scrape:compat:ufispace", "Running: Ufispace switch data", isolated("ufispace", scrapeUfiSpace));
  await register("scrape:compat:edgecore", "Running: Edgecore switch data", isolated("edgecore", scrapeEdgecore));

  // ── Switch assets ─────────────────────────────────────────────────────
  await register(
    "scrape:assets:switches",
    "Running: Switch assets enrichment",
    isolated("switch-assets", () => scrapeSwitchAssets()),
  );

  // ── eBay enrichment (module imported when the job runs) ───────────────
  await register("enrich:ebay-transceivers", "Running: eBay transceiver pricing", async () => {
    const { enrichTransceiversFromEbay } = await import("./scrapers/ebay-enricher");
    await withIsolatedStorage("ebay-transceivers", () => enrichTransceiversFromEbay(100));
  });
  await register("enrich:ebay-switches", "Running: eBay switch enrichment", async () => {
    const { enrichSwitchesFromEbay } = await import("./scrapers/ebay-enricher");
    await withIsolatedStorage("ebay-switches", () => enrichSwitchesFromEbay(30));
  });

  // ── Intelligence & community ──────────────────────────────────────────
  await register("scrape:market-intel", "Running: Market intelligence", async () => {
    const { scrapeMarketIntelligence } = await import("./scrapers/market-intelligence");
    await withIsolatedStorage("market-intel", scrapeMarketIntelligence);
  });
  await register("scrape:nog-talks", "Running: NOG conference talks", async () => {
    const { scrapeNogTalks } = await import("./scrapers/nog-talks");
    await scrapeNogTalks();
  });
  await register("scrape:community-issues", "Running: Community issues", async () => {
    const { scrapeAllSwitchIssues } = await import("./scrapers/community-issues");
    await withIsolatedStorage("community-issues", () => scrapeAllSwitchIssues(30));
  });
  await register("scrape:datasheet-links", "Running: Datasheet links", async () => {
    const { findAndSeedDatasheetLinks } = await import("./scrapers/community-issues");
    await findAndSeedDatasheetLinks(50);
  });
  await register("scrape:news", "Running: News aggregation", () => scrapeNews());
  // Placeholders: these handlers only log.
  await register("scrape:faq", "FAQ scraper — not yet implemented");
  await register("scrape:docs", "Docs scraper — not yet implemented");

  // ── Compute jobs ──────────────────────────────────────────────────────
  await register("compute:abc", "Computing: ABC classification", async () => {
    const { computeAbcClassification } = await import("./scrapers/market-intelligence");
    await computeAbcClassification();
  });
  await register("compute:reorder-signals", "Computing: Reorder signals", async () => {
    const { computeReorderSignals } = await import("./scrapers/market-intelligence");
    await computeReorderSignals();
  });

  // ── NAS sync ──────────────────────────────────────────────────────────
  await register("sync:nas", "Running: NAS sync to Fearghas", async () => {
    const { runNightlyNasSync } = await import("./utils/nas-sync");
    await runNightlyNasSync();
  });

  // ── Prediction signal scrapers ────────────────────────────────────────
  await register("scrape:signals:sec-edgar", "Running: SEC EDGAR CapEx", () => scrapeSecEdgar());
  await register("scrape:signals:github", "Running: GitHub tech signals", () => scrapeGithubSignals());
  await register("scrape:signals:ebay-velocity", "Running: eBay sold velocity", () => scrapeEbayVelocity());
  await register("scrape:signals:ai-clusters", "Running: AI cluster announcements", () => scrapeAiClusters());
  await register("scrape:signals:distributor-leads", "Running: Distributor lead times", () => scrapeDistributorLeads());
  await register("scrape:signals:standards", "Running: Standards tracker", () => scrapeStandardsTracker());

  // ── Forecast engine ───────────────────────────────────────────────────
  await register("compute:forecast", "Running: Forecast engine", () => runForecastEngine());

  // ── Form-factor coverage scrapers (modules imported when the job runs) ─
  await register("scrape:pricing:comms-express", "Running: Comms-Express pricing", async () => {
    const { scrapeCommsExpress } = await import("./scrapers/comms-express");
    await scrapeCommsExpress();
  });
  await register("scrape:pricing:router-switch", "Running: Router-Switch.com pricing", async () => {
    const { scrapeRouterSwitch } = await import("./scrapers/router-switch");
    await scrapeRouterSwitch();
  });
  await register("scrape:pricing:multimode-inc", "Running: Multimode Inc pricing", async () => {
    const { scrapeMultimodeInc } = await import("./scrapers/multimode-inc");
    await scrapeMultimodeInc();
  });
  await register("scrape:pricing:optictransceiver", "Running: OpticTransceiver.com pricing", async () => {
    const { scrapeOpticTransceiver } = await import("./scrapers/optictransceiver");
    await scrapeOpticTransceiver();
  });
  await register("scrape:pricing:wiitek", "Running: Wiitek pricing", async () => {
    const { scrapeWiitek } = await import("./scrapers/wiitek");
    await scrapeWiitek();
  });

  // Count is derived from the registrations above so the message cannot drift.
  console.log(`All workers registered (${registeredCount} jobs, 24/7 continuous)`);
}