diff --git a/packages/scraper/src/index.ts b/packages/scraper/src/index.ts index ca13099..3fc0384 100644 --- a/packages/scraper/src/index.ts +++ b/packages/scraper/src/index.ts @@ -170,6 +170,26 @@ async function runScheduler(): Promise { console.log("Mode: Scheduler (pg-boss)\n"); const boss = await createScheduler(); + + // Cleanup zombie jobs left by previous daemon crash. + // Active jobs > 5 min old on startup = orphaned (prev daemon died mid-run). + // They won't be re-queued until next cron tick without this cleanup. + try { + const { rowCount } = await pool.query(` + UPDATE pgboss.job + SET state = 'failed'::pgboss.job_state, + completed_on = NOW(), + output = '{"message":"startup_zombie_cleanup"}'::jsonb + WHERE state = 'active' + AND started_on < NOW() - INTERVAL '5 minutes' + `); + if (rowCount && rowCount > 0) { + console.log(`Startup cleanup: ${rowCount} zombie job(s) marked failed.\n`); + } + } catch (err) { + console.warn("Startup zombie cleanup failed (non-fatal):", (err as Error).message); + } + await registerSchedules(boss); await registerWorkers(boss); diff --git a/packages/scraper/src/scheduler.ts b/packages/scraper/src/scheduler.ts index 891cd59..ef2afcc 100644 --- a/packages/scraper/src/scheduler.ts +++ b/packages/scraper/src/scheduler.ts @@ -118,6 +118,10 @@ export async function registerSchedules(boss: PgBoss): Promise { "scrape:pricing:multimode-inc", "scrape:pricing:optictransceiver", "scrape:pricing:wiitek", + // ── Fetch-based catalog+pricing scrapers (every 2h) ────────────── + "scrape:pricing:naddod", + "scrape:pricing:qsfptek", + "scrape:pricing:addon", // ── Prediction Signal Scrapers (new) ────────────────────────────── "scrape:signals:sec-edgar", "scrape:signals:github", @@ -167,6 +171,11 @@ export async function registerSchedules(boss: PgBoss): Promise { await boss.schedule("scrape:pricing:optictransceiver", "35 */2 * * *", {}, { retryLimit: 2, expireInSeconds: 3600 }); await boss.schedule("scrape:pricing:wiitek", "45 */2 * * *", {}, { retryLimit: 2, expireInSeconds: 3600 }); + // Fetch-based scrapers running on Erik — every 2h, end-of-cycle slots + await boss.schedule("scrape:pricing:naddod", "48 */2 * * *", {}, { retryLimit: 2, expireInSeconds: 3600 }); + await boss.schedule("scrape:pricing:qsfptek", "52 */2 * * *", {}, { retryLimit: 2, expireInSeconds: 3600 }); + await boss.schedule("scrape:pricing:addon", "55 */2 * * *", {}, { retryLimit: 2, expireInSeconds: 3600 }); + // ══════════════════════════════════════════════════════════════════════ // FLEXOPTIX CATALOG — every 2h (primary price source) // ══════════════════════════════════════════════════════════════════════ @@ -513,5 +522,23 @@ export async function registerWorkers(boss: PgBoss): Promise { await scrapeWiitek(); }); - console.log("All workers registered (55 jobs, 24/7 continuous)"); + await boss.work("scrape:pricing:naddod", async () => { + console.log(`[${new Date().toISOString()}] Running: NADDOD pricing`); + const { scrapeNaddod } = await import("./scrapers/naddod"); + await scrapeNaddod(); + }); + + await boss.work("scrape:pricing:qsfptek", async () => { + console.log(`[${new Date().toISOString()}] Running: QSFPTEK pricing`); + const { scrapeQsfptek } = await import("./scrapers/qsfptek"); + await scrapeQsfptek(); + }); + + await boss.work("scrape:pricing:addon", async () => { + console.log(`[${new Date().toISOString()}] Running: AddOn Networks pricing`); + const { scrapeAddonNetworks } = await import("./scrapers/addon-networks"); + await scrapeAddonNetworks(); + }); + + console.log("All workers registered (58 jobs, 24/7 continuous)"); } diff --git a/packages/scraper/src/scrapers/comms-express.ts b/packages/scraper/src/scrapers/comms-express.ts index a41afb3..1f7e2ce 100644 --- a/packages/scraper/src/scrapers/comms-express.ts +++ b/packages/scraper/src/scrapers/comms-express.ts @@ -103,16 +103,14 @@ export async function scrapeCommsExpress(): Promise { try { const products = await fetchCategory(cat, vendorId); for (const p of products) { - const transceiverResult = await findOrCreateScrapedTransceiver({ + const transceiverId = await findOrCreateScrapedTransceiver({ partNumber: p.partNumber, vendorId, formFactor: p.formFactor, - name: p.name, - url: p.url, }); const hash = contentHash(`${p.partNumber}:${p.price}:${p.currency}`); const isNew = await upsertPriceObservation({ - transceiverId: transceiverResult.id, + transceiverId, sourceVendorId: vendorId, price: p.price, currency: p.currency, diff --git a/packages/scraper/src/scrapers/community-issues.ts b/packages/scraper/src/scrapers/community-issues.ts index a88a3fb..6aa0669 100644 --- a/packages/scraper/src/scrapers/community-issues.ts +++ b/packages/scraper/src/scrapers/community-issues.ts @@ -243,12 +243,12 @@ If no issues found, return []`; const issue: ExtractedIssue = { productModel: model, title: intelResult.title.substring(0, 200), - summary: intelResult.description?.substring(0, 500) || "", - severity: determineSeverity(intelResult.description || intelResult.title), - issueTags: extractIssueTags(`${intelResult.title} ${intelResult.description}`), + summary: intelResult.summary?.substring(0, 500) || "", + severity: determineSeverity(intelResult.summary || intelResult.title), + issueTags: extractIssueTags(`${intelResult.title} ${intelResult.summary}`), affectedFirmware: null, fixFirmware: null, - dateReported: intelResult.publishedDate || null, + dateReported: intelResult.published_at || null, isResolved: false, confidence: intelResult.confidence || 0.6, }; diff --git a/packages/scraper/src/scrapers/ebay-enricher.ts b/packages/scraper/src/scrapers/ebay-enricher.ts index 99999c4..7d1dd4e 100644 --- a/packages/scraper/src/scrapers/ebay-enricher.ts +++ b/packages/scraper/src/scrapers/ebay-enricher.ts @@ -73,10 +73,11 @@ function extractWarranty(text: string): number | null { return null; } -async function parseSearchResults($: cheerio.CheerioAPI, baseUrl: string): Promise> { +// eslint-disable-next-line @typescript-eslint/no-explicit-any +async function parseSearchResults($: any, baseUrl: string): Promise> { const items: Array<{ title: string; url: string; price: string; condition: string; imageUrl: string }> = []; - $(".s-item").each((_, el) => { + $(".s-item").each((_: number, el: unknown) => { const titleEl = $(el).find(".s-item__title"); const priceEl = $(el).find(".s-item__price"); const condEl = $(el).find(".SECONDARY_INFO"); @@ -151,7 +152,6 @@ async function enrichSwitchFromEbay(switchId: string, model: string): Promise { if (price <= 0) continue; try { - const t = await findOrCreateScrapedTransceiver({ - partNumber, vendorId, formFactor: cat.form_factor, name, - url: href.startsWith("http") ? href : `${BASE}${href}`, + const transceiverId = await findOrCreateScrapedTransceiver({ + partNumber, vendorId, formFactor: cat.form_factor, }); const isNew = await upsertPriceObservation({ - transceiverId: t.id, sourceVendorId: vendorId, + transceiverId, sourceVendorId: vendorId, price, currency: currency || "USD", stockLevel: "unknown", url: href.startsWith("http") ? href : `${BASE}${href}`, diff --git a/packages/scraper/src/scrapers/optictransceiver.ts b/packages/scraper/src/scrapers/optictransceiver.ts index 0b8a154..ef640d0 100644 --- a/packages/scraper/src/scrapers/optictransceiver.ts +++ b/packages/scraper/src/scrapers/optictransceiver.ts @@ -65,12 +65,11 @@ async function scrapeCategory(path: string, form_factor: string, vendorId: strin if (price <= 0) continue; try { - const t = await findOrCreateScrapedTransceiver({ - partNumber, vendorId, formFactor: form_factor, name, - url: href.startsWith("http") ? href : `${BASE}${href}`, + const transceiverId = await findOrCreateScrapedTransceiver({ + partNumber, vendorId, formFactor: form_factor, }); await upsertPriceObservation({ - transceiverId: t.id, sourceVendorId: vendorId, + transceiverId, sourceVendorId: vendorId, price, currency: currency || "USD", stockLevel: "unknown", url: href.startsWith("http") ? href : `${BASE}${href}`, diff --git a/packages/scraper/src/scrapers/router-switch.ts b/packages/scraper/src/scrapers/router-switch.ts index a3ac564..76f4a28 100644 --- a/packages/scraper/src/scrapers/router-switch.ts +++ b/packages/scraper/src/scrapers/router-switch.ts @@ -62,15 +62,13 @@ async function fetchPage(catUrl: string, form_factor: string, vendorId: string, const productUrl = href.startsWith("http") ? href : `${BASE}${href}`; try { - const t = await findOrCreateScrapedTransceiver({ + const transceiverId = await findOrCreateScrapedTransceiver({ partNumber, vendorId, formFactor: form_factor, - name, - url: productUrl, }); await upsertPriceObservation({ - transceiverId: t.id, + transceiverId, sourceVendorId: vendorId, price, currency: currency || "USD",