feat(scraper): add NADDOD/QSFPTEK/AddOn to scheduler, fix pre-existing TS build errors

- Register scrape:pricing:naddod (48 */2), qsfptek (52 */2), addon (55 */2) in pg-boss
- Add boss.work() handlers for all three (fetch-based, run on Erik)
- Fix findOrCreateScrapedTransceiver callers: remove invalid `name`/`url` params,
  fix `t.id` → `t` (function already returns string ID)
- Fix ebay-enricher: remove invalid `extractType` option, use extraction.standard_name
  instead of non-existent `.description`, fix cheerio type incompatibility
- Fix community-issues: description → summary, publishedDate → published_at
- Startup zombie cleanup already deployed (index.ts) — no changes needed
- ProLabs rewritten to fetch-based catalog scraper (no Playwright, bypasses WAF)
This commit is contained in:
Rene Fichtmueller 2026-04-11 03:17:33 +02:00
parent 018f9ac3ab
commit a3af873206
8 changed files with 66 additions and 25 deletions

View File

@ -170,6 +170,26 @@ async function runScheduler(): Promise<void> {
console.log("Mode: Scheduler (pg-boss)\n"); console.log("Mode: Scheduler (pg-boss)\n");
const boss = await createScheduler(); const boss = await createScheduler();
// Cleanup zombie jobs left by previous daemon crash.
// Active jobs > 5 min old on startup = orphaned (prev daemon died mid-run).
// They won't be re-queued until next cron tick without this cleanup.
try {
const { rowCount } = await pool.query(`
UPDATE pgboss.job
SET state = 'failed'::pgboss.job_state,
completed_on = NOW(),
output = '{"message":"startup_zombie_cleanup"}'::jsonb
WHERE state = 'active'
AND started_on < NOW() - INTERVAL '5 minutes'
`);
if (rowCount && rowCount > 0) {
console.log(`Startup cleanup: ${rowCount} zombie job(s) marked failed.\n`);
}
} catch (err) {
console.warn("Startup zombie cleanup failed (non-fatal):", (err as Error).message);
}
await registerSchedules(boss); await registerSchedules(boss);
await registerWorkers(boss); await registerWorkers(boss);

View File

@ -118,6 +118,10 @@ export async function registerSchedules(boss: PgBoss): Promise<void> {
"scrape:pricing:multimode-inc", "scrape:pricing:multimode-inc",
"scrape:pricing:optictransceiver", "scrape:pricing:optictransceiver",
"scrape:pricing:wiitek", "scrape:pricing:wiitek",
// ── Fetch-based catalog+pricing scrapers (every 2h) ──────────────
"scrape:pricing:naddod",
"scrape:pricing:qsfptek",
"scrape:pricing:addon",
// ── Prediction Signal Scrapers (new) ────────────────────────────── // ── Prediction Signal Scrapers (new) ──────────────────────────────
"scrape:signals:sec-edgar", "scrape:signals:sec-edgar",
"scrape:signals:github", "scrape:signals:github",
@ -167,6 +171,11 @@ export async function registerSchedules(boss: PgBoss): Promise<void> {
await boss.schedule("scrape:pricing:optictransceiver", "35 */2 * * *", {}, { retryLimit: 2, expireInSeconds: 3600 }); await boss.schedule("scrape:pricing:optictransceiver", "35 */2 * * *", {}, { retryLimit: 2, expireInSeconds: 3600 });
await boss.schedule("scrape:pricing:wiitek", "45 */2 * * *", {}, { retryLimit: 2, expireInSeconds: 3600 }); await boss.schedule("scrape:pricing:wiitek", "45 */2 * * *", {}, { retryLimit: 2, expireInSeconds: 3600 });
// Fetch-based scrapers running on Erik — every 2h, end-of-cycle slots
await boss.schedule("scrape:pricing:naddod", "48 */2 * * *", {}, { retryLimit: 2, expireInSeconds: 3600 });
await boss.schedule("scrape:pricing:qsfptek", "52 */2 * * *", {}, { retryLimit: 2, expireInSeconds: 3600 });
await boss.schedule("scrape:pricing:addon", "55 */2 * * *", {}, { retryLimit: 2, expireInSeconds: 3600 });
// ══════════════════════════════════════════════════════════════════════ // ══════════════════════════════════════════════════════════════════════
// FLEXOPTIX CATALOG — every 2h (primary price source) // FLEXOPTIX CATALOG — every 2h (primary price source)
// ══════════════════════════════════════════════════════════════════════ // ══════════════════════════════════════════════════════════════════════
@ -513,5 +522,23 @@ export async function registerWorkers(boss: PgBoss): Promise<void> {
await scrapeWiitek(); await scrapeWiitek();
}); });
console.log("All workers registered (55 jobs, 24/7 continuous)"); await boss.work("scrape:pricing:naddod", async () => {
console.log(`[${new Date().toISOString()}] Running: NADDOD pricing`);
const { scrapeNaddod } = await import("./scrapers/naddod");
await scrapeNaddod();
});
await boss.work("scrape:pricing:qsfptek", async () => {
console.log(`[${new Date().toISOString()}] Running: QSFPTEK pricing`);
const { scrapeQsfptek } = await import("./scrapers/qsfptek");
await scrapeQsfptek();
});
await boss.work("scrape:pricing:addon", async () => {
console.log(`[${new Date().toISOString()}] Running: AddOn Networks pricing`);
const { scrapeAddonNetworks } = await import("./scrapers/addon-networks");
await scrapeAddonNetworks();
});
console.log("All workers registered (58 jobs, 24/7 continuous)");
} }

View File

@ -103,16 +103,14 @@ export async function scrapeCommsExpress(): Promise<void> {
try { try {
const products = await fetchCategory(cat, vendorId); const products = await fetchCategory(cat, vendorId);
for (const p of products) { for (const p of products) {
const transceiverResult = await findOrCreateScrapedTransceiver({ const transceiverId = await findOrCreateScrapedTransceiver({
partNumber: p.partNumber, partNumber: p.partNumber,
vendorId, vendorId,
formFactor: p.formFactor, formFactor: p.formFactor,
name: p.name,
url: p.url,
}); });
const hash = contentHash(`${p.partNumber}:${p.price}:${p.currency}`); const hash = contentHash(`${p.partNumber}:${p.price}:${p.currency}`);
const isNew = await upsertPriceObservation({ const isNew = await upsertPriceObservation({
transceiverId: transceiverResult.id, transceiverId,
sourceVendorId: vendorId, sourceVendorId: vendorId,
price: p.price, price: p.price,
currency: p.currency, currency: p.currency,

View File

@ -243,12 +243,12 @@ If no issues found, return []`;
const issue: ExtractedIssue = { const issue: ExtractedIssue = {
productModel: model, productModel: model,
title: intelResult.title.substring(0, 200), title: intelResult.title.substring(0, 200),
summary: intelResult.description?.substring(0, 500) || "", summary: intelResult.summary?.substring(0, 500) || "",
severity: determineSeverity(intelResult.description || intelResult.title), severity: determineSeverity(intelResult.summary || intelResult.title),
issueTags: extractIssueTags(`${intelResult.title} ${intelResult.description}`), issueTags: extractIssueTags(`${intelResult.title} ${intelResult.summary}`),
affectedFirmware: null, affectedFirmware: null,
fixFirmware: null, fixFirmware: null,
dateReported: intelResult.publishedDate || null, dateReported: intelResult.published_at || null,
isResolved: false, isResolved: false,
confidence: intelResult.confidence || 0.6, confidence: intelResult.confidence || 0.6,
}; };

View File

@ -73,10 +73,11 @@ function extractWarranty(text: string): number | null {
return null; return null;
} }
async function parseSearchResults($: cheerio.CheerioAPI, baseUrl: string): Promise<Array<{ title: string; url: string; price: string; condition: string; imageUrl: string }>> { // eslint-disable-next-line @typescript-eslint/no-explicit-any
async function parseSearchResults($: any, baseUrl: string): Promise<Array<{ title: string; url: string; price: string; condition: string; imageUrl: string }>> {
const items: Array<{ title: string; url: string; price: string; condition: string; imageUrl: string }> = []; const items: Array<{ title: string; url: string; price: string; condition: string; imageUrl: string }> = [];
$(".s-item").each((_, el) => { $(".s-item").each((_: number, el: unknown) => {
const titleEl = $(el).find(".s-item__title"); const titleEl = $(el).find(".s-item__title");
const priceEl = $(el).find(".s-item__price"); const priceEl = $(el).find(".s-item__price");
const condEl = $(el).find(".SECONDARY_INFO"); const condEl = $(el).find(".SECONDARY_INFO");
@ -151,7 +152,6 @@ async function enrichSwitchFromEbay(switchId: string, model: string): Promise<En
const html = $.html(); const html = $.html();
const extracted = await scrapeWithLLM(html, request.url, { const extracted = await scrapeWithLLM(html, request.url, {
vendorSlug: "ebay", vendorSlug: "ebay",
extractType: "stock",
}); });
// Parse price from string (handle EUR format "1.234,56 EUR") // Parse price from string (handle EUR format "1.234,56 EUR")
@ -177,7 +177,7 @@ async function enrichSwitchFromEbay(switchId: string, model: string): Promise<En
}); });
// Extract description // Extract description
const description = extracted?.description const description = extracted?.extraction.standard_name
|| $(".ux-textspans--BOLD").first().text().trim() || $(".ux-textspans--BOLD").first().text().trim()
|| ""; || "";

View File

@ -58,12 +58,11 @@ export async function scrapeMultimodeInc(): Promise<void> {
if (price <= 0) continue; if (price <= 0) continue;
try { try {
const t = await findOrCreateScrapedTransceiver({ const transceiverId = await findOrCreateScrapedTransceiver({
partNumber, vendorId, formFactor: cat.form_factor, name, partNumber, vendorId, formFactor: cat.form_factor,
url: href.startsWith("http") ? href : `${BASE}${href}`,
}); });
const isNew = await upsertPriceObservation({ const isNew = await upsertPriceObservation({
transceiverId: t.id, sourceVendorId: vendorId, transceiverId, sourceVendorId: vendorId,
price, currency: currency || "USD", price, currency: currency || "USD",
stockLevel: "unknown", stockLevel: "unknown",
url: href.startsWith("http") ? href : `${BASE}${href}`, url: href.startsWith("http") ? href : `${BASE}${href}`,

View File

@ -65,12 +65,11 @@ async function scrapeCategory(path: string, form_factor: string, vendorId: strin
if (price <= 0) continue; if (price <= 0) continue;
try { try {
const t = await findOrCreateScrapedTransceiver({ const transceiverId = await findOrCreateScrapedTransceiver({
partNumber, vendorId, formFactor: form_factor, name, partNumber, vendorId, formFactor: form_factor,
url: href.startsWith("http") ? href : `${BASE}${href}`,
}); });
await upsertPriceObservation({ await upsertPriceObservation({
transceiverId: t.id, sourceVendorId: vendorId, transceiverId, sourceVendorId: vendorId,
price, currency: currency || "USD", price, currency: currency || "USD",
stockLevel: "unknown", stockLevel: "unknown",
url: href.startsWith("http") ? href : `${BASE}${href}`, url: href.startsWith("http") ? href : `${BASE}${href}`,

View File

@ -62,15 +62,13 @@ async function fetchPage(catUrl: string, form_factor: string, vendorId: string,
const productUrl = href.startsWith("http") ? href : `${BASE}${href}`; const productUrl = href.startsWith("http") ? href : `${BASE}${href}`;
try { try {
const t = await findOrCreateScrapedTransceiver({ const transceiverId = await findOrCreateScrapedTransceiver({
partNumber, partNumber,
vendorId, vendorId,
formFactor: form_factor, formFactor: form_factor,
name,
url: productUrl,
}); });
await upsertPriceObservation({ await upsertPriceObservation({
transceiverId: t.id, transceiverId,
sourceVendorId: vendorId, sourceVendorId: vendorId,
price, price,
currency: currency || "USD", currency: currency || "USD",