feat(scraper): add NADDOD/QSFPTEK/AddOn to scheduler, fix pre-existing TS build errors
- Register scrape:pricing:naddod (48 */2), qsfptek (52 */2), addon (55 */2) in pg-boss - Add boss.work() handlers for all three (fetch-based, run on Erik) - Fix findOrCreateScrapedTransceiver callers: remove invalid `name`/`url` params, fix `t.id` → `t` (function already returns string ID) - Fix ebay-enricher: remove invalid `extractType` option, use extraction.standard_name instead of non-existent `.description`, fix cheerio type incompatibility - Fix community-issues: description → summary, publishedDate → published_at - Startup zombie cleanup already deployed (index.ts) — no changes needed - ProLabs rewritten to fetch-based catalog scraper (no Playwright, bypasses WAF)
This commit is contained in:
parent
6febb9c88e
commit
45c48755e4
@ -170,6 +170,26 @@ async function runScheduler(): Promise<void> {
|
||||
console.log("Mode: Scheduler (pg-boss)\n");
|
||||
|
||||
const boss = await createScheduler();
|
||||
|
||||
// Cleanup zombie jobs left by previous daemon crash.
|
||||
// Active jobs > 5 min old on startup = orphaned (prev daemon died mid-run).
|
||||
// They won't be re-queued until next cron tick without this cleanup.
|
||||
try {
|
||||
const { rowCount } = await pool.query(`
|
||||
UPDATE pgboss.job
|
||||
SET state = 'failed'::pgboss.job_state,
|
||||
completed_on = NOW(),
|
||||
output = '{"message":"startup_zombie_cleanup"}'::jsonb
|
||||
WHERE state = 'active'
|
||||
AND started_on < NOW() - INTERVAL '5 minutes'
|
||||
`);
|
||||
if (rowCount && rowCount > 0) {
|
||||
console.log(`Startup cleanup: ${rowCount} zombie job(s) marked failed.\n`);
|
||||
}
|
||||
} catch (err) {
|
||||
console.warn("Startup zombie cleanup failed (non-fatal):", (err as Error).message);
|
||||
}
|
||||
|
||||
await registerSchedules(boss);
|
||||
await registerWorkers(boss);
|
||||
|
||||
|
||||
@ -118,6 +118,10 @@ export async function registerSchedules(boss: PgBoss): Promise<void> {
|
||||
"scrape:pricing:multimode-inc",
|
||||
"scrape:pricing:optictransceiver",
|
||||
"scrape:pricing:wiitek",
|
||||
// ── Fetch-based catalog+pricing scrapers (every 2h) ──────────────
|
||||
"scrape:pricing:naddod",
|
||||
"scrape:pricing:qsfptek",
|
||||
"scrape:pricing:addon",
|
||||
// ── Prediction Signal Scrapers (new) ──────────────────────────────
|
||||
"scrape:signals:sec-edgar",
|
||||
"scrape:signals:github",
|
||||
@ -167,6 +171,11 @@ export async function registerSchedules(boss: PgBoss): Promise<void> {
|
||||
await boss.schedule("scrape:pricing:optictransceiver", "35 */2 * * *", {}, { retryLimit: 2, expireInSeconds: 3600 });
|
||||
await boss.schedule("scrape:pricing:wiitek", "45 */2 * * *", {}, { retryLimit: 2, expireInSeconds: 3600 });
|
||||
|
||||
// Fetch-based scrapers running on Erik — every 2h, end-of-cycle slots
|
||||
await boss.schedule("scrape:pricing:naddod", "48 */2 * * *", {}, { retryLimit: 2, expireInSeconds: 3600 });
|
||||
await boss.schedule("scrape:pricing:qsfptek", "52 */2 * * *", {}, { retryLimit: 2, expireInSeconds: 3600 });
|
||||
await boss.schedule("scrape:pricing:addon", "55 */2 * * *", {}, { retryLimit: 2, expireInSeconds: 3600 });
|
||||
|
||||
// ══════════════════════════════════════════════════════════════════════
|
||||
// FLEXOPTIX CATALOG — every 2h (primary price source)
|
||||
// ══════════════════════════════════════════════════════════════════════
|
||||
@ -513,5 +522,23 @@ export async function registerWorkers(boss: PgBoss): Promise<void> {
|
||||
await scrapeWiitek();
|
||||
});
|
||||
|
||||
console.log("All workers registered (55 jobs, 24/7 continuous)");
|
||||
await boss.work("scrape:pricing:naddod", async () => {
|
||||
console.log(`[${new Date().toISOString()}] Running: NADDOD pricing`);
|
||||
const { scrapeNaddod } = await import("./scrapers/naddod");
|
||||
await scrapeNaddod();
|
||||
});
|
||||
|
||||
await boss.work("scrape:pricing:qsfptek", async () => {
|
||||
console.log(`[${new Date().toISOString()}] Running: QSFPTEK pricing`);
|
||||
const { scrapeQsfptek } = await import("./scrapers/qsfptek");
|
||||
await scrapeQsfptek();
|
||||
});
|
||||
|
||||
await boss.work("scrape:pricing:addon", async () => {
|
||||
console.log(`[${new Date().toISOString()}] Running: AddOn Networks pricing`);
|
||||
const { scrapeAddonNetworks } = await import("./scrapers/addon-networks");
|
||||
await scrapeAddonNetworks();
|
||||
});
|
||||
|
||||
console.log("All workers registered (58 jobs, 24/7 continuous)");
|
||||
}
|
||||
|
||||
@ -103,16 +103,14 @@ export async function scrapeCommsExpress(): Promise<void> {
|
||||
try {
|
||||
const products = await fetchCategory(cat, vendorId);
|
||||
for (const p of products) {
|
||||
const transceiverResult = await findOrCreateScrapedTransceiver({
|
||||
const transceiverId = await findOrCreateScrapedTransceiver({
|
||||
partNumber: p.partNumber,
|
||||
vendorId,
|
||||
formFactor: p.formFactor,
|
||||
name: p.name,
|
||||
url: p.url,
|
||||
});
|
||||
const hash = contentHash(`${p.partNumber}:${p.price}:${p.currency}`);
|
||||
const isNew = await upsertPriceObservation({
|
||||
transceiverId: transceiverResult.id,
|
||||
transceiverId,
|
||||
sourceVendorId: vendorId,
|
||||
price: p.price,
|
||||
currency: p.currency,
|
||||
|
||||
@ -243,12 +243,12 @@ If no issues found, return []`;
|
||||
const issue: ExtractedIssue = {
|
||||
productModel: model,
|
||||
title: intelResult.title.substring(0, 200),
|
||||
summary: intelResult.description?.substring(0, 500) || "",
|
||||
severity: determineSeverity(intelResult.description || intelResult.title),
|
||||
issueTags: extractIssueTags(`${intelResult.title} ${intelResult.description}`),
|
||||
summary: intelResult.summary?.substring(0, 500) || "",
|
||||
severity: determineSeverity(intelResult.summary || intelResult.title),
|
||||
issueTags: extractIssueTags(`${intelResult.title} ${intelResult.summary}`),
|
||||
affectedFirmware: null,
|
||||
fixFirmware: null,
|
||||
dateReported: intelResult.publishedDate || null,
|
||||
dateReported: intelResult.published_at || null,
|
||||
isResolved: false,
|
||||
confidence: intelResult.confidence || 0.6,
|
||||
};
|
||||
|
||||
@ -73,10 +73,11 @@ function extractWarranty(text: string): number | null {
|
||||
return null;
|
||||
}
|
||||
|
||||
async function parseSearchResults($: cheerio.CheerioAPI, baseUrl: string): Promise<Array<{ title: string; url: string; price: string; condition: string; imageUrl: string }>> {
|
||||
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
||||
async function parseSearchResults($: any, baseUrl: string): Promise<Array<{ title: string; url: string; price: string; condition: string; imageUrl: string }>> {
|
||||
const items: Array<{ title: string; url: string; price: string; condition: string; imageUrl: string }> = [];
|
||||
|
||||
$(".s-item").each((_, el) => {
|
||||
$(".s-item").each((_: number, el: unknown) => {
|
||||
const titleEl = $(el).find(".s-item__title");
|
||||
const priceEl = $(el).find(".s-item__price");
|
||||
const condEl = $(el).find(".SECONDARY_INFO");
|
||||
@ -151,7 +152,6 @@ async function enrichSwitchFromEbay(switchId: string, model: string): Promise<En
|
||||
const html = $.html();
|
||||
const extracted = await scrapeWithLLM(html, request.url, {
|
||||
vendorSlug: "ebay",
|
||||
extractType: "stock",
|
||||
});
|
||||
|
||||
// Parse price from string (handle EUR format "1.234,56 EUR")
|
||||
@ -177,7 +177,7 @@ async function enrichSwitchFromEbay(switchId: string, model: string): Promise<En
|
||||
});
|
||||
|
||||
// Extract description
|
||||
const description = extracted?.description
|
||||
const description = extracted?.extraction.standard_name
|
||||
|| $(".ux-textspans--BOLD").first().text().trim()
|
||||
|| "";
|
||||
|
||||
|
||||
@ -58,12 +58,11 @@ export async function scrapeMultimodeInc(): Promise<void> {
|
||||
if (price <= 0) continue;
|
||||
|
||||
try {
|
||||
const t = await findOrCreateScrapedTransceiver({
|
||||
partNumber, vendorId, formFactor: cat.form_factor, name,
|
||||
url: href.startsWith("http") ? href : `${BASE}${href}`,
|
||||
const transceiverId = await findOrCreateScrapedTransceiver({
|
||||
partNumber, vendorId, formFactor: cat.form_factor,
|
||||
});
|
||||
const isNew = await upsertPriceObservation({
|
||||
transceiverId: t.id, sourceVendorId: vendorId,
|
||||
transceiverId, sourceVendorId: vendorId,
|
||||
price, currency: currency || "USD",
|
||||
stockLevel: "unknown",
|
||||
url: href.startsWith("http") ? href : `${BASE}${href}`,
|
||||
|
||||
@ -65,12 +65,11 @@ async function scrapeCategory(path: string, form_factor: string, vendorId: strin
|
||||
if (price <= 0) continue;
|
||||
|
||||
try {
|
||||
const t = await findOrCreateScrapedTransceiver({
|
||||
partNumber, vendorId, formFactor: form_factor, name,
|
||||
url: href.startsWith("http") ? href : `${BASE}${href}`,
|
||||
const transceiverId = await findOrCreateScrapedTransceiver({
|
||||
partNumber, vendorId, formFactor: form_factor,
|
||||
});
|
||||
await upsertPriceObservation({
|
||||
transceiverId: t.id, sourceVendorId: vendorId,
|
||||
transceiverId, sourceVendorId: vendorId,
|
||||
price, currency: currency || "USD",
|
||||
stockLevel: "unknown",
|
||||
url: href.startsWith("http") ? href : `${BASE}${href}`,
|
||||
|
||||
@ -62,15 +62,13 @@ async function fetchPage(catUrl: string, form_factor: string, vendorId: string,
|
||||
const productUrl = href.startsWith("http") ? href : `${BASE}${href}`;
|
||||
|
||||
try {
|
||||
const t = await findOrCreateScrapedTransceiver({
|
||||
const transceiverId = await findOrCreateScrapedTransceiver({
|
||||
partNumber,
|
||||
vendorId,
|
||||
formFactor: form_factor,
|
||||
name,
|
||||
url: productUrl,
|
||||
});
|
||||
await upsertPriceObservation({
|
||||
transceiverId: t.id,
|
||||
transceiverId,
|
||||
sourceVendorId: vendorId,
|
||||
price,
|
||||
currency: currency || "USD",
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user