feat(scraper): add NADDOD/QSFPTEK/AddOn to scheduler, fix pre-existing TS build errors
- Register scrape:pricing:naddod (48 */2), qsfptek (52 */2), addon (55 */2) in pg-boss - Add boss.work() handlers for all three (fetch-based, run on Erik) - Fix findOrCreateScrapedTransceiver callers: remove invalid `name`/`url` params, fix `t.id` → `t` (function already returns string ID) - Fix ebay-enricher: remove invalid `extractType` option, use extraction.standard_name instead of non-existent `.description`, fix cheerio type incompatibility - Fix community-issues: description → summary, publishedDate → published_at - Startup zombie cleanup already deployed (index.ts) — no changes needed - ProLabs rewritten to fetch-based catalog scraper (no Playwright, bypasses WAF)
This commit is contained in:
parent
6febb9c88e
commit
45c48755e4
@ -170,6 +170,26 @@ async function runScheduler(): Promise<void> {
|
|||||||
console.log("Mode: Scheduler (pg-boss)\n");
|
console.log("Mode: Scheduler (pg-boss)\n");
|
||||||
|
|
||||||
const boss = await createScheduler();
|
const boss = await createScheduler();
|
||||||
|
|
||||||
|
// Cleanup zombie jobs left by previous daemon crash.
|
||||||
|
// Active jobs > 5 min old on startup = orphaned (prev daemon died mid-run).
|
||||||
|
// They won't be re-queued until next cron tick without this cleanup.
|
||||||
|
try {
|
||||||
|
const { rowCount } = await pool.query(`
|
||||||
|
UPDATE pgboss.job
|
||||||
|
SET state = 'failed'::pgboss.job_state,
|
||||||
|
completed_on = NOW(),
|
||||||
|
output = '{"message":"startup_zombie_cleanup"}'::jsonb
|
||||||
|
WHERE state = 'active'
|
||||||
|
AND started_on < NOW() - INTERVAL '5 minutes'
|
||||||
|
`);
|
||||||
|
if (rowCount && rowCount > 0) {
|
||||||
|
console.log(`Startup cleanup: ${rowCount} zombie job(s) marked failed.\n`);
|
||||||
|
}
|
||||||
|
} catch (err) {
|
||||||
|
console.warn("Startup zombie cleanup failed (non-fatal):", (err as Error).message);
|
||||||
|
}
|
||||||
|
|
||||||
await registerSchedules(boss);
|
await registerSchedules(boss);
|
||||||
await registerWorkers(boss);
|
await registerWorkers(boss);
|
||||||
|
|
||||||
|
|||||||
@ -118,6 +118,10 @@ export async function registerSchedules(boss: PgBoss): Promise<void> {
|
|||||||
"scrape:pricing:multimode-inc",
|
"scrape:pricing:multimode-inc",
|
||||||
"scrape:pricing:optictransceiver",
|
"scrape:pricing:optictransceiver",
|
||||||
"scrape:pricing:wiitek",
|
"scrape:pricing:wiitek",
|
||||||
|
// ── Fetch-based catalog+pricing scrapers (every 2h) ──────────────
|
||||||
|
"scrape:pricing:naddod",
|
||||||
|
"scrape:pricing:qsfptek",
|
||||||
|
"scrape:pricing:addon",
|
||||||
// ── Prediction Signal Scrapers (new) ──────────────────────────────
|
// ── Prediction Signal Scrapers (new) ──────────────────────────────
|
||||||
"scrape:signals:sec-edgar",
|
"scrape:signals:sec-edgar",
|
||||||
"scrape:signals:github",
|
"scrape:signals:github",
|
||||||
@ -167,6 +171,11 @@ export async function registerSchedules(boss: PgBoss): Promise<void> {
|
|||||||
await boss.schedule("scrape:pricing:optictransceiver", "35 */2 * * *", {}, { retryLimit: 2, expireInSeconds: 3600 });
|
await boss.schedule("scrape:pricing:optictransceiver", "35 */2 * * *", {}, { retryLimit: 2, expireInSeconds: 3600 });
|
||||||
await boss.schedule("scrape:pricing:wiitek", "45 */2 * * *", {}, { retryLimit: 2, expireInSeconds: 3600 });
|
await boss.schedule("scrape:pricing:wiitek", "45 */2 * * *", {}, { retryLimit: 2, expireInSeconds: 3600 });
|
||||||
|
|
||||||
|
// Fetch-based scrapers running on Erik — every 2h, end-of-cycle slots
|
||||||
|
await boss.schedule("scrape:pricing:naddod", "48 */2 * * *", {}, { retryLimit: 2, expireInSeconds: 3600 });
|
||||||
|
await boss.schedule("scrape:pricing:qsfptek", "52 */2 * * *", {}, { retryLimit: 2, expireInSeconds: 3600 });
|
||||||
|
await boss.schedule("scrape:pricing:addon", "55 */2 * * *", {}, { retryLimit: 2, expireInSeconds: 3600 });
|
||||||
|
|
||||||
// ══════════════════════════════════════════════════════════════════════
|
// ══════════════════════════════════════════════════════════════════════
|
||||||
// FLEXOPTIX CATALOG — every 2h (primary price source)
|
// FLEXOPTIX CATALOG — every 2h (primary price source)
|
||||||
// ══════════════════════════════════════════════════════════════════════
|
// ══════════════════════════════════════════════════════════════════════
|
||||||
@ -513,5 +522,23 @@ export async function registerWorkers(boss: PgBoss): Promise<void> {
|
|||||||
await scrapeWiitek();
|
await scrapeWiitek();
|
||||||
});
|
});
|
||||||
|
|
||||||
console.log("All workers registered (55 jobs, 24/7 continuous)");
|
await boss.work("scrape:pricing:naddod", async () => {
|
||||||
|
console.log(`[${new Date().toISOString()}] Running: NADDOD pricing`);
|
||||||
|
const { scrapeNaddod } = await import("./scrapers/naddod");
|
||||||
|
await scrapeNaddod();
|
||||||
|
});
|
||||||
|
|
||||||
|
await boss.work("scrape:pricing:qsfptek", async () => {
|
||||||
|
console.log(`[${new Date().toISOString()}] Running: QSFPTEK pricing`);
|
||||||
|
const { scrapeQsfptek } = await import("./scrapers/qsfptek");
|
||||||
|
await scrapeQsfptek();
|
||||||
|
});
|
||||||
|
|
||||||
|
await boss.work("scrape:pricing:addon", async () => {
|
||||||
|
console.log(`[${new Date().toISOString()}] Running: AddOn Networks pricing`);
|
||||||
|
const { scrapeAddonNetworks } = await import("./scrapers/addon-networks");
|
||||||
|
await scrapeAddonNetworks();
|
||||||
|
});
|
||||||
|
|
||||||
|
console.log("All workers registered (58 jobs, 24/7 continuous)");
|
||||||
}
|
}
|
||||||
|
|||||||
@ -103,16 +103,14 @@ export async function scrapeCommsExpress(): Promise<void> {
|
|||||||
try {
|
try {
|
||||||
const products = await fetchCategory(cat, vendorId);
|
const products = await fetchCategory(cat, vendorId);
|
||||||
for (const p of products) {
|
for (const p of products) {
|
||||||
const transceiverResult = await findOrCreateScrapedTransceiver({
|
const transceiverId = await findOrCreateScrapedTransceiver({
|
||||||
partNumber: p.partNumber,
|
partNumber: p.partNumber,
|
||||||
vendorId,
|
vendorId,
|
||||||
formFactor: p.formFactor,
|
formFactor: p.formFactor,
|
||||||
name: p.name,
|
|
||||||
url: p.url,
|
|
||||||
});
|
});
|
||||||
const hash = contentHash(`${p.partNumber}:${p.price}:${p.currency}`);
|
const hash = contentHash(`${p.partNumber}:${p.price}:${p.currency}`);
|
||||||
const isNew = await upsertPriceObservation({
|
const isNew = await upsertPriceObservation({
|
||||||
transceiverId: transceiverResult.id,
|
transceiverId,
|
||||||
sourceVendorId: vendorId,
|
sourceVendorId: vendorId,
|
||||||
price: p.price,
|
price: p.price,
|
||||||
currency: p.currency,
|
currency: p.currency,
|
||||||
|
|||||||
@ -243,12 +243,12 @@ If no issues found, return []`;
|
|||||||
const issue: ExtractedIssue = {
|
const issue: ExtractedIssue = {
|
||||||
productModel: model,
|
productModel: model,
|
||||||
title: intelResult.title.substring(0, 200),
|
title: intelResult.title.substring(0, 200),
|
||||||
summary: intelResult.description?.substring(0, 500) || "",
|
summary: intelResult.summary?.substring(0, 500) || "",
|
||||||
severity: determineSeverity(intelResult.description || intelResult.title),
|
severity: determineSeverity(intelResult.summary || intelResult.title),
|
||||||
issueTags: extractIssueTags(`${intelResult.title} ${intelResult.description}`),
|
issueTags: extractIssueTags(`${intelResult.title} ${intelResult.summary}`),
|
||||||
affectedFirmware: null,
|
affectedFirmware: null,
|
||||||
fixFirmware: null,
|
fixFirmware: null,
|
||||||
dateReported: intelResult.publishedDate || null,
|
dateReported: intelResult.published_at || null,
|
||||||
isResolved: false,
|
isResolved: false,
|
||||||
confidence: intelResult.confidence || 0.6,
|
confidence: intelResult.confidence || 0.6,
|
||||||
};
|
};
|
||||||
|
|||||||
@ -73,10 +73,11 @@ function extractWarranty(text: string): number | null {
|
|||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
|
|
||||||
async function parseSearchResults($: cheerio.CheerioAPI, baseUrl: string): Promise<Array<{ title: string; url: string; price: string; condition: string; imageUrl: string }>> {
|
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
||||||
|
async function parseSearchResults($: any, baseUrl: string): Promise<Array<{ title: string; url: string; price: string; condition: string; imageUrl: string }>> {
|
||||||
const items: Array<{ title: string; url: string; price: string; condition: string; imageUrl: string }> = [];
|
const items: Array<{ title: string; url: string; price: string; condition: string; imageUrl: string }> = [];
|
||||||
|
|
||||||
$(".s-item").each((_, el) => {
|
$(".s-item").each((_: number, el: unknown) => {
|
||||||
const titleEl = $(el).find(".s-item__title");
|
const titleEl = $(el).find(".s-item__title");
|
||||||
const priceEl = $(el).find(".s-item__price");
|
const priceEl = $(el).find(".s-item__price");
|
||||||
const condEl = $(el).find(".SECONDARY_INFO");
|
const condEl = $(el).find(".SECONDARY_INFO");
|
||||||
@ -151,7 +152,6 @@ async function enrichSwitchFromEbay(switchId: string, model: string): Promise<En
|
|||||||
const html = $.html();
|
const html = $.html();
|
||||||
const extracted = await scrapeWithLLM(html, request.url, {
|
const extracted = await scrapeWithLLM(html, request.url, {
|
||||||
vendorSlug: "ebay",
|
vendorSlug: "ebay",
|
||||||
extractType: "stock",
|
|
||||||
});
|
});
|
||||||
|
|
||||||
// Parse price from string (handle EUR format "1.234,56 EUR")
|
// Parse price from string (handle EUR format "1.234,56 EUR")
|
||||||
@ -177,7 +177,7 @@ async function enrichSwitchFromEbay(switchId: string, model: string): Promise<En
|
|||||||
});
|
});
|
||||||
|
|
||||||
// Extract description
|
// Extract description
|
||||||
const description = extracted?.description
|
const description = extracted?.extraction.standard_name
|
||||||
|| $(".ux-textspans--BOLD").first().text().trim()
|
|| $(".ux-textspans--BOLD").first().text().trim()
|
||||||
|| "";
|
|| "";
|
||||||
|
|
||||||
|
|||||||
@ -58,12 +58,11 @@ export async function scrapeMultimodeInc(): Promise<void> {
|
|||||||
if (price <= 0) continue;
|
if (price <= 0) continue;
|
||||||
|
|
||||||
try {
|
try {
|
||||||
const t = await findOrCreateScrapedTransceiver({
|
const transceiverId = await findOrCreateScrapedTransceiver({
|
||||||
partNumber, vendorId, formFactor: cat.form_factor, name,
|
partNumber, vendorId, formFactor: cat.form_factor,
|
||||||
url: href.startsWith("http") ? href : `${BASE}${href}`,
|
|
||||||
});
|
});
|
||||||
const isNew = await upsertPriceObservation({
|
const isNew = await upsertPriceObservation({
|
||||||
transceiverId: t.id, sourceVendorId: vendorId,
|
transceiverId, sourceVendorId: vendorId,
|
||||||
price, currency: currency || "USD",
|
price, currency: currency || "USD",
|
||||||
stockLevel: "unknown",
|
stockLevel: "unknown",
|
||||||
url: href.startsWith("http") ? href : `${BASE}${href}`,
|
url: href.startsWith("http") ? href : `${BASE}${href}`,
|
||||||
|
|||||||
@ -65,12 +65,11 @@ async function scrapeCategory(path: string, form_factor: string, vendorId: strin
|
|||||||
if (price <= 0) continue;
|
if (price <= 0) continue;
|
||||||
|
|
||||||
try {
|
try {
|
||||||
const t = await findOrCreateScrapedTransceiver({
|
const transceiverId = await findOrCreateScrapedTransceiver({
|
||||||
partNumber, vendorId, formFactor: form_factor, name,
|
partNumber, vendorId, formFactor: form_factor,
|
||||||
url: href.startsWith("http") ? href : `${BASE}${href}`,
|
|
||||||
});
|
});
|
||||||
await upsertPriceObservation({
|
await upsertPriceObservation({
|
||||||
transceiverId: t.id, sourceVendorId: vendorId,
|
transceiverId, sourceVendorId: vendorId,
|
||||||
price, currency: currency || "USD",
|
price, currency: currency || "USD",
|
||||||
stockLevel: "unknown",
|
stockLevel: "unknown",
|
||||||
url: href.startsWith("http") ? href : `${BASE}${href}`,
|
url: href.startsWith("http") ? href : `${BASE}${href}`,
|
||||||
|
|||||||
@ -62,15 +62,13 @@ async function fetchPage(catUrl: string, form_factor: string, vendorId: string,
|
|||||||
const productUrl = href.startsWith("http") ? href : `${BASE}${href}`;
|
const productUrl = href.startsWith("http") ? href : `${BASE}${href}`;
|
||||||
|
|
||||||
try {
|
try {
|
||||||
const t = await findOrCreateScrapedTransceiver({
|
const transceiverId = await findOrCreateScrapedTransceiver({
|
||||||
partNumber,
|
partNumber,
|
||||||
vendorId,
|
vendorId,
|
||||||
formFactor: form_factor,
|
formFactor: form_factor,
|
||||||
name,
|
|
||||||
url: productUrl,
|
|
||||||
});
|
});
|
||||||
await upsertPriceObservation({
|
await upsertPriceObservation({
|
||||||
transceiverId: t.id,
|
transceiverId,
|
||||||
sourceVendorId: vendorId,
|
sourceVendorId: vendorId,
|
||||||
price,
|
price,
|
||||||
currency: currency || "USD",
|
currency: currency || "USD",
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user