fix: improve scraper health monitor — tiered alerts, suppress stable-price false positives

Previous logic fired an alert whenever prices_6h=0, even when prices
were genuinely stable (content hash dedup prevents duplicate inserts).
This caused Flexoptix, ATGBICS and others to trigger alerts every 3h
despite their scrapers running successfully.

New logic:
  🔴 CRITICAL: last price > 7 days, or never scraped (genuine failure)
  🟡 WARNING:  last price 48h–7 days (possibly stale)
  ✅ STABLE:   last price ≤48h, 0 new (prices unchanged, scraper OK)

Also shows pg-boss job state/time alongside each vendor for faster
root-cause diagnosis. Trimmed EXPECTED_VENDORS to vendors with actual
scraper implementations (removed never-scraped placeholders).
This commit is contained in:
Rene Fichtmueller 2026-04-18 02:54:28 +02:00
parent 4d94aa20ba
commit 84eb6e3149

View File

@ -685,15 +685,25 @@ export async function registerWorkers(boss: PgBoss): Promise<void> {
await boss.work("monitor:scraper-health", async () => {
const { pool } = await import("./utils/db");
// Vendors we expect to see prices from regularly
const EXPECTED_VENDORS = [
"FiberMall", "QSFPTEK", "Flexoptix", "FS.COM", "10Gtek",
"ATGBICS", "GBICS", "BlueOptics", "ShopFiber24", "T&S Communication",
"Fluxlight", "Optcore", "Champion ONE", "SFPcables",
"Vcelink", "OpticsBay",
// Vendors we expect to see prices from regularly.
// Mapped: display name → exact pg-boss job name (for last-run lookup).
const EXPECTED_VENDORS: Array<{ name: string; jobName: string }> = [
{ name: "FiberMall", jobName: "scrape:pricing:fibermall" },
{ name: "QSFPTEK", jobName: "scrape:pricing:qsfptek" },
{ name: "Flexoptix", jobName: "scrape:pricing:flexoptix" },
{ name: "FS.COM", jobName: "scrape:pricing:fs" },
{ name: "10Gtek", jobName: "scrape:pricing:10gtek" },
{ name: "ATGBICS", jobName: "scrape:pricing:atgbics" },
{ name: "GBICS", jobName: "scrape:pricing:gbics" },
{ name: "SFPcables", jobName: "scrape:pricing:sfpcables" },
{ name: "NADDOD", jobName: "scrape:pricing:naddod" },
];
const result = await pool.query(`
const vendorNames = EXPECTED_VENDORS.map((v) => v.name);
const jobNames = EXPECTED_VENDORS.map((v) => v.jobName);
// Price observation recency per vendor
const priceResult = await pool.query(`
SELECT v.name,
SUM(CASE WHEN po.time > NOW() - INTERVAL '6 hours' THEN 1 ELSE 0 END) AS prices_6h,
MAX(po.time) AS last_seen,
@ -703,27 +713,76 @@ export async function registerWorkers(boss: PgBoss): Promise<void> {
WHERE v.name = ANY($1)
GROUP BY v.name
ORDER BY last_seen ASC NULLS FIRST
`, [EXPECTED_VENDORS]);
`, [vendorNames]);
const problems: string[] = [];
for (const row of result.rows) {
// Most recent pg-boss job per vendor scraper, in any state (created within last 12h)
const jobResult = await pool.query(`
SELECT DISTINCT ON (name) name, state, completed_on
FROM pgboss.job
WHERE name = ANY($1)
AND created_on > NOW() - INTERVAL '12 hours'
ORDER BY name, created_on DESC
`, [jobNames]);
const jobMap = new Map<string, { state: string; completed_on: Date | null }>();
for (const row of jobResult.rows) {
jobMap.set(row.name as string, { state: row.state as string, completed_on: row.completed_on as Date | null });
}
// Thresholds for alerting:
// CRITICAL (🔴): last price > 168h (7 days) — genuinely broken
// WARNING (🟡): last price > 48h (2 days) — possibly stale
// STABLE (✅): 0 new prices but last price ≤48h — prices unchanged, scraper OK
const CRITICAL_HOURS = 168;
const WARN_HOURS = 48;
const critical: string[] = [];
const warnings: string[] = [];
const stable: string[] = [];
for (const row of priceResult.rows) {
const h = parseFloat(row.hours_since ?? "9999");
const n = parseInt(row.prices_6h ?? "0", 10);
if (n === 0) {
if (n > 0) continue; // new prices written → healthy
const lastStr = row.last_seen
? `last seen ${h.toFixed(1)}h ago (${new Date(row.last_seen).toISOString().slice(0, 16)})`
? `last price ${h.toFixed(1)}h ago (${new Date(row.last_seen as string).toISOString().slice(0, 16)})`
: "NEVER scraped";
problems.push(`${row.name}: 0 prices in last 6h — ${lastStr}`);
const vendor = EXPECTED_VENDORS.find((v) => v.name === row.name);
const jobInfo = vendor ? jobMap.get(vendor.jobName) : undefined;
const jobStr = jobInfo
? ` | job=${jobInfo.state} at ${jobInfo.completed_on ? new Date(jobInfo.completed_on).toISOString().slice(11, 16) : "?"}`
: " | job=not run in 12h";
if (!row.last_seen || h > CRITICAL_HOURS) {
critical.push(`🔴 ${row.name}: ${lastStr}${jobStr}`);
} else if (h > WARN_HOURS) {
warnings.push(`🟡 ${row.name}: ${lastStr}${jobStr}`);
} else {
stable.push(`${row.name}: prices stable (${h.toFixed(1)}h unchanged)${jobStr}`);
}
}
if (problems.length > 0) {
console.error("=== SCRAPER HEALTH ALERT ===");
for (const p of problems) console.error(p);
console.error("=== Check pm2 logs tip-scraper-daemon ===");
if (critical.length > 0 || warnings.length > 0) {
if (critical.length > 0) {
console.error("=== 🔴 SCRAPER CRITICAL — vendors with no prices for 7+ days ===");
for (const p of critical) console.error(p);
}
if (warnings.length > 0) {
console.warn("=== 🟡 SCRAPER WARNING — vendors with stale prices (48h+) ===");
for (const p of warnings) console.warn(p);
}
console.error("=== Check: pm2 logs tip-scraper-daemon ===");
} else {
const activeCount = EXPECTED_VENDORS.length - stable.length;
if (stable.length > 0) {
console.log(`[monitor] Scraper health OK — ${activeCount} vendors active, ${stable.length} stable (no price changes)`);
for (const s of stable) console.log(` ${s}`);
} else {
console.log(`[monitor] Scraper health OK — all ${EXPECTED_VENDORS.length} vendors active in last 6h`);
}
}
});
// ── Verification reconciliation ─────────────────────────────────────────