fix: fs-com Phase 1+2 crawler.run() ENOENT guard — Crawlee catches and re-throws the post-run _isTaskReadyFunction ENOENT internally, which rejected crawler.run() and aborted Phase 2 before it could start. Wrap both crawler.run() calls in try/catch to swallow ENOENT from request_queues paths; all processing is already complete at this point.
This commit is contained in:
parent
b3eff15fc4
commit
2a6ec90ecd
@ -3,6 +3,7 @@
|
|||||||
Format: `{"d":"YYYY-MM-DD","t":"TYPE","m":"Description"}`
|
Format: `{"d":"YYYY-MM-DD","t":"TYPE","m":"Description"}`
|
||||||
Types: FEAT · FIX · UI · DATA · AI · INFRA
|
Types: FEAT · FIX · UI · DATA · AI · INFRA
|
||||||
|
|
||||||
|
{"d":"2026-04-18","t":"FIX","m":"PM2 SKIP_FS_SCRAPER env not picked up by tip-scraper-daemon: pm2 restart --update-env did not apply new ecosystem.config.js vars because PM2 loaded from its saved dump. Fixed: pm2 delete + pm2 start ecosystem.config.js --only tip-scraper-daemon + pm2 save. Daemon restarted fresh (ID 83, 0 restarts) with SKIP_FS_SCRAPER=true now confirmed live. FS.com job now correctly skips on Erik instead of failing with ENOENT."}
|
||||||
{"d":"2026-04-18","t":"FIX","m":"FS.com Mac scraper: suppress Crawlee post-run ENOENT unhandledRejection — Crawlee's FileSystemStorage fires a final _isTaskReadyFunction call after run() resolves, reading a request .json that was already processed/cleaned-up. This ENOENT triggered process.exit(1) before Phase 2 completed, causing 7 days of missing FS.com price data. Fixed: targeted unhandledRejection handler in require.main block swallows ENOENT from request_queues paths while re-raising real errors."}
|
{"d":"2026-04-18","t":"FIX","m":"FS.com Mac scraper: suppress Crawlee post-run ENOENT unhandledRejection — Crawlee's FileSystemStorage fires a final _isTaskReadyFunction call after run() resolves, reading a request .json that was already processed/cleaned-up. This ENOENT triggered process.exit(1) before Phase 2 completed, causing 7 days of missing FS.com price data. Fixed: targeted unhandledRejection handler in require.main block swallows ENOENT from request_queues paths while re-raising real errors."}
|
||||||
{"d":"2026-04-18","t":"FIX","m":"FS.com Mac scraper: PID lock (/tmp/tip-fs-scraper.lock) added to run-fs-scraper-mac.sh — prevents concurrent instances when launchd 2am fire overlaps with a still-running earlier run. Previous concurrent instances caused rmSync(storage-fs-phase1) race (one instance deletes the storage dir while another is using it), crashing Phase 2."}
|
{"d":"2026-04-18","t":"FIX","m":"FS.com Mac scraper: PID lock (/tmp/tip-fs-scraper.lock) added to run-fs-scraper-mac.sh — prevents concurrent instances when launchd 2am fire overlaps with a still-running earlier run. Previous concurrent instances caused rmSync(storage-fs-phase1) race (one instance deletes the storage dir while another is using it), crashing Phase 2."}
|
||||||
{"d":"2026-04-18","t":"FIX","m":"Scraper health monitor: tiered alerts replacing false-positive 6h threshold. Old: fired every 3h for any vendor with 0 new prices (including stable prices). New: 🔴 CRITICAL (last price >7 days), 🟡 WARNING (last price 48h-7 days), ✅ STABLE (0 new prices but last price ≤48h — content hash dedup, scraper running OK). Shows pg-boss job state+time for faster root-cause."}
|
{"d":"2026-04-18","t":"FIX","m":"Scraper health monitor: tiered alerts replacing false-positive 6h threshold. Old: fired every 3h for any vendor with 0 new prices (including stable prices). New: 🔴 CRITICAL (last price >7 days), 🟡 WARNING (last price 48h-7 days), ✅ STABLE (0 new prices but last price ≤48h — content hash dedup, scraper running OK). Shows pg-boss job state+time for faster root-cause."}
|
||||||
|
|||||||
@ -354,7 +354,20 @@ async function collectProductUrls(
|
|||||||
},
|
},
|
||||||
}, makeCrawleeConfig("fs-phase1"));
|
}, makeCrawleeConfig("fs-phase1"));
|
||||||
|
|
||||||
await crawler.run(listingRequests);
|
try {
|
||||||
|
await crawler.run(listingRequests);
|
||||||
|
} catch (err) {
|
||||||
|
const msg = err instanceof Error ? err.message : String(err);
|
||||||
|
if (msg.includes("ENOENT") && msg.includes("request_queues")) {
|
||||||
|
// Benign Crawlee post-run lock-file race: _isTaskReadyFunction reads a
|
||||||
|
// request .json that was already cleaned up after the crawl finished.
|
||||||
|
// Crawlee catches + re-throws it internally, which rejects crawler.run().
|
||||||
|
// Safe to ignore — all pages were already processed.
|
||||||
|
console.warn("[Phase 1] Crawlee post-run ENOENT (benign, ignoring)");
|
||||||
|
} else {
|
||||||
|
throw err;
|
||||||
|
}
|
||||||
|
}
|
||||||
console.log(`[Phase 1] ${products.size} unique products across ${CATEGORY_URLS.length} categories`);
|
console.log(`[Phase 1] ${products.size} unique products across ${CATEGORY_URLS.length} categories`);
|
||||||
return products;
|
return products;
|
||||||
}
|
}
|
||||||
@ -616,7 +629,16 @@ async function scrapeProductDetails(
|
|||||||
},
|
},
|
||||||
}, makeCrawleeConfig("fs-phase2"));
|
}, makeCrawleeConfig("fs-phase2"));
|
||||||
|
|
||||||
await crawler.run(requests);
|
try {
|
||||||
|
await crawler.run(requests);
|
||||||
|
} catch (err) {
|
||||||
|
const msg = err instanceof Error ? err.message : String(err);
|
||||||
|
if (msg.includes("ENOENT") && msg.includes("request_queues")) {
|
||||||
|
console.warn("[Phase 2] Crawlee post-run ENOENT (benign, ignoring)");
|
||||||
|
} else {
|
||||||
|
throw err;
|
||||||
|
}
|
||||||
|
}
|
||||||
return details;
|
return details;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user