From 2a6ec90ecdeec7a5dd0b7e8191b5614d6052cb64 Mon Sep 17 00:00:00 2001 From: Rene Fichtmueller Date: Sat, 18 Apr 2026 03:52:49 +0200 Subject: [PATCH] =?UTF-8?q?fix:=20fs-com=20Phase=201+2=20crawler.run()=20E?= =?UTF-8?q?NOENT=20guard=20=E2=80=94=20Crawlee=20catches=20and=20re-throws?= =?UTF-8?q?=20the=20post-run=20=5FisTaskReadyFunction=20ENOENT=20internall?= =?UTF-8?q?y,=20which=20rejected=20crawler.run()=20and=20aborted=20Phase?= =?UTF-8?q?=202=20before=20it=20could=20start.=20Wrap=20both=20crawler.run?= =?UTF-8?q?()=20calls=20in=20try/catch=20to=20swallow=20ENOENT=20from=20re?= =?UTF-8?q?quest=5Fqueues=20paths;=20all=20processing=20is=20already=20com?= =?UTF-8?q?plete=20at=20this=20point.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- CHANGELOG_PENDING.md | 1 + packages/scraper/src/scrapers/fs-com.ts | 26 +++++++++++++++++++++++-- 2 files changed, 25 insertions(+), 2 deletions(-) diff --git a/CHANGELOG_PENDING.md b/CHANGELOG_PENDING.md index 0aa5e2f..99d09a0 100644 --- a/CHANGELOG_PENDING.md +++ b/CHANGELOG_PENDING.md @@ -3,6 +3,7 @@ Format: `{"d":"YYYY-MM-DD","t":"TYPE","m":"Description"}` Types: FEAT · FIX · UI · DATA · AI · INFRA +{"d":"2026-04-18","t":"FIX","m":"PM2 SKIP_FS_SCRAPER env not picked up by tip-scraper-daemon: pm2 restart --update-env did not apply new ecosystem.config.js vars because PM2 loaded from its saved dump. Fixed: pm2 delete + pm2 start ecosystem.config.js --only tip-scraper-daemon + pm2 save. Daemon restarted fresh (ID 83, 0 restarts) with SKIP_FS_SCRAPER=true now confirmed live. FS.com job now correctly skips on Erik instead of failing with ENOENT."} {"d":"2026-04-18","t":"FIX","m":"FS.com Mac scraper: suppress Crawlee post-run ENOENT unhandledRejection — Crawlee's FileSystemStorage fires a final _isTaskReadyFunction call after run() resolves, reading a request .json that was already processed/cleaned-up. This ENOENT triggered process.exit(1) before Phase 2 completed, causing 7 days of missing FS.com price data. Fixed: targeted unhandledRejection handler in require.main block swallows ENOENT from request_queues paths while re-raising real errors."} {"d":"2026-04-18","t":"FIX","m":"FS.com Mac scraper: PID lock (/tmp/tip-fs-scraper.lock) added to run-fs-scraper-mac.sh — prevents concurrent instances when launchd 2am fire overlaps with a still-running earlier run. Previous concurrent instances caused rmSync(storage-fs-phase1) race (one instance deletes the storage dir while another is using it), crashing Phase 2."} {"d":"2026-04-18","t":"FIX","m":"Scraper health monitor: tiered alerts replacing false-positive 6h threshold. Old: fired every 3h for any vendor with 0 new prices (including stable prices). New: 🔴 CRITICAL (last price >7 days), 🟡 WARNING (last price 48h-7 days), ✅ STABLE (0 new prices but last price ≤48h — content hash dedup, scraper running OK). Shows pg-boss job state+time for faster root-cause."} diff --git a/packages/scraper/src/scrapers/fs-com.ts b/packages/scraper/src/scrapers/fs-com.ts index 76c43ea..235489b 100644 --- a/packages/scraper/src/scrapers/fs-com.ts +++ b/packages/scraper/src/scrapers/fs-com.ts @@ -354,7 +354,20 @@ async function collectProductUrls( }, }, makeCrawleeConfig("fs-phase1")); - await crawler.run(listingRequests); + try { + await crawler.run(listingRequests); + } catch (err) { + const msg = err instanceof Error ? err.message : String(err); + if (msg.includes("ENOENT") && msg.includes("request_queues")) { + // Benign Crawlee post-run lock-file race: _isTaskReadyFunction reads a + // request .json that was already cleaned up after the crawl finished. + // Crawlee catches + re-throws it internally, which rejects crawler.run(). + // Safe to ignore — all pages were already processed. + console.warn("[Phase 1] Crawlee post-run ENOENT (benign, ignoring)"); + } else { + throw err; + } + } console.log(`[Phase 1] ${products.size} unique products across ${CATEGORY_URLS.length} categories`); return products; } @@ -616,7 +629,16 @@ async function scrapeProductDetails( }, }, makeCrawleeConfig("fs-phase2")); - await crawler.run(requests); + try { + await crawler.run(requests); + } catch (err) { + const msg = err instanceof Error ? err.message : String(err); + if (msg.includes("ENOENT") && msg.includes("request_queues")) { + console.warn("[Phase 2] Crawlee post-run ENOENT (benign, ignoring)"); + } else { + throw err; + } + } return details; }