From 148d2e1000b73f564bbb054ea911a7060361ae3b Mon Sep 17 00:00:00 2001 From: Rene Fichtmueller Date: Sat, 11 Apr 2026 07:27:24 +0200 Subject: [PATCH] fix(scraper): set CRAWLEE_PURGE_ON_START=1 in withIsolatedStorage MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Crawlee's SessionPool throws 'Could not find SDK_SESSION_POOL_STATE.json' when initializing against a freshly-created isolated storage dir. Setting CRAWLEE_PURGE_ON_START=1 tells Crawlee to start fresh instead of trying to load non-existent session state — fixes FS.com and ATGBICS crashes at the start of every 2h cycle after the dirs were cleaned up. --- packages/scraper/src/scheduler.ts | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/packages/scraper/src/scheduler.ts b/packages/scraper/src/scheduler.ts index ef2afcc..0fdafe5 100644 --- a/packages/scraper/src/scheduler.ts +++ b/packages/scraper/src/scheduler.ts @@ -31,11 +31,16 @@ async function withIsolatedStorage(name: string, fn: () => Promise<void>): Promi mkdirSync(join(dir, "datasets", "default"), { recursive: true }); mkdirSync(join(dir, "key_value_stores", "default"), { recursive: true }); const prev = process.env.CRAWLEE_STORAGE_DIR; + const prevPurge = process.env.CRAWLEE_PURGE_ON_START; process.env.CRAWLEE_STORAGE_DIR = dir; + // Force Crawlee to initialize fresh — prevents "Could not find SDK_SESSION_POOL_STATE.json" + // when the isolated storage dir was just created and has no pre-existing state files. + process.env.CRAWLEE_PURGE_ON_START = "1"; try { await fn(); } finally { process.env.CRAWLEE_STORAGE_DIR = prev ?? ""; + process.env.CRAWLEE_PURGE_ON_START = prevPurge ?? ""; // Clean up after successful run try { rmSync(dir, { recursive: true, force: true }); } catch { /* ignore */ } }