fix(scraper): set CRAWLEE_PURGE_ON_START=1 in withIsolatedStorage
Crawlee's SessionPool throws 'Could not find SDK_SESSION_POOL_STATE.json' when initializing against a freshly created isolated storage dir. Setting CRAWLEE_PURGE_ON_START=1 tells Crawlee to start fresh instead of trying to load non-existent session state. This fixes the FS.com and ATGBICS crashes that occurred at the start of every 2h cycle after the dirs were cleaned up.
This commit is contained in:
parent
a3af873206
commit
832f163be0
@ -31,11 +31,16 @@ async function withIsolatedStorage(name: string, fn: () => Promise<void>): Promi
|
||||
mkdirSync(join(dir, "datasets", "default"), { recursive: true });
|
||||
mkdirSync(join(dir, "key_value_stores", "default"), { recursive: true });
|
||||
const prev = process.env.CRAWLEE_STORAGE_DIR;
|
||||
const prevPurge = process.env.CRAWLEE_PURGE_ON_START;
|
||||
process.env.CRAWLEE_STORAGE_DIR = dir;
|
||||
// Force Crawlee to initialize fresh — prevents "Could not find SDK_SESSION_POOL_STATE.json"
|
||||
// when the isolated storage dir was just created and has no pre-existing state files.
|
||||
process.env.CRAWLEE_PURGE_ON_START = "1";
|
||||
try {
|
||||
await fn();
|
||||
} finally {
|
||||
process.env.CRAWLEE_STORAGE_DIR = prev ?? "";
|
||||
process.env.CRAWLEE_PURGE_ON_START = prevPurge ?? "";
|
||||
// Always remove the isolated storage dir — this runs in `finally`, so it happens whether fn() succeeded or threw
|
||||
try { rmSync(dir, { recursive: true, force: true }); } catch { /* ignore */ }
|
||||
}
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user