fix(scraper): set CRAWLEE_PURGE_ON_START=1 in withIsolatedStorage
Crawlee's SessionPool throws 'Could not find SDK_SESSION_POOL_STATE.json' when initializing against a freshly created isolated storage dir. Setting CRAWLEE_PURGE_ON_START=1 tells Crawlee to start fresh instead of trying to load non-existent session state. This fixes the FS.com and ATGBICS crashes that occurred at the start of every 2h cycle after the dirs were cleaned up.
This commit is contained in:
parent
45c48755e4
commit
148d2e1000
@ -31,11 +31,16 @@ async function withIsolatedStorage(name: string, fn: () => Promise<void>): Promi
|
|||||||
mkdirSync(join(dir, "datasets", "default"), { recursive: true });
|
mkdirSync(join(dir, "datasets", "default"), { recursive: true });
|
||||||
mkdirSync(join(dir, "key_value_stores", "default"), { recursive: true });
|
mkdirSync(join(dir, "key_value_stores", "default"), { recursive: true });
|
||||||
const prev = process.env.CRAWLEE_STORAGE_DIR;
|
const prev = process.env.CRAWLEE_STORAGE_DIR;
|
||||||
|
const prevPurge = process.env.CRAWLEE_PURGE_ON_START;
|
||||||
process.env.CRAWLEE_STORAGE_DIR = dir;
|
process.env.CRAWLEE_STORAGE_DIR = dir;
|
||||||
|
// Force Crawlee to initialize fresh — prevents "Could not find SDK_SESSION_POOL_STATE.json"
|
||||||
|
// when the isolated storage dir was just created and has no pre-existing state files.
|
||||||
|
process.env.CRAWLEE_PURGE_ON_START = "1";
|
||||||
try {
|
try {
|
||||||
await fn();
|
await fn();
|
||||||
} finally {
|
} finally {
|
||||||
process.env.CRAWLEE_STORAGE_DIR = prev ?? "";
|
process.env.CRAWLEE_STORAGE_DIR = prev ?? "";
|
||||||
|
process.env.CRAWLEE_PURGE_ON_START = prevPurge ?? "";
|
||||||
// Always remove the isolated storage dir, whether fn() succeeded or threw
|
// Always remove the isolated storage dir, whether fn() succeeded or threw
|
||||||
try { rmSync(dir, { recursive: true, force: true }); } catch { /* ignore */ }
|
try { rmSync(dir, { recursive: true, force: true }); } catch { /* ignore */ }
|
||||||
}
|
}
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user