fix(scheduler): vendor discovery crawlers daily 24/7 (not weekly)

This commit is contained in:
Rene Fichtmueller 2026-04-28 23:59:00 +02:00
parent 297dc46f2b
commit 39a63e0401
2 changed files with 11 additions and 10 deletions

View File

@@ -17,6 +17,7 @@
* tsx packages/scraper/src/crawler-llm/vendor-discovery-crawler.ts * tsx packages/scraper/src/crawler-llm/vendor-discovery-crawler.ts
* *
* Or import and call discoverVendorCatalog() from the scheduler. * Or import and call discoverVendorCatalog() from the scheduler.
* Scheduler: 8 vendors daily, 2h stagger (20:00/22:00/00:00/02:00/04:00/06:00/08:00/10:00 UTC).
*/ */
import { PlaywrightCrawler, RequestQueue, Configuration, type Log } from "crawlee"; import { PlaywrightCrawler, RequestQueue, Configuration, type Log } from "crawlee";

View File

@@ -442,19 +442,19 @@ export async function registerSchedules(boss: PgBoss): Promise<void> {
await boss.schedule("scrape:catalog:avaya-legacy-oem", "15 6 * * *", {}, { retryLimit: 2, expireInSeconds: 3600 }); await boss.schedule("scrape:catalog:avaya-legacy-oem", "15 6 * * *", {}, { retryLimit: 2, expireInSeconds: 3600 });
// ══════════════════════════════════════════════════════════════════════ // ══════════════════════════════════════════════════════════════════════
// VENDOR DISCOVERY CRAWLERS — weekly (deep crawl, Playwright, TIPLLM training) // VENDOR DISCOVERY CRAWLERS — daily, permanent 24/7 rotation
// Each run: crawls catalog → LLM extract → spec validate → DB + Gitea SFT // Each run: crawls catalog → LLM extract → spec validate → DB + Gitea SFT
// Staggered across Sun/Mon nights (low-traffic window, 2h expiry each) // 8 vendors × 2h stagger, each vendor runs once every 24h, no overlap
// ══════════════════════════════════════════════════════════════════════ // ══════════════════════════════════════════════════════════════════════
await boss.schedule("discover:vendor:cisco-tmg", "0 20 * * 0", {}, { retryLimit: 1, expireInSeconds: 7200 }); await boss.schedule("discover:vendor:cisco-tmg", "0 20 * * *", {}, { retryLimit: 1, expireInSeconds: 7200 });
await boss.schedule("discover:vendor:juniper", "0 22 * * 0", {}, { retryLimit: 1, expireInSeconds: 7200 }); await boss.schedule("discover:vendor:juniper", "0 22 * * *", {}, { retryLimit: 1, expireInSeconds: 7200 });
await boss.schedule("discover:vendor:arista", "0 0 * * 1", {}, { retryLimit: 1, expireInSeconds: 7200 }); await boss.schedule("discover:vendor:arista", "0 0 * * *", {}, { retryLimit: 1, expireInSeconds: 7200 });
await boss.schedule("discover:vendor:fs-com", "0 2 * * 1", {}, { retryLimit: 1, expireInSeconds: 7200 }); await boss.schedule("discover:vendor:fs-com", "0 2 * * *", {}, { retryLimit: 1, expireInSeconds: 7200 });
await boss.schedule("discover:vendor:flexoptix", "0 4 * * 1", {}, { retryLimit: 1, expireInSeconds: 7200 }); await boss.schedule("discover:vendor:flexoptix", "0 4 * * *", {}, { retryLimit: 1, expireInSeconds: 7200 });
await boss.schedule("discover:vendor:nokia", "0 6 * * 1", {}, { retryLimit: 1, expireInSeconds: 7200 }); await boss.schedule("discover:vendor:nokia", "0 6 * * *", {}, { retryLimit: 1, expireInSeconds: 7200 });
await boss.schedule("discover:vendor:huawei", "0 8 * * 1", {}, { retryLimit: 1, expireInSeconds: 7200 }); await boss.schedule("discover:vendor:huawei", "0 8 * * *", {}, { retryLimit: 1, expireInSeconds: 7200 });
await boss.schedule("discover:vendor:ii-vi", "0 10 * * 1", {}, { retryLimit: 1, expireInSeconds: 7200 }); await boss.schedule("discover:vendor:ii-vi", "0 10 * * *", {}, { retryLimit: 1, expireInSeconds: 7200 });
// ══════════════════════════════════════════════════════════════════════ // ══════════════════════════════════════════════════════════════════════
// VENDOR LISTS — every 12h // VENDOR LISTS — every 12h