From 39a63e04016a84d6ab2c79c5baa959306e97790a Mon Sep 17 00:00:00 2001 From: Rene Fichtmueller Date: Tue, 28 Apr 2026 23:59:00 +0200 Subject: [PATCH] fix(scheduler): vendor discovery crawlers daily 24/7 (not weekly) --- .../crawler-llm/vendor-discovery-crawler.ts | 1 + packages/scraper/src/scheduler.ts | 20 +++++++++---------- 2 files changed, 11 insertions(+), 10 deletions(-) diff --git a/packages/scraper/src/crawler-llm/vendor-discovery-crawler.ts b/packages/scraper/src/crawler-llm/vendor-discovery-crawler.ts index 71b9de5..f307e25 100644 --- a/packages/scraper/src/crawler-llm/vendor-discovery-crawler.ts +++ b/packages/scraper/src/crawler-llm/vendor-discovery-crawler.ts @@ -17,6 +17,7 @@ * tsx packages/scraper/src/crawler-llm/vendor-discovery-crawler.ts * * Or import and call discoverVendorCatalog() from the scheduler. + * Scheduler: 8 vendors daily, 2h stagger (20:00/22:00/00:00/02:00/04:00/06:00/08:00/10:00 UTC). */ import { PlaywrightCrawler, RequestQueue, Configuration, type Log } from "crawlee"; diff --git a/packages/scraper/src/scheduler.ts b/packages/scraper/src/scheduler.ts index c75a3e9..a4bb284 100644 --- a/packages/scraper/src/scheduler.ts +++ b/packages/scraper/src/scheduler.ts @@ -442,19 +442,19 @@ export async function registerSchedules(boss: PgBoss): Promise { await boss.schedule("scrape:catalog:avaya-legacy-oem", "15 6 * * *", {}, { retryLimit: 2, expireInSeconds: 3600 }); // ══════════════════════════════════════════════════════════════════════ - // VENDOR DISCOVERY CRAWLERS — weekly (deep crawl, Playwright, TIPLLM training) + // VENDOR DISCOVERY CRAWLERS — daily, permanent 24/7 rotation // Each run: crawls catalog → LLM extract → spec validate → DB + Gitea SFT - // Staggered across Sun/Mon nights (low-traffic window, 2h expiry each) + // 8 vendors × 2h stagger (20:00 → 10:00 UTC), each vendor once per 24h, 2h expiry each // ══════════════════════════════════════════════════════════════════════ - await boss.schedule("discover:vendor:cisco-tmg", "0 20 * * 0", {}, { 
retryLimit: 1, expireInSeconds: 7200 }); - await boss.schedule("discover:vendor:juniper", "0 22 * * 0", {}, { retryLimit: 1, expireInSeconds: 7200 }); - await boss.schedule("discover:vendor:arista", "0 0 * * 1", {}, { retryLimit: 1, expireInSeconds: 7200 }); - await boss.schedule("discover:vendor:fs-com", "0 2 * * 1", {}, { retryLimit: 1, expireInSeconds: 7200 }); - await boss.schedule("discover:vendor:flexoptix", "0 4 * * 1", {}, { retryLimit: 1, expireInSeconds: 7200 }); - await boss.schedule("discover:vendor:nokia", "0 6 * * 1", {}, { retryLimit: 1, expireInSeconds: 7200 }); - await boss.schedule("discover:vendor:huawei", "0 8 * * 1", {}, { retryLimit: 1, expireInSeconds: 7200 }); - await boss.schedule("discover:vendor:ii-vi", "0 10 * * 1", {}, { retryLimit: 1, expireInSeconds: 7200 }); + await boss.schedule("discover:vendor:cisco-tmg", "0 20 * * *", {}, { retryLimit: 1, expireInSeconds: 7200 }); + await boss.schedule("discover:vendor:juniper", "0 22 * * *", {}, { retryLimit: 1, expireInSeconds: 7200 }); + await boss.schedule("discover:vendor:arista", "0 0 * * *", {}, { retryLimit: 1, expireInSeconds: 7200 }); + await boss.schedule("discover:vendor:fs-com", "0 2 * * *", {}, { retryLimit: 1, expireInSeconds: 7200 }); + await boss.schedule("discover:vendor:flexoptix", "0 4 * * *", {}, { retryLimit: 1, expireInSeconds: 7200 }); + await boss.schedule("discover:vendor:nokia", "0 6 * * *", {}, { retryLimit: 1, expireInSeconds: 7200 }); + await boss.schedule("discover:vendor:huawei", "0 8 * * *", {}, { retryLimit: 1, expireInSeconds: 7200 }); + await boss.schedule("discover:vendor:ii-vi", "0 10 * * *", {}, { retryLimit: 1, expireInSeconds: 7200 }); // ══════════════════════════════════════════════════════════════════════ // VENDOR LISTS — every 12h