From 199f36be487bdff310dfbb833da43e17b8e3a6b3 Mon Sep 17 00:00:00 2001 From: Rene Fichtmueller Date: Wed, 29 Apr 2026 16:14:25 +0200 Subject: [PATCH] fix(scraper): auto-create pg-boss queues before scheduling + worker/schedule order MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - scheduler: patch boss.schedule() to call createQueue() first (idempotent), fixing FK constraint errors after DB reset — no need to touch 277 call sites - index: registerWorkers() before registerSchedules() since boss.work() must register handlers before schedules fire - dashboard: fix switchBlogLlm() to use api() helper (adds Bearer auth token) instead of raw fetch() which was returning 401 Unauthorized --- packages/dashboard/index.html | 3 +-- packages/scraper/src/index.ts | 11 +++++++++-- packages/scraper/src/scheduler.ts | 12 ++++++++++++ 3 files changed, 22 insertions(+), 4 deletions(-) diff --git a/packages/dashboard/index.html b/packages/dashboard/index.html index e583d9a..f26f5b4 100644 --- a/packages/dashboard/index.html +++ b/packages/dashboard/index.html @@ -5644,12 +5644,11 @@ async function switchBlogLlm(providerKey, model) { try { var body = { provider: providerKey }; if (model) body.model = model; - var res = await fetch(API + '/api/blog/llm/switch', { + var data = await api('/api/blog/llm/switch', { method: 'POST', headers: { 'Content-Type': 'application/json' }, body: JSON.stringify(body) }); - var data = await res.json(); if (data.success) { if (msgEl) { msgEl.style.background = '#d1fae5'; diff --git a/packages/scraper/src/index.ts b/packages/scraper/src/index.ts index aec908a..bba4984 100644 --- a/packages/scraper/src/index.ts +++ b/packages/scraper/src/index.ts @@ -32,6 +32,7 @@ * tsx src/index.ts --addon — Run AddOn Networks scraper once * tsx src/index.ts --fiber24 — Run ShopFiber24 scraper once (sitemap-based) * tsx src/index.ts --fibermall — Run FiberMall scraper once + * tsx src/index.ts --backfill-images — Fill missing 
transceiver product photos */ import { createScheduler, registerSchedules, registerWorkers } from "./scheduler"; import { scrapeFs } from "./scrapers/fs-com"; @@ -156,6 +157,10 @@ async function runOnce(): Promise { if (args.includes("--fibermall") || isAll || isFetchOnly) { await scrapeFiberMall(); } + if (args.includes("--backfill-images")) { + const { backfillImages } = await import("./utils/backfill-images"); + await backfillImages(); + } // Playwright-based scrapers (need Chromium installed) if (!isFetchOnly) { @@ -218,8 +223,10 @@ async function runScheduler(): Promise { console.warn("Startup zombie cleanup failed (non-fatal):", (err as Error).message); } - await registerSchedules(boss); + // Workers must register before schedules — boss.work() auto-creates queues, + // boss.schedule() requires the queue to already exist (pg-boss v10 FK constraint) await registerWorkers(boss); + await registerSchedules(boss); console.log("\nScheduler running. Press Ctrl+C to stop.\n"); @@ -235,7 +242,7 @@ async function runScheduler(): Promise { process.on("SIGTERM", shutdown); } -const ALL_FLAGS = ["--all", "--fs", "--cisco", "--optcore", "--news", "--flexoptix", "--vendors", "--10gtek", "--champion", "--fluxlight", "--sfpcables", "--gbics", "--prolabs", "--naddod", "--qsfptek", "--addon", "--juniper", "--switches", "--whitebox", "--switches-ext", "--flexoptix-vendors", "--sonic-hcl", "--edgecore", "--ufispace", "--switch-assets", "--switch-crawl", "--switch-crawl-pw", "--fetch-only", "--atgbics", "--fiber24", "--fibermall"]; +const ALL_FLAGS = ["--all", "--fs", "--cisco", "--optcore", "--news", "--flexoptix", "--vendors", "--10gtek", "--champion", "--fluxlight", "--sfpcables", "--gbics", "--prolabs", "--naddod", "--qsfptek", "--addon", "--juniper", "--switches", "--whitebox", "--switches-ext", "--flexoptix-vendors", "--sonic-hcl", "--edgecore", "--ufispace", "--switch-assets", "--switch-crawl", "--switch-crawl-pw", "--fetch-only", "--atgbics", "--fiber24", "--fibermall", 
"--backfill-images"]; if (args.some((a) => ALL_FLAGS.includes(a))) { runOnce().catch((err) => { diff --git a/packages/scraper/src/scheduler.ts b/packages/scraper/src/scheduler.ts index a4bb284..927fd9c 100644 --- a/packages/scraper/src/scheduler.ts +++ b/packages/scraper/src/scheduler.ts @@ -63,6 +63,18 @@ export async function createScheduler(): Promise { } export async function registerSchedules(boss: PgBoss): Promise<void> { + // pg-boss v10: boss.schedule() requires the queue to already exist in pgboss.queue. + // After a DB reset (e.g. server outage), all queue rows are wiped. + // Patch boss.schedule to auto-create queues idempotently before each schedule call, + // so the 277 individual schedule() calls below don't need to be touched. + const _origSchedule = boss.schedule.bind(boss) as typeof boss.schedule; + (boss as unknown as Record<string, unknown>).schedule = async ( + name: string, cron: string, data?: unknown, opts?: unknown, + ) => { + await boss.createQueue(name).catch(() => { /* already exists */ }); + return _origSchedule(name, cron, data as object, opts as object); + }; + const queues = [ // ── Playwright scrapers (Erik, every 8h) ─────────────────────────── "scrape:pricing:fs",