fix: eBay enricher Crawlee isolation + ephemeral queues

- Add makeCrawleeConfig isolation to CheerioCrawler instances
- Switch from named, persistent RequestQueues to ephemeral queues opened
  with RequestQueue.open(null, ...): named queues retain each request's
  'handled' state across runs, so re-runs skipped every URL and produced
  0 observations on every run after the first.
- Both changes apply to enrichSwitchFromEbay and enrichTransceiversFromEbay.
This commit is contained in:
Rene Fichtmueller 2026-04-18 01:42:08 +02:00
parent 35a02057f4
commit 24481b09e6

View File

@ -11,6 +11,7 @@
*/ */
import { CheerioCrawler, RequestQueue } from "crawlee"; import { CheerioCrawler, RequestQueue } from "crawlee";
import { makeCrawleeConfig } from "../utils/crawlee-config";
import { scrapeWithLLM } from "../crawler-llm/core"; import { scrapeWithLLM } from "../crawler-llm/core";
import { db } from "../utils/db"; import { db } from "../utils/db";
import { logger } from "../utils/logger"; import { logger } from "../utils/logger";
@ -109,7 +110,10 @@ async function enrichSwitchFromEbay(switchId: string, model: string): Promise<En
imageUrl: null, imageUrl: null,
}; };
const queue = await RequestQueue.open(`ebay-${switchId.substring(0, 8)}`); // Use ephemeral queue (null) so every run starts fresh — named queues
// retain "handled" state and skip already-processed URLs on re-runs.
const ebayConfig = makeCrawleeConfig("ebay-switches");
const queue = await RequestQueue.open(null, { config: ebayConfig });
await queue.addRequest({ url: buildSearchUrl(model), userData: { model, phase: "search" } }); await queue.addRequest({ url: buildSearchUrl(model), userData: { model, phase: "search" } });
const crawler = new CheerioCrawler({ const crawler = new CheerioCrawler({
@ -230,7 +234,7 @@ async function enrichSwitchFromEbay(switchId: string, model: string): Promise<En
failedRequestHandler: ({ request, error }) => { failedRequestHandler: ({ request, error }) => {
logger.warn(`eBay enricher failed for ${request.url}: ${error}`); logger.warn(`eBay enricher failed for ${request.url}: ${error}`);
}, },
}); }, ebayConfig);
try { try {
await crawler.run(); await crawler.run();
@ -378,7 +382,9 @@ export async function enrichTransceiversFromEbay(limit = 50): Promise<void> {
logger.info(`eBay transceiver enricher: processing ${transceivers.rows.length} transceivers`); logger.info(`eBay transceiver enricher: processing ${transceivers.rows.length} transceivers`);
const queue = await RequestQueue.open("ebay-transceivers"); // Ephemeral queue: every run starts fresh (named queues skip already-handled URLs)
const crawleeConfig = makeCrawleeConfig("ebay-transceivers");
const queue = await RequestQueue.open(null, { config: crawleeConfig });
for (const tcvr of transceivers.rows) { for (const tcvr of transceivers.rows) {
const query = tcvr.part_number || `${tcvr.form_factor} ${tcvr.speed_gbps}G transceiver`; const query = tcvr.part_number || `${tcvr.form_factor} ${tcvr.speed_gbps}G transceiver`;
@ -427,7 +433,7 @@ export async function enrichTransceiversFromEbay(limit = 50): Promise<void> {
// Best new price // Best new price
if (newItems[0]) await insertObs(newItems[0], "new"); if (newItems[0]) await insertObs(newItems[0], "new");
}, },
}); }, crawleeConfig);
try { try {
await crawler.run(); await crawler.run();