fix: eBay enricher Crawlee isolation + ephemeral queues
- Add makeCrawleeConfig isolation to CheerioCrawler instances.
- Switch from named persistent RequestQueue to ephemeral (null) queues: named queues retain 'handled' state and skip all URLs on re-runs, causing 0 observations on every run after the first.
- Applies to both enrichSwitchFromEbay and enrichTransceiversFromEbay.
This commit is contained in:
parent
35a02057f4
commit
24481b09e6
@ -11,6 +11,7 @@
|
||||
*/
|
||||
|
||||
import { CheerioCrawler, RequestQueue } from "crawlee";
|
||||
import { makeCrawleeConfig } from "../utils/crawlee-config";
|
||||
import { scrapeWithLLM } from "../crawler-llm/core";
|
||||
import { db } from "../utils/db";
|
||||
import { logger } from "../utils/logger";
|
||||
@ -109,7 +110,10 @@ async function enrichSwitchFromEbay(switchId: string, model: string): Promise<En
|
||||
imageUrl: null,
|
||||
};
|
||||
|
||||
const queue = await RequestQueue.open(`ebay-${switchId.substring(0, 8)}`);
|
||||
// Use ephemeral queue (null) so every run starts fresh — named queues
|
||||
// retain "handled" state and skip already-processed URLs on re-runs.
|
||||
const ebayConfig = makeCrawleeConfig("ebay-switches");
|
||||
const queue = await RequestQueue.open(null, { config: ebayConfig });
|
||||
await queue.addRequest({ url: buildSearchUrl(model), userData: { model, phase: "search" } });
|
||||
|
||||
const crawler = new CheerioCrawler({
|
||||
@ -230,7 +234,7 @@ async function enrichSwitchFromEbay(switchId: string, model: string): Promise<En
|
||||
failedRequestHandler: ({ request, error }) => {
|
||||
logger.warn(`eBay enricher failed for ${request.url}: ${error}`);
|
||||
},
|
||||
});
|
||||
}, ebayConfig);
|
||||
|
||||
try {
|
||||
await crawler.run();
|
||||
@ -378,7 +382,9 @@ export async function enrichTransceiversFromEbay(limit = 50): Promise<void> {
|
||||
|
||||
logger.info(`eBay transceiver enricher: processing ${transceivers.rows.length} transceivers`);
|
||||
|
||||
const queue = await RequestQueue.open("ebay-transceivers");
|
||||
// Ephemeral queue: every run starts fresh (named queues skip already-handled URLs)
|
||||
const crawleeConfig = makeCrawleeConfig("ebay-transceivers");
|
||||
const queue = await RequestQueue.open(null, { config: crawleeConfig });
|
||||
|
||||
for (const tcvr of transceivers.rows) {
|
||||
const query = tcvr.part_number || `${tcvr.form_factor} ${tcvr.speed_gbps}G transceiver`;
|
||||
@ -427,7 +433,7 @@ export async function enrichTransceiversFromEbay(limit = 50): Promise<void> {
|
||||
// Best new price
|
||||
if (newItems[0]) await insertObs(newItems[0], "new");
|
||||
},
|
||||
});
|
||||
}, crawleeConfig);
|
||||
|
||||
try {
|
||||
await crawler.run();
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user