/**
 * eBay Product Enricher
 *
 * Searches eBay for switch/transceiver models to extract:
 * - Product description & features
 * - Refurbished/used prices
 * - Product images
 * - Technical specs from listing descriptions
 *
 * Uses CheerioCrawler + Crawler LLM for structured extraction.
 */

import { CheerioCrawler, RequestQueue } from "crawlee";
import { scrapeWithLLM } from "../crawler-llm/core";
import { db } from "../utils/db";
import { logger } from "../utils/logger";

/** Normalised item condition used throughout this module. */
type Condition = "new" | "refurbished" | "used";

/** A single eBay listing after the detail page has been scraped. */
interface EbayListing {
  title: string;
  price: number;
  currency: string;
  condition: Condition;
  seller: string;
  warrantyMonths: number | null;
  imageUrl: string | null;
  listingUrl: string;
  itemId: string;
  description: string;
  features: string[];
  specs: Record<string, string>;
}

/** Aggregated enrichment data for one switch model. */
interface EnrichResult {
  model: string;
  listings: EbayListing[];
  bestRefurbPrice: number | null;
  bestNewPrice: number | null;
  features: string[];
  description: string;
  imageUrl: string | null;
}

/** Raw (unparsed) fields scraped from one entry of a search result page. */
interface SearchItem {
  title: string;
  url: string;
  price: string;
  condition: string;
  imageUrl: string;
}

/** `userData` attached to listing-detail requests queued from a search page. */
interface ListingUserData {
  phase: "listing";
  model: string;
  priceStr: string;
  conditionStr: string;
  imageUrl: string;
  title: string;
}

/**
 * Builds the eBay search URL for .de (EUR pricing, covers DE/EU market).
 *
 * @param query Free-text search query; URL-encoded here.
 * @param page  1-based result page, sent as `_pgn`.
 */
function buildSearchUrl(query: string, page = 1): string {
  const encoded = encodeURIComponent(query);
  return `https://www.ebay.de/sch/i.html?_nkw=${encoded}&_sop=15&LH_ItemCondition=3000%7C1500%7C1000&_ipg=50&_pgn=${page}&_stpos=0&_from=R40`;
}

/**
 * Maps an eBay condition label to our condition type.
 *
 * Refurbished keywords are tested before the "new" keywords because labels
 * such as "Renewed" or "Refurbished – wie neu" contain "new"/"neu" as a
 * substring and would otherwise be reported as new. Anything that matches
 * neither group is treated as "used".
 */
function parseCondition(condStr: string): Condition {
  const lower = condStr.toLowerCase();
  if (lower.includes("refurb") || lower.includes("überholt") || lower.includes("generalüber")) {
    return "refurbished";
  }
  if (lower.includes("neu") || lower.includes("new")) {
    return "new";
  }
  return "used";
}

/**
 * Warranty phrases (English and German) paired with the factor that converts
 * the captured number into months. Singular and plural unit forms are accepted.
 */
const WARRANTY_PATTERNS: ReadonlyArray<{ pattern: RegExp; monthsPerUnit: number }> = [
  { pattern: /(\d+)\s*[-–]?\s*months?\s*warrant/i, monthsPerUnit: 1 },
  { pattern: /(\d+)\s*[-–]?\s*monat(?:en|e)?\s*(?:gewähr|garantie)/i, monthsPerUnit: 1 },
  { pattern: /(\d+)\s*[-–]?\s*years?\s*warrant/i, monthsPerUnit: 12 },
  { pattern: /(\d+)\s*[-–]?\s*jahr(?:en|e)?\s*(?:gewähr|garantie)/i, monthsPerUnit: 12 },
];

/**
 * Extracts a warranty duration from listing title/description text.
 *
 * @returns The duration in months from the first matching pattern, or `null`
 *          when no warranty phrase is found.
 */
function extractWarranty(text: string): number | null {
  for (const { pattern, monthsPerUnit } of WARRANTY_PATTERNS) {
    const amount = text.match(pattern)?.[1];
    if (amount) {
      return Number.parseInt(amount, 10) * monthsPerUnit;
    }
  }
  return null;
}

/**
 * Parses a German-formatted price string such as "EUR 1.234,56".
 *
 * Only the first number in the text is used, so a range like
 * "EUR 10,00 bis EUR 20,00" yields its lower bound. All "." characters are
 * treated as thousands separators and "," as the decimal separator.
 *
 * @returns The numeric price, or `0` when no number can be parsed.
 */
function parseEuroPrice(text: string): number {
  const match = text.match(/\d[\d.]*(?:,\d+)?/);
  if (!match) return 0;
  const normalized = match[0].replace(/\./g, "").replace(",", ".");
  const value = Number.parseFloat(normalized);
  return Number.isFinite(value) ? value : 0;
}

/**
 * Reads the result entries (`.s-item`) from an eBay search page.
 *
 * Entries without a title or link, and the "Shop on eBay" placeholder entry,
 * are skipped. Price and condition are returned as raw text.
 *
 * @param $       Cheerio root of the search page.
 * @param baseUrl URL of the search page; currently unused.
 */
// eslint-disable-next-line @typescript-eslint/no-explicit-any
async function parseSearchResults($: any, baseUrl: string): Promise<SearchItem[]> {
  const items: SearchItem[] = [];

  $(".s-item").each((_: number, el: unknown) => {
    const node = $(el);
    const imgEl = node.find(".s-item__image-img");

    const title = node.find(".s-item__title").text().trim();
    const price = node.find(".s-item__price").text().trim();
    const condition = node.find(".SECONDARY_INFO").text().trim();
    const url = node.find(".s-item__link").attr("href") || "";
    const imageUrl = imgEl.attr("src") || imgEl.attr("data-src") || "";

    if (title && url && !title.toLowerCase().includes("shop on ebay")) {
      items.push({ title, url, price, condition, imageUrl });
    }
  });

  return items;
}

/**
 * Crawls eBay for one switch model: one search page, then up to three
 * matching listing pages, and aggregates what was found.
 *
 * @param switchId Switch identifier; its first 8 characters name the request queue.
 * @param model    Model string used as the search query.
 * @returns The aggregated result, or `null` when no listing was scraped.
 */
async function enrichSwitchFromEbay(switchId: string, model: string): Promise<EnrichResult | null> {
  const result: EnrichResult = {
    model,
    listings: [],
    bestRefurbPrice: null,
    bestNewPrice: null,
    features: [],
    description: "",
    imageUrl: null,
  };

  // Listings are considered relevant when their title contains the part of the
  // model name before the first "-".
  const modelPrefix = model.toLowerCase().split("-")[0] ?? "";

  const queue = await RequestQueue.open(`ebay-${switchId.substring(0, 8)}`);
  await queue.addRequest({
    url: buildSearchUrl(model),
    userData: { model, phase: "search" },
  });

  const crawler = new CheerioCrawler({
    requestQueue: queue,
    maxRequestsPerCrawl: 5,
    requestHandlerTimeoutSecs: 30,

    async requestHandler({ request, $, crawler }) {
      const { phase } = request.userData as { phase: string };

      if (phase === "search") {
        const items = await parseSearchResults($, request.url);

        // Take up to 3 most relevant listings
        const relevant = items
          .filter((item) => item.title.toLowerCase().includes(modelPrefix))
          .slice(0, 3);

        for (const item of relevant) {
          if (!item.url.startsWith("http")) continue;

          // Drop the query string so the listing is requested by its plain item URL.
          const listingUrl = item.url.split("?", 1)[0] ?? item.url;
          const userData: ListingUserData = {
            phase: "listing",
            model,
            priceStr: item.price,
            conditionStr: item.condition,
            imageUrl: item.imageUrl,
            title: item.title,
          };
          await crawler.addRequests([{ url: listingUrl, userData }]);
        }
        return;
      }

      if (phase !== "listing") return;

      const {
        title,
        priceStr,
        conditionStr,
        imageUrl: searchImageUrl,
      } = request.userData as ListingUserData;

      // Use Crawler LLM to extract structured data from listing page
      const extracted = await scrapeWithLLM($.html(), request.url, {
        vendorSlug: "ebay",
      });

      const price = parseEuroPrice(priceStr);
      const condition = parseCondition(conditionStr);
      const warranty = extractWarranty(title);

      // Extract image from listing page (higher quality than search thumbnail)
      const listingImage =
        $(".ux-image-carousel-item img").first().attr("src") ||
        $(".img img").first().attr("src") ||
        searchImageUrl;

      // Extract features from item specifics table
      const features: string[] = [];
      $(".ux-labels-values").each((_, el) => {
        const label = $(el).find(".ux-labels-values__labels").text().trim();
        const value = $(el).find(".ux-labels-values__values").text().trim();
        // "Siehe Anzeige" ("see listing") is a placeholder, not a real value.
        if (label && value && value !== "Siehe Anzeige") {
          features.push(`${label}: ${value}`);
        }
      });

      // Extract description
      const description =
        extracted?.extraction.standard_name ||
        $(".ux-textspans--BOLD").first().text().trim() ||
        "";

      result.listings.push({
        title,
        price,
        currency: "EUR",
        condition,
        seller: $(".ux-seller-section__item--seller a").text().trim() || "unknown",
        warrantyMonths: warranty,
        imageUrl: listingImage || null,
        listingUrl: request.url,
        itemId: request.url.match(/\/itm\/(\d+)/)?.[1] || "",
        description,
        features,
        specs: {},
      });

      // Track best prices; "used" listings count towards the refurb price.
      if (price > 0) {
        if (condition === "new") {
          if (!result.bestNewPrice || price < result.bestNewPrice) {
            result.bestNewPrice = price;
          }
        } else if (!result.bestRefurbPrice || price < result.bestRefurbPrice) {
          result.bestRefurbPrice = price;
        }
      }

      // Collect features for switch enrichment (first listing that has any)
      if (features.length > 0 && result.features.length === 0) {
        result.features = features.slice(0, 10);
      }

      // Use the first available image
      if (!result.imageUrl && listingImage) {
        result.imageUrl = listingImage;
      }

      // Use first good description
      if (!result.description && description.length > 50) {
        result.description = description.substring(0, 500);
      }
    },

    // The error is passed as the second argument, not as a context property.
    failedRequestHandler: ({ request }, error) => {
      logger.warn(`eBay enricher failed for ${request.url}: ${error}`);
    },
  });

  try {
    await crawler.run();
  } catch (err) {
    logger.error("eBay crawler run error", { err, model });
  } finally {
    // Named queues persist; drop this one so a later run for the same switch
    // does not find its search URL already marked as handled.
    try {
      await queue.drop();
    } catch (err) {
      logger.warn("eBay enricher: failed to drop request queue", { err, model });
    }
  }

  return result.listings.length > 0 ? result : null;
}

// ─────────────────────────────────────────────────────────────────────────────
// Save enrichment results to DB
// ─────────────────────────────────────────────────────────────────────────────

/**
 * Writes the aggregated enrichment onto the `switches` row and makes sure the
 * eBay vendor row exists.
 *
 * Only fields that were actually found are updated; `ebay_enriched_at` is
 * always set.
 */
async function saveEnrichment(switchId: string, result: EnrichResult): Promise<void> {
  const pool = db;

  // Update switch: features, description, refurb price, image
  const updateFields: string[] = ["ebay_enriched_at = NOW()"];
  const params: unknown[] = [];

  // Appends a parameter and returns its 1-based placeholder ($1, $2, ...).
  const bind = (value: unknown): string => {
    params.push(value);
    return `$${params.length}`;
  };

  if (result.features.length > 0) {
    updateFields.push(`features = ${bind(JSON.stringify(result.features))}::jsonb`);
  }
  if (result.description) {
    // Keeps an existing description; only fills it in when NULL.
    updateFields.push(`description = COALESCE(description, ${bind(result.description)})`);
  }
  if (result.bestRefurbPrice) {
    // NOTE(review): listings are priced in EUR but the column is named *_usd — confirm.
    updateFields.push(`ebay_refurb_price_usd = ${bind(result.bestRefurbPrice)}`);
  }
  if (result.imageUrl && result.imageUrl.startsWith("http")) {
    // Only set image_url if not already set
    updateFields.push(`image_url = COALESCE(NULLIF(image_url, ''), ${bind(result.imageUrl)})`);
  }

  const idPlaceholder = bind(switchId);
  await pool.query(
    `UPDATE switches SET ${updateFields.join(", ")} WHERE id = ${idPlaceholder}`,
    params
  );

  // Find eBay vendor ID (create if needed)
  const ebayVendorResult = await pool.query(
    `INSERT INTO vendors (name, slug, type, website_url) VALUES ('eBay Marketplace', 'ebay', 'marketplace', 'https://www.ebay.de') ON CONFLICT (slug) DO UPDATE SET name = EXCLUDED.name RETURNING id`
  );
  const ebayVendorId = ebayVendorResult.rows[0]?.id;
  if (!ebayVendorId) return;

  // For each listing that has a price, we need a transceiver_id or we skip
  // (price_observations requires transceiver_id — for switches we'll use a different approach later)
  // For now, just log the refurb price data
  logger.info("eBay enrichment saved", {
    model: result.model,
    listingsCount: result.listings.length,
    bestRefurb: result.bestRefurbPrice,
    featuresCount: result.features.length,
    hasImage: !!result.imageUrl,
  });
}

// ─────────────────────────────────────────────────────────────────────────────
// Main: enrich switches that haven't been enriched yet
// ─────────────────────────────────────────────────────────────────────────────

/**
 * Enriches switches that have no `ebay_enriched_at` timestamp and a
 * `max_speed_gbps` of at least 10, fastest first.
 *
 * Switches are processed one at a time with a 3–5 s pause in between. A switch
 * with no listings is still marked as tried; a switch whose enrichment throws
 * is logged and left unmarked.
 *
 * @param limit Maximum number of switches to process.
 */
export async function enrichSwitchesFromEbay(limit = 20): Promise<void> {
  const pool = db;

  const switches = await pool.query<{ id: string; model: string; vendor_name: string }>(
    `SELECT sw.id, sw.model, v.name AS vendor_name FROM switches sw JOIN vendors v ON sw.vendor_id = v.id WHERE sw.ebay_enriched_at IS NULL AND sw.max_speed_gbps >= 10 ORDER BY sw.max_speed_gbps DESC, sw.created_at ASC LIMIT $1`,
    [limit]
  );

  logger.info(`eBay enricher: processing ${switches.rows.length} switches`);

  for (const sw of switches.rows) {
    logger.info(`Enriching ${sw.model} from eBay...`);
    try {
      const result = await enrichSwitchFromEbay(sw.id, sw.model);
      if (result) {
        await saveEnrichment(sw.id, result);
        logger.info(`✓ ${sw.model}: ${result.listings.length} listings, refurb €${result.bestRefurbPrice}`);
      } else {
        // Mark as tried even if no results
        await pool.query("UPDATE switches SET ebay_enriched_at = NOW() WHERE id = $1", [sw.id]);
        logger.info(`○ ${sw.model}: no eBay listings found`);
      }
    } catch (err) {
      logger.error(`✗ ${sw.model}: enrichment failed`, { err });
    }

    // Rate limiting — be polite to eBay
    await new Promise((r) => setTimeout(r, 3000 + Math.random() * 2000));
  }
}

// ─────────────────────────────────────────────────────────────────────────────
// Enrich transceivers from eBay (price observations with condition)
// ─────────────────────────────────────────────────────────────────────────────

/**
 * Records eBay price observations for transceivers that have none from the
 * last 30 days.
 *
 * One search page is fetched per transceiver. The first refurbished and the
 * first new result on that page are each stored as a `price_observations` row.
 *
 * @param limit Maximum number of transceivers to process.
 */
export async function enrichTransceiversFromEbay(limit = 50): Promise<void> {
  const pool = db;

  // Find eBay vendor
  const ebayVendor = await pool.query(
    `INSERT INTO vendors (name, slug, type, website_url) VALUES ('eBay Marketplace', 'ebay', 'marketplace', 'https://www.ebay.de') ON CONFLICT (slug) DO UPDATE SET updated_at = NOW() RETURNING id`
  );
  const ebayVendorId = ebayVendor.rows[0]?.id;
  if (!ebayVendorId) {
    // Without a vendor id every observation would be stored with no source vendor.
    logger.error("eBay transceiver enricher: could not resolve eBay vendor id");
    return;
  }

  // Get transceivers without eBay price observations in last 30 days
  const transceivers = await pool.query<{
    id: string;
    slug: string;
    part_number: string;
    form_factor: string;
    speed_gbps: number;
  }>(
    `SELECT t.id, t.slug, t.part_number, t.form_factor, t.speed_gbps FROM transceivers t WHERE NOT EXISTS ( SELECT 1 FROM price_observations po WHERE po.transceiver_id = t.id AND po.marketplace = 'ebay' AND po.time > NOW() - INTERVAL '30 days' ) AND t.part_number IS NOT NULL ORDER BY t.speed_gbps DESC LIMIT $1`,
    [limit]
  );

  logger.info(`eBay transceiver enricher: processing ${transceivers.rows.length} transceivers`);

  const queue = await RequestQueue.open("ebay-transceivers");

  for (const tcvr of transceivers.rows) {
    const query = tcvr.part_number || `${tcvr.form_factor} ${tcvr.speed_gbps}G transceiver`;
    await queue.addRequest({
      url: buildSearchUrl(query),
      // The `transceiverI` key is kept as-is so that requests already stored in
      // the persisted queue remain readable.
      userData: {
        transceiverI: tcvr.id,
        query,
        formFactor: tcvr.form_factor,
        speedGbps: tcvr.speed_gbps,
      },
    });
  }

  // Stores one search result as a price observation; skips unparsable prices.
  const insertObservation = async (
    transceiverId: string,
    item: SearchItem,
    condition: "new" | "refurbished"
  ): Promise<void> => {
    const price = parseEuroPrice(item.price);
    if (price <= 0) return;

    const warranty = extractWarranty(item.title);
    await pool.query(
      `INSERT INTO price_observations (time, transceiver_id, source_vendor_id, price, currency, condition, marketplace, warranty_months, seller_name, listing_title, url, scrape_method, stock_level) VALUES (NOW(), $1, $2, $3, 'EUR', $4, 'ebay', $5, $6, $7, $8, 'crawlee', 'in_stock') ON CONFLICT DO NOTHING`,
      [
        transceiverId,
        ebayVendorId,
        price,
        condition,
        warranty,
        "eBay Seller",
        item.title.substring(0, 200),
        item.url,
      ]
    );
  };

  const crawler = new CheerioCrawler({
    requestQueue: queue,
    maxRequestsPerCrawl: limit,
    requestHandlerTimeoutSecs: 20,
    maxConcurrency: 2,

    async requestHandler({ request, $ }) {
      const { transceiverI: transceiverId } = request.userData as { transceiverI: string };

      const items = await parseSearchResults($, request.url);

      // The search URL requests price-ascending order (_sop=15), so the first
      // hit per condition is taken as the best price.
      const bestRefurb = items.find((i) => parseCondition(i.condition) === "refurbished");
      const bestNew = items.find((i) => parseCondition(i.condition) === "new");

      // Best refurbished price
      if (bestRefurb) await insertObservation(transceiverId, bestRefurb, "refurbished");
      // Best new price
      if (bestNew) await insertObservation(transceiverId, bestNew, "new");
    },
  });

  try {
    await crawler.run();
  } catch (err) {
    logger.error("eBay transceiver crawler error", { err });
  } finally {
    // Named queues persist; drop this one so the next run re-crawls the same
    // search URLs instead of treating them as already handled.
    try {
      await queue.drop();
    } catch (err) {
      logger.warn("eBay transceiver enricher: failed to drop request queue", { err });
    }
  }
}

/**
 * Parses a positive integer CLI argument, falling back to `fallback` when the
 * argument is missing or not a positive number.
 */
function parseLimitArg(raw: string | undefined, fallback: number): number {
  const parsed = Number.parseInt(raw ?? "", 10);
  return Number.isFinite(parsed) && parsed > 0 ? parsed : fallback;
}

// CLI entrypoint: `<script> [switches|<anything else = transceivers>] [limit]`
if (require.main === module) {
  void (async () => {
    try {
      const target = process.argv[2] || "switches";
      if (target === "switches") {
        await enrichSwitchesFromEbay(parseLimitArg(process.argv[3], 20));
      } else {
        await enrichTransceiversFromEbay(parseLimitArg(process.argv[3], 50));
      }
    } catch (err) {
      logger.error("eBay enricher CLI failed", { err });
      process.exit(1);
    }
    process.exit(0);
  })();
}