Rene Fichtmueller a3af873206 feat(scraper): add NADDOD/QSFPTEK/AddOn to scheduler, fix pre-existing TS build errors
- Register scrape:pricing:naddod (48 */2), qsfptek (52 */2), addon (55 */2) in pg-boss
- Add boss.work() handlers for all three (fetch-based, run on Erik)
- Fix findOrCreateScrapedTransceiver callers: remove invalid `name`/`url` params,
  fix `t.id` → `t` (function already returns string ID)
- Fix ebay-enricher: remove invalid `extractType` option, use extraction.standard_name
  instead of non-existent `.description`, fix cheerio type incompatibility
- Fix community-issues: description → summary, publishedDate → published_at
- Startup zombie cleanup already deployed (index.ts) — no changes needed
- ProLabs rewritten to fetch-based catalog scraper (no Playwright, bypasses WAF)
2026-04-11 03:17:33 +02:00

451 lines
17 KiB
TypeScript
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

/**
* eBay Product Enricher
*
* Searches eBay for switch/transceiver models to extract:
* - Product description & features
* - Refurbished/used prices
* - Product images
* - Technical specs from listing descriptions
*
* Uses CheerioCrawler + Crawler LLM for structured extraction.
*/
import { CheerioCrawler, RequestQueue } from "crawlee";
import { scrapeWithLLM } from "../crawler-llm/core";
import { db } from "../utils/db";
import { logger } from "../utils/logger";
/**
 * One eBay listing, combining data from the search-result row with data
 * scraped from the listing's own page.
 */
interface EbayListing {
/** Listing title as shown on the search-result row. */
title: string;
/** Numeric price parsed from the search-result price text; 0 when it could not be parsed. */
price: number;
/** Currency code; the code in this file always writes "EUR". */
currency: string;
/** Normalised condition bucket derived from eBay's condition text. */
condition: "new" | "refurbished" | "used";
/** Seller name read from the listing page, or "unknown" when none was found. */
seller: string;
/** Warranty length in months parsed from the title, or null when none is stated. */
warrantyMonths: number | null;
/** Listing-page image URL, falling back to the search thumbnail; null when neither exists. */
imageUrl: string | null;
/** URL of the listing page that was scraped. */
listingUrl: string;
/** Numeric item id taken from the `/itm/<id>` URL segment; empty string when absent. */
itemId: string;
/** Short description text for the listing (may be empty). */
description: string;
/** "label: value" strings collected from the item-specifics table. */
features: string[];
/** Key/value specs; NOTE(review): the visible code always stores an empty object here. */
specs: Record<string, string>;
}
/** Aggregated outcome of enriching one switch model from its eBay listings. */
interface EnrichResult {
/** Switch model string that was searched for. */
model: string;
/** Every listing page that was scraped successfully. */
listings: EbayListing[];
/** Lowest positive price among listings classified as refurbished OR used; null if none. */
bestRefurbPrice: number | null;
/** Lowest positive price among listings classified as new; null if none. */
bestNewPrice: number | null;
/** Up to 10 feature strings, taken from the first listing that yielded any. */
features: string[];
/** First listing description longer than 50 characters, truncated to 500; "" if none. */
description: string;
/** First listing image URL found; null if none. */
imageUrl: string | null;
}
/**
 * Builds an ebay.de search URL (EUR pricing, covers the DE/EU market).
 *
 * The query-string flags are fixed:
 * - `_nkw`  — the URL-encoded search phrase
 * - `_pgn`  — the 1-based result page
 * - `_sop`, `LH_ItemCondition`, `_ipg`, `_stpos`, `_from` — eBay-specific
 *   sort/filter/paging switches. NOTE(review): presumably "cheapest first",
 *   "used | new-other | new" and "50 items per page" — confirm against eBay.
 *
 * @param query Free-text search phrase (model or part number).
 * @param page  1-based result page, defaults to the first page.
 * @returns Absolute search URL.
 */
function buildSearchUrl(query: string, page = 1): string {
  const encoded = encodeURIComponent(query);
  return `https://www.ebay.de/sch/i.html?_nkw=${encoded}&_sop=15&LH_ItemCondition=3000%7C1500%7C1000&_ipg=50&_pgn=${page}&_stpos=0&_from=R40`;
}
/**
 * Maps eBay's free-text condition label (German or English) onto one of the
 * three condition buckets used by this module.
 *
 * Matching is case-insensitive substring search. "New" markers are tested
 * first, then refurbished markers; anything else counts as used.
 *
 * @param condStr Raw condition text from the listing.
 * @returns "new", "refurbished" or "used".
 */
function parseCondition(condStr: string): "new" | "refurbished" | "used" {
  const text = condStr.toLowerCase();
  const mentionsAny = (...needles: string[]): boolean =>
    needles.some((needle) => text.includes(needle));

  if (mentionsAny("neu", "new")) {
    return "new";
  }
  return mentionsAny("refurb", "überholt", "generalüber") ? "refurbished" : "used";
}
/**
 * Warranty phrases recognised by {@link extractWarranty}, tried in order
 * (month-based before year-based). Each pattern captures the number in group 1
 * and accepts singular and plural unit forms ("month"/"months",
 * "Monat"/"Monate"/"Monaten", "year"/"years", "Jahr"/"Jahre"/"Jahren").
 * German patterns accept both "Gewähr…" (Gewährleistung) and "Garantie".
 */
const WARRANTY_PATTERNS: ReadonlyArray<{ pattern: RegExp; monthsPerUnit: number }> = [
  { pattern: /(\d+)\s*-?\s*months?\s*warrant/i, monthsPerUnit: 1 },
  { pattern: /(\d+)\s*-?\s*monat(?:en|e)?\s*(?:gewähr|garantie)/i, monthsPerUnit: 1 },
  { pattern: /(\d+)\s*-?\s*years?\s*warrant/i, monthsPerUnit: 12 },
  { pattern: /(\d+)\s*-?\s*jahr(?:en|e)?\s*(?:gewähr|garantie)/i, monthsPerUnit: 12 },
];

/**
 * Extracts a warranty duration from listing text (title or description).
 *
 * @param text Free text that may contain a phrase such as "12 months warranty"
 *             or "2 Jahre Gewährleistung".
 * @returns The warranty length in months (years are multiplied by 12), or
 *          null when no recognised warranty phrase is present.
 */
function extractWarranty(text: string): number | null {
  for (const { pattern, monthsPerUnit } of WARRANTY_PATTERNS) {
    const match = pattern.exec(text);
    if (match && match[1]) {
      return Number.parseInt(match[1], 10) * monthsPerUnit;
    }
  }
  return null;
}
/**
 * Collects the result rows of an eBay search page.
 *
 * Every `.s-item` element is read for its title, price text, condition text,
 * link and thumbnail. Rows without a title or link, and eBay's "Shop on eBay"
 * placeholder row, are left out.
 *
 * @param $       Cheerio-style selector function for the loaded search page.
 * @param baseUrl URL of the search page (currently not used by the parser).
 * @returns The raw, still unparsed, string fields of each usable row.
 */
// eslint-disable-next-line @typescript-eslint/no-explicit-any
async function parseSearchResults($: any, baseUrl: string): Promise<Array<{ title: string; url: string; price: string; condition: string; imageUrl: string }>> {
  const collected: Array<{ title: string; url: string; price: string; condition: string; imageUrl: string }> = [];

  $(".s-item").each((_index: number, node: unknown) => {
    const card = $(node);
    const trimmedText = (selector: string): string => card.find(selector).text().trim();

    const title = trimmedText(".s-item__title");
    const url = card.find(".s-item__link").attr("href") || "";
    const isPlaceholder = title.toLowerCase().includes("shop on ebay");
    if (!title || !url || isPlaceholder) {
      return;
    }

    const thumbnail = card.find(".s-item__image-img");
    collected.push({
      title,
      url,
      price: trimmedText(".s-item__price"),
      condition: trimmedText(".SECONDARY_INFO"),
      imageUrl: thumbnail.attr("src") || thumbnail.attr("data-src") || "",
    });
  });

  return collected;
}
/**
 * Scrapes eBay for one switch model and aggregates what the listings reveal.
 *
 * Flow:
 *  1. Request the search page for `model`.
 *  2. Keep up to three results whose title contains the model's leading
 *     segment (the text before the first "-") and enqueue their listing pages.
 *  3. For each listing page: run `scrapeWithLLM` on the HTML, parse the price
 *     carried over from the search row, read seller, image and item specifics,
 *     and fold everything into the aggregate result.
 *
 * The request queue is named after the switch id and is dropped when the crawl
 * ends. Named queues persist between runs and remember handled URLs, so leaving
 * one behind would both leak storage and make a retry of the same switch skip
 * the search URL as "already handled".
 *
 * @param switchId Id of the switch row; its first 8 characters name the queue.
 * @param model    Model string used as the search phrase.
 * @returns The aggregated result, or null when no listing page was scraped.
 *          Crawler failures are logged, not thrown.
 */
async function enrichSwitchFromEbay(switchId: string, model: string): Promise<EnrichResult | null> {
  const result: EnrichResult = {
    model,
    listings: [],
    bestRefurbPrice: null,
    bestNewPrice: null,
    features: [],
    description: "",
    imageUrl: null,
  };

  // Relevance key for search hits: lower-cased text before the first hyphen.
  const modelPrefix = model.toLowerCase().split("-")[0] ?? "";

  const queue = await RequestQueue.open(`ebay-${switchId.substring(0, 8)}`);
  try {
    await queue.addRequest({ url: buildSearchUrl(model), userData: { model, phase: "search" } });

    const crawler = new CheerioCrawler({
      requestQueue: queue,
      maxRequestsPerCrawl: 5,
      requestHandlerTimeoutSecs: 30,
      async requestHandler({ request, $, crawler }) {
        const { phase } = request.userData as { phase: string; model: string };

        if (phase === "search") {
          const items = await parseSearchResults($, request.url);
          // Take up to 3 most relevant listings
          const relevant = items
            .filter((item) => item.title.toLowerCase().includes(modelPrefix))
            .slice(0, 3);

          for (const item of relevant) {
            if (item.url && item.url.startsWith("http")) {
              await crawler.addRequests([{
                // Strip tracking parameters so each listing is queued once.
                url: item.url.split("?")[0]!,
                userData: {
                  phase: "listing",
                  model,
                  priceStr: item.price,
                  conditionStr: item.condition,
                  imageUrl: item.imageUrl,
                  title: item.title,
                },
              }]);
            }
          }
        } else if (phase === "listing") {
          const { title, priceStr, conditionStr, imageUrl: searchImageUrl } = request.userData as {
            title: string; priceStr: string; conditionStr: string; imageUrl: string; model: string;
          };

          // Use Crawler LLM to extract structured data from listing page
          const html = $.html();
          const extracted = await scrapeWithLLM(html, request.url, {
            vendorSlug: "ebay",
          });

          // Parse price from German number format ("1.234,56 EUR"): drop every
          // thousands separator, then turn the decimal comma into a dot.
          const priceClean = priceStr
            .replace(/[^\d,.-]/g, "")
            .replace(/\./g, "")
            .replace(",", ".");
          const price = parseFloat(priceClean) || 0;
          const condition = parseCondition(conditionStr);
          const warranty = extractWarranty(title);

          // Extract image from listing page (higher quality than search thumbnail)
          const listingImage = $(".ux-image-carousel-item img").first().attr("src")
            || $(".img img").first().attr("src")
            || searchImageUrl;

          // Extract features from item specifics table
          const features: string[] = [];
          $(".ux-labels-values").each((_, el) => {
            const label = $(el).find(".ux-labels-values__labels").text().trim();
            const value = $(el).find(".ux-labels-values__values").text().trim();
            // "Siehe Anzeige" ("see listing") carries no information.
            if (label && value && value !== "Siehe Anzeige") {
              features.push(`${label}: ${value}`);
            }
          });

          // Extract description: LLM-provided standard name, else first bold text
          const description = extracted?.extraction.standard_name
            || $(".ux-textspans--BOLD").first().text().trim()
            || "";

          const listing: EbayListing = {
            title,
            price,
            currency: "EUR",
            condition,
            seller: $(".ux-seller-section__item--seller a").text().trim() || "unknown",
            warrantyMonths: warranty,
            imageUrl: listingImage || null,
            listingUrl: request.url,
            itemId: request.url.match(/\/itm\/(\d+)/)?.[1] || "",
            description,
            features,
            specs: {},
          };
          result.listings.push(listing);

          // Track best prices (used listings count towards the refurb price)
          if (price > 0) {
            if (condition === "refurbished" || condition === "used") {
              if (!result.bestRefurbPrice || price < result.bestRefurbPrice) {
                result.bestRefurbPrice = price;
              }
            } else if (condition === "new") {
              if (!result.bestNewPrice || price < result.bestNewPrice) {
                result.bestNewPrice = price;
              }
            }
          }

          // Collect features for switch enrichment (first listing that has any)
          if (features.length > 0 && result.features.length === 0) {
            result.features = features.slice(0, 10);
          }
          // Use best image
          if (!result.imageUrl && listingImage) {
            result.imageUrl = listingImage;
          }
          // Use first good description
          if (!result.description && description.length > 50) {
            result.description = description.substring(0, 500);
          }
        }
      },
      failedRequestHandler: ({ request, error }) => {
        logger.warn(`eBay enricher failed for ${request.url}: ${error}`);
      },
    });

    try {
      await crawler.run();
    } catch (err) {
      logger.error("eBay crawler run error", { err, model });
    }
  } finally {
    // Remove the persisted queue so storage does not accumulate and a later
    // attempt for this switch starts from a clean slate.
    try {
      await queue.drop();
    } catch (err) {
      logger.warn(`eBay enricher: could not drop request queue for ${model}: ${err}`);
    }
  }

  return result.listings.length > 0 ? result : null;
}
// ─────────────────────────────────────────────────────────────────────────────
// Save enrichment results to DB
// ─────────────────────────────────────────────────────────────────────────────
/**
 * Persists an enrichment result onto the `switches` row and makes sure the
 * eBay vendor row exists.
 *
 * Only fields that actually carry data are written; `ebay_enriched_at` is
 * always stamped. `description` and `image_url` are only filled when the row
 * has none yet (COALESCE), whereas `features` and the refurb price overwrite.
 * NOTE(review): the price goes into `ebay_refurb_price_usd`, but the listings
 * in this file are recorded with currency "EUR" — confirm the intended unit.
 *
 * @param switchId Id of the switch row to update.
 * @param result   Aggregated eBay data for that switch.
 */
async function saveEnrichment(switchId: string, result: EnrichResult): Promise<void> {
  const { db: pool } = await import("../utils/db");

  // Update switch: features, description, refurb price, image
  const assignments: string[] = ["ebay_enriched_at = NOW()"];
  const values: unknown[] = [];
  // Binds one value and appends the SET fragment that references its placeholder.
  const bind = (value: unknown, toSql: (placeholder: string) => string): void => {
    values.push(value);
    assignments.push(toSql(`$${values.length}`));
  };

  if (result.features.length > 0) {
    bind(JSON.stringify(result.features), (p) => `features = ${p}::jsonb`);
  }
  if (result.description) {
    bind(result.description, (p) => `description = COALESCE(description, ${p})`);
  }
  if (result.bestRefurbPrice) {
    bind(result.bestRefurbPrice, (p) => `ebay_refurb_price_usd = ${p}`);
  }
  if (result.imageUrl && result.imageUrl.startsWith("http")) {
    // Only set image_url if not already set
    bind(result.imageUrl, (p) => `image_url = COALESCE(NULLIF(image_url, ''), ${p})`);
  }

  values.push(switchId);
  await pool.query(
    `UPDATE switches SET ${assignments.join(", ")} WHERE id = $${values.length}`,
    values
  );

  // Find eBay vendor ID (create if needed)
  const upsertVendorSql = [
    "INSERT INTO vendors (name, slug, type, website_url)",
    "VALUES ('eBay Marketplace', 'ebay', 'marketplace', 'https://www.ebay.de')",
    "ON CONFLICT (slug) DO UPDATE SET name = EXCLUDED.name",
    "RETURNING id",
  ].join("\n");
  const vendorRows = (await pool.query(upsertVendorSql)).rows;
  const ebayVendorId = vendorRows[0]?.id;

  // For each listing that has a price, we need a transceiver_id or we skip
  // (price_observations requires transceiver_id — for switches we'll use a different approach later)
  // For now, just log the refurb price data
  if (ebayVendorId) {
    logger.info("eBay enrichment saved", {
      model: result.model,
      listingsCount: result.listings.length,
      bestRefurb: result.bestRefurbPrice,
      featuresCount: result.features.length,
      hasImage: !!result.imageUrl,
    });
  }
}
// ─────────────────────────────────────────────────────────────────────────────
// Main: enrich switches that haven't been enriched yet
// ─────────────────────────────────────────────────────────────────────────────
/**
 * Enriches switches that have never been through the eBay enricher.
 *
 * Picks switches with `ebay_enriched_at IS NULL` and `max_speed_gbps >= 10`,
 * fastest first, and processes them one at a time. A switch with no listings
 * is still stamped as enriched so it is not picked again; a switch whose
 * enrichment throws is only logged and stays eligible. A randomised pause of
 * 3–5 seconds follows every switch.
 *
 * @param limit Maximum number of switches to process in this run.
 */
export async function enrichSwitchesFromEbay(limit = 20): Promise<void> {
  const { db: pool } = await import("../utils/db");

  const pendingSql = [
    "SELECT sw.id, sw.model, v.name AS vendor_name",
    "FROM switches sw",
    "JOIN vendors v ON sw.vendor_id = v.id",
    "WHERE sw.ebay_enriched_at IS NULL",
    "AND sw.max_speed_gbps >= 10",
    "ORDER BY sw.max_speed_gbps DESC, sw.created_at ASC",
    "LIMIT $1",
  ].join("\n");
  const { rows: pending } = await pool.query<{ id: string; model: string; vendor_name: string }>(
    pendingSql,
    [limit]
  );
  logger.info(`eBay enricher: processing ${pending.length} switches`);

  for (const { id, model } of pending) {
    logger.info(`Enriching ${model} from eBay...`);
    try {
      const enriched = await enrichSwitchFromEbay(id, model);
      if (enriched === null) {
        // Mark as tried even if no results
        await pool.query("UPDATE switches SET ebay_enriched_at = NOW() WHERE id = $1", [id]);
        logger.info(`${model}: no eBay listings found`);
      } else {
        await saveEnrichment(id, enriched);
        logger.info(`${model}: ${enriched.listings.length} listings, refurb €${enriched.bestRefurbPrice}`);
      }
    } catch (err) {
      logger.error(`${model}: enrichment failed`, { err });
    }

    // Rate limiting — be polite to eBay
    const pauseMs = 3000 + Math.random() * 2000;
    await new Promise((resolve) => setTimeout(resolve, pauseMs));
  }
}
// ─────────────────────────────────────────────────────────────────────────────
// Enrich transceivers from eBay (price observations with condition)
// ─────────────────────────────────────────────────────────────────────────────
/**
 * Records eBay price observations for transceivers.
 *
 * Selects transceivers that have a part number and no eBay price observation
 * in the last 30 days (highest speed first), searches ebay.de for each part
 * number, and stores one observation for the first refurbished hit and one for
 * the first new hit on the results page.
 *
 * The request queue is dropped at the end of every run. It has a fixed name, so
 * it persists between runs and remembers handled URLs; because the search URL
 * for a part number never changes, a leftover queue would make later runs skip
 * those searches.
 *
 * @param limit Maximum number of transceivers (and therefore search requests).
 */
export async function enrichTransceiversFromEbay(limit = 50): Promise<void> {
  const { db: pool } = await import("../utils/db");

  // Find eBay vendor (create if needed)
  const upsertVendorSql = [
    "INSERT INTO vendors (name, slug, type, website_url)",
    "VALUES ('eBay Marketplace', 'ebay', 'marketplace', 'https://www.ebay.de')",
    "ON CONFLICT (slug) DO UPDATE SET updated_at = NOW()",
    "RETURNING id",
  ].join("\n");
  const ebayVendor = await pool.query(upsertVendorSql);
  const ebayVendorId = ebayVendor.rows[0]?.id;
  if (!ebayVendorId) {
    // Without a vendor id every observation would be stored without a source.
    logger.error("eBay transceiver enricher: could not resolve eBay vendor id");
    return;
  }

  // Get transceivers without eBay price observations in last 30 days
  const pendingSql = [
    "SELECT t.id, t.slug, t.part_number, t.form_factor, t.speed_gbps",
    "FROM transceivers t",
    "WHERE NOT EXISTS (",
    "SELECT 1 FROM price_observations po",
    "WHERE po.transceiver_id = t.id",
    "AND po.marketplace = 'ebay'",
    "AND po.time > NOW() - INTERVAL '30 days'",
    ")",
    "AND t.part_number IS NOT NULL",
    "ORDER BY t.speed_gbps DESC",
    "LIMIT $1",
  ].join("\n");
  const transceivers = await pool.query<{ id: string; slug: string; part_number: string; form_factor: string; speed_gbps: number }>(
    pendingSql,
    [limit]
  );
  logger.info(`eBay transceiver enricher: processing ${transceivers.rows.length} transceivers`);

  const insertObservationSql = [
    "INSERT INTO price_observations",
    "(time, transceiver_id, source_vendor_id, price, currency, condition, marketplace, warranty_months, seller_name, listing_title, url, scrape_method, stock_level)",
    "VALUES (NOW(), $1, $2, $3, 'EUR', $4, 'ebay', $5, $6, $7, $8, 'crawlee', 'in_stock')",
    "ON CONFLICT DO NOTHING",
  ].join("\n");

  const queue = await RequestQueue.open("ebay-transceivers");
  try {
    for (const tcvr of transceivers.rows) {
      const query = tcvr.part_number || `${tcvr.form_factor} ${tcvr.speed_gbps}G transceiver`;
      await queue.addRequest({
        url: buildSearchUrl(query),
        // The `transceiverI` key (sic) is kept so requests persisted by earlier
        // runs still resolve to their transceiver id.
        userData: { transceiverI: tcvr.id, query, formFactor: tcvr.form_factor, speedGbps: tcvr.speed_gbps },
      });
    }

    const crawler = new CheerioCrawler({
      requestQueue: queue,
      maxRequestsPerCrawl: limit,
      requestHandlerTimeoutSecs: 20,
      maxConcurrency: 2,
      async requestHandler({ request, $ }) {
        const { transceiverI: transceiverId } = request.userData as {
          transceiverI: string; query: string; formFactor: string; speedGbps: number;
        };
        const items = await parseSearchResults($, request.url);

        // First hit of each kind on the results page.
        const firstRefurb = items.find((i) => {
          const cond = i.condition.toLowerCase();
          return cond.includes("refurb") || cond.includes("überholt") || cond.includes("generalüber");
        });
        const firstNew = items.find((i) => {
          const cond = i.condition.toLowerCase();
          return cond.includes("neu") || cond.includes("new");
        });

        const insertObs = async (
          item: { price: string; condition: string; imageUrl: string; title: string; url: string },
          condition: "new" | "refurbished"
        ): Promise<void> => {
          // German number format ("1.234,56"): drop every thousands separator,
          // then turn the decimal comma into a dot.
          const priceClean = item.price
            .replace(/[^\d,.-]/g, "")
            .replace(/\./g, "")
            .replace(",", ".");
          const price = parseFloat(priceClean);
          if (!price || price <= 0) return;
          const warranty = extractWarranty(item.title);
          await pool.query(
            insertObservationSql,
            [transceiverId, ebayVendorId, price, condition, warranty, "eBay Seller", item.title.substring(0, 200), item.url]
          );
        };

        // Best refurbished price
        if (firstRefurb) await insertObs(firstRefurb, "refurbished");
        // Best new price
        if (firstNew) await insertObs(firstNew, "new");
      },
      failedRequestHandler: ({ request, error }) => {
        logger.warn(`eBay transceiver enricher failed for ${request.url}: ${error}`);
      },
    });

    try {
      await crawler.run();
    } catch (err) {
      logger.error("eBay transceiver crawler error", { err });
    }
  } finally {
    // Remove the persisted queue so the next run re-scrapes the same search URLs.
    try {
      await queue.drop();
    } catch (err) {
      logger.warn(`eBay transceiver enricher: could not drop request queue: ${err}`);
    }
  }
}
// CLI entrypoint: `<script> [switches|<anything else = transceivers>] [limit]`
// Runs only when this file is executed directly, not when it is imported.
if (require.main === module) {
  void (async () => {
    const target = process.argv[2] || "switches";

    // A missing, non-numeric or negative limit falls back to the per-target
    // default instead of reaching SQL `LIMIT` as NaN.
    const requestedLimit = Number.parseInt(process.argv[3] ?? "", 10);
    const limitOr = (fallback: number): number =>
      Number.isInteger(requestedLimit) && requestedLimit >= 0 ? requestedLimit : fallback;

    let exitCode = 0;
    try {
      if (target === "switches") {
        await enrichSwitchesFromEbay(limitOr(20));
      } else {
        await enrichTransceiversFromEbay(limitOr(50));
      }
    } catch (err) {
      // Report the failure and signal it through the exit status rather than
      // leaving an unhandled rejection.
      logger.error("eBay enricher CLI failed", { err, target });
      exitCode = 1;
    }
    process.exit(exitCode);
  })();
}