import { createHash } from "crypto";

/**
 * Serialize an object to JSON with keys in sorted order at every depth.
 *
 * An array replacer passed to `JSON.stringify` acts as a key whitelist for
 * ALL nested objects, not only the root. Passing just the root's keys would
 * therefore silently drop nested properties. This helper first gathers every
 * key that will be serialized (a replacer function sees values after `toJSON`
 * has been applied), then uses the sorted, complete key list as the replacer.
 * Flat objects serialize exactly as `JSON.stringify(data, sortedRootKeys)`.
 */
function stableStringify(data: Record<string, unknown>): string {
  const seenKeys = new Set<string>();
  JSON.stringify(data, (_key: string, value: unknown) => {
    if (value !== null && typeof value === "object" && !Array.isArray(value)) {
      for (const key of Object.keys(value)) seenKeys.add(key);
    }
    return value;
  });
  return JSON.stringify(data, Array.from(seenKeys).sort());
}

/**
 * Generate a SHA-256 content hash for change detection.
 *
 * Accepts an object (preferred) or a plain string (legacy scrapers). Objects
 * are serialized with sorted keys so property order does not affect the hash;
 * nested properties are included. Strings are hashed as-is.
 *
 * @param data - Object or pre-serialized string to fingerprint.
 * @returns The first 16 hex characters of the SHA-256 digest.
 */
export function contentHash(data: Record<string, unknown> | string): string {
  const normalized = typeof data === "string" ? data : stableStringify(data);
  return createHash("sha256").update(normalized).digest("hex").slice(0, 16);
}

/**
 * Map currency markers found in `raw` to a currency code.
 * Returns an empty string when no known marker is present.
 */
function detectCurrency(raw: string): string {
  if (raw.includes("€")) return "EUR";
  if (raw.includes("£")) return "GBP";
  if (raw.includes("¥")) return "CNY";
  if (raw.includes("$") || raw.toLowerCase().includes("usd")) return "USD";
  return "";
}

/**
 * Convert one numeric token (digits joined by "." and/or ",") to a number.
 *
 * Separator rules:
 * - both "." and "," present: whichever occurs last is the decimal mark
 *   ("1,063.02" → 1063.02, "1.234,56" → 1234.56);
 * - a single ".": decimal point ("1063.02", "0.0045");
 * - a single "," followed by exactly three digits: thousands ("1,234" → 1234),
 *   otherwise a decimal comma ("12,50" → 12.5);
 * - the same separator repeated: thousands grouping ("1,234,567").
 */
function parseLocalizedNumber(token: string): number {
  const lastDot = token.lastIndexOf(".");
  const lastComma = token.lastIndexOf(",");

  let decimalAt = -1;
  if (lastDot !== -1 && lastComma !== -1) {
    decimalAt = Math.max(lastDot, lastComma);
  } else if (lastDot !== -1) {
    if (token.indexOf(".") === lastDot) decimalAt = lastDot;
  } else if (lastComma !== -1) {
    const fractionDigits = token.length - lastComma - 1;
    if (token.indexOf(",") === lastComma && fractionDigits !== 3) decimalAt = lastComma;
  }

  if (decimalAt === -1) return parseFloat(token.replace(/[.,]/g, ""));

  const whole = token.slice(0, decimalAt).replace(/[.,]/g, "");
  const fraction = token.slice(decimalAt + 1);
  return parseFloat(`${whole}.${fraction}`);
}

/**
 * Parse a price string into a number and currency code.
 *
 * Handles: "$12.50", "12,50 €", "US$12.50", "12.50 USD", "$1,063.02",
 * "1.234,56 €".
 *
 * Text without a currency marker is only accepted when it is nothing but a
 * decimal price such as "1,234.56"; this keeps stock counts ("1914"), page
 * numbers, or CSS values from being read as prices. When several numbers are
 * present, the LARGEST one is returned, so "2 in stock $1063.02" yields
 * 1063.02 rather than 2.
 *
 * @param raw - Scraped price text.
 * @returns The parsed price and currency; `{ price: 0, currency: "USD" }`
 *   when the text is empty or not recognized as a price.
 */
export function parsePrice(raw: string): { price: number; currency: string } {
  if (!raw) return { price: 0, currency: "USD" };

  const currency = detectCurrency(raw);

  // No currency marker: require the whole text to look like a decimal price.
  if (!currency && !/^\s*[\d,]+\.\d{2}\s*$/.test(raw)) {
    return { price: 0, currency: "USD" };
  }

  // Grab whole numeric tokens so "1063.02" or "0.0045" are never split apart.
  const tokens = raw.match(/\d+(?:[.,]\d+)*/g);
  if (!tokens || tokens.length === 0) return { price: 0, currency: currency || "USD" };

  const price = Math.max(...tokens.map((token) => parseLocalizedNumber(token)));
  return { price: Number.isNaN(price) ? 0 : price, currency: currency || "USD" };
}

// Phrase lists for parseStockLevel. Negated phrases must be tested before the
// in-stock list because "unavailable" contains "available", "not in stock"
// contains "in stock", and "nicht auf lager" contains "auf lager".
const OUT_OF_STOCK_PHRASES: readonly string[] = [
  "out of stock",
  "sold out",
  "nicht verfügbar",
  "nicht auf lager",
  "unavailable",
  "not available",
  "not in stock",
];
const LOW_STOCK_PHRASES: readonly string[] = ["low stock", "few left", "limited"];
const IN_STOCK_PHRASES: readonly string[] = ["in stock", "auf lager", "available"];
const DISCONTINUED_PHRASES: readonly string[] = ["discontinued", "eol", "end of life"];

/**
 * Determine stock level from various text representations.
 *
 * Matching is case-insensitive and substring-based. Categories are tested in
 * the order out-of-stock, low-stock, in-stock, discontinued, so that a
 * negated or qualified phrase wins over the plain "in stock"/"available"
 * wording it contains.
 *
 * @param raw - Scraped availability text.
 * @returns The matched stock level, or "on_request" when nothing matches.
 */
export function parseStockLevel(
  raw: string
): "in_stock" | "low_stock" | "out_of_stock" | "on_request" | "discontinued" {
  if (!raw) return "on_request";

  const lower = raw.toLowerCase();
  const mentionsAny = (phrases: readonly string[]): boolean =>
    phrases.some((phrase) => lower.includes(phrase));

  if (mentionsAny(OUT_OF_STOCK_PHRASES)) return "out_of_stock";
  if (mentionsAny(LOW_STOCK_PHRASES)) return "low_stock";
  if (mentionsAny(IN_STOCK_PHRASES)) return "in_stock";
  if (mentionsAny(DISCONTINUED_PHRASES)) return "discontinued";
  return "on_request";
}

/**
 * Extract a numeric quantity from stock text.
 *
 * "23 in stock" → 23, "500+ available" → 500, "1,234 in stock" → 1234.
 * The number must be directly followed (optionally after "+" and whitespace)
 * by one of the recognized unit/availability words.
 *
 * @param raw - Scraped stock text.
 * @returns The quantity, or `undefined` when no quantity is found.
 */
export function parseQuantity(raw: string): number | undefined {
  if (!raw) return undefined;

  // First alternative accepts thousands grouping ("1,234" / "1.234");
  // the second accepts a plain run of digits.
  const match = raw.match(
    /(\d{1,3}(?:[.,]\d{3})+|\d+)\+?\s*(?:in stock|available|auf lager|stück|units|pcs)/i
  );
  const digits = match ? match[1] : undefined;
  if (!digits) return undefined;

  return parseInt(digits.replace(/[.,]/g, ""), 10);
}

/**
 * Parse a lead time in days from text.
 *
 * "Ships in 3-5 days" → 5, "5-7 business days" → 7, "2 weeks" → 14.
 * The number immediately preceding "day(s)" is used, so a range yields its
 * upper bound. A day figure takes precedence over a week figure.
 *
 * @param raw - Scraped lead-time text.
 * @returns Lead time in days, or `undefined` when none is found.
 */
export function parseLeadTime(raw: string): number | undefined {
  if (!raw) return undefined;

  const dayMatch = raw.match(/(\d+)\s*(business\s+)?days?/i);
  const days = dayMatch ? dayMatch[1] : undefined;
  if (days !== undefined) return parseInt(days, 10);

  const weekMatch = raw.match(/(\d+)\s*weeks?/i);
  const weeks = weekMatch ? weekMatch[1] : undefined;
  if (weeks !== undefined) return parseInt(weeks, 10) * 7;

  return undefined;
}