/**
 * Convert a single numeric token (digits optionally separated by "." or ",")
 * into a number, working out which separator is the decimal mark.
 *
 * Rules:
 * - Both "." and "," present: whichever occurs last is the decimal mark,
 *   the other one is thousands grouping ("1,063.02" and "1.063,02" -> 1063.02).
 * - Only dots: a single dot is a decimal point ("1234.56"); several dots are
 *   grouping ("1.234.567" -> 1234567).
 * - Only commas: a single comma followed by one or two digits is a decimal
 *   comma ("12,50" -> 12.5); anything else is grouping ("1,234" -> 1234).
 *
 * @param token - A match of `\d+(?:[.,]\d+)*`, i.e. it starts and ends with a digit.
 * @returns The numeric value, or `NaN` if the normalized text is not a number.
 */
function priceTokenToNumber(token: string): number {
  const lastDot = token.lastIndexOf(".");
  const lastComma = token.lastIndexOf(",");

  let decimalIndex = -1;
  if (lastDot !== -1 && lastComma !== -1) {
    decimalIndex = Math.max(lastDot, lastComma);
  } else if (lastDot !== -1) {
    if (token.indexOf(".") === lastDot) decimalIndex = lastDot;
  } else if (lastComma !== -1) {
    const fractionLength = token.length - lastComma - 1;
    if (token.indexOf(",") === lastComma && fractionLength <= 2) {
      decimalIndex = lastComma;
    }
  }

  if (decimalIndex === -1) {
    // No decimal mark: every separator is grouping.
    return Number(token.replace(/[.,]/g, ""));
  }

  const integerPart = token.slice(0, decimalIndex).replace(/[.,]/g, "");
  const fractionPart = token.slice(decimalIndex + 1);
  return Number(`${integerPart}.${fractionPart}`);
}

/**
 * Parse a price string into numeric value and currency.
 * Handles: "$12.50", "12,50 €", "US$12.50", "12.50 USD"
 *
 * To avoid misreading stock counts, page numbers or CSS values as prices, the
 * input is only accepted when it either
 * - contains a currency marker ("€", "£", "¥", "$", or the text "usd" in any
 *   case), or
 * - consists solely of a decimal number with exactly two fraction digits
 *   (e.g. "1234.56" or "1,234.56").
 *
 * When several numbers are present, the LARGEST one is returned, so that
 * "2 in stock $1,063.02" yields 1063.02 rather than 2. Note that this also
 * means an unrelated number larger than the price wins if it appears in the
 * same string.
 *
 * @param raw - Raw text scraped from a price element.
 * @returns The parsed price and its currency code. `price` is 0 when no valid
 *   price pattern is found; `currency` falls back to "USD" when no marker is
 *   present.
 */
export function parsePrice(raw: string): { price: number; currency: string } {
  if (!raw) return { price: 0, currency: "USD" };

  const currency = raw.includes("€")
    ? "EUR"
    : raw.includes("£")
      ? "GBP"
      : raw.includes("¥")
        ? "CNY"
        : raw.includes("$") || raw.toLowerCase().includes("usd")
          ? "USD"
          : "";

  // Without a currency marker, only a bare two-decimal number counts as a price.
  // The leading \d rejects separator-only prefixes such as ",.12".
  if (!currency && !/^\s*\d[\d,]*\.\d{2}\s*$/.test(raw)) {
    return { price: 0, currency: "USD" };
  }

  // Grab whole digit runs including their separators. Capping the leading run
  // at three digits would split "1234.56" into "123" and "4.56".
  const tokens = raw.match(/\d+(?:[.,]\d+)*/g) ?? [];
  const values = tokens
    .map(priceTokenToNumber)
    .filter((value) => Number.isFinite(value));

  const price = values.length > 0 ? Math.max(...values) : 0;
  return { price, currency: currency || "USD" };
}