API/finder: - Add modular chassis support: sibling linecards fetched when is_linecard=true - Add chassis linecards when system_type=modular - Extend switch response: system_type, is_linecard, chassis_model, slot_type, flexbox_compat_mode, flexbox_notes, description, switching_capacity_tbps, total_ports, category, lifecycle_status, features, use_cases, linecards[] API/transceivers: - Filter price_observations with COALESCE(is_anomalous, false) = false (direct prices + comparable market prices) Scraper/db: - Add PRICE_BOUNDS map (per form-factor min/max USD sanity bounds) - Add isPriceAnomalous() — marks DB price_observations as is_anomalous=true - Add competitor_verified flag: set true when valid competitor price stored - upsertPriceObservation: skip prices outside sanity bounds, set competitor_verified Scraper/hash: - contentHash() now accepts Record<string,unknown> | string (union type) to support both structured objects and legacy string callers Scrapers (skylane, tscom, wiitek): - Fix contentHash() call signature: pass objects not JSON.stringify strings - Fix wiitek: remove invalid 'name' param, fix t.id → transceiverId Migrations: - Add is_anomalous, competitor_verified, competitor_verified_at, image_primary columns - Recreate sync_fully_verified trigger to include competitor_verified - Add is_linecard, chassis_model, system_type, slot_type, flexbox_compat_mode, flexbox_notes to switches table
94 lines
3.4 KiB
TypeScript
94 lines
3.4 KiB
TypeScript
import { createHash } from "crypto";
|
|
|
|
/**
 * Generate a short SHA-256 content hash for change detection.
 *
 * Objects are serialised with their keys in sorted order at every nesting
 * level, so two objects that differ only in property order hash the same,
 * while a change to any nested value changes the hash. Plain strings
 * (legacy scrapers) are hashed as-is.
 *
 * @param data - Structured record (preferred) or a pre-serialised string.
 * @returns The first 16 hex characters of the SHA-256 digest.
 */
export function contentHash(data: Record<string, unknown> | string): string {
  let normalized: string;

  if (typeof data === "string") {
    normalized = data;
  } else {
    // A replacer *array* passed to JSON.stringify is an allow-list that is
    // applied to every nested object, not only the top level. Passing just the
    // top-level keys would silently drop nested properties, so gather the key
    // names from the whole structure first.
    const keyNames = new Set<string>();
    const visited = new WeakSet<object>();

    const collectKeys = (value: unknown): void => {
      if (value === null || typeof value !== "object" || visited.has(value)) return;
      // Guard against cycles here; JSON.stringify itself still throws on them.
      visited.add(value);
      if (Array.isArray(value)) {
        for (const element of value) collectKeys(element);
        return;
      }
      for (const [key, child] of Object.entries(value)) {
        keyNames.add(key);
        collectKeys(child);
      }
    };

    collectKeys(data);
    // The sorted allow-list also fixes the property order of the output.
    normalized = JSON.stringify(data, [...keyNames].sort());
  }

  return createHash("sha256").update(normalized).digest("hex").slice(0, 16);
}
|
|
|
|
/**
 * Convert a single numeric token (digits joined by "." / ",") into a number,
 * deciding which separator is the decimal mark.
 *
 * - Both separators present: the one that occurs last is the decimal mark
 *   ("1,063.02" and "1.063,02" both yield 1063.02).
 * - Commas only: well-formed 3-digit groups ("1,500") are thousands; a single
 *   comma otherwise is a decimal comma ("12,50" yields 12.5).
 * - Dots only: a single dot is a decimal point; repeated dots are grouping.
 */
function parseLocalizedNumber(token: string): number {
  const lastComma = token.lastIndexOf(",");
  const lastDot = token.lastIndexOf(".");

  let decimalIndex = -1;
  if (lastComma !== -1 && lastDot !== -1) {
    decimalIndex = Math.max(lastComma, lastDot);
  } else if (lastComma !== -1) {
    const isThousandsGrouping = /^[1-9]\d{0,2}(?:,\d{3})+$/.test(token);
    const isSingleComma = token.indexOf(",") === lastComma;
    if (!isThousandsGrouping && isSingleComma) decimalIndex = lastComma;
  } else if (lastDot !== -1) {
    const isSingleDot = token.indexOf(".") === lastDot;
    if (isSingleDot) decimalIndex = lastDot;
  }

  if (decimalIndex === -1) {
    return Number(token.replace(/[.,]/g, ""));
  }
  const whole = token.slice(0, decimalIndex).replace(/[.,]/g, "");
  const fraction = token.slice(decimalIndex + 1);
  return Number(`${whole}.${fraction}`);
}

/**
 * Parse a price string into a numeric amount and a currency code.
 *
 * Handles: "$12.50", "12,50 €", "US$12.50", "12.50 USD", "$1,063.02",
 * "$1063.02", "1.063,02 €".
 *
 * Text without any currency marker is only accepted when it is nothing but a
 * decimal price with two decimals (e.g. "1234.56"); this keeps stock counts
 * ("1914"), page numbers and similar bare integers from being read as prices.
 *
 * @param raw - Text scraped from the page.
 * @returns The parsed price and currency. `price` is 0 when no price could be
 *   extracted; `currency` defaults to "USD" when no marker was found.
 */
export function parsePrice(raw: string): { price: number; currency: string } {
  if (!raw) return { price: 0, currency: "USD" };

  let currency = "";
  if (raw.includes("€")) currency = "EUR";
  else if (raw.includes("£")) currency = "GBP";
  else if (raw.includes("¥")) currency = "CNY";
  else if (raw.includes("$") || raw.toLowerCase().includes("usd")) currency = "USD";

  if (!currency) {
    // No currency marker: only accept text that is clearly a decimal price.
    const looksLikeBarePrice = /^\s*[\d,]+\.\d{2}\s*$/.test(raw);
    if (!looksLikeBarePrice) return { price: 0, currency: "USD" };
  }

  // Grab complete numeric runs. Capping the leading run at three digits would
  // split an ungrouped amount such as "1063.02" into "106" and "3.02".
  const tokens = raw.match(/\d+(?:[.,]\d+)*/g);
  if (!tokens || tokens.length === 0) return { price: 0, currency: currency || "USD" };

  // Use the LARGEST number found, so "2" from "2 in stock" never wins over the
  // actual price in the same string.
  const price = Math.max(...tokens.map(parseLocalizedNumber));

  return { price: Number.isFinite(price) ? price : 0, currency: currency || "USD" };
}
|
|
|
|
/**
 * Determine the stock level from free-form availability text (English and
 * some German phrases).
 *
 * Negative and more specific phrases are tested before the generic positive
 * ones, because the positive keywords are substrings of the negative ones
 * ("unavailable" contains "available", "not in stock" contains "in stock").
 *
 * @param raw - Availability text scraped from the page.
 * @returns The matching stock level, or "on_request" when the text is empty
 *   or contains no recognised phrase.
 */
export function parseStockLevel(
  raw: string
): "in_stock" | "low_stock" | "out_of_stock" | "on_request" | "discontinued" {
  if (!raw) return "on_request";

  const lower = raw.toLowerCase();
  const mentions = (...phrases: string[]): boolean =>
    phrases.some((phrase) => lower.includes(phrase));

  if (
    mentions(
      "out of stock",
      "sold out",
      "not in stock",
      "nicht verfügbar",
      "nicht auf lager",
      "unavailable",
      "not available"
    )
  ) {
    return "out_of_stock";
  }
  if (mentions("discontinued", "eol", "end of life")) return "discontinued";
  if (mentions("low stock", "few left", "limited")) return "low_stock";
  if (mentions("in stock", "auf lager", "available")) return "in_stock";
  return "on_request";
}
|
|
|
|
/**
 * Extract the numeric quantity from stock text.
 *
 * "23 in stock" → 23, "500+ available" → 500, "1,500 in stock" → 1500,
 * "1.500 Stück" → 1500.
 *
 * The number must directly precede one of the recognised unit phrases
 * (optionally with a "+" and whitespace in between). Thousands separators
 * ("," or ".") are accepted when they delimit groups of exactly three digits.
 *
 * @param raw - Stock text scraped from the page.
 * @returns The quantity, or `undefined` when no quantity phrase is present.
 */
export function parseQuantity(raw: string): number | undefined {
  if (!raw) return undefined;

  // First alternative: grouped thousands such as "1,500"; second: a plain run
  // of digits. Without the first alternative "1,500 in stock" yields only 500.
  const match = raw.match(
    /(\d{1,3}(?:[.,]\d{3})+|\d+)\+?\s*(?:in stock|available|auf lager|stück|units|pcs)/i
  );
  const digits = match?.[1];
  if (digits === undefined) return undefined;

  return Number.parseInt(digits.replace(/[.,]/g, ""), 10);
}
|
|
|
|
/**
 * Read a lead time, expressed in days, out of shipping text.
 *
 * A day count ("5 days", "10 business days") takes precedence over a week
 * count; weeks are converted at seven days each. For a range such as
 * "Ships in 3-5 days" the number adjacent to the unit (5) is the one used.
 *
 * "Ships in 3-5 days" → 5, "2 weeks" → 14
 *
 * @param raw - Shipping / availability text.
 * @returns Lead time in days, or `undefined` if neither unit is mentioned.
 */
export function parseLeadTime(raw: string): number | undefined {
  // Ordered by priority: the first pattern that matches decides the result.
  const unitTable: ReadonlyArray<readonly [RegExp, number]> = [
    [/(\d+)\s*(business\s+)?days?/i, 1],
    [/(\d+)\s*weeks?/i, 7],
  ];

  for (const [pattern, daysPerUnit] of unitTable) {
    const found = raw.match(pattern);
    if (found !== null) {
      return parseInt(found[1]) * daysPerUnit;
    }
  }

  return undefined;
}
|