Rene Fichtmueller 7869f098b2 feat: linecard system support, Cisco 8000 accuracy, price anomaly detection
API/finder:
- Add modular chassis support: sibling linecards fetched when is_linecard=true
- Add chassis linecards when system_type=modular
- Extend switch response: system_type, is_linecard, chassis_model, slot_type,
  flexbox_compat_mode, flexbox_notes, description, switching_capacity_tbps,
  total_ports, category, lifecycle_status, features, use_cases, linecards[]

API/transceivers:
- Filter price_observations with COALESCE(is_anomalous, false) = false
  (direct prices + comparable market prices)

Scraper/db:
- Add PRICE_BOUNDS map (per form-factor min/max USD sanity bounds)
- Add isPriceAnomalous() — marks DB price_observations as is_anomalous=true
- Add competitor_verified flag: set true when valid competitor price stored
- upsertPriceObservation: skip prices outside sanity bounds, set competitor_verified

Scraper/hash:
- contentHash() now accepts Record<string,unknown> | string (union type)
  to support both structured objects and legacy string callers

Scrapers (skylane, tscom, wiitek):
- Fix contentHash() call signature: pass objects not JSON.stringify strings
- Fix wiitek: remove invalid 'name' param, fix t.id → transceiverId

Migrations:
- Add is_anomalous, competitor_verified, competitor_verified_at,
  image_primary columns
- Recreate sync_fully_verified trigger to include competitor_verified
- Add is_linecard, chassis_model, system_type, slot_type,
  flexbox_compat_mode, flexbox_notes to switches table
2026-04-09 09:06:22 +02:00

94 lines
3.4 KiB
TypeScript

import { createHash } from "crypto";
/**
 * Generate a short SHA-256 content hash for change detection.
 *
 * Accepts an object (preferred) or a plain string (legacy scrapers).
 * Strings are hashed as-is. Objects are serialized to JSON with keys emitted
 * in sorted order at every nesting level, so property insertion order never
 * influences the hash while every nested value does.
 *
 * @param data - Structured record, or an already-serialized string.
 * @returns The first 16 hex characters of the SHA-256 digest.
 */
export function contentHash(data: Record<string, unknown> | string): string {
  let normalized: string;
  if (typeof data === "string") {
    normalized = data;
  } else {
    // An array passed as the JSON.stringify replacer is an allow-list that is
    // applied at EVERY depth, not just the top level. Using only the top-level
    // keys would silently drop nested properties (e.g. {a:{b:1}} -> {"a":{}}),
    // so first walk the value once to collect the keys seen at all depths.
    const seenKeys = new Set<string>();
    JSON.stringify(data, (key: string, value: unknown) => {
      seenKeys.add(key);
      return value;
    });
    // Serializing with the sorted allow-list emits keys in that order at each
    // level. For flat objects the output is identical to sorting only the
    // top-level keys, so hashes of flat records are unchanged.
    normalized = JSON.stringify(data, Array.from(seenKeys).sort());
  }
  return createHash("sha256").update(normalized).digest("hex").slice(0, 16);
}
/**
 * Convert one numeric token (digits with optional "," / "." separators)
 * into a number, deciding which separator is the decimal mark.
 *
 * - Both separators present: the right-most one is the decimal mark
 *   ("1,063.02" -> 1063.02, "1.234,56" -> 1234.56).
 * - Only commas: a single comma NOT followed by exactly three digits is a
 *   decimal comma ("12,50" -> 12.5); otherwise commas are grouping
 *   ("1,234" -> 1234).
 * - Only dots: a dot is a decimal point ("1063.02"), unless the token is a
 *   well-formed multi-group number ("1.234.567" -> 1234567).
 */
function normalizePriceToken(token: string): number {
  const lastComma = token.lastIndexOf(",");
  const lastDot = token.lastIndexOf(".");
  let normalized = token;

  if (lastComma !== -1 && lastDot !== -1) {
    normalized =
      lastComma > lastDot
        ? token.replace(/\./g, "").replace(",", ".")
        : token.replace(/,/g, "");
  } else if (lastComma !== -1) {
    const hasSingleComma = token.indexOf(",") === lastComma;
    const digitsAfterComma = token.length - lastComma - 1;
    normalized =
      hasSingleComma && digitsAfterComma !== 3
        ? token.replace(",", ".")
        : token.replace(/,/g, "");
  } else if (/^\d{1,3}(?:\.\d{3}){2,}$/.test(token)) {
    normalized = token.replace(/\./g, "");
  }

  return parseFloat(normalized);
}

/**
 * Parse a price string into a numeric amount and a currency code.
 * Handles: "$12.50", "12,50 €", "US$12.50", "12.50 USD", "$1,063.02",
 * "$1063.02", "1.234,56 €".
 *
 * Currency is detected from the symbols €, £, ¥ (reported as "CNY") and $,
 * or the text "usd". Without any of those, the input is only accepted when
 * the whole string is a bare decimal price such as "1234.56"; this keeps
 * stock counts ("1914"), page numbers or CSS values from being read as prices.
 *
 * @param raw - Text scraped from a price element.
 * @returns The parsed amount and currency; `{ price: 0, currency: "USD" }`
 *          when nothing price-like is found.
 */
export function parsePrice(raw: string): { price: number; currency: string } {
  if (!raw) return { price: 0, currency: "USD" };

  const currency = raw.includes("€")
    ? "EUR"
    : raw.includes("£")
      ? "GBP"
      : raw.includes("¥")
        ? "CNY"
        : raw.includes("$") || raw.toLowerCase().includes("usd")
          ? "USD"
          : "";

  if (!currency) {
    // No currency marker: only accept text that is clearly a decimal price.
    const decimalMatch = raw.match(/^\s*[\d,]+\.\d{2}\s*$/);
    if (!decimalMatch) return { price: 0, currency: "USD" };
  }

  // Grab every complete numeric token. Matching whole digit runs (rather than
  // 1-3 digit groups first) keeps ungrouped amounts like "1063.02" intact
  // instead of splitting them into "106" and "3.02".
  const tokens = raw.match(/\d+(?:[.,]\d+)*/g);
  if (!tokens || tokens.length === 0) return { price: 0, currency: currency || "USD" };

  // Use the LARGEST number found: avoids picking "2" from "2 in stock"
  // over the actual price.
  const price = Math.max(...tokens.map(normalizePriceToken));
  return { price: Number.isFinite(price) ? price : 0, currency: currency || "USD" };
}
/**
 * Determine stock level from various text representations
 * (English and German shop wording).
 *
 * Matching is by case-insensitive substring. Out-of-stock phrases are tested
 * FIRST because several of them contain an in-stock keyword as a substring
 * ("unavailable" / "not available" contain "available", "not in stock"
 * contains "in stock", "nicht auf lager" contains "auf lager"); testing the
 * in-stock keywords first would misreport those as "in_stock".
 *
 * @param raw - Availability text scraped from a product page.
 * @returns The matched stock level, or "on_request" when the text is empty
 *          or contains no recognised phrase.
 */
export function parseStockLevel(
  raw: string
): "in_stock" | "low_stock" | "out_of_stock" | "on_request" | "discontinued" {
  if (!raw) return "on_request";

  const lower = raw.toLowerCase();
  const mentions = (...phrases: string[]): boolean =>
    phrases.some((phrase) => lower.includes(phrase));

  if (
    mentions(
      "out of stock",
      "sold out",
      "nicht verfügbar",
      "nicht auf lager",
      "unavailable",
      "not available",
      "not in stock"
    )
  )
    return "out_of_stock";
  if (mentions("in stock", "auf lager", "available")) return "in_stock";
  if (mentions("low stock", "few left", "limited")) return "low_stock";
  if (mentions("discontinued", "eol", "end of life")) return "discontinued";
  return "on_request";
}
/**
 * Extract numeric quantity from stock text.
 * "23 in stock" → 23, "500+ available" → 500, "1,500 in stock" → 1500
 *
 * The number must directly precede (optionally via "+" and whitespace) one of
 * the keywords: in stock, available, auf lager, stück, units, pcs
 * (case-insensitive). Thousands-grouped counts ("1,500" / "1.500") are read
 * as a single number; only strictly grouped forms (1-3 digits followed by
 * groups of exactly three) are treated that way, so "2.5 available" still
 * yields 5 rather than 25.
 *
 * @param raw - Availability text scraped from a product page.
 * @returns The quantity, or `undefined` when no quantity phrase is present.
 */
export function parseQuantity(raw: string): number | undefined {
  if (!raw) return undefined;
  const match = raw.match(
    /(\d{1,3}(?:[,.]\d{3})+|\d+)\+?\s*(in stock|available|auf lager|stück|units|pcs)/i
  );
  if (!match) return undefined;
  // Drop grouping separators before converting; always parse as base 10.
  return parseInt(match[1].replace(/[,.]/g, ""), 10);
}
/**
 * Parse a lead time, in days, out of free-form shipping text.
 * "Ships in 3-5 days" → 5, "2 weeks" → 14
 *
 * A day figure (optionally "business days") is looked for first; only when
 * none is present is a week figure used, converted at seven days per week.
 * In each case the first number directly followed by the unit is taken.
 *
 * @param raw - Shipping / delivery text.
 * @returns Lead time in days, or `undefined` when no day or week figure is found.
 */
export function parseLeadTime(raw: string): number | undefined {
  // Ordered by priority: [pattern, number of days one unit represents].
  const unitPatterns: ReadonlyArray<readonly [RegExp, number]> = [
    [/(\d+)\s*(business\s+)?days?/i, 1],
    [/(\d+)\s*weeks?/i, 7],
  ];

  for (const [pattern, daysPerUnit] of unitPatterns) {
    const found = raw.match(pattern);
    if (found) {
      return parseInt(found[1]) * daysPerUnit;
    }
  }
  return undefined;
}