- Migration 019: stock_snapshots, abc_classification, reorder_signals, product_lifecycle_events, market_intelligence, crawler_llm_log tables
- Seeded 7 market intel events (OFC 2026, AWS/Azure CapEx, Coherent lead times, EU TED tenders, ECOC 2026, IEEE 802.3df)
- Seeded 4 lifecycle events (Cisco SFP-10G-LR EOL, Juniper EOL, 400ZR ratified, 800G MSA draft)
- Crawler LLM: core.ts (Ollama-based extractor), stock-schema.ts (typed schemas + vendor profiles for Flexoptix/FS.com/10Gtek/ATGBICS/ProLabs/Farnell/Mouser), validator.ts (rule-based sanity checks + cross-validation)
- market-intelligence.ts scraper: OFC/ECOC, LightReading, IEEE 802.3, EU TED, Farnell/Mouser lead times, FierceTelecom — weekly via pg-boss
- computeAbcClassification(): dynamic A/B/C classification from price obs + compat count + vendor breadth
- computeReorderSignals(): buy_now/wait/hold/monitor with reasons + signal strength
- API: GET /api/procurement/overview|signals|signals/:id|abc|market-intel|stock-trends/:id|lifecycle
- Dashboard: Procurement Intel tab with Reorder Signals, ABC table, Market Intel cards, Lifecycle Events
158 lines
4.6 KiB
TypeScript
158 lines
4.6 KiB
TypeScript
/**
 * Crawler LLM — Rule-based validator.
 *
 * Runs AFTER the LLM extraction to catch hallucinations and obvious errors.
 * The LLM is good at structure; this catches range violations and nonsense.
 */

import type { StockExtractionResult } from "./stock-schema";
/** Outcome of running the rule-based checks over one extraction result. */
export interface ValidationResult {
  /** True only when `errors` is empty; warnings never affect this flag. */
  passed: boolean;
  /** Suspicious findings that do not fail validation. */
  warnings: string[];
  /** Findings that fail validation. */
  errors: string[];
}

/**
 * Expected price ranges per speed class (USD/EUR). Rough but effective.
 *
 * Keys have the form `"<speed in Gbps>G"`; each value is a
 * `[lowest expected price, highest expected price]` tuple.
 */
const PRICE_RANGES: Readonly<Record<string, readonly [number, number]>> = {
  "1G": [10, 500],
  "10G": [20, 2000],
  "25G": [30, 2000],
  "40G": [50, 3000],
  "100G": [80, 15000],
  "200G": [200, 20000],
  "400G": [200, 50000],
  "800G": [500, 80000],
};

/** Transceiver form-factor names accepted without a warning (exact, case-sensitive match). */
const VALID_FORM_FACTORS: ReadonlySet<string> = new Set([
  "SFP", "SFP+", "SFP28", "SFP56", "SFP-DD",
  "QSFP", "QSFP+", "QSFP28", "QSFP56", "QSFP-DD", "QSFP112",
  "OSFP", "OSFP-RHS",
  "CFP", "CFP2", "CFP4", "CFP8",
  "XFP", "X2", "XENPAK",
  "DSFP", "CSFP",
]);

/** Currency codes a priced extraction may carry; anything else is reported as an error. */
const VALID_CURRENCIES: ReadonlySet<string> = new Set(["USD", "EUR", "GBP", "CNY"]);

/**
 * Applies deterministic sanity rules to one LLM stock extraction.
 *
 * Errors make `passed` false; warnings are informational and never affect it.
 * NaN values are reported as errors explicitly, because NaN fails every
 * `<` / `>` comparison and would otherwise slip through the range checks.
 *
 * @param result - Extraction to check.
 * @param speedGbps - Optional link speed in Gbps. When `"<speedGbps>G"` is a
 *   key of `PRICE_RANGES`, a price far outside that range (below a tenth of
 *   the low bound or above ten times the high bound) produces a warning.
 * @returns The collected errors and warnings, with `passed` set when there
 *   are no errors.
 */
export function validateStockExtraction(
  result: StockExtractionResult,
  speedGbps?: number
): ValidationResult {
  // Not a product page — nothing else is checked; the caller should discard it.
  if (!result.is_product_page) {
    return { passed: false, errors: ["Not a product page"], warnings: [] };
  }

  const errors: string[] = [];
  const warnings: string[] = [];

  // Confidence too low (or not a number at all)
  if (Number.isNaN(result.confidence)) {
    errors.push(`Confidence ${result.confidence} is not a valid number`);
  } else if (result.confidence < 0.5) {
    errors.push(`Confidence ${result.confidence} below threshold 0.5`);
  }

  // Price validation
  if (result.price !== null) {
    if (Number.isNaN(result.price)) {
      errors.push(`Price ${result.price} is not a valid number`);
    }
    if (result.price <= 0) {
      errors.push(`Price ${result.price} is not positive`);
    }
    if (result.price > 500_000) {
      errors.push(`Price ${result.price} exceeds maximum sanity limit`);
    }
    if (!result.currency || !VALID_CURRENCIES.has(result.currency)) {
      errors.push(`Invalid currency: ${result.currency}`);
    }

    // Speed-class price range check. The bounds are deliberately loose
    // (10x slack on both sides) so only gross outliers are flagged.
    if (speedGbps) {
      const speedKey = `${speedGbps}G`;
      const range = PRICE_RANGES[speedKey];
      if (range && (result.price < range[0] * 0.1 || result.price > range[1] * 10)) {
        warnings.push(`Price ${result.price} ${result.currency} looks unusual for ${speedKey} (expected ${range[0]}–${range[1]})`);
      }
    }
  }

  // Stock quantity sanity
  if (result.stock_quantity !== null) {
    if (Number.isNaN(result.stock_quantity)) {
      errors.push(`Stock quantity ${result.stock_quantity} is not a valid number`);
    }
    if (result.stock_quantity < 0) {
      errors.push(`Stock quantity ${result.stock_quantity} is negative`);
    }
    if (result.stock_quantity > 100_000) {
      warnings.push(`Stock quantity ${result.stock_quantity} unusually high — verify`);
    }
  }

  // Lead time sanity
  if (result.lead_time_days !== null) {
    if (Number.isNaN(result.lead_time_days)) {
      errors.push(`Lead time ${result.lead_time_days} is not a valid number`);
    }
    if (result.lead_time_days < 0) {
      errors.push(`Lead time ${result.lead_time_days} is negative`);
    }
    if (result.lead_time_days > 730) {
      warnings.push(`Lead time ${result.lead_time_days} days (>2 years) — verify`);
    }
  }

  // MOQ sanity
  if (result.moq !== null && (Number.isNaN(result.moq) || result.moq < 1)) {
    errors.push(`MOQ ${result.moq} must be at least 1`);
  }

  // Form factor check
  if (result.form_factor && !VALID_FORM_FACTORS.has(result.form_factor)) {
    warnings.push(`Unknown form factor: ${result.form_factor}`);
  }

  // Price break consistency. A missing array is treated as "no price breaks"
  // instead of throwing on `.length`.
  const priceBreaks = result.price_breaks ?? [];
  for (const pb of priceBreaks) {
    if (Number.isNaN(pb.qty) || Number.isNaN(pb.price) || pb.qty < 1 || pb.price <= 0) {
      errors.push(`Invalid price break: qty=${pb.qty} price=${pb.price}`);
    }
    if (result.price && pb.price > result.price * 2) {
      warnings.push(`Price break ${pb.qty}x=${pb.price} higher than unit price — unusual`);
    }
  }

  // Incoming ETA must parse as a date and should be a future-ish date.
  if (result.incoming_eta) {
    const etaMs = new Date(result.incoming_eta).getTime();
    // One day of grace so timezone differences do not flag "today".
    const oneDayMs = 24 * 60 * 60 * 1000;
    if (Number.isNaN(etaMs)) {
      errors.push(`Invalid incoming_eta date: ${result.incoming_eta}`);
    } else if (etaMs < Date.now() - oneDayMs) {
      warnings.push(`Incoming ETA ${result.incoming_eta} is in the past — verify`);
    }
  }

  return {
    passed: errors.length === 0,
    errors,
    warnings,
  };
}

/**
 * Cross-source comparison: do two extractions agree within tolerance?
 *
 * Two extractions agree when both carry a finite price, their currencies are
 * identical, the relative price difference is within `priceTolerance`, and —
 * if both have a part number — the part numbers match after removing
 * whitespace, hyphens and underscores and uppercasing.
 *
 * @param a - First extraction.
 * @param b - Second extraction.
 * @param priceTolerance - Maximum allowed relative price difference
 *   (default 0.10, i.e. 10%).
 * @returns `true` when the two extractions agree, otherwise `false`.
 */
export function crossValidate(
  a: StockExtractionResult,
  b: StockExtractionResult,
  priceTolerance = 0.10 // 10% price difference allowed
): boolean {
  if (a.price === null || b.price === null) return false;

  // NaN/Infinity/missing prices cannot be compared; without this guard the
  // ratio below becomes NaN, which is never "> tolerance" and would agree.
  if (!Number.isFinite(a.price) || !Number.isFinite(b.price)) return false;

  // Both in same currency
  if (a.currency !== b.currency) return false;

  // Price within tolerance. Divide by the larger magnitude so the ratio is
  // never negative; two zero prices count as identical.
  const scale = Math.max(Math.abs(a.price), Math.abs(b.price));
  const diff = scale === 0 ? 0 : Math.abs(a.price - b.price) / scale;
  if (diff > priceTolerance) return false;

  // Part numbers match (if both present)
  if (a.part_number && b.part_number) {
    const normalize = (s: string) => s.replace(/[\s\-_]/g, "").toUpperCase();
    if (normalize(a.part_number) !== normalize(b.part_number)) return false;
  }

  return true;
}