/**
 * Crawler LLM — Transceiver spec physical plausibility validator.
 *
 * Runs AFTER LLM extraction to catch technically impossible combinations
 * (e.g. 100G over SFP, 850nm on SMF, 80km over MMF). Complements
 * validator.ts which checks stock/price sanity.
 *
 * Returns a SpecValidationResult with:
 *  - passed: false blocks DB write and lowers training data confidence tier
 *  - warnings: still writes to DB but flags for human review
 *  - confidence_delta: adjustment applied to the LLM confidence score
 */

// ─────────────────────────────────────────────────────────────────────────────
// Type definitions
// ─────────────────────────────────────────────────────────────────────────────

/** Raw transceiver attributes as extracted by the LLM; every field may be absent. */
export interface ExtractedSpec {
  part_number?: string | null;
  form_factor?: string | null;
  speed_gbps?: number | null;
  reach_meters?: number | null;
  fiber_type?: string | null; // "SMF" | "MMF" | "CU" | "DAC" | "AOC"
  connector?: string | null;
  wavelengths?: string | null; // e.g. "850nm" or "1310nm TX / 1490nm RX"
  ieee_standard?: string | null; // e.g. "100GBASE-SR4"
  dom_support?: boolean | null;
}

/** Outcome of {@link validateTransceiverSpec}. */
export interface SpecValidationResult {
  passed: boolean;
  errors: string[];
  warnings: string[];
  confidence_delta: number; // negative = reduce LLM confidence score
  tier: "high" | "medium" | "low" | "rejected";
}

/** Reference data for one entry of the IEEE standards table. */
interface IeeeStandardDef {
  speedGbps: number;
  fiberType?: string;
  reachKm?: number;
}

// ─────────────────────────────────────────────────────────────────────────────
// Compatibility tables
// ─────────────────────────────────────────────────────────────────────────────

/** Max rated speed per form factor (Gbps). DAC/AOC = same form factor shell. */
const FORM_FACTOR_MAX_SPEED: Record<string, number> = {
  "GBIC": 1,
  "SFP": 4.25, // 4G FC max; 1G Ethernet common
  "SFP+": 28.05, // nominally 10G but 16G FC / 25G variants exist
  "SFP28": 28.05,
  "SFP56": 56,
  "SFP-DD": 100, // dual-lane SFP
  "QSFP": 40,
  "QSFP+": 40,
  "QSFP28": 112, // 4×25G = 100G; some push 112G
  "QSFP56": 224, // 4×56G = 200G
  "QSFP-DD": 800, // 8×100G
  "QSFP112": 800,
  "OSFP": 800,
  "OSFP-RHS": 800,
  "CFP": 100,
  "CFP2": 400,
  "CFP4": 100,
  "CFP8": 400,
  "XFP": 10,
  "X2": 10,
  "XENPAK": 10,
  "DSFP": 100,
  "CSFP": 2.5,
};

/** Min rated speed per form factor (Gbps). Catches wild mismatches. */
const FORM_FACTOR_MIN_SPEED: Record<string, number> = {
  "GBIC": 0.1,
  "SFP": 0.1,
  "SFP+": 1,
  "SFP28": 10,
  "SFP56": 25,
  "SFP-DD": 50,
  "QSFP": 4,
  "QSFP+": 10,
  "QSFP28": 40,
  "QSFP56": 100,
  "QSFP-DD": 100,
  "QSFP112": 200,
  "OSFP": 200,
  "OSFP-RHS": 200,
  "CFP": 10,
  "CFP2": 40,
  "CFP4": 10,
  "CFP8": 100,
  "XFP": 10,
  "X2": 10,
  "XENPAK": 10,
  "DSFP": 25,
  "CSFP": 0.1,
};

/**
 * Wavelength → expected fiber type.
 * 850 nm is classically MMF; 1270–1610 nm is SMF.
 * Exceptions: some 1310nm SFP (1000BASE-LX) work on MMF with mode-conditioning.
 *
 * @param nm Wavelength in nanometres.
 * @returns "MMF" for ≤ 900 nm, "SMF" for ≥ 1260 nm, otherwise "either".
 */
function expectedFiberForWavelength(nm: number): "MMF" | "SMF" | "either" {
  if (nm <= 900) return "MMF";
  if (nm >= 1260) return "SMF";
  return "either";
}

/** Max practical reach per fiber type (meters). Soft sanity limit. */
const MAX_REACH: Record<string, number> = {
  MMF: 4000, // OM5 push ~3.5km; 4km is outer limit for 100M FX
  SMF: 200_000, // 200km coherent ZR is real
  CU: 100,
  DAC: 30,
  AOC: 200,
};

/**
 * Known IEEE standards and their canonical speed (Gbps) + fiber type / reach hints.
 * Keys MUST already be in normalizeStandard() form (upper-case, no whitespace)
 * because the validator looks entries up directly by the normalized input.
 */
const IEEE_STANDARDS: Record<string, IeeeStandardDef> = {
  "100BASE-FX": { speedGbps: 0.1, fiberType: "MMF", reachKm: 2 },
  "100BASE-LX10": { speedGbps: 0.1, fiberType: "SMF", reachKm: 10 },
  "1000BASE-SX": { speedGbps: 1, fiberType: "MMF", reachKm: 0.55 },
  "1000BASE-LX": { speedGbps: 1, fiberType: "SMF", reachKm: 10 },
  "1000BASE-EX": { speedGbps: 1, fiberType: "SMF", reachKm: 40 },
  "1000BASE-ZX": { speedGbps: 1, fiberType: "SMF", reachKm: 80 },
  "1000BASE-T": { speedGbps: 1, fiberType: "CU" },
  "10GBASE-SR": { speedGbps: 10, fiberType: "MMF", reachKm: 0.3 },
  "10GBASE-LR": { speedGbps: 10, fiberType: "SMF", reachKm: 10 },
  "10GBASE-ER": { speedGbps: 10, fiberType: "SMF", reachKm: 40 },
  "10GBASE-ZR": { speedGbps: 10, fiberType: "SMF", reachKm: 80 },
  "25GBASE-SR": { speedGbps: 25, fiberType: "MMF", reachKm: 0.1 },
  "25GBASE-LR": { speedGbps: 25, fiberType: "SMF", reachKm: 10 },
  "25GBASE-ER": { speedGbps: 25, fiberType: "SMF", reachKm: 40 },
  "40GBASE-SR4": { speedGbps: 40, fiberType: "MMF", reachKm: 0.15 },
  "40GBASE-LR4": { speedGbps: 40, fiberType: "SMF", reachKm: 10 },
  "40GBASE-ER4": { speedGbps: 40, fiberType: "SMF", reachKm: 40 },
  "100GBASE-SR4": { speedGbps: 100, fiberType: "MMF", reachKm: 0.1 },
  "100GBASE-SR10": { speedGbps: 100, fiberType: "MMF", reachKm: 0.15 },
  "100GBASE-LR4": { speedGbps: 100, fiberType: "SMF", reachKm: 10 },
  "100GBASE-ER4": { speedGbps: 100, fiberType: "SMF", reachKm: 40 },
  "100GBASE-ZR": { speedGbps: 100, fiberType: "SMF", reachKm: 80 },
  "400GBASE-SR4": { speedGbps: 400, fiberType: "MMF", reachKm: 0.1 },
  "400GBASE-SR8": { speedGbps: 400, fiberType: "MMF", reachKm: 0.1 },
  "400GBASE-LR4": { speedGbps: 400, fiberType: "SMF", reachKm: 10 },
  "400GBASE-LR8": { speedGbps: 400, fiberType: "SMF", reachKm: 10 },
  "400GBASE-ER8": { speedGbps: 400, fiberType: "SMF", reachKm: 40 },
  "400GBASE-ZR": { speedGbps: 400, fiberType: "SMF", reachKm: 80 },
  "400ZR": { speedGbps: 400, fiberType: "SMF", reachKm: 120 },
  "800GBASE-SR8": { speedGbps: 800, fiberType: "MMF", reachKm: 0.1 },
  "800GBASE-LR4": { speedGbps: 800, fiberType: "SMF", reachKm: 2 },
};

// ─────────────────────────────────────────────────────────────────────────────
// Helpers
// ─────────────────────────────────────────────────────────────────────────────

/**
 * Parse first numeric wavelength from a string like "850nm" or "1310nm TX / 1490nm RX".
 * The unit is matched case-insensitively so "850 NM" is accepted too.
 *
 * @returns The first 3–4 digit number followed by "nm", or null when none is found.
 */
function parsePrimaryWavelength(wl: string): number | null {
  const match = /(\d{3,4})\s*nm/i.exec(wl);
  return match ? parseInt(match[1], 10) : null;
}

/** Upper-case a form factor and strip all whitespace ("qsfp 28" → "QSFP28"). */
function normalizeFormFactor(ff: string): string {
  return ff.trim().toUpperCase().replace(/\s+/g, "");
}

/** Upper-case a standard name and strip all whitespace (" 10gbase-sr " → "10GBASE-SR"). */
function normalizeStandard(s: string): string {
  return s.trim().toUpperCase().replace(/\s+/g, "");
}

// ─────────────────────────────────────────────────────────────────────────────
// Main validator
// ─────────────────────────────────────────────────────────────────────────────

/**
 * Check an extracted transceiver spec for physically implausible combinations.
 *
 * Each failed check appends a message to `errors` (hard failure) or `warnings`
 * (soft) and subtracts a fixed penalty from the confidence delta. Penalties are
 * accumulated as integer hundredths so that threshold comparisons such as
 * "delta ≥ -0.15" are exact; with float accumulation, -0.1 - 0.05 evaluates to
 * -0.15000000000000002 and misses the threshold.
 *
 * @param spec Fields extracted by the LLM; null/undefined fields skip the
 *             checks that need them.
 * @returns `passed` is true when there are no errors. `confidence_delta` is the
 *          summed penalty, floored at -0.9. `tier` is "rejected" on any error,
 *          "high" with no warnings, "medium" with ≤ 2 warnings and delta ≥ -0.15,
 *          otherwise "low".
 */
export function validateTransceiverSpec(spec: ExtractedSpec): SpecValidationResult {
  const errors: string[] = [];
  const warnings: string[] = [];
  let deltaHundredths = 0; // always ≤ 0; integer to avoid float drift

  const fail = (message: string, penalty: number): void => {
    errors.push(message);
    deltaHundredths -= penalty;
  };
  const warn = (message: string, penalty: number): void => {
    warnings.push(message);
    deltaHundredths -= penalty;
  };

  const ff = spec.form_factor ? normalizeFormFactor(spec.form_factor) : null;
  const fiberType = spec.fiber_type?.toUpperCase().trim() ?? null;
  const wavelengths = spec.wavelengths ?? null;

  // ── 0. Numeric sanity ─────────────────────────────────────────────────────
  // NaN compares false against every bound below, so without this guard a
  // garbage number would sail through every check untouched.
  const rawSpeed = spec.speed_gbps ?? null;
  const speedOk = rawSpeed === null || (Number.isFinite(rawSpeed) && rawSpeed > 0);
  if (!speedOk) {
    fail(`Speed ${rawSpeed}G is not a positive finite number. Data error.`, 40);
  }
  // Invalid values are dropped so later checks do not penalise them twice.
  const speedGbps = speedOk ? rawSpeed : null;

  const rawReach = spec.reach_meters ?? null;
  const reachOk = rawReach === null || (Number.isFinite(rawReach) && rawReach >= 0);
  if (!reachOk) {
    fail(`Reach ${rawReach}m is not a non-negative finite number. Data error.`, 40);
  }
  const reachM = reachOk ? rawReach : null;

  // ── 1. Form factor ↔ speed compatibility ──────────────────────────────────
  if (ff && speedGbps !== null) {
    const maxSpeed: number | undefined = FORM_FACTOR_MAX_SPEED[ff];
    const minSpeed: number | undefined = FORM_FACTOR_MIN_SPEED[ff];

    // 15% headroom above the table value before calling it impossible.
    if (maxSpeed !== undefined && speedGbps > maxSpeed * 1.15) {
      fail(
        `Speed ${speedGbps}G exceeds ${ff} maximum (${maxSpeed}G). Physically impossible.`,
        40
      );
    }
    if (minSpeed !== undefined && speedGbps < minSpeed * 0.5) {
      warn(
        `Speed ${speedGbps}G is unusually low for ${ff} (typical min ${minSpeed}G). Verify.`,
        10
      );
    }
  }

  // ── 2. Wavelength ↔ fiber type consistency ────────────────────────────────
  if (
    wavelengths &&
    fiberType &&
    fiberType !== "DAC" &&
    fiberType !== "AOC" &&
    fiberType !== "CU"
  ) {
    const primaryNm = parsePrimaryWavelength(wavelengths);
    if (primaryNm !== null) {
      const expectedFiber = expectedFiberForWavelength(primaryNm);

      if (expectedFiber === "MMF" && fiberType === "SMF") {
        fail(
          `${primaryNm}nm is a multi-mode wavelength but fiber_type is SMF. Check the source.`,
          30
        );
      }
      if (expectedFiber === "SMF" && fiberType === "MMF") {
        // 1310nm LX on MMF with mode-conditioning cable is a real thing — warn, not error
        if (primaryNm >= 1260 && primaryNm <= 1360) {
          warn(
            `${primaryNm}nm on MMF is unusual. Possible mode-conditioning cable — verify.`,
            5
          );
        } else {
          fail(
            `${primaryNm}nm (SMF wavelength) cannot work on MMF fiber at this reach.`,
            35
          );
        }
      }
    }
  }

  // ── 3. Reach ↔ fiber type sanity ─────────────────────────────────────────
  // fiberType is upper-cased, so it cannot collide with Object.prototype keys.
  const maxReach: number | undefined = fiberType ? MAX_REACH[fiberType] : undefined;
  if (reachM !== null && maxReach !== undefined && reachM > maxReach) {
    fail(
      `Reach ${reachM}m exceeds physical maximum for ${fiberType} (${maxReach}m). Data error.`,
      40
    );
  }
  if (reachM !== null && fiberType === "MMF" && reachM > 2000) {
    warn(
      `MMF reach ${reachM}m is very high (rare). OM5 max ~3.5km, earlier OM4 max 400m at 10G+.`,
      10
    );
  }

  // ── 4. IEEE standard cross-check ─────────────────────────────────────────
  if (spec.ieee_standard) {
    // Table keys are stored in normalized form, so a direct lookup suffices.
    const stdDef: IeeeStandardDef | undefined =
      IEEE_STANDARDS[normalizeStandard(spec.ieee_standard)];

    if (stdDef !== undefined) {
      // Speed mismatch (15% relative tolerance)
      if (
        speedGbps !== null &&
        Math.abs(speedGbps - stdDef.speedGbps) / stdDef.speedGbps > 0.15
      ) {
        fail(
          `${spec.ieee_standard} requires ${stdDef.speedGbps}G but extracted speed is ${speedGbps}G.`,
          35
        );
      }

      // Fiber type mismatch (soft — standard may have variants)
      if (fiberType && stdDef.fiberType && fiberType !== stdDef.fiberType) {
        warn(
          `${spec.ieee_standard} expects ${stdDef.fiberType} but fiber_type is ${fiberType}.`,
          10
        );
      }

      // Reach mismatch: more than 3× the defined reach is suspicious
      if (reachM !== null && stdDef.reachKm !== undefined) {
        const stdReachM = stdDef.reachKm * 1000;
        if (reachM > stdReachM * 3) {
          warn(
            `Reach ${reachM}m is >3× the ${spec.ieee_standard} defined reach (${stdReachM}m). Verify — may be a proprietary extended reach variant.`,
            5
          );
        }
      }
    } else {
      // Standard not in table — not an error, just warn for unknown standards
      warn(`IEEE standard "${spec.ieee_standard}" not in reference table. Accepted as-is.`, 0);
    }
  }

  // ── 5. DAC/AOC special rules ──────────────────────────────────────────────
  if (fiberType === "DAC" || fiberType === "AOC") {
    // NOTE(review): MAX_REACH.DAC is also 30, so this warning only ever fires
    // together with the section-3 error. The message mentions ~7m for passive
    // DAC — confirm whether the intended threshold here is lower.
    if (reachM !== null && reachM > 30 && fiberType === "DAC") {
      warn(
        `DAC cables > 30m are unusual (passive DAC max ~7m). Verify if active DAC or AOC.`,
        10
      );
    }
    if (wavelengths) {
      warn(
        `DAC/AOC have no wavelength. Extracted wavelength "${wavelengths}" may be wrong.`,
        5
      );
    }
  }

  // ── 6. Connector ↔ form factor ────────────────────────────────────────────
  if (spec.connector && ff) {
    // Trimmed for consistency with the other normalized fields.
    const connector = spec.connector.trim().toUpperCase();
    const mpoBased = ["QSFP", "QSFP+", "QSFP28", "QSFP56", "QSFP-DD", "OSFP", "CFP8"];
    const scBased = ["GBIC", "CSFP"];

    if (mpoBased.includes(ff) && connector === "SC") {
      warn(`${ff} modules rarely use SC connectors. LC or MPO expected. Verify.`, 10);
    }
    if (scBased.includes(ff) && connector === "LC") {
      // GBIC can use LC — soft warning only
      warn(`${ff} with LC connector is unusual. SC more common for this form factor.`, 5);
    }
  }

  // ── Tier assignment ───────────────────────────────────────────────────────
  const passed = errors.length === 0;
  let tier: SpecValidationResult["tier"];
  if (!passed) {
    tier = "rejected";
  } else if (warnings.length === 0 && deltaHundredths >= 0) {
    tier = "high";
  } else if (warnings.length <= 2 && deltaHundredths >= -15) {
    tier = "medium";
  } else {
    tier = "low";
  }

  return {
    passed,
    errors,
    warnings,
    // Single conversion back to a fraction; total penalty is floored at -0.9.
    confidence_delta: Math.max(deltaHundredths / 100, -0.9),
    tier,
  };
}

// ─────────────────────────────────────────────────────────────────────────────
// Convenience: combine with stock validation result
// ─────────────────────────────────────────────────────────────────────────────

/** Spec validation outcome merged with the LLM's own confidence score. */
export interface CombinedValidationResult {
  passed: boolean;
  spec_errors: string[];
  spec_warnings: string[];
  tier: SpecValidationResult["tier"];
  adjusted_confidence: number;
}

/**
 * Apply a spec validation result to the LLM's base confidence.
 *
 * @param specResult Result of {@link validateTransceiverSpec}.
 * @param baseLlmConfidence Confidence reported by the LLM; a NaN value is
 *        treated as 0 so it cannot propagate into the stored score.
 * @returns The validation fields plus `adjusted_confidence`, which is
 *          base + confidence_delta clamped to [0, 1].
 */
export function combineValidations(
  specResult: SpecValidationResult,
  baseLlmConfidence: number
): CombinedValidationResult {
  const base = Number.isNaN(baseLlmConfidence) ? 0 : baseLlmConfidence;
  const adjusted = Math.min(1.0, Math.max(0.0, base + specResult.confidence_delta));
  return {
    passed: specResult.passed,
    spec_errors: specResult.errors,
    spec_warnings: specResult.warnings,
    tier: specResult.tier,
    adjusted_confidence: adjusted,
  };
}