- spec-validator.ts: physical plausibility checks (form factor↔speed matrix, wavelength↔fiber consistency, IEEE standard cross-check, reach limits). Outputs tier (high/medium/low/rejected) + confidence_delta for LLM scores. - training-data-writer.ts: converts validated crawler extractions to SFT JSONL training pairs (spec_qa / crawl_reasoning / validation / discovery types). Auto-commits and pushes to Gitea tip-training-data repo in batches of 50. - vendor-discovery-crawler.ts: PlaywrightCrawler pipeline — catalog URL → LLM extraction (scrapeWithLLM) → spec validation → DB persist + Gitea SFT training pairs. 8 vendor configs registered (Cisco/Juniper/Arista/FS.com/Flexoptix/Nokia/Huawei/II-VI). - scheduler.ts: 8 weekly discover:vendor:* jobs added (Sun 20:00–Mon 10:00 UTC). Total registered jobs: 102. - Gitea repo created: gitea.context-x.org/rene/tip-training-data
374 lines
16 KiB
TypeScript
374 lines
16 KiB
TypeScript
/**
 * Crawler LLM — Transceiver spec physical plausibility validator.
 *
 * Runs AFTER LLM extraction to catch technically impossible combinations
 * (e.g. 100G over SFP, 850nm on SMF, 80km over MMF). Complements
 * validator.ts, which checks stock/price sanity.
 *
 * Returns a SpecValidationResult with:
 *   - passed: when false, blocks the DB write and lowers the training-data confidence tier
 *   - warnings: the record is still written to the DB but is flagged for human review
 *   - confidence_delta: adjustment applied to the LLM confidence score
 */
|
||
|
||
// ─────────────────────────────────────────────────────────────────────────────
|
||
// Type definitions
|
||
// ─────────────────────────────────────────────────────────────────────────────
|
||
|
||
/** A field the extractor may leave out entirely or explicitly set to `null`. */
type Nullable<T> = T | null;

/**
 * Transceiver attributes as produced by the LLM extraction step.
 *
 * Every field is optional and nullable: the extractor fills in only what it
 * could find on the source page.
 */
export interface ExtractedSpec {
  /** Vendor part number. */
  part_number?: Nullable<string>;
  /** Module form factor as written on the page, e.g. "QSFP28". */
  form_factor?: Nullable<string>;
  /** Data rate in Gbps. */
  speed_gbps?: Nullable<number>;
  /** Rated reach in meters. */
  reach_meters?: Nullable<number>;
  /** Medium: "SMF" | "MMF" | "CU" | "DAC" | "AOC". */
  fiber_type?: Nullable<string>;
  /** Optical/electrical connector, e.g. "LC" or "SC". */
  connector?: Nullable<string>;
  /** Free-text wavelength(s), e.g. "850nm" or "1310nm TX / 1490nm RX". */
  wavelengths?: Nullable<string>;
  /** Standard designation, e.g. "100GBASE-SR4". */
  ieee_standard?: Nullable<string>;
  /** DOM support flag as extracted. */
  dom_support?: Nullable<boolean>;
}
|
||
|
||
/**
 * Outcome of the physical plausibility checks for one extracted spec.
 */
export interface SpecValidationResult {
  /** `true` when no hard errors were recorded. */
  passed: boolean;
  /** Hard failures — combinations considered technically impossible. */
  errors: string[];
  /** Soft findings — unusual but possible; meant for human review. */
  warnings: string[];
  /** Adjustment to add to the LLM confidence score (negative = reduce it). */
  confidence_delta: number;
  /** Overall quality grade derived from the errors, warnings and delta. */
  tier:
    | "high"
    | "medium"
    | "low"
    | "rejected";
}
|
||
|
||
// ─────────────────────────────────────────────────────────────────────────────
|
||
// Compatibility tables
|
||
// ─────────────────────────────────────────────────────────────────────────────
|
||
|
||
/**
 * Max rated speed per form factor (Gbps). DAC/AOC = same form factor shell.
 *
 * Keys are the canonical upper-case form-factor names. Values are upper bounds
 * and, where vendors commonly quote the line rate rather than the nominal
 * data rate, the line rate is used (e.g. 4.25 for 4G FC SFP, 1.25 for GBIC).
 */
const FORM_FACTOR_MAX_SPEED: Record<string, number> = {
  "GBIC":     1.25,    // 1000BASE-X line rate is 1.25 GBd and is often quoted as "1.25G"
  "SFP":      4.25,    // 4G FC max; 1G Ethernet common
  "SFP+":     28.05,   // nominally 10G but 16G FC / 25G variants exist
  "SFP28":    28.05,
  "SFP56":    56,
  "SFP-DD":   100,     // dual-lane SFP
  "QSFP":     40,
  "QSFP+":    40,
  "QSFP28":   112,     // 4×25G = 100G; some push 112G
  "QSFP56":   224,     // 4×56G = 200G
  "QSFP-DD":  800,     // 8×100G
  "QSFP112":  400,     // 4×112G = 400G; 800G needs the 8-lane QSFP-DD/OSFP shells
  "OSFP":     800,
  "OSFP-RHS": 800,
  "CFP":      100,
  "CFP2":     400,
  "CFP4":     100,
  "CFP8":     400,
  "XFP":      10,
  "X2":       10,
  "XENPAK":   10,
  "DSFP":     100,
  "CSFP":     2.5,
};
|
||
|
||
/**
 * Lowest speed (Gbps) normally seen for each form factor.
 *
 * Used as a soft lower bound: values far below it indicate a wild
 * form-factor/speed mismatch in the extracted data.
 */
const FORM_FACTOR_MIN_SPEED: Record<string, number> = {
  // Legacy / single-lane pluggables
  GBIC: 0.1,
  SFP: 0.1,
  "SFP+": 1,
  SFP28: 10,
  SFP56: 25,
  "SFP-DD": 50,

  // Quad-lane and wider
  QSFP: 4,
  "QSFP+": 10,
  QSFP28: 40,
  QSFP56: 100,
  "QSFP-DD": 100,
  QSFP112: 200,
  OSFP: 200,
  "OSFP-RHS": 200,

  // CFP family
  CFP: 10,
  CFP2: 40,
  CFP4: 10,
  CFP8: 100,

  // 10G-era modules
  XFP: 10,
  X2: 10,
  XENPAK: 10,

  // Dual / compact SFP variants
  DSFP: 25,
  CSFP: 0.1,
};
|
||
|
||
/**
 * Maps a wavelength to the fiber type it is normally used with.
 *
 * Anything up to 900 nm (classically 850 nm) is treated as multi-mode and
 * anything from 1260 nm upward as single-mode; the range in between is
 * reported as "either". Note that 1310 nm SFPs (1000BASE-LX) can run on MMF
 * with a mode-conditioning cable — callers handle that exception themselves.
 *
 * @param nm Wavelength in nanometers.
 * @returns "MMF", "SMF", or "either" when the wavelength gives no clear hint.
 */
function expectedFiberForWavelength(nm: number): "MMF" | "SMF" | "either" {
  const multiModeUpperNm = 900;
  const singleModeLowerNm = 1260;

  return nm <= multiModeUpperNm
    ? "MMF"
    : nm >= singleModeLowerNm
      ? "SMF"
      : "either";
}
|
||
|
||
/**
 * Upper bound on plausible reach, in meters, keyed by medium type.
 * These are soft sanity limits rather than exact specification values.
 */
const MAX_REACH: Record<string, number> = {
  // Fiber — OM5 pushes ~3.5km; 4km is the outer limit for 100M FX
  MMF: 4_000,
  // Fiber — 200km coherent ZR is real
  SMF: 200_000,
  // Copper and cable assemblies
  CU: 100,
  DAC: 30,
  AOC: 200,
};
|
||
|
||
/**
 * Reference table of known standard designations.
 *
 * Each entry gives the canonical speed in Gbps and, where applicable, the
 * expected medium (`fiberType`) and the nominal reach in kilometers
 * (`reachKm`). Entries without a reach simply omit the property.
 */
const IEEE_STANDARDS: Record<string, { speedGbps: number; fiberType?: string; reachKm?: number }> = {
  // ── 100M ──
  "100BASE-FX": { speedGbps: 0.1, fiberType: "MMF", reachKm: 2 },
  "100BASE-LX10": { speedGbps: 0.1, fiberType: "SMF", reachKm: 10 },

  // ── 1G ──
  "1000BASE-SX": { speedGbps: 1, fiberType: "MMF", reachKm: 0.55 },
  "1000BASE-LX": { speedGbps: 1, fiberType: "SMF", reachKm: 10 },
  "1000BASE-EX": { speedGbps: 1, fiberType: "SMF", reachKm: 40 },
  "1000BASE-ZX": { speedGbps: 1, fiberType: "SMF", reachKm: 80 },
  "1000BASE-T": { speedGbps: 1, fiberType: "CU" },

  // ── 10G ──
  "10GBASE-SR": { speedGbps: 10, fiberType: "MMF", reachKm: 0.3 },
  "10GBASE-LR": { speedGbps: 10, fiberType: "SMF", reachKm: 10 },
  "10GBASE-ER": { speedGbps: 10, fiberType: "SMF", reachKm: 40 },
  "10GBASE-ZR": { speedGbps: 10, fiberType: "SMF", reachKm: 80 },

  // ── 25G ──
  "25GBASE-SR": { speedGbps: 25, fiberType: "MMF", reachKm: 0.1 },
  "25GBASE-LR": { speedGbps: 25, fiberType: "SMF", reachKm: 10 },
  "25GBASE-ER": { speedGbps: 25, fiberType: "SMF", reachKm: 40 },

  // ── 40G ──
  "40GBASE-SR4": { speedGbps: 40, fiberType: "MMF", reachKm: 0.15 },
  "40GBASE-LR4": { speedGbps: 40, fiberType: "SMF", reachKm: 10 },
  "40GBASE-ER4": { speedGbps: 40, fiberType: "SMF", reachKm: 40 },

  // ── 100G ──
  "100GBASE-SR4": { speedGbps: 100, fiberType: "MMF", reachKm: 0.1 },
  "100GBASE-SR10": { speedGbps: 100, fiberType: "MMF", reachKm: 0.15 },
  "100GBASE-LR4": { speedGbps: 100, fiberType: "SMF", reachKm: 10 },
  "100GBASE-ER4": { speedGbps: 100, fiberType: "SMF", reachKm: 40 },
  "100GBASE-ZR": { speedGbps: 100, fiberType: "SMF", reachKm: 80 },

  // ── 400G ──
  "400GBASE-SR4": { speedGbps: 400, fiberType: "MMF", reachKm: 0.1 },
  "400GBASE-SR8": { speedGbps: 400, fiberType: "MMF", reachKm: 0.1 },
  "400GBASE-LR4": { speedGbps: 400, fiberType: "SMF", reachKm: 10 },
  "400GBASE-LR8": { speedGbps: 400, fiberType: "SMF", reachKm: 10 },
  "400GBASE-ER8": { speedGbps: 400, fiberType: "SMF", reachKm: 40 },
  "400GBASE-ZR": { speedGbps: 400, fiberType: "SMF", reachKm: 80 },
  "400ZR": { speedGbps: 400, fiberType: "SMF", reachKm: 120 },

  // ── 800G ──
  "800GBASE-SR8": { speedGbps: 800, fiberType: "MMF", reachKm: 0.1 },
  "800GBASE-LR4": { speedGbps: 800, fiberType: "SMF", reachKm: 2 },
};
|
||
|
||
// ─────────────────────────────────────────────────────────────────────────────
|
||
// Helpers
|
||
// ─────────────────────────────────────────────────────────────────────────────
|
||
|
||
/**
 * Parse the first wavelength from a string like "850nm" or
 * "1310nm TX / 1490nm RX".
 *
 * The unit is matched case-insensitively ("850 NM") and an optional decimal
 * fraction is tolerated ("1550.12nm" → 1550). The integer part must be a
 * complete 3–4 digit number: digits that are merely the tail of a longer
 * number or of a decimal fraction are not accepted, so "1550.116nm" is read
 * as 1550 rather than 116.
 *
 * @param wl Free-text wavelength description.
 * @returns The integer part of the first wavelength in nm, or `null` when
 *          no wavelength is found.
 */
function parsePrimaryWavelength(wl: string): number | null {
  // (?<![\d.]) stops the match from starting inside a longer number or inside
  // the fractional part of a decimal value.
  const match = /(?<![\d.])(\d{3,4})(?:\.\d+)?\s*nm/i.exec(wl);
  const digits = match?.[1];
  return digits !== undefined ? Number.parseInt(digits, 10) : null;
}
|
||
|
||
/**
 * Canonicalize a form-factor name so it can be used as a lookup key.
 *
 * Trims, upper-cases and removes all whitespace, then repairs the spellings
 * that whitespace removal (or a stray separator) would otherwise turn into an
 * unknown key:
 *   - "QSFP DD" / "QSFPDD" / "QSFP_DD" → "QSFP-DD" (same for "SFP-DD")
 *   - "OSFP RHS" / "OSFPRHS"           → "OSFP-RHS"
 *   - "QSFP-28" / "SFP_56" / "CFP-2"   → "QSFP28" / "SFP56" / "CFP2"
 * Names that are already canonical, and names it does not recognize, are
 * returned unchanged apart from the trim/upper-case/whitespace step.
 *
 * @param ff Form factor as extracted from the source page.
 * @returns The canonical upper-case form-factor name.
 */
function normalizeFormFactor(ff: string): string {
  const compact = ff.trim().toUpperCase().replace(/\s+/g, "");
  return compact
    .replace(/^(Q?SFP)[-_]?DD$/, "$1-DD")
    .replace(/^OSFP[-_]?RHS$/, "OSFP-RHS")
    .replace(/^(Q?SFP|CFP)[-_](\d+)$/, "$1$2");
}
|
||
|
||
/**
 * Canonicalize a standard designation such as "100GBASE-SR4" for comparison.
 *
 * Trims, upper-cases and removes all whitespace, then makes sure exactly one
 * ASCII hyphen follows "BASE": a missing separator ("100GBASE SR4" →
 * "100GBASESR4"), an underscore, or a Unicode hyphen/dash is rewritten to
 * "BASE-". Designations without "BASE" (e.g. "400ZR") pass through unchanged.
 *
 * @param s Standard name as extracted or as stored in the reference table.
 * @returns The canonical upper-case designation.
 */
function normalizeStandard(s: string): string {
  return s
    .trim()
    .toUpperCase()
    .replace(/\s+/g, "")
    // \u2010-\u2015 covers the Unicode hyphen and dash characters that
    // frequently appear in scraped vendor pages.
    .replace(/BASE[-_\u2010-\u2015]?(?=[A-Z0-9])/, "BASE-");
}
|
||
|
||
// ─────────────────────────────────────────────────────────────────────────────
|
||
// Main validator
|
||
// ─────────────────────────────────────────────────────────────────────────────
|
||
|
||
/**
 * Run the physical plausibility checks on one extracted transceiver spec.
 *
 * Checks performed, each only when the fields it needs are present:
 *   0. numeric sanity of speed and reach,
 *   1. form factor ↔ speed,
 *   2. wavelength ↔ fiber type,
 *   3. reach ↔ fiber type,
 *   4. cross-check against the standards reference table,
 *   5. DAC/AOC special rules,
 *   6. connector ↔ form factor.
 *
 * Hard failures go to `errors` (any error ⇒ `passed: false`, tier
 * "rejected"); soft findings go to `warnings`. Each finding also subtracts a
 * fixed penalty from the confidence delta.
 *
 * @param spec Extracted fields; missing/`null` fields skip the related checks.
 * @returns Errors, warnings, the confidence delta (never below -0.9) and the
 *          resulting tier.
 */
export function validateTransceiverSpec(spec: ExtractedSpec): SpecValidationResult {
  const errors: string[] = [];
  const warnings: string[] = [];
  let confidenceDelta = 0;

  const ff = spec.form_factor ? normalizeFormFactor(spec.form_factor) : null;
  const fiberType = spec.fiber_type?.toUpperCase().trim() ?? null;
  const wavelengths = spec.wavelengths ?? null;
  const rawSpeed = spec.speed_gbps ?? null;
  const rawReach = spec.reach_meters ?? null;

  // ── 0. Numeric sanity ─────────────────────────────────────────────────────
  // NaN compares false against every threshold below and would otherwise sail
  // through as plausible, as would negative values. Invalid numbers are
  // reported once and then excluded from the remaining checks.
  const speedIsValid = rawSpeed === null || (Number.isFinite(rawSpeed) && rawSpeed > 0);
  if (!speedIsValid) {
    errors.push(`Speed ${rawSpeed}G is not a positive finite number. Data error.`);
    confidenceDelta -= 0.4;
  }
  const speedGbps = speedIsValid ? rawSpeed : null;

  const reachIsValid = rawReach === null || (Number.isFinite(rawReach) && rawReach >= 0);
  if (!reachIsValid) {
    errors.push(`Reach ${rawReach}m is not a non-negative finite number. Data error.`);
    confidenceDelta -= 0.4;
  }
  const reachM = reachIsValid ? rawReach : null;

  // ── 1. Form factor ↔ speed compatibility ──────────────────────────────────
  if (ff && speedGbps !== null) {
    const maxSpeed = FORM_FACTOR_MAX_SPEED[ff];
    const minSpeed = FORM_FACTOR_MIN_SPEED[ff];

    // 15% headroom above the table value before calling it impossible.
    if (maxSpeed !== undefined && speedGbps > maxSpeed * 1.15) {
      errors.push(
        `Speed ${speedGbps}G exceeds ${ff} maximum (${maxSpeed}G). Physically impossible.`
      );
      confidenceDelta -= 0.4;
    }

    // Only warn when the speed is below half of the typical minimum.
    if (minSpeed !== undefined && speedGbps < minSpeed * 0.5) {
      warnings.push(
        `Speed ${speedGbps}G is unusually low for ${ff} (typical min ${minSpeed}G). Verify.`
      );
      confidenceDelta -= 0.1;
    }
  }

  // ── 2. Wavelength ↔ fiber type consistency ────────────────────────────────
  if (wavelengths && fiberType && fiberType !== "DAC" && fiberType !== "AOC" && fiberType !== "CU") {
    const primaryNm = parsePrimaryWavelength(wavelengths);
    if (primaryNm !== null) {
      const expectedFiber = expectedFiberForWavelength(primaryNm);

      if (expectedFiber === "MMF" && fiberType === "SMF") {
        errors.push(
          `${primaryNm}nm is a multi-mode wavelength but fiber_type is SMF. Check the source.`
        );
        confidenceDelta -= 0.3;
      }

      if (expectedFiber === "SMF" && fiberType === "MMF") {
        // 1310nm LX on MMF with mode-conditioning cable is a real thing — warn, not error
        if (primaryNm >= 1260 && primaryNm <= 1360) {
          warnings.push(
            `${primaryNm}nm on MMF is unusual. Possible mode-conditioning cable — verify.`
          );
          confidenceDelta -= 0.05;
        } else {
          errors.push(
            `${primaryNm}nm (SMF wavelength) cannot work on MMF fiber at this reach.`
          );
          confidenceDelta -= 0.35;
        }
      }
    }
  }

  // ── 3. Reach ↔ fiber type sanity ─────────────────────────────────────────
  const maxReach = fiberType ? MAX_REACH[fiberType] : undefined;
  if (reachM !== null && maxReach !== undefined && reachM > maxReach) {
    errors.push(
      `Reach ${reachM}m exceeds physical maximum for ${fiberType} (${maxReach}m). Data error.`
    );
    confidenceDelta -= 0.4;
  }

  if (reachM !== null && fiberType === "MMF" && reachM > 2000) {
    warnings.push(
      `MMF reach ${reachM}m is very high (rare). OM5 max ~3.5km, earlier OM4 max 400m at 10G+.`
    );
    confidenceDelta -= 0.1;
  }

  // ── 4. IEEE standard cross-check ─────────────────────────────────────────
  const ieeeStandard = spec.ieee_standard;
  if (ieeeStandard) {
    // Normalize the input once instead of once per table key.
    const wanted = normalizeStandard(ieeeStandard);
    const stdKey = Object.keys(IEEE_STANDARDS).find((k) => normalizeStandard(k) === wanted);
    const stdDef = stdKey !== undefined ? IEEE_STANDARDS[stdKey] : undefined;

    if (stdDef) {
      // Speed mismatch: more than 15% away from the standard's speed
      if (speedGbps !== null && Math.abs(speedGbps - stdDef.speedGbps) / stdDef.speedGbps > 0.15) {
        errors.push(
          `${ieeeStandard} requires ${stdDef.speedGbps}G but extracted speed is ${speedGbps}G.`
        );
        confidenceDelta -= 0.35;
      }

      // Fiber type mismatch (soft — standard may have variants)
      if (fiberType && stdDef.fiberType && fiberType !== stdDef.fiberType) {
        warnings.push(
          `${ieeeStandard} expects ${stdDef.fiberType} but fiber_type is ${fiberType}.`
        );
        confidenceDelta -= 0.1;
      }

      // Reach mismatch: more than 3× the defined reach is suspicious
      if (reachM !== null && stdDef.reachKm !== undefined) {
        const stdReachM = stdDef.reachKm * 1000;
        if (reachM > stdReachM * 3) {
          warnings.push(
            `Reach ${reachM}m is >3× the ${ieeeStandard} defined reach (${stdReachM}m). Verify — may be a proprietary extended reach variant.`
          );
          confidenceDelta -= 0.05;
        }
      }
    } else {
      // Standard not in table — not an error, just warn for unknown standards
      warnings.push(`IEEE standard "${ieeeStandard}" not in reference table. Accepted as-is.`);
    }
  }

  // ── 5. DAC/AOC special rules ──────────────────────────────────────────────
  if (fiberType === "DAC" || fiberType === "AOC") {
    // NOTE(review): with MAX_REACH.DAC at 30 m, any reach that triggers this
    // warning has already produced a hard error in check 3, so the warning
    // never changes the tier on its own. Confirm the intended threshold.
    if (reachM !== null && reachM > 30 && fiberType === "DAC") {
      warnings.push(`DAC cables > 30m are unusual (passive DAC max ~7m). Verify if active DAC or AOC.`);
      confidenceDelta -= 0.1;
    }
    if (wavelengths) {
      warnings.push(`DAC/AOC have no wavelength. Extracted wavelength "${wavelengths}" may be wrong.`);
      confidenceDelta -= 0.05;
    }
  }

  // ── 6. Connector ↔ form factor ────────────────────────────────────────────
  if (spec.connector && ff) {
    // Trim so that " SC" / "LC " still match the exact comparisons below.
    const connector = spec.connector.trim().toUpperCase();
    const mpoBased = ["QSFP", "QSFP+", "QSFP28", "QSFP56", "QSFP-DD", "OSFP", "CFP8"];
    const scBased = ["GBIC", "CSFP"];

    if (mpoBased.includes(ff) && connector === "SC") {
      warnings.push(`${ff} modules rarely use SC connectors. LC or MPO expected. Verify.`);
      confidenceDelta -= 0.1;
    }
    if (scBased.includes(ff) && connector === "LC") {
      // GBIC can use LC — soft warning only
      warnings.push(`${ff} with LC connector is unusual. SC more common for this form factor.`);
      confidenceDelta -= 0.05;
    }
  }

  // ── Tier assignment ───────────────────────────────────────────────────────
  // Penalties are multiples of 0.05, but summing them in binary floating point
  // drifts (0.1 + 0.05 === 0.15000000000000002), which would push a record
  // sitting exactly on the -0.15 boundary into the "low" tier. Round to whole
  // hundredths before comparing and before returning the delta.
  const delta = Math.round(confidenceDelta * 100) / 100;
  const passed = errors.length === 0;
  let tier: SpecValidationResult["tier"];

  if (!passed) {
    tier = "rejected";
  } else if (warnings.length === 0 && delta >= 0) {
    tier = "high";
  } else if (warnings.length <= 2 && delta >= -0.15) {
    tier = "medium";
  } else {
    tier = "low";
  }

  return {
    passed,
    errors,
    warnings,
    // Never subtract more than 0.9 from the LLM confidence.
    confidence_delta: Math.max(delta, -0.9),
    tier,
  };
}
|
||
|
||
// ─────────────────────────────────────────────────────────────────────────────
|
||
// Convenience: combine with stock validation result
|
||
// ─────────────────────────────────────────────────────────────────────────────
|
||
|
||
/**
 * Spec validation outcome merged with the LLM's own confidence score.
 */
export interface CombinedValidationResult {
  /** Carried over from the spec validation result. */
  passed: boolean;

  /** Hard errors reported by the spec validator. */
  spec_errors: string[];

  /** Soft warnings reported by the spec validator. */
  spec_warnings: string[];

  /** Quality tier assigned by the spec validator. */
  tier: SpecValidationResult["tier"];

  /** LLM confidence after applying the validator's delta, limited to 0..1. */
  adjusted_confidence: number;
}
|
||
|
||
/**
 * Merge a spec validation result with the LLM's base confidence.
 *
 * The validator's `confidence_delta` is added to `baseLlmConfidence` and the
 * sum is clamped to the range 0..1. If the sum is NaN (for example because
 * the LLM confidence could not be parsed) the adjusted confidence is 0 rather
 * than NaN, so a missing score can never look like a usable one downstream.
 *
 * @param specResult Result returned by the spec validator.
 * @param baseLlmConfidence Confidence reported by the LLM, nominally 0..1.
 * @returns The validator findings plus the adjusted, clamped confidence.
 */
export function combineValidations(
  specResult: SpecValidationResult,
  baseLlmConfidence: number
): CombinedValidationResult {
  const raw = baseLlmConfidence + specResult.confidence_delta;
  // Math.min/Math.max propagate NaN, so it has to be handled explicitly.
  const adjusted = Number.isNaN(raw) ? 0 : Math.min(1.0, Math.max(0.0, raw));

  return {
    passed: specResult.passed,
    spec_errors: specResult.errors,
    spec_warnings: specResult.warnings,
    tier: specResult.tier,
    adjusted_confidence: adjusted,
  };
}
|