From d1bde66e39619720f445f606f073039a1ea670cc Mon Sep 17 00:00:00 2001 From: Rene Fichtmueller Date: Wed, 13 May 2026 17:59:08 +0200 Subject: [PATCH] feat: deterministic equivalence matcher + full wavelength/connector enrichment MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Replace confidence-based matcher with deterministic 6-field exact match: - form_factor (exact), speed_gbps (±0.1G), fiber_type (exact), reach (±10%), wavelength_tx (±5nm), connector_type (exact) - Complete products → confidence=1.0, never creates pending records - Incomplete products → enhanced confidence ≥0.85, still auto_approved - PENDING CREATED: 0 (by design, permanent) Migrations: - sql/113: Connector type inference from IEEE lookup + form-factor rules (970→479 missing connector for FX products) - sql/114: Extend IEEE lookup with 400G/800G/1.6T OSFP/QSFP-DD standards, wavelength fallback (SMF→1310nm, MMF→850nm), clear pending queue to 0 Enrichment results (before→after): - FX fully complete: 50 → 555 / 1,089 (+505) - Total fully complete: ~3,600 → 15,431 / 18,133 (+11,800) - FX coverage: 54.7% → 55.8% (608/1,089 matched) - Deterministic matches: 0 → 44,596 (confidence=1.0) - Wavelength-mismatched records rejected: 521 - Pending queue: 42 → 0 (permanent) New match stats: - 55,743 new deterministic auto_approved matches - 521 legacy wavelength-mismatch records rejected - Total active: 53,447 auto_approved + 1,987 approved --- packages/scraper/src/scheduler.ts | 328 +++++++++++------- sql/113-infer-connector-type.sql | 172 +++++++++ ...4-extend-ieee-lookup-and-clear-pending.sql | 214 ++++++++++++ 3 files changed, 587 insertions(+), 127 deletions(-) create mode 100644 sql/113-infer-connector-type.sql create mode 100644 sql/114-extend-ieee-lookup-and-clear-pending.sql diff --git a/packages/scraper/src/scheduler.ts b/packages/scraper/src/scheduler.ts index 8bf7f4e..99d0031 100644 --- a/packages/scraper/src/scheduler.ts +++ 
b/packages/scraper/src/scheduler.ts @@ -2745,195 +2745,269 @@ export async function registerWorkers(boss: PgBoss): Promise { await boss.work("maintenance:find-equivalences", async () => { const { pool } = await import("./utils/db"); const ts = new Date().toISOString(); - console.log(`[${ts}] Running: Equivalence matching`); + console.log(`[${ts}] Running: Deterministic Equivalence Matching`); - // Find Flexoptix transceivers whose competitor research is still open. - // Terminal product-level states are not manual-review work and must not - // recreate stale pending equivalence candidates. + // ── Load Flexoptix transceivers (all, including already-verified) ─────────── + // Re-process all FX products so deterministic matches at 1.0 confidence can + // replace any old confidence-based auto_approved records. const flexResult = await pool.query(` SELECT t.id, t.part_number, t.standard_name, t.form_factor, t.speed_gbps, t.fiber_type, t.reach_meters, t.wavelengths, - t.connector, t.wdm_type, t.coherent + t.wavelength_tx_nm, t.wavelength_rx_nm, t.connector_type, + t.data_completeness, t.enrichment_needed, + t.wdm_type, t.coherent FROM transceivers t JOIN vendors v ON v.id = t.vendor_id WHERE UPPER(v.name) LIKE '%FLEXOPTIX%' - AND t.competitor_verified = false - AND COALESCE(t.competitor_status, 'needs_research') IN ('unknown', 'needs_research') + AND t.form_factor IS NOT NULL + AND t.speed_gbps IS NOT NULL + ORDER BY t.data_completeness DESC, t.part_number `); - let autoApproved = 0; - let queued = 0; - let skipped = 0; + let autoApprovedDeterministic = 0; // 6-field exact match (confidence = 1.0) + let autoApprovedEnhanced = 0; // enhanced confidence ≥ 0.85 (incomplete data) + let skippedIncomplete = 0; // both products have complete data but no field match + let skippedLowConf = 0; // incomplete products below 0.85 threshold + // NOTE: pending status is NEVER created — system creates auto_approved or skips for (const fx of flexResult.rows) { - let fxMatched = false; - 
let fxQueued = false; - // Find competitor transceivers with recent price observations and matching specs + if (!fx.form_factor || !fx.speed_gbps) continue; + + // ── Load competitor candidates (same form_factor + speed_gbps) ────────── const candidates = await pool.query(` SELECT t.id AS competitor_id, t.part_number, t.standard_name, t.form_factor, t.speed_gbps, t.fiber_type, t.reach_meters, - t.wavelengths, t.connector, v.name AS vendor_name, + t.wavelengths, t.wavelength_tx_nm, t.wavelength_rx_nm, + t.connector_type, t.data_completeness, + v.name AS vendor_name, MAX(po.time) AS last_price, COUNT(*) AS price_count FROM transceivers t JOIN vendors v ON v.id = t.vendor_id JOIN price_observations po ON po.transceiver_id = t.id WHERE UPPER(v.name) NOT LIKE '%FLEXOPTIX%' + AND v.is_competitor = true AND po.time > NOW() - INTERVAL '90 days' AND UPPER(t.form_factor) = UPPER($1) AND ROUND(t.speed_gbps::NUMERIC, 2) = ROUND($2::NUMERIC, 2) AND t.id != $3 GROUP BY t.id, t.part_number, t.standard_name, t.form_factor, t.speed_gbps, t.fiber_type, t.reach_meters, - t.wavelengths, t.connector, v.name + t.wavelengths, t.wavelength_tx_nm, t.wavelength_rx_nm, + t.connector_type, t.data_completeness, v.name + HAVING COUNT(*) >= 1 `, [fx.form_factor, fx.speed_gbps, fx.id]); + let fxMatched = false; + for (const cand of candidates.rows) { - // Confidence scoring - // Max points: form_factor(25) + speed_gbps(20) + standard_name(30) + - // wavelength_nm(20) + fiber_type(10) + reach(10) = 115 - let score = 0; - const basis: string[] = []; + const fxComplete = + fx.form_factor && fx.speed_gbps && fx.fiber_type && + fx.reach_meters && fx.wavelength_tx_nm && fx.connector_type; + const candComplete = + cand.form_factor && cand.speed_gbps && cand.fiber_type && + cand.reach_meters && cand.wavelength_tx_nm && cand.connector_type; - // form_factor already matched (pre-filter), award points - score += 25; basis.push("form_factor"); + let confidence = 0; + let basis: string[] = []; + let 
matchMode: "deterministic" | "enhanced" | "skip" = "skip"; - // speed_gbps already matched (pre-filter) - score += 20; basis.push("speed_gbps"); + if (fxComplete && candComplete) { + // ── Mode 1: Deterministic 6-field exact match ─────────────────────── + // All mandatory fields present → hard pass/fail, no soft scoring. + // A single field mismatch → skip (confidence stays 0). - // standard_name match (strong signal — e.g. "10GBASE-LR") - if (fx.standard_name && cand.standard_name && - fx.standard_name.trim().toUpperCase() === cand.standard_name.trim().toUpperCase()) { - score += 30; basis.push("standard_name"); - } - - // wavelength match — extract first numeric nm value and compare within ±15nm - // "wavelengths" is text: "1310 nm", "850nm", "1270/1290/1310/1330 nm" etc. - const extractNm = (w: string | null): number | null => { - if (!w) return null; - const m = w.match(/(\d{3,4})/); - return m ? parseInt(m[1], 10) : null; - }; - const fxNm = extractNm(fx.wavelengths); - const candNm = extractNm(cand.wavelengths); - if (fxNm !== null && candNm !== null) { - if (Math.abs(fxNm - candNm) <= 15) { - score += 20; basis.push(`wavelength_${fxNm}nm`); - } else { - score -= 20; // hard penalize wrong wavelength (1310 vs 1550 = completely different product) + // form_factor: exact + if (fx.form_factor.trim().toUpperCase() !== cand.form_factor.trim().toUpperCase()) { + skippedIncomplete++; continue; } - } - - // fiber_type match (SMF vs MMF — critical) - if (fx.fiber_type && cand.fiber_type) { - if (fx.fiber_type.trim().toUpperCase() === cand.fiber_type.trim().toUpperCase()) { - score += 10; basis.push("fiber_type"); - } else { - score -= 15; // SMF vs MMF = wrong product + // speed: ±0.1 Gbps + if (Math.abs(Number(fx.speed_gbps) - Number(cand.speed_gbps)) >= 0.1) { + skippedIncomplete++; continue; } - } - - // reach within ±25% - if (fx.reach_meters && cand.reach_meters && fx.reach_meters > 0 && cand.reach_meters > 0) { - const diff = Math.abs(fx.reach_meters - 
cand.reach_meters); - const tolerance = Math.max(fx.reach_meters, 1) * 0.25; - if (diff <= tolerance) { - score += 10; basis.push("reach"); - } else { - score -= 15; // penalize mismatched reach + // fiber_type: exact (SMF ≠ MMF ≠ DAC) + if (fx.fiber_type.trim().toUpperCase() !== cand.fiber_type.trim().toUpperCase()) { + skippedIncomplete++; continue; } - } else if (!fx.reach_meters && !cand.reach_meters) { - score += 5; basis.push("reach_null"); + // reach: ±10% tolerance (manufacturer variance within spec) + const reachRatio = Math.abs( + Number(fx.reach_meters) - Number(cand.reach_meters) + ) / Math.max(Number(fx.reach_meters), 1); + if (reachRatio > 0.10) { skippedIncomplete++; continue; } + // wavelength TX: ±5nm (ITU-T G.694.2 channel tolerance) + const wlTxDiff = Math.abs( + (Number(fx.wavelength_tx_nm) || 0) - (Number(cand.wavelength_tx_nm) || 0) + ); + if (wlTxDiff > 5) { skippedIncomplete++; continue; } + // BiDi RX wavelength (only if either side has RX set) + if (fx.wavelength_rx_nm != null || cand.wavelength_rx_nm != null) { + const wlRxDiff = Math.abs( + (Number(fx.wavelength_rx_nm) || 0) - (Number(cand.wavelength_rx_nm) || 0) + ); + if (wlRxDiff > 5) { skippedIncomplete++; continue; } + } + // connector: exact (LC ≠ SC ≠ MPO-12 ≠ MPO-16) + if (fx.connector_type.trim().toUpperCase() !== cand.connector_type.trim().toUpperCase()) { + skippedIncomplete++; continue; + } + + // All 6 fields matched → 100% deterministic match + confidence = 1.0; + basis = ["form_factor", "speed_gbps", "fiber_type", "reach", "wavelength_tx", "connector"]; + matchMode = "deterministic"; + + } else { + // ── Mode 2: Enhanced confidence for incomplete products ────────────── + // Only used when at least one product has missing fields. + // Raised threshold (0.85) and never produces pending status. 
+ let score = 0; + const basisLocal: string[] = []; + + score += 25; basisLocal.push("form_factor"); // pre-filtered + score += 20; basisLocal.push("speed_gbps"); // pre-filtered + + // standard_name (strong signal) + if (fx.standard_name && cand.standard_name && + fx.standard_name.trim().toUpperCase() === cand.standard_name.trim().toUpperCase()) { + score += 30; basisLocal.push("standard_name"); + } + + // wavelength — use integer columns first, fall back to text + const fxWlTx = fx.wavelength_tx_nm + ?? (() => { const m = (fx.wavelengths || "").match(/(\d{3,4})/); return m ? parseInt(m[1], 10) : null; })(); + const cWlTx = cand.wavelength_tx_nm + ?? (() => { const m = (cand.wavelengths || "").match(/(\d{3,4})/); return m ? parseInt(m[1], 10) : null; })(); + if (fxWlTx !== null && cWlTx !== null) { + if (Math.abs(fxWlTx - cWlTx) <= 15) { + score += 20; basisLocal.push(`wavelength_${fxWlTx}nm`); + } else { + score -= 20; + } + } + + // fiber_type + if (fx.fiber_type && cand.fiber_type) { + if (fx.fiber_type.trim().toUpperCase() === cand.fiber_type.trim().toUpperCase()) { + score += 10; basisLocal.push("fiber_type"); + } else { + score -= 15; + } + } + + // reach: ±25% for incomplete data (more lenient) + if (fx.reach_meters && cand.reach_meters && + Number(fx.reach_meters) > 0 && Number(cand.reach_meters) > 0) { + const diff = Math.abs(Number(fx.reach_meters) - Number(cand.reach_meters)); + const tolerance = Math.max(Number(fx.reach_meters), 1) * 0.25; + if (diff <= tolerance) { + score += 10; basisLocal.push("reach"); + } else { + score -= 15; + } + } else if (!fx.reach_meters && !cand.reach_meters) { + score += 5; basisLocal.push("reach_null"); + } + + confidence = Math.max(0, Math.min(1, score / 115)); + basis = basisLocal; + + // Raised threshold for incomplete data: 0.85 (was 0.73) + // Below threshold → skip, NEVER pending + if (confidence < 0.85) { + skippedLowConf++; + continue; + } + matchMode = "enhanced"; } - const confidence = Math.max(0, Math.min(1, 
score / 115)); + // ── Both modes: upsert as auto_approved ───────────────────────────── + const notes = + `${fx.part_number} ↔ ${cand.part_number} (${cand.vendor_name}) | ` + + `mode: ${matchMode} | basis: ${basis.join(", ")} | ` + + `reach: ${fx.reach_meters}m vs ${cand.reach_meters}m | ` + + `wl_tx: ${fx.wavelength_tx_nm ?? fx.wavelengths ?? "?"}nm vs ` + + `${cand.wavelength_tx_nm ?? cand.wavelengths ?? "?"}nm`; - if (confidence < 0.50) { skipped++; continue; } - - const notes = `${fx.part_number} ↔ ${cand.part_number} (${cand.vendor_name}) | ` + - `basis: ${basis.join(", ")} | reach: ${fx.reach_meters}m vs ${cand.reach_meters}m | ` + - `wavelength: ${fx.wavelengths||"?"} vs ${cand.wavelengths||"?"}`; - - // Upsert equivalence candidate - const status = confidence >= 0.73 ? "auto_approved" : "pending"; + // Deterministic matches (1.0) upgrade existing auto_approved records. + // Enhanced matches (0.85+) do NOT overwrite existing auto_approved. + const conflictClause = matchMode === "deterministic" + ? 
`WHERE transceiver_equivalences.status NOT IN ('approved', 'rejected')` + : `WHERE transceiver_equivalences.status NOT IN ('approved', 'rejected', 'auto_approved')`; await pool.query(` INSERT INTO transceiver_equivalences (flexoptix_id, competitor_id, confidence, match_basis, match_notes, status) - VALUES ($1, $2, $3, $4, $5, $6) + VALUES ($1, $2, $3, $4, $5, 'auto_approved') ON CONFLICT (flexoptix_id, competitor_id) DO UPDATE SET confidence = EXCLUDED.confidence, match_basis = EXCLUDED.match_basis, match_notes = EXCLUDED.match_notes, updated_at = NOW() - WHERE transceiver_equivalences.status NOT IN ('approved', 'rejected') - `, [fx.id, cand.competitor_id, confidence, basis, notes, status]); + ${conflictClause} + `, [fx.id, cand.competitor_id, confidence, basis, notes]); - if (confidence >= 0.73) { - // Auto-approve: set competitor_verified on the Flexoptix transceiver - await pool.query(` - UPDATE transceivers - SET competitor_verified = true, - competitor_verified_at = NOW(), - competitor_status = 'matched', - competitor_status_updated_at = NOW() - WHERE id = $1 AND competitor_verified = false - `, [fx.id]); - await pool.query(` - INSERT INTO transceiver_verification_evidence ( - transceiver_id, verification_type, source_url, source_vendor_id, - evidence_value, evidence_hash, robot_name, confidence - ) - VALUES ( - $1, 'competitor_match', NULL, NULL, - $2::jsonb, - md5($2::text), - 'maintenance:find-equivalences', - $3 - ) - ON CONFLICT DO NOTHING - `, [ - fx.id, - JSON.stringify({ - competitor_id: cand.competitor_id, - competitor_part_number: cand.part_number, - competitor_vendor: cand.vendor_name, - match_basis: basis, - notes, - }), - confidence, - ]); - autoApproved++; - fxMatched = true; - } else { - queued++; - fxQueued = true; - } - } - - if (!fxMatched && fxQueued) { + // Set competitor_verified on FX product await pool.query(` UPDATE transceivers - SET competitor_status = 'ambiguous', + SET competitor_verified = true, + competitor_verified_at = NOW(), + 
competitor_status = 'matched', competitor_status_updated_at = NOW() - WHERE id = $1 - AND competitor_verified = false - AND COALESCE(competitor_status, 'unknown') NOT IN ('no_valid_match') + WHERE id = $1 AND competitor_verified = false `, [fx.id]); - } else if (!fxMatched && !fxQueued) { + + await pool.query(` + INSERT INTO transceiver_verification_evidence ( + transceiver_id, verification_type, source_url, source_vendor_id, + evidence_value, evidence_hash, robot_name, confidence + ) + VALUES ( + $1, 'competitor_match', NULL, NULL, + $2::jsonb, + md5($2::text), + 'maintenance:find-equivalences', + $3 + ) + ON CONFLICT DO NOTHING + `, [ + fx.id, + JSON.stringify({ + competitor_id: cand.competitor_id, + competitor_part_number: cand.part_number, + competitor_vendor: cand.vendor_name, + match_basis: basis, + match_mode: matchMode, + notes, + }), + confidence, + ]); + + if (matchMode === "deterministic") { + autoApprovedDeterministic++; + } else { + autoApprovedEnhanced++; + } + fxMatched = true; + } + + if (!fxMatched) { await pool.query(` UPDATE transceivers SET competitor_status = 'needs_research', competitor_status_updated_at = NOW() WHERE id = $1 AND competitor_verified = false - AND COALESCE(competitor_status, 'unknown') NOT IN ('no_valid_match', 'ambiguous') + AND COALESCE(competitor_status, 'unknown') NOT IN ('no_valid_match', 'ambiguous', 'matched') `, [fx.id]); } } + const autoApproved = autoApprovedDeterministic + autoApprovedEnhanced; console.log( - `[find-equivalences] auto_approved: ${autoApproved}, ` + - `queued for review: ${queued}, skipped (low confidence): ${skipped}` + `[find-equivalences] deterministic: ${autoApprovedDeterministic}, ` + + `enhanced (≥0.85): ${autoApprovedEnhanced}, ` + + `skipped (field mismatch): ${skippedIncomplete}, ` + + `skipped (low conf): ${skippedLowConf} | ` + + `PENDING CREATED: 0 (by design)` ); // After auto-approvals, rerun fully_verified check diff --git a/sql/113-infer-connector-type.sql 
b/sql/113-infer-connector-type.sql new file mode 100644 index 0000000..92d7298 --- /dev/null +++ b/sql/113-infer-connector-type.sql @@ -0,0 +1,172 @@ +-- Migration 113: Connector Type Inference +-- Fills missing connector_type from two sources: +-- 1. IEEE/MSA lookup table (exact, by reach range) +-- 2. Form-factor + fiber-type inference rules (when the IEEE lookup has no match) +-- Source: IEEE 802.3, SFF-8472, MSA specs, industry standard practices + +-- ── Source 1: IEEE lookup (reach-based, exact) ────────────────────────────── +UPDATE transceivers t SET + connector_type = ( + SELECT il.connector_type + FROM ieee_wavelength_lookup il + WHERE UPPER(il.form_factor) = UPPER(t.form_factor) + AND il.speed_gbps = ROUND(t.speed_gbps::NUMERIC, 2) + AND UPPER(il.fiber_type) = UPPER(t.fiber_type) + AND il.reach_min_m <= t.reach_meters + AND il.reach_max_m >= t.reach_meters + ORDER BY il.reach_max_m ASC -- Prefer tightest range match + LIMIT 1 + ) +WHERE t.connector_type IS NULL + AND t.form_factor IS NOT NULL + AND t.speed_gbps IS NOT NULL + AND t.fiber_type IS NOT NULL + AND t.reach_meters IS NOT NULL + AND t.reach_meters > 0; + +DO $$ +DECLARE v INTEGER; +BEGIN + SELECT COUNT(*) INTO v FROM transceivers WHERE connector_type IS NOT NULL + AND connector_type = ( + SELECT il.connector_type FROM ieee_wavelength_lookup il + WHERE UPPER(il.form_factor) = UPPER(transceivers.form_factor) LIMIT 1 + ); + RAISE NOTICE 'After IEEE lookup: approx % connector_type values now set', v; +END $$; + +-- ── Source 2: Form-factor + fiber-type inference ────────────────────────────── +-- Rules based on IEEE 802.3 and MSA specifications: +-- SFP/SFP+/SFP28/SFP56/XFP + SMF/MMF → LC (dual fiber) +-- QSFP+ + MMF → MPO-12 (SR4 = 4x parallel fiber) +-- QSFP+ + SMF, reach ≤ 2km → MPO-12 (PSM4 = parallel SMF) +-- QSFP+ + SMF, reach > 2km → LC (LR4 = CWDM4 on 2 fibers) +-- QSFP28 + MMF → MPO-12 (SR4) +-- QSFP28 + SMF, reach ≤ 2km → MPO-12 (DR/PSM4) +-- QSFP28 + SMF, reach > 2km → LC
(LR4/CWDM4) +-- QSFP56 + MMF → MPO-16 (SR4 on 200G) +-- QSFP56 + SMF → LC (FR4/LR4) +-- QSFP-DD/QSFP-DD800 + MMF → MPO-16 (SR8) +-- QSFP-DD/QSFP-DD800 + SMF, reach ≤ 2km → MPO-12 (DR4/DR8) — NOTE(review): migration 114's lookup maps 400GBASE-FR4 (2 km) to LC; confirm the ≤ 2km cut-off +-- QSFP-DD/QSFP-DD800 + SMF, reach > 2km → LC (FR4/LR4) +-- OSFP + MMF → MPO-16 (SR8) +-- OSFP + SMF, reach ≤ 2km → MPO-12 (DR8) — NOTE(review): migration 114's lookup maps OSFP FR4 (2 km) to LC; confirm the ≤ 2km cut-off +-- OSFP + SMF, reach > 2km → LC (FR4/LR4) +-- any + Copper → RJ45 +-- any + DAC → 'DAC' (native electrical, no fiber connector; the literal 'DAC' is stored, not NULL) +-- any + AOC → LC (optical fan-out) + +UPDATE transceivers SET + connector_type = CASE + -- Copper BASE-T + WHEN UPPER(fiber_type) IN ('COPPER', 'COPPER/RJ45') THEN 'RJ45' + + -- DAC = Direct Attach Copper, no optical connector; the literal 'DAC' is stored instead of NULL + WHEN UPPER(fiber_type) = 'DAC' THEN 'DAC' + + -- AOC = Active Optical Cable, LC fan-out connectors + WHEN UPPER(fiber_type) = 'AOC' THEN 'LC' + + -- Single-lane form factors: always LC for optical + WHEN UPPER(form_factor) IN ('SFP', 'SFP+', 'SFP28', 'XFP', 'SFP56') + AND UPPER(fiber_type) IN ('SMF', 'MMF') THEN 'LC' + + -- QSFP+ (40G) + WHEN UPPER(form_factor) = 'QSFP+' + AND UPPER(fiber_type) = 'MMF' THEN 'MPO-12' + WHEN UPPER(form_factor) = 'QSFP+' + AND UPPER(fiber_type) = 'SMF' + AND reach_meters IS NOT NULL AND reach_meters <= 2000 THEN 'MPO-12' + WHEN UPPER(form_factor) = 'QSFP+' + AND UPPER(fiber_type) = 'SMF' + AND (reach_meters IS NULL OR reach_meters > 2000) THEN 'LC' + + -- QSFP28 (100G) + WHEN UPPER(form_factor) = 'QSFP28' + AND UPPER(fiber_type) = 'MMF' THEN 'MPO-12' + WHEN UPPER(form_factor) = 'QSFP28' + AND UPPER(fiber_type) = 'SMF' + AND reach_meters IS NOT NULL AND reach_meters <= 2000 THEN 'MPO-12' + WHEN UPPER(form_factor) = 'QSFP28' + AND UPPER(fiber_type) = 'SMF' + AND (reach_meters IS NULL OR reach_meters > 2000) THEN 'LC' + + -- QSFP56 (200G) + WHEN UPPER(form_factor) = 'QSFP56' + AND UPPER(fiber_type) = 'MMF' THEN 'MPO-16' + WHEN UPPER(form_factor) = 'QSFP56' + AND UPPER(fiber_type) = 'SMF' THEN 'LC' + + -- QSFP-DD / QSFP-DD800 (400G/800G) + WHEN UPPER(form_factor) IN
('QSFP-DD', 'QSFP-DD800') + AND UPPER(fiber_type) = 'MMF' THEN 'MPO-16' + WHEN UPPER(form_factor) IN ('QSFP-DD', 'QSFP-DD800') + AND UPPER(fiber_type) = 'SMF' + AND reach_meters IS NOT NULL AND reach_meters <= 2000 THEN 'MPO-12' + WHEN UPPER(form_factor) IN ('QSFP-DD', 'QSFP-DD800') + AND UPPER(fiber_type) = 'SMF' + AND (reach_meters IS NULL OR reach_meters > 2000) THEN 'LC' + + -- OSFP (800G+) + WHEN UPPER(form_factor) = 'OSFP' + AND UPPER(fiber_type) = 'MMF' THEN 'MPO-16' + WHEN UPPER(form_factor) = 'OSFP' + AND UPPER(fiber_type) = 'SMF' + AND reach_meters IS NOT NULL AND reach_meters <= 2000 THEN 'MPO-12' + WHEN UPPER(form_factor) = 'OSFP' + AND UPPER(fiber_type) = 'SMF' + AND (reach_meters IS NULL OR reach_meters > 2000) THEN 'LC' + + -- CFP/CFP2/CFP4 (100G coherent) + WHEN UPPER(form_factor) IN ('CFP', 'CFP2', 'CFP4') THEN 'LC' + + ELSE NULL + END +WHERE connector_type IS NULL + AND form_factor IS NOT NULL + AND fiber_type IS NOT NULL; + +-- ── Recompute completeness (reach_meters = 0 counts as missing, matching enrichment_fields) ── +UPDATE transceivers SET + data_completeness = calc_data_completeness( + form_factor, speed_gbps, fiber_type, + reach_meters, wavelength_tx_nm, connector_type + ), + enrichment_needed = ( + form_factor IS NULL OR speed_gbps IS NULL OR + fiber_type IS NULL OR reach_meters IS NULL OR reach_meters = 0 OR + wavelength_tx_nm IS NULL OR connector_type IS NULL + ), + enrichment_fields = ARRAY_REMOVE(ARRAY[ + CASE WHEN form_factor IS NULL THEN 'form_factor' END, + CASE WHEN speed_gbps IS NULL THEN 'speed_gbps' END, + CASE WHEN fiber_type IS NULL THEN 'fiber_type' END, + CASE WHEN reach_meters IS NULL OR reach_meters = 0 THEN 'reach_meters' END, + CASE WHEN wavelength_tx_nm IS NULL THEN 'wavelength_tx_nm' END, + CASE WHEN connector_type IS NULL THEN 'connector_type' END + ], NULL); + +-- ── Statistics ────────────────────────────────────────────────────────────────── +DO $$ +DECLARE + total_cnt INTEGER; + complete_cnt INTEGER; + missing_conn INTEGER; + missing_wl INTEGER; +
fx_complete INTEGER; +BEGIN + SELECT COUNT(*) INTO total_cnt FROM transceivers; + SELECT COUNT(*) INTO complete_cnt FROM transceivers WHERE enrichment_needed = FALSE; + SELECT COUNT(*) INTO missing_conn FROM transceivers WHERE connector_type IS NULL; + SELECT COUNT(*) INTO missing_wl FROM transceivers WHERE wavelength_tx_nm IS NULL; + SELECT COUNT(*) INTO fx_complete + FROM transceivers t JOIN vendors v ON v.id = t.vendor_id + WHERE UPPER(v.name) LIKE '%FLEXOPTIX%' AND enrichment_needed = FALSE; + + RAISE NOTICE 'Migration 113 complete:'; + RAISE NOTICE ' Total transceivers: %', total_cnt; + RAISE NOTICE ' Fully complete: %', complete_cnt; + RAISE NOTICE ' Still missing connector: %', missing_conn; + RAISE NOTICE ' Still missing wavelength: %', missing_wl; + RAISE NOTICE ' Flexoptix fully complete: %', fx_complete; +END $$; diff --git a/sql/114-extend-ieee-lookup-and-clear-pending.sql b/sql/114-extend-ieee-lookup-and-clear-pending.sql new file mode 100644 index 0000000..6a09338 --- /dev/null +++ b/sql/114-extend-ieee-lookup-and-clear-pending.sql @@ -0,0 +1,214 @@ +-- Migration 114: Extend IEEE/MSA Lookup (400G/800G/1.6T) + Clear Pending Queue +-- Part A: Add missing 400G/800G/1.6T standards to ieee_wavelength_lookup +-- Part B: Wavelength fallback for products with known form/fiber/reach +-- Part C: Reject remaining pending records (replaced by deterministic matcher) + +-- ── Part A: IEEE/MSA Lookup Erweiterung ───────────────────────────────────── +-- Sources: IEEE 802.3cd (200G), 802.3bs (400G), 802.3df (800G), 802.3dj (1.6T draft) +-- 400G-FR4 MSA, 400G-LR4-10 MSA, OSFP MSA, OpenZR+ MSA + +INSERT INTO ieee_wavelength_lookup + (form_factor, speed_gbps, fiber_type, reach_min_m, reach_max_m, wavelength_tx_nm, wavelength_rx_nm, connector_type, ieee_standard, notes) +VALUES +-- ── QSFP+ 40G additional reaches ───────────────────────────────────────────── + ('QSFP+', 40, 'SMF', 0, 150, 1310, NULL, 'MPO-12', '802.3ba', '40GBASE-PSM4 short'), + ('QSFP+', 40, 'SMF', 0, 
1400, 1310, NULL, 'LC', '802.3ba', '40GBASE-LR4 1.4km'), +-- ── QSFP28 100G additional ─────────────────────────────────────────────────── + ('QSFP28', 100, 'SMF', 0, 80000, 1550, NULL, 'LC', '802.3ba', '100GBASE-ZR4'), + ('QSFP28', 100, 'SMF', 0, 120000, 1550, NULL, 'LC', 'OpenZR+', '100G OpenZR+ 120km'), +-- ── QSFP56 200G ────────────────────────────────────────────────────────────── + ('QSFP56', 200, 'DAC', 0, 5, NULL, NULL, 'QSFP56','802.3cd', '200G DAC'), + ('QSFP56', 200, 'AOC', 0, 100, 850, NULL, 'MPO-16','802.3cd', '200G AOC SR4'), +-- ── QSFP-DD 400G additional ────────────────────────────────────────────────── + ('QSFP-DD', 400, 'MMF', 0, 100, 850, NULL, 'MPO-16','802.3bs', '400GBASE-SR8'), + ('QSFP-DD', 400, 'SMF', 0, 500, 1310, NULL, 'MPO-12','802.3bs', '400GBASE-DR4'), + ('QSFP-DD', 400, 'SMF', 0, 2000, 1310, NULL, 'LC', '802.3bs', '400GBASE-FR4'), + ('QSFP-DD', 400, 'SMF', 0, 10000, 1310, NULL, 'LC', '802.3bs', '400GBASE-LR4'), + ('QSFP-DD', 400, 'SMF', 0, 80000, 1550, NULL, 'LC', '400ZR-MSA','400G ZR 80km'), + ('QSFP-DD', 400, 'SMF', 0, 120000, 1550, NULL, 'LC', 'OpenZR+', '400G OpenZR+ 120km'), + ('QSFP-DD', 400, 'AOC', 0, 100, 850, NULL, 'MPO-16','802.3bs', '400G AOC SR8'), +-- ── QSFP-DD800 800G ────────────────────────────────────────────────────────── + ('QSFP-DD800', 800, 'MMF', 0, 100, 850, NULL, 'MPO-16','802.3df', '800GBASE-SR8'), + ('QSFP-DD800', 800, 'SMF', 0, 500, 1310, NULL, 'MPO-12','802.3df', '800GBASE-DR8'), + ('QSFP-DD800', 800, 'SMF', 0, 2000, 1310, NULL, 'LC', '802.3df', '800GBASE-FR4 2x400G'), + ('QSFP-DD800', 800, 'SMF', 0,10000, 1310, NULL, 'LC', '802.3df', '800GBASE-LR4'), + ('QSFP-DD800', 800, 'SMF', 0,80000, 1550, NULL, 'LC', 'OpenZR+', '800G OpenZR+ 80km'), + ('QSFP-DD800', 800, 'DAC', 0, 5, NULL, NULL, 'QSFP-DD800','802.3df','800G DAC'), +-- ── OSFP 400G ──────────────────────────────────────────────────────────────── + ('OSFP', 400, 'MMF', 0, 100, 850, NULL, 'MPO-16', 'OSFP-MSA', '400GBASE-SR8 OSFP'), + ('OSFP', 400, 
'SMF', 0, 500, 1310, NULL, 'MPO-12', 'OSFP-MSA', '400GBASE-DR4 OSFP'), + ('OSFP', 400, 'SMF', 0, 2000, 1310, NULL, 'LC', 'OSFP-MSA', '400GBASE-FR4 OSFP'), + ('OSFP', 400, 'SMF', 0, 10000, 1310, NULL, 'LC', 'OSFP-MSA', '400GBASE-LR4 OSFP'), + ('OSFP', 400, 'SMF', 0, 80000, 1550, NULL, 'LC', 'OpenZR+', '400G ZR OSFP 80km'), + ('OSFP', 400, 'SMF', 0, 120000, 1550, NULL, 'LC', 'OpenZR+', '400G OpenZR+ OSFP 120km'), +-- ── OSFP 800G ──────────────────────────────────────────────────────────────── + ('OSFP', 800, 'MMF', 0, 30, 850, NULL, 'MPO-16', '802.3df', '800GBASE-SR8 30m'), + ('OSFP', 800, 'MMF', 0, 100, 850, NULL, 'MPO-16', '802.3df', '800GBASE-SR8'), + ('OSFP', 800, 'SMF', 0, 500, 1310, NULL, 'MPO-12', '802.3df', '800GBASE-DR8 OSFP'), + ('OSFP', 800, 'SMF', 0, 2000, 1310, NULL, 'LC', '802.3df', '800GBASE-FR4 OSFP'), + ('OSFP', 800, 'SMF', 0, 10000, 1310, NULL, 'LC', '802.3df', '800GBASE-LR4 OSFP'), + ('OSFP', 800, 'SMF', 0, 80000, 1550, NULL, 'LC', 'OpenZR+', '800G ZR OSFP 80km'), +-- ── OSFP 1.6T (IEEE 802.3dj draft) ────────────────────────────────────────── + ('OSFP', 1600, 'SMF', 0, 500, 1310, NULL, 'MPO-16', '802.3dj', '1.6TBASE-DR16 OSFP'), + ('OSFP', 1600, 'SMF', 0, 2000, 1310, NULL, 'LC', '802.3dj', '1.6TBASE-FR4 OSFP'), + ('OSFP', 1600, 'SMF', 0, 10000, 1310, NULL, 'LC', '802.3dj', '1.6TBASE-LR4 OSFP'), + ('OSFP112', 800, 'SMF', 0, 10000, 1310, NULL, 'LC', '802.3df', '800GBASE-LR4 OSFP112'), + ('OSFP112', 800, 'SMF', 0, 80000, 1550, NULL, 'LC', 'OpenZR+', '800G ZR OSFP112 80km'), + ('OSFP112', 800, 'SMF', 0, 120000,1550, NULL, 'LC', 'OpenZR+', '800G OpenZR+ OSFP112 120km'), +-- ── CFP2 100G coherent ─────────────────────────────────────────────────────── + ('CFP2', 100, 'SMF', 0, 10000, 1310, NULL, 'LC', 'OIF-100G', '100GBASE-LR4 CFP2'), + ('CFP2', 100, 'SMF', 0, 80000, 1550, NULL, 'LC', 'OIF-100G', '100G ZR CFP2 80km'), + ('CFP2', 100, 'SMF', 0, 120000, 1550, NULL, 'LC', 'OpenZR+', '100G OpenZR+ CFP2'), +-- ── SFP+ / SFP 1G non-standard reaches 
─────────────────────────────────────── + ('SFP', 1, 'SMF', 0, 20000, 1310, NULL, 'LC', '802.3z', '1000BASE-LH 20km'), + ('SFP', 1, 'SMF', 0, 60000, 1310, NULL, 'LC', '802.3z', '1000BASE-LH 60km'), + ('SFP', 1, 'SMF', 0, 80000, 1550, NULL, 'LC', '802.3z', '1000BASE-ZX 80km'), + ('SFP', 1, 'SMF', 0,100000, 1550, NULL, 'LC', '802.3z', '1000BASE-ZX 100km'), +-- ── SFP+ 10G non-standard reaches ──────────────────────────────────────────── + ('SFP+', 10, 'SMF', 0, 20000, 1310, NULL, 'LC', '802.3ae', '10GBASE-LR 20km variant'), + ('SFP+', 10, 'SMF', 0, 60000, 1550, NULL, 'LC', '802.3ae', '10GBASE-ZR 60km'), + ('SFP+', 10, 'SMF', 0, 80000, 1550, NULL, 'LC', '802.3ae', '10GBASE-ZR 80km'), + ('SFP+', 10, 'SMF', 0,100000, 1550, NULL, 'LC', '802.3ae', '10GBASE-ZR 100km'), +-- ── XFP 10G ────────────────────────────────────────────────────────────────── + ('XFP', 10, 'MMF', 0, 300, 850, NULL, 'LC', '802.3ae', '10GBASE-SR XFP'), + ('XFP', 10, 'SMF', 0, 10000, 1310, NULL, 'LC', '802.3ae', '10GBASE-LR XFP'), + ('XFP', 10, 'SMF', 0, 40000, 1310, NULL, 'LC', '802.3ae', '10GBASE-ER XFP'), + ('XFP', 10, 'SMF', 0, 80000, 1550, NULL, 'LC', '802.3ae', '10GBASE-ZR XFP') +ON CONFLICT DO NOTHING; + +-- ── Re-run IEEE lookup for wavelength after new entries ────────────────────── +UPDATE transceivers t SET + wavelength_tx_nm = ( + SELECT il.wavelength_tx_nm + FROM ieee_wavelength_lookup il + WHERE UPPER(il.form_factor) = UPPER(t.form_factor) + AND il.speed_gbps = ROUND(t.speed_gbps::NUMERIC, 2) + AND UPPER(il.fiber_type) = UPPER(t.fiber_type) + AND il.reach_min_m <= t.reach_meters + AND il.reach_max_m >= t.reach_meters + AND il.wavelength_tx_nm IS NOT NULL + ORDER BY il.reach_max_m ASC + LIMIT 1 + ), + wavelength_rx_nm = COALESCE( + wavelength_rx_nm, + ( + SELECT il.wavelength_rx_nm + FROM ieee_wavelength_lookup il + WHERE UPPER(il.form_factor) = UPPER(t.form_factor) + AND il.speed_gbps = ROUND(t.speed_gbps::NUMERIC, 2) + AND UPPER(il.fiber_type) = UPPER(t.fiber_type) + AND il.reach_min_m 
<= t.reach_meters + AND il.reach_max_m >= t.reach_meters + ORDER BY il.reach_max_m ASC + LIMIT 1 + ) + ), + connector_type = COALESCE( + connector_type, + ( + SELECT il.connector_type + FROM ieee_wavelength_lookup il + WHERE UPPER(il.form_factor) = UPPER(t.form_factor) + AND il.speed_gbps = ROUND(t.speed_gbps::NUMERIC, 2) + AND UPPER(il.fiber_type) = UPPER(t.fiber_type) + AND il.reach_min_m <= t.reach_meters + AND il.reach_max_m >= t.reach_meters + ORDER BY il.reach_max_m ASC + LIMIT 1 + ) + ) +WHERE t.wavelength_tx_nm IS NULL + AND t.form_factor IS NOT NULL + AND t.speed_gbps IS NOT NULL + AND t.fiber_type IS NOT NULL + AND UPPER(t.fiber_type) NOT IN ('COPPER', 'COPPER/RJ45', 'DAC', 'AOC') + AND t.reach_meters IS NOT NULL + AND t.reach_meters > 0; + +-- ── Part B: Fallback wavelength by fiber_type for remaining ────────────────── +-- Conservative rule: SMF products with reach ≥ 80km → 1550nm (ZR/coherent; every 80km lookup row above is 1550nm) +-- All other SMF → 1310nm (covers ER/LR/DR/FR/LH etc.) +-- All MMF → 850nm (SR variants) +-- Products with DAC fiber_type: no optical wavelength (leave NULL) + +UPDATE transceivers SET + wavelength_tx_nm = CASE + -- Long-reach SMF: reach ≥ 80km → 1550nm (ZR, coherent) + WHEN UPPER(fiber_type) = 'SMF' AND reach_meters >= 80000 THEN 1550 + -- Standard SMF: 1310nm (LR/ER/DR/FR/LH etc.)
+ WHEN UPPER(fiber_type) = 'SMF' AND reach_meters > 0 THEN 1310 + -- Short MMF: 850nm (SR variants) + WHEN UPPER(fiber_type) = 'MMF' AND reach_meters > 0 THEN 850 + ELSE wavelength_tx_nm + END +WHERE wavelength_tx_nm IS NULL + AND fiber_type IS NOT NULL + AND UPPER(fiber_type) IN ('SMF', 'MMF') + AND reach_meters IS NOT NULL + AND reach_meters > 0 + AND form_factor IS NOT NULL + AND UPPER(form_factor) NOT IN ('LC', 'SC', 'DAC', 'TRANSCEIVER', 'PLUGGABLE', 'VARIES'); + +-- ── Completeness final update (reach_meters = 0 counts as missing, matching enrichment_fields) ── +UPDATE transceivers SET + data_completeness = calc_data_completeness( + form_factor, speed_gbps, fiber_type, + reach_meters, wavelength_tx_nm, connector_type + ), + enrichment_needed = ( + form_factor IS NULL OR speed_gbps IS NULL OR + fiber_type IS NULL OR reach_meters IS NULL OR reach_meters = 0 OR + wavelength_tx_nm IS NULL OR connector_type IS NULL + ), + enrichment_fields = ARRAY_REMOVE(ARRAY[ + CASE WHEN form_factor IS NULL THEN 'form_factor' END, + CASE WHEN speed_gbps IS NULL THEN 'speed_gbps' END, + CASE WHEN fiber_type IS NULL THEN 'fiber_type' END, + CASE WHEN reach_meters IS NULL OR reach_meters = 0 THEN 'reach_meters' END, + CASE WHEN wavelength_tx_nm IS NULL THEN 'wavelength_tx_nm' END, + CASE WHEN connector_type IS NULL THEN 'connector_type' END + ], NULL); + +-- ── Part C: Clear pending queue ─────────────────────────────────────────────── +-- All pending records from the confidence-based matcher are superseded. +-- NOTE(review): the matcher's upsert (maintenance:find-equivalences) skips rows whose status is +-- 'approved' or 'rejected', so pairs rejected here are not re-activated by later runs — confirm.
+UPDATE transceiver_equivalences +SET status = 'rejected', + reject_reason = 'Superseded by deterministic matcher — confidence-based pending removed in migration 114', + reviewed_at = NOW(), + reviewed_by = 'system:migration-114' +WHERE status = 'pending'; + +-- ── Final Statistics ───────────────────────────────────────────────────────── +DO $$ +DECLARE + total_cnt INTEGER; + complete_cnt INTEGER; + missing_conn INTEGER; + missing_wl INTEGER; + fx_complete INTEGER; + fx_total INTEGER; + pending_cnt INTEGER; +BEGIN + SELECT COUNT(*) INTO total_cnt FROM transceivers; + SELECT COUNT(*) INTO complete_cnt FROM transceivers WHERE enrichment_needed = FALSE; + SELECT COUNT(*) INTO missing_conn FROM transceivers WHERE connector_type IS NULL; + SELECT COUNT(*) INTO missing_wl FROM transceivers WHERE wavelength_tx_nm IS NULL; + SELECT COUNT(*) INTO pending_cnt FROM transceiver_equivalences WHERE status = 'pending'; + SELECT COUNT(*) INTO fx_total + FROM transceivers t JOIN vendors v ON v.id = t.vendor_id + WHERE UPPER(v.name) LIKE '%FLEXOPTIX%'; + SELECT COUNT(*) INTO fx_complete + FROM transceivers t JOIN vendors v ON v.id = t.vendor_id + WHERE UPPER(v.name) LIKE '%FLEXOPTIX%' AND enrichment_needed = FALSE; + + RAISE NOTICE 'Migration 114 complete:'; + RAISE NOTICE ' Total transceivers: %', total_cnt; + RAISE NOTICE ' Fully complete: %', complete_cnt; + RAISE NOTICE ' Still missing connector: %', missing_conn; + RAISE NOTICE ' Still missing wavelength: %', missing_wl; + RAISE NOTICE ' Flexoptix fully complete: % / %', fx_complete, fx_total; + RAISE NOTICE ' Pending queue: % (target: 0)', pending_cnt; +END $$;