transceiver-db/sql/108-form-factor-normalization.sql
Rene Fichtmueller 98b241f462 feat: implement Flexoptix reference matching overhaul
- sql/108: normalize form_factor across all vendors (SFP-Plus → SFP+, etc.)
  and round speed_gbps for consistent matching
- sql/109: document 30→90 day matcher window change
- robots/catalog-reconcile.ts: new bulk-reconcile robot — matches all
  Flexoptix products against all competitors without 30-day time limit
- scheduler.ts: register catalog:reconcile job (monthly + on-demand),
  fix nightly matcher 30→90 day window, UPPER() form_factor matching,
  ROUND() speed_gbps matching

Fixes: ATGBICS/NADDOD/10Gtek/ShopFiber24 had 0 Flexoptix equivalences
due to stale price_observations being filtered out. Expected coverage
improvement: 22% → 45-60% after first reconcile run.
2026-05-13 16:55:45 +02:00

92 lines
4.4 KiB
SQL

-- Migration 108: form_factor normalisieren (alle Schreibweisen → canonical)
-- Zweck: Matcher findet Kandidaten nur bei exaktem form_factor-Match.
-- Verschiedene Scraper schreiben inkonsistente Werte.
-- 1. Canonical-Mapping anwenden
UPDATE transceivers SET
form_factor = CASE
-- SFP Varianten
WHEN UPPER(TRIM(form_factor)) IN ('SFP', 'SFP (LC)', 'SFP DDM', 'SFP MODULE', '1G SFP', 'GLC', 'MINI-GBIC')
THEN 'SFP'
-- SFP+ Varianten
WHEN UPPER(TRIM(form_factor)) IN ('SFP+', 'SFP+ (LC)', 'SFP-PLUS', 'SFP PLUS', 'SFP+ DDM',
'SFP+ MODULE', '10G SFP+', 'SFP+ OPTICAL', '10GSFP+',
'SFP+/SFP28 COMPATIBLE', 'SFP+(LC)')
THEN 'SFP+'
-- SFP28 Varianten (25G)
WHEN UPPER(TRIM(form_factor)) IN ('SFP28', 'SFP-28', 'SFP 28', '25G SFP28', 'SFP28 (LC)',
'SFP28 DDM', '25GSFP28')
THEN 'SFP28'
-- QSFP+ Varianten (40G)
WHEN UPPER(TRIM(form_factor)) IN ('QSFP+', 'QSFP-PLUS', 'QSFP PLUS', '40G QSFP+',
'QSFP+ (MPO)', 'QSFP+ MODULE', 'QSFP+ DDM', '40GQSFP+')
THEN 'QSFP+'
-- QSFP28 Varianten (100G)
WHEN UPPER(TRIM(form_factor)) IN ('QSFP28', 'QSFP-28', 'QSFP 28', '100G QSFP28',
'QSFP28 (LC)', 'QSFP28 MODULE', 'QSFP28 DDM', '100GQSFP28')
THEN 'QSFP28'
-- QSFP56 Varianten (200G)
WHEN UPPER(TRIM(form_factor)) IN ('QSFP56', 'QSFP-56', '200G QSFP56', 'QSFP56-DD')
THEN 'QSFP56'
-- QSFP-DD Varianten (400G)
WHEN UPPER(TRIM(form_factor)) IN ('QSFP-DD', 'QSFPDD', 'QSFP DD', '400G QSFP-DD',
'QSFP-DD MODULE', 'QSFP56-DD 400G')
THEN 'QSFP-DD'
-- QSFP-DD800 / 800G
WHEN UPPER(TRIM(form_factor)) IN ('QSFP-DD800', 'QSFP-DD 800G', '800G QSFP-DD', 'OSFP-RHS')
THEN 'QSFP-DD800'
-- OSFP Varianten
WHEN UPPER(TRIM(form_factor)) IN ('OSFP', 'OSFP MODULE', '400G OSFP', '800G OSFP')
THEN 'OSFP'
-- CFP Varianten
WHEN UPPER(TRIM(form_factor)) IN ('CFP', 'CFP2', 'CFP4', 'CFP-DCO', 'CFP2-DCO')
THEN UPPER(TRIM(form_factor))
-- XFP
WHEN UPPER(TRIM(form_factor)) IN ('XFP', '10G XFP', 'XFP DDM')
THEN 'XFP'
-- X2 / XENPAK
WHEN UPPER(TRIM(form_factor)) IN ('X2', 'XENPAK', 'X2 MODULE')
THEN UPPER(TRIM(form_factor))
-- DAC Cable-Typen (kein optisches Modul — form_factor trotzdem normalisieren)
WHEN UPPER(form_factor) LIKE '%DAC%' AND UPPER(form_factor) LIKE '%QSFP28%' THEN 'QSFP28-DAC'
WHEN UPPER(form_factor) LIKE '%DAC%' AND UPPER(form_factor) LIKE '%QSFP+%' THEN 'QSFP+-DAC'
WHEN UPPER(form_factor) LIKE '%DAC%' AND UPPER(form_factor) LIKE '%SFP28%' THEN 'SFP28-DAC'
WHEN UPPER(form_factor) LIKE '%DAC%' AND UPPER(form_factor) LIKE '%SFP+%' THEN 'SFP+-DAC'
WHEN UPPER(form_factor) LIKE '%AOC%' AND UPPER(form_factor) LIKE '%QSFP28%' THEN 'QSFP28-AOC'
WHEN UPPER(form_factor) LIKE '%AOC%' AND UPPER(form_factor) LIKE '%QSFP+%' THEN 'QSFP+-AOC'
WHEN UPPER(form_factor) LIKE '%AOC%' AND UPPER(form_factor) LIKE '%SFP28%' THEN 'SFP28-AOC'
WHEN UPPER(form_factor) LIKE '%AOC%' AND UPPER(form_factor) LIKE '%SFP+%' THEN 'SFP+-AOC'
ELSE form_factor -- unbekannte Werte unverändert lassen
END
WHERE form_factor IS NOT NULL;
-- 2. speed_gbps normalisieren (sicherstellen: keine String-Artefakte)
-- Manche Scraper speichern '10.0', '10.00', '1.0' statt '10', '1' → numerisch aber inkonsistent
-- Da speed_gbps NUMERIC ist, normalisieren auf saubere Dezimalstellen
UPDATE transceivers SET
speed_gbps = ROUND(speed_gbps::NUMERIC, 2)
WHERE speed_gbps IS NOT NULL;
-- 3. Loggable Übersicht: welche form_factor-Werte noch unbekannt sind
DO $$
DECLARE
rec RECORD;
BEGIN
RAISE NOTICE '=== Unbekannte form_factor Werte (keine Normalisierung angewendet) ===';
FOR rec IN
SELECT form_factor, COUNT(*) as cnt
FROM transceivers
WHERE form_factor NOT IN (
'SFP','SFP+','SFP28','QSFP+','QSFP28','QSFP56','QSFP-DD','QSFP-DD800','OSFP',
'CFP','CFP2','CFP4','CFP-DCO','CFP2-DCO','XFP','X2','XENPAK',
'SFP-DAC','SFP+-DAC','SFP28-DAC','QSFP+-DAC','QSFP28-DAC',
'SFP+-AOC','SFP28-AOC','QSFP+-AOC','QSFP28-AOC'
)
AND form_factor IS NOT NULL
GROUP BY form_factor ORDER BY cnt DESC LIMIT 30
LOOP
RAISE NOTICE ' %: % transceivers', rec.form_factor, rec.cnt;
END LOOP;
END;
$$;