transceiver-db/sql/117-spec-equivalence-matcher.sql
Rene Fichtmueller db6b97186a feat: OPN+spec equivalence matchers, 400G pricing, TIP_LLM training data
- Add OPN-based equivalence matcher robot (7,245 manufacturer-confirmed matches, confidence=1.0)
- Add spec-based equivalence matcher robot (683 matches, confidence=0.85)
  - Matches by form_factor + speed_gbps + reach_tier + wavelength ±10nm
  - Safety cap: skip FX products matching >30 competitors (too generic)
  - Daily schedule: 04:30 UTC via pg-boss
- SQL migrations 116 (OPN) + 117 (spec) with tip_extract_wavelength_nm() + tip_reach_tier() helpers
- Fix tenGtek.ts: add 3 missing 400G categories (QSFP-DD, QSFP112) — closes pricing gap
- Generate tip-llm-pricing-v1.jsonl: 80 DB-grounded QA pairs (pricing, equivalences, 400G)
- Rebuild TIP_LLM training pool: 11,999 pairs (+127 vs prev), deployed to Erik
- FX product equivalence coverage: 88.1% (959/1089)
2026-05-13 21:33:19 +02:00

140 lines
6.0 KiB
PL/PgSQL

-- Migration 117: Spec-Based Equivalence Matcher
-- Matches FX products with competitor products by technical specification
-- when no OPN-based equivalence already exists.
--
-- Match criteria (ALL must apply):
-- 1. Same form_factor (exact)
-- 2. Same speed_gbps (exact)
-- 3. Same reach tier (SR/IR/LR/ER/ZR — based on reach_meters)
-- 4. Same primary wavelength (within ±10nm, extracted from wavelengths field)
-- OR both have no wavelength data (broadband / non-WDM products)
-- 5. Target must be a competitor vendor (is_competitor = true)
-- 6. Max 30 competitor matches per FX product (too many = too generic)
--
-- Match quality:
-- confidence = 0.85 (high but below OPN-confirmed 1.0)
-- match_basis = '{spec}'
-- status = 'auto_approved'
--
-- Rules:
-- - Skips pairs that already have ANY equivalence (approved, auto_approved, rejected)
-- - Skips FX products that already have an OPN-based equivalence
-- (OPN match is preferred; spec is only a fallback)
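-- - Minimum reach_meters = 10 on both sides (avoids reach=0 garbage data; this also
--   excludes every product listed with a reach below 10 m, e.g. short DAC/AOC cables)
-- - Reach tier 'SR' covers every reach ≤ 300m, so DAC/AOC of 10 m or more fall into SR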
-- ── Helper: extract primary wavelength in nm from text field ─────────────────
-- Returns the FIRST wavelength mentioned in the text, truncated to whole nm,
-- or NULL when the input is NULL or contains no wavelength.
--
-- Handles:
--   "1310nm", "850nm", "850 NM"     → single value (unit is case-insensitive)
--   "1310/1550nm", "1270-1610nm"    → first value of a list that shares one unit
--   "1270nm-1610nm"                 → first value when every entry has a unit
--   "1550.12nm"                     → integer part of a decimal wavelength
--   "12345nm", "n/a", "", NULL      → NULL
--
-- Why the pattern is more than '(\d{3,4})\s*nm':
--   * that pattern needs "nm" directly after the captured number, so
--     "1310/1550nm" produced 1550 while "1270nm-1610nm" produced 1270 — the
--     result depended on formatting rather than on the product;
--   * without the (?<![0-9]) guard, the tail of a longer digit run is picked up
--     ("12345nm" → 2345, "1530.330nm" → 330).
CREATE OR REPLACE FUNCTION tip_extract_wavelength_nm(wl text)
RETURNS integer LANGUAGE sql IMMUTABLE PARALLEL SAFE AS $$
    SELECT (regexp_match(
        wl,
        -- (?<![0-9])                            value must not start inside a longer number
        -- (\d{3,4})(?:\.\d+)?                   captured integer part + optional fraction
        -- (?:\s*[/,-]\s*\d{3,4}(?:\.\d+)?)*     further list entries before the shared unit
        -- \s*nm                                 the unit itself
        '(?<![0-9])(\d{3,4})(?:\.\d+)?(?:\s*[/,-]\s*\d{3,4}(?:\.\d+)?)*\s*nm',
        'i'
    ))[1]::integer
$$;
-- ── Helper: reach tier label ─────────────────────────────────────────────────
-- Maps a reach in metres to a coarse tier label. Upper bounds are inclusive:
--   ≤ 300 → 'SR', ≤ 2000 → 'IR', ≤ 10000 → 'LR', ≤ 40000 → 'ER', above → 'ZR'.
-- Returns NULL for a NULL reach: an unknown reach must not be labelled 'ZR'
-- (every `<=` comparison against NULL is unknown, so without the explicit
-- guard a NULL input falls through to the ELSE branch and two unknown-reach
-- products would compare as the same tier).
-- Zero and negative values are not validated here and map to 'SR'.
CREATE OR REPLACE FUNCTION tip_reach_tier(reach integer)
RETURNS text LANGUAGE sql IMMUTABLE PARALLEL SAFE AS $$
    SELECT CASE
        WHEN reach IS NULL  THEN NULL   -- unknown reach → unknown tier
        WHEN reach <= 300   THEN 'SR'   -- ≤300m (SR, VSR, DAC, AOC)
        WHEN reach <= 2000  THEN 'IR'   -- ≤2km (LX, LH intermediate)
        WHEN reach <= 10000 THEN 'LR'   -- ≤10km (LR, LX, standard LH)
        WHEN reach <= 40000 THEN 'ER'   -- ≤40km (ER, extended reach)
        ELSE 'ZR'                       -- >40km (ZR, ZR+, coherent)
    END
$$;
-- ── Insert spec-based equivalences ──────────────────────────────────────────
-- Pipeline (each CTE is one step):
--   fx_products          Flexoptix products eligible for the spec fallback
--   competitor_products  products of competitor vendors with a usable reach
--   candidates           every FX × competitor pair satisfying the spec match
--   match_counts         distinct competitor matches per FX product
--
-- The spec-match predicate exists only in `candidates`; both the inserted rows
-- and the safety cap are derived from it, so the two cannot drift apart and the
-- cap is computed once per FX product instead of once per candidate row.
-- Assumes transceivers.id is unique (match_counts groups on it).
WITH fx_products AS (
    SELECT
        fx.id,
        fx.form_factor,
        fx.speed_gbps,
        tip_reach_tier(fx.reach_meters)           AS reach_tier,
        tip_extract_wavelength_nm(fx.wavelengths) AS wavelength_nm
    FROM transceivers AS fx
    INNER JOIN vendors AS vfx
        ON vfx.id = fx.vendor_id
        AND UPPER(vfx.name) LIKE '%FLEXOPTIX%'
    WHERE fx.reach_meters >= 10        -- no garbage reach=0 on FX side
      AND fx.speed_gbps > 0
      -- Spec is a fallback only: skip FX products that already have any
      -- OPN-based equivalence.
      AND NOT EXISTS (
          SELECT 1
          FROM transceiver_equivalences AS e
          WHERE e.flexoptix_id = fx.id
            AND 'opn' = ANY(e.match_basis)
      )
),
competitor_products AS (
    SELECT
        comp.id,
        comp.form_factor,
        comp.speed_gbps,
        tip_reach_tier(comp.reach_meters)           AS reach_tier,
        tip_extract_wavelength_nm(comp.wavelengths) AS wavelength_nm
    FROM transceivers AS comp
    INNER JOIN vendors AS vcomp
        ON vcomp.id = comp.vendor_id
        AND vcomp.is_competitor = true
    WHERE comp.reach_meters >= 10      -- no garbage reach=0
),
candidates AS (
    SELECT
        fx.id            AS flexoptix_id,
        comp.id          AS competitor_id,
        fx.form_factor,
        fx.speed_gbps,
        fx.reach_tier,
        fx.wavelength_nm
    FROM fx_products AS fx
    INNER JOIN competitor_products AS comp
        ON comp.form_factor = fx.form_factor
        AND comp.speed_gbps = fx.speed_gbps
        AND comp.reach_tier = fx.reach_tier
        -- Wavelength: both missing, OR both present and within ±10nm.
        -- ABS() over a NULL operand is NULL (not true), so a product with a
        -- wavelength never matches one without — no 0 sentinel required.
        AND (
            (fx.wavelength_nm IS NULL AND comp.wavelength_nm IS NULL)
            OR ABS(comp.wavelength_nm - fx.wavelength_nm) <= 10
        )
),
match_counts AS (
    -- Counts ALL spec-matching competitors, including pairs that already have
    -- an equivalence row: the cap measures how generic the spec is, not how
    -- many rows are still missing.
    SELECT
        flexoptix_id,
        COUNT(DISTINCT competitor_id) AS competitor_cnt
    FROM candidates
    GROUP BY flexoptix_id
)
INSERT INTO transceiver_equivalences (
    flexoptix_id,
    competitor_id,
    confidence,
    status,
    match_basis,
    match_notes,
    created_at,
    updated_at
)
SELECT DISTINCT
    c.flexoptix_id,
    c.competitor_id,
    0.85            AS confidence,
    'auto_approved' AS status,
    ARRAY['spec']   AS match_basis,
    'Spec match: ' || c.form_factor || ' ' || c.speed_gbps || 'G ' ||
        c.reach_tier ||
        CASE WHEN c.wavelength_nm IS NOT NULL
             THEN ' @' || c.wavelength_nm || 'nm'
             ELSE '' END AS match_notes,
    NOW()           AS created_at,
    NOW()           AS updated_at
FROM candidates AS c
INNER JOIN match_counts AS mc
    ON mc.flexoptix_id = c.flexoptix_id
-- Safety cap: skip FX product if it matches > 30 competitors
-- (indicates too-generic spec — needs stricter criteria)
WHERE mc.competitor_cnt <= 30
  -- Skip pairs that already have ANY equivalence, whatever its status
  AND NOT EXISTS (
      SELECT 1
      FROM transceiver_equivalences AS e
      WHERE e.flexoptix_id = c.flexoptix_id
        AND e.competitor_id = c.competitor_id
  )
ON CONFLICT DO NOTHING;
-- ── Statistics ───────────────────────────────────────────────────────────────
-- Reports summary figures for spec-based equivalences via NOTICE messages.
-- All three figures are computed over every row whose match_basis contains
-- 'spec', i.e. they are cumulative totals for the table, not only the rows
-- inserted by this run.
DO $$
DECLARE
    spec_rows        INTEGER;  -- rows with 'spec' in match_basis
    fx_products      INTEGER;  -- distinct flexoptix_id among those rows
    competitor_prods INTEGER;  -- distinct competitor_id among those rows
BEGIN
    -- One pass over the table yields all three aggregates.
    SELECT
        COUNT(*),
        COUNT(DISTINCT flexoptix_id),
        COUNT(DISTINCT competitor_id)
    INTO
        spec_rows,
        fx_products,
        competitor_prods
    FROM transceiver_equivalences
    WHERE 'spec' = ANY(match_basis);

    RAISE NOTICE 'Migration 117 complete: Spec-Based Equivalence Matcher';
    RAISE NOTICE ' Spec equivalences total: %', spec_rows;
    RAISE NOTICE ' FX products newly covered: %', fx_products;
    RAISE NOTICE ' Competitor products matched: %', competitor_prods;
END $$;