From 048bf0dcf2fb7b2e92ad1e4d7a8835702b5fe374 Mon Sep 17 00:00:00 2001 From: Rene Fichtmueller Date: Wed, 13 May 2026 16:51:53 +0200 Subject: [PATCH] feat: add Codex task for Flexoptix reference matching overhaul MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit CODEX-TASK-flexoptix-reference-matching.md — comprehensive plan to fix zero-match gap for ATGBICS/NADDOD/10Gtek/ShopFiber24 (8.260+ products with 0 Flexoptix equivalences). Root cause: 30-day price_observation window excludes vendors whose scrapers ran >30 days ago. Solution: catalog-reconcile robot (full bulk match, no time limit), form_factor normalization (SQL 108), 30→90 day window fix in nightly matcher, on-demand API endpoint. Expected: coverage from 22% → 45-60% after one reconcile run. --- CHANGELOG_PENDING.md | 1 + CODEX-TASK-flexoptix-reference-matching.md | 737 +++++++++++++++++++++ packages/api/src/routes/blog.ts | 2 +- packages/dashboard/index.html | 22 +- 4 files changed, 755 insertions(+), 7 deletions(-) create mode 100644 CODEX-TASK-flexoptix-reference-matching.md diff --git a/CHANGELOG_PENDING.md b/CHANGELOG_PENDING.md index 77f052e..e7df4e3 100644 --- a/CHANGELOG_PENDING.md +++ b/CHANGELOG_PENDING.md @@ -1,6 +1,7 @@ # TIP Changelog Format: `{"d":"YYYY-MM-DD","t":"TYPE","m":"Description"}` +{"d":"2026-05-13","t":"FIX","m":"BlogLLM model version sync: dashboard FO_BlogLLM card now dynamically reflects the active Ollama model via /api/blog/llm/status (was hardcoded to fo-blog-v7). TIP ecosystem.config.js OLLAMA_LLM_MODEL + BLOG_LLM_MODEL bumped fo-blog-v7 → fo-blog-v10 (Mac Studio Magatama training adopted 2026-05-13 00:33 UTC). tip-api restarted, PM2 state saved."} {"d":"2026-04-26","t":"DATA","m":"Juniper OEM transceiver seed: 59 PIDs inserted (SFP-1GE/SFPP-10G/SFP-25G/QSFPP-40G/JNP-QSFP-100G/JNP-QSFP56-200G/JNP-QSFPDD-400G/JNP-OSFP-400G+800G + DAC/AOC). Scheduler: daily 04:15."} {"d":"2026-04-26","t":"FIX","m":"BlueOptics scraper: force HTTP/1.1 via Node.js https.get() to bypass empty-body HTTP/2 server bug; updated catalog path to /Transceivers_1 (changed 2026)."} {"d":"2026-04-26","t":"DATA","m":"Cisco TMG scraper: upsert logic fixed (market_status EOL + temp_range IND normalization). Full run in progress: 300+ switches, 15000+ compat matches written to switch_transceiver_compat."} diff --git a/CODEX-TASK-flexoptix-reference-matching.md b/CODEX-TASK-flexoptix-reference-matching.md new file mode 100644 index 0000000..d0418ba --- /dev/null +++ b/CODEX-TASK-flexoptix-reference-matching.md @@ -0,0 +1,737 @@ +# CODEX TASK: Flexoptix als Referenz-Katalog — Vollständige Equivalenz-Abdeckung + +## Kontext & Problem + +**Ziel:** Flexoptix-Katalog = absoluter Anker für ALLE Wettbewerbs-Preise in TIP. +Kein einziger Flexoptix-Artikel darf ohne Wettbewerbspreise sein. + +**Aktueller Zustand (Stand 2026-05-13 — aus live DB-Analyse):** + +``` +Vendor | Products priced | FO matches (approved) +----------------|-----------------|---------------------- +ATGBICS | 8.260 | 0 ← KRITISCH +NADDOD | 744 | 0 ← KRITISCH +ShopFiber24 | 312 | 0 ← KRITISCH +10Gtek | 49 | 0 ← KRITISCH +FiberMall | 304 | 1.011 ← OK +QSFPTEK | 206 | 162 ← OK +Fluxlight | 119 | 604 ← OK +SFPcables | 78 | 76 ← OK +GBICS | 72 | 133 ← OK +``` + +**Ursachen (diagnostiziert):** + +1. **30-Tage-Filter im Matcher** — Der Query in `scheduler.ts` filtert Kandidaten via + `po.time > NOW() - INTERVAL '30 days'`. ATGBICS/NADDOD/10Gtek/ShopFiber24 haben + Preis-Observations, aber diese sind älter als 30 Tage → werden als Kandidaten + komplett ausgeschlossen. + +2. **form_factor-Normalisierung fehlt** — Verschiedene Scraper schreiben unterschiedliche + Werte: `"SFP+"` vs `"SFP-Plus"` vs `"SFP+ (LC)"`. Der Matcher filtert exakt per + `t.form_factor = $1`, daher 0 Kandidaten bei abweichendem Format. + +3. **Kein Initial-Reconcile** — Der Matcher läuft nur nightly für `competitor_verified = false`. + Es gibt keinen einmaligen Bulk-Reconcile der gesamten Katalog-Überschneidung. + +--- + +## Repository + +``` +Pfad (lokal): /Users/renefichtmueller/Desktop/Claude Code/github-repos/transceiver-db/ +Pfad (Erik): /opt/tip/ +Stack: TypeScript, Node.js, PostgreSQL 17, pg-boss +Haupt-Dateien: packages/scraper/src/scheduler.ts ← Matcher, pg-boss Jobs + packages/scraper/src/robots/ ← Neues Robot-Modul hier rein + sql/ ← Migrationen (aktuell 107 vorhanden) +``` + +--- + +## SCHRITT 1: Diagnose-Migration — form_factor-Normalisierungsübersicht + +**Erstelle Datei:** `sql/108-form-factor-normalization.sql` + +```sql +-- Migration 108: form_factor normalisieren (alle Schreibweisen → canonical) +-- Zweck: Matcher findet Kandidaten nur bei exaktem form_factor-Match. +-- Verschiedene Scraper schreiben inkonsistente Werte. + +-- 1. Canonical-Mapping anwenden +UPDATE transceivers SET + form_factor = CASE + -- SFP Varianten + WHEN UPPER(TRIM(form_factor)) IN ('SFP', 'SFP (LC)', 'SFP DDM', 'SFP MODULE', '1G SFP', 'GLC', 'MINI-GBIC') + THEN 'SFP' + -- SFP+ Varianten + WHEN UPPER(TRIM(form_factor)) IN ('SFP+', 'SFP+ (LC)', 'SFP-PLUS', 'SFP PLUS', 'SFP+ DDM', + 'SFP+ MODULE', '10G SFP+', 'SFP+ OPTICAL', '10GSFP+', + 'SFP+/SFP28 COMPATIBLE', 'SFP+(LC)') + THEN 'SFP+' + -- SFP28 Varianten (25G) + WHEN UPPER(TRIM(form_factor)) IN ('SFP28', 'SFP-28', 'SFP 28', '25G SFP28', 'SFP28 (LC)', + 'SFP28 DDM', '25GSFP28') + THEN 'SFP28' + -- QSFP+ Varianten (40G) + WHEN UPPER(TRIM(form_factor)) IN ('QSFP+', 'QSFP-PLUS', 'QSFP PLUS', '40G QSFP+', + 'QSFP+ (MPO)', 'QSFP+ MODULE', 'QSFP+ DDM', '40GQSFP+') + THEN 'QSFP+' + -- QSFP28 Varianten (100G) + WHEN UPPER(TRIM(form_factor)) IN ('QSFP28', 'QSFP-28', 'QSFP 28', '100G QSFP28', + 'QSFP28 (LC)', 'QSFP28 MODULE', 'QSFP28 DDM', '100GQSFP28') + THEN 'QSFP28' + -- QSFP56 Varianten (200G) + WHEN UPPER(TRIM(form_factor)) IN ('QSFP56', 'QSFP-56', '200G QSFP56', 'QSFP56-DD') + THEN 'QSFP56' + -- QSFP-DD Varianten (400G) + WHEN UPPER(TRIM(form_factor)) IN ('QSFP-DD', 'QSFPDD', 'QSFP DD', '400G QSFP-DD', + 'QSFP-DD MODULE', 'QSFP56-DD 400G') + THEN 'QSFP-DD' + -- QSFP-DD800 / 800G + WHEN UPPER(TRIM(form_factor)) IN ('QSFP-DD800', 'QSFP-DD 800G', '800G QSFP-DD', 'OSFP-RHS') + THEN 'QSFP-DD800' + -- OSFP Varianten + WHEN UPPER(TRIM(form_factor)) IN ('OSFP', 'OSFP MODULE', '400G OSFP', '800G OSFP') + THEN 'OSFP' + -- CFP Varianten + WHEN UPPER(TRIM(form_factor)) IN ('CFP', 'CFP2', 'CFP4', 'CFP-DCO', 'CFP2-DCO') + THEN UPPER(TRIM(form_factor)) + -- XFP + WHEN UPPER(TRIM(form_factor)) IN ('XFP', '10G XFP', 'XFP DDM') + THEN 'XFP' + -- X2 / XENPAK + WHEN UPPER(TRIM(form_factor)) IN ('X2', 'XENPAK', 'X2 MODULE') + THEN UPPER(TRIM(form_factor)) + -- DAC Cable-Typen (kein optisches Modul — form_factor trotzdem normalisieren) + WHEN UPPER(form_factor) LIKE '%DAC%' AND UPPER(form_factor) LIKE '%QSFP28%' THEN 'QSFP28-DAC' + WHEN UPPER(form_factor) LIKE '%DAC%' AND UPPER(form_factor) LIKE '%QSFP+%' THEN 'QSFP+-DAC' + WHEN UPPER(form_factor) LIKE '%DAC%' AND UPPER(form_factor) LIKE '%SFP28%' THEN 'SFP28-DAC' + WHEN UPPER(form_factor) LIKE '%DAC%' AND UPPER(form_factor) LIKE '%SFP+%' THEN 'SFP+-DAC' + WHEN UPPER(form_factor) LIKE '%AOC%' AND UPPER(form_factor) LIKE '%QSFP28%' THEN 'QSFP28-AOC' + WHEN UPPER(form_factor) LIKE '%AOC%' AND UPPER(form_factor) LIKE '%QSFP+%' THEN 'QSFP+-AOC' + WHEN UPPER(form_factor) LIKE '%AOC%' AND UPPER(form_factor) LIKE '%SFP28%' THEN 'SFP28-AOC' + WHEN UPPER(form_factor) LIKE '%AOC%' AND UPPER(form_factor) LIKE '%SFP+%' THEN 'SFP+-AOC' + ELSE form_factor -- unbekannte Werte unverändert lassen + END +WHERE form_factor IS NOT NULL; + +-- 2. speed_gbps normalisieren (sicherstellen: keine String-Artefakte) +-- Manche Scraper speichern '10.0', '10.00', '1.0' statt '10', '1' → numerisch aber inkonsistent +-- Da speed_gbps NUMERIC ist, normalisieren auf saubere Dezimalstellen +UPDATE transceivers SET + speed_gbps = ROUND(speed_gbps::NUMERIC, 2) +WHERE speed_gbps IS NOT NULL; + +-- 3. Loggable Übersicht: welche form_factor-Werte noch unbekannt sind +DO $$ +DECLARE + rec RECORD; +BEGIN + RAISE NOTICE '=== Unbekannte form_factor Werte (keine Normalisierung angewendet) ==='; + FOR rec IN + SELECT form_factor, COUNT(*) as cnt + FROM transceivers + WHERE form_factor NOT IN ( + 'SFP','SFP+','SFP28','QSFP+','QSFP28','QSFP56','QSFP-DD','QSFP-DD800','OSFP', + 'CFP','CFP2','CFP4','CFP-DCO','CFP2-DCO','XFP','X2','XENPAK', + 'SFP-DAC','SFP+-DAC','SFP28-DAC','QSFP+-DAC','QSFP28-DAC', + 'SFP+-AOC','SFP28-AOC','QSFP+-AOC','QSFP28-AOC' + ) + AND form_factor IS NOT NULL + GROUP BY form_factor ORDER BY cnt DESC LIMIT 30 + LOOP + RAISE NOTICE ' %: % transceivers', rec.form_factor, rec.cnt; + END LOOP; +END; +$$; +``` + +--- + +## SCHRITT 2: Full-Catalog-Reconcile-Robot + +**Erstelle Datei:** `packages/scraper/src/robots/catalog-reconcile.ts` + +Dieser Robot ist ein einmaliger Bulk-Matcher der ALLE Flexoptix-Produkte gegen +ALLE Wettbewerber abgleicht — ohne 30-Tage-Fenster, ohne `competitor_verified`-Filter. +Er nutzt die gleiche Logik wie der bestehende Nightly-Matcher, aber breiter. + +```typescript +/** + * Catalog Reconcile Robot + * + * Vollständiger Bulk-Abgleich Flexoptix ↔ ALLE Wettbewerber. + * + * Unterschiede zum Nightly-Matcher (maintenance:find-equivalences): + * - Kein 30-Tage-Fenster für price_observations — alle Produkte mit JEMALS + * beobachteten Preisen werden als Kandidaten gewertet + * - Kein competitor_verified-Filter — auch bereits gematchte FX-Produkte + * bekommen neue Matches wenn neue Wettbewerberprodukte hinzukommen + * - Batch-Verarbeitung mit commit nach jeweils 100 Matches + * - Vollständiges Reporting am Ende + * + * Trigger: pg-boss Job "catalog:reconcile" (on-demand oder monatlich) + * Laufzeit: ~5–15 Minuten bei 1.000+ FX-Produkten + */ + +import { pool } from "../utils/db"; + +export interface ReconcileResult { + flexoptixProcessed: number; + newAutoApproved: number; + newPending: number; + skippedLowConfidence: number; + skippedAlreadyMatched: number; + vendorBreakdown: Record; + durationMs: number; +} + +// ── Konfigurations-Konstanten ──────────────────────────────────────────────── + +/** Minimum-Confidence für pending-Eintrag (unter diesem Schwellwert: ignorieren) */ +const CONFIDENCE_MIN = 0.50; + +/** Confidence-Schwellwert für auto_approved */ +const CONFIDENCE_AUTO_APPROVE = 0.73; + +/** + * Maximale Anzahl Tage seit letzter price_observation. + * NULL = kein Filter (alle Produkte mit mind. 1 Observation). + * Für Full-Reconcile: NULL. + */ +const MAX_PRICE_AGE_DAYS: number | null = null; + +// ── Helper: Wellenlänge aus Text extrahieren ──────────────────────────────── + +function extractFirstNm(wavelengths: string | null): number | null { + if (!wavelengths) return null; + const m = wavelengths.match(/(\d{3,4})/); + return m ? parseInt(m[1], 10) : null; +} + +// ── Haupt-Matching-Logik (identisch mit Nightly-Matcher) ──────────────────── + +function calcConfidence( + fx: { standard_name: string | null; fiber_type: string | null; reach_meters: number | null; wavelengths: string | null }, + cand: { standard_name: string | null; fiber_type: string | null; reach_meters: number | null; wavelengths: string | null } +): { confidence: number; basis: string[] } { + // Max-Score: form_factor(25) + speed_gbps(20) + standard_name(30) + + // wavelength_nm(20) + fiber_type(10) + reach(10) = 115 + // Beide form_factor und speed_gbps sind bereits durch den SQL-Filter gesichert. + let score = 0; + const basis: string[] = []; + + score += 25; basis.push("form_factor"); + score += 20; basis.push("speed_gbps"); + + if ( + fx.standard_name && cand.standard_name && + fx.standard_name.trim().toUpperCase() === cand.standard_name.trim().toUpperCase() + ) { + score += 30; basis.push("standard_name"); + } + + const fxNm = extractFirstNm(fx.wavelengths); + const candNm = extractFirstNm(cand.wavelengths); + if (fxNm !== null && candNm !== null) { + if (Math.abs(fxNm - candNm) <= 15) { + score += 20; basis.push(`wavelength_${fxNm}nm`); + } else { + score -= 20; + } + } + + if (fx.fiber_type && cand.fiber_type) { + if (fx.fiber_type.trim().toUpperCase() === cand.fiber_type.trim().toUpperCase()) { + score += 10; basis.push("fiber_type"); + } else { + score -= 15; + } + } + + if (fx.reach_meters && cand.reach_meters && fx.reach_meters > 0 && cand.reach_meters > 0) { + const diff = Math.abs(fx.reach_meters - cand.reach_meters); + const tolerance = Math.max(fx.reach_meters, 1) * 0.25; + if (diff <= tolerance) { + score += 10; basis.push("reach"); + } else { + score -= 15; + } + } else if (!fx.reach_meters && !cand.reach_meters) { + score += 5; basis.push("reach_null"); + } + + const confidence = Math.max(0, Math.min(1, score / 115)); + return { confidence, basis }; +} + +// ── Haupt-Funktion ─────────────────────────────────────────────────────────── + +export async function runCatalogReconcile(): Promise { + const startMs = Date.now(); + console.log("=== Catalog Reconcile Robot ==="); + console.log(` Started: ${new Date().toISOString()}`); + console.log(` Mode: FULL (no 30-day window, all vendors)`); + + const result: ReconcileResult = { + flexoptixProcessed: 0, + newAutoApproved: 0, + newPending: 0, + skippedLowConfidence: 0, + skippedAlreadyMatched: 0, + vendorBreakdown: {}, + } as ReconcileResult; + + // ── Alle Flexoptix-Produkte laden ───────────────────────────────────────── + // Kein competitor_verified-Filter → wir reconcilen ALLES + const { rows: fxProducts } = await pool.query<{ + id: string; + part_number: string; + standard_name: string | null; + form_factor: string | null; + speed_gbps: string | null; + fiber_type: string | null; + reach_meters: number | null; + wavelengths: string | null; + }>(` + SELECT t.id, t.part_number, t.standard_name, t.form_factor, + t.speed_gbps, t.fiber_type, t.reach_meters, t.wavelengths + FROM transceivers t + JOIN vendors v ON v.id = t.vendor_id + WHERE UPPER(v.name) LIKE '%FLEXOPTIX%' + AND t.form_factor IS NOT NULL + AND t.speed_gbps IS NOT NULL + ORDER BY t.part_number + `); + + result.flexoptixProcessed = fxProducts.length; + console.log(` Flexoptix products to process: ${fxProducts.length}`); + + const priceAgeFilter = MAX_PRICE_AGE_DAYS !== null + ? `AND po.time > NOW() - INTERVAL '${MAX_PRICE_AGE_DAYS} days'` + : ""; + + for (const fx of fxProducts) { + if (!fx.form_factor || !fx.speed_gbps) continue; + + // ── Wettbewerber-Kandidaten für dieses FX-Produkt ────────────────────── + // Kandidaten = alle Wettbewerber mit gleichem form_factor und speed_gbps + // die mindestens 1 price_observation haben (kein Zeitlimit) + const { rows: candidates } = await pool.query<{ + competitor_id: string; + part_number: string; + standard_name: string | null; + form_factor: string | null; + speed_gbps: string | null; + fiber_type: string | null; + reach_meters: number | null; + wavelengths: string | null; + vendor_name: string; + last_price: Date | null; + price_count: string; + }>(` + SELECT t.id AS competitor_id, t.part_number, t.standard_name, + t.form_factor, t.speed_gbps, t.fiber_type, t.reach_meters, + t.wavelengths, v.name AS vendor_name, + MAX(po.time) AS last_price, COUNT(DISTINCT po.id) AS price_count + FROM transceivers t + JOIN vendors v ON v.id = t.vendor_id + JOIN price_observations po ON po.transceiver_id = t.id + WHERE UPPER(v.name) NOT LIKE '%FLEXOPTIX%' + AND v.is_competitor = true + ${priceAgeFilter} + AND UPPER(t.form_factor) = UPPER($1) + AND ROUND(t.speed_gbps::NUMERIC, 2) = ROUND($2::NUMERIC, 2) + AND t.id != $3 + GROUP BY t.id, t.part_number, t.standard_name, t.form_factor, + t.speed_gbps, t.fiber_type, t.reach_meters, t.wavelengths, v.name + HAVING COUNT(DISTINCT po.id) >= 1 + `, [fx.form_factor, fx.speed_gbps, fx.id]); + + for (const cand of candidates) { + const { confidence, basis } = calcConfidence(fx, cand); + + if (confidence < CONFIDENCE_MIN) { + result.skippedLowConfidence++; + continue; + } + + const status = confidence >= CONFIDENCE_AUTO_APPROVE ? "auto_approved" : "pending"; + const notes = + `${fx.part_number} ↔ ${cand.part_number} (${cand.vendor_name}) | ` + + `basis: ${basis.join(", ")} | reach: ${fx.reach_meters}m vs ${cand.reach_meters}m | ` + + `wavelength: ${fx.wavelengths ?? "?"} vs ${cand.wavelengths ?? "?"} | ` + + `last_price: ${cand.last_price?.toISOString() ?? "never"} | ` + + `source: catalog-reconcile`; + + // Upsert — bereits approved/rejected Einträge nicht überschreiben + const { rowCount } = await pool.query(` + INSERT INTO transceiver_equivalences + (flexoptix_id, competitor_id, confidence, match_basis, match_notes, status) + VALUES ($1, $2, $3, $4, $5, $6) + ON CONFLICT (flexoptix_id, competitor_id) DO UPDATE SET + confidence = EXCLUDED.confidence, + match_basis = EXCLUDED.match_basis, + match_notes = EXCLUDED.match_notes, + updated_at = NOW() + WHERE transceiver_equivalences.status NOT IN ('approved', 'rejected', 'auto_approved') + `, [fx.id, cand.competitor_id, confidence, basis, notes, status]); + + const wasInsertOrUpdate = (rowCount ?? 0) > 0; + if (!wasInsertOrUpdate) { + result.skippedAlreadyMatched++; + continue; + } + + // Vendor-Breakdown tracken + if (!result.vendorBreakdown[cand.vendor_name]) { + result.vendorBreakdown[cand.vendor_name] = { autoApproved: 0, pending: 0 }; + } + + if (status === "auto_approved") { + result.newAutoApproved++; + result.vendorBreakdown[cand.vendor_name].autoApproved++; + + // competitor_verified auf FX-Produkt setzen + await pool.query(` + UPDATE transceivers + SET competitor_verified = true, + competitor_verified_at = NOW(), + competitor_status = 'matched', + competitor_status_updated_at = NOW() + WHERE id = $1 AND competitor_verified = false + `, [fx.id]); + } else { + result.newPending++; + result.vendorBreakdown[cand.vendor_name].pending++; + } + } + } + + result.durationMs = Date.now() - startMs; + + // ── Abschluss-Report ─────────────────────────────────────────────────────── + console.log("\n=== Catalog Reconcile Results ==="); + console.log(` Flexoptix processed: ${result.flexoptixProcessed}`); + console.log(` New auto_approved: ${result.newAutoApproved}`); + console.log(` New pending: ${result.newPending}`); + console.log(` Skipped (low confidence): ${result.skippedLowConfidence}`); + console.log(` Skipped (already matched): ${result.skippedAlreadyMatched}`); + console.log(` Duration: ${(result.durationMs / 1000).toFixed(1)}s`); + console.log("\n Vendor Breakdown:"); + for (const [vendor, counts] of Object.entries(result.vendorBreakdown).sort( + (a, b) => (b[1].autoApproved + b[1].pending) - (a[1].autoApproved + a[1].pending) + )) { + console.log(` ${vendor.padEnd(20)} auto_approved=${counts.autoApproved} pending=${counts.pending}`); + } + console.log("=================================\n"); + + return result; +} +``` + +--- + +## SCHRITT 3: scheduler.ts — neuen Job registrieren + +**Ändere in:** `packages/scraper/src/scheduler.ts` + +### 3a: Queue in der Pre-Create-Liste hinzufügen + +Suche nach dem Block mit `"sync:flexoptix-catalog"` in der queues-Liste und füge hinzu: + +```typescript +"catalog:reconcile", +``` + +### 3b: Worker registrieren + +Im Block wo `boss.work()` aufgerufen wird, nach dem `sync:flexoptix-catalog` Worker einfügen: + +```typescript +// ── Catalog Reconcile — Full Bulk Match ───────────────────────────────────── +await boss.work("catalog:reconcile", async () => { + const ts = new Date().toISOString(); + console.log(`[${ts}] Running: Full Catalog Reconcile (Flexoptix ↔ ALL competitors)`); + const { runCatalogReconcile } = await import("./robots/catalog-reconcile"); + const result = await runCatalogReconcile(); + console.log(`[catalog:reconcile] Done: ${result.newAutoApproved} auto_approved, ${result.newPending} pending`); +}); +``` + +### 3c: Schedule registrieren + +Nach dem `sync:flexoptix-catalog` Schedule: + +```typescript +// Catalog Reconcile: monatlich Sonntag 04:00 UTC +// + on-demand via POST /api/maintenance/run { job: "catalog:reconcile" } +await boss.schedule("catalog:reconcile", "0 4 1 * *", {}, { + retryLimit: 2, + expireInSeconds: 3600, +}); +``` + +### 3d: Nightly-Matcher erweitern (30-Tage-Filter lockern) + +Im bestehenden `maintenance:find-equivalences` Worker, den Kandidaten-Query ändern: + +**Vorher:** +```sql +JOIN price_observations po ON po.transceiver_id = t.id +WHERE UPPER(v.name) NOT LIKE '%FLEXOPTIX%' + AND po.time > NOW() - INTERVAL '30 days' + AND t.form_factor = $1 + AND t.speed_gbps = $2 +``` + +**Nachher (90-Tage-Fenster + UPPER-normalisiert):** +```sql +JOIN price_observations po ON po.transceiver_id = t.id +WHERE UPPER(v.name) NOT LIKE '%FLEXOPTIX%' + AND po.time > NOW() - INTERVAL '90 days' + AND UPPER(t.form_factor) = UPPER($1) + AND ROUND(t.speed_gbps::NUMERIC, 2) = ROUND($2::NUMERIC, 2) + AND t.id != $3 +``` + +Diese Änderung bewirkt, dass ATGBICS/NADDOD/10Gtek/ShopFiber24 — die zuletzt vor >30 +aber <90 Tagen gescrapt wurden — ab sofort als Kandidaten erscheinen. + +--- + +## SCHRITT 4: API-Endpunkt — On-Demand Reconcile + +**Ändere in:** `packages/api/src/routes/` (existierende maintenance-Route oder neue Datei) + +Suche nach einer existierenden maintenance/admin Route-Datei. Falls vorhanden, +dort einen neuen Endpunkt einfügen. Falls nicht vorhanden, erstelle: +`packages/api/src/routes/maintenance.ts` + +```typescript +import { FastifyPluginAsync } from "fastify"; +import PgBoss from "pg-boss"; + +export const maintenanceRoutes: FastifyPluginAsync = async (fastify) => { + // POST /api/maintenance/run — startet einen pg-boss Job on-demand + fastify.post<{ Body: { job: string } }>("/run", { + schema: { + body: { + type: "object", + required: ["job"], + properties: { + job: { + type: "string", + enum: [ + "catalog:reconcile", + "maintenance:find-equivalences", + "sync:flexoptix-catalog", + "enrich:wavelength", + ], + }, + }, + }, + }, + }, async (request, reply) => { + const boss = fastify.boss as PgBoss; // boss muss als Fastify-Decoration verfügbar sein + + const jobId = await boss.send(request.body.job, {}); + return reply.code(202).send({ + success: true, + job: request.body.job, + jobId, + message: `Job ${request.body.job} enqueued`, + }); + }); + + // GET /api/maintenance/equivalences/summary — Abdeckungs-Report + fastify.get("/equivalences/summary", async (_request, reply) => { + const { pool } = await import("../utils/db"); + + const { rows } = await pool.query(` + SELECT + v.name AS vendor, + COUNT(DISTINCT t.id)::int AS products_with_prices, + COUNT(DISTINCT te.flexoptix_id)::int AS fo_matches_approved, + ROUND( + 100.0 * COUNT(DISTINCT te.flexoptix_id) / NULLIF( + (SELECT COUNT(*) FROM transceivers fx + JOIN vendors fxv ON fxv.id = fx.vendor_id + WHERE UPPER(fxv.name) LIKE '%FLEXOPTIX%'), + 0 + ), 1 + )::float AS fo_coverage_pct + FROM vendors v + JOIN transceivers t ON t.vendor_id = v.id + JOIN price_observations po ON po.transceiver_id = t.id + LEFT JOIN transceiver_equivalences te ON te.competitor_id = t.id + AND te.status IN ('approved','auto_approved') + WHERE v.is_competitor = true + AND UPPER(v.name) NOT LIKE '%FLEXOPTIX%' + GROUP BY v.id, v.name + HAVING COUNT(DISTINCT t.id) >= 5 + ORDER BY fo_matches_approved DESC, products_with_prices DESC + `); + + return reply.send({ success: true, data: rows }); + }); +}; +``` + +--- + +## SCHRITT 5: Migration für Nightly-Matcher-Bugfix + +**Erstelle Datei:** `sql/109-fix-nightly-matcher-time-window.sql` + +```sql +-- Migration 109: Dokumentiert den 30→90 Tage Bugfix im Nightly-Matcher +-- Dieser SQL ist reine Dokumentation — die eigentliche Änderung ist in scheduler.ts + +COMMENT ON TABLE transceiver_equivalences IS + 'Flexoptix-zentrierter Equivalenz-Graph: flexoptix_id = Referenz-Anker, + competitor_id = äquivalentes Konkurrenzprodukt. + Status: pending (review nötig), auto_approved (Confidence ≥0.73), + approved (manuell), rejected (explizit ausgeschlossen). + KRITISCH: Matcher nutzt 90-Tage-Fenster (war: 30 Tage) damit Vendors + mit seltener Preisbeobachtung (ATGBICS/NADDOD/10Gtek/ShopFiber24) gefunden werden.'; +``` + +--- + +## SCHRITT 6: One-Shot Reconcile nach Deployment ausführen + +**Nachdem alles deployed ist**, einmalig auf Erik ausführen: + +```bash +# Auf Erik: Migration 108 ausführen +psql -U tip -d transceiver_intelligence -f /opt/tip/sql/108-form-factor-normalization.sql +psql -U tip -d transceiver_intelligence -f /opt/tip/sql/109-fix-nightly-matcher-time-window.sql + +# PM2 Daemon neu starten +pm2 restart tip-scraper-daemon --update-env + +# 2 Minuten warten bis pg-boss Workers registriert +sleep 120 + +# Full Catalog Reconcile on-demand triggern +curl -s -X POST http://localhost:3001/api/maintenance/run \ + -H "Content-Type: application/json" \ + -d '{"job": "catalog:reconcile"}' | jq . + +# Coverage-Report abrufen +curl -s http://localhost:3001/api/maintenance/equivalences/summary | jq '.data | .[:10]' +``` + +**Erwartetes Ergebnis des Reconcile:** +``` +ATGBICS: ~500-2.000 neue Matches (von 8.260 Produkten, bei Überschneidung ~10-25%) +NADDOD: ~100-300 neue Matches +ShopFiber24: ~50-150 neue Matches +10Gtek: ~10-30 neue Matches +``` + +--- + +## SCHRITT 7: Completeness Dashboard Query + +**Diese SQL** direkt in die TIP Dashboard-Komponente integrieren (z.B. in `packages/frontend` +oder als API-Response für das Admin-Dashboard): + +```sql +-- Flexoptix Coverage Report +-- Zeigt wie viele FX-Produkte pro Wettbewerber gematcht sind +SELECT + v.name AS vendor, + COUNT(DISTINCT t.id) AS competitor_products, + COUNT(DISTINCT te.flexoptix_id) AS fo_products_matched, + ROUND( + 100.0 * COUNT(DISTINCT te.flexoptix_id) / + NULLIF(fo_total.total, 0), 1 + ) AS coverage_pct, + MIN(po.time) AS first_observation, + MAX(po.time) AS last_observation +FROM vendors v +JOIN transceivers t ON t.vendor_id = v.id +JOIN price_observations po ON po.transceiver_id = t.id +LEFT JOIN transceiver_equivalences te ON te.competitor_id = t.id + AND te.status IN ('approved','auto_approved') +CROSS JOIN ( + SELECT COUNT(*) as total + FROM transceivers fx + JOIN vendors fxv ON fxv.id = fx.vendor_id + WHERE UPPER(fxv.name) LIKE '%FLEXOPTIX%' +) fo_total +WHERE v.is_competitor = true + AND UPPER(v.name) NOT LIKE '%FLEXOPTIX%' +GROUP BY v.id, v.name, fo_total.total +HAVING COUNT(DISTINCT t.id) >= 5 +ORDER BY coverage_pct DESC NULLS LAST, competitor_products DESC; +``` + +--- + +## Ausführungsreihenfolge + +``` +1. sql/108-form-factor-normalization.sql → psql ausführen +2. packages/scraper/src/robots/catalog-reconcile.ts → neue Datei erstellen +3. packages/scraper/src/scheduler.ts → 3 Änderungen (Worker, Schedule, 30→90 Tage) +4. packages/api/src/routes/maintenance.ts → neue Datei (oder in bestehende Route einfügen) +5. sql/109-fix-nightly-matcher-time-window.sql → psql ausführen (Dokumentation) +6. npm run build -w packages/scraper → Build +7. npm run build -w packages/api → Build +8. git add -A && git commit -m "feat: Flexoptix reference matching overhaul" +9. git push origin HEAD:main +10. Auf Erik: git fetch && git reset --hard origin/main +11. Auf Erik: sql/108 und sql/109 per psql ausführen +12. pm2 restart tip-scraper-daemon --update-env +13. sleep 120 (pg-boss Workers registrieren) +14. curl -X POST .../api/maintenance/run '{"job":"catalog:reconcile"}' +15. Ergebnis prüfen: curl .../api/maintenance/equivalences/summary +``` + +--- + +## Erwartetes Ergebnis nach Deployment + +| Vendor | Vorher | Nachher (geschätzt) | +|--------|--------|---------------------| +| ATGBICS | 0 Matches | 300–1.500 Matches | +| NADDOD | 0 Matches | 80–300 Matches | +| ShopFiber24 | 0 Matches | 40–150 Matches | +| 10Gtek | 0 Matches | 10–40 Matches | +| FiberMall | 1.011 Matches | ~1.011 (unverändert) | +| Coverage gesamt | ~22% | ~45–60% | + +**Warum nicht 100% Coverage?** +Flexoptix hat ~457 aktive Produkte. Viele Wettbewerber-Produkte sind herstellerspezifische +Kompatibilität (z.B. "Cisco-kompatibel SFP+") ohne exaktes Gegenstück im FX-Katalog. +Die verbleibende Lücke erklärt sich aus: +- Wettbewerber-Produkte ohne FX-Äquivalent (Nischenspezifikationen, OEM-only) +- Fehlende Normalisierungsfelder auf Wettbewerber-Seite (null fiber_type etc.) +- Produkte die FX aus dem Sortiment genommen hat + +--- + +## Was NICHT geändert wird + +- Bestehende `transceiver_equivalences`-Einträge mit `status='approved'` oder `status='rejected'` + → werden nie überschrieben (ON CONFLICT ... WHERE status NOT IN (...)) +- Alle existierenden scraper-Logiken → nicht anfassen +- PM2-Konfiguration, Cloudflare Tunnel, Umgebungsvariablen → nicht anfassen +- `CODEX-TASK-zero-manual-review.md` Aufgaben → separater Task, nicht überlappen + +--- + +## Zusammenhang mit CODEX-TASK-zero-manual-review.md + +| Task | Fokus | Reihenfolge | +|------|-------|-------------| +| **dieser Task** | Coverage: ATGBICS/NADDOD/10Gtek/ShopFiber24 von 0% auf ~50% | Zuerst | +| zero-manual-review | Qualität: Confidence-Scores durch deterministischen Matcher ersetzen | Danach | + +Erst Coverage herstellen, dann Qualität verbessern. Macht keinen Sinn, +die 0%-Vendors mit einem neuen Matcher zu matchen wenn sie nicht einmal im Kandidaten-Pool sind. diff --git a/packages/api/src/routes/blog.ts b/packages/api/src/routes/blog.ts index b90d648..973921f 100644 --- a/packages/api/src/routes/blog.ts +++ b/packages/api/src/routes/blog.ts @@ -1558,7 +1558,7 @@ blogRouter.post("/llm/reset-queue", (_req: Request, res: Response) => { }); // POST /api/blog/llm/switch — Switch active LLM provider at runtime (no restart needed) -// Body: { provider: "claude-code" | "anthropic" | "ollama", model?: "fo-blog-v7" | "qwen2.5:14b" } +// Body: { provider: "claude-code" | "anthropic" | "ollama", model?: "fo-blog-v10" | "qwen2.5:14b" | ... } blogRouter.post("/llm/switch", (req: Request, res: Response) => { const { provider, model } = req.body as { provider?: string; model?: string }; const validProviders = ["claude-code", "anthropic", "ollama"]; diff --git a/packages/dashboard/index.html b/packages/dashboard/index.html index 828e6f6..235177a 100644 --- a/packages/dashboard/index.html +++ b/packages/dashboard/index.html @@ -1346,11 +1346,11 @@
checking…
- -
+ +
-
🎯 fo-blog-v7
+
🎯 fo-blog-v10
Adapter Bridge / Mac Studio
@@ -1358,12 +1358,12 @@
★★★★ Blog-Qualität
★★★★★ Geschwindigkeit
-
✓ Neu trainierte RunPod-Version
+
✓ Neu trainierte Mac Studio Version
✓ Lokal / keine API-Kosten
⚠ Qualitaet laeuft schon, Feinschliff folgt
- BLOG_LLM_PROVIDER=ollama
OLLAMA_LLM_MODEL=fo-blog-v7
+ BLOG_LLM_PROVIDER=ollama
OLLAMA_LLM_MODEL=fo-blog-v10
checking…
@@ -5730,6 +5730,16 @@ async function loadBlogLLMStatus() { try { var data = await api('/api/blog/llm/status'); var llm = data.llm || {}; + + // Dynamically sync the FO_BlogLLM card to the actual current Ollama model name. + // Prevents the UI from going stale when Magatama trains a new fo-blog-vX version. + var foCard = document.getElementById('blog-model-card-fo'); + var foName = document.getElementById('blog-model-fo-name'); + var foEnv = document.getElementById('blog-model-fo-env'); + var currentFoModel = (llm.model && /^fo-blog-/.test(llm.model)) ? llm.model : (foCard && foCard.dataset.foModel) || 'fo-blog-v10'; + if (foCard) foCard.dataset.foModel = currentFoModel; + if (foName) foName.textContent = '🎯 ' + currentFoModel; + if (foEnv) foEnv.innerHTML = 'BLOG_LLM_PROVIDER=ollama
OLLAMA_LLM_MODEL=' + currentFoModel; var badge = document.getElementById('blog-llm-status-badge'); var activeModel = document.getElementById('blog-llm-active-model'); var activeProvider = document.getElementById('blog-llm-active-provider'); @@ -5798,7 +5808,7 @@ async function loadBlogLLMStatus() { if (foActive) foActive.style.display = 'inline'; var foStatusEl = document.getElementById('blog-model-fo-status'); if (foStatusEl) { - foStatusEl.textContent = llm.ok ? ('● Aktiv — ' + (llm.model || 'fo-blog-v7') + ' erreichbar') : '⚠ Ollama/Bridge nicht erreichbar: ' + (llm.error || '').slice(0, 60); + foStatusEl.textContent = llm.ok ? ('● Aktiv — ' + (llm.model || 'fo-blog-v10') + ' erreichbar') : '⚠ Ollama/Bridge nicht erreichbar: ' + (llm.error || '').slice(0, 60); foStatusEl.style.color = llm.ok ? '#1a7a3a' : '#b45309'; foStatusEl.style.fontWeight = '600'; }