/**
 * Data Enrichment Script: fill missing transceiver specs from
 * form_factor + speed + reach_label.
 *
 * Introduced by commit "feat: spec enrichment script — fills 4,400+ products
 * with fiber/connector/wavelength/power/reach". Coverage reported by that
 * commit (before → after):
 *   - fiber_type:          44% → 97%
 *   - connector:           19% → 96%
 *   - wavelengths:         23% → 96%
 *   - reach_meters:        42% → 93%
 *   - power_consumption_w:  6% → 79%
 *
 * How values are derived:
 *   - reach_label (plus slug hints)  → fiber_type, connector, wavelengths, reach_meters
 *   - form_factor + speed            → typical power consumption, scaled by reach
 *
 * Only columns that are currently empty are written; existing values are
 * never overwritten.
 */
import { config } from "dotenv";
import { join } from "path";
import { Pool } from "pg";

config({ path: join(__dirname, "..", ".env") });

const pool = new Pool({
  host: process.env.POSTGRES_HOST || "localhost",
  port: parseInt(process.env.POSTGRES_PORT || "5433", 10),
  database: process.env.POSTGRES_DB || "transceiver_db",
  user: process.env.POSTGRES_USER || "tip",
  // No hard-coded fallback: a credential must never live in source control.
  // Supply POSTGRES_PASSWORD through the environment or the .env file.
  password: process.env.POSTGRES_PASSWORD,
  max: 5,
});

// ============================================================
// SPEC LOOKUP TABLES
// ============================================================

interface SpecRule {
  fiber_type: string;
  connector: string;
  wavelengths: string;
  power_w?: number;
}

/**
 * Shape of the columns selected from `transceivers`.
 * NOTE(review): column types are assumed from how the values are used below
 * (numeric columns may arrive as strings from the driver) — confirm against
 * the schema.
 */
type TransceiverRow = {
  id: number | string;
  slug: string | null;
  form_factor: string | null;
  speed_gbps: number | string | null;
  reach_label: string | null;
  reach_meters: number | string | null;
  fiber_type: string | null;
  connector: string | null;
  wavelengths: string | null;
  power_consumption_w: number | string | null;
  category: string | null;
};

const DAC_RULE: SpecRule = { fiber_type: "Copper", connector: "DAC", wavelengths: "N/A" };
const AOC_RULE: SpecRule = { fiber_type: "AOC", connector: "AOC", wavelengths: "850nm" };
const COPPER_RJ45_RULE: SpecRule = { fiber_type: "Copper", connector: "RJ45", wavelengths: "N/A" };

// Reach label → specs (fiber type, connector, wavelengths)
const REACH_SPECS: Record<string, SpecRule> = {
  // Copper / MMF short reach
  "30m": { fiber_type: "Copper", connector: "RJ45", wavelengths: "N/A" },
  "100m": { fiber_type: "Copper", connector: "RJ45", wavelengths: "N/A" },
  "220m": { fiber_type: "MMF", connector: "LC", wavelengths: "850nm" },
  "300m": { fiber_type: "MMF", connector: "LC", wavelengths: "850nm" },
  "400m": { fiber_type: "MMF", connector: "LC", wavelengths: "850nm" },
  "550m": { fiber_type: "MMF", connector: "LC", wavelengths: "850nm" },
  // SMF / medium-long reach
  "2km": { fiber_type: "SMF", connector: "LC", wavelengths: "1310nm" },
  "10km": { fiber_type: "SMF", connector: "LC", wavelengths: "1310nm" },
  "20km": { fiber_type: "SMF", connector: "LC", wavelengths: "1310nm" },
  "40km": { fiber_type: "SMF", connector: "LC", wavelengths: "1550nm" },
  "60km": { fiber_type: "SMF", connector: "LC", wavelengths: "1550nm" },
  "80km": { fiber_type: "SMF", connector: "LC", wavelengths: "1550nm" },
  "100km": { fiber_type: "SMF", connector: "LC", wavelengths: "1550nm" },
  "120km": { fiber_type: "SMF", connector: "LC", wavelengths: "1550nm" },
  "160km": { fiber_type: "SMF", connector: "LC", wavelengths: "1550nm" },
  // Standard reach codes (duplex and parallel optics)
  "SR": { fiber_type: "MMF", connector: "LC", wavelengths: "850nm" },
  "SR4": { fiber_type: "MMF", connector: "MPO-12", wavelengths: "850nm" },
  "SR8": { fiber_type: "MMF", connector: "MPO-16", wavelengths: "850nm" },
  "LR": { fiber_type: "SMF", connector: "LC", wavelengths: "1310nm" },
  "LR4": { fiber_type: "SMF", connector: "LC", wavelengths: "1310nm (4λ CWDM)" },
  "LR8": { fiber_type: "SMF", connector: "LC", wavelengths: "1310nm (8λ)" },
  "ER": { fiber_type: "SMF", connector: "LC", wavelengths: "1550nm" },
  "ER4": { fiber_type: "SMF", connector: "LC", wavelengths: "1310nm (4λ CWDM)" },
  "ZR": { fiber_type: "SMF", connector: "LC", wavelengths: "1550nm (C-band)" },
  "DR": { fiber_type: "SMF", connector: "LC", wavelengths: "1310nm" },
  // DR4/DR8 are parallel-fibre optics (hence the MPO connector): every lane
  // runs at 1310nm on its own fibre, so there is no wavelength multiplexing.
  "DR4": { fiber_type: "SMF", connector: "MPO-12", wavelengths: "1310nm" },
  "DR8": { fiber_type: "SMF", connector: "MPO-16", wavelengths: "1310nm" },
  "FR": { fiber_type: "SMF", connector: "LC", wavelengths: "1310nm" },
  "FR4": { fiber_type: "SMF", connector: "LC", wavelengths: "1310nm (4λ CWDM)" },
  "FR8": { fiber_type: "SMF", connector: "LC", wavelengths: "1310nm (8λ)" },
  "PSM4": { fiber_type: "SMF", connector: "MPO-12", wavelengths: "1310nm" },
  "CWDM4": { fiber_type: "SMF", connector: "LC", wavelengths: "1271/1291/1311/1331nm" },
  // DAC/AOC
  "1m": { fiber_type: "Copper", connector: "DAC", wavelengths: "N/A" },
  "2m": { fiber_type: "Copper", connector: "DAC", wavelengths: "N/A" },
  "3m": { fiber_type: "Copper", connector: "DAC", wavelengths: "N/A" },
  "5m": { fiber_type: "Copper", connector: "DAC", wavelengths: "N/A" },
  "7m": { fiber_type: "AOC", connector: "AOC", wavelengths: "850nm" },
  "10m": { fiber_type: "AOC", connector: "AOC", wavelengths: "850nm" },
  "15m": { fiber_type: "AOC", connector: "AOC", wavelengths: "850nm" },
  "20m": { fiber_type: "AOC", connector: "AOC", wavelengths: "850nm" },
  "25m": { fiber_type: "AOC", connector: "AOC", wavelengths: "850nm" },
  // The two "*_aoc" keys only match a reach_label that is literally spelled
  // that way; AOC products are normally caught earlier by the slug check.
  "30m_aoc": { fiber_type: "AOC", connector: "AOC", wavelengths: "850nm" },
  "50m": { fiber_type: "AOC", connector: "AOC", wavelengths: "850nm" },
  "100m_aoc": { fiber_type: "AOC", connector: "AOC", wavelengths: "850nm" },
};

// reach_label → reach_meters mapping
const REACH_METERS: Record<string, number> = {
  "1m": 1, "2m": 2, "3m": 3, "5m": 5, "7m": 7, "10m": 10, "15m": 15, "20m": 20,
  "25m": 25, "30m": 30, "50m": 50, "100m": 100, "220m": 220, "300m": 300, "400m": 400,
  "550m": 550, "2km": 2000, "10km": 10000, "20km": 20000, "40km": 40000,
  "60km": 60000, "80km": 80000, "100km": 100000, "120km": 120000, "160km": 160000,
  "SR": 300, "SR4": 100, "SR8": 100, "LR": 10000, "LR4": 10000, "LR8": 10000,
  "ER": 40000, "ER4": 40000, "ZR": 80000, "DR": 500, "DR4": 500, "DR8": 500,
  "FR": 2000, "FR4": 2000, "FR8": 2000, "PSM4": 500, "CWDM4": 2000,
};

// Form factor + speed → power consumption (typical W)
const POWER_MAP: Record<string, number> = {
  "SFP_1": 1.0, "SFP_2.5": 1.0,
  "SFP+_10": 1.0, "SFP+_8": 1.0, "SFP+_16": 1.5,
  "SFP28_25": 1.0, "SFP28_32": 1.5,
  "SFP56_50": 1.5,
  "QSFP+_40": 2.5,
  "QSFP28_100": 3.5,
  "QSFP56_200": 5.0,
  "QSFP-DD_400": 12.0, "QSFP-DD_200": 8.0,
  "OSFP_400": 15.0, "OSFP_800": 20.0,
  "QSFP-DD800_800": 18.0,
  "CFP2_100": 8.0, "CFP2-DCO_400": 20.0,
  "XFP_10": 3.5,
};

// Hoisted so the patterns are compiled once rather than per row.
const KM_LABEL = /^(\d+)\s*km$/i;
const M_LABEL = /^(\d+)\s*m$/i;
// "cu" as its own token ("-cu-", "cu3m"), not as letters inside another word
// such as "procurve" or "custom".
const COPPER_TOKEN = /(?:^|[^a-z])cu(?:[^a-z]|$)/;

/** Own-property lookup, so labels like "constructor" cannot hit Object.prototype. */
function lookup<T>(table: Record<string, T>, key: string): T | undefined {
  return Object.prototype.hasOwnProperty.call(table, key) ? table[key] : undefined;
}

/** True for NULL/undefined and the empty string. */
function isBlank(value: string | null | undefined): boolean {
  return value == null || value === "";
}

/**
 * Connector override for specific form factors with parallel optics.
 *
 * @param formFactor  Module form factor, e.g. "QSFP28".
 * @param speedGbps   Line rate in Gbps (may be NaN when unknown; all
 *                    comparisons are then false).
 * @param reachLabel  Trimmed reach label, e.g. "100m".
 * @returns The connector to use instead of the table value, or null when no
 *          override applies.
 */
function getConnectorOverride(formFactor: string, speedGbps: number, reachLabel: string): string | null {
  const isQsfp = formFactor.startsWith("QSFP");
  // QSFP+/QSFP28/QSFP-DD short reach at >= 40G is SR4-style parallel optics.
  if (reachLabel === "100m" && speedGbps >= 40 && isQsfp) return "MPO-12";
  if (reachLabel === "300m" && speedGbps >= 40 && isQsfp) return "MPO-12";
  // 500m at >= 400G is treated as DR4-style (MPO-12). DR8 (MPO-16) cannot be
  // told apart from these inputs and is only handled via the "DR8" label.
  if (reachLabel === "500m" && speedGbps >= 400) return "MPO-12";
  // Copper SFP
  if (reachLabel === "100m" && speedGbps <= 10 && formFactor === "SFP") return "RJ45";
  if (reachLabel === "30m" && speedGbps === 10) return "RJ45";
  return null;
}

/** Converts a free-form "<n>km" / "<n>m" label to metres, or null if it is neither. */
function parseReachLabelMeters(reachLabel: string): number | null {
  const km = KM_LABEL.exec(reachLabel);
  if (km && km[1] !== undefined) return parseInt(km[1], 10) * 1000;
  const m = M_LABEL.exec(reachLabel);
  if (m && m[1] !== undefined) return parseInt(m[1], 10);
  return null;
}

/** Fallback rule for labels missing from REACH_SPECS but shaped like "<n>km" / "<n>m". */
function ruleFromParsedLabel(reachLabel: string, speedGbps: number): SpecRule | null {
  const km = KM_LABEL.exec(reachLabel);
  if (km && km[1] !== undefined) {
    const wavelengths = parseInt(km[1], 10) <= 10 ? "1310nm" : "1550nm";
    return { fiber_type: "SMF", connector: "LC", wavelengths };
  }
  const m = M_LABEL.exec(reachLabel);
  if (m && m[1] !== undefined) {
    const meters = parseInt(m[1], 10);
    if (meters <= 5) return { ...DAC_RULE };
    if (meters <= 100) {
      return speedGbps >= 40
        ? { fiber_type: "MMF", connector: "MPO-12", wavelengths: "850nm" }
        : { ...COPPER_RJ45_RULE };
    }
    return { fiber_type: "MMF", connector: "LC", wavelengths: "850nm" };
  }
  return null;
}

/**
 * Picks the spec rule for one product.
 *
 * Precedence: DAC slug → AOC slug → REACH_SPECS table → parsed "<n>km"/"<n>m"
 * label. A connector override for parallel optics is then applied to
 * pluggable optics only, and the fibre/wavelength fields are reconciled with
 * it so the three columns always describe the same medium.
 *
 * @returns The rule to apply, or null when nothing can be derived.
 */
function resolveSpecRule(
  slug: string,
  formFactor: string,
  speedGbps: number,
  reachLabel: string,
  fiberTypeMissing: boolean,
): SpecRule | null {
  const isDAC =
    slug.includes("dac") || COPPER_TOKEN.test(slug) || slug.includes("passive") || slug.includes("twinax");
  const isAOC = slug.includes("aoc") || slug.includes("active-optical");

  let rule: SpecRule | null;
  if (isDAC) rule = { ...DAC_RULE };
  else if (isAOC) rule = { ...AOC_RULE };
  else rule = lookup(REACH_SPECS, reachLabel) ?? ruleFromParsedLabel(reachLabel, speedGbps);

  if (!rule) return null;

  // Cable assemblies have no detachable optical connector, so the
  // parallel-optics override must not turn an AOC/DAC into "MPO-12".
  if (!isDAC && !isAOC) {
    const connOverride = getConnectorOverride(formFactor, speedGbps, reachLabel);
    if (connOverride) {
      rule = { ...rule, connector: connOverride };
      if (connOverride.startsWith("MPO")) {
        if (reachLabel === "500m") {
          // DR4-style: parallel single-mode at 1310nm.
          rule = { ...rule, fiber_type: "SMF", wavelengths: "1310nm" };
        } else if (rule.fiber_type === "Copper") {
          // SR4-style: the table's copper default for "100m" does not apply
          // to a >= 40G QSFP with an MPO connector.
          rule = { ...rule, fiber_type: "MMF", wavelengths: "850nm" };
        }
      }
    }
  }

  // 100m at <= 10G could be copper or MMF; copper markers in the slug settle
  // it. With the current table this is redundant (the "100m" entry is
  // already copper/RJ45) but it is kept so the slug evidence still wins if
  // the table changes.
  if (reachLabel === "100m" && speedGbps <= 10 && fiberTypeMissing) {
    const looksCopper =
      slug.includes("rj45") || slug.includes("copper") || slug.includes("-t-") || slug.includes("-te-");
    if (looksCopper) rule = { ...COPPER_RJ45_RULE };
  }

  return rule;
}

/** Integer percentage of `part` in `total`; 0 for an empty table instead of NaN. */
function percent(part: number, total: number): number {
  return total > 0 ? Math.round((part / total) * 100) : 0;
}

/**
 * Loads every transceiver with at least one empty spec column, derives the
 * missing values, writes them back one row at a time, then prints coverage
 * statistics. The pool is closed whether or not the run succeeds.
 */
async function main(): Promise<void> {
  try {
    console.log("Starting spec enrichment...\n");

    // Get all transceivers with missing specs
    const result = await pool.query(`
      SELECT id, slug, form_factor, speed_gbps, reach_label, reach_meters,
             fiber_type, connector, wavelengths, power_consumption_w, category
      FROM transceivers
      WHERE (connector IS NULL OR connector = '' OR connector = '-')
         OR (fiber_type IS NULL OR fiber_type = '')
         OR (wavelengths IS NULL OR wavelengths = '')
         OR (power_consumption_w IS NULL)
         OR (reach_meters = 0 OR reach_meters IS NULL)
    `);
    const rows: TransceiverRow[] = result.rows;

    console.log(`Found ${rows.length} products needing enrichment\n`);

    let updated = 0;
    let skipped = 0;

    for (const row of rows) {
      const assignments: string[] = [];
      const values: unknown[] = [];
      // Keeps the $n placeholder and its value in lock-step.
      const setColumn = (column: string, value: unknown): void => {
        values.push(value);
        assignments.push(`${column} = $${values.length}`);
      };

      const reachLabel = (row.reach_label || "").trim();
      const formFactor = row.form_factor ?? "";
      const speedGbps = Number.parseFloat(String(row.speed_gbps ?? ""));
      const slug = (row.slug || "").toLowerCase();
      const fiberTypeMissing = isBlank(row.fiber_type);

      const rule = resolveSpecRule(slug, formFactor, speedGbps, reachLabel, fiberTypeMissing);
      if (!rule) {
        skipped++;
        continue;
      }

      if (fiberTypeMissing) setColumn("fiber_type", rule.fiber_type);
      if (isBlank(row.connector) || row.connector === "-") setColumn("connector", rule.connector);
      if (isBlank(row.wavelengths)) setColumn("wavelengths", rule.wavelengths);

      // reach_meters: table value first, otherwise parse the label.
      const existingReach = Number(row.reach_meters) || 0;
      let effectiveReach = existingReach;
      if (existingReach === 0) {
        const derived = lookup(REACH_METERS, reachLabel) ?? parseReachLabelMeters(reachLabel);
        if (derived != null && derived > 0) {
          setColumn("reach_meters", derived);
          effectiveReach = derived;
        }
      }

      // power_consumption_w
      if (row.power_consumption_w == null) {
        const basePower = lookup(POWER_MAP, `${formFactor}_${speedGbps}`);
        if (basePower) {
          // Longer reach = more laser power. Uses the reach derived above
          // when the row had none, so freshly enriched long-reach optics are
          // scaled too.
          const reachMultiplier = effectiveReach > 40000 ? 1.5 : effectiveReach > 10000 ? 1.2 : 1.0;
          setColumn("power_consumption_w", Math.round(basePower * reachMultiplier * 10) / 10);
        }
      }

      if (assignments.length === 0) {
        skipped++;
        continue;
      }

      // Apply update; the row id is always the last placeholder.
      values.push(row.id);
      await pool.query(
        `UPDATE transceivers SET ${assignments.join(", ")}, updated_at = NOW() WHERE id = $${values.length}`,
        values,
      );
      updated++;
    }

    console.log(`\nEnrichment complete:`);
    console.log(`  Updated: ${updated}`);
    console.log(`  Skipped: ${skipped}`);
    console.log(`  Total processed: ${rows.length}`);

    // Print new coverage stats
    const stats = await pool.query(`
      SELECT
        COUNT(*) AS total,
        COUNT(*) FILTER (WHERE fiber_type IS NOT NULL AND fiber_type != '') AS has_fiber,
        COUNT(*) FILTER (WHERE connector IS NOT NULL AND connector != '' AND connector != '-') AS has_connector,
        COUNT(*) FILTER (WHERE wavelengths IS NOT NULL AND wavelengths != '') AS has_wavelength,
        COUNT(*) FILTER (WHERE power_consumption_w IS NOT NULL) AS has_power,
        COUNT(*) FILTER (WHERE reach_meters > 0) AS has_reach
      FROM transceivers
    `);
    const s = stats.rows[0];
    // COUNT(*) may arrive as a string from the driver; normalise before dividing.
    const total = Number(s.total);
    console.log(`\nCoverage after enrichment:`);
    console.log(`  fiber_type:   ${s.has_fiber}/${s.total} (${percent(Number(s.has_fiber), total)}%)`);
    console.log(`  connector:    ${s.has_connector}/${s.total} (${percent(Number(s.has_connector), total)}%)`);
    console.log(`  wavelengths:  ${s.has_wavelength}/${s.total} (${percent(Number(s.has_wavelength), total)}%)`);
    console.log(`  power:        ${s.has_power}/${s.total} (${percent(Number(s.has_power), total)}%)`);
    console.log(`  reach_meters: ${s.has_reach}/${s.total} (${percent(Number(s.has_reach), total)}%)`);
  } finally {
    // Release connections even when a query fails.
    await pool.end();
  }
}

main().catch((err: unknown) => {
  console.error("Enrichment failed:", err);
  process.exit(1);
});