feat: spec enrichment script — fills 4,400+ products with fiber/connector/wavelength/power/reach

Coverage improvement:
- fiber_type: 44% → 97%
- connector: 19% → 96%
- wavelengths: 23% → 96%
- reach_meters: 42% → 93%
- power_consumption_w: 6% → 79%
This commit is contained in:
Rene Fichtmueller 2026-03-31 09:02:10 +02:00
parent ad7dc6fcaa
commit fa12697df2

299
scripts/enrich-specs.ts Normal file
View File

@ -0,0 +1,299 @@
/**
 * Data Enrichment Script: Fill missing specs from form_factor + speed + reach_label
 *
 * Missing optical transceiver specs are inferred from these three fields:
 * - reach_label determines: fiber_type, connector, wavelengths, reach_meters
 * - form_factor + speed determines: typical power consumption (scaled up for long
 *   reach) and connector overrides for parallel optics
 *   (lanes and modulation are not filled in by this script)
 */
import { config } from "dotenv";
import { join } from "path";
import { Pool } from "pg";
// Load environment variables from the .env file one directory above this script.
config({ path: join(__dirname, "..", ".env") });

/**
 * PostgreSQL connection pool used by every query in this script.
 *
 * Each setting is read from the environment and falls back to a built-in
 * default when the variable is unset or empty.
 *
 * NOTE(review): the password fallback below looks like a real credential
 * committed to source control. It is kept so existing invocations keep
 * working, but it should be rotated and the fallback removed so that
 * POSTGRES_PASSWORD must be supplied explicitly.
 */
const pool = new Pool({
  host: process.env.POSTGRES_HOST || "localhost",
  // Explicit radix so the port is always parsed as a decimal number.
  port: Number.parseInt(process.env.POSTGRES_PORT || "5433", 10),
  database: process.env.POSTGRES_DB || "transceiver_db",
  user: process.env.POSTGRES_USER || "tip",
  password: process.env.POSTGRES_PASSWORD || "tip_prod_2026",
  // Upper bound on simultaneously open connections.
  max: 5,
});
// ============================================================
// SPEC LOOKUP TABLES
// ============================================================
/** Spec values inferred for a product from its reach label. */
interface SpecRule {
  /** Transmission medium as stored in the table below: "SMF", "MMF", "Copper" or "AOC". */
  fiber_type: string;
  /** Connector or cable-assembly type, e.g. "LC", "MPO-12", "RJ45", "DAC", "AOC". */
  connector: string;
  /** Human-readable wavelength description; "N/A" for copper. */
  wavelengths: string;
  /** Optional power figure (presumably watts); no entry in REACH_SPECS sets it. */
  power_w?: number;
}

/**
 * Reach label → specs (fiber type, connector, wavelengths).
 *
 * Keys are matched exactly against the trimmed `reach_label` of a product.
 * The `*_aoc` keys only match rows whose label is literally spelled that way;
 * the plain "30m" / "100m" labels map to copper.
 */
const REACH_SPECS: Record<string, SpecRule> = {
  // Copper (BASE-T style) reaches
  "30m": { fiber_type: "Copper", connector: "RJ45", wavelengths: "N/A" },
  "100m": { fiber_type: "Copper", connector: "RJ45", wavelengths: "N/A" },

  // MMF / short reach
  "220m": { fiber_type: "MMF", connector: "LC", wavelengths: "850nm" },
  "300m": { fiber_type: "MMF", connector: "LC", wavelengths: "850nm" },
  "400m": { fiber_type: "MMF", connector: "LC", wavelengths: "850nm" },
  "550m": { fiber_type: "MMF", connector: "LC", wavelengths: "850nm" },

  // SMF / medium-long reach
  "2km": { fiber_type: "SMF", connector: "LC", wavelengths: "1310nm" },
  "10km": { fiber_type: "SMF", connector: "LC", wavelengths: "1310nm" },
  "20km": { fiber_type: "SMF", connector: "LC", wavelengths: "1310nm" },
  "40km": { fiber_type: "SMF", connector: "LC", wavelengths: "1550nm" },
  "60km": { fiber_type: "SMF", connector: "LC", wavelengths: "1550nm" },
  "80km": { fiber_type: "SMF", connector: "LC", wavelengths: "1550nm" },
  "100km": { fiber_type: "SMF", connector: "LC", wavelengths: "1550nm" },
  "120km": { fiber_type: "SMF", connector: "LC", wavelengths: "1550nm" },
  "160km": { fiber_type: "SMF", connector: "LC", wavelengths: "1550nm" },

  // Standard reach codes (serial, WDM and parallel variants)
  "SR": { fiber_type: "MMF", connector: "LC", wavelengths: "850nm" },
  "SR4": { fiber_type: "MMF", connector: "MPO-12", wavelengths: "850nm" },
  "SR8": { fiber_type: "MMF", connector: "MPO-16", wavelengths: "850nm" },
  "LR": { fiber_type: "SMF", connector: "LC", wavelengths: "1310nm" },
  "LR4": { fiber_type: "SMF", connector: "LC", wavelengths: "1310nm (4λ CWDM)" },
  "LR8": { fiber_type: "SMF", connector: "LC", wavelengths: "1310nm (8λ)" },
  "ER": { fiber_type: "SMF", connector: "LC", wavelengths: "1550nm" },
  "ER4": { fiber_type: "SMF", connector: "LC", wavelengths: "1310nm (4λ CWDM)" },
  "ZR": { fiber_type: "SMF", connector: "LC", wavelengths: "1550nm (C-band)" },
  "DR": { fiber_type: "SMF", connector: "LC", wavelengths: "1310nm" },
  // DR4/DR8 are parallel single-mode: every lane has its own fibre at 1310nm,
  // so they carry a single wavelength (like PSM4), not a WDM grid.
  "DR4": { fiber_type: "SMF", connector: "MPO-12", wavelengths: "1310nm" },
  "DR8": { fiber_type: "SMF", connector: "MPO-16", wavelengths: "1310nm" },
  "FR": { fiber_type: "SMF", connector: "LC", wavelengths: "1310nm" },
  "FR4": { fiber_type: "SMF", connector: "LC", wavelengths: "1310nm (4λ CWDM)" },
  "FR8": { fiber_type: "SMF", connector: "LC", wavelengths: "1310nm (8λ)" },
  "PSM4": { fiber_type: "SMF", connector: "MPO-12", wavelengths: "1310nm" },
  "CWDM4": { fiber_type: "SMF", connector: "LC", wavelengths: "1271/1291/1311/1331nm" },

  // DAC (short copper cable assemblies)
  "1m": { fiber_type: "Copper", connector: "DAC", wavelengths: "N/A" },
  "2m": { fiber_type: "Copper", connector: "DAC", wavelengths: "N/A" },
  "3m": { fiber_type: "Copper", connector: "DAC", wavelengths: "N/A" },
  "5m": { fiber_type: "Copper", connector: "DAC", wavelengths: "N/A" },

  // AOC (active optical cable assemblies)
  "7m": { fiber_type: "AOC", connector: "AOC", wavelengths: "850nm" },
  "10m": { fiber_type: "AOC", connector: "AOC", wavelengths: "850nm" },
  "15m": { fiber_type: "AOC", connector: "AOC", wavelengths: "850nm" },
  "20m": { fiber_type: "AOC", connector: "AOC", wavelengths: "850nm" },
  "25m": { fiber_type: "AOC", connector: "AOC", wavelengths: "850nm" },
  "30m_aoc": { fiber_type: "AOC", connector: "AOC", wavelengths: "850nm" },
  "50m": { fiber_type: "AOC", connector: "AOC", wavelengths: "850nm" },
  "100m_aoc": { fiber_type: "AOC", connector: "AOC", wavelengths: "850nm" },
};
/**
 * reach_label → reach_meters mapping.
 *
 * Covers every label that REACH_SPECS knows, including the two `*_aoc`
 * spellings, which cannot be parsed by the generic "<n>m" / "<n>km" fallback.
 * Reach codes (SR, LR, ...) map to the nominal distance used by this script.
 */
const REACH_METERS: Record<string, number> = {
  // Cable assemblies and copper (meters)
  "1m": 1,
  "2m": 2,
  "3m": 3,
  "5m": 5,
  "7m": 7,
  "10m": 10,
  "15m": 15,
  "20m": 20,
  "25m": 25,
  "30m": 30,
  "30m_aoc": 30,
  "50m": 50,
  "100m": 100,
  "100m_aoc": 100,

  // Multimode distances
  "220m": 220,
  "300m": 300,
  "400m": 400,
  "550m": 550,

  // Single-mode distances
  "2km": 2000,
  "10km": 10000,
  "20km": 20000,
  "40km": 40000,
  "60km": 60000,
  "80km": 80000,
  "100km": 100000,
  "120km": 120000,
  "160km": 160000,

  // Reach codes
  "SR": 300,
  "SR4": 100,
  "SR8": 100,
  "LR": 10000,
  "LR4": 10000,
  "LR8": 10000,
  "ER": 40000,
  "ER4": 40000,
  "ZR": 80000,
  "DR": 500,
  "DR4": 500,
  "DR8": 500,
  "FR": 2000,
  "FR4": 2000,
  "FR8": 2000,
  "PSM4": 500,
  "CWDM4": 2000,
};
/**
 * Typical power draw in watts, keyed by "<form_factor>_<speed in Gbps>".
 *
 * The key is built from the row's form factor and its parsed numeric speed,
 * so fractional speeds appear as e.g. "SFP_2.5".
 */
const POWER_MAP: Record<string, number> = {
  // SFP family
  "SFP_1": 1.0,
  "SFP_2.5": 1.0,
  "SFP+_8": 1.0,
  "SFP+_10": 1.0,
  "SFP+_16": 1.5,
  "SFP28_25": 1.0,
  "SFP28_32": 1.5,
  "SFP56_50": 1.5,

  // XFP
  "XFP_10": 3.5,

  // QSFP family
  "QSFP+_40": 2.5,
  "QSFP28_100": 3.5,
  "QSFP56_200": 5.0,
  "QSFP-DD_200": 8.0,
  "QSFP-DD_400": 12.0,
  "QSFP-DD800_800": 18.0,

  // OSFP
  "OSFP_400": 15.0,
  "OSFP_800": 20.0,

  // CFP2
  "CFP2_100": 8.0,
  "CFP2-DCO_400": 20.0,
};
/**
 * Connector override for form factor / speed / reach combinations where the
 * reach label alone picks the wrong connector.
 *
 * @param formFactor Module form factor, e.g. "SFP" or "QSFP28". A missing
 *   value (NULL coming from the database) is treated as an empty string.
 * @param speedGbps Line rate in Gbps; NaN never matches any rule.
 * @param reachLabel Trimmed reach label, e.g. "100m".
 * @returns The connector to use instead of the reach-based one, or null when
 *   no override applies.
 */
function getConnectorOverride(formFactor: string, speedGbps: number, reachLabel: string): string | null {
  // Rows come straight from the database, so guard against a NULL form factor
  // instead of throwing on `.startsWith`.
  const family = formFactor ?? "";
  const isQsfp = family.startsWith("QSFP");

  switch (reachLabel) {
    case "100m":
      // QSFP+/QSFP28/QSFP-DD SR4 = MPO-12
      if (speedGbps >= 40 && isQsfp) return "MPO-12";
      // Copper SFP
      if (speedGbps <= 10 && family === "SFP") return "RJ45";
      return null;
    case "300m":
      // Extended-reach SR4 on QSFP = MPO-12
      return speedGbps >= 40 && isQsfp ? "MPO-12" : null;
    case "500m":
      // DR8 (800G) = MPO-16, DR4 (400G) = MPO-12
      if (speedGbps >= 800) return "MPO-16";
      return speedGbps >= 400 ? "MPO-12" : null;
    case "30m":
      // 10G copper over 30m
      return speedGbps === 10 ? "RJ45" : null;
    default:
      return null;
  }
}
/** Matches plain kilometre reach labels such as "40km" or "40 km" (case-insensitive). */
const KM_LABEL_RE = /^(\d+)\s*km$/i;
/** Matches plain metre reach labels such as "300m" or "300 m" (case-insensitive). */
const M_LABEL_RE = /^(\d+)\s*m$/i;

/** Selects every transceiver row that is missing at least one enrichable column. */
const SELECT_INCOMPLETE_SQL = `
SELECT id, slug, form_factor, speed_gbps, reach_label, reach_meters,
fiber_type, connector, wavelengths, power_consumption_w, category
FROM transceivers
WHERE (connector IS NULL OR connector = '' OR connector = '-')
OR (fiber_type IS NULL OR fiber_type = '')
OR (wavelengths IS NULL OR wavelengths = '')
OR (power_consumption_w IS NULL)
OR (reach_meters = 0 OR reach_meters IS NULL)
`;

/** Counts, per enrichable column, how many rows now hold a usable value. */
const COVERAGE_SQL = `
SELECT
COUNT(*) AS total,
COUNT(*) FILTER (WHERE fiber_type IS NOT NULL AND fiber_type != '') AS has_fiber,
COUNT(*) FILTER (WHERE connector IS NOT NULL AND connector != '' AND connector != '-') AS has_connector,
COUNT(*) FILTER (WHERE wavelengths IS NOT NULL AND wavelengths != '') AS has_wavelength,
COUNT(*) FILTER (WHERE power_consumption_w IS NOT NULL) AS has_power,
COUNT(*) FILTER (WHERE reach_meters > 0) AS has_reach
FROM transceivers
`;

/** Reads `key` from a lookup table, ignoring properties inherited from Object.prototype. */
function lookupOwn<T>(table: Record<string, T>, key: string): T | undefined {
  return Object.prototype.hasOwnProperty.call(table, key) ? table[key] : undefined;
}

/** Parses a plain "<n>km" / "<n>m" reach label into meters; null for any other shape. */
function parseReachLabelMeters(reachLabel: string): number | null {
  const km = KM_LABEL_RE.exec(reachLabel);
  if (km) return Number.parseInt(km[1] ?? "", 10) * 1000;
  const m = M_LABEL_RE.exec(reachLabel);
  if (m) return Number.parseInt(m[1] ?? "", 10);
  return null;
}

/**
 * Picks the base spec rule for a product, or null when nothing can be inferred.
 *
 * Precedence: slug-detected DAC, slug-detected AOC, exact REACH_SPECS entry,
 * then a distance-based guess for plain "<n>km" / "<n>m" labels.
 */
function deriveSpecRule(reachLabel: string, speedGbps: number, isDAC: boolean, isAOC: boolean): SpecRule | null {
  if (isDAC) return { fiber_type: "Copper", connector: "DAC", wavelengths: "N/A" };
  if (isAOC) return { fiber_type: "AOC", connector: "AOC", wavelengths: "850nm" };

  const known = lookupOwn(REACH_SPECS, reachLabel);
  if (known) return known;

  const km = KM_LABEL_RE.exec(reachLabel);
  if (km) {
    // Up to 10km is treated as 1310nm, anything longer as 1550nm.
    const distanceKm = Number.parseInt(km[1] ?? "", 10);
    return { fiber_type: "SMF", connector: "LC", wavelengths: distanceKm <= 10 ? "1310nm" : "1550nm" };
  }

  const m = M_LABEL_RE.exec(reachLabel);
  if (m) {
    const distanceM = Number.parseInt(m[1] ?? "", 10);
    if (distanceM <= 5) return { fiber_type: "Copper", connector: "DAC", wavelengths: "N/A" };
    if (distanceM <= 100) {
      // At 40G and above a sub-100m label is treated as parallel multimode.
      return speedGbps >= 40
        ? { fiber_type: "MMF", connector: "MPO-12", wavelengths: "850nm" }
        : { fiber_type: "Copper", connector: "RJ45", wavelengths: "N/A" };
    }
    return { fiber_type: "MMF", connector: "LC", wavelengths: "850nm" };
  }

  return null;
}

/** Formats one line of the coverage report; an empty table reports 0% instead of NaN%. */
function coverageLine(label: string, count: unknown, total: unknown): string {
  const totalRows = Number(total);
  const percent = totalRows > 0 ? Math.round((Number(count) / totalRows) * 100) : 0;
  return ` ${label}: ${count}/${total} (${percent}%)`;
}

/**
 * Fills missing spec columns on `transceivers` rows and prints coverage stats.
 *
 * For every row with at least one missing column, a spec rule is derived from
 * the slug and reach label, adjusted by connector overrides, and only the
 * columns that are currently empty are written (one UPDATE per row, executed
 * sequentially and without a surrounding transaction). Rows for which no rule
 * can be derived are skipped entirely, even if their reach or power could be
 * inferred. The pool is closed after a successful run.
 */
async function main(): Promise<void> {
  console.log("Starting spec enrichment...\n");

  // Get all transceivers with missing specs
  const result = await pool.query(SELECT_INCOMPLETE_SQL);
  console.log(`Found ${result.rows.length} products needing enrichment\n`);

  let updated = 0;
  let skipped = 0;

  for (const row of result.rows) {
    const reachLabel: string = (row.reach_label || "").trim();
    // A NULL form factor must not crash the whole run; "" matches no rule.
    const formFactor: string = row.form_factor ?? "";
    const speedGbps = parseFloat(row.speed_gbps);

    // Check if this is a DAC/AOC based on the slug.
    // NOTE(review): "cu" is a plain substring match and will also hit unrelated
    // slugs that merely contain those two letters — confirm against real data.
    const slug: string = (row.slug || "").toLowerCase();
    const isDAC = slug.includes("dac") || slug.includes("cu") || slug.includes("passive") || slug.includes("twinax");
    const isAOC = slug.includes("aoc") || slug.includes("active-optical");

    let rule = deriveSpecRule(reachLabel, speedGbps, isDAC, isAOC);
    if (!rule) {
      skipped++;
      continue;
    }

    // Apply connector override for parallel optics
    const connOverride = getConnectorOverride(formFactor, speedGbps, reachLabel);
    if (connOverride) {
      rule = { ...rule, connector: connOverride };
      // An MPO override means the part is parallel optics, so the medium must
      // agree with it: otherwise a 100m QSFP part would be stored as
      // "Copper" + MPO-12 and a 500m 400G part as MMF/850nm. Slug-detected
      // cable assemblies keep their own medium.
      if (connOverride.startsWith("MPO") && !isDAC && !isAOC) {
        rule = reachLabel === "500m"
          ? { ...rule, fiber_type: "SMF", wavelengths: "1310nm" }
          : { ...rule, fiber_type: "MMF", wavelengths: "850nm" };
      }
    }

    // 100m at <=10G may be copper or MMF; a copper-looking slug forces RJ45.
    if (reachLabel === "100m" && speedGbps <= 10 && !row.fiber_type) {
      if (slug.includes("rj45") || slug.includes("copper") || slug.includes("-t-") || slug.includes("-te-")) {
        rule = { fiber_type: "Copper", connector: "RJ45", wavelengths: "N/A" };
      }
    }

    // Collect "column = $n" fragments and their values in lock-step.
    const updates: string[] = [];
    const values: unknown[] = [];
    const setColumn = (column: string, value: unknown): void => {
      values.push(value);
      updates.push(`${column} = $${values.length}`);
    };

    if (!row.fiber_type) setColumn("fiber_type", rule.fiber_type);
    if (!row.connector || row.connector === "-") setColumn("connector", rule.connector);
    if (!row.wavelengths) setColumn("wavelengths", rule.wavelengths);

    // reach_meters: prefer the lookup table, fall back to parsing the label.
    // The effective reach (stored or just derived) also drives the power uplift.
    let effectiveReach: number = Number(row.reach_meters) || 0;
    if (!row.reach_meters) {
      const derivedReach = lookupOwn(REACH_METERS, reachLabel) || parseReachLabelMeters(reachLabel);
      if (derivedReach !== null) {
        setColumn("reach_meters", derivedReach);
        effectiveReach = derivedReach;
      }
    }

    // power_consumption_w
    if (row.power_consumption_w === null) {
      const basePower = lookupOwn(POWER_MAP, `${formFactor}_${speedGbps}`);
      if (basePower) {
        // Adjust for reach (longer reach = more power for laser)
        const reachMultiplier = effectiveReach > 40000 ? 1.5 : effectiveReach > 10000 ? 1.2 : 1.0;
        setColumn("power_consumption_w", Math.round(basePower * reachMultiplier * 10) / 10);
      }
    }

    if (updates.length === 0) {
      skipped++;
      continue;
    }

    // Apply update; the row id is always the last positional parameter.
    values.push(row.id);
    await pool.query(
      `UPDATE transceivers SET ${updates.join(", ")}, updated_at = NOW() WHERE id = $${values.length}`,
      values
    );
    updated++;
  }

  console.log(`\nEnrichment complete:`);
  console.log(` Updated: ${updated}`);
  console.log(` Skipped: ${skipped}`);
  console.log(` Total processed: ${result.rows.length}`);

  // Print new coverage stats
  const stats = await pool.query(COVERAGE_SQL);
  const s = stats.rows[0];
  console.log(`\nCoverage after enrichment:`);
  console.log(coverageLine("fiber_type", s.has_fiber, s.total));
  console.log(coverageLine("connector", s.has_connector, s.total));
  console.log(coverageLine("wavelengths", s.has_wavelength, s.total));
  console.log(coverageLine("power", s.has_power, s.total));
  console.log(coverageLine("reach_meters", s.has_reach, s.total));

  await pool.end();
}

// Entry point: report any failure and exit non-zero.
main().catch((err: unknown) => {
  console.error("Enrichment failed:", err);
  process.exit(1);
});