/**
 * Update transceiver specs with data scraped from vendor product pages.
 *
 * Only columns for which the caller supplies a value are written, and every
 * updated product is marked 'scraped_unverified' (higher confidence than
 * 'enriched_estimated').
 *
 * NOTE(review): the UPDATE does not inspect the row's current values or its
 * current data_confidence, so it will overwrite existing data. If rows with a
 * stronger confidence level must be preserved, that filter has to be added to
 * the WHERE clause — confirm against the schema's confidence values.
 */

import { pool } from "./db";

/** Specs extracted from a vendor page for a single transceiver row. */
export interface VerifiedSpecs {
  transceiverId: string;
  fiberType?: string; // SMF, MMF, Copper, AOC
  connector?: string; // LC, SC, MPO-12, MPO-16, RJ45, DAC, AOC
  wavelengths?: string; // "850nm", "1310nm", "1310nm (4λ CWDM)", etc.
  reachMeters?: number;
  reachLabel?: string;
  powerConsumptionW?: number;
  tempRange?: string; // COM, IND
  modulation?: string; // NRZ, PAM4
  domSupport?: boolean;
  imageUrl?: string;
  datasheetUrl?: string;
  source: string; // "fs.com", "flexoptix.net", etc. (not persisted by updateVerifiedSpecs)
}

/**
 * Persist the supplied specs for one transceiver.
 *
 * Builds a parameterized `UPDATE transceivers SET ... WHERE id = $n` containing
 * one assignment per supplied field, then appends
 * `data_confidence = 'scraped_unverified'` and `updated_at = NOW()`.
 *
 * @param specs - Fields to write. Empty strings, `undefined`, and non-positive
 *   `reachMeters` / `powerConsumptionW` are treated as "not supplied".
 *   `domSupport` is written whenever it is not `undefined` (so `false` is stored).
 * @returns `false` when no field was supplied (no query is issued); `true`
 *   after the UPDATE has been issued. The number of matched rows is not checked.
 */
export async function updateVerifiedSpecs(specs: VerifiedSpecs): Promise<boolean> {
  const assignments: string[] = [];
  const values: unknown[] = [];

  // Pushing the value first makes values.length the 1-based placeholder index.
  const set = (column: string, value: unknown, suffix = ""): void => {
    values.push(value);
    assignments.push(`${column} = $${values.length}${suffix}`);
  };

  if (specs.fiberType) set("fiber_type", specs.fiberType);
  if (specs.connector) set("connector", specs.connector);
  if (specs.wavelengths) set("wavelengths", specs.wavelengths);
  if (specs.reachMeters !== undefined && specs.reachMeters > 0) {
    set("reach_meters", specs.reachMeters);
  }
  if (specs.reachLabel) set("reach_label", specs.reachLabel);
  if (specs.powerConsumptionW !== undefined && specs.powerConsumptionW > 0) {
    set("power_consumption_w", specs.powerConsumptionW);
  }
  if (specs.tempRange) set("temp_range", specs.tempRange);
  if (specs.modulation) set("modulation", specs.modulation);
  if (specs.domSupport !== undefined) set("dom_support", specs.domSupport);
  // An image URL also flips the has_image flag in the same assignment.
  if (specs.imageUrl) set("image_url", specs.imageUrl, ", has_image = true");
  // The datasheet URL is stored in the datasheet_r2_key column.
  if (specs.datasheetUrl) set("datasheet_r2_key", specs.datasheetUrl);

  if (assignments.length === 0) return false;

  // Always stamp the confidence level and modification time on a real update.
  assignments.push(`data_confidence = 'scraped_unverified'`);
  assignments.push(`updated_at = NOW()`);

  values.push(specs.transceiverId);
  await pool.query(
    `UPDATE transceivers SET ${assignments.join(", ")} WHERE id = $${values.length}`,
    values
  );

  return true;
}

/**
 * Parse a key/value spec table from a product page into structured data.
 * Intended for fs.com, 10gtek, and similar HTML spec tables.
 *
 * Keys are matched case-insensitively by substring; values are matched with
 * regular expressions. Rows are processed in iteration order, so when several
 * rows map to the same field the last matching row wins.
 *
 * @param specs - Raw table: row label -> cell text.
 * @returns The fields that could be recognised; unrecognised fields are absent.
 */
export function parseSpecTable(specs: Record<string, string>): Partial<VerifiedSpecs> {
  const result: Partial<VerifiedSpecs> = {};

  for (const [rawKey, rawVal] of Object.entries(specs)) {
    // Scraped tables can contain non-string cells at runtime; skip instead of throwing on trim().
    if (typeof rawVal !== "string") continue;

    const key = rawKey.toLowerCase().trim();
    const val = rawVal.trim();

    // Fiber type
    const isFiberKey =
      (key.includes("fiber") && key.includes("type")) || key === "cable type" || key === "media";
    if (isFiberKey) {
      if (/single.?mode|smf|os2/i.test(val)) result.fiberType = "SMF";
      else if (/multi.?mode|mmf|om[1-5]/i.test(val)) result.fiberType = "MMF";
      else if (/copper|cat[56]/i.test(val)) result.fiberType = "Copper";
      else if (/aoc|active.optical/i.test(val)) result.fiberType = "AOC";
      // 400G/800G parallel-optic standards: DR/FR/LR/ER/ZR = SMF, SR = MMF
      else if (/\b(?:dr|fr|lr|er|zr)\d*\b/i.test(val)) result.fiberType = "SMF";
      else if (/\bsr\d*\b/i.test(val)) result.fiberType = "MMF";
    }

    // Fall back to the reach code embedded in a part number when no fiber type is known yet.
    if (!result.fiberType && (key === "part number" || key === "model" || key === "sku")) {
      if (/\b(?:dr|fr|lr|er|zr)\d*\b/i.test(val)) result.fiberType = "SMF";
      else if (/\bsr\d*\b/i.test(val)) result.fiberType = "MMF";
    }

    // Connector
    if (key.includes("connector") || key.includes("interface")) {
      if (/duplex\s*lc|lc\s*duplex|lc\/pc|lc\/upc|lc\/apc/i.test(val)) result.connector = "LC";
      else if (/\blc\b/i.test(val)) result.connector = "LC";
      else if (/sc\/pc|sc\/apc|\bsc\b/i.test(val)) result.connector = "SC";
      // MTP is the same connector family as MPO; test explicit fiber counts
      // before the bare "MPO"/"MTP" fallback so MTP-16/24 are not reported as MPO-12.
      else if (/(?:mpo|mtp)[-\s]?24/i.test(val)) result.connector = "MPO-24";
      else if (/(?:mpo|mtp)[-\s]?16/i.test(val)) result.connector = "MPO-16";
      else if (/(?:mpo|mtp)[-\s]?12|mpo\b|mtp\b/i.test(val)) result.connector = "MPO-12";
      else if (/rj-?45|copper/i.test(val)) result.connector = "RJ45";
      // Anchored on both sides so words merely ending in "cs"/"sn" (e.g. "optics") do not match.
      else if (/\bcs\b/i.test(val)) result.connector = "CS";
      else if (/\bsn\b/i.test(val)) result.connector = "SN";
    }

    // Wavelength
    if (key.includes("wavelength") || key.includes("laser") || key === "tx wavelength") {
      const nm = val.match(/([\d.]+)\s*nm/i)?.[1];
      if (nm !== undefined) result.wavelengths = `${nm}nm`;
      // Multi-wavelength grids keep the full cell text instead of the first number.
      if (/cwdm|dwdm/i.test(val)) result.wavelengths = val;
    }

    // Reach / distance
    if (
      key.includes("distance") ||
      key.includes("reach") ||
      key.includes("transmission") ||
      key === "max link length"
    ) {
      // The number pattern requires at least one digit, so a stray "." cannot produce NaN.
      const kmText = val.match(/(\d+(?:\.\d*)?|\.\d+)\s*km/i)?.[1];
      const mText = val.match(/(\d+(?:\.\d*)?|\.\d+)\s*m\b/i)?.[1];
      if (kmText !== undefined) {
        const km = parseFloat(kmText);
        result.reachMeters = Math.round(km * 1000);
        result.reachLabel = km >= 1 ? `${km}km` : `${result.reachMeters}m`;
      } else if (mText !== undefined) {
        // Round rather than truncate fractional lengths (e.g. 1.5 m cables); the label keeps the stated value.
        const meters = parseFloat(mText);
        result.reachMeters = Math.round(meters);
        result.reachLabel = `${meters}m`;
      }
    }

    // Power consumption
    if (
      key.includes("power") &&
      (key.includes("consumption") || key.includes("dissipation") || key.includes("max"))
    ) {
      const watts = val.match(/(\d+(?:\.\d*)?|\.\d+)\s*w/i)?.[1];
      if (watts !== undefined) result.powerConsumptionW = parseFloat(watts);
    }

    // Temperature (operating range only; a storage-temperature row must not override it)
    const isTempKey =
      key.includes("temperature") || (key.includes("temp") && key.includes("range"));
    if (isTempKey && !key.includes("storage")) {
      // Require a standalone 0 so the trailing zero of "-40" or "-10" is not read as a 0..70 range.
      if (/(?<![\d.])0(?!\d).*70|commercial/i.test(val)) result.tempRange = "COM";
      else if (/-40.*85|industrial/i.test(val)) result.tempRange = "IND";
    }

    // DOM / DDM
    if (key.includes("dom") || key.includes("ddm") || key.includes("diagnostic")) {
      // Check negations first: "Not supported" / "Unsupported" contain "supported".
      const negated = /\b(?:no|not|none|unsupported|without)\b/i.test(val);
      result.domSupport = !negated && /yes|supported|ddm|dom/i.test(val);
    }

    // Modulation
    if (key.includes("modulation") || key.includes("encoding")) {
      if (/pam4|pam-4/i.test(val)) result.modulation = "PAM4";
      else if (/nrz/i.test(val)) result.modulation = "NRZ";
    }
  }

  return result;
}