Rene Fichtmueller d7144731e0 feat(scraper): add 100+ OEM seed scrapers + tip-llm-guided inference layer
New OEM transceiver seed scrapers (94 cron-scheduled, 24/7):
- Media/Broadcast: Evertz, Grass Valley, Haivision, Viasat
- Asian Optical: FiberHome, Oplink, Accelink, Hisense Broadband
- Optical Mfrs: Lumentum, II-VI/Coherent, Source Photonics, O-Net,
  InnoLight, AOI, Sumitomo Electric, NeoPhotonics
- Industrial: GE Grid, Schweitzer, Moxa Industrial, Cisco IE,
  Phoenix Contact, Beckhoff, Omron, ABB, Siemens, Schneider, Rockwell, Belden
- Enterprise/DC: Arista, Pica8, Pluribus, DriveNets, Cisco (Meraki/Catalyst/Nexus/ASR)
- Cloud: AWS, Azure, Google Cloud, Meta
- Storage: NetApp, Pure Storage, HPE Storage, IBM Storage, Dell Storage, Hitachi Vantara
- 5G/RAN: Samsung Networks, Nokia AirScale, Ericsson RAN, Mavenir
- Security: Check Point, Barracuda, Fortinet, Palo Alto
- Telecom Optical: ADVA, PacketLight, FiberHome, Accelink, Hisense

API: tip-llm-guided inference layer (strict schema + repair-retry + safe fallback)
- POST /api/tip-llm/infer|research-plan|extract|finding|health
- Hard JSON schema enforcement, create_finding=false on empty evidence
- Confidence gate (>= 0.4), validation with consistency check

Build: added incremental=true to scraper tsconfig (OOM prevention)
Scheduler: 87 → 94 registered workers
2026-04-27 00:00:14 +02:00

139 lines
10 KiB
TypeScript

/**
* CIMC Semiconductors (legacy II-VI / Finisar integration-era, now Coherent Corp) OEM Seed
*
* Seeds CIMC-branded transceiver PIDs covering the legacy II-VI/Finisar product
* lines that Coherent Corp continues to manufacture and sell under the CIMC
* Semiconductors brand. Includes datacenter optics, telecom/OTN, CFP2-DCO,
* DWDM tunable, and CWDM variants from the II-VI/Finisar integration era.
*
* Sources:
* - Coherent Corp (formerly II-VI) product catalog (coherent.com)
* - CIMC Semiconductors transceiver datasheets
* - Legacy II-VI/Finisar product numbering conventions
*
* Run: tsx packages/scraper/src/scrapers/cimc-oem.ts
* Cron: daily at 05:00
*/
import { pool, ensureVendor } from "../utils/db";
/**
 * One seed entry describing a CIMC-branded transceiver part number.
 *
 * Each field is passed as a bind parameter to the `transceivers` upsert in
 * `scrapeCimcOem`; optional fields are sent as `null` when omitted.
 */
interface CimcPID {
/** Part number; written to `part_number` and lower-cased to derive the row slug. */
pid: string;
/** Module form factor (e.g. "SFP+", "QSFP28"); written to `form_factor`. */
formFactor: string;
/** Numeric line rate; written to `speed_gbps`. */
speedGbps: number;
/** Display label for the line rate (e.g. "10G"); written to `speed`. */
speed: string;
/** Reach written to `reach_meters`; on conflict an existing value is only replaced when this is > 0. */
reachMeters: number;
/** Short reach class label (e.g. "SR", "LR4", "DCO"); written to `reach_label`. */
reachLabel: string;
/** Fiber type (e.g. "MMF", "SMF"); on conflict an empty string leaves the stored value untouched. */
fiberType: string;
/** Optical connector (e.g. "LC", "MPO"); written to `connector`. */
connector: string;
/** Wavelength description; when omitted, `null` is sent and the stored value is kept on conflict. */
wavelengths?: string;
/** Standard reference; written to the `ieee_reference` column. */
standard?: string;
/** Explicit category; when omitted the seeder falls back to `TELECOM_PIDS`, then "DataCenter". */
category?: string;
/** Free-form provenance / lineage note; written to `notes`. */
notes?: string;
}
/**
 * Part numbers that belong to the "Telecom" (carrier) category instead of the
 * default "DataCenter" one.
 *
 * The seeder consults this set only for entries that do not declare an
 * explicit `category` of their own.
 */
const TELECOM_PIDS = new Set<string>([
  // Coherent pluggables
  "CIMC-CFP2-DCO-200G",
  "CIMC-QSFP-DD-ZR-400G",
  // 10G transport optics (OTN / CWDM / DWDM)
  "CIMC-SFP-OTN-10G",
  "CIMC-SFP-CWDM-1550",
  "CIMC-DWDM-SFP-C",
]);
/**
 * Static seed catalog of CIMC-branded transceiver part numbers, grouped by
 * speed class / form factor.
 *
 * `scrapeCimcOem` iterates this list in order and upserts one `transceivers`
 * row per entry. Entries that set `category: "Telecom"` keep that value;
 * entries without a category are resolved through `TELECOM_PIDS` and
 * otherwise default to "DataCenter".
 */
const CIMC_PIDS: CimcPID[] = [
// ── 1G SFP ──────────────────────────────────────────────────────────────
{ pid: "CIMC-SFP-1G-SX", formFactor: "SFP", speedGbps: 1, speed: "1G", reachMeters: 550, reachLabel: "SX", fiberType: "MMF", connector: "LC", wavelengths: "850nm", standard: "1000BASE-SX", notes: "CIMC 1G MM SFP — legacy Finisar FTLF8524P3BNV lineage" },
{ pid: "CIMC-SFP-1G-LX", formFactor: "SFP", speedGbps: 1, speed: "1G", reachMeters: 10000, reachLabel: "LX", fiberType: "SMF", connector: "LC", wavelengths: "1310nm", standard: "1000BASE-LX", notes: "CIMC 1G SM SFP — legacy Finisar FTLF1324P2BTL lineage" },
// ── 10G SFP+ ────────────────────────────────────────────────────────────
{ pid: "CIMC-SFP-10G-SR", formFactor: "SFP+", speedGbps: 10, speed: "10G", reachMeters: 300, reachLabel: "SR", fiberType: "MMF", connector: "LC", wavelengths: "850nm", standard: "10GBASE-SR", notes: "CIMC 10G SR SFP+ — legacy Finisar FTLX8571D3BCL lineage" },
{ pid: "CIMC-SFP-10G-LR", formFactor: "SFP+", speedGbps: 10, speed: "10G", reachMeters: 10000, reachLabel: "LR", fiberType: "SMF", connector: "LC", wavelengths: "1310nm", standard: "10GBASE-LR", notes: "CIMC 10G LR SFP+ — legacy Finisar FTLX1475D3BCL lineage" },
{ pid: "CIMC-SFP-10G-ER", formFactor: "SFP+", speedGbps: 10, speed: "10G", reachMeters: 40000, reachLabel: "ER", fiberType: "SMF", connector: "LC", wavelengths: "1550nm", standard: "10GBASE-ER" },
{ pid: "CIMC-SFP-10G-ZR", formFactor: "SFP+", speedGbps: 10, speed: "10G", reachMeters: 80000, reachLabel: "ZR", fiberType: "SMF", connector: "LC", wavelengths: "1550nm", standard: "10GBASE-ZR", notes: "CIMC 10G ZR 80km — ex-II-VI long-haul series" },
// ── 10G OTN / CWDM / DWDM (Telecom) ────────────────────────────────────
{ pid: "CIMC-SFP-OTN-10G", formFactor: "SFP+", speedGbps: 10, speed: "10G", reachMeters: 80000, reachLabel: "OTN", fiberType: "SMF", connector: "LC", wavelengths: "1310nm", standard: "OTU2", category: "Telecom", notes: "CIMC 10G OTN SFP+ for OTU2 transport — ex-II-VI Finisar OTN lineage" },
{ pid: "CIMC-SFP-CWDM-1550", formFactor: "SFP+", speedGbps: 10, speed: "10G", reachMeters: 40000, reachLabel: "CWDM", fiberType: "SMF", connector: "LC", wavelengths: "1550nm", standard: "CWDM", category: "Telecom", notes: "CIMC 10G CWDM SFP+ 1550nm — legacy Finisar CWDM channel optic" },
{ pid: "CIMC-DWDM-SFP-C", formFactor: "SFP+", speedGbps: 10, speed: "10G", reachMeters: 80000, reachLabel: "DWDM", fiberType: "SMF", connector: "LC", wavelengths: "C-band ITU", standard: "DWDM ITU-T G.694.1",category: "Telecom", notes: "CIMC 10G DWDM tunable SFP+ C-band — ex-II-VI Finisar DWDM series" },
// ── 25G SFP28 ───────────────────────────────────────────────────────────
{ pid: "CIMC-SFP28-25G-SR", formFactor: "SFP28", speedGbps: 25, speed: "25G", reachMeters: 100, reachLabel: "SR", fiberType: "MMF", connector: "LC", wavelengths: "850nm", standard: "25GBASE-SR", notes: "CIMC 25G SR SFP28 — ex-II-VI high-volume hyperscale series" },
{ pid: "CIMC-SFP28-25G-LR", formFactor: "SFP28", speedGbps: 25, speed: "25G", reachMeters: 10000, reachLabel: "LR", fiberType: "SMF", connector: "LC", wavelengths: "1310nm", standard: "25GBASE-LR" },
// ── 100G QSFP28 ─────────────────────────────────────────────────────────
{ pid: "CIMC-QSFP28-100G-SR4", formFactor: "QSFP28", speedGbps: 100, speed: "100G", reachMeters: 100, reachLabel: "SR4", fiberType: "MMF", connector: "MPO", wavelengths: "850nm", standard: "100GBASE-SR4", notes: "CIMC 100G SR4 QSFP28 — ex-II-VI Finisar FTLC9551REPM lineage" },
{ pid: "CIMC-QSFP28-100G-LR4", formFactor: "QSFP28", speedGbps: 100, speed: "100G", reachMeters: 10000, reachLabel: "LR4", fiberType: "SMF", connector: "LC", wavelengths: "1295-1310nm", standard: "100GBASE-LR4" },
{ pid: "CIMC-QSFP28-100G-ER4", formFactor: "QSFP28", speedGbps: 100, speed: "100G", reachMeters: 40000, reachLabel: "ER4", fiberType: "SMF", connector: "LC", wavelengths: "1295-1310nm", standard: "100GBASE-ER4", notes: "CIMC 100G ER4 40km QSFP28 — ex-II-VI extended-reach datacenter" },
// ── 200G CFP2-DCO (Telecom) ─────────────────────────────────────────────
// reachMeters of 1000000 corresponds to the 1000km amplified reach quoted in the entry's notes.
{ pid: "CIMC-CFP2-DCO-200G", formFactor: "CFP2", speedGbps: 200, speed: "200G", reachMeters: 1000000, reachLabel: "DCO", fiberType: "SMF", connector: "LC", wavelengths: "C-band tunable", standard: "OIF CFP2-DCO", category: "Telecom", notes: "CIMC CFP2-DCO 200G coherent — ex-II-VI Finisar DCO lineage, 1000km EDFA reach" },
// ── 400G QSFP-DD ────────────────────────────────────────────────────────
{ pid: "CIMC-QSFP-DD-400G-DR4", formFactor: "QSFP-DD", speedGbps: 400, speed: "400G", reachMeters: 500, reachLabel: "DR4", fiberType: "SMF", connector: "MPO", wavelengths: "1310nm", standard: "400GBASE-DR4", notes: "CIMC 400G DR4 QSFP-DD — Coherent Corp next-gen datacenter optic" },
{ pid: "CIMC-QSFP-DD-400G-SR8", formFactor: "QSFP-DD", speedGbps: 400, speed: "400G", reachMeters: 100, reachLabel: "SR8", fiberType: "MMF", connector: "MPO", wavelengths: "850nm", standard: "400GBASE-SR8", notes: "CIMC 400G SR8 QSFP-DD MPO for short-reach hyperscale fabric" },
// ── 400G ZR Coherent (Telecom) ──────────────────────────────────────────
{ pid: "CIMC-QSFP-DD-ZR-400G", formFactor: "QSFP-DD", speedGbps: 400, speed: "400G", reachMeters: 120000, reachLabel: "ZR", fiberType: "SMF", connector: "LC", wavelengths: "C-band tunable", standard: "OpenZR+ / OIF 400ZR", category: "Telecom", notes: "CIMC 400G ZR QSFP-DD coherent — ex-II-VI Finisar 400ZR, 120km unamplified" },
];
/**
 * Seeds the CIMC Semiconductors transceiver catalog into the `transceivers` table.
 *
 * Resolves the vendor via `ensureVendor`, then upserts every entry of
 * `CIMC_PIDS` sequentially, keyed by slug. A row that fails is logged and
 * counted, and seeding continues with the next part number. On a slug
 * conflict only `speed_gbps`, `reach_meters`, `fiber_type`, `wavelengths`
 * and `updated_at` are refreshed; the other columns keep their stored values.
 *
 * @returns A promise that resolves once every PID has been attempted and the
 *   summary has been printed.
 * @throws Whatever `ensureVendor` rejects with; per-row query failures are
 *   caught and reported instead of being rethrown.
 */
export async function scrapeCimcOem(): Promise<void> {
  console.log("=== CIMC Semiconductors (Coherent Corp / ex-II-VI Finisar) OEM Transceiver Seed ===\n");
  const vendorId = await ensureVendor(
    "CIMC Semiconductors",
    "oem",
    "https://www.coherent.com",
    undefined
  );

  // Loop-invariant statement text, built once. The SQL is kept verbatim.
  // `xmax = 0` is the usual PostgreSQL way to tell a fresh insert apart from
  // a row that went through the ON CONFLICT update path.
  const upsertSql = `INSERT INTO transceivers
(slug, part_number, vendor_id, form_factor, speed, speed_gbps,
reach_meters, reach_label, fiber_type, connector, wavelengths,
dom_support, ieee_reference, market_status, category, notes)
VALUES ($1,$2,$3,$4,$5,$6,$7,$8,$9,$10,$11,true,$12,'Mainstream',$13,$14)
ON CONFLICT (slug) DO UPDATE SET
speed_gbps = EXCLUDED.speed_gbps,
reach_meters = CASE WHEN EXCLUDED.reach_meters > 0 THEN EXCLUDED.reach_meters ELSE transceivers.reach_meters END,
fiber_type = CASE WHEN EXCLUDED.fiber_type <> '' THEN EXCLUDED.fiber_type ELSE transceivers.fiber_type END,
wavelengths = COALESCE(EXCLUDED.wavelengths, transceivers.wavelengths),
updated_at = NOW()
RETURNING (xmax = 0) as was_inserted`;

  let inserted = 0;
  let updated = 0;
  let errors = 0;

  for (const p of CIMC_PIDS) {
    // NOTE(review): every PID already starts with "CIMC-", so slugs come out
    // as "cimc-cimc-…". Left as-is because the slug is the conflict key and
    // changing it would duplicate existing rows.
    const slug = `cimc-${p.pid.toLowerCase().replace(/[^a-z0-9]+/g, "-")}`;
    // An explicit per-entry category wins; otherwise fall back to the Telecom set.
    const category = p.category ?? (TELECOM_PIDS.has(p.pid) ? "Telecom" : "DataCenter");
    try {
      const res = await pool.query(upsertSql, [
        slug, p.pid, vendorId, p.formFactor, p.speed, p.speedGbps,
        p.reachMeters, p.reachLabel, p.fiberType, p.connector,
        p.wavelengths ?? null, p.standard ?? null, category, p.notes ?? null,
      ]);
      if (res.rows[0]?.was_inserted) inserted++; else updated++;
    } catch (err) {
      // Narrow instead of casting: a non-Error rejection has no `.message`,
      // and throwing from inside this handler would abort the remaining seeds.
      const message = err instanceof Error ? err.message : String(err);
      console.warn(` Skip ${p.pid}: ${message.slice(0, 80)}`);
      errors++;
    }
  }

  console.log(`\n=== CIMC Semiconductors OEM Seed Complete ===`);
  console.log(` Inserted: ${inserted}`);
  console.log(` Updated: ${updated}`);
  console.log(` Errors: ${errors}`);
  console.log(` Total PIDs: ${CIMC_PIDS.length}`);
  console.log(` Telecom PIDs: ${TELECOM_PIDS.size} (CFP2-DCO, ZR, OTN, CWDM, DWDM)\n`);
}
// CLI entry point: run the seed only when this file is executed directly
// (not when it is imported by the scheduler or another module).
if (require.main === module) {
  // Report the failure, start closing the pool, and exit with a non-zero code.
  const abortWithFailure = (reason: unknown): never => {
    console.error("Fatal:", reason);
    pool.end();
    process.exit(1);
  };

  // Close the pool after a successful run so the process can terminate.
  const closePool = () => pool.end();

  scrapeCimcOem().then(closePool).catch(abortWithFailure);
}