From c9a50ad55104bcedd3ec9702261cb51a09ddd379 Mon Sep 17 00:00:00 2001
From: Rene Fichtmueller
Date: Sun, 26 Apr 2026 19:08:09 +0200
Subject: [PATCH] feat: Juniper OEM seed scraper + BlueOptics HTTP/1.1 fix

Add 59 Juniper OEM transceiver PIDs (SFP/SFP+/SFP28/QSFP+/QSFP28/
QSFP56/QSFP-DD/OSFP + DAC/AOC) to seed the transceivers table.
Register scrape:catalog:juniper-oem in scheduler (daily 04:15).

Fix BlueOptics scraper: force HTTP/1.1 via Node.js https.get() to
bypass server bug where HTTP/2 returns empty response body.
Non-2xx responses are rejected, as the previous fetch() call did.
Also update catalog path from /transceivers/ to /Transceivers_1.
---
 packages/scraper/src/scheduler.ts            |   9 +-
 packages/scraper/src/scrapers/blueoptics.ts  |  39 ++++-
 packages/scraper/src/scrapers/juniper-oem.ts | 175 +++++++++++++++++++
 3 files changed, 218 insertions(+), 5 deletions(-)
 create mode 100644 packages/scraper/src/scrapers/juniper-oem.ts

diff --git a/packages/scraper/src/scheduler.ts b/packages/scraper/src/scheduler.ts
index d50e50d..09c605b 100644
--- a/packages/scraper/src/scheduler.ts
+++ b/packages/scraper/src/scheduler.ts
@@ -218,8 +218,9 @@ export async function registerSchedules(boss: PgBoss): Promise {
   await boss.schedule("scrape:catalog:smartoptics", "10 */4 * * *", {}, { retryLimit: 2, expireInSeconds: 3600 });
   await boss.schedule("scrape:catalog:hubersuhner", "25 */4 * * *", {}, { retryLimit: 2, expireInSeconds: 3600 });
   await boss.schedule("scrape:catalog:eoptolink", "40 */4 * * *", {}, { retryLimit: 2, expireInSeconds: 3600 });
-  // OEM vendor seed catalogs — daily at 04:00 (stable data, rarely changes)
+  // OEM vendor seed catalogs — daily at 04:00/04:15 (stable data, rarely changes)
   await boss.schedule("scrape:catalog:arista-oem", "0 4 * * *", {}, { retryLimit: 2, expireInSeconds: 3600 });
+  await boss.schedule("scrape:catalog:juniper-oem", "15 4 * * *", {}, { retryLimit: 2, expireInSeconds: 3600 });
 
   // ══════════════════════════════════════════════════════════════════════
   // VENDOR LISTS — every 12h
@@ -491,6 +492,12 @@ export async function registerWorkers(boss: PgBoss): Promise {
     await scrapeAristaOem();
   });
 
+  await boss.work("scrape:catalog:juniper-oem", async () => {
+    console.log(`[${new Date().toISOString()}] Running: Juniper OEM catalog seed`);
+    const { scrapeJuniperOem } = await import("./scrapers/juniper-oem");
+    await scrapeJuniperOem();
+  });
+
   // ── Vendor lists ──────────────────────────────────────────────────────
 
   await boss.work("scrape:vendors:flexoptix", async () => {
diff --git a/packages/scraper/src/scrapers/blueoptics.ts b/packages/scraper/src/scrapers/blueoptics.ts
index 5e605a1..4847dc2 100644
--- a/packages/scraper/src/scrapers/blueoptics.ts
+++ b/packages/scraper/src/scrapers/blueoptics.ts
@@ -11,7 +11,7 @@ import { contentHash } from "../utils/hash";
 import * as cheerio from "cheerio";
 
 const BASE = "https://www.blueoptics.de";
-const CATALOG_PATH = "/transceivers/";
+const CATALOG_PATH = "/Transceivers_1"; // URL changed 2026 (was /transceivers/)
 const MAX_PAGES = 20;
 const HEADERS = {
   "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
@@ -170,10 +170,41 @@ function parseProductList(html: string): Product[] {
   });
 }
 
+// NOTE: blueoptics.de silently returns empty body over HTTP/2 (server bug).
+// Node.js fetch uses HTTP/2 by default → all pages return "".
+// Workaround: request pages through Node's https module (HTTP/1.1 only);
+// undici with HTTP/1.1 forced, or Playwright, would also work.
+// TODO: migrate to PlaywrightCrawler when Playwright budget allows.
+
+/**
+ * Download one page over HTTP/1.1 and return its body decoded as UTF-8.
+ *
+ * @param url - Absolute https URL of the page to download.
+ * @returns The response body as a string.
+ * @throws Error on a non-2xx status (redirects are not followed), when the
+ *   socket stays idle for 30 s, or when the request or response stream fails.
+ */
 async function fetchPage(url: string): Promise {
-  const resp = await fetch(url, { headers: HEADERS, signal: AbortSignal.timeout(30000) });
-  if (!resp.ok) throw new Error(`HTTP ${resp.status} for ${url}`);
-  return resp.text();
+  // Node's https module only speaks HTTP/1.1, which sidesteps the HTTP/2 empty-body bug.
+  const { default: https } = await import("https");
+  return new Promise<string>((resolve, reject) => {
+    const req = https.get(url, { headers: { ...HEADERS, connection: "close" } }, (res) => {
+      const status = res.statusCode ?? 0;
+      if (status < 200 || status >= 300) {
+        // Same contract as the previous fetch() version: non-OK responses throw.
+        res.resume(); // drain the unread body so the socket can close
+        reject(new Error(`HTTP ${status} for ${url}`));
+        return;
+      }
+      const chunks: Buffer[] = [];
+      res.on("data", (c: Buffer) => chunks.push(c));
+      res.on("end", () => resolve(Buffer.concat(chunks).toString("utf8")));
+      res.on("error", reject);
+    });
+    req.on("error", reject);
+    // destroy(err) reports the timeout through the "error" handlers above.
+    req.setTimeout(30000, () => req.destroy(new Error(`timeout after 30000ms for ${url}`)));
+  });
 }
 
 export async function scrapeBlueOptics(): Promise {
diff --git a/packages/scraper/src/scrapers/juniper-oem.ts b/packages/scraper/src/scrapers/juniper-oem.ts
new file mode 100644
index 0000000..8347868
--- /dev/null
+++ b/packages/scraper/src/scrapers/juniper-oem.ts
@@ -0,0 +1,175 @@
+/**
+ * Juniper OEM Transceiver Catalog Seed
+ *
+ * Seeds Juniper-branded transceiver PIDs (JNP / SFPP / QSFPP / etc.)
+ * into the transceivers table.
+ *
+ * Sources:
+ *   - Juniper Networks hardware guide (public, juniper.net/documentation)
+ *   - PIDs verified against juniper.net product pages
+ *
+ * Run: tsx packages/scraper/src/scrapers/juniper-oem.ts
+ * Cron: daily at 04:15 (alongside arista-oem)
+ */
+
+import { pool, ensureVendor } from "../utils/db";
+
+/** One hard-coded seed row; trailing comments name the transceivers column each field feeds. */
+interface JuniperPID {
+  pid: string; // part_number; also lower-cased into the row slug
+  formFactor: string; // form_factor
+  speedGbps: number; // speed_gbps
+  speed: string; // speed
+  reachMeters: number; // reach_meters
+  reachLabel: string; // reach_label
+  fiberType: string; // fiber_type
+  connector: string; // connector
+  wavelengths?: string; // wavelengths (NULL when omitted)
+  standard?: string; // ieee_reference (NULL when omitted)
+  notes?: string; // notes (NULL when omitted)
+  eol?: boolean; // not read by scrapeJuniperOem
+}
+
+/** Static catalog rows that scrapeJuniperOem writes to the database. */
+const JUNIPER_PIDS: JuniperPID[] = [
+  // ── 1G SFP ──────────────────────────────────────────────────────────────
+  { pid: "SFP-1GE-SX", formFactor: "SFP", speedGbps: 1, speed: "1G", reachMeters: 550, reachLabel: "SX", fiberType: "MMF", connector: "LC", wavelengths: "850nm", standard: "1000BASE-SX" },
+  { pid: "SFP-1GE-LX", formFactor: "SFP", speedGbps: 1, speed: "1G", reachMeters: 10000, reachLabel: "LX", fiberType: "SMF", connector: "LC", wavelengths: "1310nm", standard: "1000BASE-LX" },
+  { pid: "SFP-1GE-LX40K", formFactor: "SFP", speedGbps: 1, speed: "1G", reachMeters: 40000, reachLabel: "LX40K", fiberType: "SMF", connector: "LC", wavelengths: "1310nm" },
+  { pid: "SFP-1GE-EX", formFactor: "SFP", speedGbps: 1, speed: "1G", reachMeters: 40000, reachLabel: "EX", fiberType: "SMF", connector: "LC", wavelengths: "1310nm" },
+  { pid: "SFP-1GE-ZX", formFactor: "SFP", speedGbps: 1, speed: "1G", reachMeters: 80000, reachLabel: "ZX", fiberType: "SMF", connector: "LC", wavelengths: "1550nm", standard: "1000BASE-ZX" },
+  { pid: "SFP-1GE-T", formFactor: "SFP", speedGbps: 1, speed: "1G", reachMeters: 100, reachLabel: "T", fiberType: "DAC", connector: "RJ45", standard: "1000BASE-T" },
+  // ── 10G SFP+ ────────────────────────────────────────────────────────────
+  { pid: "SFPP-10G-SR", formFactor: "SFP+", speedGbps: 10, speed: "10G", reachMeters: 300, reachLabel: "SR", fiberType: "MMF", connector: "LC", wavelengths: "850nm", standard: "10GBASE-SR" },
+  { pid: "SFPP-10G-LR", formFactor: "SFP+", speedGbps: 10, speed: "10G", reachMeters: 10000, reachLabel: "LR", fiberType: "SMF", connector: "LC", wavelengths: "1310nm", standard: "10GBASE-LR" },
+  { pid: "SFPP-10G-ER", formFactor: "SFP+", speedGbps: 10, speed: "10G", reachMeters: 40000, reachLabel: "ER", fiberType: "SMF", connector: "LC", wavelengths: "1550nm", standard: "10GBASE-ER" },
+  { pid: "SFPP-10G-ZR", formFactor: "SFP+", speedGbps: 10, speed: "10G", reachMeters: 80000, reachLabel: "ZR", fiberType: "SMF", connector: "LC", wavelengths: "1550nm", standard: "10GBASE-ZR" },
+  { pid: "SFPP-10G-LRM", formFactor: "SFP+", speedGbps: 10, speed: "10G", reachMeters: 220, reachLabel: "LRM", fiberType: "MMF", connector: "LC", wavelengths: "1310nm", standard: "10GBASE-LRM" },
+  { pid: "SFPP-10G-T", formFactor: "SFP+", speedGbps: 10, speed: "10G", reachMeters: 30, reachLabel: "T", fiberType: "DAC", connector: "RJ45", standard: "10GBASE-T" },
+  { pid: "SFPP-10G-USR", formFactor: "SFP+", speedGbps: 10, speed: "10G", reachMeters: 100, reachLabel: "USR", fiberType: "MMF", connector: "LC", wavelengths: "850nm" },
+  // ── 25G SFP28 ───────────────────────────────────────────────────────────
+  { pid: "SFP-25G-SR", formFactor: "SFP28", speedGbps: 25, speed: "25G", reachMeters: 100, reachLabel: "SR", fiberType: "MMF", connector: "LC", wavelengths: "850nm", standard: "25GBASE-SR" },
+  { pid: "SFP-25G-LR", formFactor: "SFP28", speedGbps: 25, speed: "25G", reachMeters: 10000, reachLabel: "LR", fiberType: "SMF", connector: "LC", wavelengths: "1310nm", standard: "25GBASE-LR" },
+  { pid: "SFP-25G-ER", formFactor: "SFP28", speedGbps: 25, speed: "25G", reachMeters: 30000, reachLabel: "ER", fiberType: "SMF", connector: "LC", wavelengths: "1310nm" },
+  { pid: "SFP-25G-AOC-1M", formFactor: "SFP28", speedGbps: 25, speed: "25G", reachMeters: 1, reachLabel: "AOC-1M", fiberType: "MMF", connector: "SFP28" },
+  // ── 40G QSFP+ ───────────────────────────────────────────────────────────
+  { pid: "QSFPP-40G-SR4", formFactor: "QSFP+", speedGbps: 40, speed: "40G", reachMeters: 150, reachLabel: "SR4", fiberType: "MMF", connector: "MPO", wavelengths: "850nm", standard: "40GBASE-SR4" },
+  { pid: "QSFPP-40G-LR4", formFactor: "QSFP+", speedGbps: 40, speed: "40G", reachMeters: 10000, reachLabel: "LR4", fiberType: "SMF", connector: "LC", wavelengths: "1310nm", standard: "40GBASE-LR4" },
+  { pid: "QSFPP-40G-SR4-1", formFactor: "QSFP+", speedGbps: 40, speed: "40G", reachMeters: 150, reachLabel: "SR4", fiberType: "MMF", connector: "MPO", wavelengths: "850nm" },
+  { pid: "QSFPP-4X10G-SR", formFactor: "QSFP+", speedGbps: 40, speed: "40G", reachMeters: 300, reachLabel: "4x10G-SR", fiberType: "MMF", connector: "MPO", notes: "4×10G breakout" },
+  { pid: "QSFPP-40G-UNIV", formFactor: "QSFP+", speedGbps: 40, speed: "40G", reachMeters: 150, reachLabel: "UNIV", fiberType: "MMF", connector: "LC", wavelengths: "850nm" },
+  // ── 100G QSFP28 ─────────────────────────────────────────────────────────
+  { pid: "JNP-QSFP-100G-SR4", formFactor: "QSFP28", speedGbps: 100, speed: "100G", reachMeters: 100, reachLabel: "SR4", fiberType: "MMF", connector: "MPO", wavelengths: "850nm", standard: "100GBASE-SR4" },
+  { pid: "JNP-QSFP-100G-LR4", formFactor: "QSFP28", speedGbps: 100, speed: "100G", reachMeters: 10000, reachLabel: "LR4", fiberType: "SMF", connector: "LC", wavelengths: "1295-1310nm", standard: "100GBASE-LR4" },
+  { pid: "JNP-QSFP-100G-ER4", formFactor: "QSFP28", speedGbps: 100, speed: "100G", reachMeters: 30000, reachLabel: "ER4", fiberType: "SMF", connector: "LC", wavelengths: "1295-1310nm" },
+  { pid: "JNP-QSFP-100G-FR", formFactor: "QSFP28", speedGbps: 100, speed: "100G", reachMeters: 2000, reachLabel: "FR", fiberType: "SMF", connector: "LC", wavelengths: "1310nm", standard: "100GBASE-FR" },
+  { pid: "JNP-QSFP-100G-DR", formFactor: "QSFP28", speedGbps: 100, speed: "100G", reachMeters: 500, reachLabel: "DR", fiberType: "SMF", connector: "LC", wavelengths: "1310nm", standard: "100GBASE-DR" },
+  { pid: "JNP-QSFP-100G-CWDM4", formFactor: "QSFP28", speedGbps: 100, speed: "100G", reachMeters: 2000, reachLabel: "CWDM4", fiberType: "SMF", connector: "LC", wavelengths: "1271-1331nm", standard: "100GBASE-CWDM4" },
+  { pid: "JNP-QSFP-100G-PSM4", formFactor: "QSFP28", speedGbps: 100, speed: "100G", reachMeters: 500, reachLabel: "PSM4", fiberType: "SMF", connector: "MPO", wavelengths: "1310nm" },
+  { pid: "JNP-QSFP-100G-SR4-T", formFactor: "QSFP28", speedGbps: 100, speed: "100G", reachMeters: 100, reachLabel: "SR4", fiberType: "MMF", connector: "MPO", wavelengths: "850nm", notes: "Industrial temp" },
+  { pid: "JNP-QSFP-100G-AOC-1M", formFactor: "QSFP28", speedGbps: 100, speed: "100G", reachMeters: 1, reachLabel: "AOC-1M", fiberType: "MMF", connector: "MPO" },
+  { pid: "JNP-QSFP-100G-AOC-3M", formFactor: "QSFP28", speedGbps: 100, speed: "100G", reachMeters: 3, reachLabel: "AOC-3M", fiberType: "MMF", connector: "MPO" },
+  // ── 100G QSFP28 (EX/QFX series naming) ─────────────────────────────────
+  { pid: "QSFP-100G-SR4-T2", formFactor: "QSFP28", speedGbps: 100, speed: "100G", reachMeters: 100, reachLabel: "SR4", fiberType: "MMF", connector: "MPO", wavelengths: "850nm" },
+  { pid: "QSFP-100G-LR4-T", formFactor: "QSFP28", speedGbps: 100, speed: "100G", reachMeters: 10000, reachLabel: "LR4", fiberType: "SMF", connector: "LC", wavelengths: "1295-1310nm" },
+  // ── 200G QSFP56 ─────────────────────────────────────────────────────────
+  { pid: "JNP-QSFP56-200G-SR4", formFactor: "QSFP56", speedGbps: 200, speed: "200G", reachMeters: 100, reachLabel: "SR4", fiberType: "MMF", connector: "MPO", wavelengths: "850nm" },
+  { pid: "JNP-QSFP56-200G-FR4", formFactor: "QSFP56", speedGbps: 200, speed: "200G", reachMeters: 2000, reachLabel: "FR4", fiberType: "SMF", connector: "LC", wavelengths: "1271-1331nm" },
+  { pid: "JNP-QSFP56-200G-LR4", formFactor: "QSFP56", speedGbps: 200, speed: "200G", reachMeters: 10000, reachLabel: "LR4", fiberType: "SMF", connector: "LC", wavelengths: "1295-1310nm" },
+  // ── 400G QSFP-DD ────────────────────────────────────────────────────────
+  { pid: "JNP-QSFPDD-400G-SR8", formFactor: "QSFP-DD", speedGbps: 400, speed: "400G", reachMeters: 100, reachLabel: "SR8", fiberType: "MMF", connector: "MPO", wavelengths: "850nm", standard: "400GBASE-SR8" },
+  { pid: "JNP-QSFPDD-400G-DR4", formFactor: "QSFP-DD", speedGbps: 400, speed: "400G", reachMeters: 500, reachLabel: "DR4", fiberType: "SMF", connector: "MPO", wavelengths: "1310nm", standard: "400GBASE-DR4" },
+  { pid: "JNP-QSFPDD-400G-FR4", formFactor: "QSFP-DD", speedGbps: 400, speed: "400G", reachMeters: 2000, reachLabel: "FR4", fiberType: "SMF", connector: "LC", wavelengths: "1271-1331nm", standard: "400GBASE-FR4" },
+  { pid: "JNP-QSFPDD-400G-LR4", formFactor: "QSFP-DD", speedGbps: 400, speed: "400G", reachMeters: 10000, reachLabel: "LR4", fiberType: "SMF", connector: "LC", wavelengths: "1295-1310nm", standard: "400GBASE-LR4" },
+  { pid: "JNP-QSFPDD-400G-LR8", formFactor: "QSFP-DD", speedGbps: 400, speed: "400G", reachMeters: 10000, reachLabel: "LR8", fiberType: "SMF", connector: "LC", wavelengths: "1295-1310nm", standard: "400GBASE-LR8" },
+  { pid: "JNP-QSFPDD-400G-ZR", formFactor: "QSFP-DD", speedGbps: 400, speed: "400G", reachMeters: 80000, reachLabel: "ZR", fiberType: "SMF", connector: "LC", wavelengths: "C-band", standard: "400ZR" },
+  { pid: "JNP-QSFPDD-400G-ZRP", formFactor: "QSFP-DD", speedGbps: 400, speed: "400G", reachMeters: 120000, reachLabel: "ZR+", fiberType: "SMF", connector: "LC", wavelengths: "C-band", standard: "OpenZR+" },
+  // ── 400G OSFP ───────────────────────────────────────────────────────────
+  { pid: "JNP-OSFP-400G-DR4", formFactor: "OSFP", speedGbps: 400, speed: "400G", reachMeters: 500, reachLabel: "DR4", fiberType: "SMF", connector: "MPO", wavelengths: "1310nm", standard: "400GBASE-DR4" },
+  { pid: "JNP-OSFP-400G-FR4", formFactor: "OSFP", speedGbps: 400, speed: "400G", reachMeters: 2000, reachLabel: "FR4", fiberType: "SMF", connector: "LC", wavelengths: "1271-1331nm" },
+  { pid: "JNP-OSFP-400G-LR4", formFactor: "OSFP", speedGbps: 400, speed: "400G", reachMeters: 10000, reachLabel: "LR4", fiberType: "SMF", connector: "LC", wavelengths: "1295-1310nm" },
+  // ── 800G OSFP ───────────────────────────────────────────────────────────
+  { pid: "JNP-OSFP-800G-SR8", formFactor: "OSFP", speedGbps: 800, speed: "800G", reachMeters: 100, reachLabel: "SR8", fiberType: "MMF", connector: "MPO", wavelengths: "850nm", standard: "800GBASE-SR8" },
+  { pid: "JNP-OSFP-800G-DR8", formFactor: "OSFP", speedGbps: 800, speed: "800G", reachMeters: 500, reachLabel: "DR8", fiberType: "SMF", connector: "MPO", wavelengths: "1310nm" },
+  { pid: "JNP-OSFP-800G-FR8", formFactor: "OSFP", speedGbps: 800, speed: "800G", reachMeters: 2000, reachLabel: "FR8", fiberType: "SMF", connector: "LC", wavelengths: "1271-1331nm" },
+  { pid: "JNP-OSFP-800G-LR8", formFactor: "OSFP", speedGbps: 800, speed: "800G", reachMeters: 10000, reachLabel: "LR8", fiberType: "SMF", connector: "LC", wavelengths: "1295-1310nm" },
+  // ── DAC / AOC ────────────────────────────────────────────────────────────
+  { pid: "SFPP-10G-DAC-1M", formFactor: "SFP+", speedGbps: 10, speed: "10G", reachMeters: 1, reachLabel: "DAC-1M", fiberType: "DAC", connector: "SFP+" },
+  { pid: "SFPP-10G-DAC-3M", formFactor: "SFP+", speedGbps: 10, speed: "10G", reachMeters: 3, reachLabel: "DAC-3M", fiberType: "DAC", connector: "SFP+" },
+  { pid: "QSFPP-40G-DAC-1M", formFactor: "QSFP+", speedGbps: 40, speed: "40G", reachMeters: 1, reachLabel: "DAC-1M", fiberType: "DAC", connector: "QSFP+" },
+  { pid: "QSFPP-40G-DAC-3M", formFactor: "QSFP+", speedGbps: 40, speed: "40G", reachMeters: 3, reachLabel: "DAC-3M", fiberType: "DAC", connector: "QSFP+" },
+  { pid: "JNP-100G-DAC-1M", formFactor: "QSFP28", speedGbps: 100, speed: "100G", reachMeters: 1, reachLabel: "DAC-1M", fiberType: "DAC", connector: "QSFP28" },
+  { pid: "JNP-100G-DAC-3M", formFactor: "QSFP28", speedGbps: 100, speed: "100G", reachMeters: 3, reachLabel: "DAC-3M", fiberType: "DAC", connector: "QSFP28" },
+  { pid: "JNP-400G-DAC-1M", formFactor: "QSFP-DD", speedGbps: 400, speed: "400G", reachMeters: 1, reachLabel: "DAC-1M", fiberType: "DAC", connector: "QSFP-DD" },
+  { pid: "JNP-400G-DAC-3M", formFactor: "QSFP-DD", speedGbps: 400, speed: "400G", reachMeters: 3, reachLabel: "DAC-3M", fiberType: "DAC", connector: "QSFP-DD" },
+];
+
+/**
+ * Upsert every entry of JUNIPER_PIDS into the transceivers table.
+ *
+ * Rows are keyed by slug. On conflict only speed_gbps, reach_meters,
+ * fiber_type, wavelengths and updated_at can change; every other column
+ * keeps its stored value. A failing row is logged and counted, then the
+ * loop moves on to the next PID.
+ *
+ * @returns Resolves after all PIDs were attempted and the summary was logged.
+ */
+export async function scrapeJuniperOem(): Promise<void> {
+  console.log("=== Juniper OEM Transceiver Seed ===\n");
+
+  const juniperVendorId = await ensureVendor(
+    "Juniper Networks",
+    "oem",
+    "https://www.juniper.net",
+    undefined
+  );
+
+  let inserted = 0;
+  let updated = 0;
+  let errors = 0;
+
+  for (const p of JUNIPER_PIDS) {
+    const slug = `juniper-${p.pid.toLowerCase().replace(/[^a-z0-9]+/g, "-")}`;
+    try {
+      const res = await pool.query(
+        `INSERT INTO transceivers
+           (slug, part_number, vendor_id, form_factor, speed, speed_gbps,
+            reach_meters, reach_label, fiber_type, connector, wavelengths,
+            dom_support, ieee_reference, market_status, category, notes)
+         VALUES ($1,$2,$3,$4,$5,$6,$7,$8,$9,$10,$11,true,$12,'Mainstream','DataCenter',$13)
+         ON CONFLICT (slug) DO UPDATE SET
+           speed_gbps = EXCLUDED.speed_gbps,
+           reach_meters = CASE WHEN EXCLUDED.reach_meters > 0 THEN EXCLUDED.reach_meters ELSE transceivers.reach_meters END,
+           fiber_type = CASE WHEN EXCLUDED.fiber_type <> '' THEN EXCLUDED.fiber_type ELSE transceivers.fiber_type END,
+           wavelengths = COALESCE(EXCLUDED.wavelengths, transceivers.wavelengths),
+           updated_at = NOW()
+         RETURNING (xmax = 0) as was_inserted`,
+        [slug, p.pid, juniperVendorId, p.formFactor, p.speed, p.speedGbps,
+         p.reachMeters, p.reachLabel, p.fiberType, p.connector,
+         p.wavelengths ?? null, p.standard ?? null, p.notes ?? null]
+      );
+      // was_inserted (xmax = 0) separates a fresh insert from a conflict update.
+      if (res.rows[0]?.was_inserted) inserted++; else updated++;
+    } catch (err) {
+      console.warn(` Skip ${p.pid}: ${(err as Error).message.slice(0, 80)}`);
+      errors++;
+    }
+  }
+
+  console.log(`\n=== Juniper OEM Seed Complete ===`);
+  console.log(` Inserted: ${inserted}`);
+  console.log(` Updated: ${updated}`);
+  console.log(` Errors: ${errors}`);
+  console.log(` Total PIDs: ${JUNIPER_PIDS.length}\n`);
+}
+
+// CLI entry point: run the seed directly, then close the pool.
+if (require.main === module) {
+  scrapeJuniperOem()
+    .then(() => pool.end())
+    .catch((err) => {
+      console.error("Fatal:", err);
+      pool.end();
+      process.exit(1);
+    });
+}