/**
 * Eoptolink Manufacturer Catalog Scraper
 *
 * Source: www.eoptolink.com — One of China's top-3 optical transceiver OEMs.
 *         (Finisar competitor, supplies tier-1 cloud hyperscalers)
 * Target: Discover all product families + part numbers, seed transceivers table
 *         as manufacturer=Eoptolink entries.
 *
 * Strategy:
 *   Phase 1: Fetch homepage → extract all /product-solutions/... category URLs (≈90)
 *   Phase 2: Fetch each category page → parse product name + Eoptolink part numbers
 *            (e.g. EOLO-168HG-10-XDX, EOLQ-128HG-02-PX; see PART_NUMBER_PATTERN)
 *
 * Note: Eoptolink does NOT publish retail prices (B2B OEM manufacturer).
 *       This scraper adds manufacturer catalog entries — no price_observations.
 *
 * Rate limit: 1 req/2s — polite crawl of OEM's website.
 */

import { pool, findOrCreateScrapedTransceiver, ensureVendor } from "../utils/db";

const BASE = "https://www.eoptolink.com";

const HEADERS = {
  "User-Agent":
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0.0.0 Safari/537.36",
  Accept: "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
  "Accept-Language": "en-US,en;q=0.9",
};

/** Pause between page fetches (1 request every 2 seconds). */
const DELAY_MS = 2000;

/** Upper bound on part numbers seeded from a single product family page. */
const MAX_PART_NUMBERS_PER_PAGE = 10;

/** How many per-page / per-row failures are printed before going quiet. */
const MAX_LOGGED_PAGE_ERRORS = 10;
const MAX_LOGGED_DB_ERRORS = 5;

/** Resolves after `ms` milliseconds. */
function sleep(ms: number): Promise<void> {
  return new Promise<void>((resolve) => setTimeout(resolve, ms));
}

/** Extracts a printable message from an arbitrary thrown value. */
function errorMessage(err: unknown): string {
  return err instanceof Error ? err.message : String(err);
}

/**
 * Downloads `url` and returns the response body as text.
 *
 * @throws Error when the response status is not 2xx, or when the request
 *         exceeds the 20 s timeout (abort raised by `fetch`).
 */
async function fetchHtml(url: string): Promise<string> {
  const resp = await fetch(url, { headers: HEADERS, signal: AbortSignal.timeout(20_000) });
  if (!resp.ok) throw new Error(`HTTP ${resp.status} for ${url}`);
  return resp.text();
}

// ── Classification helpers ──────────────────────────────────────────────────

interface SpeedTier {
  pattern: RegExp;
  speed: string;
  speedGbps: number;
}

/** Speed tokens, checked in order from fastest to slowest; first hit wins. */
const SPEED_TIERS: readonly SpeedTier[] = [
  { pattern: /\b1\.?6t\b/i, speed: "1.6T", speedGbps: 1600 },
  { pattern: /\b800g\b/i, speed: "800G", speedGbps: 800 },
  { pattern: /\b400g\b/i, speed: "400G", speedGbps: 400 },
  { pattern: /\b200g\b/i, speed: "200G", speedGbps: 200 },
  { pattern: /\b100g\b/i, speed: "100G", speedGbps: 100 },
  { pattern: /\b50g\b/i, speed: "50G", speedGbps: 50 },
  { pattern: /\b40g\b/i, speed: "40G", speedGbps: 40 },
  { pattern: /\b32g\b/i, speed: "32G", speedGbps: 32 },
  { pattern: /\b25g\b/i, speed: "25G", speedGbps: 25 },
  { pattern: /\b16g\b/i, speed: "16G", speedGbps: 16 },
  { pattern: /\b10g\b/i, speed: "10G", speedGbps: 10 },
  { pattern: /\b8g\b/i, speed: "8G", speedGbps: 8 },
  { pattern: /\b4g\b/i, speed: "4G", speedGbps: 4 },
  { pattern: /\b1g\b/i, speed: "1G", speedGbps: 1 },
];

/**
 * Derives the line rate from a URL slug and/or title text.
 *
 * @returns the first matching tier, or `{ speed: "Unknown", speedGbps: 0 }`
 *          when no speed token (e.g. "100g", "1.6t") appears as a whole word.
 */
function speedFromSlug(slug: string): { speed: string; speedGbps: number } {
  const tier = SPEED_TIERS.find((candidate) => candidate.pattern.test(slug));
  return tier
    ? { speed: tier.speed, speedGbps: tier.speedGbps }
    : { speed: "Unknown", speedGbps: 0 };
}

/**
 * Form-factor patterns, applied to UPPER-CASED text. Order matters: more
 * specific names (QSFP-DD800, SFP56-DD, ...) must precede their prefixes.
 */
const FORM_FACTORS: ReadonlyArray<readonly [RegExp, string]> = [
  [/\bOSFP\b/, "OSFP"],
  [/\bQSFP.?DD800\b/, "QSFP-DD800"],
  [/\bQSFP.?DD\b/, "QSFP-DD"],
  [/\bQSFP56\b/, "QSFP56"],
  [/\bQSFP112\b/, "QSFP112"],
  [/\bQSFP28\b/, "QSFP28"],
  // Accept "QSFP+", "QSFP PLUS" and the slug spelling "QSFP-PLUS"; without the
  // hyphen form a "qsfp-plus" slug used to fall through to the SFP+ rule.
  [/\bQSFP(?:\+|[ -]PLUS\b)/, "QSFP+"],
  [/\bSFP56.DD\b/, "SFP56-DD"],
  [/\bSFP56\b/, "SFP56"],
  [/\bSFP28\b/, "SFP28"],
  // Anchored with \b so it cannot match the tail of "QSFP-PLUS".
  [/\bSFP(?:\+|[ -]PLUS\b)/, "SFP+"],
  [/\bXFP\b/, "XFP"],
  [/\bCFP4\b/, "CFP4"],
  [/\bCFP2\b/, "CFP2"],
  [/\bCFP\b/, "CFP"],
];

/**
 * Guesses the module form factor from free text (title and/or slug).
 * Matching is case-insensitive; falls back to "SFP" when nothing matches.
 */
function formFactorFromText(text: string): string {
  const upper = text.toUpperCase();
  for (const [pattern, formFactor] of FORM_FACTORS) {
    if (pattern.test(upper)) return formFactor;
  }
  return "SFP";
}

/**
 * Guesses the fiber type from free text.
 *
 * Returns "MMF" when a multimode hint ("multimode", "mmf", "sr", "om1".."om5")
 * occurs anywhere in the text (substring match); everything else is "SMF",
 * the default for OEM products.
 */
function fiberFromText(text: string): string {
  return /multimode|mmf|sr|om[1-5]/i.test(text) ? "MMF" : "SMF";
}

/**
 * Guesses the product category from free text using case-insensitive
 * substring checks, in priority order. Falls back to "DataCenter".
 */
function categoryFromText(text: string): string {
  const lower = text.toLowerCase();
  if (/coherent|zr|dpsk/.test(lower)) return "Coherent";
  if (lower.includes("dwdm")) return "DWDM";
  if (lower.includes("cwdm")) return "CWDM";
  if (lower.includes("aoc")) return "AOC";
  if (lower.includes("dac")) return "DAC";
  // "pon" also covers "gpon" / "gepon" since this is a substring test.
  if (lower.includes("pon")) return "PON";
  return "DataCenter";
}

// ── Phase 1: Discover product solution URLs ──────────────────────────────────

/** Relative, double-quoted links below /product-solutions/ (no query/fragment). */
const PRODUCT_LINK_PATTERN = /href="(\/product-solutions\/[^"#?]+)"/gi;

/** Path fragments whose pages are skipped (OSA / test-board entries — no transceiver catalog). */
const EXCLUDED_PATH_FRAGMENTS: readonly string[] = ["/osa/", "/other/"];

/** Paths ending in one of these are skipped (presumably index/landing pages — TODO confirm). */
const EXCLUDED_PATH_SUFFIXES: readonly string[] = ["/400g/", "/800g/", "/product-solutions/"];

/**
 * Fetches the homepage and returns the de-duplicated relative paths of all
 * product-solution pages worth crawling, in order of first appearance.
 *
 * @throws Error when the homepage cannot be fetched.
 */
async function fetchProductSolutionUrls(): Promise<string[]> {
  console.log(` Fetching Eoptolink homepage for product solution links...`);
  const html = await fetchHtml(`${BASE}/`);

  const discovered = new Set<string>();
  for (const match of html.matchAll(PRODUCT_LINK_PATTERN)) {
    const path = match[1];
    if (path) discovered.add(path);
  }

  const filtered = [...discovered].filter(
    (path) =>
      !EXCLUDED_PATH_FRAGMENTS.some((fragment) => path.includes(fragment)) &&
      !EXCLUDED_PATH_SUFFIXES.some((suffix) => path.endsWith(suffix))
  );

  console.log(` Found ${filtered.length} product solution pages`);
  return filtered;
}

// ── Phase 2: Parse product detail page ──────────────────────────────────────

interface EoptolinkProduct {
  pageTitle: string;
  partNumbers: string[];
  speed: string;
  speedGbps: number;
  formFactor: string;
  fiberType: string;
  category: string;
  pageUrl: string;
}

/**
 * Eoptolink part numbers, e.g. EOLO-168HG-10-XDX, EOLQ-128HG-02-PX.
 * The leading \b stops the pattern from matching the tail of unrelated
 * tokens such as "IEEE-802" (which previously yielded "EEE-802").
 */
const PART_NUMBER_PATTERN =
  /\bE[A-Z]{2,5}-\d{2,3}[A-Z0-9]{1,3}(?:-\d{1,3})?(?:-[A-Z0-9]{1,6})*/g;

/**
 * Extracts the product title, part numbers and derived classification from a
 * product page.
 *
 * @param html    raw page markup
 * @param pageUrl URL or relative path of the page; its last two non-empty
 *                path segments are used as classification hints
 * @returns the parsed product, or `null` when no usable title (≥ 3 chars)
 *          can be found
 */
function parseProductPage(html: string, pageUrl: string): EoptolinkProduct | null {
  // Prefer <title>; fall back to the first short <h1>.
  const titleMatch =
    html.match(/<title[^>]*>([^<]+)<\/title>/i) ?? html.match(/<h1[^>]*>([^<]{5,80})</i);

  // Drop the " | Site name" suffix. The second pass also handles a bar that is
  // followed by a line break; the full-width bar is accepted as well
  // (assumption: the site may use either — harmless if it does not).
  const pageTitle = (titleMatch?.[1] ?? "")
    .replace(/\s*\|.*$/, "")
    .replace(/[|｜]+[^|｜]*$/, "")
    .trim();
  if (pageTitle.length < 3) return null;

  const partNumbers = [
    ...new Set(Array.from(html.matchAll(PART_NUMBER_PATTERN), (match) => match[0])),
  ];

  // Ignore empty segments so a trailing "/" does not push the parent segment
  // (often the speed category, e.g. ".../100g/qsfp28-lr4/") out of the slug.
  const slug = pageUrl
    .split("/")
    .filter((segment) => segment.length > 0)
    .slice(-2)
    .join("-");

  const { speed, speedGbps } = speedFromSlug(slug + " " + pageTitle);
  const hints = pageTitle + " " + slug;

  return {
    pageTitle,
    partNumbers,
    speed,
    speedGbps,
    formFactor: formFactorFromText(hints),
    fiberType: fiberFromText(hints),
    category: categoryFromText(hints),
    pageUrl,
  };
}

// ── Main ────────────────────────────────────────────────────────────────────

/**
 * Crawls the Eoptolink product catalog and seeds one transceiver row per part
 * number found (or per page title when a page lists no part numbers).
 *
 * Failures on individual pages or rows are counted and reported in the final
 * summary; they do not abort the crawl. A failed homepage fetch ends the run
 * early without throwing.
 *
 * @throws whatever `ensureVendor` throws (it runs outside any try/catch).
 */
export async function scrapeEoptolink(): Promise<void> {
  console.log("=== Eoptolink Manufacturer Catalog Scraper ===\n");

  const vendorId = await ensureVendor(
    "Eoptolink",
    "manufacturer",
    "https://www.eoptolink.com",
    "https://www.eoptolink.com/product-solutions/"
  );
  console.log(` Vendor ID: ${vendorId}`);

  // Phase 1: Collect product solution URLs
  let productUrls: string[];
  try {
    productUrls = await fetchProductSolutionUrls();
  } catch (err: unknown) {
    console.error(` Homepage fetch failed: ${errorMessage(err)}`);
    return;
  }

  console.log(`\n[Phase 2] Fetching ${productUrls.length} product detail pages...\n`);

  let added = 0;
  let skipped = 0;
  let errors = 0;
  let dbErrors = 0;

  for (const relPath of productUrls) {
    await sleep(DELAY_MS);
    const url = `${BASE}${relPath}`;

    try {
      const html = await fetchHtml(url);
      const product = parseProductPage(html, relPath);

      if (!product || product.speedGbps === 0) {
        skipped++;
        continue;
      }

      // Seed one row per part number (capped per page); when the page lists
      // no part numbers, seed a single row named after the page title.
      const namesToSeed: string[] =
        product.partNumbers.length > 0
          ? product.partNumbers.slice(0, MAX_PART_NUMBERS_PER_PAGE)
          : [product.pageTitle];

      for (const partNumber of namesToSeed) {
        try {
          await findOrCreateScrapedTransceiver({
            partNumber: partNumber.slice(0, 80),
            vendorId,
            formFactor: product.formFactor,
            speedGbps: product.speedGbps,
            speed: product.speed,
            fiberType: product.fiberType,
            category: product.category,
          });
          added++;
        } catch (dbErr: unknown) {
          // Duplicate/constraint errors are tolerated on re-runs, but surface
          // the first few so that a broken DB connection is not hidden.
          dbErrors++;
          if (dbErrors <= MAX_LOGGED_DB_ERRORS) {
            console.warn(` ✗ DB error ${partNumber}: ${errorMessage(dbErr).slice(0, 120)}`);
          }
        }
      }

      console.log(
        ` ✓ ${product.pageTitle.padEnd(45)} ff=${product.formFactor.padEnd(8)} speed=${product.speed.padEnd(5)} pn=${product.partNumbers.length}`
      );
    } catch (err: unknown) {
      errors++;
      if (errors <= MAX_LOGGED_PAGE_ERRORS) {
        console.warn(` ✗ Error ${relPath}: ${errorMessage(err).slice(0, 60)}`);
      }
    }
  }

  console.log(`\n=== Eoptolink Catalog Scraper Complete ===`);
  console.log(` Pages processed: ${productUrls.length - errors}`);
  console.log(` Transceivers seeded: ${added}`);
  console.log(` Skipped (no speed): ${skipped}`);
  console.log(` Errors: ${errors}`);
  console.log(` DB errors: ${dbErrors}`);
}

// ── CLI ────────────────────────────────────────────────────────────────────

if (require.main === module) {
  scrapeEoptolink()
    .then(() => pool.end())
    .catch(async (err: unknown) => {
      console.error("Fatal:", err);
      try {
        // Wait for the pool to close so connections are released before exit.
        await pool.end();
      } catch {
        // Pool already closed or unusable — nothing more to clean up.
      }
      process.exit(1);
    });
}