/**
 * SmartOptics Scraper — Premium coherent/DWDM transceiver manufacturer
 *
 * smartoptics.com — WordPress site, no prices (B2B, RFQ model).
 * Scrapes the product catalog for specs and images (datasheet links are not
 * captured by this module).
 * Products listed at /products/optical-transceivers/ → individual /product/SKU/ pages.
 */

import { pool, findOrCreateScrapedTransceiver, ensureVendor } from "../utils/db";

const BASE = "https://smartoptics.com";
const CATALOG_URL = `${BASE}/products/optical-transceivers/`;

const HEADERS = {
  "User-Agent":
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
  Accept: "text/html,application/xhtml+xml",
};

/** Pause inserted after every catalog/product request. */
const REQUEST_DELAY_MS = 1500;
/** Per-request timeout handed to `fetch` via `AbortSignal.timeout`. */
const FETCH_TIMEOUT_MS = 30000;
/** Upper bound on the number of paginated catalog pages that are visited. */
const MAX_CATALOG_PAGES = 10;

/** Resolves after `ms` milliseconds. */
function sleep(ms: number): Promise<void> {
  return new Promise((resolve) => setTimeout(resolve, ms));
}

/** Extracts a printable message from a caught value of unknown type. */
function errorMessage(err: unknown): string {
  return err instanceof Error ? err.message : String(err);
}

/**
 * Guesses form factor and line rate from free text (typically a product name).
 *
 * The checks are ordered from most to least specific because several tokens
 * are substrings of others ("qsfp28" contains "sfp28", "qsfp+" contains
 * "sfp+", "100g" contains "10g"); reordering them changes the result.
 *
 * @param text Text to inspect; matching is case-insensitive.
 * @returns The detected form factor/speed, or QSFP28 / 100G when nothing matches.
 */
function detectFormFactor(text: string): { formFactor: string; speed: string; speedGbps: number } {
  const t = text.toLowerCase();

  if (t.includes("qsfp-dd800") || t.includes("sfp-dd800") || t.includes("800ge")) {
    return { formFactor: "QSFP-DD", speed: "800G", speedGbps: 800 };
  }
  if (t.includes("qsfp-dd") || (t.includes("400g") && t.includes("qsfp"))) {
    return { formFactor: "QSFP-DD", speed: "400G", speedGbps: 400 };
  }
  if (t.includes("qsfp112")) {
    return { formFactor: "QSFP112", speed: "400G", speedGbps: 400 };
  }
  if (t.includes("qsfp56")) {
    return { formFactor: "QSFP56", speed: "200G", speedGbps: 200 };
  }
  if (t.includes("qsfp28") || t.includes("100ge") || t.includes("100g")) {
    return { formFactor: "QSFP28", speed: "100G", speedGbps: 100 };
  }
  if (t.includes("sfp28") || t.includes("25g")) {
    return { formFactor: "SFP28", speed: "25G", speedGbps: 25 };
  }
  if (t.includes("qsfp+") || t.includes("40g")) {
    return { formFactor: "QSFP+", speed: "40G", speedGbps: 40 };
  }
  if (t.includes("sfp+") || t.includes("10g")) {
    return { formFactor: "SFP+", speed: "10G", speedGbps: 10 };
  }
  if (t.includes("sfp")) {
    return { formFactor: "SFP", speed: "1G", speedGbps: 1 };
  }

  // Fallback used when no known token is present.
  return { formFactor: "QSFP28", speed: "100G", speedGbps: 100 };
}

/**
 * Extracts a reach figure such as "80km" or "300m" from free text.
 *
 * Kilometre values are tried first and may be decimal ("1.5km" → 1500 m);
 * previously only the digits after the decimal point were picked up, which
 * turned "1.5km" into "5km". Metre values are matched as whole numbers only.
 *
 * @param text Text to inspect (typically a product name).
 * @returns Label plus reach in metres, or `undefined` when no reach is found.
 */
function detectReach(text: string): { label: string; meters: number } | undefined {
  const kmMatch = text.match(/(\d+(?:\.\d+)?)\s*km/i);
  if (kmMatch) {
    const km = parseFloat(kmMatch[1]);
    // Round so that floating-point noise (e.g. 1.1 * 1000) never yields a fractional metre count.
    return { label: `${km}km`, meters: Math.round(km * 1000) };
  }

  // `\b` after "m" keeps units such as "Mbps" from being read as metres.
  const mMatch = text.match(/(\d+)\s*m\b/i);
  if (mMatch) {
    const m = parseInt(mMatch[1], 10);
    return { label: `${m}m`, meters: m };
  }

  return undefined;
}

/**
 * Classifies the fiber type implied by free text.
 *
 * Single-mode keywords win over multi-mode keywords; text with neither is
 * treated as single-mode.
 *
 * @param text Text to inspect (typically a product name).
 * @returns "SMF" or "MMF".
 */
function detectFiber(text: string): string {
  if (/dwdm|cwdm|coherent|coh|single.?mode|smf/i.test(text)) return "SMF";
  if (/multi.?mode|mmf|sr/i.test(text)) return "MMF";
  return "SMF"; // SmartOptics is almost exclusively SMF/coherent
}

/**
 * Downloads a page as text using the shared browser-like headers.
 *
 * @param url Absolute URL to fetch.
 * @returns The response body.
 * @throws Error when the response status is not 2xx; also rejects when the
 *   request fails or exceeds the timeout.
 */
async function fetchPage(url: string): Promise<string> {
  const resp = await fetch(url, {
    headers: HEADERS,
    signal: AbortSignal.timeout(FETCH_TIMEOUT_MS),
  });
  if (!resp.ok) throw new Error(`HTTP ${resp.status} for ${url}`);
  return resp.text();
}

/**
 * Collects the distinct product-page links found in a catalog page.
 *
 * Only absolute `http(s)://smartoptics.com/product/...` hrefs in double quotes
 * are recognised. Each URL is normalised to end with exactly one trailing
 * slash so the same product is not collected twice.
 *
 * @param html Raw HTML of a catalog page.
 * @returns Unique product URLs in first-seen order.
 */
function extractProductUrls(html: string): string[] {
  const urls = new Set<string>();
  const linkPattern = /href="(https?:\/\/smartoptics\.com\/product\/[^"]+)"/gi;
  for (const match of html.matchAll(linkPattern)) {
    urls.add(match[1].replace(/\/$/, "") + "/");
  }
  return Array.from(urls);
}

/** Attributes derived from a single product page. */
interface ProductData {
  /** Last path segment of the product URL, upper-cased (or a name-derived fallback). */
  sku: string;
  /** Product title taken from the page heading or the og:title meta tag. */
  name: string;
  /** URL of the product page the data came from. */
  url: string;
  /** Product image URL, when one could be located. */
  imageUrl?: string;
  formFactor: string;
  speed: string;
  speedGbps: number;
  reachLabel?: string;
  reachMeters?: number;
  fiberType: string;
  /** True when the name or the first part of the page mentions coherent/DWDM keywords. */
  coherent: boolean;
  /** "DWDM" or "CWDM" when the name mentions one of them. */
  wdmType?: string;
}

/**
 * Fetches one product page and derives its attributes from the title and
 * leading markup.
 *
 * Failures are logged and reported as `null` so that one bad page does not
 * abort the whole run.
 *
 * @param url Product page URL.
 * @returns The parsed product, or `null` when the page could not be fetched
 *   or contains no usable title.
 */
async function scrapeProductPage(url: string): Promise<ProductData | null> {
  try {
    const html = await fetchPage(url);

    // Prefer the visible heading; fall back to the Open Graph title.
    const nameMatch =
      html.match(/<h1[^>]*>([^<]+)<\/h1>/) || html.match(/og:title" content="([^"]+)"/);
    const name = nameMatch ? nameMatch[1].trim().replace(/ \| Smartoptics$/, "") : "";
    if (!name) return null;

    const sku =
      url.split("/").filter(Boolean).pop()?.toUpperCase() || name.replace(/\s+/g, "-");

    // Prefer the Open Graph image; fall back to an uploaded image tagged with a "product" class.
    const imgMatch =
      html.match(/property="og:image" content="([^"]+)"/) ||
      html.match(
        /<img[^>]+src="([^"]*wp-content\/uploads[^"]*\.(?:png|jpg|webp))"[^>]* class="[^"]*product/i
      );
    const imageUrl = imgMatch ? imgMatch[1] : undefined;

    const ff = detectFormFactor(name);
    const reach = detectReach(name);

    // Only the first 3000 characters of the page are searched for coherent keywords.
    const coherent = /coherent|coh-t|coh\.|dwdm|dp-qpsk|qpsk|cfp2/i.test(
      name + html.slice(0, 3000)
    );

    let wdmType: string | undefined;
    if (/dwdm/i.test(name)) {
      wdmType = "DWDM";
    } else if (/cwdm/i.test(name)) {
      wdmType = "CWDM";
    }

    return {
      sku,
      name,
      url,
      imageUrl,
      ...ff,
      reachLabel: reach?.label,
      reachMeters: reach?.meters,
      fiberType: detectFiber(name),
      coherent,
      wdmType,
    };
  } catch (err) {
    console.warn(` Failed ${url}: ${errorMessage(err)}`);
    return null;
  }
}

/**
 * Crawls the SmartOptics transceiver catalog and stores every product found.
 *
 * Walks up to `MAX_CATALOG_PAGES` catalog pages to collect product URLs, then
 * scrapes each product page and hands the result to
 * `findOrCreateScrapedTransceiver`. Requests are spaced `REQUEST_DELAY_MS`
 * apart. Per-product failures are logged and skipped.
 */
export async function scrapeSmartOptics(): Promise<void> {
  console.log("=== SmartOptics Scraper Starting ===\n");
  console.log("Note: SmartOptics is B2B — no public prices. Scraping specs + images only.\n");

  const vendorId = await ensureVendor(
    "SmartOptics",
    "manufacturer",
    "https://www.smartoptics.com",
    "https://smartoptics.com/products/optical-transceivers/"
  );

  const productUrls = new Set<string>();
  for (let page = 1; page <= MAX_CATALOG_PAGES; page++) {
    const url = page === 1 ? CATALOG_URL : `${CATALOG_URL}page/${page}/`;

    let urls: string[];
    try {
      urls = extractProductUrls(await fetchPage(url));
    } catch (err) {
      // A failed request (for example an HTTP error past the last page) ends
      // pagination; the reason is logged so a real outage is not mistaken for
      // an empty catalog.
      console.warn(` Catalog page ${page} unavailable (${errorMessage(err)}) — stopping pagination`);
      break;
    }

    if (urls.length === 0) break;
    urls.forEach((u) => productUrls.add(u));
    console.log(` Catalog page ${page}: ${urls.length} products`);
    await sleep(REQUEST_DELAY_MS);
  }

  console.log(`\nTotal product URLs: ${productUrls.size}`);
  if (productUrls.size === 0) {
    console.log("No products found — site may have changed structure");
    return;
  }

  let saved = 0;
  let withImages = 0;

  for (const url of productUrls) {
    const product = await scrapeProductPage(url);

    if (product) {
      try {
        await findOrCreateScrapedTransceiver({
          partNumber: product.sku,
          vendorId,
          formFactor: product.formFactor,
          speedGbps: product.speedGbps,
          speed: product.speed,
          reachMeters: product.reachMeters,
          reachLabel: product.reachLabel,
          fiberType: product.fiberType,
          // Only DWDM parts are tagged "DWDM-tunable"; CWDM parts keep their own label
          // instead of being reported as DWDM.
          wavelengths: product.wdmType === "DWDM" ? "DWDM-tunable" : product.wdmType,
          category: product.coherent ? "Coherent" : "DataCenter",
          imageUrl: product.imageUrl,
        });
        saved++;
        if (product.imageUrl) withImages++;
        console.log(` ✓ ${product.sku} — ${product.name.slice(0, 60)}`);
      } catch (err) {
        console.warn(` Error saving ${product.sku}: ${errorMessage(err).slice(0, 80)}`);
      }
    }

    // Throttle after every request, including ones that failed, so a run of
    // broken pages is not fetched back-to-back.
    await sleep(REQUEST_DELAY_MS);
  }

  console.log(`\n=== SmartOptics Complete: ${saved} products, ${withImages} with images ===`);
}

// CLI entry point: run the scraper, always close the DB pool, and exit non-zero on failure.
if (require.main === module) {
  void (async () => {
    let failed = false;

    try {
      await scrapeSmartOptics();
    } catch (err) {
      console.error("Fatal:", err);
      failed = true;
    }

    // Wait for the pool to shut down before any forced exit.
    try {
      await pool.end();
    } catch (err) {
      console.error("Fatal:", err);
      failed = true;
    }

    if (failed) process.exit(1);
  })();
}