/**
 * Ascent Optics Scraper — Chinese OEM transceiver manufacturer
 *
 * ascentoptics.com — product catalog loaded via JSON API endpoint.
 * Products are served via /product-list?is_render=1&category_id=CID
 * (HTML table in JSON response). No retail pricing — "Get Quote" model.
 * Category IDs are discovered from data-cid attributes on sub-category pages.
 *
 * Rate limited: 1 req/2sec.
 */
import { pool, findOrCreateScrapedTransceiver, ensureVendor } from "../utils/db";
import * as cheerio from "cheerio";

const BASE = "https://ascentoptics.com";

/** Minimum pause between two HTTP requests to the site (1 req / 2 sec). */
const REQUEST_DELAY_MS = 2000;

/** Default request headers; individual fetches override `Referer` / `X-Requested-With`. */
const HEADERS = {
  "User-Agent":
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
  Accept: "text/html,application/xhtml+xml,application/json,*/*;q=0.8",
  "Accept-Language": "en-US,en;q=0.9",
  "X-Requested-With": "XMLHttpRequest",
  Referer: "https://ascentoptics.com/optical-transceivers/",
};

// Known transceiver categories: slug → { categoryId, formFactor, speed, speedGbps }
// DAC/AOC/DCO/LPO excluded — transceivers only.
// categoryId 0 means "unknown": it is resolved at run time from the category page.
const CATEGORIES = [
  { slug: "/10g-sfp/", categoryId: 33, formFactor: "SFP+", speed: "10G", speedGbps: 10 },
  { slug: "/10g-xfp/", categoryId: 34, formFactor: "XFP", speed: "10G", speedGbps: 10 },
  { slug: "/25g-sfp28/", categoryId: 22, formFactor: "SFP28", speed: "25G", speedGbps: 25 },
  { slug: "/40g-qsfp/", categoryId: 20, formFactor: "QSFP+", speed: "40G", speedGbps: 40 },
  { slug: "/100g-qsfp28/", categoryId: 15, formFactor: "QSFP28", speed: "100G", speedGbps: 100 },
  { slug: "/100g-sfp112/", categoryId: 0, formFactor: "SFP112", speed: "100G", speedGbps: 100 },
  { slug: "/200g-qsfp56/", categoryId: 3, formFactor: "QSFP56", speed: "200G", speedGbps: 200 },
  { slug: "/200g-qsfp28-dd/", categoryId: 4, formFactor: "QSFP-DD", speed: "200G", speedGbps: 200 },
  { slug: "/400g-qsfp56-dd/", categoryId: 5, formFactor: "QSFP-DD", speed: "400G", speedGbps: 400 },
  { slug: "/400g-osfp/", categoryId: 6, formFactor: "OSFP", speed: "400G", speedGbps: 400 },
  { slug: "/400g-qsfp112/", categoryId: 7, formFactor: "QSFP112", speed: "400G", speedGbps: 400 },
  { slug: "/800g-osfp/", categoryId: 9, formFactor: "OSFP", speed: "800G", speedGbps: 800 },
  { slug: "/800g-qsfp-dd800-200g-per-line/", categoryId: 121, formFactor: "QSFP-DD", speed: "800G", speedGbps: 800 },
  { slug: "/800g-qsfp112-dd/", categoryId: 0, formFactor: "QSFP112", speed: "800G", speedGbps: 800 },
  { slug: "/50g-sfp56/", categoryId: 0, formFactor: "SFP56", speed: "50G", speedGbps: 50 },
  { slug: "/16g-sfp/", categoryId: 0, formFactor: "SFP", speed: "16G", speedGbps: 16 },
];

/**
 * One row parsed from a category's product table.
 * `name` and `url` are collected by the parser but are not currently forwarded
 * to `findOrCreateScrapedTransceiver`.
 */
interface Product {
  partNumber: string;
  name: string;
  url: string;
  formFactor: string;
  speed: string;
  speedGbps: number;
  reachLabel?: string;
  reachMeters?: number;
  fiberType?: string;
  wavelength?: string;
}

/** A single reach-detection rule: first matching rule wins. */
interface ReachPattern {
  regex: RegExp;
  label: string;
  meters: number;
}

/** Explicit distances recognised in product text, checked in this order. */
const EXPLICIT_DISTANCES: ReadonlyArray<readonly [number, "KM" | "M"]> = [
  [120, "KM"],
  [80, "KM"],
  [70, "KM"],
  [60, "KM"],
  [40, "KM"],
  [20, "KM"],
  [10, "KM"],
  [5, "KM"],
  [2, "KM"],
  [550, "M"],
  [500, "M"],
  [300, "M"],
  [220, "M"],
  [100, "M"],
];

/**
 * Reach rules, built once at module load. Explicit distances come first, then
 * optic-type codes as a fallback. All patterns expect upper-cased input.
 * The `(?<!\d\.)` guard stops the fractional part of a decimal such as
 * "1.5KM" from being read as "5KM".
 */
const REACH_PATTERNS: readonly ReachPattern[] = [
  ...EXPLICIT_DISTANCES.map(
    ([value, unit]): ReachPattern => ({
      regex: new RegExp(`(?<!\\d\\.)\\b${value}\\s*${unit}\\b`),
      label: `${value}${unit.toLowerCase()}`,
      meters: unit === "KM" ? value * 1000 : value,
    }),
  ),
  { regex: /\bLR4\b/, label: "10km", meters: 10000 },
  { regex: /\bLR\b/, label: "10km", meters: 10000 },
  { regex: /\bER\b/, label: "40km", meters: 40000 },
  { regex: /\bZR\b/, label: "80km", meters: 80000 },
  { regex: /\bSR\b/, label: "300m", meters: 300 },
  { regex: /\bDR\b/, label: "500m", meters: 500 },
  { regex: /\bFR\b/, label: "2km", meters: 2000 },
];

// Optic-type codes are matched as standalone tokens: not preceded or followed by
// a letter, but allowed at the very start/end of the text and before digits ("SR4").
const SMF_PATTERN = /single.?mode|smf|(?<![a-z])(?:lx|lr|er|zr)(?![a-z])|bidi|cwdm|dwdm/i;
const MMF_PATTERN = /multi.?mode|mmf|(?<![a-z])(?:sx|sr)(?![a-z])/i;
const COPPER_PATTERN = /copper|dac|twinax|rj45|base-t/i;

/** Resolves after `ms` milliseconds. */
function sleep(ms: number): Promise<void> {
  return new Promise((resolve) => setTimeout(resolve, ms));
}

/** Extracts a printable message from any thrown value (not only `Error` instances). */
function errorMessage(err: unknown): string {
  return err instanceof Error ? err.message : String(err);
}

/**
 * Derives the reach from free text (part number + description).
 *
 * @param text Product text; matching is case-insensitive.
 * @returns The first matching rule's label and distance in meters, or
 *          `undefined` when no rule matches.
 */
function detectReach(text: string): { label: string; meters: number } | undefined {
  const upper = text.toUpperCase();
  const hit = REACH_PATTERNS.find(({ regex }) => regex.test(upper));
  return hit ? { label: hit.label, meters: hit.meters } : undefined;
}

/**
 * Classifies the medium from free text as "SMF", "MMF" or "Copper".
 * Single-mode hints are checked first, then multi-mode, then copper;
 * text with no recognisable hint defaults to "SMF".
 */
function detectFiber(text: string): string {
  if (SMF_PATTERN.test(text)) return "SMF";
  if (MMF_PATTERN.test(text)) return "MMF";
  if (COPPER_PATTERN.test(text)) return "Copper";
  return "SMF";
}

/**
 * Returns the first 3–4 digit number followed by "nm" (digits only, no unit),
 * or an empty string when the text contains none.
 */
function detectWavelength(text: string): string {
  return text.match(/(\d{3,4})\s*nm/i)?.[1] ?? "";
}

/**
 * Fetch category ID from a sub-page's data-cid attribute (fallback for categoryId=0 entries).
 *
 * @param slug Category path such as "/50g-sfp56/".
 * @returns The numeric ID, or 0 when the page cannot be fetched or has no data-cid.
 *          Never throws: every failure is reported as 0.
 */
async function fetchCategoryId(slug: string): Promise<number> {
  try {
    const resp = await fetch(BASE + slug, {
      // Blank out the AJAX marker for this request, which reads the HTML page itself.
      headers: { ...HEADERS, "X-Requested-With": "" },
      signal: AbortSignal.timeout(15000),
    });
    if (!resp.ok) return 0;

    const html = await resp.text();
    const id = Number.parseInt(html.match(/data-cid="(\d+)"/)?.[1] ?? "", 10);
    return Number.isNaN(id) ? 0 : id;
  } catch {
    return 0;
  }
}

/**
 * Fetch product table HTML for a category via the JSON API.
 *
 * @param categoryId Numeric category ID sent as `category_id`.
 * @param slug Category path, used to build the Referer header.
 * @returns The `product_table_list` HTML string, or "" when the response has
 *          no such string field.
 * @throws Error on a non-2xx response; fetch/JSON errors propagate unchanged.
 */
async function fetchProductTable(categoryId: number, slug: string): Promise<string> {
  const url = `${BASE}/product-list?is_render=1&category_id=${categoryId}`;
  const resp = await fetch(url, {
    headers: { ...HEADERS, Referer: BASE + slug },
    signal: AbortSignal.timeout(30000),
  });
  if (!resp.ok) throw new Error(`HTTP ${resp.status} for /product-list?category_id=${categoryId}`);

  // Narrow the untrusted payload instead of asserting its shape.
  const data: unknown = await resp.json();
  if (typeof data !== "object" || data === null) return "";
  const table = (data as { product_table_list?: unknown }).product_table_list;
  return typeof table === "string" ? table : "";
}

/**
 * Turns an href from the product table into an absolute http(s) URL.
 * Missing, unparsable, or non-http(s) hrefs (e.g. "javascript:") yield `fallback`.
 */
function resolveProductUrl(href: string | undefined, fallback: string): string {
  if (!href) return fallback;
  try {
    const resolved = new URL(href, `${BASE}/`);
    const isWeb = resolved.protocol === "http:" || resolved.protocol === "https:";
    return isWeb ? resolved.href : fallback;
  } catch {
    return fallback;
  }
}

/**
 * Parse HTML table returned by /product-list API.
 *
 * @param tableHtml HTML containing the product `<tr>` rows.
 * @param cat Category the table belongs to; supplies form factor and speed.
 * @returns One `Product` per data row; header and malformed rows are skipped.
 */
function parseProductTable(
  tableHtml: string,
  cat: typeof CATEGORIES[number],
): Product[] {
  const $ = cheerio.load(tableHtml);
  const categoryUrl = BASE + cat.slug;
  const products: Product[] = [];

  $("tr").each((_i, row) => {
    const cells = $(row).find("td");
    if (cells.length < 3) return;

    // Column layout: [image] [part_number] [description] [data_rate] [distance] [wavelength] [connector] [datasheet] [quote]
    const partNumberCell = cells.eq(1);
    const descCell = cells.eq(2);
    const rawPart = partNumberCell.text().trim();
    const desc = descCell.text().trim();

    // Skip header rows and non-product rows
    if (!rawPart || rawPart.length < 3 || /part\s*no|description/i.test(rawPart)) return;
    if (rawPart.length > 80) return;

    // Prefer the part-number link, then the description link, then the category page.
    const href =
      partNumberCell.find("a[href]").first().attr("href") ??
      descCell.find("a[href]").first().attr("href");

    const combined = `${rawPart} ${desc}`;
    const reach = detectReach(combined);

    products.push({
      partNumber: rawPart,
      name: desc || rawPart,
      url: resolveProductUrl(href, categoryUrl),
      formFactor: cat.formFactor,
      speed: cat.speed,
      speedGbps: cat.speedGbps,
      reachLabel: reach?.label,
      reachMeters: reach?.meters,
      fiberType: detectFiber(combined),
      wavelength: detectWavelength(combined),
    });
  });

  return products;
}

/**
 * Scrapes every category in `CATEGORIES` and stores each product through
 * `findOrCreateScrapedTransceiver`.
 *
 * A failing category or product is logged and skipped so one bad response
 * does not abort the run. Only an `ensureVendor` failure rejects the promise.
 */
export async function scrapeAscentOptics(): Promise<void> {
  console.log("=== Ascent Optics Scraper Starting ===\n");

  const vendorId = await ensureVendor(
    "Ascent Optics",
    "compatible",
    "https://ascentoptics.com",
    BASE + "/optical-transceivers/",
  );

  let totalProducts = 0;

  for (const cat of CATEGORIES) {
    let cid = cat.categoryId;

    // Resolve unknown category IDs dynamically
    if (cid === 0) {
      cid = await fetchCategoryId(cat.slug);
      // The lookup is itself a request, so honour the rate limit before the next one.
      await sleep(REQUEST_DELAY_MS);
      if (cid === 0) {
        console.log(` Skipping ${cat.slug} — category ID not found`);
        continue;
      }
    }

    console.log(`\n--- ${cat.formFactor} (${cat.speed}) [cid=${cid}] ---`);

    try {
      const tableHtml = await fetchProductTable(cid, cat.slug);
      const products = parseProductTable(tableHtml, cat);
      console.log(` Found ${products.length} products`);

      for (const product of products) {
        try {
          await findOrCreateScrapedTransceiver({
            partNumber: product.partNumber,
            vendorId,
            formFactor: product.formFactor,
            speedGbps: product.speedGbps,
            speed: product.speed,
            reachMeters: product.reachMeters,
            reachLabel: product.reachLabel,
            fiberType: product.fiberType,
            wavelengths: product.wavelength,
            category: "DataCenter",
          });
          totalProducts++;
        } catch (err) {
          console.warn(` Error: ${errorMessage(err).slice(0, 80)}`);
        }
      }
    } catch (err) {
      console.error(` Category failed: ${errorMessage(err)}`);
    }

    await sleep(REQUEST_DELAY_MS);
  }

  console.log(`\n=== Ascent Optics Complete: ${totalProducts} products (catalog only — no pricing) ===`);
}

// Run the scraper when this file is executed directly rather than imported.
if (require.main === module) {
  scrapeAscentOptics()
    .then(() => pool.end())
    .catch((err) => {
      console.error("Fatal:", err);
      // NOTE(review): pool.end() is started but not awaited before the process exits.
      pool.end();
      process.exit(1);
    });
}