/**
 * Comms Express Scraper — comms-express.com
 *
 * UK-based reseller with excellent coverage of:
 *   CFP, CFP2, CFP4, CFP2-DCO, CSFP, SFP-DD, QSFP112, OSFP, legacy GBIC/XENPAK/CXP
 * Good pricing transparency, cheerio-friendly.
 *
 * Schedule: every 8h
 */
import * as cheerio from "cheerio";
import { ensureVendor, upsertPriceObservation, findOrCreateScrapedTransceiver, pool } from "../utils/db";
import { contentHash, parsePrice, parseStockLevel } from "../utils/hash";
import { logger } from "../utils/logger";

const BASE = "https://www.comms-express.com";

/** Upper bound on the page number requested for a single category. */
const MAX_PAGES = 20;

/** Per-request timeout for category page fetches, in milliseconds. */
const REQUEST_TIMEOUT_MS = 30_000;

const USER_AGENT = "Mozilla/5.0 (compatible; TIPBot/1.0; +https://tip.context-x.org)";

// Hoisted so the patterns are compiled once rather than per product tile.
// Neither uses the `g` flag, so sharing them across calls is stateless.
const PART_IN_NAME = /[A-Z0-9]{2,4}-[A-Z0-9][A-Z0-9\-\.]+/;
const PART_IN_HREF = /\/([A-Z0-9\-]{6,30})\/?$/i;

// Categories mapped to form_factor
const CATEGORIES: Array<{ url: string; form_factor: string; speed_hint?: string }> = [
  { url: "/transceivers/sfp-modules/", form_factor: "SFP" },
  { url: "/transceivers/sfp-plus-modules/", form_factor: "SFP+" },
  { url: "/transceivers/sfp28-modules/", form_factor: "SFP28" },
  { url: "/transceivers/sfp56-modules/", form_factor: "SFP56" },
  { url: "/transceivers/sfp-dd-modules/", form_factor: "SFP-DD" },
  { url: "/transceivers/qsfp-modules/", form_factor: "QSFP+" },
  { url: "/transceivers/qsfp28-modules/", form_factor: "QSFP28" },
  { url: "/transceivers/qsfp56-modules/", form_factor: "QSFP56" },
  { url: "/transceivers/qsfp-dd-modules/", form_factor: "QSFP-DD" },
  { url: "/transceivers/qsfp-dd-800-modules/", form_factor: "QSFP-DD800" },
  { url: "/transceivers/qsfp112-modules/", form_factor: "QSFP112" },
  { url: "/transceivers/osfp-modules/", form_factor: "OSFP" },
  { url: "/transceivers/osfp112-modules/", form_factor: "OSFP112" },
  { url: "/transceivers/cfp-modules/", form_factor: "CFP" },
  { url: "/transceivers/cfp2-modules/", form_factor: "CFP2" },
  { url: "/transceivers/cfp2-dco-modules/", form_factor: "CFP2-DCO" },
  { url: "/transceivers/cfp4-modules/", form_factor: "CFP4" },
  { url: "/transceivers/csfp-modules/", form_factor: "CSFP" },
  { url: "/transceivers/xfp-modules/", form_factor: "XFP" },
  { url: "/transceivers/cxp-modules/", form_factor: "CXP" },
  { url: "/transceivers/gbic-modules/", form_factor: "GBIC" },
  { url: "/transceivers/xenpak-modules/", form_factor: "XENPAK" },
];

/** One product tile scraped from a category listing page. */
interface Product {
  partNumber: string;
  name: string;
  price: number;
  currency: string;
  stock: string;
  url: string;
  formFactor: string;
}

/**
 * Derives a part number for a product tile.
 *
 * Preference order: a part-number-looking token in the display name, then the
 * last path segment of the product link, then the first 40 characters of the
 * name. The result may be an empty string when the name itself is empty.
 */
function extractPartNumber(name: string, href: string): string {
  const fromName = name.match(PART_IN_NAME);
  if (fromName) return fromName[0];

  const fromHref = href.match(PART_IN_HREF);
  if (fromHref) return fromHref[1] || fromHref[0];

  return name.substring(0, 40);
}

/**
 * Turns a scraped `href` into an absolute product URL.
 *
 * Empty hrefs map to the site root and absolute (`http…`) hrefs are returned
 * untouched. Anything else is resolved against the listing page URL, so that
 * relative links without a leading slash and protocol-relative links do not
 * end up glued directly onto the host name.
 */
function resolveProductUrl(href: string, pageUrl: string): string {
  if (!href) return BASE;
  if (href.startsWith("http")) return href;
  try {
    return new URL(href, pageUrl).toString();
  } catch {
    // Unparseable href: fall back to plain concatenation.
    return `${BASE}${href}`;
  }
}

/**
 * Extracts all priced products from one listing page.
 *
 * @param html       Raw HTML of the listing page.
 * @param formFactor Form factor recorded on every product found on the page.
 * @param pageUrl    URL the HTML was fetched from; base for relative links.
 * @returns The products found, plus whether the page advertises a next page.
 */
function parseProducts(
  html: string,
  formFactor: string,
  pageUrl: string,
): { products: Product[]; hasNext: boolean } {
  const $ = cheerio.load(html);
  const products: Product[] = [];

  $(".product-item, .product-list-item, [data-product-id]").each((_, el) => {
    const $el = $(el);
    const name = $el.find(".product-name, .product-title, h3, h4").first().text().trim();
    const priceText = $el.find(".price, .product-price, [class*=price]").first().text().trim();
    const href = $el.find("a[href*='/transceivers/']").first().attr("href") || "";
    const stockText = $el.find(".stock, .availability, [class*=stock]").first().text().trim();

    const partNumber = extractPartNumber(name, href);
    const { price, currency } = parsePrice(priceText);

    // Tiles without an identifiable part number or a positive price are skipped.
    if (!partNumber || price <= 0) return;

    products.push({
      partNumber: partNumber.toUpperCase(),
      name,
      price,
      // Default to GBP when parsePrice reports no currency.
      currency: currency || "GBP",
      stock: parseStockLevel(stockText),
      url: resolveProductUrl(href, pageUrl),
      formFactor,
    });
  });

  const hasNext = $(".pagination .next, a[rel=next]").length > 0;
  return { products, hasNext };
}

/**
 * Fetches every listing page of one category, starting at `page`.
 *
 * Pagination stops when a page returns a non-OK status, has no "next" link,
 * yields no products, or page MAX_PAGES has been fetched. Network errors and
 * timeouts propagate to the caller.
 *
 * @param cat      Category entry from CATEGORIES.
 * @param vendorId Not used by the fetch itself; kept so existing call sites
 *                 continue to work unchanged.
 * @param page     1-based page number to start from.
 * @returns All products collected from `page` onwards.
 */
async function fetchCategory(cat: typeof CATEGORIES[0], vendorId: string, page = 1): Promise<Product[]> {
  void vendorId;

  const collected: Product[] = [];

  for (let current = page; ; current++) {
    const url = `${BASE}${cat.url}?page=${current}`;
    const resp = await fetch(url, {
      headers: { "User-Agent": USER_AGENT },
      signal: AbortSignal.timeout(REQUEST_TIMEOUT_MS),
    });

    if (!resp.ok) {
      // Previously this was dropped silently; leave a trace so missing
      // categories can be diagnosed. Products from earlier pages are kept.
      logger.warn(`Comms Express ${cat.form_factor} page ${current}: HTTP ${resp.status}`, { url });
      break;
    }

    const { products, hasNext } = parseProducts(await resp.text(), cat.form_factor, url);
    collected.push(...products);

    if (!hasNext || products.length === 0 || current >= MAX_PAGES) break;
  }

  return collected;
}

/**
 * Scrapes every configured Comms Express category and records one price
 * observation per product.
 *
 * A failure inside a category (fetch or persistence) is logged as a warning
 * and the run continues with the next category.
 */
export async function scrapeCommsExpress(): Promise<void> {
  logger.info("Comms Express scraper starting");
  const vendorId = await ensureVendor("Comms Express", "https://www.comms-express.com");

  let total = 0;
  let newItems = 0;

  for (const cat of CATEGORIES) {
    try {
      const products = await fetchCategory(cat, vendorId);

      for (const p of products) {
        const transceiverResult = await findOrCreateScrapedTransceiver({
          partNumber: p.partNumber,
          vendorId,
          formFactor: p.formFactor,
          name: p.name,
          url: p.url,
        });

        // The hash covers part number, price and currency only; stock level
        // and URL are not part of it.
        const hash = contentHash(`${p.partNumber}:${p.price}:${p.currency}`);

        const isNew = await upsertPriceObservation({
          transceiverId: transceiverResult.id,
          sourceVendorId: vendorId,
          price: p.price,
          currency: p.currency,
          stockLevel: p.stock,
          url: p.url,
          contentHash: hash,
        });

        // Counted as new whenever the upsert returns a truthy value.
        if (isNew) newItems++;
        total++;
      }

      if (products.length > 0) {
        logger.info(`Comms Express ${cat.form_factor}: ${products.length} products`);
      }
    } catch (e) {
      logger.warn(`Comms Express ${cat.form_factor} failed`, { err: e });
    }
  }

  logger.info(`Comms Express done — ${total} total, ${newItems} new`);
}