/**
 * Router-Switch.com Scraper
 *
 * Massive catalog of Cisco/Arista/Juniper/HP transceivers including:
 * CSFP (GLC-BX-D/U), GBIC (WS-G5484), XENPAK, CFP, XFP, legacy SFP
 * Cheerio-friendly category pages, good price transparency.
 *
 * Schedule: every 8h
 */
import * as cheerio from "cheerio";
import { ensureVendor, upsertPriceObservation, findOrCreateScrapedTransceiver } from "../utils/db";
import { contentHash, parsePrice, parseStockLevel } from "../utils/hash";
import { logger } from "../utils/logger";

const BASE = "https://www.router-switch.com";

/** Upper bound on listing pages followed per category. */
const MAX_PAGES = 10;

/** Per-request timeout passed to `fetch` via `AbortSignal.timeout`. */
const REQUEST_TIMEOUT_MS = 30_000;

/** Browser-like User-Agent header sent with every listing request. */
const USER_AGENT =
  "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36";

/**
 * Matches a part-number-shaped token inside a product name: 2-8 uppercase
 * alphanumerics, a dash, then 4-31 more characters from [A-Z0-9-/.].
 * Only uppercase input matches, so the capture needs no case normalisation.
 */
const PART_NUMBER_RE = /([A-Z0-9]{2,8}-[A-Z0-9][A-Z0-9\-\/\.]{3,30})/;

/** Category listing paths (relative to BASE) and the form factor recorded for their products. */
const CATEGORIES: Array<{ url: string; form_factor: string }> = [
  { url: "/sfp-modules.html", form_factor: "SFP" },
  { url: "/sfp-plus.html", form_factor: "SFP+" },
  { url: "/sfp28.html", form_factor: "SFP28" },
  { url: "/qsfp-plus.html", form_factor: "QSFP+" },
  { url: "/qsfp28.html", form_factor: "QSFP28" },
  { url: "/qsfp-dd.html", form_factor: "QSFP-DD" },
  { url: "/osfp.html", form_factor: "OSFP" },
  { url: "/xfp.html", form_factor: "XFP" },
  { url: "/csfp.html", form_factor: "CSFP" },
  { url: "/cfp.html", form_factor: "CFP" },
  { url: "/cfp2.html", form_factor: "CFP2" },
  { url: "/gbic-transceiver.html", form_factor: "GBIC" },
  { url: "/xenpak.html", form_factor: "XENPAK" },
  { url: "/cxp-transceiver.html", form_factor: "CXP" },
];

/**
 * Turns an href taken from a product tile into an absolute URL.
 *
 * Absolute http(s) links are returned unchanged and an empty href yields BASE.
 * Everything else is resolved against BASE, so relative paths without a
 * leading slash and protocol-relative links produce valid URLs.
 */
function resolveProductUrl(href: string): string {
  if (!href) return BASE;
  if (/^https?:\/\//i.test(href)) return href;
  try {
    return new URL(href, BASE).toString();
  } catch {
    // Unparseable href: fall back to plain concatenation.
    return `${BASE}${href}`;
  }
}

/**
 * Fetches one category listing page, stores a price observation for every
 * product tile that has a name, a part-number-like token and a positive
 * price, then follows the "next" link for up to MAX_PAGES pages.
 *
 * @param catUrl Category path relative to BASE.
 * @param form_factor Form factor recorded for products found in this category.
 * @param vendorId Vendor id used both for the transceiver and as the price source.
 * @param page 1-based page number; pages above 1 are requested with a `p` query parameter.
 * @returns Number of products stored from this page and all following pages.
 * @throws Whatever `fetch` / `resp.text()` reject with for THIS page (network
 *   error, timeout). Failures on following pages are logged and end pagination.
 */
async function fetchPage(catUrl: string, form_factor: string, vendorId: string, page = 1): Promise<number> {
  const sep = catUrl.includes("?") ? "&" : "?";
  const pageQuery = page > 1 ? `${sep}p=${page}` : "";
  const url = `${BASE}${catUrl}${pageQuery}`;

  const resp = await fetch(url, {
    headers: { "User-Agent": USER_AGENT },
    signal: AbortSignal.timeout(REQUEST_TIMEOUT_MS),
  });
  if (!resp.ok) {
    logger.warn(`Router-Switch ${form_factor} page ${page}: HTTP ${resp.status} for ${url}`);
    return 0;
  }

  const html = await resp.text();
  const $ = cheerio.load(html);

  let count = 0;
  let failed = 0;
  let firstError: unknown;

  const items = $(".products-grid .item, .product-item, li.item").toArray();
  for (const item of items) {
    const $el = $(item);
    const name = $el.find(".product-name a, h2.product-name, .product-name").first().text().trim();
    const priceText = $el.find(".price, .regular-price, .special-price").first().text().trim();
    const href = $el.find("a[href]").first().attr("href") ?? "";
    if (!name || !priceText) continue;

    // Extract the part number from the product name.
    const partNumber = name.match(PART_NUMBER_RE)?.[1];
    if (!partNumber) continue;

    const { price, currency } = parsePrice(priceText);
    // Reject NaN/Infinity as well as zero and negative prices.
    if (!Number.isFinite(price) || price <= 0) continue;

    const stockText = $el.find(".availability span, .stock").text().trim();
    const productUrl = resolveProductUrl(href);

    try {
      const t = await findOrCreateScrapedTransceiver({
        partNumber,
        vendorId,
        formFactor: form_factor,
        name,
        url: productUrl,
      });
      await upsertPriceObservation({
        transceiverId: t.id,
        sourceVendorId: vendorId,
        price,
        currency: currency || "USD",
        stockLevel: parseStockLevel(stockText),
        url: productUrl,
        // Hash input kept unchanged so previously stored hashes stay comparable.
        contentHash: contentHash(`${partNumber}:${price}:${currency}`),
      });
      count++;
    } catch (err) {
      // Best effort: one bad item must not abort the page, but remember the failure.
      if (failed === 0) firstError = err;
      failed++;
    }
  }

  if (failed > 0) {
    logger.warn(`Router-Switch ${form_factor} page ${page}: ${failed} item(s) could not be stored`, {
      err: firstError,
    });
  }

  // Paginate up to MAX_PAGES pages; stop as soon as a page stores nothing.
  const hasNext = $("a.next, .pages a:contains('Next')").length > 0;
  if (hasNext && count > 0 && page < MAX_PAGES) {
    try {
      count += await fetchPage(catUrl, form_factor, vendorId, page + 1);
    } catch (err) {
      // Keep the count of what was already stored instead of losing it to the exception.
      logger.warn(`Router-Switch ${form_factor} page ${page + 1} failed; stopping pagination`, { err });
    }
  }

  return count;
}

/**
 * Entry point: ensures the Router-Switch.com vendor exists, scrapes every
 * category in CATEGORIES one after another and logs per-category and overall
 * totals. A failing category is logged and does not stop the remaining ones.
 */
export async function scrapeRouterSwitch(): Promise<void> {
  logger.info("Router-Switch.com scraper starting");
  const vendorId = await ensureVendor("Router-Switch.com", "https://www.router-switch.com");

  let total = 0;
  for (const cat of CATEGORIES) {
    try {
      const n = await fetchPage(cat.url, cat.form_factor, vendorId);
      if (n > 0) logger.info(`Router-Switch ${cat.form_factor}: ${n} products`);
      total += n;
    } catch (e) {
      logger.warn(`Router-Switch ${cat.form_factor} failed`, { err: e });
    }
  }

  logger.info(`Router-Switch done — ${total} total`);
}