- 16 commercial scrapers: replace the TIP-Bot/1.0 User-Agent with a Chrome/120 UA (GBICS confirmed returning 0 bytes for the bot UA; the Chrome UA returns 200 KB)
- gbics.ts: fix User-Agent (was returning empty HTML, now returns products)
- optictransceiver.ts: disable — the domain was repurposed as a plant shop (2026-04-06); Alocasia Regal Shield is not a transceiver.
109 lines
4.2 KiB
TypeScript
109 lines
4.2 KiB
TypeScript
/**
|
|
* Router-Switch.com Scraper
|
|
*
|
|
* Massive catalog of Cisco/Arista/Juniper/HP transceivers including:
|
|
* CSFP (GLC-BX-D/U), GBIC (WS-G5484), XENPAK, CFP, XFP, legacy SFP
|
|
* Cheerio-friendly category pages, good price transparency.
|
|
*
|
|
* Schedule: every 8h
|
|
*/
|
|
import * as cheerio from "cheerio";
|
|
import { ensureVendor, upsertPriceObservation, findOrCreateScrapedTransceiver } from "../utils/db";
|
|
import { contentHash, parsePrice, parseStockLevel } from "../utils/hash";
|
|
import { logger } from "../utils/logger";
|
|
|
|
/** Site origin; category paths and root-relative product links are appended to it. */
const BASE = "https://www.router-switch.com";

/**
 * Category listing pages to crawl. Each entry pairs a listing path (relative
 * to BASE) with the form factor recorded for every product found on it.
 *
 * Declared read-only: the table is shared module state that is only ever
 * iterated, so any accidental push/assignment is rejected at compile time.
 */
const CATEGORIES: ReadonlyArray<{ readonly url: string; readonly form_factor: string }> = [
  { url: "/sfp-modules.html", form_factor: "SFP" },
  { url: "/sfp-plus.html", form_factor: "SFP+" },
  { url: "/sfp28.html", form_factor: "SFP28" },
  { url: "/qsfp-plus.html", form_factor: "QSFP+" },
  { url: "/qsfp28.html", form_factor: "QSFP28" },
  { url: "/qsfp-dd.html", form_factor: "QSFP-DD" },
  { url: "/osfp.html", form_factor: "OSFP" },
  { url: "/xfp.html", form_factor: "XFP" },
  { url: "/csfp.html", form_factor: "CSFP" },
  { url: "/cfp.html", form_factor: "CFP" },
  { url: "/cfp2.html", form_factor: "CFP2" },
  { url: "/gbic-transceiver.html", form_factor: "GBIC" },
  { url: "/xenpak.html", form_factor: "XENPAK" },
  { url: "/cxp-transceiver.html", form_factor: "CXP" },
];
|
|
|
|
/** Maximum number of listing pages followed per category. */
const MAX_PAGES = 10;

/**
 * Part-number shape searched for in product names: 2–8 uppercase
 * letters/digits, a hyphen, one more letter/digit, then 3–30 characters drawn
 * from A-Z, 0-9, "-", "/" and ".". Hoisted out of the per-product loop.
 */
const PART_NUMBER_RE = /([A-Z0-9]{2,8}-[A-Z0-9][A-Z0-9\-\/\.]{3,30})/;

/**
 * Turns the href found on a product tile into an absolute URL.
 *
 * Absolute ("http…") hrefs are returned untouched and an empty href yields
 * BASE, exactly as before. Everything else is resolved against the listing
 * page URL so that relative ("foo.html") and protocol-relative ("//host/x")
 * links no longer get glued onto BASE as a malformed string.
 *
 * @param href    Raw href attribute value (may be empty).
 * @param pageUrl Absolute URL of the listing page the href was found on.
 * @returns Absolute product URL.
 */
function resolveProductUrl(href: string, pageUrl: string): string {
  if (!href) return BASE;
  if (href.startsWith("http")) return href;
  try {
    return new URL(href, pageUrl).toString();
  } catch {
    // Unparseable href: fall back to the plain concatenation used previously.
    return `${BASE}${href}`;
  }
}

/**
 * Fetches one category listing page, stores every product that has a name, a
 * recognisable part number and a positive price, then follows pagination
 * (recursively) for up to MAX_PAGES pages.
 *
 * @param catUrl      Category path relative to BASE.
 * @param form_factor Form factor recorded for every product on this listing.
 * @param vendorId    Vendor id passed to the DB helpers as both owner and price source.
 * @param page        1-based page number; pages after the first add a `p=` query parameter.
 * @returns Number of products stored from this page and all following pages;
 *          0 when the HTTP response is not OK.
 * @throws Whatever `fetch` rejects with (network failure, 30 s timeout).
 */
async function fetchPage(catUrl: string, form_factor: string, vendorId: string, page = 1): Promise<number> {
  const sep = catUrl.includes("?") ? "&" : "?";
  const url = `${BASE}${catUrl}${page > 1 ? `${sep}p=${page}` : ""}`;
  const resp = await fetch(url, {
    // Browser-like UA, per the accompanying change note: the bot UA was served empty bodies.
    headers: { "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36" },
    signal: AbortSignal.timeout(30_000),
  });
  if (!resp.ok) {
    // Still a soft failure (0 products), but no longer an invisible one.
    logger.warn(`Router-Switch ${form_factor} page ${page}: HTTP ${resp.status}`, { url });
    return 0;
  }

  const html = await resp.text();
  const $ = cheerio.load(html);
  let count = 0;

  const items = $(".products-grid .item, .product-item, li.item");
  for (let i = 0; i < items.length; i++) {
    const $el = $(items[i]);
    const name = $el.find(".product-name a, h2.product-name, .product-name").first().text().trim();
    const priceText = $el.find(".price, .regular-price, .special-price").first().text().trim();
    const href = $el.find("a[href]").first().attr("href") || "";
    if (!name || !priceText) continue;

    // Extract the part number from the product name; tiles without one are skipped.
    const partMatch = name.match(PART_NUMBER_RE);
    if (!partMatch) continue;
    const partNumber = partMatch[1].toUpperCase();

    const { price, currency } = parsePrice(priceText);
    if (price <= 0) continue;

    const stockText = $el.find(".availability span, .stock").text().trim();
    const productUrl = resolveProductUrl(href, url);

    try {
      const t = await findOrCreateScrapedTransceiver({
        partNumber,
        vendorId,
        formFactor: form_factor,
        name,
        url: productUrl,
      });
      await upsertPriceObservation({
        transceiverId: t.id,
        sourceVendorId: vendorId,
        price,
        currency: currency || "USD",
        stockLevel: parseStockLevel(stockText),
        url: productUrl,
        // NOTE(review): the hash uses the raw parsed currency, not the "USD"
        // default stored above; left as-is so hashes stay comparable with
        // previously stored observations.
        contentHash: contentHash(`${partNumber}:${price}:${currency}`),
      });
      count++;
    } catch (e) {
      // Best effort: one failing product must not abort the page, but the
      // failure is logged instead of being silently discarded.
      logger.warn(`Router-Switch ${form_factor}: failed to store ${partNumber}`, { err: e });
    }
  }

  // Follow the "next" link only while pages keep yielding stored products.
  const hasNext = $("a.next, .pages a:contains('Next')").length > 0;
  if (hasNext && count > 0 && page < MAX_PAGES) {
    count += await fetchPage(catUrl, form_factor, vendorId, page + 1);
  }
  return count;
}
|
|
|
|
/**
 * Runs a full Router-Switch.com scrape.
 *
 * Registers the vendor, then walks every entry of CATEGORIES one after the
 * other. A category that throws is logged as a warning and does not stop the
 * remaining categories. Per-category counts are logged when non-zero, and the
 * overall total is logged at the end.
 */
export async function scrapeRouterSwitch(): Promise<void> {
  logger.info("Router-Switch.com scraper starting");

  const vendorId = await ensureVendor("Router-Switch.com", "https://www.router-switch.com");
  let grandTotal = 0;

  for (const { url, form_factor } of CATEGORIES) {
    try {
      const scraped = await fetchPage(url, form_factor, vendorId);
      if (scraped > 0) {
        logger.info(`Router-Switch ${form_factor}: ${scraped} products`);
      }
      grandTotal += scraped;
    } catch (e) {
      // Isolate failures per category so one bad listing cannot sink the run.
      logger.warn(`Router-Switch ${form_factor} failed`, { err: e });
    }
  }

  logger.info(`Router-Switch done — ${grandTotal} total`);
}
|