Rene Fichtmueller 2e852e0a2f fix(scrapers): replace bot User-Agents with Chrome UA + disable dead domain
- 16 commercial scrapers: replace TIP-Bot/1.0 with Chrome/120 UA
  (GBICS confirmed returning 0 bytes for bot UA, Chrome UA returns 200KB)
- gbics.ts: fix User-Agent (was returning empty HTML, now returns products)
- optictransceiver.ts: disable — domain repurposed as plant shop (2026-04-06)
  Alocasia Regal Shield is not a transceiver.
2026-04-06 02:17:50 +02:00

109 lines
4.2 KiB
TypeScript

/**
* Router-Switch.com Scraper
*
* Massive catalog of Cisco/Arista/Juniper/HP transceivers including:
* CSFP (GLC-BX-D/U), GBIC (WS-G5484), XENPAK, CFP, XFP, legacy SFP
* Cheerio-friendly category pages, good price transparency.
*
* Schedule: every 8h
*/
import * as cheerio from "cheerio";
import { ensureVendor, upsertPriceObservation, findOrCreateScrapedTransceiver } from "../utils/db";
import { contentHash, parsePrice, parseStockLevel } from "../utils/hash";
import { logger } from "../utils/logger";
/** Storefront origin; every category path below is appended to it when fetching. */
const BASE = "https://www.router-switch.com";

/** A category listing path plus the form factor recorded for products found on it. */
type CategoryPage = { url: string; form_factor: string };

/** Compact `[category path, form factor]` table, expanded into CATEGORIES below. */
const CATEGORY_TABLE: ReadonlyArray<readonly [string, string]> = [
  ["/sfp-modules.html", "SFP"],
  ["/sfp-plus.html", "SFP+"],
  ["/sfp28.html", "SFP28"],
  ["/qsfp-plus.html", "QSFP+"],
  ["/qsfp28.html", "QSFP28"],
  ["/qsfp-dd.html", "QSFP-DD"],
  ["/osfp.html", "OSFP"],
  ["/xfp.html", "XFP"],
  ["/csfp.html", "CSFP"],
  ["/cfp.html", "CFP"],
  ["/cfp2.html", "CFP2"],
  ["/gbic-transceiver.html", "GBIC"],
  ["/xenpak.html", "XENPAK"],
  ["/cxp-transceiver.html", "CXP"],
];

/** Category pages to scrape, in the order they are visited. */
const CATEGORIES: CategoryPage[] = CATEGORY_TABLE.map(([url, form_factor]) => ({ url, form_factor }));
/** Chrome desktop User-Agent string sent with every category request. */
const BROWSER_USER_AGENT =
  "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36";

/** Highest listing page number that will be requested for a single category. */
const MAX_PAGES = 10;

/**
 * Part-number shape looked for in a product name: 2-8 upper-case alphanumerics,
 * a dash, then 4-31 more characters drawn from upper-case alphanumerics, `-`, `/`, `.`.
 * Not global, so sharing one instance across calls carries no `lastIndex` state.
 */
const PART_NUMBER_RE = /([A-Z0-9]{2,8}-[A-Z0-9][A-Z0-9\-\/\.]{3,30})/;

/**
 * Turns a product link's `href` into an absolute URL.
 *
 * Hrefs that already start with "http" are returned untouched. Everything else
 * is resolved against BASE, so relative paths without a leading slash and
 * protocol-relative links (`//host/path`) come out well-formed. An empty,
 * unparseable, or non-HTTP(S) href (e.g. `javascript:`) falls back to BASE.
 */
function resolveProductUrl(href: string): string {
  if (!href) return BASE;
  if (href.startsWith("http")) return href;
  try {
    const resolved = new URL(href, BASE);
    if (resolved.protocol === "http:" || resolved.protocol === "https:") {
      return resolved.toString();
    }
  } catch {
    // Unparseable href: fall through to the storefront origin.
  }
  return BASE;
}

/**
 * Fetches one listing page of a category, stores every product that has a
 * recognisable part number and a positive price, then follows the "next" link.
 *
 * @param catUrl      Category path appended to BASE (may already contain a query string).
 * @param form_factor Form factor recorded on every transceiver found in this category.
 * @param vendorId    Vendor id used both as the transceiver's vendor and as the price source.
 * @param page        1-based page number; pages above 1 add a `p=<page>` query parameter.
 * @returns Number of products stored from this page plus all following pages.
 *          A non-OK HTTP response yields 0.
 * @throws Whatever `fetch` or `resp.text()` throws for THIS page (network error,
 *         30 s timeout). Failures on following pages are logged and end
 *         pagination instead of discarding the count already accumulated.
 */
async function fetchPage(catUrl: string, form_factor: string, vendorId: string, page = 1): Promise<number> {
  const sep = catUrl.includes("?") ? "&" : "?";
  const url = `${BASE}${catUrl}${page > 1 ? `${sep}p=${page}` : ""}`;

  const resp = await fetch(url, {
    headers: { "User-Agent": BROWSER_USER_AGENT },
    signal: AbortSignal.timeout(30_000),
  });
  if (!resp.ok) {
    // Previously silent; a blocked or missing category now leaves a trace in the logs.
    logger.warn(`Router-Switch ${form_factor}: HTTP ${resp.status} for ${url}`);
    return 0;
  }

  const html = await resp.text();
  const $ = cheerio.load(html);
  let count = 0;

  const items = $(".products-grid .item, .product-item, li.item");
  for (let i = 0; i < items.length; i++) {
    const $el = $(items[i]);
    const name = $el.find(".product-name a, h2.product-name, .product-name").first().text().trim();
    const priceText = $el.find(".price, .regular-price, .special-price").first().text().trim();
    const href = $el.find("a[href]").first().attr("href") || "";
    if (!name || !priceText) continue;

    // The part number is taken from the product name; items without one are skipped.
    const partNumber = PART_NUMBER_RE.exec(name)?.[1]?.toUpperCase();
    if (!partNumber) continue;

    const { price, currency } = parsePrice(priceText);
    if (price <= 0) continue;

    const stockText = $el.find(".availability span, .stock").text().trim();
    const productUrl = resolveProductUrl(href);

    try {
      const t = await findOrCreateScrapedTransceiver({
        partNumber,
        vendorId,
        formFactor: form_factor,
        name,
        url: productUrl,
      });
      await upsertPriceObservation({
        transceiverId: t.id,
        sourceVendorId: vendorId,
        price,
        currency: currency || "USD",
        stockLevel: parseStockLevel(stockText),
        url: productUrl,
        // Hash input intentionally left as-is so hashes stay comparable with existing rows.
        contentHash: contentHash(`${partNumber}:${price}:${currency}`),
      });
      count++;
    } catch (err) {
      // Best effort per product: one failed write must not abort the page, but it is no longer silent.
      logger.warn(`Router-Switch ${form_factor}: failed to store ${partNumber}`, { err });
    }
  }

  // Follow the "next" link only while pages keep yielding products, up to MAX_PAGES.
  const hasNext = $("a.next, .pages a:contains('Next')").length > 0;
  if (hasNext && count > 0 && page < MAX_PAGES) {
    try {
      count += await fetchPage(catUrl, form_factor, vendorId, page + 1);
    } catch (err) {
      // Keep what this page already stored rather than reporting the whole category as failed.
      logger.warn(`Router-Switch ${form_factor}: page ${page + 1} failed; stopping pagination`, { err });
    }
  }
  return count;
}
/**
 * Entry point of the Router-Switch.com scraper.
 *
 * Ensures the vendor record exists, then walks CATEGORIES one at a time,
 * handing each to `fetchPage`. A category that throws is logged as a warning
 * and skipped; the remaining categories still run. The number of products
 * reported by `fetchPage` is summed and logged at the end.
 */
export async function scrapeRouterSwitch(): Promise<void> {
  logger.info("Router-Switch.com scraper starting");

  const vendorId = await ensureVendor("Router-Switch.com", "https://www.router-switch.com");
  let total = 0;

  // Sequential on purpose: each category is awaited before the next one starts.
  for (const { url: categoryPath, form_factor: formFactor } of CATEGORIES) {
    try {
      const stored = await fetchPage(categoryPath, formFactor, vendorId);
      if (stored > 0) {
        logger.info(`Router-Switch ${formFactor}: ${stored} products`);
      }
      total += stored;
    } catch (error) {
      logger.warn(`Router-Switch ${formFactor} failed`, { err: error });
    }
  }

  logger.info(`Router-Switch done — ${total} total`);
}