Rene Fichtmueller 45c48755e4 feat(scraper): add NADDOD/QSFPTEK/AddOn to scheduler, fix pre-existing TS build errors
- Register scrape:pricing:naddod (48 */2), qsfptek (52 */2), addon (55 */2) in pg-boss
- Add boss.work() handlers for all three (fetch-based, run on Erik)
- Fix findOrCreateScrapedTransceiver callers: remove invalid `name`/`url` params,
  fix `t.id` → `t` (function already returns string ID)
- Fix ebay-enricher: remove invalid `extractType` option, use extraction.standard_name
  instead of non-existent `.description`, fix cheerio type incompatibility
- Fix community-issues: description → summary, publishedDate → published_at
- Startup zombie cleanup already deployed (index.ts) — no changes needed
- ProLabs rewritten to fetch-based catalog scraper (no Playwright, bypasses WAF)
2026-04-11 03:17:33 +02:00

107 lines
4.2 KiB
TypeScript

/**
* Router-Switch.com Scraper
*
* Massive catalog of Cisco/Arista/Juniper/HP transceivers including:
* CSFP (GLC-BX-D/U), GBIC (WS-G5484), XENPAK, CFP, XFP, legacy SFP
* Cheerio-friendly category pages, good price transparency.
*
* Schedule: every 8h
*/
import * as cheerio from "cheerio";
import { ensureVendor, upsertPriceObservation, findOrCreateScrapedTransceiver } from "../utils/db";
import { contentHash, parsePrice, parseStockLevel } from "../utils/hash";
import { logger } from "../utils/logger";
/** Origin that every category path below is appended to. */
const BASE = "https://www.router-switch.com";

/**
 * Category listing pages to crawl, one entry per form factor.
 *
 * Declared as a form-factor -> path map and expanded into `{ url, form_factor }`
 * records in declaration order.
 */
const CATEGORIES: Array<{ url: string; form_factor: string }> = Object.entries({
  "SFP": "/sfp-modules.html",
  "SFP+": "/sfp-plus.html",
  "SFP28": "/sfp28.html",
  "QSFP+": "/qsfp-plus.html",
  "QSFP28": "/qsfp28.html",
  "QSFP-DD": "/qsfp-dd.html",
  "OSFP": "/osfp.html",
  "XFP": "/xfp.html",
  "CSFP": "/csfp.html",
  "CFP": "/cfp.html",
  "CFP2": "/cfp2.html",
  "GBIC": "/gbic-transceiver.html",
  "XENPAK": "/xenpak.html",
  "CXP": "/cxp-transceiver.html",
}).map(([form_factor, url]) => ({ url, form_factor }));
/** Browser-like User-Agent header sent with every category request. */
const USER_AGENT =
  "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36";

/** Pagination stops once this page number has been processed. */
const MAX_PAGES = 10;

/** Upper-case hyphenated token taken from the product name as the part number, e.g. `GLC-BX-D`. */
const PART_NUMBER_PATTERN = /([A-Z0-9]{2,8}-[A-Z0-9][A-Z0-9\-\/\.]{3,30})/;

/**
 * Turn the href found in a product tile into an absolute URL.
 *
 * Absolute http(s) links are returned untouched. Anything else is resolved
 * against the page it was found on, so protocol-relative (`//host/x`) and
 * slash-less relative (`x.html`) links come out well-formed.
 */
function resolveProductUrl(href: string, pageUrl: string): string {
  if (href.startsWith("http")) return href;
  if (!href) return BASE; // tile without a link: fall back to the site root
  try {
    return new URL(href, pageUrl).toString();
  } catch {
    // Unparseable href: keep the plain concatenation rather than dropping the product.
    return `${BASE}${href}`;
  }
}

/**
 * Scrape one category listing, following "next" links, and store a price
 * observation for every product tile that yields a part number and a price.
 *
 * @param catUrl      Category path relative to `BASE` (may already carry a query string).
 * @param form_factor Form factor recorded on every transceiver found in this category.
 * @param vendorId    Vendor ID used both as transceiver vendor and as price source.
 * @param page        Page number to start from; page 1 is requested without a `p` parameter.
 * @returns Number of price observations stored across all pages visited.
 * @throws Whatever `fetch` / `resp.text()` rejects with (network error, 30 s timeout).
 *         Per-product storage errors are logged and skipped, not thrown.
 */
async function fetchPage(catUrl: string, form_factor: string, vendorId: string, page = 1): Promise<number> {
  const sep = catUrl.includes("?") ? "&" : "?";
  let total = 0;

  // Iterative pagination: each page's DOM can be released before the next one is fetched.
  for (let current = page; ; current++) {
    const url = `${BASE}${catUrl}${current > 1 ? `${sep}p=${current}` : ""}`;
    const resp = await fetch(url, {
      headers: { "User-Agent": USER_AGENT },
      signal: AbortSignal.timeout(30_000),
    });
    if (!resp.ok) {
      // Treated as "no products on this page", but leave a trace so blocks/404s are visible.
      logger.warn(`Router-Switch ${form_factor} page fetch failed`, { url, status: resp.status });
      break;
    }

    const $ = cheerio.load(await resp.text());
    let stored = 0;

    for (const node of $(".products-grid .item, .product-item, li.item").toArray()) {
      const $tile = $(node);
      const name = $tile.find(".product-name a, h2.product-name, .product-name").first().text().trim();
      const priceText = $tile.find(".price, .regular-price, .special-price").first().text().trim();
      if (!name || !priceText) continue;

      // Part number is extracted from the product name only.
      const partNumber = PART_NUMBER_PATTERN.exec(name)?.[1]?.toUpperCase();
      if (!partNumber) continue;

      const { price, currency } = parsePrice(priceText);
      // `!(price > 0)` also rejects NaN, which a plain `price <= 0` check lets through.
      if (!(price > 0)) continue;

      const stockText = $tile.find(".availability span, .stock").text().trim();
      const href = $tile.find("a[href]").first().attr("href") ?? "";
      const productUrl = resolveProductUrl(href, url);

      try {
        const transceiverId = await findOrCreateScrapedTransceiver({
          partNumber,
          vendorId,
          formFactor: form_factor,
        });
        await upsertPriceObservation({
          transceiverId,
          sourceVendorId: vendorId,
          price,
          currency: currency || "USD",
          stockLevel: parseStockLevel(stockText),
          url: productUrl,
          contentHash: contentHash(`${partNumber}:${price}:${currency}`),
        });
        stored++;
      } catch (err) {
        // Best effort: one bad product must not abort the page, but do not hide the failure.
        logger.warn(`Router-Switch ${form_factor}: failed to store ${partNumber}`, { err });
      }
    }

    total += stored;

    // Continue only while the page links onward, produced something, and the cap is not reached.
    const hasNext = $("a.next, .pages a:contains('Next')").length > 0;
    if (!hasNext || stored === 0 || current >= MAX_PAGES) break;
  }

  return total;
}
/**
 * Run the Router-Switch.com scraper over every configured category.
 *
 * Categories are processed one after another. A category that throws is
 * logged as a warning and skipped, so the remaining categories still run.
 * The number of stored products is logged per category (when non-zero) and
 * as a grand total at the end.
 */
export async function scrapeRouterSwitch(): Promise<void> {
  logger.info("Router-Switch.com scraper starting");

  const vendorId = await ensureVendor("Router-Switch.com", "https://www.router-switch.com");
  let grandTotal = 0;

  for (const { url, form_factor } of CATEGORIES) {
    try {
      const stored = await fetchPage(url, form_factor, vendorId);
      if (stored > 0) {
        logger.info(`Router-Switch ${form_factor}: ${stored} products`);
      }
      grandTotal += stored;
    } catch (e) {
      logger.warn(`Router-Switch ${form_factor} failed`, { err: e });
    }
  }

  logger.info(`Router-Switch done — ${grandTotal} total`);
}