Add Comms-Express, Router-Switch.com, Multimode Inc, OpticTransceiver.com, and Wiitek scrapers covering CFP2-DCO, CFP4, OSFP224, QSFP112, CXP, GBIC, XENPAK, CSFP, SFP-DD, SFP56, QSFP56, and other previously uncovered form factors. Each scraper is scheduled to run every 8h, and its worker registration has been added to the scheduler. Also export a `db` alias from utils/db.ts to fix the eBay enricher and community scrapers crashing with "Cannot read properties of undefined (reading 'query')".
135 lines
5.6 KiB
TypeScript
135 lines
5.6 KiB
TypeScript
/**
 * Comms Express Scraper — comms-express.com
 *
 * UK-based reseller with excellent coverage of:
 *   CFP, CFP2, CFP4, CFP2-DCO, CSFP, SFP-DD, QSFP112, OSFP, legacy GBIC/XENPAK/CXP
 * Good pricing transparency, and the markup is cheerio-friendly.
 *
 * Schedule: every 8h
 */
|
|
import * as cheerio from "cheerio";
|
|
import { ensureVendor, upsertPriceObservation, findOrCreateScrapedTransceiver, pool } from "../utils/db";
|
|
import { contentHash, parsePrice, parseStockLevel } from "../utils/hash";
|
|
import { logger } from "../utils/logger";
|
|
|
|
/** Site origin; category paths and root-relative product links are appended to it. */
const BASE = "https://www.comms-express.com";

/**
 * Listing-page path → form factor, one row per category that gets scraped.
 * Kept as a compact two-column table; CATEGORIES below turns each row into
 * the object shape the rest of the module consumes.
 */
const CATEGORY_TABLE: ReadonlyArray<readonly [string, string]> = [
  ["/transceivers/sfp-modules/", "SFP"],
  ["/transceivers/sfp-plus-modules/", "SFP+"],
  ["/transceivers/sfp28-modules/", "SFP28"],
  ["/transceivers/sfp56-modules/", "SFP56"],
  ["/transceivers/sfp-dd-modules/", "SFP-DD"],
  ["/transceivers/qsfp-modules/", "QSFP+"],
  ["/transceivers/qsfp28-modules/", "QSFP28"],
  ["/transceivers/qsfp56-modules/", "QSFP56"],
  ["/transceivers/qsfp-dd-modules/", "QSFP-DD"],
  ["/transceivers/qsfp-dd-800-modules/", "QSFP-DD800"],
  ["/transceivers/qsfp112-modules/", "QSFP112"],
  ["/transceivers/osfp-modules/", "OSFP"],
  ["/transceivers/osfp112-modules/", "OSFP112"],
  ["/transceivers/cfp-modules/", "CFP"],
  ["/transceivers/cfp2-modules/", "CFP2"],
  ["/transceivers/cfp2-dco-modules/", "CFP2-DCO"],
  ["/transceivers/cfp4-modules/", "CFP4"],
  ["/transceivers/csfp-modules/", "CSFP"],
  ["/transceivers/xfp-modules/", "XFP"],
  ["/transceivers/cxp-modules/", "CXP"],
  ["/transceivers/gbic-modules/", "GBIC"],
  ["/transceivers/xenpak-modules/", "XENPAK"],
];

// Categories mapped to form_factor
const CATEGORIES: Array<{ url: string; form_factor: string; speed_hint?: string }> = CATEGORY_TABLE.map(
  ([url, form_factor]) => ({ url, form_factor }),
);
|
|
|
|
/** One product row parsed from a Comms Express category listing page. */
interface Product {
  // --- identity ---
  /** Part number as stored by the scraper (upper-cased before being recorded). */
  partNumber: string;
  /** Product title text taken from the listing element. */
  name: string;
  /** Form factor of the category the product was listed under. */
  formFactor: string;

  // --- offer ---
  /** Numeric price from the listing's price text. */
  price: number;
  /** Currency code reported by the price parser; the scraper falls back to "GBP". */
  currency: string;
  /** Stock level string derived from the listing's availability text. */
  stock: string;

  // --- provenance ---
  /** Link to the product page. */
  url: string;
}
|
|
|
|
/**
 * Turns a product link scraped from a listing page into an absolute URL.
 *
 * Absolute (`http…`) and root-relative (`/…`) links come back exactly as the
 * scraper has always produced them. Only links that plain concatenation with
 * BASE would mangle — protocol-relative (`//host/…`) or path-relative
 * (`foo/bar`) — are resolved against the page they were found on.
 *
 * @param href    Raw `href` attribute value; empty when no link matched.
 * @param pageUrl Absolute URL of the listing page the link was found on.
 * @returns An absolute URL, or BASE when `href` is empty.
 */
function resolveProductUrl(href: string, pageUrl: string): string {
  if (!href) return BASE;
  if (href.startsWith("http")) return href;
  if (href.startsWith("/") && !href.startsWith("//")) return `${BASE}${href}`;
  try {
    return new URL(href, pageUrl).toString();
  } catch {
    // Unparseable link: keep the legacy concatenation rather than dropping the product.
    return `${BASE}${href}`;
  }
}

/**
 * Scrapes one category's listing pages and returns every product that has a
 * part number and a positive price.
 *
 * Pages are requested as `?page=N`, starting at `page`. Paging continues while
 * the page shows a "next" link, yielded at least one product, and the page
 * number is below 20. A non-OK HTTP response ends paging; products collected
 * from earlier pages are still returned and the failure is logged.
 *
 * @param cat      Category entry supplying the listing path and form factor.
 * @param vendorId Unused by the fetch itself; kept so the signature is unchanged.
 * @param page     First page number to request (defaults to 1).
 * @returns Products found on the requested page and any following pages.
 * @throws Whatever `fetch` rejects with (network error, 30 s timeout).
 */
async function fetchCategory(cat: typeof CATEGORIES[0], vendorId: string, page = 1): Promise<Product[]> {
  // Retained for call-site compatibility; referenced so unused-parameter checks stay quiet.
  void vendorId;

  const MAX_PAGE = 20;
  const namePartPattern = /[A-Z0-9]{2,4}-[A-Z0-9][A-Z0-9\-\.]+/;
  const hrefPartPattern = /\/([A-Z0-9\-]{6,30})\/?$/i;
  const products: Product[] = [];

  for (let current = page; ; current++) {
    const url = `${BASE}${cat.url}?page=${current}`;
    const resp = await fetch(url, {
      headers: { "User-Agent": "Mozilla/5.0 (compatible; TIPBot/1.0; +https://tip.context-x.org)" },
      signal: AbortSignal.timeout(30_000),
    });
    if (!resp.ok) {
      // Previously swallowed silently, which made a blocked or removed category
      // indistinguishable from an empty one.
      logger.warn(`Comms Express ${cat.form_factor} page ${current}: HTTP ${resp.status}`, { url });
      break;
    }

    const $ = cheerio.load(await resp.text());
    let foundOnPage = 0;

    $(".product-item, .product-list-item, [data-product-id]").each((_, el) => {
      const $el = $(el);
      const name = $el.find(".product-name, .product-title, h3, h4").first().text().trim();
      const priceText = $el.find(".price, .product-price, [class*=price]").first().text().trim();
      const href = $el.find("a[href*='/transceivers/']").first().attr("href") || "";

      // Prefer a part-number-looking token in the title, then the last URL
      // segment, and finally the (truncated) title itself.
      const partMatch = name.match(namePartPattern) || href.match(hrefPartPattern);
      const partNumber = partMatch ? partMatch[1] || partMatch[0] : name.substring(0, 40);
      const { price, currency } = parsePrice(priceText);
      const stockText = $el.find(".stock, .availability, [class*=stock]").first().text().trim();

      // `!(price > 0)` also rejects NaN, should the price parser ever produce it.
      if (!partNumber || !(price > 0)) return;

      products.push({
        partNumber: partNumber.toUpperCase(),
        name,
        price,
        currency: currency || "GBP",
        stock: parseStockLevel(stockText),
        url: resolveProductUrl(href, url),
        formFactor: cat.form_factor,
      });
      foundOnPage++;
    });

    // Check for next page
    const hasNext = $(".pagination .next, a[rel=next]").length > 0;
    if (!hasNext || foundOnPage === 0) break;
    if (current >= MAX_PAGE) {
      logger.warn(`Comms Express ${cat.form_factor}: stopped at page ${current} with more pages available`, { url });
      break;
    }
  }

  return products;
}
|
|
|
|
/**
 * Runs one full Comms Express scrape.
 *
 * Obtains the vendor id via `ensureVendor`, then walks CATEGORIES in order.
 * For each product returned by `fetchCategory` it looks up (or creates) the
 * transceiver record and upserts a price observation keyed by a content hash
 * of part number, price and currency. A failure inside a category is logged
 * and the run moves on to the next category.
 *
 * @returns Resolves once every category has been attempted.
 */
export async function scrapeCommsExpress(): Promise<void> {
  logger.info("Comms Express scraper starting");
  const vendorId = await ensureVendor("Comms Express", "https://www.comms-express.com");
  const counters = { total: 0, fresh: 0 };

  for (const category of CATEGORIES) {
    try {
      const listings = await fetchCategory(category, vendorId);

      for (const listing of listings) {
        const { partNumber, name, price, currency, stock, url, formFactor } = listing;

        const { id: transceiverId } = await findOrCreateScrapedTransceiver({
          partNumber,
          vendorId,
          formFactor,
          name,
          url,
        });

        const observationHash = contentHash(`${partNumber}:${price}:${currency}`);
        const inserted = await upsertPriceObservation({
          transceiverId,
          sourceVendorId: vendorId,
          price,
          currency,
          stockLevel: stock,
          url,
          contentHash: observationHash,
        });

        counters.fresh += inserted ? 1 : 0;
        counters.total += 1;
      }

      // Empty categories are not worth a log line.
      if (listings.length === 0) continue;
      logger.info(`Comms Express ${category.form_factor}: ${listings.length} products`);
    } catch (err) {
      logger.warn(`Comms Express ${category.form_factor} failed`, { err });
    }
  }

  logger.info(`Comms Express done — ${counters.total} total, ${counters.fresh} new`);
}
|