- 16 commercial scrapers: replace TIP-Bot/1.0 with Chrome/120 UA (GBICS confirmed returning 0 bytes for bot UA, Chrome UA returns 200KB) - gbics.ts: fix User-Agent (was returning empty HTML, now returns products) - optictransceiver.ts: disable — domain repurposed as plant shop (2026-04-06) Alocasia Regal Shield is not a transceiver.
106 lines
4.4 KiB
TypeScript
106 lines
4.4 KiB
TypeScript
/**
 * OpticTransceiver.com Scraper
 *
 * Competitive pricing, very broad form factor coverage:
 * CSFP, SFP-DD, QSFP56, SFP56, CXP, legacy + modern.
 * Static HTML, cheerio-friendly.
 *
 * Schedule: every 8h
 */
|
|
import * as cheerio from "cheerio";
|
|
import { ensureVendor, upsertPriceObservation, findOrCreateScrapedTransceiver } from "../utils/db";
|
|
import { contentHash, parsePrice, parseStockLevel } from "../utils/hash";
|
|
import { logger } from "../utils/logger";
|
|
|
|
/** Site origin; category paths and root-relative product links are appended to it. */
const BASE = "https://www.optictransceiver.com";

/**
 * Category listing pages to crawl, one entry per form factor.
 *
 * `path` is appended to `BASE` to build the listing URL; `form_factor` is the
 * label attached to every product found under that path.
 */
const CATEGORIES: Array<{ path: string; form_factor: string }> = [
  { path: "/sfp-modules/", form_factor: "SFP" },
  { path: "/sfp-plus-transceivers/", form_factor: "SFP+" },
  { path: "/sfp28-transceiver/", form_factor: "SFP28" },
  { path: "/sfp56-transceiver/", form_factor: "SFP56" },
  { path: "/sfp-dd-transceiver/", form_factor: "SFP-DD" },
  { path: "/csfp-transceiver/", form_factor: "CSFP" },
  { path: "/qsfp-plus-transceiver/", form_factor: "QSFP+" },
  { path: "/qsfp28-transceiver/", form_factor: "QSFP28" },
  { path: "/qsfp56-transceiver/", form_factor: "QSFP56" },
  { path: "/qsfp-dd-transceiver/", form_factor: "QSFP-DD" },
  { path: "/qsfp-dd-800-transceiver/", form_factor: "QSFP-DD800" },
  { path: "/qsfp112-transceiver/", form_factor: "QSFP112" },
  { path: "/osfp-transceiver/", form_factor: "OSFP" },
  { path: "/osfp112-transceiver/", form_factor: "OSFP112" },
  { path: "/cfp-transceiver/", form_factor: "CFP" },
  { path: "/cfp2-transceiver/", form_factor: "CFP2" },
  { path: "/xfp-transceiver/", form_factor: "XFP" },
  { path: "/cxp-transceiver/", form_factor: "CXP" },
  { path: "/gbic-transceiver/", form_factor: "GBIC" },
];
|
|
|
|
/** Upper bound on listing pages fetched per category. */
const MAX_PAGES_PER_CATEGORY = 15;

/** Part-number shape searched for in a product title: uppercase letters/digits joined by "-" or "/". */
const PART_NUMBER_PATTERN = /([A-Z0-9]{2,8}[\-\/][A-Z0-9][A-Z0-9\-\.\/]{3,35})/;

/** Returns the part number found in `name`, or the first 50 characters of `name` when nothing matches, uppercased. */
function extractPartNumber(name: string): string {
  const match = PART_NUMBER_PATTERN.exec(name);
  return (match?.[1] ?? name.substring(0, 50)).toUpperCase();
}

/** Resolves a product link against the listing page it was found on; returns undefined when it cannot be parsed. */
function resolveProductUrl(href: string, pageUrl: string): string | undefined {
  // Absolute links are kept verbatim so stored URLs are not re-normalised.
  if (href.startsWith("http://") || href.startsWith("https://")) return href;
  try {
    // Handles root-relative ("/x"), page-relative ("x") and protocol-relative ("//host/x") links.
    return new URL(href, pageUrl).toString();
  } catch {
    return undefined;
  }
}

/**
 * Crawls the paginated listing of one category and records a price observation
 * for every product that has a name, a link and a positive price.
 *
 * Pagination stops at the first non-OK response, the first page without any
 * matching product element, the first page whose fetch/parse throws, or after
 * `MAX_PAGES_PER_CATEGORY` pages. A failure while storing a single product is
 * logged and does not stop the crawl.
 *
 * @param path - Category path appended to `BASE` (later pages add `page/<n>/`).
 * @param form_factor - Form-factor label stored with each product of this category.
 * @param vendorId - Vendor id used both as the product's vendor and as the price source.
 * @returns Number of products for which both database writes succeeded.
 */
async function scrapeCategory(path: string, form_factor: string, vendorId: string): Promise<number> {
  let total = 0;

  for (let page = 1; page <= MAX_PAGES_PER_CATEGORY; page++) {
    const url = `${BASE}${path}${page > 1 ? `page/${page}/` : ""}`;
    try {
      const resp = await fetch(url, {
        headers: { "User-Agent": "Mozilla/5.0 (compatible; TIPBot/1.0)" },
        signal: AbortSignal.timeout(25_000),
      });
      // `ok` is false for every non-2xx status, 404 included.
      if (!resp.ok) break;

      const $ = cheerio.load(await resp.text());
      const items = $(".product, .product-item, article.product");
      if (items.length === 0) break;

      for (let i = 0; i < items.length; i++) {
        const $el = $(items[i]);
        const name = $el.find("h2, h3, .product-title").first().text().trim();
        const priceText = $el.find(".price, .amount").first().text().trim();
        const href = ($el.find("a").first().attr("href") || "").trim();
        if (!name || !href) continue;

        const productUrl = resolveProductUrl(href, url);
        if (!productUrl) continue;

        const partNumber = extractPartNumber(name);
        const { price, currency } = parsePrice(priceText);
        // Written as a negated ">" so a NaN price is skipped too.
        if (!(price > 0)) continue;

        try {
          const t = await findOrCreateScrapedTransceiver({
            partNumber, vendorId, formFactor: form_factor, name,
            url: productUrl,
          });
          await upsertPriceObservation({
            transceiverId: t.id, sourceVendorId: vendorId,
            price, currency: currency || "USD",
            stockLevel: "unknown",
            url: productUrl,
            contentHash: contentHash(`${partNumber}:${price}:${currency}`),
          });
          total++;
        } catch (err) {
          // Best effort: one bad product must not abort the rest of the page.
          logger.warn(`OpticTransceiver ${form_factor}: failed to store ${partNumber}`, { err });
        }
      }
    } catch (err) {
      // Network error, timeout or parse failure: give up on this category's remaining pages.
      logger.warn(`OpticTransceiver ${form_factor}: page ${page} failed (${url})`, { err });
      break;
    }
  }

  return total;
}
|
|
|
|
/**
 * Kill switch for this scraper.
 *
 * Set to `true` because the domain was repurposed as a plant shop (2026-04-06).
 * Annotated as `boolean` so the guarded scrape path below stays type-checked
 * and is not reported as unreachable code.
 */
const OPTIC_TRANSCEIVER_DISABLED: boolean = true;

/**
 * Entry point for the OpticTransceiver.com scrape.
 *
 * While `OPTIC_TRANSCEIVER_DISABLED` is `true` this only logs a warning and
 * returns. Otherwise it ensures the vendor record exists, scrapes every entry
 * of `CATEGORIES` in sequence, logs a per-category count when it is non-zero,
 * and logs the overall total. A failure in one category is logged and does not
 * stop the remaining categories.
 */
export async function scrapeOpticTransceiver(): Promise<void> {
  if (OPTIC_TRANSCEIVER_DISABLED) {
    // Domain repurposed as plant shop (2026-04-06) — skip entirely
    logger.warn("OpticTransceiver.com is no longer an optics vendor (domain repurposed). Scraper disabled.");
    return;
  }

  const vendorId = await ensureVendor("OpticTransceiver", BASE);
  let total = 0;

  for (const cat of CATEGORIES) {
    try {
      const n = await scrapeCategory(cat.path, cat.form_factor, vendorId);
      if (n > 0) logger.info(`OpticTransceiver ${cat.form_factor}: ${n} products`);
      total += n;
    } catch (e) {
      logger.warn(`OpticTransceiver ${cat.form_factor} failed`, { err: e });
    }
  }
  logger.info(`OpticTransceiver done — ${total} total`);
}
|