feat: add 5 form-factor coverage scrapers with worker registrations

Add Comms-Express, Router-Switch.com, Multimode Inc, OpticTransceiver.com,
and Wiitek scrapers covering CFP2-DCO, CFP4, OSFP224, QSFP112, CXP, GBIC,
XENPAK, CSFP, SFP-DD, SFP56, QSFP56 and other previously-uncovered form
factors. Each scheduled every 8h. Worker registrations added to scheduler.

Also export db alias in utils/db.ts to fix eBay enricher + community scrapers
crashing with 'Cannot read properties of undefined (reading query)'.
This commit is contained in:
Rene Fichtmueller 2026-04-02 08:39:17 +02:00
parent b7613538bf
commit f146ac873e
7 changed files with 571 additions and 3 deletions

View File

@ -110,6 +110,12 @@ export async function registerSchedules(boss: PgBoss): Promise<void> {
// ── Compute (every 4h, after pricing waves) ─────────────────────── // ── Compute (every 4h, after pricing waves) ───────────────────────
"compute:abc", "compute:abc",
"compute:reorder-signals", "compute:reorder-signals",
// ── New form-factor coverage scrapers (every 8h) ──────────────────
"scrape:pricing:comms-express",
"scrape:pricing:router-switch",
"scrape:pricing:multimode-inc",
"scrape:pricing:optictransceiver",
"scrape:pricing:wiitek",
// ── Prediction Signal Scrapers (new) ────────────────────────────── // ── Prediction Signal Scrapers (new) ──────────────────────────────
"scrape:signals:sec-edgar", "scrape:signals:sec-edgar",
"scrape:signals:github", "scrape:signals:github",
@ -193,6 +199,16 @@ export async function registerSchedules(boss: PgBoss): Promise<void> {
await boss.schedule("scrape:catalog:smartoptics", "10 3,11,19 * * *", {}, { retryLimit: 2, expireInSeconds: 3600 }); await boss.schedule("scrape:catalog:smartoptics", "10 3,11,19 * * *", {}, { retryLimit: 2, expireInSeconds: 3600 });
await boss.schedule("scrape:catalog:hubersuhner", "25 3,11,19 * * *", {}, { retryLimit: 2, expireInSeconds: 3600 }); await boss.schedule("scrape:catalog:hubersuhner", "25 3,11,19 * * *", {}, { retryLimit: 2, expireInSeconds: 3600 });
// ══════════════════════════════════════════════════════════════════════
// FORM-FACTOR COVERAGE SCRAPERS — every 8h (CFP, CSFP, SFP-DD, legacy)
// ══════════════════════════════════════════════════════════════════════
await boss.schedule("scrape:pricing:comms-express", "40 2,10,18 * * *", {}, { retryLimit: 2, expireInSeconds: 5400 });
await boss.schedule("scrape:pricing:router-switch", "0 3,11,19 * * *", {}, { retryLimit: 2, expireInSeconds: 5400 });
await boss.schedule("scrape:pricing:multimode-inc", "20 3,11,19 * * *", {}, { retryLimit: 2, expireInSeconds: 3600 });
await boss.schedule("scrape:pricing:optictransceiver", "45 3,11,19 * * *", {}, { retryLimit: 2, expireInSeconds: 3600 });
await boss.schedule("scrape:pricing:wiitek", "5 4,12,20 * * *", {}, { retryLimit: 2, expireInSeconds: 3600 });
// ══════════════════════════════════════════════════════════════════════ // ══════════════════════════════════════════════════════════════════════
// VENDOR LISTS — every 12h // VENDOR LISTS — every 12h
// ══════════════════════════════════════════════════════════════════════ // ══════════════════════════════════════════════════════════════════════
@ -561,5 +577,37 @@ export async function registerWorkers(boss: PgBoss): Promise<void> {
await runForecastEngine(); await runForecastEngine();
}); });
console.log("All workers registered (50 jobs, 24/7 continuous)"); // ── Form-factor coverage scrapers ─────────────────────────────────────
await boss.work("scrape:pricing:comms-express", async () => {
console.log(`[${new Date().toISOString()}] Running: Comms-Express pricing`);
const { scrapeCommsExpress } = await import("./scrapers/comms-express");
await scrapeCommsExpress();
});
await boss.work("scrape:pricing:router-switch", async () => {
console.log(`[${new Date().toISOString()}] Running: Router-Switch.com pricing`);
const { scrapeRouterSwitch } = await import("./scrapers/router-switch");
await scrapeRouterSwitch();
});
await boss.work("scrape:pricing:multimode-inc", async () => {
console.log(`[${new Date().toISOString()}] Running: Multimode Inc pricing`);
const { scrapeMultimodeInc } = await import("./scrapers/multimode-inc");
await scrapeMultimodeInc();
});
await boss.work("scrape:pricing:optictransceiver", async () => {
console.log(`[${new Date().toISOString()}] Running: OpticTransceiver.com pricing`);
const { scrapeOpticTransceiver } = await import("./scrapers/optictransceiver");
await scrapeOpticTransceiver();
});
await boss.work("scrape:pricing:wiitek", async () => {
console.log(`[${new Date().toISOString()}] Running: Wiitek pricing`);
const { scrapeWiitek } = await import("./scrapers/wiitek");
await scrapeWiitek();
});
console.log("All workers registered (55 jobs, 24/7 continuous)");
} }

View File

@ -0,0 +1,134 @@
/**
* Comms Express Scraper — comms-express.com
*
* UK-based reseller with excellent coverage of:
* CFP, CFP2, CFP4, CFP2-DCO, CSFP, SFP-DD, QSFP112, OSFP, legacy GBIC/XENPAK/CXP
* Good pricing transparency, cheerio-friendly.
*
* Schedule: every 8h
*/
import * as cheerio from "cheerio";
import { ensureVendor, upsertPriceObservation, findOrCreateScrapedTransceiver, pool } from "../utils/db";
import { contentHash, parsePrice, parseStockLevel } from "../utils/hash";
import { logger } from "../utils/logger";
/** Origin that every category path below is appended to. */
const BASE = "https://www.comms-express.com";

// [url slug, form_factor] pairs; each listing lives at /transceivers/<slug>-modules/.
const COMMS_EXPRESS_SLUGS: ReadonlyArray<readonly [string, string]> = [
  ["sfp", "SFP"],
  ["sfp-plus", "SFP+"],
  ["sfp28", "SFP28"],
  ["sfp56", "SFP56"],
  ["sfp-dd", "SFP-DD"],
  ["qsfp", "QSFP+"],
  ["qsfp28", "QSFP28"],
  ["qsfp56", "QSFP56"],
  ["qsfp-dd", "QSFP-DD"],
  ["qsfp-dd-800", "QSFP-DD800"],
  ["qsfp112", "QSFP112"],
  ["osfp", "OSFP"],
  ["osfp112", "OSFP112"],
  ["cfp", "CFP"],
  ["cfp2", "CFP2"],
  ["cfp2-dco", "CFP2-DCO"],
  ["cfp4", "CFP4"],
  ["csfp", "CSFP"],
  ["xfp", "XFP"],
  ["cxp", "CXP"],
  ["gbic", "GBIC"],
  ["xenpak", "XENPAK"],
];

// Category listing paths mapped to the form_factor recorded for their products.
const CATEGORIES: Array<{ url: string; form_factor: string; speed_hint?: string }> =
  COMMS_EXPRESS_SLUGS.map(([slug, form_factor]) => ({
    url: `/transceivers/${slug}-modules/`,
    form_factor,
  }));
/** One product parsed from a Comms Express category listing. */
type Product = {
  /** Part number, upper-cased; falls back to the start of the product name when no pattern matches. */
  partNumber: string;
  /** Product title exactly as shown in the listing (trimmed). */
  name: string;
  /** Numeric price returned by parsePrice; only values above zero are kept. */
  price: number;
  /** Currency returned by parsePrice, or "GBP" when none was detected. */
  currency: string;
  /** Result of parseStockLevel applied to the listing's availability text. */
  stock: string;
  /** Absolute product URL (relative hrefs are prefixed with the site origin). */
  url: string;
  /** Form factor of the category the product was found in. */
  formFactor: string;
};
/**
 * Collects the products listed in one Comms Express category, following
 * pagination from `page` onwards.
 *
 * Pagination continues while the current page shows a "next" link, yielded at
 * least one usable product, and the page number is below 20.
 *
 * @param cat       Category entry (URL path + form factor) to scrape.
 * @param vendorId  Not read by this function; kept so existing call sites stay valid.
 * @param page      Page number to start from (defaults to 1).
 * @returns Products parsed from every page that could be fetched.
 * @throws Re-throws a fetch/timeout error only when the starting page itself
 *         fails. A failure on a later page is logged and the products
 *         gathered so far are returned instead of being discarded.
 */
async function fetchCategory(cat: typeof CATEGORIES[0], vendorId: string, page = 1): Promise<Product[]> {
  const LAST_PAGE = 20;
  const products: Product[] = [];

  for (let current = page; ; current++) {
    const url = `${BASE}${cat.url}?page=${current}`;

    let html: string;
    try {
      const resp = await fetch(url, {
        headers: { "User-Agent": "Mozilla/5.0 (compatible; TIPBot/1.0; +https://tip.context-x.org)" },
        signal: AbortSignal.timeout(30_000),
      });
      if (!resp.ok) {
        // Make blocked or missing pages visible instead of looking like an empty category.
        logger.warn(`Comms Express ${cat.form_factor}: HTTP ${resp.status}`, { url });
        break;
      }
      html = await resp.text();
    } catch (e) {
      // Nothing collected yet: let the caller report the category as failed.
      if (current === page) throw e;
      logger.warn(`Comms Express ${cat.form_factor}: page ${current} failed, keeping earlier pages`, { err: e, url });
      break;
    }

    const $ = cheerio.load(html);
    let foundOnPage = 0;

    $(".product-item, .product-list-item, [data-product-id]").each((_, el) => {
      const $el = $(el);
      const name = $el.find(".product-name, .product-title, h3, h4").first().text().trim();
      const priceText = $el.find(".price, .product-price, [class*=price]").first().text().trim();
      const href = $el.find("a[href*='/transceivers/']").first().attr("href") || "";

      // Prefer a part-number-looking token in the title, then the last URL segment.
      const partMatch = name.match(/[A-Z0-9]{2,4}-[A-Z0-9][A-Z0-9\-\.]+/) ||
        href.match(/\/([A-Z0-9\-]{6,30})\/?$/i);
      // The title pattern has no capture group (use [0]); the URL pattern does (use [1]).
      const partNumber = partMatch ? partMatch[1] || partMatch[0] : name.substring(0, 40);

      const { price, currency } = parsePrice(priceText);
      const stockText = $el.find(".stock, .availability, [class*=stock]").first().text().trim();

      if (!partNumber || price <= 0) return;

      products.push({
        partNumber: partNumber.toUpperCase(),
        name,
        price,
        currency: currency || "GBP",
        stock: parseStockLevel(stockText),
        url: href.startsWith("http") ? href : `${BASE}${href}`,
        formFactor: cat.form_factor,
      });
      foundOnPage++;
    });

    // Check for next page
    const hasNext = $(".pagination .next, a[rel=next]").length > 0;
    if (!hasNext || foundOnPage === 0 || current >= LAST_PAGE) break;
  }

  return products;
}
/**
 * Entry point of the Comms Express scraper: walks every configured category,
 * resolves a transceiver record for each product and upserts a price
 * observation for it.
 *
 * A category that fails to fetch is logged and skipped. A product that fails
 * to store is also logged and skipped, so a single bad row no longer discards
 * the remaining products of its category.
 */
export async function scrapeCommsExpress(): Promise<void> {
  logger.info("Comms Express scraper starting");
  const vendorId = await ensureVendor("Comms Express", "https://www.comms-express.com");
  let total = 0;
  let newItems = 0;

  for (const cat of CATEGORIES) {
    try {
      const products = await fetchCategory(cat, vendorId);

      for (const p of products) {
        try {
          const transceiverResult = await findOrCreateScrapedTransceiver({
            partNumber: p.partNumber,
            vendorId,
            formFactor: p.formFactor,
            name: p.name,
            url: p.url,
          });
          const hash = contentHash(`${p.partNumber}:${p.price}:${p.currency}`);
          const isNew = await upsertPriceObservation({
            transceiverId: transceiverResult.id,
            sourceVendorId: vendorId,
            price: p.price,
            currency: p.currency,
            stockLevel: p.stock,
            url: p.url,
            contentHash: hash,
          });
          // A truthy result from upsertPriceObservation is counted as a new observation.
          if (isNew) newItems++;
          total++;
        } catch (e) {
          logger.warn(`Comms Express ${cat.form_factor}: failed to store ${p.partNumber}`, { err: e });
        }
      }

      if (products.length > 0) {
        logger.info(`Comms Express ${cat.form_factor}: ${products.length} products`);
      }
    } catch (e) {
      logger.warn(`Comms Express ${cat.form_factor} failed`, { err: e });
    }
  }

  logger.info(`Comms Express done — ${total} total, ${newItems} new`);
}

View File

@ -0,0 +1,82 @@
/**
* MultiMode Inc Scraper — multimode-inc.com
*
* Specialist for high-speed coherent transceivers:
* CFP, CFP2, CFP2-DCO, CFP4, QSFP112, OSFP112, OSFP224
* Plus broad 400G/800G coverage.
*
* Schedule: every 8h
*/
import * as cheerio from "cheerio";
import { ensureVendor, upsertPriceObservation, findOrCreateScrapedTransceiver } from "../utils/db";
import { contentHash, parsePrice, parseStockLevel } from "../utils/hash";
import { logger } from "../utils/logger";
// NOTE(review): the file header names multimode-inc.com while BASE targets
// multimode.com — confirm which host is intended.
const BASE = "https://www.multimode.com";

// form_factor → category listing path, in scrape order.
const MULTIMODE_PATHS = {
  "CFP": "/cfp-transceivers/",
  "CFP2": "/cfp2-transceivers/",
  "CFP2-DCO": "/cfp2-dco/",
  "CFP4": "/cfp4-transceivers/",
  "OSFP": "/osfp-transceivers/",
  "OSFP112": "/osfp112/",
  "OSFP224": "/osfp224/",
  "QSFP112": "/qsfp112/",
  "QSFP-DD800": "/qsfp-dd-800/",
  "QSFP-DD": "/qsfp-dd/",
  "SFP-DD": "/sfp-dd/",
  "QSFP28": "/qsfp28-transceivers/",
};

/** Category listing paths paired with the form factor recorded for their products. */
const CATEGORIES: Array<{ path: string; form_factor: string }> =
  Object.entries(MULTIMODE_PATHS).map(([form_factor, path]) => ({ path, form_factor }));
/**
 * Entry point of the Multimode Inc scraper: fetches the first listing page of
 * every configured category and upserts one price observation per product.
 *
 * Failures are isolated and logged at every level: a non-OK response or a
 * thrown fetch skips the category, and a storage error skips only that
 * product.
 */
export async function scrapeMultimodeInc(): Promise<void> {
  logger.info("Multimode Inc scraper starting");
  const vendorId = await ensureVendor("Multimode Inc", BASE);
  let total = 0;
  let newItems = 0;

  for (const cat of CATEGORIES) {
    try {
      const listingUrl = `${BASE}${cat.path}`;
      const resp = await fetch(listingUrl, {
        headers: { "User-Agent": "Mozilla/5.0 (compatible; TIPBot/1.0)" },
        signal: AbortSignal.timeout(25_000),
      });
      if (!resp.ok) {
        logger.warn(`Multimode Inc ${cat.form_factor}: HTTP ${resp.status}`, { url: listingUrl });
        continue;
      }

      const $ = cheerio.load(await resp.text());
      const rows = $(".product, .woocommerce-loop-product__link, article.product");

      // The selectors may overlap (a product card and the link inside it can both
      // match), so identical part/url/price combinations are processed once.
      const seen = new Set<string>();
      let stored = 0;

      for (let i = 0; i < rows.length; i++) {
        const $el = $(rows[i]);
        const name = $el.find(".woocommerce-loop-product__title, h2, h3").first().text().trim();
        const priceText = $el.find(".price, .woocommerce-Price-amount").first().text().trim();
        // When the matched element is itself the link, fall back to closest("a").
        const href = $el.find("a").first().attr("href") || $el.closest("a").attr("href") || "";
        if (!name) continue;

        const partMatch = name.match(/([A-Z0-9]{2,8}-[A-Z0-9][A-Z0-9\-\/\.]{3,30})/);
        const partNumber = partMatch ? partMatch[1].toUpperCase() : name.substring(0, 50);
        const { price, currency } = parsePrice(priceText);
        if (price <= 0) continue;

        // Resolve the currency once so the stored value and the hash agree.
        const resolvedCurrency = currency || "USD";
        const productUrl = href.startsWith("http") ? href : `${BASE}${href}`;

        const dedupeKey = `${partNumber}|${productUrl}|${price}`;
        if (seen.has(dedupeKey)) continue;
        seen.add(dedupeKey);

        try {
          const t = await findOrCreateScrapedTransceiver({
            partNumber, vendorId, formFactor: cat.form_factor, name,
            url: productUrl,
          });
          const isNew = await upsertPriceObservation({
            transceiverId: t.id, sourceVendorId: vendorId,
            price, currency: resolvedCurrency,
            stockLevel: "unknown",
            url: productUrl,
            contentHash: contentHash(`${partNumber}:${price}:${resolvedCurrency}`),
          });
          if (isNew) newItems++;
          total++;
          stored++;
        } catch (e) {
          logger.warn(`Multimode Inc ${cat.form_factor}: failed to store ${partNumber}`, { err: e });
        }
      }

      // Report what was actually stored, not how many elements the selector matched.
      if (stored > 0) logger.info(`Multimode Inc ${cat.form_factor}: ${stored} products`);
    } catch (e) {
      logger.warn(`Multimode Inc ${cat.form_factor} failed`, { err: e });
    }
  }

  logger.info(`Multimode Inc done — ${total} total, ${newItems} new`);
}

View File

@ -0,0 +1,103 @@
/**
* OpticTransceiver.com Scraper
*
* Competitive pricing, very broad form factor coverage:
* CSFP, SFP-DD, QSFP56, SFP56, CXP, legacy + modern.
* Static HTML, cheerio-friendly.
*
* Schedule: every 8h
*/
import * as cheerio from "cheerio";
import { ensureVendor, upsertPriceObservation, findOrCreateScrapedTransceiver } from "../utils/db";
import { contentHash, parsePrice, parseStockLevel } from "../utils/hash";
import { logger } from "../utils/logger";
/** Origin that every category path below is appended to. */
const BASE = "https://www.optictransceiver.com";

// [form_factor, listing path] pairs, in scrape order.
const OPTICTRANSCEIVER_PATHS: ReadonlyArray<readonly [string, string]> = [
  ["SFP", "/sfp-modules/"],
  ["SFP+", "/sfp-plus-transceivers/"],
  ["SFP28", "/sfp28-transceiver/"],
  ["SFP56", "/sfp56-transceiver/"],
  ["SFP-DD", "/sfp-dd-transceiver/"],
  ["CSFP", "/csfp-transceiver/"],
  ["QSFP+", "/qsfp-plus-transceiver/"],
  ["QSFP28", "/qsfp28-transceiver/"],
  ["QSFP56", "/qsfp56-transceiver/"],
  ["QSFP-DD", "/qsfp-dd-transceiver/"],
  ["QSFP-DD800", "/qsfp-dd-800-transceiver/"],
  ["QSFP112", "/qsfp112-transceiver/"],
  ["OSFP", "/osfp-transceiver/"],
  ["OSFP112", "/osfp112-transceiver/"],
  ["CFP", "/cfp-transceiver/"],
  ["CFP2", "/cfp2-transceiver/"],
  ["XFP", "/xfp-transceiver/"],
  ["CXP", "/cxp-transceiver/"],
  ["GBIC", "/gbic-transceiver/"],
];

/** Category listing paths paired with the form factor recorded for their products. */
const CATEGORIES: Array<{ path: string; form_factor: string }> =
  OPTICTRANSCEIVER_PATHS.map(([form_factor, path]) => ({ path, form_factor }));
/**
 * Scrapes one OpticTransceiver.com category, walking `page/<n>/` listing pages
 * (at most 15) until a page is missing, empty, or fails.
 *
 * This function never throws: page-level and product-level failures are
 * logged and scraping stops or skips accordingly.
 *
 * @param path         Category path relative to the site origin.
 * @param form_factor  Form factor recorded for every product in this category.
 * @param vendorId     Vendor id passed to the transceiver and price upserts.
 * @returns Number of price observations written.
 */
async function scrapeCategory(path: string, form_factor: string, vendorId: string): Promise<number> {
  const MAX_PAGES = 15;
  let total = 0;

  for (let page = 1; page <= MAX_PAGES; page++) {
    const url = `${BASE}${path}${page > 1 ? `page/${page}/` : ""}`;
    try {
      const resp = await fetch(url, {
        headers: { "User-Agent": "Mozilla/5.0 (compatible; TIPBot/1.0)" },
        signal: AbortSignal.timeout(25_000),
      });
      if (!resp.ok) {
        // A 404 past the first page is presumably just the end of pagination;
        // anything else is worth surfacing.
        if (resp.status !== 404 || page === 1) {
          logger.warn(`OpticTransceiver ${form_factor}: HTTP ${resp.status}`, { url });
        }
        break;
      }

      const $ = cheerio.load(await resp.text());
      const items = $(".product, .product-item, article.product");
      if (items.length === 0) break;

      for (let i = 0; i < items.length; i++) {
        const $el = $(items[i]);
        const name = $el.find("h2, h3, .product-title").first().text().trim();
        const priceText = $el.find(".price, .amount").first().text().trim();
        const href = ($el.find("a").first().attr("href") || "").trim();
        if (!name || !href) continue;

        const partMatch = name.match(/([A-Z0-9]{2,8}[\-\/][A-Z0-9][A-Z0-9\-\.\/]{3,35})/);
        const partNumber = (partMatch ? partMatch[1] : name.substring(0, 50)).toUpperCase();
        const { price, currency } = parsePrice(priceText);
        if (price <= 0) continue;

        // Resolve the currency once so the stored value and the hash agree.
        const resolvedCurrency = currency || "USD";
        const productUrl = href.startsWith("http") ? href : `${BASE}${href}`;

        try {
          const t = await findOrCreateScrapedTransceiver({
            partNumber, vendorId, formFactor: form_factor, name,
            url: productUrl,
          });
          await upsertPriceObservation({
            transceiverId: t.id, sourceVendorId: vendorId,
            price, currency: resolvedCurrency,
            stockLevel: "unknown",
            url: productUrl,
            contentHash: contentHash(`${partNumber}:${price}:${resolvedCurrency}`),
          });
          total++;
        } catch (e) {
          logger.warn(`OpticTransceiver ${form_factor}: failed to store ${partNumber}`, { err: e });
        }
      }
    } catch (e) {
      logger.warn(`OpticTransceiver ${form_factor}: page ${page} failed`, { err: e, url });
      break;
    }
  }

  return total;
}
/**
 * Entry point of the OpticTransceiver.com scraper: runs scrapeCategory for
 * each configured category in turn and logs per-category and overall counts.
 * A category that throws is logged as a warning and does not stop the run.
 */
export async function scrapeOpticTransceiver(): Promise<void> {
  logger.info("OpticTransceiver.com scraper starting");
  const vendorId = await ensureVendor("OpticTransceiver", BASE);

  let grandTotal = 0;
  for (const { path, form_factor } of CATEGORIES) {
    try {
      const stored = await scrapeCategory(path, form_factor, vendorId);
      if (stored > 0) {
        logger.info(`OpticTransceiver ${form_factor}: ${stored} products`);
      }
      grandTotal += stored;
    } catch (err) {
      logger.warn(`OpticTransceiver ${form_factor} failed`, { err });
    }
  }

  logger.info(`OpticTransceiver done — ${grandTotal} total`);
}

View File

@ -0,0 +1,108 @@
/**
* Router-Switch.com Scraper
*
* Massive catalog of Cisco/Arista/Juniper/HP transceivers including:
* CSFP (GLC-BX-D/U), GBIC (WS-G5484), XENPAK, CFP, XFP, legacy SFP
* Cheerio-friendly category pages, good price transparency.
*
* Schedule: every 8h
*/
import * as cheerio from "cheerio";
import { ensureVendor, upsertPriceObservation, findOrCreateScrapedTransceiver } from "../utils/db";
import { contentHash, parsePrice, parseStockLevel } from "../utils/hash";
import { logger } from "../utils/logger";
/** Origin that every category page below is appended to. */
const BASE = "https://www.router-switch.com";

// [page slug, form_factor] pairs; each listing lives at /<slug>.html.
const ROUTER_SWITCH_SLUGS: ReadonlyArray<readonly [string, string]> = [
  ["sfp-modules", "SFP"],
  ["sfp-plus", "SFP+"],
  ["sfp28", "SFP28"],
  ["qsfp-plus", "QSFP+"],
  ["qsfp28", "QSFP28"],
  ["qsfp-dd", "QSFP-DD"],
  ["osfp", "OSFP"],
  ["xfp", "XFP"],
  ["csfp", "CSFP"],
  ["cfp", "CFP"],
  ["cfp2", "CFP2"],
  ["gbic-transceiver", "GBIC"],
  ["xenpak", "XENPAK"],
  ["cxp-transceiver", "CXP"],
];

/** Category listing pages paired with the form factor recorded for their products. */
const CATEGORIES: Array<{ url: string; form_factor: string }> =
  ROUTER_SWITCH_SLUGS.map(([slug, form_factor]) => ({ url: `/${slug}.html`, form_factor }));
/**
 * Scrapes a Router-Switch.com category starting at `page` and follows
 * pagination while a "next" link is present, the current page stored at least
 * one product, and the page number is below 10.
 *
 * @param catUrl       Category path relative to the site origin.
 * @param form_factor  Form factor recorded for every product in this category.
 * @param vendorId     Vendor id passed to the transceiver and price upserts.
 * @param page         Page number to start from (defaults to 1).
 * @returns Number of price observations written across all pages visited.
 * @throws Re-throws a fetch/timeout error only when the starting page itself
 *         fails. A failure on a later page is logged and the count of rows
 *         already stored is still returned.
 */
async function fetchPage(catUrl: string, form_factor: string, vendorId: string, page = 1): Promise<number> {
  const LAST_PAGE = 10;
  const sep = catUrl.includes("?") ? "&" : "?";
  let count = 0;

  for (let current = page; ; current++) {
    const url = `${BASE}${catUrl}${current > 1 ? `${sep}p=${current}` : ""}`;

    let html: string;
    try {
      const resp = await fetch(url, {
        headers: { "User-Agent": "Mozilla/5.0 (compatible; TIPBot/1.0)" },
        signal: AbortSignal.timeout(30_000),
      });
      if (!resp.ok) {
        logger.warn(`Router-Switch ${form_factor}: HTTP ${resp.status}`, { url });
        break;
      }
      html = await resp.text();
    } catch (e) {
      // Nothing stored yet: let the caller report the category as failed.
      if (current === page) throw e;
      logger.warn(`Router-Switch ${form_factor}: page ${current} failed, keeping earlier pages`, { err: e, url });
      break;
    }

    const $ = cheerio.load(html);
    let storedOnPage = 0;

    const items = $(".products-grid .item, .product-item, li.item");
    for (let i = 0; i < items.length; i++) {
      const $el = $(items[i]);
      const name = $el.find(".product-name a, h2.product-name, .product-name").first().text().trim();
      const priceText = $el.find(".price, .regular-price, .special-price").first().text().trim();
      const href = $el.find("a[href]").first().attr("href") || "";
      if (!name || !priceText) continue;

      // Products without a recognisable part number in the title are skipped.
      const partMatch = name.match(/([A-Z0-9]{2,8}-[A-Z0-9][A-Z0-9\-\/\.]{3,30})/);
      if (!partMatch) continue;
      const partNumber = partMatch[1].toUpperCase();

      const { price, currency } = parsePrice(priceText);
      if (price <= 0) continue;

      // Resolve the currency once so the stored value and the hash agree.
      const resolvedCurrency = currency || "USD";
      const stockText = $el.find(".availability span, .stock").text().trim();
      const productUrl = href.startsWith("http") ? href : `${BASE}${href}`;

      try {
        const t = await findOrCreateScrapedTransceiver({
          partNumber,
          vendorId,
          formFactor: form_factor,
          name,
          url: productUrl,
        });
        await upsertPriceObservation({
          transceiverId: t.id,
          sourceVendorId: vendorId,
          price,
          currency: resolvedCurrency,
          stockLevel: parseStockLevel(stockText),
          url: productUrl,
          contentHash: contentHash(`${partNumber}:${price}:${resolvedCurrency}`),
        });
        storedOnPage++;
      } catch (e) {
        logger.warn(`Router-Switch ${form_factor}: failed to store ${partNumber}`, { err: e });
      }
    }

    count += storedOnPage;

    // Paginate up to 10 pages
    const hasNext = $("a.next, .pages a:contains('Next')").length > 0;
    if (!hasNext || storedOnPage === 0 || current >= LAST_PAGE) break;
  }

  return count;
}
/**
 * Entry point of the Router-Switch.com scraper: runs fetchPage for each
 * configured category in turn and logs per-category and overall counts.
 * A category that throws is logged as a warning and does not stop the run.
 */
export async function scrapeRouterSwitch(): Promise<void> {
  logger.info("Router-Switch.com scraper starting");
  const vendorId = await ensureVendor("Router-Switch.com", "https://www.router-switch.com");

  let grandTotal = 0;
  for (const { url, form_factor } of CATEGORIES) {
    try {
      const stored = await fetchPage(url, form_factor, vendorId);
      if (stored > 0) {
        logger.info(`Router-Switch ${form_factor}: ${stored} products`);
      }
      grandTotal += stored;
    } catch (err) {
      logger.warn(`Router-Switch ${form_factor} failed`, { err });
    }
  }

  logger.info(`Router-Switch done — ${grandTotal} total`);
}

View File

@ -0,0 +1,90 @@
/**
* Wiitek Scraper — wiitek.com
*
* Chinese manufacturer/reseller, direct factory prices.
* Covers: CSFP, SFP-DD, CFP, CFP2, OSFP224, QSFP112 and all standard forms.
* Good for price benchmarking.
*
* Schedule: every 8h
*/
import * as cheerio from "cheerio";
import { ensureVendor, upsertPriceObservation, findOrCreateScrapedTransceiver } from "../utils/db";
import { contentHash, parsePrice } from "../utils/hash";
import { logger } from "../utils/logger";
/** Origin that every category path below is appended to. */
const BASE = "https://www.wiitek.com";

// Form factors whose URL slug differs from the form-factor name itself.
const WIITEK_SLUG_OVERRIDES: Record<string, string> = {
  "SFP+": "SFP-Plus",
  "QSFP+": "QSFP",
};

// Form factors in scrape order; each listing lives at /<slug>-Transceiver/.
const WIITEK_FORM_FACTORS: readonly string[] = [
  "SFP", "SFP+", "SFP28", "SFP56", "SFP-DD", "CSFP",
  "QSFP+", "QSFP28", "QSFP56", "QSFP-DD", "QSFP-DD800", "QSFP112",
  "OSFP", "OSFP112", "OSFP224",
  "CFP", "CFP2",
  "XFP", "GBIC", "XENPAK", "CXP",
];

/** Category listing paths paired with the form factor recorded for their products. */
const CATEGORIES: Array<{ path: string; form_factor: string }> =
  WIITEK_FORM_FACTORS.map((form_factor) => ({
    path: `/${WIITEK_SLUG_OVERRIDES[form_factor] ?? form_factor}-Transceiver/`,
    form_factor,
  }));
/**
 * Entry point of the Wiitek scraper: fetches the listing page of every
 * configured category and upserts one price observation per product.
 *
 * Failures are isolated and logged at every level: a non-OK response or a
 * thrown fetch skips the category, and a storage error skips only that
 * product.
 */
export async function scrapeWiitek(): Promise<void> {
  logger.info("Wiitek scraper starting");
  const vendorId = await ensureVendor("Wiitek", BASE);
  let total = 0;
  let newItems = 0;

  for (const cat of CATEGORIES) {
    try {
      const listingUrl = `${BASE}${cat.path}`;
      const resp = await fetch(listingUrl, {
        headers: { "User-Agent": "Mozilla/5.0 (compatible; TIPBot/1.0)" },
        signal: AbortSignal.timeout(20_000),
      });
      if (!resp.ok) {
        logger.warn(`Wiitek ${cat.form_factor}: HTTP ${resp.status}`, { url: listingUrl });
        continue;
      }

      const $ = cheerio.load(await resp.text());
      const items = $(".product-item, .goods-item, .pro-item, [class*=product]");

      // [class*=product] also matches wrappers and nested nodes whose class merely
      // contains "product", so the same product can surface more than once.
      // Identical part/url/price combinations are processed a single time.
      const seen = new Set<string>();

      for (let i = 0; i < items.length; i++) {
        const $el = $(items[i]);
        const name = $el.find("h2,h3,h4,.title,.name").first().text().trim();
        const priceText = $el.find(".price,.cost,[class*=price]").first().text().trim();
        const href = $el.find("a[href]").first().attr("href") || "";
        if (!name || !priceText) continue;

        const partMatch = name.match(/([A-Z0-9]{2,8}[-\/][A-Z0-9][A-Z0-9\-\.\/]{3,35})/);
        const partNumber = (partMatch ? partMatch[1] : name.substring(0, 50)).toUpperCase();
        const { price, currency } = parsePrice(priceText);
        if (price <= 0) continue;

        // Resolve the currency once so the stored value and the hash agree.
        const resolvedCurrency = currency || "USD";
        const productUrl = href.startsWith("http") ? href : `${BASE}${href}`;

        const dedupeKey = `${partNumber}|${productUrl}|${price}`;
        if (seen.has(dedupeKey)) continue;
        seen.add(dedupeKey);

        try {
          const t = await findOrCreateScrapedTransceiver({
            partNumber, vendorId, formFactor: cat.form_factor, name,
            url: productUrl,
          });
          const isNew = await upsertPriceObservation({
            transceiverId: t.id, sourceVendorId: vendorId,
            price, currency: resolvedCurrency,
            stockLevel: "unknown",
            url: productUrl,
            contentHash: contentHash(`${partNumber}:${price}:${resolvedCurrency}`),
          });
          if (isNew) newItems++;
          total++;
        } catch (e) {
          logger.warn(`Wiitek ${cat.form_factor}: failed to store ${partNumber}`, { err: e });
        }
      }
    } catch (e) {
      logger.warn(`Wiitek ${cat.form_factor} failed`, { err: e });
    }
  }

  logger.info(`Wiitek done — ${total} total, ${newItems} new`);
}

View File

@ -13,6 +13,9 @@ export const pool = new Pool({
max: 10, max: 10,
}); });
// Alias for `pool`: `db` refers to the very same object under a second export
// name, because some scrapers import { db } instead of { pool }.
export const db = pool;
export async function upsertPriceObservation(params: { export async function upsertPriceObservation(params: {
transceiverId: string; transceiverId: string;
sourceVendorId: string; sourceVendorId: string;