feat: add 5 form-factor coverage scrapers with worker registrations
Add Comms-Express, Router-Switch.com, Multimode Inc, OpticTransceiver.com, and Wiitek scrapers covering CFP2-DCO, CFP4, OSFP224, QSFP112, CXP, GBIC, XENPAK, CSFP, SFP-DD, SFP56, QSFP56, and other previously uncovered form factors. Each scraper is scheduled every 8h, and its worker registration is added to the scheduler. Also export a `db` alias in utils/db.ts to fix the eBay enricher and community scrapers crashing with "Cannot read properties of undefined (reading 'query')".
This commit is contained in:
parent
b7613538bf
commit
f146ac873e
@ -110,6 +110,12 @@ export async function registerSchedules(boss: PgBoss): Promise<void> {
|
|||||||
// ── Compute (every 4h, after pricing waves) ───────────────────────
|
// ── Compute (every 4h, after pricing waves) ───────────────────────
|
||||||
"compute:abc",
|
"compute:abc",
|
||||||
"compute:reorder-signals",
|
"compute:reorder-signals",
|
||||||
|
// ── New form-factor coverage scrapers (every 8h) ──────────────────
|
||||||
|
"scrape:pricing:comms-express",
|
||||||
|
"scrape:pricing:router-switch",
|
||||||
|
"scrape:pricing:multimode-inc",
|
||||||
|
"scrape:pricing:optictransceiver",
|
||||||
|
"scrape:pricing:wiitek",
|
||||||
// ── Prediction Signal Scrapers (new) ──────────────────────────────
|
// ── Prediction Signal Scrapers (new) ──────────────────────────────
|
||||||
"scrape:signals:sec-edgar",
|
"scrape:signals:sec-edgar",
|
||||||
"scrape:signals:github",
|
"scrape:signals:github",
|
||||||
@ -190,8 +196,18 @@ export async function registerSchedules(boss: PgBoss): Promise<void> {
|
|||||||
// MANUFACTURER CATALOGS — every 8h (product data, no prices)
|
// MANUFACTURER CATALOGS — every 8h (product data, no prices)
|
||||||
// ══════════════════════════════════════════════════════════════════════
|
// ══════════════════════════════════════════════════════════════════════
|
||||||
|
|
||||||
await boss.schedule("scrape:catalog:smartoptics", "10 3,11,19 * * *", {}, { retryLimit: 2, expireInSeconds: 3600 });
|
await boss.schedule("scrape:catalog:smartoptics", "10 3,11,19 * * *", {}, { retryLimit: 2, expireInSeconds: 3600 });
|
||||||
await boss.schedule("scrape:catalog:hubersuhner", "25 3,11,19 * * *", {}, { retryLimit: 2, expireInSeconds: 3600 });
|
await boss.schedule("scrape:catalog:hubersuhner", "25 3,11,19 * * *", {}, { retryLimit: 2, expireInSeconds: 3600 });
|
||||||
|
|
||||||
|
// ══════════════════════════════════════════════════════════════════════
|
||||||
|
// FORM-FACTOR COVERAGE SCRAPERS — every 8h (CFP, CSFP, SFP-DD, legacy)
|
||||||
|
// ══════════════════════════════════════════════════════════════════════
|
||||||
|
|
||||||
|
await boss.schedule("scrape:pricing:comms-express", "40 2,10,18 * * *", {}, { retryLimit: 2, expireInSeconds: 5400 });
|
||||||
|
await boss.schedule("scrape:pricing:router-switch", "0 3,11,19 * * *", {}, { retryLimit: 2, expireInSeconds: 5400 });
|
||||||
|
await boss.schedule("scrape:pricing:multimode-inc", "20 3,11,19 * * *", {}, { retryLimit: 2, expireInSeconds: 3600 });
|
||||||
|
await boss.schedule("scrape:pricing:optictransceiver", "45 3,11,19 * * *", {}, { retryLimit: 2, expireInSeconds: 3600 });
|
||||||
|
await boss.schedule("scrape:pricing:wiitek", "5 4,12,20 * * *", {}, { retryLimit: 2, expireInSeconds: 3600 });
|
||||||
|
|
||||||
// ══════════════════════════════════════════════════════════════════════
|
// ══════════════════════════════════════════════════════════════════════
|
||||||
// VENDOR LISTS — every 12h
|
// VENDOR LISTS — every 12h
|
||||||
@ -561,5 +577,37 @@ export async function registerWorkers(boss: PgBoss): Promise<void> {
|
|||||||
await runForecastEngine();
|
await runForecastEngine();
|
||||||
});
|
});
|
||||||
|
|
||||||
console.log("All workers registered (50 jobs, 24/7 continuous)");
|
// ── Form-factor coverage scrapers ─────────────────────────────────────
|
||||||
|
|
||||||
|
await boss.work("scrape:pricing:comms-express", async () => {
|
||||||
|
console.log(`[${new Date().toISOString()}] Running: Comms-Express pricing`);
|
||||||
|
const { scrapeCommsExpress } = await import("./scrapers/comms-express");
|
||||||
|
await scrapeCommsExpress();
|
||||||
|
});
|
||||||
|
|
||||||
|
await boss.work("scrape:pricing:router-switch", async () => {
|
||||||
|
console.log(`[${new Date().toISOString()}] Running: Router-Switch.com pricing`);
|
||||||
|
const { scrapeRouterSwitch } = await import("./scrapers/router-switch");
|
||||||
|
await scrapeRouterSwitch();
|
||||||
|
});
|
||||||
|
|
||||||
|
await boss.work("scrape:pricing:multimode-inc", async () => {
|
||||||
|
console.log(`[${new Date().toISOString()}] Running: Multimode Inc pricing`);
|
||||||
|
const { scrapeMultimodeInc } = await import("./scrapers/multimode-inc");
|
||||||
|
await scrapeMultimodeInc();
|
||||||
|
});
|
||||||
|
|
||||||
|
await boss.work("scrape:pricing:optictransceiver", async () => {
|
||||||
|
console.log(`[${new Date().toISOString()}] Running: OpticTransceiver.com pricing`);
|
||||||
|
const { scrapeOpticTransceiver } = await import("./scrapers/optictransceiver");
|
||||||
|
await scrapeOpticTransceiver();
|
||||||
|
});
|
||||||
|
|
||||||
|
await boss.work("scrape:pricing:wiitek", async () => {
|
||||||
|
console.log(`[${new Date().toISOString()}] Running: Wiitek pricing`);
|
||||||
|
const { scrapeWiitek } = await import("./scrapers/wiitek");
|
||||||
|
await scrapeWiitek();
|
||||||
|
});
|
||||||
|
|
||||||
|
console.log("All workers registered (55 jobs, 24/7 continuous)");
|
||||||
}
|
}
|
||||||
|
|||||||
134
packages/scraper/src/scrapers/comms-express.ts
Normal file
134
packages/scraper/src/scrapers/comms-express.ts
Normal file
@ -0,0 +1,134 @@
|
|||||||
|
/**
|
||||||
|
* Comms Express Scraper — comms-express.com
|
||||||
|
*
|
||||||
|
* UK-based reseller with excellent coverage of:
|
||||||
|
* CFP, CFP2, CFP4, CFP2-DCO, CSFP, SFP-DD, QSFP112, OSFP, legacy GBIC/XENPAK/CXP
|
||||||
|
* Good pricing transparency, cheerio-friendly.
|
||||||
|
*
|
||||||
|
* Schedule: every 8h
|
||||||
|
*/
|
||||||
|
import * as cheerio from "cheerio";
|
||||||
|
import { ensureVendor, upsertPriceObservation, findOrCreateScrapedTransceiver, pool } from "../utils/db";
|
||||||
|
import { contentHash, parsePrice, parseStockLevel } from "../utils/hash";
|
||||||
|
import { logger } from "../utils/logger";
|
||||||
|
|
||||||
|
/** Origin of the Comms Express storefront; category and product paths are appended to it. */
const BASE = "https://www.comms-express.com";

/**
 * Category listing pages to crawl. Every product found on a page is tagged with
 * that entry's `form_factor`. `speed_hint` is optional and no entry sets it.
 */
const CATEGORIES: { url: string; form_factor: string; speed_hint?: string }[] = [
  { url: "/transceivers/sfp-modules/", form_factor: "SFP" },
  { url: "/transceivers/sfp-plus-modules/", form_factor: "SFP+" },
  { url: "/transceivers/sfp28-modules/", form_factor: "SFP28" },
  { url: "/transceivers/sfp56-modules/", form_factor: "SFP56" },
  { url: "/transceivers/sfp-dd-modules/", form_factor: "SFP-DD" },
  { url: "/transceivers/qsfp-modules/", form_factor: "QSFP+" },
  { url: "/transceivers/qsfp28-modules/", form_factor: "QSFP28" },
  { url: "/transceivers/qsfp56-modules/", form_factor: "QSFP56" },
  { url: "/transceivers/qsfp-dd-modules/", form_factor: "QSFP-DD" },
  { url: "/transceivers/qsfp-dd-800-modules/", form_factor: "QSFP-DD800" },
  { url: "/transceivers/qsfp112-modules/", form_factor: "QSFP112" },
  { url: "/transceivers/osfp-modules/", form_factor: "OSFP" },
  { url: "/transceivers/osfp112-modules/", form_factor: "OSFP112" },
  { url: "/transceivers/cfp-modules/", form_factor: "CFP" },
  { url: "/transceivers/cfp2-modules/", form_factor: "CFP2" },
  { url: "/transceivers/cfp2-dco-modules/", form_factor: "CFP2-DCO" },
  { url: "/transceivers/cfp4-modules/", form_factor: "CFP4" },
  { url: "/transceivers/csfp-modules/", form_factor: "CSFP" },
  { url: "/transceivers/xfp-modules/", form_factor: "XFP" },
  { url: "/transceivers/cxp-modules/", form_factor: "CXP" },
  { url: "/transceivers/gbic-modules/", form_factor: "GBIC" },
  { url: "/transceivers/xenpak-modules/", form_factor: "XENPAK" },
];

/** One priced listing extracted from a category page. */
interface Product {
  /** Part number taken from the product name or URL. */
  partNumber: string;
  /** Product title as shown on the listing. */
  name: string;
  /** Numeric price parsed from the listing's price text. */
  price: number;
  /** Currency code for `price`. */
  currency: string;
  /** Stock level derived from the listing's availability text. */
  stock: string;
  /** Product page URL. */
  url: string;
  /** Form factor of the category the product was listed under. */
  formFactor: string;
}
|
||||||
|
|
||||||
|
/**
 * Fetches the listing pages of one Comms Express category and extracts the
 * priced products found on them.
 *
 * Pages are requested as `<category>?page=N`, starting at `page`. The walk
 * continues while the current page shows a "next" link, yielded at least one
 * priced product, and the cap of 20 pages has not been reached.
 *
 * @param cat      Category descriptor (relative URL plus the form factor to tag products with).
 * @param vendorId Not used by this function; kept so existing call sites stay valid.
 * @param page     1-based page to start from (defaults to the first page).
 * @returns Products parsed from the visited pages, in page order. A non-OK HTTP
 *          response ends the walk and returns what was collected so far.
 * @throws Re-throws a network/timeout error only when it happens on the starting
 *         page; on later pages the error is logged and earlier results are kept.
 */
async function fetchCategory(cat: typeof CATEGORIES[0], vendorId: string, page = 1): Promise<Product[]> {
  const LAST_PAGE = 20;
  const collected: Product[] = [];

  for (let current = page; ; current++) {
    const pageUrl = `${BASE}${cat.url}?page=${current}`;
    let html: string;
    try {
      const resp = await fetch(pageUrl, {
        headers: { "User-Agent": "Mozilla/5.0 (compatible; TIPBot/1.0; +https://tip.context-x.org)" },
        signal: AbortSignal.timeout(30_000),
      });
      if (!resp.ok) {
        // Surface blocked or missing pages instead of silently reporting an empty category.
        logger.warn(`Comms Express ${cat.form_factor}: skipping ${pageUrl}`, {
          err: new Error(`HTTP ${resp.status}`),
        });
        break;
      }
      html = await resp.text();
    } catch (e) {
      // A failure on the first page means the category is unusable: let the caller log it.
      if (current === page) throw e;
      // On later pages keep what earlier pages already produced.
      logger.warn(`Comms Express ${cat.form_factor}: stopped at page ${current}`, { err: e });
      break;
    }

    const $ = cheerio.load(html);
    const onPage: Product[] = [];

    $(".product-item, .product-list-item, [data-product-id]").each((_, el) => {
      const $el = $(el);
      const name = $el.find(".product-name, .product-title, h3, h4").first().text().trim();
      const priceText = $el.find(".price, .product-price, [class*=price]").first().text().trim();
      const href = $el.find("a[href*='/transceivers/']").first().attr("href") || "";

      // Prefer a part number in the title, then the last URL segment, then a title prefix.
      const fromName = name.match(/[A-Z0-9]{2,4}-[A-Z0-9][A-Z0-9\-\.]+/)?.[0];
      const fromHref = href.match(/\/([A-Z0-9\-]{6,30})\/?$/i)?.[1];
      const partNumber = fromName ?? fromHref ?? name.substring(0, 40);

      const { price, currency } = parsePrice(priceText);
      const stockText = $el.find(".stock, .availability, [class*=stock]").first().text().trim();

      if (!partNumber || price <= 0) return;
      onPage.push({
        partNumber: partNumber.toUpperCase(),
        name,
        price,
        currency: currency || "GBP",
        stock: parseStockLevel(stockText),
        url: href.startsWith("http") ? href : `${BASE}${href}`,
        formFactor: cat.form_factor,
      });
    });

    collected.push(...onPage);

    const hasNext = $(".pagination .next, a[rel=next]").length > 0;
    if (!hasNext || onPage.length === 0 || current >= LAST_PAGE) break;
  }

  return collected;
}
|
||||||
|
|
||||||
|
/**
 * Entry point of the Comms Express scraper.
 *
 * Ensures the vendor row exists, walks every configured category, and for each
 * product found records the transceiver and a price observation. Failures are
 * isolated: a category that cannot be fetched is logged and skipped, and a
 * product that cannot be stored does not stop the rest of its category.
 */
export async function scrapeCommsExpress(): Promise<void> {
  logger.info("Comms Express scraper starting");
  const vendorId = await ensureVendor("Comms Express", "https://www.comms-express.com");
  let total = 0;
  let newItems = 0;

  for (const cat of CATEGORIES) {
    let products: Product[];
    try {
      products = await fetchCategory(cat, vendorId);
    } catch (e) {
      logger.warn(`Comms Express ${cat.form_factor} failed`, { err: e });
      continue;
    }

    let failures = 0;
    let firstFailure: unknown;

    for (const p of products) {
      try {
        const transceiverResult = await findOrCreateScrapedTransceiver({
          partNumber: p.partNumber,
          vendorId,
          formFactor: p.formFactor,
          name: p.name,
          url: p.url,
        });
        const hash = contentHash(`${p.partNumber}:${p.price}:${p.currency}`);
        const isNew = await upsertPriceObservation({
          transceiverId: transceiverResult.id,
          sourceVendorId: vendorId,
          price: p.price,
          currency: p.currency,
          stockLevel: p.stock,
          url: p.url,
          contentHash: hash,
        });
        if (isNew) newItems++;
        total++;
      } catch (e) {
        // Keep going: one bad row must not discard the rest of the category.
        if (failures === 0) firstFailure = e;
        failures++;
      }
    }

    if (failures > 0) {
      // One summary line per category (with the first error) avoids log floods.
      logger.warn(`Comms Express ${cat.form_factor}: ${failures} products could not be stored`, {
        err: firstFailure,
      });
    }
    if (products.length > 0) {
      logger.info(`Comms Express ${cat.form_factor}: ${products.length} products`);
    }
  }

  logger.info(`Comms Express done — ${total} total, ${newItems} new`);
}
|
||||||
82
packages/scraper/src/scrapers/multimode-inc.ts
Normal file
82
packages/scraper/src/scrapers/multimode-inc.ts
Normal file
@ -0,0 +1,82 @@
|
|||||||
|
/**
|
||||||
|
* MultiMode Inc Scraper — multimode-inc.com
|
||||||
|
*
|
||||||
|
* Specialist for high-speed coherent transceivers:
|
||||||
|
* CFP, CFP2, CFP2-DCO, CFP4, QSFP112, OSFP112, OSFP224
|
||||||
|
* Plus broad 400G/800G coverage.
|
||||||
|
*
|
||||||
|
* Schedule: every 8h
|
||||||
|
*/
|
||||||
|
import * as cheerio from "cheerio";
|
||||||
|
import { ensureVendor, upsertPriceObservation, findOrCreateScrapedTransceiver } from "../utils/db";
|
||||||
|
import { contentHash, parsePrice, parseStockLevel } from "../utils/hash";
|
||||||
|
import { logger } from "../utils/logger";
|
||||||
|
|
||||||
|
/**
 * Origin that category and product paths are appended to.
 * NOTE(review): the file header names multimode-inc.com while this constant
 * points at multimode.com — confirm which host is the intended one.
 */
const BASE = "https://www.multimode.com";

/** Category listing pages to crawl, each tagged with the form factor recorded for its products. */
const CATEGORIES: { path: string; form_factor: string }[] = [
  { path: "/cfp-transceivers/", form_factor: "CFP" },
  { path: "/cfp2-transceivers/", form_factor: "CFP2" },
  { path: "/cfp2-dco/", form_factor: "CFP2-DCO" },
  { path: "/cfp4-transceivers/", form_factor: "CFP4" },
  { path: "/osfp-transceivers/", form_factor: "OSFP" },
  { path: "/osfp112/", form_factor: "OSFP112" },
  { path: "/osfp224/", form_factor: "OSFP224" },
  { path: "/qsfp112/", form_factor: "QSFP112" },
  { path: "/qsfp-dd-800/", form_factor: "QSFP-DD800" },
  { path: "/qsfp-dd/", form_factor: "QSFP-DD" },
  { path: "/sfp-dd/", form_factor: "SFP-DD" },
  { path: "/qsfp28-transceivers/", form_factor: "QSFP28" },
];
|
||||||
|
|
||||||
|
/**
 * Entry point of the Multimode Inc scraper.
 *
 * Ensures the vendor row exists, fetches the single listing page of every
 * configured category, and records a transceiver plus price observation for
 * each priced product on it. Stock is always recorded as "unknown".
 *
 * The row selector matches both a product container and the product link inside
 * it, so the same listing can be matched more than once; listings are therefore
 * de-duplicated per category by part number and URL before being stored.
 */
export async function scrapeMultimodeInc(): Promise<void> {
  logger.info("Multimode Inc scraper starting");
  const vendorId = await ensureVendor("Multimode Inc", BASE);
  let total = 0;
  let newItems = 0;

  for (const cat of CATEGORIES) {
    try {
      const pageUrl = `${BASE}${cat.path}`;
      const resp = await fetch(pageUrl, {
        headers: { "User-Agent": "Mozilla/5.0 (compatible; TIPBot/1.0)" },
        signal: AbortSignal.timeout(25_000),
      });
      if (!resp.ok) {
        logger.warn(`Multimode Inc ${cat.form_factor}: skipping ${pageUrl}`, {
          err: new Error(`HTTP ${resp.status}`),
        });
        continue;
      }
      const $ = cheerio.load(await resp.text());

      const rows = $(".product, .woocommerce-loop-product__link, article.product");
      const seen = new Set<string>();
      let stored = 0;
      let failures = 0;
      let firstFailure: unknown;

      for (let i = 0; i < rows.length; i++) {
        const $el = $(rows[i]);
        const name = $el.find(".woocommerce-loop-product__title, h2, h3").first().text().trim();
        const priceText = $el.find(".price, .woocommerce-Price-amount").first().text().trim();
        // When the matched element is itself the link, it has no inner <a>; use it directly.
        const href = $el.find("a").first().attr("href") || $el.closest("a").attr("href") || "";
        if (!name) continue;

        const partMatch = name.match(/([A-Z0-9]{2,8}-[A-Z0-9][A-Z0-9\-\/\.]{3,30})/);
        const partNumber = partMatch ? partMatch[1].toUpperCase() : name.substring(0, 50);
        const { price, currency } = parsePrice(priceText);
        if (price <= 0) continue;

        const productUrl = href.startsWith("http") ? href : `${BASE}${href}`;
        const key = `${partNumber}|${productUrl}`;
        if (seen.has(key)) continue;
        seen.add(key);

        // Hash the same currency that is stored, so the hash tracks the persisted row.
        const resolvedCurrency = currency || "USD";

        try {
          const t = await findOrCreateScrapedTransceiver({
            partNumber,
            vendorId,
            formFactor: cat.form_factor,
            name,
            url: productUrl,
          });
          const isNew = await upsertPriceObservation({
            transceiverId: t.id,
            sourceVendorId: vendorId,
            price,
            currency: resolvedCurrency,
            stockLevel: "unknown",
            url: productUrl,
            contentHash: contentHash(`${partNumber}:${price}:${resolvedCurrency}`),
          });
          if (isNew) newItems++;
          stored++;
          total++;
        } catch (e) {
          // Best effort per row, but remember the failure so it gets reported.
          if (failures === 0) firstFailure = e;
          failures++;
        }
      }

      if (failures > 0) {
        logger.warn(`Multimode Inc ${cat.form_factor}: ${failures} products could not be stored`, {
          err: firstFailure,
        });
      }
      // Report what was actually stored rather than the number of DOM matches.
      if (stored > 0) logger.info(`Multimode Inc ${cat.form_factor}: ${stored} products`);
    } catch (e) {
      logger.warn(`Multimode Inc ${cat.form_factor} failed`, { err: e });
    }
  }

  logger.info(`Multimode Inc done — ${total} total, ${newItems} new`);
}
|
||||||
103
packages/scraper/src/scrapers/optictransceiver.ts
Normal file
103
packages/scraper/src/scrapers/optictransceiver.ts
Normal file
@ -0,0 +1,103 @@
|
|||||||
|
/**
|
||||||
|
* OpticTransceiver.com Scraper
|
||||||
|
*
|
||||||
|
* Competitive pricing, very broad form factor coverage:
|
||||||
|
* CSFP, SFP-DD, QSFP56, SFP56, CXP, legacy + modern.
|
||||||
|
* Static HTML, cheerio-friendly.
|
||||||
|
*
|
||||||
|
* Schedule: every 8h
|
||||||
|
*/
|
||||||
|
import * as cheerio from "cheerio";
|
||||||
|
import { ensureVendor, upsertPriceObservation, findOrCreateScrapedTransceiver } from "../utils/db";
|
||||||
|
import { contentHash, parsePrice, parseStockLevel } from "../utils/hash";
|
||||||
|
import { logger } from "../utils/logger";
|
||||||
|
|
||||||
|
/** Origin of the OpticTransceiver.com storefront; category and product paths are appended to it. */
const BASE = "https://www.optictransceiver.com";

/** Category listing pages to crawl, each tagged with the form factor recorded for its products. */
const CATEGORIES: { path: string; form_factor: string }[] = [
  { path: "/sfp-modules/", form_factor: "SFP" },
  { path: "/sfp-plus-transceivers/", form_factor: "SFP+" },
  { path: "/sfp28-transceiver/", form_factor: "SFP28" },
  { path: "/sfp56-transceiver/", form_factor: "SFP56" },
  { path: "/sfp-dd-transceiver/", form_factor: "SFP-DD" },
  { path: "/csfp-transceiver/", form_factor: "CSFP" },
  { path: "/qsfp-plus-transceiver/", form_factor: "QSFP+" },
  { path: "/qsfp28-transceiver/", form_factor: "QSFP28" },
  { path: "/qsfp56-transceiver/", form_factor: "QSFP56" },
  { path: "/qsfp-dd-transceiver/", form_factor: "QSFP-DD" },
  { path: "/qsfp-dd-800-transceiver/", form_factor: "QSFP-DD800" },
  { path: "/qsfp112-transceiver/", form_factor: "QSFP112" },
  { path: "/osfp-transceiver/", form_factor: "OSFP" },
  { path: "/osfp112-transceiver/", form_factor: "OSFP112" },
  { path: "/cfp-transceiver/", form_factor: "CFP" },
  { path: "/cfp2-transceiver/", form_factor: "CFP2" },
  { path: "/xfp-transceiver/", form_factor: "XFP" },
  { path: "/cxp-transceiver/", form_factor: "CXP" },
  { path: "/gbic-transceiver/", form_factor: "GBIC" },
];
|
||||||
|
|
||||||
|
/**
 * Scrapes one OpticTransceiver.com category, following `page/N/` pagination for
 * at most 15 pages, and stores a transceiver plus price observation for every
 * priced product. Stock is always recorded as "unknown".
 *
 * The walk stops at the first non-OK response, the first page without product
 * nodes, the first page that contains nothing not already seen (guards against
 * a site that serves the same listing for every page number), or on a fetch
 * error, which is logged.
 *
 * @param path        Category path relative to `BASE`, with a trailing slash.
 * @param form_factor Form factor recorded for every product in this category.
 * @param vendorId    Vendor row the observations are attributed to.
 * @returns Number of products stored.
 */
async function scrapeCategory(path: string, form_factor: string, vendorId: string): Promise<number> {
  const MAX_PAGES = 15;
  const seen = new Set<string>();
  let total = 0;
  let failures = 0;
  let firstFailure: unknown;

  for (let page = 1; page <= MAX_PAGES; page++) {
    const url = `${BASE}${path}${page > 1 ? `page/${page}/` : ""}`;
    let unseenOnPage = 0;

    try {
      const resp = await fetch(url, {
        headers: { "User-Agent": "Mozilla/5.0 (compatible; TIPBot/1.0)" },
        signal: AbortSignal.timeout(25_000),
      });
      // `ok` is false for every non-2xx status, including the 404 past the last page.
      if (!resp.ok) break;
      const $ = cheerio.load(await resp.text());
      const items = $(".product, .product-item, article.product");
      if (items.length === 0) break;

      for (let i = 0; i < items.length; i++) {
        const $el = $(items[i]);
        const name = $el.find("h2, h3, .product-title").first().text().trim();
        const priceText = $el.find(".price, .amount").first().text().trim();
        const href = ($el.find("a").first().attr("href") || "").trim();
        if (!name || !href) continue;

        const partMatch = name.match(/([A-Z0-9]{2,8}[\-\/][A-Z0-9][A-Z0-9\-\.\/]{3,35})/);
        const partNumber = (partMatch ? partMatch[1] : name.substring(0, 50)).toUpperCase();
        const productUrl = href.startsWith("http") ? href : `${BASE}${href}`;

        // Skip listings already handled (overlapping selectors or repeated pages).
        const key = `${partNumber}|${productUrl}`;
        if (seen.has(key)) continue;
        seen.add(key);
        unseenOnPage++;

        const { price, currency } = parsePrice(priceText);
        if (price <= 0) continue;

        // Hash the same currency that is stored, so the hash tracks the persisted row.
        const resolvedCurrency = currency || "USD";

        try {
          const t = await findOrCreateScrapedTransceiver({
            partNumber,
            vendorId,
            formFactor: form_factor,
            name,
            url: productUrl,
          });
          await upsertPriceObservation({
            transceiverId: t.id,
            sourceVendorId: vendorId,
            price,
            currency: resolvedCurrency,
            stockLevel: "unknown",
            url: productUrl,
            contentHash: contentHash(`${partNumber}:${price}:${resolvedCurrency}`),
          });
          total++;
        } catch (e) {
          // Best effort per row, but remember the failure so it gets reported.
          if (failures === 0) firstFailure = e;
          failures++;
        }
      }
    } catch (e) {
      logger.warn(`OpticTransceiver ${form_factor}: page ${page} failed`, { err: e });
      break;
    }

    // Nothing new on this page: the site is repeating itself, so stop paginating.
    if (unseenOnPage === 0) break;
  }

  if (failures > 0) {
    logger.warn(`OpticTransceiver ${form_factor}: ${failures} products could not be stored`, {
      err: firstFailure,
    });
  }
  return total;
}
|
||||||
|
|
||||||
|
/**
 * Entry point of the OpticTransceiver.com scraper: ensures the vendor row
 * exists, scrapes each configured category in turn, and logs per-category and
 * overall counts. A category that throws is logged and skipped.
 */
export async function scrapeOpticTransceiver(): Promise<void> {
  logger.info("OpticTransceiver.com scraper starting");
  const vendorId = await ensureVendor("OpticTransceiver", BASE);
  let grandTotal = 0;

  for (const category of CATEGORIES) {
    const { path, form_factor: formFactor } = category;
    try {
      const stored = await scrapeCategory(path, formFactor, vendorId);
      if (stored > 0) {
        logger.info(`OpticTransceiver ${formFactor}: ${stored} products`);
      }
      grandTotal += stored;
    } catch (e) {
      logger.warn(`OpticTransceiver ${formFactor} failed`, { err: e });
    }
  }

  logger.info(`OpticTransceiver done — ${grandTotal} total`);
}
|
||||||
108
packages/scraper/src/scrapers/router-switch.ts
Normal file
108
packages/scraper/src/scrapers/router-switch.ts
Normal file
@ -0,0 +1,108 @@
|
|||||||
|
/**
|
||||||
|
* Router-Switch.com Scraper
|
||||||
|
*
|
||||||
|
* Massive catalog of Cisco/Arista/Juniper/HP transceivers including:
|
||||||
|
* CSFP (GLC-BX-D/U), GBIC (WS-G5484), XENPAK, CFP, XFP, legacy SFP
|
||||||
|
* Cheerio-friendly category pages, good price transparency.
|
||||||
|
*
|
||||||
|
* Schedule: every 8h
|
||||||
|
*/
|
||||||
|
import * as cheerio from "cheerio";
|
||||||
|
import { ensureVendor, upsertPriceObservation, findOrCreateScrapedTransceiver } from "../utils/db";
|
||||||
|
import { contentHash, parsePrice, parseStockLevel } from "../utils/hash";
|
||||||
|
import { logger } from "../utils/logger";
|
||||||
|
|
||||||
|
/** Origin of the Router-Switch.com storefront; category and product paths are appended to it. */
const BASE = "https://www.router-switch.com";

/** Category listing pages to crawl, each tagged with the form factor recorded for its products. */
const CATEGORIES: { url: string; form_factor: string }[] = [
  { url: "/sfp-modules.html", form_factor: "SFP" },
  { url: "/sfp-plus.html", form_factor: "SFP+" },
  { url: "/sfp28.html", form_factor: "SFP28" },
  { url: "/qsfp-plus.html", form_factor: "QSFP+" },
  { url: "/qsfp28.html", form_factor: "QSFP28" },
  { url: "/qsfp-dd.html", form_factor: "QSFP-DD" },
  { url: "/osfp.html", form_factor: "OSFP" },
  { url: "/xfp.html", form_factor: "XFP" },
  { url: "/csfp.html", form_factor: "CSFP" },
  { url: "/cfp.html", form_factor: "CFP" },
  { url: "/cfp2.html", form_factor: "CFP2" },
  { url: "/gbic-transceiver.html", form_factor: "GBIC" },
  { url: "/xenpak.html", form_factor: "XENPAK" },
  { url: "/cxp-transceiver.html", form_factor: "CXP" },
];
|
||||||
|
|
||||||
|
/**
 * Scrapes one Router-Switch.com category starting at `page` and stores a
 * transceiver plus price observation for every priced product whose name
 * contains a recognisable part number.
 *
 * Later pages are requested with a `p=N` query parameter. The walk continues
 * while the current page shows a "next" link, stored at least one product, and
 * the cap of 10 pages has not been reached.
 *
 * @param catUrl      Category path relative to `BASE`.
 * @param form_factor Form factor recorded for every product in this category.
 * @param vendorId    Vendor row the observations are attributed to.
 * @param page        1-based page to start from (defaults to the first page).
 * @returns Number of products stored across the visited pages.
 * @throws Re-throws a network/timeout error only when it happens on the starting
 *         page; on later pages the error is logged and the count so far is returned.
 */
async function fetchPage(catUrl: string, form_factor: string, vendorId: string, page = 1): Promise<number> {
  const LAST_PAGE = 10;
  const sep = catUrl.includes("?") ? "&" : "?";
  let stored = 0;
  let failures = 0;
  let firstFailure: unknown;

  for (let current = page; ; current++) {
    const url = `${BASE}${catUrl}${current > 1 ? `${sep}p=${current}` : ""}`;
    let html: string;
    try {
      const resp = await fetch(url, {
        headers: { "User-Agent": "Mozilla/5.0 (compatible; TIPBot/1.0)" },
        signal: AbortSignal.timeout(30_000),
      });
      if (!resp.ok) {
        // Surface blocked or missing pages instead of silently reporting zero products.
        logger.warn(`Router-Switch ${form_factor}: skipping ${url}`, {
          err: new Error(`HTTP ${resp.status}`),
        });
        break;
      }
      html = await resp.text();
    } catch (e) {
      // A failure on the first page means the category is unusable: let the caller log it.
      if (current === page) throw e;
      // Rows from earlier pages are already stored; keep their count.
      logger.warn(`Router-Switch ${form_factor}: stopped at page ${current}`, { err: e });
      break;
    }

    const $ = cheerio.load(html);
    let storedOnPage = 0;

    const items = $(".products-grid .item, .product-item, li.item");
    for (let i = 0; i < items.length; i++) {
      const $el = $(items[i]);
      const name = $el.find(".product-name a, h2.product-name, .product-name").first().text().trim();
      const priceText = $el.find(".price, .regular-price, .special-price").first().text().trim();
      const href = $el.find("a[href]").first().attr("href") || "";
      if (!name || !priceText) continue;

      // Only the product name is searched; listings without a part number are skipped.
      const partMatch = name.match(/([A-Z0-9]{2,8}-[A-Z0-9][A-Z0-9\-\/\.]{3,30})/);
      if (!partMatch) continue;
      const partNumber = partMatch[1].toUpperCase();
      const { price, currency } = parsePrice(priceText);
      if (price <= 0) continue;

      const stockText = $el.find(".availability span, .stock").text().trim();
      const productUrl = href.startsWith("http") ? href : `${BASE}${href}`;
      // Hash the same currency that is stored, so the hash tracks the persisted row.
      const resolvedCurrency = currency || "USD";

      try {
        const t = await findOrCreateScrapedTransceiver({
          partNumber,
          vendorId,
          formFactor: form_factor,
          name,
          url: productUrl,
        });
        await upsertPriceObservation({
          transceiverId: t.id,
          sourceVendorId: vendorId,
          price,
          currency: resolvedCurrency,
          stockLevel: parseStockLevel(stockText),
          url: productUrl,
          contentHash: contentHash(`${partNumber}:${price}:${resolvedCurrency}`),
        });
        storedOnPage++;
      } catch (e) {
        // Best effort per row, but remember the failure so it gets reported.
        if (failures === 0) firstFailure = e;
        failures++;
      }
    }

    stored += storedOnPage;

    // Paginate up to 10 pages
    const hasNext = $("a.next, .pages a:contains('Next')").length > 0;
    if (!hasNext || storedOnPage === 0 || current >= LAST_PAGE) break;
  }

  if (failures > 0) {
    logger.warn(`Router-Switch ${form_factor}: ${failures} products could not be stored`, {
      err: firstFailure,
    });
  }
  return stored;
}
|
||||||
|
|
||||||
|
/**
 * Entry point of the Router-Switch.com scraper: ensures the vendor row exists,
 * scrapes each configured category in turn, and logs per-category and overall
 * counts. A category that throws is logged and skipped.
 */
export async function scrapeRouterSwitch(): Promise<void> {
  logger.info("Router-Switch.com scraper starting");
  const vendorId = await ensureVendor("Router-Switch.com", "https://www.router-switch.com");
  let grandTotal = 0;

  for (const category of CATEGORIES) {
    const { url: categoryUrl, form_factor: formFactor } = category;
    try {
      const stored = await fetchPage(categoryUrl, formFactor, vendorId);
      if (stored > 0) {
        logger.info(`Router-Switch ${formFactor}: ${stored} products`);
      }
      grandTotal += stored;
    } catch (e) {
      logger.warn(`Router-Switch ${formFactor} failed`, { err: e });
    }
  }

  logger.info(`Router-Switch done — ${grandTotal} total`);
}
|
||||||
90
packages/scraper/src/scrapers/wiitek.ts
Normal file
90
packages/scraper/src/scrapers/wiitek.ts
Normal file
@ -0,0 +1,90 @@
|
|||||||
|
/**
|
||||||
|
* Wiitek Scraper — wiitek.com
|
||||||
|
*
|
||||||
|
* Chinese manufacturer/reseller, direct factory prices.
|
||||||
|
* Covers: CSFP, SFP-DD, CFP, CFP2, OSFP224, QSFP112 and all standard forms.
|
||||||
|
* Good for price benchmarking.
|
||||||
|
*
|
||||||
|
* Schedule: every 8h
|
||||||
|
*/
|
||||||
|
import * as cheerio from "cheerio";
|
||||||
|
import { ensureVendor, upsertPriceObservation, findOrCreateScrapedTransceiver } from "../utils/db";
|
||||||
|
import { contentHash, parsePrice } from "../utils/hash";
|
||||||
|
import { logger } from "../utils/logger";
|
||||||
|
|
||||||
|
/** Origin of the Wiitek storefront; category and product paths are appended to it. */
const BASE = "https://www.wiitek.com";

/** Category listing pages to crawl, each tagged with the form factor recorded for its products. */
const CATEGORIES: { path: string; form_factor: string }[] = [
  { path: "/SFP-Transceiver/", form_factor: "SFP" },
  { path: "/SFP-Plus-Transceiver/", form_factor: "SFP+" },
  { path: "/SFP28-Transceiver/", form_factor: "SFP28" },
  { path: "/SFP56-Transceiver/", form_factor: "SFP56" },
  { path: "/SFP-DD-Transceiver/", form_factor: "SFP-DD" },
  { path: "/CSFP-Transceiver/", form_factor: "CSFP" },
  { path: "/QSFP-Transceiver/", form_factor: "QSFP+" },
  { path: "/QSFP28-Transceiver/", form_factor: "QSFP28" },
  { path: "/QSFP56-Transceiver/", form_factor: "QSFP56" },
  { path: "/QSFP-DD-Transceiver/", form_factor: "QSFP-DD" },
  { path: "/QSFP-DD800-Transceiver/", form_factor: "QSFP-DD800" },
  { path: "/QSFP112-Transceiver/", form_factor: "QSFP112" },
  { path: "/OSFP-Transceiver/", form_factor: "OSFP" },
  { path: "/OSFP112-Transceiver/", form_factor: "OSFP112" },
  { path: "/OSFP224-Transceiver/", form_factor: "OSFP224" },
  { path: "/CFP-Transceiver/", form_factor: "CFP" },
  { path: "/CFP2-Transceiver/", form_factor: "CFP2" },
  { path: "/XFP-Transceiver/", form_factor: "XFP" },
  { path: "/GBIC-Transceiver/", form_factor: "GBIC" },
  { path: "/XENPAK-Transceiver/", form_factor: "XENPAK" },
  { path: "/CXP-Transceiver/", form_factor: "CXP" },
];
|
||||||
|
|
||||||
|
/**
 * Entry point of the Wiitek scraper.
 *
 * Ensures the vendor row exists, fetches the single listing page of every
 * configured category, and records a transceiver plus price observation for
 * each priced product on it. Stock is always recorded as "unknown".
 *
 * The item selector includes `[class*=product]`, which matches any element
 * whose class merely contains "product" (wrappers and nested nodes as well as
 * cards), so the same listing can be matched several times; listings are
 * therefore de-duplicated per category by part number and URL before storing.
 */
export async function scrapeWiitek(): Promise<void> {
  logger.info("Wiitek scraper starting");
  const vendorId = await ensureVendor("Wiitek", BASE);
  let total = 0;
  let newItems = 0;

  for (const cat of CATEGORIES) {
    try {
      const pageUrl = `${BASE}${cat.path}`;
      const resp = await fetch(pageUrl, {
        headers: { "User-Agent": "Mozilla/5.0 (compatible; TIPBot/1.0)" },
        signal: AbortSignal.timeout(20_000),
      });
      if (!resp.ok) {
        logger.warn(`Wiitek ${cat.form_factor}: skipping ${pageUrl}`, {
          err: new Error(`HTTP ${resp.status}`),
        });
        continue;
      }
      const $ = cheerio.load(await resp.text());

      const items = $(".product-item, .goods-item, .pro-item, [class*=product]");
      const seen = new Set<string>();
      let stored = 0;
      let failures = 0;
      let firstFailure: unknown;

      for (let i = 0; i < items.length; i++) {
        const $el = $(items[i]);
        const name = $el.find("h2,h3,h4,.title,.name").first().text().trim();
        const priceText = $el.find(".price,.cost,[class*=price]").first().text().trim();
        const href = $el.find("a[href]").first().attr("href") || "";
        if (!name || !priceText) continue;

        const partMatch = name.match(/([A-Z0-9]{2,8}[-\/][A-Z0-9][A-Z0-9\-\.\/]{3,35})/);
        const partNumber = (partMatch ? partMatch[1] : name.substring(0, 50)).toUpperCase();
        const { price, currency } = parsePrice(priceText);
        if (price <= 0) continue;

        const productUrl = href.startsWith("http") ? href : `${BASE}${href}`;
        const key = `${partNumber}|${productUrl}`;
        if (seen.has(key)) continue;
        seen.add(key);

        // Hash the same currency that is stored, so the hash tracks the persisted row.
        const resolvedCurrency = currency || "USD";

        try {
          const t = await findOrCreateScrapedTransceiver({
            partNumber,
            vendorId,
            formFactor: cat.form_factor,
            name,
            url: productUrl,
          });
          const isNew = await upsertPriceObservation({
            transceiverId: t.id,
            sourceVendorId: vendorId,
            price,
            currency: resolvedCurrency,
            stockLevel: "unknown",
            url: productUrl,
            contentHash: contentHash(`${partNumber}:${price}:${resolvedCurrency}`),
          });
          if (isNew) newItems++;
          stored++;
          total++;
        } catch (e) {
          // Best effort per row, but remember the failure so it gets reported.
          if (failures === 0) firstFailure = e;
          failures++;
        }
      }

      if (failures > 0) {
        logger.warn(`Wiitek ${cat.form_factor}: ${failures} products could not be stored`, {
          err: firstFailure,
        });
      }
      if (stored > 0) logger.info(`Wiitek ${cat.form_factor}: ${stored} products`);
    } catch (e) {
      logger.warn(`Wiitek ${cat.form_factor} failed`, { err: e });
    }
  }

  logger.info(`Wiitek done — ${total} total, ${newItems} new`);
}
|
||||||
@ -13,6 +13,9 @@ export const pool = new Pool({
|
|||||||
max: 10,
|
max: 10,
|
||||||
});
|
});
|
||||||
|
|
||||||
|
// Alias — some scrapers import { db } instead of { pool }
|
||||||
|
export const db = pool;
|
||||||
|
|
||||||
export async function upsertPriceObservation(params: {
|
export async function upsertPriceObservation(params: {
|
||||||
transceiverId: string;
|
transceiverId: string;
|
||||||
sourceVendorId: string;
|
sourceVendorId: string;
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user