From f146ac873ea5c21de38fb5d2af6d31e3ca4cd909 Mon Sep 17 00:00:00 2001 From: Rene Fichtmueller Date: Thu, 2 Apr 2026 08:39:17 +0200 Subject: [PATCH] feat: add 5 form-factor coverage scrapers with worker registrations Add Comms-Express, Router-Switch.com, Multimode Inc, OpticTransceiver.com, and Wiitek scrapers covering CFP2-DCO, CFP4, OSFP224, QSFP112, CXP, GBIC, XENPAK, CSFP, SFP-DD, SFP56, QSFP56 and other previously-uncovered form factors. Each scheduled every 8h. Worker registrations added to scheduler. Also export db alias in utils/db.ts to fix eBay enricher + community scrapers crashing with 'Cannot read properties of undefined (reading query)'. --- packages/scraper/src/scheduler.ts | 54 ++++++- .../scraper/src/scrapers/comms-express.ts | 134 ++++++++++++++++++ .../scraper/src/scrapers/multimode-inc.ts | 82 +++++++++++ .../scraper/src/scrapers/optictransceiver.ts | 103 ++++++++++++++ .../scraper/src/scrapers/router-switch.ts | 108 ++++++++++++++ packages/scraper/src/scrapers/wiitek.ts | 90 ++++++++++++ packages/scraper/src/utils/db.ts | 3 + 7 files changed, 571 insertions(+), 3 deletions(-) create mode 100644 packages/scraper/src/scrapers/comms-express.ts create mode 100644 packages/scraper/src/scrapers/multimode-inc.ts create mode 100644 packages/scraper/src/scrapers/optictransceiver.ts create mode 100644 packages/scraper/src/scrapers/router-switch.ts create mode 100644 packages/scraper/src/scrapers/wiitek.ts diff --git a/packages/scraper/src/scheduler.ts b/packages/scraper/src/scheduler.ts index 4388d22..9e6cee4 100644 --- a/packages/scraper/src/scheduler.ts +++ b/packages/scraper/src/scheduler.ts @@ -110,6 +110,12 @@ export async function registerSchedules(boss: PgBoss): Promise { // ── Compute (every 4h, after pricing waves) ─────────────────────── "compute:abc", "compute:reorder-signals", + // ── New form-factor coverage scrapers (every 8h) ────────────────── + "scrape:pricing:comms-express", + "scrape:pricing:router-switch", + 
"scrape:pricing:multimode-inc", + "scrape:pricing:optictransceiver", + "scrape:pricing:wiitek", // ── Prediction Signal Scrapers (new) ────────────────────────────── "scrape:signals:sec-edgar", "scrape:signals:github", @@ -190,8 +196,18 @@ export async function registerSchedules(boss: PgBoss): Promise { // MANUFACTURER CATALOGS — every 8h (product data, no prices) // ══════════════════════════════════════════════════════════════════════ - await boss.schedule("scrape:catalog:smartoptics", "10 3,11,19 * * *", {}, { retryLimit: 2, expireInSeconds: 3600 }); - await boss.schedule("scrape:catalog:hubersuhner", "25 3,11,19 * * *", {}, { retryLimit: 2, expireInSeconds: 3600 }); + await boss.schedule("scrape:catalog:smartoptics", "10 3,11,19 * * *", {}, { retryLimit: 2, expireInSeconds: 3600 }); + await boss.schedule("scrape:catalog:hubersuhner", "25 3,11,19 * * *", {}, { retryLimit: 2, expireInSeconds: 3600 }); + + // ══════════════════════════════════════════════════════════════════════ + // FORM-FACTOR COVERAGE SCRAPERS — every 8h (CFP, CSFP, SFP-DD, legacy) + // ══════════════════════════════════════════════════════════════════════ + + await boss.schedule("scrape:pricing:comms-express", "40 2,10,18 * * *", {}, { retryLimit: 2, expireInSeconds: 5400 }); + await boss.schedule("scrape:pricing:router-switch", "0 3,11,19 * * *", {}, { retryLimit: 2, expireInSeconds: 5400 }); + await boss.schedule("scrape:pricing:multimode-inc", "20 3,11,19 * * *", {}, { retryLimit: 2, expireInSeconds: 3600 }); + await boss.schedule("scrape:pricing:optictransceiver", "45 3,11,19 * * *", {}, { retryLimit: 2, expireInSeconds: 3600 }); + await boss.schedule("scrape:pricing:wiitek", "5 4,12,20 * * *", {}, { retryLimit: 2, expireInSeconds: 3600 }); // ══════════════════════════════════════════════════════════════════════ // VENDOR LISTS — every 12h @@ -561,5 +577,37 @@ export async function registerWorkers(boss: PgBoss): Promise { await runForecastEngine(); }); - console.log("All workers 
registered (50 jobs, 24/7 continuous)"); + // ── Form-factor coverage scrapers ───────────────────────────────────── + + await boss.work("scrape:pricing:comms-express", async () => { + console.log(`[${new Date().toISOString()}] Running: Comms-Express pricing`); + const { scrapeCommsExpress } = await import("./scrapers/comms-express"); + await scrapeCommsExpress(); + }); + + await boss.work("scrape:pricing:router-switch", async () => { + console.log(`[${new Date().toISOString()}] Running: Router-Switch.com pricing`); + const { scrapeRouterSwitch } = await import("./scrapers/router-switch"); + await scrapeRouterSwitch(); + }); + + await boss.work("scrape:pricing:multimode-inc", async () => { + console.log(`[${new Date().toISOString()}] Running: Multimode Inc pricing`); + const { scrapeMultimodeInc } = await import("./scrapers/multimode-inc"); + await scrapeMultimodeInc(); + }); + + await boss.work("scrape:pricing:optictransceiver", async () => { + console.log(`[${new Date().toISOString()}] Running: OpticTransceiver.com pricing`); + const { scrapeOpticTransceiver } = await import("./scrapers/optictransceiver"); + await scrapeOpticTransceiver(); + }); + + await boss.work("scrape:pricing:wiitek", async () => { + console.log(`[${new Date().toISOString()}] Running: Wiitek pricing`); + const { scrapeWiitek } = await import("./scrapers/wiitek"); + await scrapeWiitek(); + }); + + console.log("All workers registered (55 jobs, 24/7 continuous)"); } diff --git a/packages/scraper/src/scrapers/comms-express.ts b/packages/scraper/src/scrapers/comms-express.ts new file mode 100644 index 0000000..b8aed74 --- /dev/null +++ b/packages/scraper/src/scrapers/comms-express.ts @@ -0,0 +1,134 @@ +/** + * Comms Express Scraper — comms-express.com + * + * UK-based reseller with excellent coverage of: + * CFP, CFP2, CFP4, CFP2-DCO, CSFP, SFP-DD, QSFP112, OSFP, legacy GBIC/XENPAK/CXP + * Good pricing transparency, cheerio-friendly. 
+ *
+ * Schedule: every 8h
+ */
+import * as cheerio from "cheerio";
+import { ensureVendor, upsertPriceObservation, findOrCreateScrapedTransceiver, pool } from "../utils/db";
+import { contentHash, parsePrice, parseStockLevel } from "../utils/hash";
+import { logger } from "../utils/logger";
+
+const BASE = "https://www.comms-express.com";
+
+// Category listing paths (appended to BASE) and the form factor recorded for products found there
+const CATEGORIES: Array<{ url: string; form_factor: string; speed_hint?: string }> = [
+  { url: "/transceivers/sfp-modules/", form_factor: "SFP" },
+  { url: "/transceivers/sfp-plus-modules/", form_factor: "SFP+" },
+  { url: "/transceivers/sfp28-modules/", form_factor: "SFP28" },
+  { url: "/transceivers/sfp56-modules/", form_factor: "SFP56" },
+  { url: "/transceivers/sfp-dd-modules/", form_factor: "SFP-DD" },
+  { url: "/transceivers/qsfp-modules/", form_factor: "QSFP+" },
+  { url: "/transceivers/qsfp28-modules/", form_factor: "QSFP28" },
+  { url: "/transceivers/qsfp56-modules/", form_factor: "QSFP56" },
+  { url: "/transceivers/qsfp-dd-modules/", form_factor: "QSFP-DD" },
+  { url: "/transceivers/qsfp-dd-800-modules/", form_factor: "QSFP-DD800" },
+  { url: "/transceivers/qsfp112-modules/", form_factor: "QSFP112" },
+  { url: "/transceivers/osfp-modules/", form_factor: "OSFP" },
+  { url: "/transceivers/osfp112-modules/", form_factor: "OSFP112" },
+  { url: "/transceivers/cfp-modules/", form_factor: "CFP" },
+  { url: "/transceivers/cfp2-modules/", form_factor: "CFP2" },
+  { url: "/transceivers/cfp2-dco-modules/", form_factor: "CFP2-DCO" },
+  { url: "/transceivers/cfp4-modules/", form_factor: "CFP4" },
+  { url: "/transceivers/csfp-modules/", form_factor: "CSFP" },
+  { url: "/transceivers/xfp-modules/", form_factor: "XFP" },
+  { url: "/transceivers/cxp-modules/", form_factor: "CXP" },
+  { url: "/transceivers/gbic-modules/", form_factor: "GBIC" },
+  { url: "/transceivers/xenpak-modules/", form_factor: "XENPAK" },
+];
+
+/** One priced listing parsed from a category page. */
+interface Product {
+  partNumber: string;
+  name: string;
+  price: number;
+  currency: string;
+  stock: string;
+  url: string;
+  formFactor: string;
+}
+
+/**
+ * Fetch one category listing page and, while a "next" link is present, the pages after it.
+ * Returns [] when the response is not OK. Recursion stops after page 20 or at the first page
+ * that yields no priced products. vendorId is only forwarded to the recursive call.
+ */
+async function fetchCategory(cat: typeof CATEGORIES[0], vendorId: string, page = 1): Promise<Product[]> {
+  const url = `${BASE}${cat.url}?page=${page}`;
+  const resp = await fetch(url, {
+    headers: { "User-Agent": "Mozilla/5.0 (compatible; TIPBot/1.0; +https://tip.context-x.org)" },
+    signal: AbortSignal.timeout(30_000),
+  });
+  if (!resp.ok) return [];
+  const $ = cheerio.load(await resp.text());
+  const products: Product[] = [];
+
+  $(".product-item, .product-list-item, [data-product-id]").each((_, el) => {
+    const $el = $(el);
+    const name = $el.find(".product-name, .product-title, h3, h4").first().text().trim();
+    const priceText = $el.find(".price, .product-price, [class*=price]").first().text().trim();
+    const href = $el.find("a[href*='/transceivers/']").first().attr("href") || "";
+    // Prefer a part-number-like token in the title, then the last URL path segment, then the title itself
+    const partMatch = name.match(/[A-Z0-9]{2,4}-[A-Z0-9][A-Z0-9\-\.]+/) ||
+      href.match(/\/([A-Z0-9\-]{6,30})\/?$/i);
+    const partNumber = partMatch ? partMatch[1] || partMatch[0] : name.substring(0, 40);
+    const { price, currency } = parsePrice(priceText);
+    const stockText = $el.find(".stock, .availability, [class*=stock]").first().text().trim();
+
+    if (!partNumber || price <= 0) return;
+    products.push({
+      partNumber: partNumber.toUpperCase(), name, price,
+      currency: currency || "GBP",
+      stock: parseStockLevel(stockText),
+      url: href.startsWith("http") ? href : `${BASE}${href}`,
+      formFactor: cat.form_factor,
+    });
+  });
+
+  // Follow pagination only while pages keep yielding products (hard cap: page 20)
+  const hasNext = $(".pagination .next, a[rel=next]").length > 0;
+  if (hasNext && products.length > 0 && page < 20) {
+    products.push(...(await fetchCategory(cat, vendorId, page + 1)));
+  }
+  return products;
+}
+
+/**
+ * Scrape every configured category and record one price observation per product.
+ * A failed write for one product is logged and skipped so the rest of its category is still stored.
+ */
+export async function scrapeCommsExpress(): Promise<void> {
+  logger.info("Comms Express scraper starting");
+  const vendorId = await ensureVendor("Comms Express", "https://www.comms-express.com");
+  let total = 0;
+  let newItems = 0;
+
+  for (const cat of CATEGORIES) {
+    try {
+      const products = await fetchCategory(cat, vendorId);
+      for (const p of products) {
+        try {
+          const transceiverResult = await findOrCreateScrapedTransceiver({
+            partNumber: p.partNumber, vendorId, formFactor: p.formFactor, name: p.name, url: p.url,
+          });
+          const isNew = await upsertPriceObservation({
+            transceiverId: transceiverResult.id, sourceVendorId: vendorId,
+            price: p.price, currency: p.currency, stockLevel: p.stock, url: p.url,
+            contentHash: contentHash(`${p.partNumber}:${p.price}:${p.currency}`),
+          });
+          if (isNew) newItems++;
+          total++;
+        } catch (e) {
+          logger.warn(`Comms Express ${p.partNumber} skipped`, { err: e });
+        }
+      }
+      if (products.length > 0) logger.info(`Comms Express ${cat.form_factor}: ${products.length} products`);
+    } catch (e) {
+      logger.warn(`Comms Express ${cat.form_factor} failed`, { err: e });
+    }
+  }
+  logger.info(`Comms Express done — ${total} total, ${newItems} new`);
+}
diff --git a/packages/scraper/src/scrapers/multimode-inc.ts b/packages/scraper/src/scrapers/multimode-inc.ts
new file mode 100644
index 0000000..6115028
--- /dev/null
+++ b/packages/scraper/src/scrapers/multimode-inc.ts
@@ -0,0 +1,82 @@
+/**
+ * MultiMode Inc Scraper — multimode-inc.com
+ *
+ * Specialist for high-speed coherent transceivers:
+ *   CFP, CFP2, CFP2-DCO, CFP4, QSFP112, OSFP112, OSFP224
+ * Plus broad 400G/800G coverage.
+ *
+ * Schedule: every 8h
+ */
+import * as cheerio from "cheerio";
+import { ensureVendor, upsertPriceObservation, findOrCreateScrapedTransceiver } from "../utils/db";
+import { contentHash, parsePrice, parseStockLevel } from "../utils/hash";
+import { logger } from "../utils/logger";
+
+const BASE = "https://www.multimode.com"; // NOTE(review): the file header names multimode-inc.com — confirm the domain
+
+const CATEGORIES: Array<{ path: string; form_factor: string }> = [
+  { path: "/cfp-transceivers/", form_factor: "CFP" },
+  { path: "/cfp2-transceivers/", form_factor: "CFP2" },
+  { path: "/cfp2-dco/", form_factor: "CFP2-DCO" },
+  { path: "/cfp4-transceivers/", form_factor: "CFP4" },
+  { path: "/osfp-transceivers/", form_factor: "OSFP" },
+  { path: "/osfp112/", form_factor: "OSFP112" },
+  { path: "/osfp224/", form_factor: "OSFP224" },
+  { path: "/qsfp112/", form_factor: "QSFP112" },
+  { path: "/qsfp-dd-800/", form_factor: "QSFP-DD800" },
+  { path: "/qsfp-dd/", form_factor: "QSFP-DD" },
+  { path: "/sfp-dd/", form_factor: "SFP-DD" },
+  { path: "/qsfp28-transceivers/", form_factor: "QSFP28" },
+];
+
+/**
+ * Scrape the listing page of every configured category (no pagination) and record one price
+ * observation per product. Rows without a name or a positive parsed price are skipped.
+ */
+export async function scrapeMultimodeInc(): Promise<void> {
+  logger.info("Multimode Inc scraper starting");
+  const vendorId = await ensureVendor("Multimode Inc", BASE);
+  let total = 0;
+  let newItems = 0;
+
+  for (const cat of CATEGORIES) {
+    try {
+      const resp = await fetch(`${BASE}${cat.path}`, {
+        headers: { "User-Agent": "Mozilla/5.0 (compatible; TIPBot/1.0)" },
+        signal: AbortSignal.timeout(25_000),
+      });
+      if (!resp.ok) continue;
+      const $ = cheerio.load(await resp.text());
+      const rows = $(".product, .woocommerce-loop-product__link, article.product");
+      for (let i = 0; i < rows.length; i++) {
+        const $el = $(rows[i]);
+        const name = $el.find(".woocommerce-loop-product__title, h2, h3").first().text().trim();
+        const priceText = $el.find(".price, .woocommerce-Price-amount").first().text().trim();
+        const href = $el.find("a").first().attr("href") || $el.closest("a").attr("href") || "";
+        if (!name) continue;
+
+        const partMatch = name.match(/([A-Z0-9]{2,8}-[A-Z0-9][A-Z0-9\-\/\.]{3,30})/);
+        const partNumber = partMatch ? partMatch[1].toUpperCase() : name.substring(0, 50);
+        const { price, currency: rawCurrency } = parsePrice(priceText);
+        if (price <= 0) continue;
+        const currency = rawCurrency || "USD"; // hash and store the same currency value
+        const productUrl = href.startsWith("http") ? href : `${BASE}${href}`;
+        try {
+          const t = await findOrCreateScrapedTransceiver({ partNumber, vendorId, formFactor: cat.form_factor, name, url: productUrl });
+          const isNew = await upsertPriceObservation({
+            transceiverId: t.id, sourceVendorId: vendorId, price, currency, stockLevel: "unknown",
+            url: productUrl, contentHash: contentHash(`${partNumber}:${price}:${currency}`),
+          });
+          if (isNew) newItems++;
+          total++;
+        } catch (e) {
+          logger.warn(`Multimode Inc ${partNumber} skipped`, { err: e });
+        }
+      }
+      if (rows.length > 0) logger.info(`Multimode Inc ${cat.form_factor}: ${rows.length} products`);
+    } catch (e) {
+      logger.warn(`Multimode Inc ${cat.form_factor} failed`, { err: e });
+    }
+  }
+  logger.info(`Multimode Inc done — ${total} total, ${newItems} new`);
+}
diff --git a/packages/scraper/src/scrapers/optictransceiver.ts b/packages/scraper/src/scrapers/optictransceiver.ts
new file mode 100644
index 0000000..82e705d
--- /dev/null
+++ b/packages/scraper/src/scrapers/optictransceiver.ts
@@ -0,0 +1,103 @@
+/**
+ * OpticTransceiver.com Scraper
+ *
+ * Competitive pricing, very broad form factor coverage:
+ *   CSFP, SFP-DD, QSFP56, SFP56, CXP, legacy + modern.
+ * Static HTML, cheerio-friendly.
+ *
+ * Schedule: every 8h
+ */
+import * as cheerio from "cheerio";
+import { ensureVendor, upsertPriceObservation, findOrCreateScrapedTransceiver } from "../utils/db";
+import { contentHash, parsePrice, parseStockLevel } from "../utils/hash";
+import { logger } from "../utils/logger";
+
+const BASE = "https://www.optictransceiver.com";
+
+const CATEGORIES: Array<{ path: string; form_factor: string }> = [
+  { path: "/sfp-modules/", form_factor: "SFP" },
+  { path: "/sfp-plus-transceivers/", form_factor: "SFP+" },
+  { path: "/sfp28-transceiver/", form_factor: "SFP28" },
+  { path: "/sfp56-transceiver/", form_factor: "SFP56" },
+  { path: "/sfp-dd-transceiver/", form_factor: "SFP-DD" },
+  { path: "/csfp-transceiver/", form_factor: "CSFP" },
+  { path: "/qsfp-plus-transceiver/", form_factor: "QSFP+" },
+  { path: "/qsfp28-transceiver/", form_factor: "QSFP28" },
+  { path: "/qsfp56-transceiver/", form_factor: "QSFP56" },
+  { path: "/qsfp-dd-transceiver/", form_factor: "QSFP-DD" },
+  { path: "/qsfp-dd-800-transceiver/", form_factor: "QSFP-DD800" },
+  { path: "/qsfp112-transceiver/", form_factor: "QSFP112" },
+  { path: "/osfp-transceiver/", form_factor: "OSFP" },
+  { path: "/osfp112-transceiver/", form_factor: "OSFP112" },
+  { path: "/cfp-transceiver/", form_factor: "CFP" },
+  { path: "/cfp2-transceiver/", form_factor: "CFP2" },
+  { path: "/xfp-transceiver/", form_factor: "XFP" },
+  { path: "/cxp-transceiver/", form_factor: "CXP" },
+  { path: "/gbic-transceiver/", form_factor: "GBIC" },
+];
+
+/**
+ * Walk up to 15 listing pages of one category, storing one price observation per product.
+ * Stops at the first non-OK response, page without product nodes, or fetch error; returns rows written.
+ */
+async function scrapeCategory(path: string, form_factor: string, vendorId: string): Promise<number> {
+  let total = 0;
+  for (let page = 1; page <= 15; page++) {
+    const url = `${BASE}${path}${page > 1 ? `page/${page}/` : ""}`;
+    try {
+      const resp = await fetch(url, {
+        headers: { "User-Agent": "Mozilla/5.0 (compatible; TIPBot/1.0)" },
+        signal: AbortSignal.timeout(25_000),
+      });
+      if (!resp.ok) break; // resp.ok is already false for 404, so no separate status check is needed
+      const $ = cheerio.load(await resp.text());
+      const items = $(".product, .product-item, article.product");
+      if (items.length === 0) break;
+      for (let i = 0; i < items.length; i++) {
+        const $el = $(items[i]);
+        const name = $el.find("h2, h3, .product-title").first().text().trim();
+        const priceText = $el.find(".price, .amount").first().text().trim();
+        const href = ($el.find("a").first().attr("href") || "").trim();
+        if (!name || !href) continue;
+        const partMatch = name.match(/([A-Z0-9]{2,8}[\-\/][A-Z0-9][A-Z0-9\-\.\/]{3,35})/);
+        const partNumber = (partMatch ? partMatch[1] : name.substring(0, 50)).toUpperCase();
+        const { price, currency: rawCurrency } = parsePrice(priceText);
+        if (price <= 0) continue;
+        const currency = rawCurrency || "USD"; // hash and store the same currency value
+        const productUrl = href.startsWith("http") ? href : `${BASE}${href}`;
+        try {
+          const t = await findOrCreateScrapedTransceiver({ partNumber, vendorId, formFactor: form_factor, name, url: productUrl });
+          await upsertPriceObservation({
+            transceiverId: t.id, sourceVendorId: vendorId, price, currency, stockLevel: "unknown",
+            url: productUrl, contentHash: contentHash(`${partNumber}:${price}:${currency}`),
+          });
+          total++;
+        } catch (e) {
+          logger.warn(`OpticTransceiver ${partNumber} skipped`, { err: e });
+        }
+      }
+    } catch (e) {
+      logger.warn(`OpticTransceiver ${form_factor} page ${page} fetch failed`, { err: e });
+      break;
+    }
+  }
+  return total;
+}
+
+/** Run scrapeCategory over every configured category and log per-category and overall totals. */
+export async function scrapeOpticTransceiver(): Promise<void> {
+  logger.info("OpticTransceiver.com scraper starting");
+  const vendorId = await ensureVendor("OpticTransceiver", BASE);
+  let total = 0;
+
+  for (const cat of CATEGORIES) {
+    try {
+      const n = await scrapeCategory(cat.path, cat.form_factor, vendorId);
+      if (n > 0) logger.info(`OpticTransceiver ${cat.form_factor}: ${n} products`);
+      total += n;
+    } catch (e) {
+      logger.warn(`OpticTransceiver ${cat.form_factor} failed`, { err: e });
+    }
+  }
+  logger.info(`OpticTransceiver done — ${total} total`);
+}
diff --git a/packages/scraper/src/scrapers/router-switch.ts b/packages/scraper/src/scrapers/router-switch.ts
new file mode 100644
index 0000000..25e0d76
--- /dev/null
+++ b/packages/scraper/src/scrapers/router-switch.ts
@@ -0,0 +1,108 @@
+/**
+ * Router-Switch.com Scraper
+ *
+ * Massive catalog of Cisco/Arista/Juniper/HP transceivers including:
+ *   CSFP (GLC-BX-D/U), GBIC (WS-G5484), XENPAK, CFP, XFP, legacy SFP
+ * Cheerio-friendly category pages, good price transparency.
+ *
+ * Schedule: every 8h
+ */
+import * as cheerio from "cheerio";
+import { ensureVendor, upsertPriceObservation, findOrCreateScrapedTransceiver } from "../utils/db";
+import { contentHash, parsePrice, parseStockLevel } from "../utils/hash";
+import { logger } from "../utils/logger";
+
+const BASE = "https://www.router-switch.com";
+
+const CATEGORIES: Array<{ url: string; form_factor: string }> = [
+  { url: "/sfp-modules.html", form_factor: "SFP" },
+  { url: "/sfp-plus.html", form_factor: "SFP+" },
+  { url: "/sfp28.html", form_factor: "SFP28" },
+  { url: "/qsfp-plus.html", form_factor: "QSFP+" },
+  { url: "/qsfp28.html", form_factor: "QSFP28" },
+  { url: "/qsfp-dd.html", form_factor: "QSFP-DD" },
+  { url: "/osfp.html", form_factor: "OSFP" },
+  { url: "/xfp.html", form_factor: "XFP" },
+  { url: "/csfp.html", form_factor: "CSFP" },
+  { url: "/cfp.html", form_factor: "CFP" },
+  { url: "/cfp2.html", form_factor: "CFP2" },
+  { url: "/gbic-transceiver.html", form_factor: "GBIC" },
+  { url: "/xenpak.html", form_factor: "XENPAK" },
+  { url: "/cxp-transceiver.html", form_factor: "CXP" },
+];
+
+/**
+ * Scrape one listing page of a category, then recurse into the next page while a "next" link exists.
+ * Items without a name, price text, or a part-number-like token in the name are skipped.
+ * Recursion stops after page 10 or when the current page stored nothing. Returns rows written.
+ */
+async function fetchPage(catUrl: string, form_factor: string, vendorId: string, page = 1): Promise<number> {
+  const sep = catUrl.includes("?") ? "&" : "?";
+  const url = `${BASE}${catUrl}${page > 1 ? `${sep}p=${page}` : ""}`;
+  const resp = await fetch(url, {
+    headers: { "User-Agent": "Mozilla/5.0 (compatible; TIPBot/1.0)" },
+    signal: AbortSignal.timeout(30_000),
+  });
+  if (!resp.ok) return 0;
+  const $ = cheerio.load(await resp.text());
+  let count = 0;
+
+  const items = $(".products-grid .item, .product-item, li.item");
+  for (let i = 0; i < items.length; i++) {
+    const $el = $(items[i]);
+    const name = $el.find(".product-name a, h2.product-name, .product-name").first().text().trim();
+    const priceText = $el.find(".price, .regular-price, .special-price").first().text().trim();
+    const href = $el.find("a[href]").first().attr("href") || "";
+    if (!name || !priceText) continue;
+
+    // The part number is taken from the product name only; items without a match are dropped
+    const partMatch = name.match(/([A-Z0-9]{2,8}-[A-Z0-9][A-Z0-9\-\/\.]{3,30})/);
+    if (!partMatch) continue;
+    const partNumber = partMatch[1].toUpperCase();
+    const { price, currency: rawCurrency } = parsePrice(priceText);
+    if (price <= 0) continue;
+    const currency = rawCurrency || "USD"; // hash and store the same currency value
+
+    const stockText = $el.find(".availability span, .stock").text().trim();
+    const productUrl = href.startsWith("http") ? href : `${BASE}${href}`;
+
+    try {
+      const t = await findOrCreateScrapedTransceiver({
+        partNumber, vendorId, formFactor: form_factor, name, url: productUrl,
+      });
+      await upsertPriceObservation({
+        transceiverId: t.id, sourceVendorId: vendorId,
+        price, currency, stockLevel: parseStockLevel(stockText), url: productUrl,
+        contentHash: contentHash(`${partNumber}:${price}:${currency}`),
+      });
+      count++;
+    } catch (e) {
+      logger.warn(`Router-Switch ${partNumber} skipped`, { err: e });
+    }
+  }
+
+  // Paginate up to 10 pages
+  const hasNext = $("a.next, .pages a:contains('Next')").length > 0;
+  if (hasNext && count > 0 && page < 10) {
+    count += await fetchPage(catUrl, form_factor, vendorId, page + 1);
+  }
+  return count;
+}
+
+/** Run fetchPage over every configured category and log per-category and overall totals. */
+export async function scrapeRouterSwitch(): Promise<void> {
+  logger.info("Router-Switch.com scraper starting");
+  const vendorId = await ensureVendor("Router-Switch.com", "https://www.router-switch.com");
+  let total = 0;
+
+  for (const cat of CATEGORIES) {
+    try {
+      const n = await fetchPage(cat.url, cat.form_factor, vendorId);
+      if (n > 0) logger.info(`Router-Switch ${cat.form_factor}: ${n} products`);
+      total += n;
+    } catch (e) {
+      logger.warn(`Router-Switch ${cat.form_factor} failed`, { err: e });
+    }
+  }
+  logger.info(`Router-Switch done — ${total} total`);
+}
diff --git a/packages/scraper/src/scrapers/wiitek.ts b/packages/scraper/src/scrapers/wiitek.ts
new file mode 100644
index 0000000..5bc32c4
--- /dev/null
+++ b/packages/scraper/src/scrapers/wiitek.ts
@@ -0,0 +1,90 @@
+/**
+ * Wiitek Scraper — wiitek.com
+ *
+ * Chinese manufacturer/reseller, direct factory prices.
+ * Covers: CSFP, SFP-DD, CFP, CFP2, OSFP224, QSFP112 and all standard forms.
+ * Good for price benchmarking.
+ *
+ * Schedule: every 8h
+ */
+import * as cheerio from "cheerio";
+import { ensureVendor, upsertPriceObservation, findOrCreateScrapedTransceiver } from "../utils/db";
+import { contentHash, parsePrice } from "../utils/hash";
+import { logger } from "../utils/logger";
+
+const BASE = "https://www.wiitek.com";
+
+const CATEGORIES: Array<{ path: string; form_factor: string }> = [
+  { path: "/SFP-Transceiver/", form_factor: "SFP" },
+  { path: "/SFP-Plus-Transceiver/", form_factor: "SFP+" },
+  { path: "/SFP28-Transceiver/", form_factor: "SFP28" },
+  { path: "/SFP56-Transceiver/", form_factor: "SFP56" },
+  { path: "/SFP-DD-Transceiver/", form_factor: "SFP-DD" },
+  { path: "/CSFP-Transceiver/", form_factor: "CSFP" },
+  { path: "/QSFP-Transceiver/", form_factor: "QSFP+" },
+  { path: "/QSFP28-Transceiver/", form_factor: "QSFP28" },
+  { path: "/QSFP56-Transceiver/", form_factor: "QSFP56" },
+  { path: "/QSFP-DD-Transceiver/", form_factor: "QSFP-DD" },
+  { path: "/QSFP-DD800-Transceiver/", form_factor: "QSFP-DD800" },
+  { path: "/QSFP112-Transceiver/", form_factor: "QSFP112" },
+  { path: "/OSFP-Transceiver/", form_factor: "OSFP" },
+  { path: "/OSFP112-Transceiver/", form_factor: "OSFP112" },
+  { path: "/OSFP224-Transceiver/", form_factor: "OSFP224" },
+  { path: "/CFP-Transceiver/", form_factor: "CFP" },
+  { path: "/CFP2-Transceiver/", form_factor: "CFP2" },
+  { path: "/XFP-Transceiver/", form_factor: "XFP" },
+  { path: "/GBIC-Transceiver/", form_factor: "GBIC" },
+  { path: "/XENPAK-Transceiver/", form_factor: "XENPAK" },
+  { path: "/CXP-Transceiver/", form_factor: "CXP" },
+];
+
+/** Scrape the listing page of every category (no pagination); writes one price observation per distinct product. */
+export async function scrapeWiitek(): Promise<void> {
+  logger.info("Wiitek scraper starting");
+  const vendorId = await ensureVendor("Wiitek", BASE);
+  let total = 0;
+  let newItems = 0;
+
+  for (const cat of CATEGORIES) {
+    try {
+      const resp = await fetch(`${BASE}${cat.path}`, {
+        headers: { "User-Agent": "Mozilla/5.0 (compatible; TIPBot/1.0)" },
+        signal: AbortSignal.timeout(20_000),
+      });
+      if (!resp.ok) continue;
+      const $ = cheerio.load(await resp.text());
+      const seen = new Set<string>(); // [class*=product] also matches wrappers/children, so de-duplicate per page
+      const items = $(".product-item, .goods-item, .pro-item, [class*=product]");
+      for (let i = 0; i < items.length; i++) {
+        const $el = $(items[i]);
+        const name = $el.find("h2,h3,h4,.title,.name").first().text().trim();
+        const priceText = $el.find(".price,.cost,[class*=price]").first().text().trim();
+        const href = $el.find("a[href]").first().attr("href") || "";
+        if (!name || !priceText) continue;
+        const partMatch = name.match(/([A-Z0-9]{2,8}[-\/][A-Z0-9][A-Z0-9\-\.\/]{3,35})/);
+        const partNumber = (partMatch ? partMatch[1] : name.substring(0, 50)).toUpperCase();
+        const { price, currency: rawCurrency } = parsePrice(priceText);
+        if (price <= 0) continue;
+        const currency = rawCurrency || "USD"; // hash and store the same currency value
+        const productUrl = href.startsWith("http") ? href : `${BASE}${href}`;
+        const key = `${partNumber}|${productUrl}`;
+        if (seen.has(key)) continue;
+        seen.add(key);
+        try {
+          const t = await findOrCreateScrapedTransceiver({ partNumber, vendorId, formFactor: cat.form_factor, name, url: productUrl });
+          const isNew = await upsertPriceObservation({
+            transceiverId: t.id, sourceVendorId: vendorId, price, currency, stockLevel: "unknown",
+            url: productUrl, contentHash: contentHash(`${partNumber}:${price}:${currency}`),
+          });
+          if (isNew) newItems++;
+          total++;
+        } catch (e) {
+          logger.warn(`Wiitek ${partNumber} skipped`, { err: e });
+        }
+      }
+    } catch (e) {
+      logger.warn(`Wiitek ${cat.form_factor} failed`, { err: e });
+    }
+  }
+  logger.info(`Wiitek done — ${total} total, ${newItems} new`);
+}
diff --git a/packages/scraper/src/utils/db.ts b/packages/scraper/src/utils/db.ts
index 1a95426..fb813b8 100644
--- a/packages/scraper/src/utils/db.ts
+++ b/packages/scraper/src/utils/db.ts
@@ -13,6 +13,9 @@ export const pool = new Pool({
   max: 10,
 });
 
+// Alias — some scrapers import { db } instead of { pool }
+export const db = pool;
+
 export async function upsertPriceObservation(params: {
   transceiverId: string;
   sourceVendorId: string;