feat: add NADDOD, QSFPTEK, and AddOn Networks scrapers

Three new fetch-based price scrapers for compatible optics vendors:
- NADDOD: WooCommerce, USD, ~800+ SKUs
- QSFPTEK: Custom PHP shop, USD, ~1000+ SKUs
- AddOn Networks: Magento/custom, USD, ~2500 SKUs

All registered in scheduler (8-12h intervals) and index.ts --flags.
Build: 0 TypeScript errors.
This commit is contained in:
Rene Fichtmueller 2026-03-30 21:20:23 +02:00
parent fcddd1f27b
commit 2348238888
5 changed files with 924 additions and 1 deletions

View File

@ -27,6 +27,9 @@
* tsx src/index.ts --switch-crawl-pw Crawl switch assets (Playwright, JS-heavy vendors)
* tsx src/index.ts --fetch-only Run only fetch-based scrapers (no Playwright)
* tsx src/index.ts --atgbics Run ATGBICS scraper once
* tsx src/index.ts --naddod Run NADDOD scraper once
* tsx src/index.ts --qsfptek Run QSFPTEK scraper once
* tsx src/index.ts --addon Run AddOn Networks scraper once
*/
import { createScheduler, registerSchedules, registerWorkers } from "./scheduler";
import { scrapeFs } from "./scrapers/fs-com";
@ -54,6 +57,9 @@ import { crawlSwitchAssets } from "./scrapers/switch-assets-crawler";
import { crawlSwitchAssetsPlaywright } from "./scrapers/switch-assets-playwright";
import { scrapeAtgbics } from "./scrapers/atgbics";
import { scrapeProLabs } from "./scrapers/prolabs";
import { scrapeNaddod } from "./scrapers/naddod";
import { scrapeQsfptek } from "./scrapers/qsfptek";
import { scrapeAddonNetworks } from "./scrapers/addon-networks";
import { pool } from "./utils/db";
const args = process.argv.slice(2);
@ -86,6 +92,15 @@ async function runOnce(): Promise<void> {
if (args.includes("--prolabs") || isAll || isFetchOnly) {
await scrapeProLabs();
}
if (args.includes("--naddod") || isAll || isFetchOnly) {
await scrapeNaddod();
}
if (args.includes("--qsfptek") || isAll || isFetchOnly) {
await scrapeQsfptek();
}
if (args.includes("--addon") || isAll || isFetchOnly) {
await scrapeAddonNetworks();
}
if (args.includes("--juniper") || isAll || isFetchOnly) {
await scrapeJuniperHct();
}
@ -172,7 +187,7 @@ async function runScheduler(): Promise<void> {
process.on("SIGTERM", shutdown);
}
const ALL_FLAGS = ["--all", "--fs", "--cisco", "--optcore", "--news", "--flexoptix", "--vendors", "--10gtek", "--champion", "--fluxlight", "--sfpcables", "--gbics", "--prolabs", "--juniper", "--switches", "--whitebox", "--switches-ext", "--flexoptix-vendors", "--sonic-hcl", "--edgecore", "--ufispace", "--switch-assets", "--switch-crawl", "--switch-crawl-pw", "--fetch-only", "--atgbics"];
const ALL_FLAGS = ["--all", "--fs", "--cisco", "--optcore", "--news", "--flexoptix", "--vendors", "--10gtek", "--champion", "--fluxlight", "--sfpcables", "--gbics", "--prolabs", "--naddod", "--qsfptek", "--addon", "--juniper", "--switches", "--whitebox", "--switches-ext", "--flexoptix-vendors", "--sonic-hcl", "--edgecore", "--ufispace", "--switch-assets", "--switch-crawl", "--switch-crawl-pw", "--fetch-only", "--atgbics"];
if (args.some((a) => ALL_FLAGS.includes(a))) {
runOnce().catch((err) => {

View File

@ -61,6 +61,9 @@ export async function registerSchedules(boss: PgBoss): Promise<void> {
"scrape:pricing:10gtek",
"scrape:pricing:atgbics",
"scrape:pricing:prolabs",
"scrape:pricing:naddod",
"scrape:pricing:qsfptek",
"scrape:pricing:addon",
"scrape:compat:cisco",
"scrape:pricing:flexoptix",
"scrape:vendors:flexoptix",
@ -120,6 +123,24 @@ export async function registerSchedules(boss: PgBoss): Promise<void> {
expireInSeconds: 3600,
});
// NADDOD pricing (every 8 hours — WooCommerce, USD prices)
await boss.schedule("scrape:pricing:naddod", "0 5/8 * * *", {}, {
retryLimit: 2,
expireInSeconds: 3600,
});
// QSFPTEK pricing (every 10 hours — custom PHP shop, USD prices)
await boss.schedule("scrape:pricing:qsfptek", "0 3/10 * * *", {}, {
retryLimit: 2,
expireInSeconds: 3600,
});
// AddOn Networks pricing (every 12 hours — enterprise site, USD prices)
await boss.schedule("scrape:pricing:addon", "0 6/12 * * *", {}, {
retryLimit: 2,
expireInSeconds: 3600,
});
// Flexoptix catalog (every 6 hours — fetch-based, fast)
await boss.schedule("scrape:pricing:flexoptix", "0 1/6 * * *", {}, {
retryLimit: 2,
@ -152,6 +173,9 @@ export async function registerWorkers(boss: PgBoss): Promise<void> {
const { scrapeNews } = await import("./scrapers/news");
const { scrapeAtgbics } = await import("./scrapers/atgbics");
const { scrapeProLabs } = await import("./scrapers/prolabs");
const { scrapeNaddod } = await import("./scrapers/naddod");
const { scrapeQsfptek } = await import("./scrapers/qsfptek");
const { scrapeAddonNetworks } = await import("./scrapers/addon-networks");
await boss.work("scrape:pricing:fs", async (_job) => {
console.log(`[${new Date().toISOString()}] Running: FS.com pricing`);
@ -198,6 +222,21 @@ export async function registerWorkers(boss: PgBoss): Promise<void> {
await withIsolatedStorage("prolabs", scrapeProLabs);
});
await boss.work("scrape:pricing:naddod", async (_job) => {
console.log(`[${new Date().toISOString()}] Running: NADDOD pricing`);
await scrapeNaddod();
});
await boss.work("scrape:pricing:qsfptek", async (_job) => {
console.log(`[${new Date().toISOString()}] Running: QSFPTEK pricing`);
await scrapeQsfptek();
});
await boss.work("scrape:pricing:addon", async (_job) => {
console.log(`[${new Date().toISOString()}] Running: AddOn Networks pricing`);
await scrapeAddonNetworks();
});
await boss.work("scrape:faq", async (_job) => {
console.log(`[${new Date().toISOString()}] FAQ scraper — not yet implemented`);
});

View File

@ -0,0 +1,303 @@
/**
* AddOn Networks Scraper — US-based compatible optics vendor
*
* addnetworks.com — Enterprise-grade compatible transceivers.
* Products browseable under /products/ category pages.
* Pricing is public in USD. Rate limited: 1 req/2sec.
*
* AddOn Networks (AddOn Computer Products) specializes in OEM-compatible
* optics for Cisco, Juniper, Arista, HPE, and Dell environments.
* ~2500 SKUs, strong US channel presence.
*/
import { pool, findOrCreateScrapedTransceiver, ensureVendor, upsertPriceObservation } from "../utils/db";
import { contentHash } from "../utils/hash";
// Origin that every category path below is appended to when building request URLs.
// NOTE(review): AddOn Networks' public site is, as far as I know, addonnetworks.com —
// confirm that "addnetworks.com" is really the intended host. The same hostname is
// also baked into the URL regexes of parseProductList and the ensureVendor call.
const BASE = "https://www.addnetworks.com";
// Request headers sent with every page fetch issued by fetchPage.
const HEADERS = {
"User-Agent": "Mozilla/5.0 (compatible; TIP-Bot/1.0; research)",
Accept: "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
"Accept-Language": "en-US,en;q=0.9",
};
// Upper bound applied to the page count detected for a category.
const MAX_PAGES = 50;
// AddOn uses "compatible" suffix naming (e.g. "ADD-XSSFP10GE-LR-AO")
// Categories follow standard form-factor taxonomy.
// Every product parsed from a category page is stamped with that entry's
// formFactor / speed / speedGbps; nothing is derived from the product itself.
const CATEGORIES = [
{ path: "/products/networking/optical-networking/sfp/", formFactor: "SFP", speed: "1G", speedGbps: 1 },
{ path: "/products/networking/optical-networking/sfp-plus/", formFactor: "SFP+", speed: "10G", speedGbps: 10 },
{ path: "/products/networking/optical-networking/sfp28/", formFactor: "SFP28", speed: "25G", speedGbps: 25 },
{ path: "/products/networking/optical-networking/qsfp-plus/", formFactor: "QSFP+", speed: "40G", speedGbps: 40 },
{ path: "/products/networking/optical-networking/qsfp28/", formFactor: "QSFP28", speed: "100G", speedGbps: 100 },
{ path: "/products/networking/optical-networking/qsfp-dd/", formFactor: "QSFP-DD", speed: "400G", speedGbps: 400 },
// Broader category fallback: the scraper skips it once more than three of the
// specific categories above have yielded products. Anything it does yield is
// labelled SFP+/10G regardless of the real form factor.
{ path: "/products/networking/optical-networking/", formFactor: "SFP+", speed: "10G", speedGbps: 10 },
];
// One product row parsed from an AddOn Networks category listing page.
interface Product {
// Identifier extracted from the listing title; passed to findOrCreateScrapedTransceiver.
partNumber: string;
// Listing title with a few HTML entities decoded or stripped.
name: string;
// Absolute product URL; also the key used for de-duplication.
url: string;
// Dollar amount found in the card; undefined when absent or outside (0, 100000).
price?: number;
// The next three fields are copied from the CATEGORIES entry being scraped.
formFactor: string;
speed: string;
speedGbps: number;
// Reach as detected from the title by detectReach (e.g. "10km" / 10000).
reachLabel?: string;
reachMeters?: number;
// "SMF", "MMF", "Copper", or "" when detectFiber finds no hint.
fiberType?: string;
// Digits of the first "<n>nm" token in the title, or "".
wavelength?: string;
// Result of extractCompatibleVendor; not passed to any DB call in this file.
compatibleWith?: string;
}
/**
 * Resolve after roughly `ms` milliseconds.
 *
 * @param ms Delay handed to `setTimeout`.
 * @returns A promise that resolves (with no value) once the timer fires.
 */
function sleep(ms: number): Promise<void> {
  return new Promise<void>((done) => {
    setTimeout(done, ms);
  });
}
/**
 * Guess the optical reach of a product from its title.
 *
 * Explicit distances ("10km", "300 m") are tried first, longest first, and
 * matched case-insensitively. If none is present, the reach code in the title
 * (LR4, LR, ER, ZR, SR, DR, FR — case-sensitive) is mapped to a nominal distance.
 *
 * @param text Product title or description.
 * @returns The first matching `{ label, meters }`, or `undefined` when nothing matches.
 */
function detectReach(text: string): { label: string; meters: number } | undefined {
  const rules: Array<{ pattern: RegExp; label: string; meters: number }> = [];

  // Explicit kilometre distances.
  for (const km of [120, 80, 40, 20, 10, 2]) {
    rules.push({ pattern: new RegExp(`\\b${km}\\s*km\\b`, "i"), label: `${km}km`, meters: km * 1000 });
  }
  // Explicit metre distances.
  for (const m of [550, 500, 400, 300, 150, 100]) {
    rules.push({ pattern: new RegExp(`\\b${m}\\s*m\\b`, "i"), label: `${m}m`, meters: m });
  }
  // Reach codes, used only when no explicit distance was found.
  rules.push(
    { pattern: /\bLR4\b/, label: "10km", meters: 10000 },
    { pattern: /\bLR\b/, label: "10km", meters: 10000 },
    { pattern: /\bER4?\b/, label: "40km", meters: 40000 },
    { pattern: /\bZR4?\b/, label: "80km", meters: 80000 },
    { pattern: /\bSR4?\b/, label: "300m", meters: 300 },
    { pattern: /\bDR4?\b/, label: "500m", meters: 500 },
    { pattern: /\bFR4?\b/, label: "2km", meters: 2000 },
  );

  for (const rule of rules) {
    if (rule.pattern.test(text)) {
      return { label: rule.label, meters: rule.meters };
    }
  }
  return undefined;
}
/**
 * Classify the transmission medium of a product from its title.
 *
 * Single-mode hints are checked first, then multi-mode, then copper.
 * Two-letter reach codes (LX/LR/ER/ZR, SX/SR) must not touch another letter,
 * but may sit at the very start or end of the text, so "SFP-10G-LR" is
 * recognised as well as "SFP-10G-LR-S" or "LR4".
 *
 * @param text Product title or description.
 * @returns "SMF", "MMF", "Copper", or "" when no hint is found.
 */
function detectFiber(text: string): string {
  // (?:^|[^a-z]) / (?:[^a-z]|$) accept string edges, not only a neighbouring non-letter.
  const singleMode = /single.?mode|smf|(?:^|[^a-z])(?:lx|lr|er|zr)(?:[^a-z]|$)|bidi|cwdm|dwdm/i;
  const multiMode = /multi.?mode|mmf|(?:^|[^a-z])(?:sx|sr)(?:[^a-z]|$)/i;
  const copper = /copper|dac|twinax|rj.?45|base-t/i;

  if (singleMode.test(text)) return "SMF";
  if (multiMode.test(text)) return "MMF";
  if (copper.test(text)) return "Copper";
  return "";
}
/**
 * Pull the first wavelength figure out of a product title.
 *
 * @param text Product title or description.
 * @returns The 3–4 digit number preceding "nm" (case-insensitive), or "" if none.
 */
function detectWavelength(text: string): string {
  const hit = /(\d{3,4})\s*nm/i.exec(text);
  if (hit === null) {
    return "";
  }
  return hit[1];
}
/**
 * Work out which OEM a compatible optic is built for.
 *
 * 1. If the text mentions a known brand as a whole word (case-insensitive),
 *    that brand is returned; brands are tried in list order, not text order.
 * 2. Otherwise, if the text ends in AddOn's "-AO" / "-IN" suffix, the OEM is
 *    inferred from the part-number prefix.
 *
 * @param name Product title or bare part number.
 * @returns The brand name, or "" when it cannot be determined.
 */
function extractCompatibleVendor(name: string): string {
  const brands = ["Cisco", "Juniper", "Arista", "HPE", "HP", "Aruba", "Dell", "Brocade", "Extreme",
    "Huawei", "Nokia", "MikroTik", "Mellanox", "Nvidia", "Ubiquiti", "Force10",
    "Foundry", "Enterasys", "Allied Telesis", "Netgear", "Calix"];

  const named = brands.find((brand) => new RegExp(`\\b${brand}\\b`, "i").test(name));
  if (named !== undefined) return named;

  // AddOn naming convention: the OEM part number followed by "-AO" or "-IN".
  if (!/-AO$|-IN$/i.test(name)) return "";

  // e.g. SFP-10G-SR-AO → Cisco
  if (/^SFP-|^GLC-|^QSFP-|^SFP28-/i.test(name)) return "Cisco";
  if (/^EX-|^QFX-/i.test(name)) return "Juniper";
  // 740-xxxxxx is a Juniper part-number series.
  if (/^740-/i.test(name)) return "Juniper";
  // J + four digits (J4858C, J9150A, ...) is the HP ProCurve / HPE Aruba series,
  // which was previously mislabelled as Juniper.
  if (/^J\d{4}/i.test(name)) return "HPE";
  return "";
}
/**
 * Parse one AddOn Networks listing page into product rows.
 *
 * Two strategies are tried on the whitespace-collapsed HTML:
 *  1. `<li class="...product...">` cards (Magento-style grid): URL, heading and
 *     price are all read from inside the card.
 *  2. Only if strategy 1 found nothing: any on-site link whose text looks like an
 *     optics product. The price is searched in a window around that link.
 *
 * Form factor and speed always come from `cat`; reach, fiber, wavelength and
 * compatible vendor are derived from the product title.
 *
 * @param html Raw page HTML.
 * @param cat  Category entry the page belongs to.
 * @returns Products found on the page, unique by URL, in document order.
 */
function parseProductList(html: string, cat: typeof CATEGORIES[number]): Product[] {
  const products: Product[] = [];
  const seen = new Set<string>();
  const collapsed = html.replace(/\s+/g, " ");

  // First "$1,234.56"-style amount in `fragment`, kept only when inside (0, 100000).
  const findPrice = (fragment: string): number | undefined => {
    const hit = fragment.match(/\$\s*([\d,]+\.?\d*)/);
    if (!hit) return undefined;
    const value = parseFloat(hit[1].replace(/,/g, ""));
    return value > 0 && value < 100000 ? value : undefined;
  };

  // Append a product, deriving all title-based attributes.
  const addProduct = (partNumber: string, name: string, url: string, price: number | undefined): void => {
    const reach = detectReach(name);
    products.push({
      partNumber, name, url,
      price,
      formFactor: cat.formFactor, speed: cat.speed, speedGbps: cat.speedGbps,
      reachLabel: reach?.label, reachMeters: reach?.meters,
      fiberType: detectFiber(name), wavelength: detectWavelength(name),
      compatibleWith: extractCompatibleVendor(name),
    });
  };

  // Strategy 1: Magento / standard product grid
  for (const m of collapsed.matchAll(/<li[^>]+class="[^"]*product[^"]*"[^>]*>([\s\S]*?)<\/li>/gi)) {
    const card = m[1];
    const urlMatch = card.match(/href="(https?:\/\/(?:www\.)?addnetworks\.com\/[^"?#]+)"/i);
    if (!urlMatch) continue;
    const url = urlMatch[1];
    if (seen.has(url) || !/\/product(?:s)?\/|\/item\//i.test(url)) continue;
    seen.add(url);

    const nameMatch = card.match(/<h[2-4][^>]*>([^<]{10,})<\/h[2-4]>/i) ||
      card.match(/product[_-]?(?:name|title)[^>]*>([^<]{10,})</i) ||
      card.match(/class="name[^"]*"[^>]*>([^<]{10,})</i);
    if (!nameMatch) continue;
    const name = nameMatch[1].trim().replace(/&amp;/g, "&").replace(/&#[0-9]+;/g, "");
    if (name.length < 5) continue;

    // AddOn part numbers end in "-AO" or "-IN" suffix
    const partNumber = name.match(/([A-Z0-9](?:[A-Z0-9\-\.\/]{4,}(?:-AO|-IN|-ADD)?))/)?.[1] ||
      name.split(/\s+/)[0]?.slice(0, 80) || name.slice(0, 60);

    addProduct(partNumber, name, url, findPrice(card));
  }

  // Strategy 2: Generic product link fallback
  if (products.length === 0) {
    for (const m of collapsed.matchAll(/href="(https?:\/\/(?:www\.)?addnetworks\.com\/[^"?#]+)"[^>]*>\s*<[^>]+>\s*([^<]{10,})/gi)) {
      const url = m[1];
      const name = m[2].trim().replace(/&amp;/g, "&");
      if (seen.has(url) || name.length < 10) continue;
      if (!/transceiver|sfp|qsfp|osfp|dac|aoc|fiber|optical/i.test(name)) continue;
      seen.add(url);

      // Anchor the price window on THIS link. indexOf(url) would return the first
      // occurrence of the URL string, which may belong to another element or to a
      // longer URL sharing the same prefix, and so pick up an unrelated price.
      const at = m.index ?? collapsed.indexOf(url);
      const context = collapsed.slice(Math.max(0, at - 300), at + 600);

      const partNumber = name.match(/([A-Z0-9][A-Z0-9\-\.\/]{4,})/)?.[1] ||
        name.split(/\s+/)[0]?.slice(0, 80) || "";

      addProduct(partNumber, name, url, findPrice(context));
    }
  }

  return products;
}
/**
 * Download a page as text.
 *
 * @param url Absolute URL to request.
 * @returns The response body.
 * @throws Error `HTTP <status> for <url>` on a non-2xx response; the request is
 *         aborted by `AbortSignal.timeout` after 30 seconds.
 */
async function fetchPage(url: string): Promise<string> {
  const response = await fetch(url, {
    headers: HEADERS,
    signal: AbortSignal.timeout(30000),
  });
  if (response.ok) {
    return response.text();
  }
  throw new Error(`HTTP ${response.status} for ${url}`);
}
/**
 * Crawl every AddOn Networks category, store the products found and record
 * a price observation for each product that exposes a price.
 *
 * Per category: fetch page 1, detect the page count (capped at MAX_PAGES,
 * defaulting to 2 when no pagination marker is found), walk the remaining
 * pages with a 2 s pause between requests, de-duplicate by URL and persist.
 * Failures are logged and never abort the run: a failing page ends that
 * category's pagination, a failing category moves on to the next one, and a
 * failing DB write skips that product.
 *
 * @returns Resolves when all categories have been processed.
 */
export async function scrapeAddonNetworks(): Promise<void> {
  console.log("=== AddOn Networks Scraper Starting ===\n");

  // Vendor row id used as both owner and price source (semantics live in ../utils/db).
  const vendorId = await ensureVendor(
    "AddOn Networks",
    "compatible",
    "https://www.addnetworks.com",
    "https://www.addnetworks.com/products/networking/optical-networking/",
  );

  const genericPath = "/products/networking/optical-networking/";
  // Thrown values are not guaranteed to be Error instances.
  const errorText = (err: unknown): string => (err instanceof Error ? err.message : String(err));

  let totalProducts = 0;
  let priceUpdates = 0;
  const seenCategories = new Set<string>();

  for (const cat of CATEGORIES) {
    console.log(`\n--- ${cat.formFactor} (${cat.speed}) [${cat.path}] ---`);

    // Decide about the generic fallback BEFORE spending a request on it.
    if (cat.path === genericPath && seenCategories.size > 3) {
      console.log(` Skipping generic fallback (${seenCategories.size} specific categories scraped)`);
      continue;
    }

    try {
      const html1 = await fetchPage(BASE + cat.path);
      const catProducts = parseProductList(html1, cat);

      if (catProducts.length === 0) {
        console.log(" No products on page 1 — skipping");
        continue; // the finally block below still applies the rate-limit pause
      }

      seenCategories.add(cat.path);
      console.log(` Found ${catProducts.length} products on page 1`);

      // Detect pagination
      const totalPagesMatch =
        html1.match(/page\s+\d+\s+of\s+(\d+)/i) ||
        html1.match(/aria-label="Last[^"]*"\s+href="[^"]*[?&]p=(\d+)/) ||
        html1.match(/pagination[^>]*>[\s\S]*?(\d+)<\/a>\s*<\/[^>]+>\s*<\/[^>]+>/);
      const totalPages = totalPagesMatch ? Math.min(parseInt(totalPagesMatch[1], 10), MAX_PAGES) : 2;
      console.log(` Total pages (estimate): ${totalPages}`);

      const allProducts = [...catProducts];
      for (let page = 2; page <= totalPages; page++) {
        await sleep(2000);
        try {
          const pageUrl = BASE + cat.path + `?p=${page}`;
          const html = await fetchPage(pageUrl);
          const pageProds = parseProductList(html, cat);
          if (pageProds.length === 0) break;
          allProducts.push(...pageProds);
          console.log(` Page ${page}: ${pageProds.length} products`);
        } catch (err) {
          console.warn(` Page ${page} failed: ${errorText(err).slice(0, 60)}`);
          break;
        }
      }

      // Keep the first occurrence of each URL, preserving order (linear time).
      const byUrl = new Map<string, Product>();
      for (const product of allProducts) {
        if (!byUrl.has(product.url)) byUrl.set(product.url, product);
      }
      const uniqueProducts = Array.from(byUrl.values());
      console.log(` Total unique: ${uniqueProducts.length}`);

      for (const product of uniqueProducts) {
        try {
          const txId = await findOrCreateScrapedTransceiver({
            partNumber: product.partNumber,
            vendorId,
            formFactor: product.formFactor,
            speedGbps: product.speedGbps,
            speed: product.speed,
            reachMeters: product.reachMeters,
            reachLabel: product.reachLabel,
            fiberType: product.fiberType,
            wavelengths: product.wavelength,
            category: "DataCenter",
          });

          if (product.price !== undefined && product.price > 0) {
            const hash = contentHash({ price: product.price, part: product.partNumber });
            const updated = await upsertPriceObservation({
              transceiverId: txId,
              sourceVendorId: vendorId,
              price: product.price,
              currency: "USD",
              stockLevel: "in_stock",
              url: product.url,
              contentHash: hash,
            });
            if (updated) priceUpdates++;
          }
          totalProducts++;
        } catch (err) {
          console.warn(` DB error: ${errorText(err).slice(0, 80)}`);
        }
      }
    } catch (err) {
      console.error(` Category failed: ${errorText(err)}`);
    } finally {
      // Pause after every category that issued a request. Previously the
      // "no products" path skipped this and hit the next category immediately.
      await sleep(2000);
    }
  }

  console.log(`\n=== AddOn Networks Complete: ${totalProducts} products, ${priceUpdates} price updates ===`);
}
// Direct invocation (not imported as a module): run once, then close the DB pool.
if (require.main === module) {
  const onFatal = (err: unknown): void => {
    console.error("Fatal:", err);
    pool.end();
    process.exit(1);
  };
  scrapeAddonNetworks()
    .then(() => pool.end())
    .catch(onFatal);
}

View File

@ -0,0 +1,285 @@
/**
* NADDOD Scraper — Chinese compatible transceiver vendor
*
* naddod.com — WooCommerce store, server-rendered HTML, USD pricing.
* Products listed under product category pages.
* Pagination via /page/N/. Rate limited: 1 req/2sec.
*
* NADDOD (Shenzhen NADDOD Information Co.) makes and sells compatible
* optics for Cisco, Juniper, Arista, etc. Transparent USD pricing.
*/
import { pool, findOrCreateScrapedTransceiver, ensureVendor, upsertPriceObservation } from "../utils/db";
import { contentHash } from "../utils/hash";
// Origin that every category path below is appended to when building request URLs.
const BASE = "https://www.naddod.com";
// Request headers sent with every page fetch issued by fetchPage.
const HEADERS = {
"User-Agent": "Mozilla/5.0 (compatible; TIP-Bot/1.0; research)",
Accept: "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
"Accept-Language": "en-US,en;q=0.9",
};
// Upper bound applied to the page count detected for a category.
const MAX_PAGES = 30;
// Category listing pages to crawl. Every product parsed from a page is stamped
// with that entry's formFactor / speed / speedGbps.
const CATEGORIES = [
{ path: "/product-category/1g-sfp-transceivers/", formFactor: "SFP", speed: "1G", speedGbps: 1 },
{ path: "/product-category/10g-sfp-transceivers/", formFactor: "SFP+", speed: "10G", speedGbps: 10 },
{ path: "/product-category/25g-sfp28-transceivers/", formFactor: "SFP28", speed: "25G", speedGbps: 25 },
{ path: "/product-category/40g-qsfp-transceivers/", formFactor: "QSFP+", speed: "40G", speedGbps: 40 },
{ path: "/product-category/100g-qsfp28-transceivers/", formFactor: "QSFP28", speed: "100G", speedGbps: 100 },
{ path: "/product-category/200g-qsfp56-transceivers/", formFactor: "QSFP56", speed: "200G", speedGbps: 200 },
{ path: "/product-category/400g-qsfp-dd-transceivers/", formFactor: "QSFP-DD", speed: "400G", speedGbps: 400 },
{ path: "/product-category/800g-osfp-transceivers/", formFactor: "OSFP", speed: "800G", speedGbps: 800 },
// Generic fallback: the scraper skips it once more than three of the specific
// categories above have yielded products. Anything it does yield is labelled
// SFP+/10G regardless of the real form factor.
{ path: "/product-category/transceivers/", formFactor: "SFP+", speed: "10G", speedGbps: 10 },
];
// One product row parsed from a NADDOD category listing page.
interface Product {
// Leading part of the listing title; passed to findOrCreateScrapedTransceiver.
partNumber: string;
// Listing title with "&amp;" decoded (and "&#8211;" stripped in the card strategy).
name: string;
// Absolute product URL; also the key used for de-duplication.
url: string;
// Dollar amount found near the product; undefined when absent or outside (0, 100000).
price?: number;
// The next three fields are copied from the CATEGORIES entry being scraped.
formFactor: string;
speed: string;
speedGbps: number;
// Reach as detected from the title by detectReach (e.g. "10km" / 10000).
reachLabel?: string;
reachMeters?: number;
// "SMF", "MMF", "Copper", or "" when detectFiber finds no hint.
fiberType?: string;
// Digits of the first "<n>nm" token in the title, or "".
wavelength?: string;
// Result of extractCompatibleVendor; not passed to any DB call in this file.
compatibleWith?: string;
}
/**
 * Pause for about `ms` milliseconds.
 *
 * @param ms Delay handed to `setTimeout`.
 * @returns A promise that resolves (with no value) when the timer fires.
 */
function sleep(ms: number): Promise<void> {
  return new Promise<void>((wake) => {
    setTimeout(wake, ms);
  });
}
/**
 * Infer the optical reach from a product title.
 *
 * The table is ordered by priority: explicit distances (case-insensitive)
 * come before reach codes (case-sensitive), and LR4 is tested before LR.
 *
 * @param text Product title or description.
 * @returns `{ label, meters }` for the first rule that matches, else `undefined`.
 */
function detectReach(text: string): { label: string; meters: number } | undefined {
  const table: ReadonlyArray<readonly [RegExp, string, number]> = [
    [/\b120\s*km\b/i, "120km", 120000],
    [/\b80\s*km\b/i, "80km", 80000],
    [/\b40\s*km\b/i, "40km", 40000],
    [/\b20\s*km\b/i, "20km", 20000],
    [/\b10\s*km\b/i, "10km", 10000],
    [/\b2\s*km\b/i, "2km", 2000],
    [/\b550\s*m\b/i, "550m", 550],
    [/\b500\s*m\b/i, "500m", 500],
    [/\b400\s*m\b/i, "400m", 400],
    [/\b300\s*m\b/i, "300m", 300],
    [/\b150\s*m\b/i, "150m", 150],
    [/\b100\s*m\b/i, "100m", 100],
    [/\bLR4\b/, "10km", 10000],
    [/\bLR\b/, "10km", 10000],
    [/\bER4?\b/, "40km", 40000],
    [/\bZR4?\b/, "80km", 80000],
    [/\bSR4?\b/, "300m", 300],
    [/\bDR4?\b/, "500m", 500],
    [/\bFR4?\b/, "2km", 2000],
  ];

  const winner = table.find(([pattern]) => pattern.test(text));
  if (winner === undefined) {
    return undefined;
  }
  const [, label, meters] = winner;
  return { label, meters };
}
/**
 * Classify the transmission medium of a product from its title.
 *
 * Single-mode hints are checked first, then multi-mode, then copper.
 * Two-letter reach codes (LX/LR/ER/ZR, SX/SR) must not touch another letter,
 * but may sit at the very start or end of the text, so "SFP-10G-LR" is
 * recognised as well as "SFP-10G-LR-S" or "LR4".
 *
 * @param text Product title or description.
 * @returns "SMF", "MMF", "Copper", or "" when no hint is found.
 */
function detectFiber(text: string): string {
  // (?:^|[^a-z]) / (?:[^a-z]|$) accept string edges, not only a neighbouring non-letter.
  const singleMode = /single.?mode|smf|(?:^|[^a-z])(?:lx|lr|er|zr)(?:[^a-z]|$)|bidi|cwdm|dwdm/i;
  const multiMode = /multi.?mode|mmf|(?:^|[^a-z])(?:sx|sr)(?:[^a-z]|$)/i;
  const copper = /copper|dac|twinax|rj.?45|base-t/i;

  if (singleMode.test(text)) return "SMF";
  if (multiMode.test(text)) return "MMF";
  if (copper.test(text)) return "Copper";
  return "";
}
/**
 * Extract the first wavelength value mentioned in a product title.
 *
 * @param text Product title or description.
 * @returns The 3–4 digits preceding "nm" (case-insensitive), or "" if none.
 */
function detectWavelength(text: string): string {
  return text.match(/(\d{3,4})\s*nm/i)?.[1] ?? "";
}
/**
 * Determine which OEM a product is advertised as compatible with.
 *
 * Known brands are matched as whole words, case-insensitively, in list order.
 * Failing that, the one or two capitalised words following "for" or
 * "compatible [with]" are returned verbatim.
 *
 * @param name Product title.
 * @returns Brand name, or "" when nothing recognisable is present.
 */
function extractCompatibleVendor(name: string): string {
  const brands = ["Cisco", "Juniper", "Arista", "HPE", "Dell", "Brocade", "Extreme", "Huawei",
    "Nokia", "MikroTik", "Mellanox", "Nvidia", "Ubiquiti"];

  const known = brands.find((candidate) => new RegExp(`\\b${candidate}\\b`, "i").test(name));
  if (known !== undefined) {
    return known;
  }

  const phrase = /(?:for\s+|compatible\s+(?:with\s+)?)([A-Z][a-z]+(?:\s+[A-Z][a-z]+)?)/.exec(name);
  return phrase === null ? "" : phrase[1];
}
/**
 * Parse one NADDOD listing page into product rows.
 *
 * Strategy 1 reads WooCommerce `<li class="...product...">` cards. Strategy 2
 * runs only when strategy 1 produced nothing and scans for /product/ or /shop/
 * links whose text looks like an optics product, taking the price from a
 * window around the link.
 *
 * @param html Raw page HTML.
 * @param cat  Category entry the page belongs to (source of form factor/speed).
 * @returns Products found on the page, unique by URL, in document order.
 */
function parseProductList(html: string, cat: typeof CATEGORIES[number]): Product[] {
  const found: Product[] = [];
  const visited = new Set<string>();
  const flat = html.replace(/\s+/g, " ");

  // Strategy 1: WooCommerce standard product loop
  for (const cardHit of flat.matchAll(/<li[^>]+class="[^"]*product[^"]*"[^>]*>([\s\S]*?)<\/li>/gi)) {
    const cardHtml = cardHit[1];

    const link = cardHtml.match(/href="(https?:\/\/(?:www\.)?naddod\.com\/product\/[^"]+)"/i);
    if (!link) continue;
    const url = link[1];
    if (visited.has(url)) continue;
    visited.add(url);

    const title = cardHtml.match(/woocommerce-loop-product__title[^>]*>([^<]+)</i) ||
      cardHtml.match(/<h2[^>]*>([^<]{10,})<\/h2>/i) ||
      cardHtml.match(/<h3[^>]*>([^<]{10,})<\/h3>/i);
    if (!title) continue;
    const name = title[1].trim().replace(/&amp;/g, "&").replace(/&#8211;/g, "");
    if (name.length < 5) continue;

    const amount = cardHtml.match(/\$\s*([\d,]+\.?\d*)/);
    const price = amount ? parseFloat(amount[1].replace(/,/g, "")) : undefined;
    const reach = detectReach(name);
    // Everything before the first " compatible" / " for" / " sfp" / " qsfp".
    const partNumber = name.split(/\s+(?:compatible|for|sfp|qsfp)/i)[0]?.trim().slice(0, 80) || name.slice(0, 60);

    found.push({
      partNumber, name, url,
      price: price && price > 0 && price < 100000 ? price : undefined,
      formFactor: cat.formFactor, speed: cat.speed, speedGbps: cat.speedGbps,
      reachLabel: reach?.label, reachMeters: reach?.meters,
      fiberType: detectFiber(name), wavelength: detectWavelength(name),
      compatibleWith: extractCompatibleVendor(name),
    });
  }

  if (found.length > 0) {
    return found;
  }

  // Strategy 2: Generic product link fallback
  for (const linkHit of flat.matchAll(/href="(https?:\/\/(?:www\.)?naddod\.com\/(?:product|shop)\/[^"?#]+)"[^>]*>\s*([^<]{10,})/gi)) {
    const url = linkHit[1];
    const name = linkHit[2].trim().replace(/&amp;/g, "&");
    if (visited.has(url) || name.length < 10) continue;
    if (!/transceiver|sfp|qsfp|osfp|dac|aoc|xfp/i.test(name)) continue;
    visited.add(url);

    // Price is looked up in a window of 200 chars before to 500 after the link.
    const at = linkHit.index ?? 0;
    const windowText = flat.slice(Math.max(0, at - 200), at + 500);
    const amount = windowText.match(/\$\s*([\d,]+\.?\d*)/);
    const price = amount ? parseFloat(amount[1].replace(/,/g, "")) : undefined;
    const reach = detectReach(name);

    found.push({
      partNumber: name.split(/\s+/)[0]?.slice(0, 80) || "",
      name, url,
      price: price && price > 0 && price < 100000 ? price : undefined,
      formFactor: cat.formFactor, speed: cat.speed, speedGbps: cat.speedGbps,
      reachLabel: reach?.label, reachMeters: reach?.meters,
      fiberType: detectFiber(name), wavelength: detectWavelength(name),
      compatibleWith: extractCompatibleVendor(name),
    });
  }

  return found;
}
/**
 * Fetch a page and return its body as text.
 *
 * @param url Absolute URL to request.
 * @returns The response body.
 * @throws Error `HTTP <status> for <url>` on a non-2xx response; the request is
 *         aborted by `AbortSignal.timeout` after 30 seconds.
 */
async function fetchPage(url: string): Promise<string> {
  const timeout = AbortSignal.timeout(30000);
  const reply = await fetch(url, { headers: HEADERS, signal: timeout });
  if (reply.ok) {
    return reply.text();
  }
  throw new Error(`HTTP ${reply.status} for ${url}`);
}
/**
 * Crawl every NADDOD category, store the products found and record a price
 * observation for each product that exposes a price.
 *
 * Per category: fetch page 1, read the highest page number from the
 * pagination links (capped at MAX_PAGES, 1 when there are none), walk
 * `/page/N/` with a 2 s pause between requests, de-duplicate by URL and persist.
 * Failures are logged and never abort the run: a failing page ends that
 * category's pagination, a failing category moves on to the next one, and a
 * failing DB write skips that product.
 *
 * @returns Resolves when all categories have been processed.
 */
export async function scrapeNaddod(): Promise<void> {
  console.log("=== NADDOD Scraper Starting ===\n");

  // Vendor row id used as both owner and price source (semantics live in ../utils/db).
  const vendorId = await ensureVendor(
    "NADDOD",
    "compatible",
    "https://www.naddod.com",
    "https://www.naddod.com/product-category/transceivers/",
  );

  // Thrown values are not guaranteed to be Error instances.
  const errorText = (err: unknown): string => (err instanceof Error ? err.message : String(err));

  let totalProducts = 0;
  let priceUpdates = 0;
  const seenCategories = new Set<string>();

  for (const cat of CATEGORIES) {
    console.log(`\n--- ${cat.formFactor} (${cat.speed}) [${cat.path}] ---`);

    // Decide about the generic fallback BEFORE spending a request on it.
    if (cat.path.includes("/transceivers/") && seenCategories.size > 3) {
      console.log(` Skipping generic fallback (${seenCategories.size} specific categories scraped)`);
      continue;
    }

    try {
      const html1 = await fetchPage(BASE + cat.path);
      const catProducts = parseProductList(html1, cat);

      if (catProducts.length === 0) {
        console.log(" No products on page 1 — skipping");
        continue; // the finally block below still applies the rate-limit pause
      }

      seenCategories.add(cat.path);
      console.log(` Found ${catProducts.length} products on page 1`);

      // Take the largest numbered pagination link. The earlier single regex used
      // `(?!.*page-numbers)`, where `.` stops at a newline, so on multi-line
      // markup it matched the first link instead of the last.
      let lastPage = 1;
      for (const link of html1.matchAll(/page-numbers[^>]*>(\d+)<\/a>/g)) {
        const pageNo = parseInt(link[1], 10);
        if (pageNo > lastPage) lastPage = pageNo;
      }
      const totalPages = Math.min(lastPage, MAX_PAGES);
      console.log(` Total pages: ${totalPages}`);

      const allProducts = [...catProducts];
      for (let page = 2; page <= totalPages; page++) {
        await sleep(2000);
        try {
          const html = await fetchPage(BASE + cat.path + `page/${page}/`);
          const pageProds = parseProductList(html, cat);
          if (pageProds.length === 0) break;
          allProducts.push(...pageProds);
          console.log(` Page ${page}: ${pageProds.length} products`);
        } catch (err) {
          console.warn(` Page ${page} failed: ${errorText(err).slice(0, 60)}`);
          break;
        }
      }

      // Keep the first occurrence of each URL, preserving order (linear time).
      const byUrl = new Map<string, Product>();
      for (const product of allProducts) {
        if (!byUrl.has(product.url)) byUrl.set(product.url, product);
      }
      const uniqueProducts = Array.from(byUrl.values());
      console.log(` Total unique: ${uniqueProducts.length}`);

      for (const product of uniqueProducts) {
        try {
          const txId = await findOrCreateScrapedTransceiver({
            partNumber: product.partNumber,
            vendorId,
            formFactor: product.formFactor,
            speedGbps: product.speedGbps,
            speed: product.speed,
            reachMeters: product.reachMeters,
            reachLabel: product.reachLabel,
            fiberType: product.fiberType,
            wavelengths: product.wavelength,
            category: "DataCenter",
          });

          if (product.price !== undefined && product.price > 0) {
            const hash = contentHash({ price: product.price, part: product.partNumber });
            const updated = await upsertPriceObservation({
              transceiverId: txId,
              sourceVendorId: vendorId,
              price: product.price,
              currency: "USD",
              stockLevel: "in_stock",
              url: product.url,
              contentHash: hash,
            });
            if (updated) priceUpdates++;
          }
          totalProducts++;
        } catch (err) {
          console.warn(` DB error: ${errorText(err).slice(0, 80)}`);
        }
      }
    } catch (err) {
      console.error(` Category failed: ${errorText(err)}`);
    } finally {
      // Pause after every category that issued a request. Previously the
      // "no products" path skipped this and hit the next category immediately.
      await sleep(2000);
    }
  }

  console.log(`\n=== NADDOD Complete: ${totalProducts} products, ${priceUpdates} price updates ===`);
}
// Direct invocation (not imported as a module): run once, then close the DB pool.
if (require.main === module) {
  const onFatal = (err: unknown): void => {
    console.error("Fatal:", err);
    pool.end();
    process.exit(1);
  };
  scrapeNaddod()
    .then(() => pool.end())
    .catch(onFatal);
}

View File

@ -0,0 +1,281 @@
/**
* QSFPTEK Scraper — Chinese compatible transceiver vendor
*
* qsfptek.com — Server-rendered HTML shop, USD pricing.
* Focuses on QSFP+/QSFP28/QSFP-DD/SFP+ form factors.
* Rate limited: 1 req/2sec.
*
* QSFPTEK (Shenzhen Optotech Technology) — competitive pricing,
* transparent USD prices, no account required.
*/
import { pool, findOrCreateScrapedTransceiver, ensureVendor, upsertPriceObservation } from "../utils/db";
import { contentHash } from "../utils/hash";
// Origin that every category path below is appended to when building request URLs.
const BASE = "https://www.qsfptek.com";
// Request headers sent with every page fetch issued by fetchPage.
const HEADERS = {
"User-Agent": "Mozilla/5.0 (compatible; TIP-Bot/1.0; research)",
Accept: "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
"Accept-Language": "en-US,en;q=0.9",
};
// Upper bound applied to the page count detected for a category.
const MAX_PAGES = 30;
// Category listing pages to crawl. Every product parsed from a page is stamped
// with that entry's formFactor / speed / speedGbps. For pages 2+, the scraper
// strips ".html" from the path and appends "?page=N".
const CATEGORIES = [
{ path: "/c/sfp-transceiver.html", formFactor: "SFP", speed: "1G", speedGbps: 1 },
{ path: "/c/sfp-plus-transceiver.html", formFactor: "SFP+", speed: "10G", speedGbps: 10 },
{ path: "/c/sfp28-transceiver.html", formFactor: "SFP28", speed: "25G", speedGbps: 25 },
{ path: "/c/qsfp-plus-transceiver.html", formFactor: "QSFP+", speed: "40G", speedGbps: 40 },
{ path: "/c/qsfp28-transceiver.html", formFactor: "QSFP28", speed: "100G", speedGbps: 100 },
{ path: "/c/qsfp56-transceiver.html", formFactor: "QSFP56", speed: "200G", speedGbps: 200 },
{ path: "/c/qsfp-dd-transceiver.html", formFactor: "QSFP-DD", speed: "400G", speedGbps: 400 },
{ path: "/c/osfp-transceiver.html", formFactor: "OSFP", speed: "800G", speedGbps: 800 },
// Generic fallback: the scraper skips it once more than three of the specific
// categories above have yielded products. Anything it does yield is labelled
// SFP+/10G regardless of the real form factor.
{ path: "/c/optical-transceiver.html", formFactor: "SFP+", speed: "10G", speedGbps: 10 },
];
// One product row parsed from a QSFPTEK category listing page.
interface Product {
// Leading part of the listing title; passed to findOrCreateScrapedTransceiver.
partNumber: string;
// Listing title with "&amp;" decoded (numeric entities stripped in the card strategy).
name: string;
// Absolute product URL; also the key used for de-duplication.
url: string;
// Dollar amount found near the product; undefined when absent or outside (0, 100000).
price?: number;
// The next three fields are copied from the CATEGORIES entry being scraped.
formFactor: string;
speed: string;
speedGbps: number;
// Reach as detected from the title by detectReach (e.g. "10km" / 10000).
reachLabel?: string;
reachMeters?: number;
// "SMF", "MMF", "Copper", or "" when detectFiber finds no hint.
fiberType?: string;
// Digits of the first "<n>nm" token in the title, or "".
wavelength?: string;
// Result of extractCompatibleVendor; not passed to any DB call in this file.
compatibleWith?: string;
}
/**
 * Wait for about `ms` milliseconds.
 *
 * @param ms Delay handed to `setTimeout`.
 * @returns A promise that resolves (with no value) when the timer fires.
 */
function sleep(ms: number): Promise<void> {
  return new Promise<void>((finish) => {
    setTimeout(finish, ms);
  });
}
/**
 * Infer the optical reach from a product title.
 *
 * Explicit distances ("10km", "150 m"; case-insensitive) take priority over
 * reach codes (LR4, LR, ER, ZR, SR, DR, FR; case-sensitive), which are mapped
 * to a nominal distance. The distance list now includes 400 m and 150 m,
 * matching the sibling scrapers; without them a title such as "SR4 150m"
 * fell through to the SR rule and was reported as 300 m.
 *
 * @param text Product title or description.
 * @returns `{ label, meters }` for the first rule that matches, else `undefined`.
 */
function detectReach(text: string): { label: string; meters: number } | undefined {
  const patterns: [RegExp, string, number][] = [
    [/\b120\s*km\b/i, "120km", 120000],
    [/\b80\s*km\b/i, "80km", 80000],
    [/\b40\s*km\b/i, "40km", 40000],
    [/\b20\s*km\b/i, "20km", 20000],
    [/\b10\s*km\b/i, "10km", 10000],
    [/\b2\s*km\b/i, "2km", 2000],
    [/\b550\s*m\b/i, "550m", 550],
    [/\b500\s*m\b/i, "500m", 500],
    [/\b400\s*m\b/i, "400m", 400],
    [/\b300\s*m\b/i, "300m", 300],
    [/\b150\s*m\b/i, "150m", 150],
    [/\b100\s*m\b/i, "100m", 100],
    // Reach codes: only consulted when no explicit distance is present.
    [/\bLR4\b/, "10km", 10000],
    [/\bLR\b/, "10km", 10000],
    [/\bER4?\b/, "40km", 40000],
    [/\bZR4?\b/, "80km", 80000],
    [/\bSR4?\b/, "300m", 300],
    [/\bDR4?\b/, "500m", 500],
    [/\bFR4?\b/, "2km", 2000],
  ];
  for (const [regex, label, meters] of patterns) {
    if (regex.test(text)) return { label, meters };
  }
  return undefined;
}
/**
 * Classify the transmission medium of a product from its title.
 *
 * Single-mode hints are checked first, then multi-mode, then copper.
 * Two-letter reach codes (LX/LR/ER/ZR, SX/SR) must not touch another letter,
 * but may sit at the very start or end of the text, so "SFP-10G-LR" is
 * recognised as well as "SFP-10G-LR-S" or "LR4".
 *
 * @param text Product title or description.
 * @returns "SMF", "MMF", "Copper", or "" when no hint is found.
 */
function detectFiber(text: string): string {
  // (?:^|[^a-z]) / (?:[^a-z]|$) accept string edges, not only a neighbouring non-letter.
  const singleMode = /single.?mode|smf|(?:^|[^a-z])(?:lx|lr|er|zr)(?:[^a-z]|$)|bidi|cwdm|dwdm/i;
  const multiMode = /multi.?mode|mmf|(?:^|[^a-z])(?:sx|sr)(?:[^a-z]|$)/i;
  const copper = /copper|dac|twinax|rj.?45|base-t/i;

  if (singleMode.test(text)) return "SMF";
  if (multiMode.test(text)) return "MMF";
  if (copper.test(text)) return "Copper";
  return "";
}
/**
 * Find the first wavelength value in a product title.
 *
 * @param text Product title or description.
 * @returns The 3–4 digits preceding "nm" (case-insensitive), or "" if none.
 */
function detectWavelength(text: string): string {
  const found = /(\d{3,4})\s*nm/i.exec(text);
  return found !== null ? found[1] : "";
}
/**
 * Determine which OEM a product is advertised as compatible with.
 *
 * Brands are matched as whole words, case-insensitively; when several appear
 * in the title, the one listed first in `brands` wins.
 *
 * @param name Product title.
 * @returns The canonical brand name, or "" when none is mentioned.
 */
function extractCompatibleVendor(name: string): string {
  const brands = ["Cisco", "Juniper", "Arista", "HPE", "Aruba", "Dell", "Brocade", "Extreme",
    "Huawei", "Nokia", "MikroTik", "Mellanox", "Nvidia", "Ubiquiti", "Allied Telesis"];

  const mentioned = brands.find((candidate) => {
    const wholeWord = new RegExp(`\\b${candidate}\\b`, "i");
    return wholeWord.test(name);
  });
  return mentioned ?? "";
}
/**
 * Parse one QSFPTEK listing page into product rows.
 *
 * Two strategies are tried on the whitespace-collapsed HTML:
 *  1. `<div class="...product-thumb|product-layout...">` cards: URL, title and
 *     price are all read from inside the card.
 *  2. Only if strategy 1 found nothing: any on-site /p… or /product… link whose
 *     text looks like an optics product. The price is searched in a window
 *     around that link.
 *
 * Form factor and speed always come from `cat`; reach, fiber, wavelength and
 * compatible vendor are derived from the product title.
 *
 * @param html Raw page HTML.
 * @param cat  Category entry the page belongs to.
 * @returns Products found on the page, unique by URL, in document order.
 */
function parseProductList(html: string, cat: typeof CATEGORIES[number]): Product[] {
  const products: Product[] = [];
  const seen = new Set<string>();
  const collapsed = html.replace(/\s+/g, " ");

  // First "$1,234.56"-style amount in `fragment`, kept only when inside (0, 100000).
  const findPrice = (fragment: string): number | undefined => {
    const hit = fragment.match(/\$\s*([\d,]+\.?\d*)/);
    if (!hit) return undefined;
    const value = parseFloat(hit[1].replace(/,/g, ""));
    return value > 0 && value < 100000 ? value : undefined;
  };

  // Append a product, deriving all title-based attributes.
  const addProduct = (partNumber: string, name: string, url: string, price: number | undefined): void => {
    const reach = detectReach(name);
    products.push({
      partNumber, name, url,
      price,
      formFactor: cat.formFactor, speed: cat.speed, speedGbps: cat.speedGbps,
      reachLabel: reach?.label, reachMeters: reach?.meters,
      fiberType: detectFiber(name), wavelength: detectWavelength(name),
      compatibleWith: extractCompatibleVendor(name),
    });
  };

  // Strategy 1: OpenCart / custom card layout
  for (const cardMatch of collapsed.matchAll(/<div[^>]+class="[^"]*product-(?:thumb|layout)[^"]*"[^>]*>([\s\S]*?)<\/div>\s*<\/div>/gi)) {
    const card = cardMatch[1];
    const urlMatch = card.match(/href="(https?:\/\/(?:www\.)?qsfptek\.com\/[^"]+)"/i);
    if (!urlMatch) continue;
    const url = urlMatch[1];
    if (seen.has(url)) continue;
    seen.add(url);

    const nameMatch = card.match(/<h[34][^>]*>\s*<a[^>]*>([^<]{10,})<\/a>/i) ||
      card.match(/<a[^>]*title="([^"]{10,})"/i);
    if (!nameMatch) continue;
    const name = nameMatch[1].trim().replace(/&amp;/g, "&").replace(/&#[0-9]+;/g, "");
    if (name.length < 5) continue;

    // Everything before the first " compatible" / " for" / " sfp" / " qsfp".
    const partNumber = name.split(/\s+(?:compatible|for|sfp|qsfp)/i)[0]?.trim().slice(0, 80) || name.slice(0, 60);

    addProduct(partNumber, name, url, findPrice(card));
  }

  // Strategy 2: Generic product link scan
  if (products.length === 0) {
    for (const m of collapsed.matchAll(/href="(https?:\/\/(?:www\.)?qsfptek\.com\/(?:p|product)[^"?#]+)"[^>]*>([^<]{10,})</gi)) {
      const url = m[1];
      const name = m[2].trim().replace(/&amp;/g, "&");
      if (seen.has(url) || name.length < 10) continue;
      if (!/transceiver|sfp|qsfp|osfp|dac|aoc/i.test(name)) continue;
      seen.add(url);

      // Anchor the price window on THIS link. indexOf(url) would return the first
      // occurrence of the URL string, which may belong to another element or to a
      // longer URL sharing the same prefix, and so pick up an unrelated price.
      const at = m.index ?? collapsed.indexOf(url);
      const context = collapsed.slice(Math.max(0, at - 300), at + 600);

      addProduct(name.split(/\s+/)[0]?.slice(0, 80) || "", name, url, findPrice(context));
    }
  }

  return products;
}
/**
 * Retrieve a page body as text.
 *
 * @param url Absolute URL to request.
 * @returns The response body.
 * @throws Error `HTTP <status> for <url>` on a non-2xx response; the request is
 *         aborted by `AbortSignal.timeout` after 30 seconds.
 */
async function fetchPage(url: string): Promise<string> {
  const init = { headers: HEADERS, signal: AbortSignal.timeout(30000) };
  const res = await fetch(url, init);
  if (res.ok) {
    return res.text();
  }
  throw new Error(`HTTP ${res.status} for ${url}`);
}
/**
 * Crawl every QSFPTEK category, store the products found and record a price
 * observation for each product that exposes a price.
 *
 * Per category: fetch page 1, detect the page count (capped at MAX_PAGES,
 * defaulting to 3 when no pagination marker is found), walk `?page=N` with a
 * 2 s pause between requests, de-duplicate by URL and persist.
 * Failures are logged and never abort the run: a failing page ends that
 * category's pagination, a failing category moves on to the next one, and a
 * failing DB write skips that product.
 *
 * @returns Resolves when all categories have been processed.
 */
export async function scrapeQsfptek(): Promise<void> {
  console.log("=== QSFPTEK Scraper Starting ===\n");

  // Vendor row id used as both owner and price source (semantics live in ../utils/db).
  const vendorId = await ensureVendor(
    "QSFPTEK",
    "compatible",
    "https://www.qsfptek.com",
    "https://www.qsfptek.com/c/optical-transceiver.html",
  );

  // Thrown values are not guaranteed to be Error instances.
  const errorText = (err: unknown): string => (err instanceof Error ? err.message : String(err));

  let totalProducts = 0;
  let priceUpdates = 0;
  const seenCategories = new Set<string>();

  for (const cat of CATEGORIES) {
    console.log(`\n--- ${cat.formFactor} (${cat.speed}) [${cat.path}] ---`);

    // Decide about the generic fallback BEFORE spending a request on it.
    if (cat.path.includes("/optical-transceiver") && seenCategories.size > 3) {
      console.log(` Skipping generic fallback (${seenCategories.size} specific categories scraped)`);
      continue;
    }

    try {
      const html1 = await fetchPage(BASE + cat.path);
      const catProducts = parseProductList(html1, cat);

      if (catProducts.length === 0) {
        console.log(" No products on page 1 — skipping");
        continue; // the finally block below still applies the rate-limit pause
      }

      seenCategories.add(cat.path);
      console.log(` Found ${catProducts.length} products on page 1`);

      const totalPagesMatch =
        html1.match(/total-page[^>]*>\s*(\d+)/) ||
        html1.match(/page\s+\d+\s+of\s+(\d+)/i);
      const totalPages = totalPagesMatch ? Math.min(parseInt(totalPagesMatch[1], 10), MAX_PAGES) : 3;
      console.log(` Total pages (estimate): ${totalPages}`);

      const allProducts = [...catProducts];
      for (let page = 2; page <= totalPages; page++) {
        await sleep(2000);
        try {
          const pageUrl = BASE + cat.path.replace(".html", "") + `?page=${page}`;
          const html = await fetchPage(pageUrl);
          const pageProds = parseProductList(html, cat);
          if (pageProds.length === 0) break;
          allProducts.push(...pageProds);
          console.log(` Page ${page}: ${pageProds.length} products`);
        } catch (err) {
          console.warn(` Page ${page} failed: ${errorText(err).slice(0, 60)}`);
          break;
        }
      }

      // Keep the first occurrence of each URL, preserving order (linear time).
      const byUrl = new Map<string, Product>();
      for (const product of allProducts) {
        if (!byUrl.has(product.url)) byUrl.set(product.url, product);
      }
      const uniqueProducts = Array.from(byUrl.values());
      console.log(` Total unique: ${uniqueProducts.length}`);

      for (const product of uniqueProducts) {
        try {
          const txId = await findOrCreateScrapedTransceiver({
            partNumber: product.partNumber,
            vendorId,
            formFactor: product.formFactor,
            speedGbps: product.speedGbps,
            speed: product.speed,
            reachMeters: product.reachMeters,
            reachLabel: product.reachLabel,
            fiberType: product.fiberType,
            wavelengths: product.wavelength,
            category: "DataCenter",
          });

          if (product.price !== undefined && product.price > 0) {
            const hash = contentHash({ price: product.price, part: product.partNumber });
            const updated = await upsertPriceObservation({
              transceiverId: txId,
              sourceVendorId: vendorId,
              price: product.price,
              currency: "USD",
              stockLevel: "in_stock",
              url: product.url,
              contentHash: hash,
            });
            if (updated) priceUpdates++;
          }
          totalProducts++;
        } catch (err) {
          console.warn(` DB error: ${errorText(err).slice(0, 80)}`);
        }
      }
    } catch (err) {
      console.error(` Category failed: ${errorText(err)}`);
    } finally {
      // Pause after every category that issued a request. Previously the
      // "no products" path skipped this and hit the next category immediately.
      await sleep(2000);
    }
  }

  console.log(`\n=== QSFPTEK Complete: ${totalProducts} products, ${priceUpdates} price updates ===`);
}
// Direct invocation (not imported as a module): run once, then close the DB pool.
if (require.main === module) {
  const onFatal = (err: unknown): void => {
    console.error("Fatal:", err);
    pool.end();
    process.exit(1);
  };
  scrapeQsfptek()
    .then(() => pool.end())
    .catch(onFatal);
}