/**
 * 10Gtek.com Scraper — Chinese OEM Transceiver Vendor
 *
 * 10gtek.com is a direct competitor to FS.com at lower price points.
 * Uses plain fetch (server-rendered HTML).
 * Rate limited: 1 req/2sec.
 *
 * Categories: SFP, SFP+, SFP28, QSFP+, QSFP28, QSFP-DD, XFP
 */

import { pool, findOrCreateScrapedTransceiver, ensureVendor, upsertPriceObservation } from "../utils/db";
import { contentHash, parsePrice } from "../utils/hash";

const BASE = "https://www.10gtek.com";

const HEADERS = {
  "User-Agent":
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
  Accept: "text/html,application/xhtml+xml",
};

/** Category listing pages to scrape, with the form factor and speed applied to every product found on each. */
const CATEGORIES = [
  { path: "/sfp", formFactor: "SFP", speed: "1G", speedGbps: 1 },
  { path: "/10g-sfp+", formFactor: "SFP+", speed: "10G", speedGbps: 10 },
  { path: "/sfp28", formFactor: "SFP28", speed: "25G", speedGbps: 25 },
  { path: "/qsfp", formFactor: "QSFP+", speed: "40G", speedGbps: 40 },
  { path: "/qsfp28", formFactor: "QSFP28", speed: "100G", speedGbps: 100 },
  { path: "/qsfpdd", formFactor: "QSFP-DD", speed: "400G", speedGbps: 400 },
  { path: "/xfp", formFactor: "XFP", speed: "10G", speedGbps: 10 },
];

/** One product row extracted from a category page. */
interface Product {
  partNumber: string;
  name: string;
  url: string;
  price?: number;
  currency?: string;
  formFactor: string;
  speed: string;
  speedGbps: number;
  reachLabel?: string;
  reachMeters?: number;
  fiberType?: string;
}

/** Reach lookup table, tried in order: explicit distances first, then optic-type suffixes (case-sensitive). */
const REACH_PATTERNS: readonly (readonly [RegExp, string, number])[] = [
  [/\b80\s*km\b/i, "80km", 80000],
  [/\b40\s*km\b/i, "40km", 40000],
  [/\b20\s*km\b/i, "20km", 20000],
  [/\b10\s*km\b/i, "10km", 10000],
  [/\b2\s*km\b/i, "2km", 2000],
  [/\b500\s*m\b/i, "500m", 500],
  [/\b300\s*m\b/i, "300m", 300],
  [/\b100\s*m\b/i, "100m", 100],
  [/\bLR4\b/, "10km", 10000],
  [/\bLR\b/, "10km", 10000],
  [/\bER\b/, "40km", 40000],
  [/\bZR\b/, "80km", 80000],
  [/\bSR4?\b/, "100m", 100],
  [/\bDR4?\b/, "500m", 500],
  [/\bFR4?\b/, "2km", 2000],
];

/** Matches one table row and captures its inner HTML. */
const ROW_REGEX = /<tr\b[^>]*>([\s\S]*?)<\/tr>/gi;

/** Matches one data cell and captures its inner HTML. */
const CELL_REGEX = /<td\b[^>]*>([\s\S]*?)<\/td>/gi;

/** Resolves after `ms` milliseconds. */
function sleep(ms: number): Promise<void> {
  return new Promise((resolve) => setTimeout(resolve, ms));
}

/** Returns a printable message for any thrown value, not only `Error` instances. */
function errorMessage(err: unknown): string {
  return err instanceof Error ? err.message : String(err);
}

/**
 * Guesses the reach of an optic from free text using the first matching entry of REACH_PATTERNS.
 *
 * @param text Spec and/or distance text.
 * @returns Reach label and metres, or `undefined` when no pattern matches.
 */
function detectReach(text: string): { label: string; meters: number } | undefined {
  for (const [regex, label, meters] of REACH_PATTERNS) {
    if (regex.test(text)) return { label, meters };
  }
  return undefined;
}

/**
 * Guesses the fiber type from spec text.
 *
 * Short tokens (LX/LR/ER/ZR, SX/SR) only count when not embedded in a longer word; lookarounds
 * are used so that a token at the very start or end of the text is still recognised.
 *
 * @returns "SMF", "MMF", or "" when nothing matches. SMF hints are checked first.
 */
function detectFiber(text: string): string {
  if (/single.?mode|smf|(?<![a-z])(?:lx|lr|er|zr)(?![a-z])|bidi|cwdm|dwdm/i.test(text)) return "SMF";
  if (/multi.?mode|mmf|(?<![a-z])(?:sx|sr)(?![a-z])/i.test(text)) return "MMF";
  return "";
}

/** Strip HTML tags and decode common entities */
function stripHtml(s: string): string {
  return s
    .replace(/<[^>]+>/g, "")
    .replace(/&lt;/g, "<")
    .replace(/&gt;/g, ">")
    .replace(/&nbsp;/g, " ")
    .replace(/&deg;/g, "°")
    // Remaining numeric entities are dropped rather than decoded.
    .replace(/&#\d+;/g, "")
    // "&amp;" is decoded last so that text such as "&amp;lt;" is not decoded twice.
    .replace(/&amp;/g, "&")
    .trim();
}

/**
 * Parses an explicit distance such as "10km", "0.5 km" or "300m".
 *
 * Kilometres are tried before metres. Decimal values are accepted; metres are rounded to an integer.
 *
 * @returns The label as written plus the distance in metres, or `undefined` when no distance is found.
 */
function parseDistance(text: string): { label: string; meters: number } | undefined {
  const km = /(\d+(?:\.\d+)?)\s*km/i.exec(text);
  if (km?.[1]) {
    return { label: `${km[1]}km`, meters: Math.round(Number.parseFloat(km[1]) * 1000) };
  }
  const m = /(\d+(?:\.\d+)?)\s*m\b/i.exec(text);
  if (m?.[1]) {
    return { label: `${m[1]}m`, meters: Math.round(Number.parseFloat(m[1])) };
  }
  return undefined;
}

/**
 * Extracts products from a category page.
 *
 * 10Gtek uses HTML tables with columns:
 * Part No. | Spec | Data Rate | Wavelength | Fiber Type | Distance | Optical Comp. | Tx Power | E.R | Rx Sens. | Temp.
 *
 * Rows with fewer than six cells, header rows, and rows whose first cell does not look like a
 * part number are skipped. Only the first row for each part number is kept. The tables carry no
 * price column, so `price` and `currency` are never set here.
 *
 * @param html Raw HTML of the category page.
 * @param cat Category whose form factor and speed are stamped on every product.
 */
function parseProductList(html: string, cat: typeof CATEGORIES[number]): Product[] {
  const products: Product[] = [];
  const seen = new Set<string>();

  for (const rowMatch of html.matchAll(ROW_REGEX)) {
    const rowHtml = rowMatch[1] ?? "";
    const cells = Array.from(rowHtml.matchAll(CELL_REGEX), (cell) => stripHtml(cell[1] ?? ""));

    // Need at least 6 columns (up to and including Distance).
    if (cells.length < 6) continue;

    const [partNumber = "", spec = "", dataRate = "", , fiberCell = "", distance = ""] = cells;

    if (partNumber.length < 3) continue;
    // Skip header rows
    if (/^Part\s*No/i.test(partNumber) || /^Spec/i.test(partNumber)) continue;
    // Part numbers typically start with A (ASF, AXS, AXQ, AQS, etc.) or contain alphanumeric
    if (!/^[A-Z][A-Z0-9]/i.test(partNumber)) continue;

    // Dedupe by part number, keeping the first occurrence.
    if (seen.has(partNumber)) continue;
    seen.add(partNumber);

    // Prefer the explicit Distance cell; fall back to keyword detection on spec + distance.
    const reach = parseDistance(distance) || detectReach(spec + " " + distance);

    // Determine fiber type from table cell or spec
    let fiber: string;
    if (/SMF|single/i.test(fiberCell)) fiber = "SMF";
    else if (/MMF|multi/i.test(fiberCell)) fiber = "MMF";
    else if (/CAT|RJ|copper/i.test(fiberCell)) fiber = "Copper";
    else fiber = detectFiber(spec);

    products.push({
      partNumber,
      // Build descriptive name
      name: `${partNumber} ${spec} ${dataRate}`.trim(),
      url: `${BASE}${cat.path}#${partNumber}`,
      formFactor: cat.formFactor,
      speed: cat.speed,
      speedGbps: cat.speedGbps,
      reachLabel: reach?.label,
      reachMeters: reach?.meters,
      fiberType: fiber,
    });
  }

  return products;
}

/**
 * Fetches a page as text using the scraper's browser-like headers and a 30 second timeout.
 *
 * @throws Error when the response status is not OK.
 */
async function fetchPage(url: string): Promise<string> {
  const resp = await fetch(url, { headers: HEADERS, signal: AbortSignal.timeout(30000) });
  if (!resp.ok) throw new Error(`HTTP ${resp.status} for ${url}`);
  return resp.text();
}

/**
 * Scrapes every category in CATEGORIES and records each product through the db helpers.
 *
 * A failure on one product or one category is logged and does not stop the run.
 * Waits 2 seconds after each category request.
 */
export async function scrape10Gtek(): Promise<void> {
  console.log("=== 10Gtek Scraper Starting ===\n");

  const vendorId = await ensureVendor("10Gtek", "compatible", "https://www.10gtek.com", "https://www.10gtek.com");

  let totalProducts = 0;
  let priceUpdates = 0;

  for (const cat of CATEGORIES) {
    console.log(`\n--- ${cat.formFactor} (${cat.speed}) ---`);

    try {
      const html = await fetchPage(BASE + cat.path);
      const catProducts = parseProductList(html, cat);
      console.log(` Found ${catProducts.length} products`);

      for (const product of catProducts) {
        try {
          const txId = await findOrCreateScrapedTransceiver({
            partNumber: product.partNumber,
            vendorId,
            formFactor: product.formFactor,
            speedGbps: product.speedGbps,
            speed: product.speed,
            reachMeters: product.reachMeters,
            reachLabel: product.reachLabel,
            fiberType: product.fiberType,
            category: "DataCenter",
          });

          // Only reached when a product carries a price; parseProductList currently never sets one.
          if (product.price !== undefined && product.price > 0) {
            const hash = contentHash(JSON.stringify({ price: product.price, part: product.partNumber }));
            const updated = await upsertPriceObservation({
              transceiverId: txId,
              sourceVendorId: vendorId,
              price: product.price,
              currency: product.currency || "USD",
              stockLevel: "in_stock",
              url: product.url,
              contentHash: hash,
            });
            if (updated) priceUpdates++;
          }

          totalProducts++;
        } catch (err) {
          console.warn(` Error: ${errorMessage(err).slice(0, 80)}`);
        }
      }
    } catch (err) {
      console.error(` Category failed: ${errorMessage(err)}`);
    }

    await sleep(2000);
  }

  console.log(`\n=== 10Gtek Complete: ${totalProducts} products, ${priceUpdates} prices ===`);
}

// Run the scraper when this file is executed directly rather than imported.
if (require.main === module) {
  scrape10Gtek()
    .then(() => pool.end())
    .catch((err) => {
      console.error("Fatal:", err);
      pool.end();
      process.exit(1);
    });
}