diff --git a/packages/scraper/src/scrapers/tenGtek.ts b/packages/scraper/src/scrapers/tenGtek.ts index b875c7b..6f4a3e1 100644 --- a/packages/scraper/src/scrapers/tenGtek.ts +++ b/packages/scraper/src/scrapers/tenGtek.ts @@ -1,29 +1,31 @@ /** * 10Gtek.com Scraper — Chinese OEM Transceiver Vendor * - * 10gtek.com is a direct competitor to FS.com at lower price points. - * Uses plain fetch (server-rendered HTML). - * Rate limited: 1 req/2sec. + * 10Gtek's main site (www.10gtek.com) only shows technical spec tables, no prices. + * Prices are available on their retail store: sfpcables.com (same company/brand). + * This scraper targets sfpcables.com which has both part numbers and USD prices. * - * Categories: SFP, SFP+, SFP28, QSFP+, QSFP28, QSFP-DD, OSFP + * Strategy: Paginate each category on sfpcables.com, extract Model + price per product. + * Rate limited: 1 req/2sec between pages. + * + * Categories: SFP, SFP+, SFP28, QSFP+, QSFP28, XFP */ import { pool, findOrCreateScrapedTransceiver, ensureVendor, upsertPriceObservation } from "../utils/db"; import { contentHash, parsePrice } from "../utils/hash"; -const BASE = "https://www.10gtek.com"; +const BASE = "https://www.sfpcables.com"; const HEADERS = { "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36", Accept: "text/html,application/xhtml+xml", }; const CATEGORIES = [ - { path: "/sfp", formFactor: "SFP", speed: "1G", speedGbps: 1 }, - { path: "/10g-sfp+", formFactor: "SFP+", speed: "10G", speedGbps: 10 }, - { path: "/sfp28", formFactor: "SFP28", speed: "25G", speedGbps: 25 }, - { path: "/qsfp", formFactor: "QSFP+", speed: "40G", speedGbps: 40 }, - { path: "/qsfp28", formFactor: "QSFP28", speed: "100G", speedGbps: 100 }, - { path: "/qsfpdd", formFactor: "QSFP-DD", speed: "400G", speedGbps: 400 }, - { path: "/xfp", formFactor: "XFP", speed: "10G", speedGbps: 10 }, + { slug: "sfp-1-25g-series", formFactor: "SFP", speed: "1G", speedGbps: 
1 }, + { slug: "sfp-transceivers", formFactor: "SFP+", speed: "10G", speedGbps: 10 }, + { slug: "sfp28-transceivers", formFactor: "SFP28", speed: "25G", speedGbps: 25 }, + { slug: "qsfp-transceivers", formFactor: "QSFP+", speed: "40G", speedGbps: 40 }, + { slug: "100g-qsfp28-transceivers", formFactor: "QSFP28", speed: "100G", speedGbps: 100 }, + { slug: "xfp-transceivers", formFactor: "XFP", speed: "10G", speedGbps: 10 }, ]; interface Product { @@ -69,94 +71,94 @@ function detectReach(text: string): { label: string; meters: number } | undefine } function detectFiber(text: string): string { - if (/single.?mode|smf|[^a-z]lx[^a-z]|[^a-z]lr[^a-z]|[^a-z]er[^a-z]|[^a-z]zr[^a-z]|bidi|cwdm|dwdm/i.test(text)) return "SMF"; + if (/single.?mode|smf|[^a-z]lx[^a-z]|[^a-z]lr[^a-z]|[^a-z]er[^a-z]|[^a-z]zr[^a-z]|bidi|cwdm|dwdm/i.test(text)) + return "SMF"; if (/multi.?mode|mmf|[^a-z]sx[^a-z]|[^a-z]sr[^a-z]/i.test(text)) return "MMF"; + if (/copper|rj.?45/i.test(text)) return "Copper"; return ""; } /** Strip HTML tags and decode common entities */ function stripHtml(s: string): string { - return s.replace(/<[^>]+>/g, "").replace(/&amp;/g, "&").replace(/&lt;/g, "<") - .replace(/&gt;/g, ">").replace(/&nbsp;/g, " ").replace(/&deg;/g, "°") - .replace(/&#\d+;/g, "").trim(); + return s + .replace(/<[^>]+>/g, "") + .replace(/&amp;/g, "&") + .replace(/&lt;/g, "<") + .replace(/&gt;/g, ">") + .replace(/&nbsp;/g, " ") + .replace(/&#\d+;/g, "") + .trim(); } -function parseDistance(text: string): { label: string; meters: number } | undefined { - const km = text.match(/(\d+)\s*km/i); - if (km) return { label: `${km[1]}km`, meters: parseInt(km[1]) * 1000 }; - const m = text.match(/(\d+)\s*m\b/i); - if (m) return { label: `${m[1]}m`, meters: parseInt(m[1]) }; - return undefined; -} - -function parseProductList(html: string, cat: typeof CATEGORIES[number]): Product[] { +/** + * Parse product listings from a sfpcables.com category page. + * + * HTML structure per product (Magento): + *