diff --git a/packages/scraper/src/scrapers/switch-image-playwright.ts b/packages/scraper/src/scrapers/switch-image-playwright.ts
index b34b43b..bd3bfcd 100644
--- a/packages/scraper/src/scrapers/switch-image-playwright.ts
+++ b/packages/scraper/src/scrapers/switch-image-playwright.ts
@@ -3,7 +3,9 @@
  *
  * Vendors that reject plain HTTP bots (403/406) or require JS rendering:
  *   Arista (HTTP 406), Dell (HTTP 403), Edgecore (HTTP 403),
- *   Fortinet (JS-rendered), HPE/Aruba (HTTP 403), Extreme Networks (no static URLs)
+ *   Fortinet (JS-rendered), HPE/Aruba (HTTP 403), Extreme Networks (no static URLs),
+ *   Nokia, Huawei, NVIDIA, Netgear, Ciena, Moxa, D-Link, Alcatel-Lucent Enterprise,
+ *   Asterfusion, Brocade, UfiSpace, QCT
  *
  * Strategy:
  *   1. Query switches without image_url for JS-blocked vendors
@@ -65,6 +67,14 @@ const GENERIC_IMAGE_PATTERNS: RegExp[] = [
   /cookiebot\.com/i,
   /trustarc\.com/i,
   /consent-manager/i,
+  // Nokia CMS marketing banners (not product photos)
+  /nok\d+-nokia-com-banner/i,
+  // Huawei category/why-buy marketing images
+  /whyhuawei-/i,
+  /campus-switches/i,
+  /bg_products/i,
+  // Generic "banners" path segment used by CMSes
+  /\/banners?\//i,
 ];
 
 function isGenericImage(url: string): boolean {
@@ -138,13 +148,94 @@ function buildExtremeUrl(model: string): string | null {
   return slug ? `https://www.extremenetworks.com/product/${slug}` : null;
 }
 
+// ── New vendors (JS-rendered; rely on stored product_page_url or built URL) ────
+
+/**
+ * URL builder for vendors whose models all have a product_page_url in the DB
+ * (Nokia, Huawei, Ciena, Moxa, D-Link, ALE, Asterfusion, Brocade).
+ * Always returns null so the stored URL is used.
+ */
+const buildPassthroughUrl = (_model: string): string | null => null;
+
+/**
+ * Builds the NVIDIA ethernet-switching product page URL for a Spectrum switch.
+ *
+ * @param model - Model name, e.g. SN5600, SN4700, SN3700, SN3750-SX, SN2201.
+ * @returns The URL for the leading `SN` + digits prefix (lowercased), or null
+ *   when the model has no such prefix (e.g. ConnectX-7, an HCA with no
+ *   relevant product page).
+ */
+function buildNvidiaUrl(model: string): string | null {
+  // Only the SN + digits prefix is kept, so SN3750-SX maps to .../sn3750/.
+  const snMatch = model.match(/^(SN[\d]+)/i);
+  if (snMatch) {
+    return `https://www.nvidia.com/en-us/networking/ethernet-switching/${snMatch[1].toLowerCase()}/`;
+  }
+  return null;
+}
+
+/**
+ * Builds the Netgear fully-managed switch product page URL from a model name.
+ *
+ * The slug is the lowercased model with each run of non-alphanumeric
+ * characters collapsed to one "-" and a leading/trailing "-" removed.
+ *
+ * @param model - Model name, e.g. M4300-96X, M4350-48G4XF, M4500-32C.
+ * @returns `https://www.netgear.com/business/wired/switches/fully-managed/<slug>/`,
+ *   or null when the slug is empty.
+ */
+function buildNetgearUrl(model: string): string | null {
+  const slug = model.toLowerCase()
+    .replace(/[^a-z0-9]/g, "-")
+    .replace(/-+/g, "-")
+    .replace(/^-|-$/g, "");
+  return slug ? `https://www.netgear.com/business/wired/switches/fully-managed/${slug}/` : null;
+}
+
+// UfiSpace: slug map derived from sitemap (non-predictable product URL tree)
+const UFISPACE_URL_MAP: Record<string, string> = {
+  "S9510-28DC": "https://www.ufispace.com/products/telco/access/s9510-28dc-flexe-tsn-disaggregated-cell-site-gateway",
+  "S9600-30DX": "https://www.ufispace.com/products/telco/aggregation/s9600-30dx-open-zr-aggregation-router",
+  "S9600-32X": "https://www.ufispace.com/products/telco/aggregation/s9600-32x-25g-100g-aggregation-router",
+  "S9600-72XC": "https://www.ufispace.com/products/telco/aggregation/s9600-72xc-25g-100g-open-aggregation-router-tcam",
+  "S9700-53DX": "https://www.ufispace.com/products/telco/core-edge/s9700-53dx-100g-core-router",
+  "S9710-76D": "https://www.ufispace.com/products/telco/core-edge/s9710-76d-high-density-400g-disaggregated-core-router",
+};
+/** Looks up the UfiSpace product page by exact model name; null when the model is not in the map. */
+function buildUfiSpaceUrl(model: string): string | null {
+  return UFISPACE_URL_MAP[model] ?? null;
+}
+
+// QCT: URL map derived from sitemap (category path not predictable from model name)
+const QCT_URL_MAP: Record<string, string> = {
+  "QuantaMesh T3048-LY8": "https://www.qct.io/product/index/Networking/Ethernet-Switch/T3000-Series/QuantaMesh-T3048-LY8",
+  "QuantaMesh T7032-IX1": "https://www.qct.io/product/index/Networking/Bare-Metal-Switch/Spine-Switch/QuantaMesh-BMS-T7032-IX1",
+};
+/** Looks up the QCT product page by exact model name; null when the model is not in the map. */
+function buildQctUrl(model: string): string | null {
+  return QCT_URL_MAP[model] ?? null;
+}
+
 const URL_BUILDERS: Record<string, (model: string) => string | null> = {
-  arista:      buildAristaUrl,
-  dell:        buildDellUrl,
-  edgecore:    buildEdgecoreUrl,
-  fortinet:    buildFortinetUrl,
-  "hpe-aruba": buildHpeArubaUrl,
-  extreme:     buildExtremeUrl,
+  arista:                      buildAristaUrl,
+  dell:                        buildDellUrl,
+  edgecore:                    buildEdgecoreUrl,
+  fortinet:                    buildFortinetUrl,
+  "hpe-aruba":                 buildHpeArubaUrl,
+  extreme:                     buildExtremeUrl,
+  // New JS-rendered vendors (stored product_page_url used where available)
+  nokia:                       buildPassthroughUrl,
+  huawei:                      buildPassthroughUrl,
+  ciena:                       buildPassthroughUrl,
+  moxa:                        buildPassthroughUrl,
+  "d-link":                    buildPassthroughUrl,
+  "alcatel-lucent-enterprise": buildPassthroughUrl,
+  asterfusion:                 buildPassthroughUrl,
+  brocade:                     buildPassthroughUrl,
+  "nvidia-networking":         buildNvidiaUrl,
+  netgear:                     buildNetgearUrl,
+  ufispace:                    buildUfiSpaceUrl,
+  "quanta-cloud-technology":   buildQctUrl,
 };
 
 // ── Request data attached to each crawl URL ──────────────────────────────────