From 51c18212b883be40721bccacdfc41617532cd572 Mon Sep 17 00:00:00 2001 From: Rene Fichtmueller Date: Tue, 21 Apr 2026 07:57:55 +0200 Subject: [PATCH] fix: add image filter patterns and direct URL migrations for 6 vendors MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - switch-image-playwright.ts + switch-image-fetcher.ts: add filter patterns for /webimage-404/ (Netgear 404 hero), /Brand/ + /cybersecurity.png/ (Moxa brand marketing images not product photos) - sql/047: Moxa 4/4 models — CDN getattachment paths (hotlink-protected, Referer: moxa.com required; R2 proxy needed for production display) - sql/048: UfiSpace 6/6 models — ufispace.com/image/{dir}/{file}.png direct PNGs; Brocade G720+G730 — broadcom.com og:image; ICX 7850-48FS skipped (CommScope/Ruckus image URL not stable; no UPDATE in this migration) - sql/049: NVIDIA SN-series 6/6 — docscontent.nvidia.com (SN2201/3700/4700) and S3 direct (SN5400/5600); SN3750-SX via uvation reseller CDN --- .../src/scrapers/switch-image-fetcher.ts | 5 ++ .../src/scrapers/switch-image-playwright.ts | 4 ++ sql/047-moxa-images-direct.sql | 42 ++++++++++++ sql/048-ufispace-brocade-images-direct.sql | 66 +++++++++++++++++++ sql/049-nvidia-images-direct.sql | 56 ++++++++++++++++ 5 files changed, 173 insertions(+) create mode 100644 sql/047-moxa-images-direct.sql create mode 100644 sql/048-ufispace-brocade-images-direct.sql create mode 100644 sql/049-nvidia-images-direct.sql diff --git a/packages/scraper/src/scrapers/switch-image-fetcher.ts b/packages/scraper/src/scrapers/switch-image-fetcher.ts index d30267d..5a0eeb7 100644 --- a/packages/scraper/src/scrapers/switch-image-fetcher.ts +++ b/packages/scraper/src/scrapers/switch-image-fetcher.ts @@ -287,6 +287,11 @@ const GENERIC_IMAGE_PATTERNS: RegExp[] = [ /\/icon[-_]library\//i, // ── Diagrams and illustrations ─────────────────────────────────────────── /[-_]illustration[._]/i, + // ── Vendor 404 hero images 
─────────────────────────────────────────────── + /webimage-404/i, + // ── Moxa brand/marketing images (not product photos) ──────────────────── + /\/Brand\//i, + /cybersecurity\.png/i, // ── Cookie consent / GDPR overlay images ──────────────────────────────── /cdn\.cookielaw\.org/i, /cookiebot\.com/i, diff --git a/packages/scraper/src/scrapers/switch-image-playwright.ts b/packages/scraper/src/scrapers/switch-image-playwright.ts index d5795df..860112f 100644 --- a/packages/scraper/src/scrapers/switch-image-playwright.ts +++ b/packages/scraper/src/scrapers/switch-image-playwright.ts @@ -78,10 +78,14 @@ const GENERIC_IMAGE_PATTERNS: RegExp[] = [ // Vendor error / 404 graphics /404[-_]error/i, /error[-_]graphic/i, + /webimage-404/i, // Navigation icon libraries (D-Link, other CMSes) /\/icon[-_]library\//i, // Diagrams and illustrations (not product photos) /[-_]illustration[._]/i, + // Moxa brand/marketing images (not product photos) + /\/Brand\//i, + /cybersecurity\.png/i, ]; function isGenericImage(url: string): boolean { diff --git a/sql/047-moxa-images-direct.sql b/sql/047-moxa-images-direct.sql new file mode 100644 index 0000000..df4bfcf --- /dev/null +++ b/sql/047-moxa-images-direct.sql @@ -0,0 +1,42 @@ +-- Migration 047 — Moxa product images (direct CDN URL injection) +-- +-- CDN base: cdn-cms-frontdoor-dfc8ebanh6bkb3hs.a02.azurefd.net +-- Path pattern: /en/getattachment/Products/INDUSTRIAL-NETWORK-INFRASTRUCTURE/... +-- +-- ⚠️ Hotlink-protected: CDN requires Referer: https://www.moxa.com/ +-- Images will not display directly from third-party domains. +-- Use Cloudflare Worker proxy or download to R2 for production display. +-- +-- All URLs verified HTTP 200 with correct Referer (2026-04-21). 
+ +-- EDS-518E — Layer-2 Managed Switch (8 + 2-port) +UPDATE switches +SET image_url = 'https://cdn-cms-frontdoor-dfc8ebanh6bkb3hs.a02.azurefd.net/en/getattachment/Products/INDUSTRIAL-NETWORK-INFRASTRUCTURE/Ethernet-Switches/Layer-2-Managed-Switches/EDS-518E-Series/moxa-eds-518e-series-image-1-(1).jpg', + product_page_url = COALESCE(product_page_url, 'https://www.moxa.com/en/products/industrial-network-infrastructure/ethernet-switches/layer-2-managed-switches/eds-518e-series'), + assets_scraped_at = NOW() +WHERE model = 'EDS-518E' + AND vendor_id = (SELECT id FROM vendors WHERE slug = 'moxa'); + +-- EDS-G4014 — Layer-2 Managed Switch (14-port Gigabit) +UPDATE switches +SET image_url = 'https://cdn-cms-frontdoor-dfc8ebanh6bkb3hs.a02.azurefd.net/en/getattachment/Products/INDUSTRIAL-NETWORK-INFRASTRUCTURE/Ethernet-Switches/Layer-2-Managed-Switches/EDS-G4014-Series/moxa-eds-g4014-series-image-(1).jpg', + product_page_url = COALESCE(product_page_url, 'https://www.moxa.com/en/products/industrial-network-infrastructure/ethernet-switches/layer-2-managed-switches/eds-g4014-series'), + assets_scraped_at = NOW() +WHERE model = 'EDS-G4014' + AND vendor_id = (SELECT id FROM vendors WHERE slug = 'moxa'); + +-- ICS-G7826A — Rackmount Industrial Managed Switch (26-port) +UPDATE switches +SET image_url = 'https://cdn-cms-frontdoor-dfc8ebanh6bkb3hs.a02.azurefd.net/en/getattachment/Products/INDUSTRIAL-NETWORK-INFRASTRUCTURE/Ethernet-Switches/Rackmount-Switches/ICS-G7826A-Series/moxa-ics-g7826a-series-image-(1).jpg', + product_page_url = COALESCE(product_page_url, 'https://www.moxa.com/en/products/industrial-network-infrastructure/ethernet-switches/rackmount-switches/ics-g7826a-series'), + assets_scraped_at = NOW() +WHERE model = 'ICS-G7826A' + AND vendor_id = (SELECT id FROM vendors WHERE slug = 'moxa'); + +-- IKS-G6824A — Rackmount Industrial Managed Switch (24-port) +UPDATE switches +SET image_url = 
'https://cdn-cms-frontdoor-dfc8ebanh6bkb3hs.a02.azurefd.net/en/getattachment/Products/INDUSTRIAL-NETWORK-INFRASTRUCTURE/Ethernet-Switches/Rackmount-Switches/IKS-G6824A-Series/moxa-iks-g6824a-series-image-(1).jpg', + product_page_url = COALESCE(product_page_url, 'https://www.moxa.com/en/products/industrial-network-infrastructure/ethernet-switches/rackmount-switches/iks-g6824a-series'), + assets_scraped_at = NOW() +WHERE model = 'IKS-G6824A' + AND vendor_id = (SELECT id FROM vendors WHERE slug = 'moxa'); diff --git a/sql/048-ufispace-brocade-images-direct.sql b/sql/048-ufispace-brocade-images-direct.sql new file mode 100644 index 0000000..29cd7cb --- /dev/null +++ b/sql/048-ufispace-brocade-images-direct.sql @@ -0,0 +1,66 @@ +-- Migration 048 — UfiSpace and Brocade product images (direct URL injection) +-- +-- UfiSpace: images served from ufispace.com/image/{dir}/{file}.png +-- No og:image meta tags — images extracted from product page carousels. +-- All URLs verified HTTP 200 (2026-04-21). +-- +-- Brocade G720/G730: og:image from broadcom.com (Broadcom acquired Brocade's FC networking business). +-- ICX 7850-48FS: product line now owned by CommScope/Ruckus — image URL has rotating session +-- token, not stable; skipped. 
+ +-- ── UfiSpace ───────────────────────────────────────────────────────────────── + +UPDATE switches +SET image_url = 'https://www.ufispace.com/image/5R/9510-28DC-front-2026.png', + assets_scraped_at = NOW() +WHERE model = 'S9510-28DC' + AND vendor_id = (SELECT id FROM vendors WHERE slug = 'ufispace'); + +UPDATE switches +SET image_url = 'https://www.ufispace.com/image/2n/3475633079f4f0df148772926dd278c9.png', + assets_scraped_at = NOW() +WHERE model = 'S9600-30DX' + AND vendor_id = (SELECT id FROM vendors WHERE slug = 'ufispace'); + +UPDATE switches +SET image_url = 'https://www.ufispace.com/image/24/5dd78db3fb82420f59d164e35131b476.png', + assets_scraped_at = NOW() +WHERE model = 'S9600-32X' + AND vendor_id = (SELECT id FROM vendors WHERE slug = 'ufispace'); + +UPDATE switches +SET image_url = 'https://www.ufispace.com/image/2D/9b12bdf9033020045872a3e55132d7b9.png', + assets_scraped_at = NOW() +WHERE model = 'S9600-72XC' + AND vendor_id = (SELECT id FROM vendors WHERE slug = 'ufispace'); + +UPDATE switches +SET image_url = 'https://www.ufispace.com/image/x/f0edea5710efc9ce351b742b222f03d1.png', + assets_scraped_at = NOW() +WHERE model = 'S9700-53DX' + AND vendor_id = (SELECT id FROM vendors WHERE slug = 'ufispace'); + +UPDATE switches +SET image_url = 'https://www.ufispace.com/image/2V/aa3e530e3555baacedbc6e603c1fc331.png', + assets_scraped_at = NOW() +WHERE model = 'S9710-76D' + AND vendor_id = (SELECT id FROM vendors WHERE slug = 'ufispace'); + +-- ── Brocade (via Broadcom) ──────────────────────────────────────────────────── + +UPDATE switches +SET image_url = 'https://www.broadcom.com/media/blt4ac44e0e6c6d8341/bltf8d09763812cf984/604f5eb61078bc20548c0494/g720-right_283_29.jpeg', + product_page_url = COALESCE(product_page_url, 'https://www.broadcom.com/products/fibre-channel-networking/switches/g720-switch'), + assets_scraped_at = NOW() +WHERE model = 'G720' + AND vendor_id = (SELECT id FROM vendors WHERE slug = 'brocade'); + +UPDATE switches +SET image_url 
= 'https://www.broadcom.com/media/blt4ac44e0e6c6d8341/blt1d11847b97f678d0/62030d79d6534f0c057188c3/Brocade_G730_Left.jpeg', + product_page_url = COALESCE(product_page_url, 'https://www.broadcom.com/products/fibre-channel-networking/switches/g730-switch'), + assets_scraped_at = NOW() +WHERE model = 'G730' + AND vendor_id = (SELECT id FROM vendors WHERE slug = 'brocade'); + +-- ICX 7850-48FS: now CommScope/Ruckus — image URL uses rotating session token, +-- not stable enough to store. No UPDATE is issued, so image_url is left untouched pending a stable image source. diff --git a/sql/049-nvidia-images-direct.sql b/sql/049-nvidia-images-direct.sql new file mode 100644 index 0000000..e823878 --- /dev/null +++ b/sql/049-nvidia-images-direct.sql @@ -0,0 +1,56 @@ +-- Migration 049 — NVIDIA Networking (Spectrum) switch product images +-- +-- Sources: +-- SN2201, SN3700, SN4700: docscontent.nvidia.com (official NVIDIA docs CDN, +-- backed by k3-prod-nvidia-docs.s3.us-west-2.amazonaws.com) +-- SN3750-SX: cdn.uvation.com (reseller CDN — no official NVIDIA front-view photo) +-- SN5400, SN5600: direct S3 from k3-prod-nvidia-docs (SN5000 hardware manual) +-- +-- All URLs verified HTTP 200 (2026-04-21); image/png except the SN3750-SX reseller image, which is a .jpg. 
+ +-- SN2201 — Spectrum-1, 1GbE management switch +UPDATE switches +SET image_url = 'https://docscontent.nvidia.com/dims4/default/0ed212d/2147483647/strip/true/crop/1487x152+0+0/resize/1440x147!/quality/90/?url=https%3A%2F%2Fk3-prod-nvidia-docs.s3.us-west-2.amazonaws.com%2Fbrightspot%2Fconfluence%2F0000019a-2ff0-da13-abfe-bffbc48b0000%2Fimages%2Fdownload%2Fattachments%2F4232636769%2Fimage2021-12-7_10-37-52-version-1-modificationdate-1756395299567-api-v2.png', + product_page_url = COALESCE(product_page_url, 'https://marketplace.nvidia.com/en-us/enterprise/networking/sn2201/'), + assets_scraped_at = NOW() +WHERE model = 'SN2201' + AND vendor_id = (SELECT id FROM vendors WHERE slug = 'nvidia-networking'); + +-- SN3700 — Spectrum-2, 32x100GbE +UPDATE switches +SET image_url = 'https://docscontent.nvidia.com/dims4/default/3be2526/2147483647/strip/true/crop/1333x142+0+0/resize/1333x142!/quality/90/?url=https%3A%2F%2Fk3-prod-nvidia-docs.s3.us-west-2.amazonaws.com%2Fbrightspot%2Fconfluence%2F0000019a-4e93-d062-adbe-ce933de80000%2Fimages%2Fdownload%2Fattachments%2F4413914428%2Fimage2019-2-25_11-38-47-version-1-modificationdate-1761741936620-api-v2.png', + product_page_url = COALESCE(product_page_url, 'https://marketplace.nvidia.com/en-us/enterprise/networking/sn3700/'), + assets_scraped_at = NOW() +WHERE model = 'SN3700' + AND vendor_id = (SELECT id FROM vendors WHERE slug = 'nvidia-networking'); + +-- SN3750-SX — Spectrum-2, 32x200GbE (reseller CDN; no official NVIDIA front photo) +UPDATE switches +SET image_url = 'https://cdn.uvation.com/uvationmarketplace/catalog/product/m/s/msn3750-vs2fsc_1.jpg', + assets_scraped_at = NOW() +WHERE model = 'SN3750-SX' + AND vendor_id = (SELECT id FROM vendors WHERE slug = 'nvidia-networking'); + +-- SN4700 — Spectrum-3, 32x400GbE +UPDATE switches +SET image_url = 
'https://docscontent.nvidia.com/dims4/default/019a2aa/2147483647/strip/true/crop/1791x188+0+0/resize/1440x151!/quality/90/?url=https%3A%2F%2Fk3-prod-nvidia-docs.s3.us-west-2.amazonaws.com%2Fbrightspot%2Fconfluence%2F0000019d-86b0-ddad-a3bf-eff5dced0000%2Fimages%2Fdownload%2Fattachments%2F4794381944%2Fimage2020-5-3_12-15-57-version-1-modificationdate-1775996206557-api-v2.png', + product_page_url = COALESCE(product_page_url, 'https://marketplace.nvidia.com/en-us/enterprise/networking/sn4700/'), + assets_scraped_at = NOW() +WHERE model = 'SN4700' + AND vendor_id = (SELECT id FROM vendors WHERE slug = 'nvidia-networking'); + +-- SN5400 — Spectrum-4, 64x400GbE, 2U +UPDATE switches +SET image_url = 'https://k3-prod-nvidia-docs.s3.us-west-2.amazonaws.com/brightspot/confluence/0000019d-1a8d-dcc0-a39f-dacdabb80000/images/download/attachments/2705811518/image-2025-2-9_11-39-27-version-1-modificationdate-1744286748050-api-v2.png', + product_page_url = COALESCE(product_page_url, 'https://www.nvidia.com/en-us/networking/spectrumx/'), + assets_scraped_at = NOW() +WHERE model = 'SN5400' + AND vendor_id = (SELECT id FROM vendors WHERE slug = 'nvidia-networking'); + +-- SN5600 — Spectrum-4, 64x800GbE, 2U +UPDATE switches +SET image_url = 'https://k3-prod-nvidia-docs.s3.us-west-2.amazonaws.com/brightspot/confluence/0000019d-1a8d-dcc0-a39f-dacdabb80000/images/download/attachments/2705811518/image-2025-2-9_11-37-20-version-1-modificationdate-1744286748283-api-v2.png', + product_page_url = COALESCE(product_page_url, 'https://www.nvidia.com/en-us/networking/spectrumx/'), + assets_scraped_at = NOW() +WHERE model = 'SN5600' + AND vendor_id = (SELECT id FROM vendors WHERE slug = 'nvidia-networking');