From 9618a4f0e0582bfe81531c69655a89da9104990b Mon Sep 17 00:00:00 2001
From: Rene Fichtmueller
Date: Tue, 21 Apr 2026 00:41:31 +0200
Subject: [PATCH] fix: Cisco 8000 builder URL + MikroTik lowercase + new vendor builders
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

URL builder fixes:
- Cisco 8000: update to new /site/us/en/ URL scheme (family page, not per-model)
- MikroTik: fix to lowercase+underscore format (was uppercase, caused 404)
- Fortinet: set to null — JS-rendered pages, all redirect to generic page
- Alcatel-Lucent Enterprise slug added to dispatcher (was missing, caused 0 hits)
- Add Quanta, Allied Telesis, Ufispace, Netgear URL builders
- NVIDIA: skip ConnectX/BlueField non-switch models

Migration 044:
- Clear 35 wrong NCS-5500 URLs from Cisco 8000-series models
- Pre-set correct 8000-series family URL for 21 models without images
---
 .../src/scrapers/switch-image-fetcher.ts | 18 +++++++-------
 sql/044-fix-cisco-8000-product-urls.sql  | 24 +++++++++++++++++++
 2 files changed, 33 insertions(+), 9 deletions(-)
 create mode 100644 sql/044-fix-cisco-8000-product-urls.sql

diff --git a/packages/scraper/src/scrapers/switch-image-fetcher.ts b/packages/scraper/src/scrapers/switch-image-fetcher.ts
index 1b198e6..cb7c5d5 100644
--- a/packages/scraper/src/scrapers/switch-image-fetcher.ts
+++ b/packages/scraper/src/scrapers/switch-image-fetcher.ts
@@ -53,9 +53,9 @@ function buildCiscoUrl(model: string): string | null {
     return `https://www.cisco.com/c/en/us/products/switches/catalyst-${slug}/index.html`;
   }
   // Cisco 8000 SP series: 8101-32FH, 8202-32FH, 8608
+  // Cisco no longer has individual 8000 model pages — use the family page instead
   if (/^8[0-9]{3}/.test(m)) {
-    const slug = m.toLowerCase().replace(/[^a-z0-9]/g, "-");
-    return `https://www.cisco.com/c/en/us/products/routers/8000-series-routers/${slug}/index.html`;
+    return `https://www.cisco.com/site/us/en/products/networking/sdwan-routers/8000-series/index.html`;
   }
   return null;
 }
@@ -125,8 +125,8 @@ function buildExtremeUrl(model: string): string | null {
 }
 
 function buildMikroTikUrl(model: string): string | null {
-  // CRS504-4XQ-IN → https://mikrotik.com/product/CRS504_4XQ_IN
-  const slug = model.replace(/[-\s]+/g, "_");
+  // CRS504-4XQ-IN → https://mikrotik.com/product/crs504_4xq_in (lowercase required)
+  const slug = model.toLowerCase().replace(/[-\s]+/g, "_").replace(/[^a-z0-9_]/g, "");
   return `https://mikrotik.com/product/${slug}`;
 }
 
@@ -160,11 +160,11 @@ function buildAsterfusionUrl(model: string): string | null {
   return `https://www.asterfusion.com/products/${slug}/`;
 }
 
-function buildFortinetUrl(model: string): string | null {
-  // FortiSwitch 1024E → fortiswitch-1024e
-  // FortiSwitch 124F-POE → fortiswitch-124f-poe
-  const slug = model.toLowerCase().replace(/\s+/g, "-").replace(/[^a-z0-9-]/g, "");
-  return `https://www.fortinet.com/products/switches/${slug}.html`;
+function buildFortinetUrl(_model: string): string | null {
+  // Fortinet product pages are JS-rendered — og:image only returns the brand icon.
+  // All /products/fortiswitch/ URLs redirect to the generic /ethernet-switches page.
+  // Image scraping is not possible via plain HTTP for this vendor.
+  return null;
 }
 
 function buildQuantaUrl(model: string): string | null {
diff --git a/sql/044-fix-cisco-8000-product-urls.sql b/sql/044-fix-cisco-8000-product-urls.sql
new file mode 100644
index 0000000..b03fa9a
--- /dev/null
+++ b/sql/044-fix-cisco-8000-product-urls.sql
@@ -0,0 +1,24 @@
+-- Migration 044 — Fix Cisco 8000 series product_page_url
+--
+-- The previous scraper run incorrectly stored the NCS-5500 series URL
+-- for Cisco 8000-series SP router models (8101-32FH, 8202-32FH, etc).
+-- The correct page is the 8000-series family page on Cisco's new /site/ URL scheme.
+--
+-- After this migration, the image scraper will re-fetch these 35 switches
+-- using the updated buildCiscoUrl() which now returns the correct family URL.
+
+-- 1. Clear the wrongly-stored NCS-5500 product_page_url so the scraper rebuilds it
+UPDATE switches
+SET product_page_url = NULL,
+    assets_scraped_at = NULL
+WHERE product_page_url = 'https://www.cisco.com/c/en/us/products/routers/network-convergence-system-5500-series/index.html'
+  AND image_url IS NULL;
+
+-- 2. Pre-set the correct 8000-series family URL for all 8000-series models without an image
+-- so the next scraper run hits the right page immediately
+UPDATE switches
+SET product_page_url = 'https://www.cisco.com/site/us/en/products/networking/sdwan-routers/8000-series/index.html',
+    assets_scraped_at = NULL
+WHERE image_url IS NULL
+  AND vendor_id = (SELECT id FROM vendors WHERE slug = 'cisco')
+  AND model ~ '^8[0-9]{3}';