diff --git a/packages/scraper/src/scrapers/switch-image-playwright.ts b/packages/scraper/src/scrapers/switch-image-playwright.ts
index 9db2e40..89aa17e 100644
--- a/packages/scraper/src/scrapers/switch-image-playwright.ts
+++ b/packages/scraper/src/scrapers/switch-image-playwright.ts
@@ -92,33 +92,47 @@ function buildDellUrl(model: string): string | null {
   return `https://www.dell.com/en-us/shop/dell-networking-switches/sc/networking-switches?appliedRefinements=DP_SEARCH_RESULTS_KEYWORDS~${encodeURIComponent(cleanModel)}`;
 }
 
+// Edgecore uses WooCommerce with /product/<slug>/ URLs (no .html suffix).
+// Some models have non-obvious slugs verified via sitemap.
+const EDGECORE_SLUG_MAP: Record<string, string> = {
+  "AS7712-32X": "as7712-32x-ec", // -ec suffix variant in Edgecore WooCommerce
+  "Minipack2": "minipack-as8000-open-modular-platform", // Facebook OCP Minipack2
+};
+
 function buildEdgecoreUrl(model: string): string | null {
-  const slug = model.toLowerCase().replace(/[^a-z0-9-]/g, "-");
-  return `https://www.edge-core.com/product/${slug}.html`;
+  // Own-property check: `in` would also match inherited keys such as "constructor".
+  if (Object.prototype.hasOwnProperty.call(EDGECORE_SLUG_MAP, model)) {
+    return `https://www.edge-core.com/product/${EDGECORE_SLUG_MAP[model]}/`;
+  }
+  // Standard slug: lowercase, replace non-alphanum with dash, collapse multiple dashes
+  const slug = model.toLowerCase()
+    .replace(/[^a-z0-9-]/g, "-")
+    .replace(/-+/g, "-")
+    .replace(/^-|-$/g, "");
+  return slug ? `https://www.edge-core.com/product/${slug}/` : null;
 }
 
-function buildFortinetUrl(model: string): string | null {
-  // FortiSwitch 424E → fortiswitch-424e
-  const slug = model.toLowerCase().replace(/\s+/g, "-").replace(/[^a-z0-9-]/g, "");
-  return `https://www.fortinet.com/products/fortiswitch/${slug}`;
+function buildFortinetUrl(_model: string): string | null {
+  // Fortinet product pages are fully JS-rendered and all redirect to generic /products/ethernet-switches.
+  // No reliable og:image can be extracted — skip entirely.
+  return null;
 }
 
 function buildHpeArubaUrl(model: string): string | null {
+  // HPE Aruba series pages are stored in product_page_url for all known models.
+  // Builder is a fallback for unknown models.
   const slug = model.toLowerCase().replace(/[^a-z0-9-]/g, "-");
   return `https://www.arubanetworks.com/products/switches/${slug}/`;
 }
 
 function buildExtremeUrl(model: string): string | null {
-  // Extreme uses Coveo JS search — try series page
-  const m = model.toLowerCase();
-  if (m.startsWith("x6")) {
-    const series = model.match(/^(X\d+)/i)?.[1]?.toLowerCase() ?? "";
-    return `https://www.extremenetworks.com/products/switching/${series}-series/`;
-  }
-  if (m.startsWith("slx") || m.startsWith("8720") || m.startsWith("5520")) {
-    return `https://www.extremenetworks.com/products/switching/`;
-  }
-  return `https://www.extremenetworks.com/products/switching/`;
+  // Extreme direct product pages: extremenetworks.com/product/<slug>
+  const slug = model.toLowerCase()
+    .replace(/\s+/g, "-")
+    .replace(/[^a-z0-9-]/g, "")
+    .replace(/-+/g, "-")
+    .replace(/^-|-$/g, "");
+  return slug ? `https://www.extremenetworks.com/product/${slug}` : null;
 }
 
 const URL_BUILDERS: Record<string, (model: string) => string | null> = {
@@ -256,16 +270,18 @@ export async function fetchSwitchImagesPlaywright(targetVendorSlug?: string): Pr
           const twitterMeta = document.querySelector<HTMLMetaElement>('meta[name="twitter:image"]');
           if (twitterMeta?.content) return twitterMeta.content;
 
-          // Fallback: largest product-looking image
+          // Fallback: largest visible image that isn't a UI element.
+          // Deliberately broad — isGenericImage() will filter hero/banner/logo images afterward.
           const imgs = Array.from(document.querySelectorAll("img"));
+          const skipPattern = /\/flags?\/|\/icons?\/|\/avatars?\/|social[-_]icon|favicon|spinner|loading/i;
           const candidate = imgs
             .filter((img) => {
               const src = img.src || img.getAttribute("data-src") || "";
               return src.startsWith("http") &&
-                (src.match(/\.(jpg|jpeg|png|webp)/i)) &&
-                img.naturalWidth > 300 &&
-                img.naturalHeight > 200 &&
-                (src.includes("product") || src.includes("switch") || src.includes("router") || src.includes("hardware"));
+                /\.(jpg|jpeg|png|webp)/i.test(src) &&
+                img.naturalWidth >= 200 &&
+                img.naturalHeight >= 150 &&
+                !skipPattern.test(src);
             })
             .sort((a, b) => (b.naturalWidth * b.naturalHeight) - (a.naturalWidth * a.naturalHeight))[0];
           return candidate?.src ?? null;
@@ -303,7 +319,9 @@ export async function fetchSwitchImagesPlaywright(targetVendorSlug?: string): Pr
           errors++;
         },
       },
-      makeCrawleeConfig("switch-images-playwright"),
+      // Use a unique run ID to avoid Crawlee temp-dir state contamination when multiple
+      // vendor runs execute back-to-back (ENOENT: stale request-queue files from prior run).
+      makeCrawleeConfig(`switch-images-playwright-${Date.now()}`),
     );
 
     await crawler.run(requests);