From 8f36eff956914dc6cadda43407d1b75525b4682a Mon Sep 17 00:00:00 2001 From: Rene Fichtmueller Date: Tue, 21 Apr 2026 06:45:41 +0200 Subject: [PATCH] fix(scraper): filter OneTrust/cookie-consent images + skip in img fallback cdn.cookielaw.org logos appear as the largest DOM image on Dell/Extreme product pages when the cookie consent overlay is present. Added to both GENERIC_IMAGE_PATTERNS (isGenericImage filter) and img fallback skipPattern so the next-largest actual product image can be found. --- packages/scraper/src/scrapers/switch-image-playwright.ts | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/packages/scraper/src/scrapers/switch-image-playwright.ts b/packages/scraper/src/scrapers/switch-image-playwright.ts index fba0399..b34b43b 100644 --- a/packages/scraper/src/scrapers/switch-image-playwright.ts +++ b/packages/scraper/src/scrapers/switch-image-playwright.ts @@ -60,6 +60,11 @@ const GENERIC_IMAGE_PATTERNS: RegExp[] = [ /social[-_]icon/i, /favicon/i, /og[-_]image[-_][0-9]+x[0-9]+\./i, // e.g. og-image-1200x630 → family-level generic + // Cookie consent / GDPR overlay images (OneTrust, Cookiebot, TrustArc, etc.) + /cdn\.cookielaw\.org/i, + /cookiebot\.com/i, + /trustarc\.com/i, + /consent-manager/i, ]; function isGenericImage(url: string): boolean { @@ -278,7 +283,7 @@ export async function fetchSwitchImagesPlaywright(targetVendorSlug?: string): Pr // Deliberately broad — isGenericImage() will filter hero/banner/logo images afterward. imageUrl = await page.evaluate(() => { const imgs = Array.from(document.querySelectorAll("img")); - const skipPattern = /\/flags?\/|\/icons?\/|\/avatars?\/|social[-_]icon|favicon|spinner|loading/i; + const skipPattern = /\/flags?\/|\/icons?\/|\/avatars?\/|social[-_]icon|favicon|spinner|loading|cookielaw|cookiebot|trustarc/i; const candidate = imgs .filter((img) => { const src = img.src || img.getAttribute("data-src") || "";