fix(scraper): filter OneTrust/cookie-consent images + skip in img fallback
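cdn.cookielaw.org logos appear as the largest DOM image on Dell/Extreme product pages when the cookie-consent overlay is present. This commit adds cookie-consent patterns (cdn.cookielaw.org, cookiebot.com, trustarc.com, consent-manager) to GENERIC_IMAGE_PATTERNS (used by the isGenericImage filter) and adds cookielaw/cookiebot/trustarc to the img-fallback skipPattern, so that the next-largest actual product image can be found.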
This commit is contained in:
parent
d67fbe31da
commit
8f36eff956
@@ -60,6 +60,11 @@ const GENERIC_IMAGE_PATTERNS: RegExp[] = [
|
||||
/social[-_]icon/i,
|
||||
/favicon/i,
|
||||
/og[-_]image[-_][0-9]+x[0-9]+\./i, // e.g. og-image-1200x630 → family-level generic
|
||||
// Cookie consent / GDPR overlay images (OneTrust, Cookiebot, TrustArc, etc.)
|
||||
/cdn\.cookielaw\.org/i,
|
||||
/cookiebot\.com/i,
|
||||
/trustarc\.com/i,
|
||||
/consent-manager/i,
|
||||
];
|
||||
|
||||
function isGenericImage(url: string): boolean {
|
||||
@@ -278,7 +283,7 @@ export async function fetchSwitchImagesPlaywright(targetVendorSlug?: string): Pr
|
||||
// Deliberately broad — isGenericImage() will filter hero/banner/logo images afterward.
|
||||
imageUrl = await page.evaluate(() => {
|
||||
const imgs = Array.from(document.querySelectorAll<HTMLImageElement>("img"));
|
||||
const skipPattern = /\/flags?\/|\/icons?\/|\/avatars?\/|social[-_]icon|favicon|spinner|loading/i;
|
||||
const skipPattern = /\/flags?\/|\/icons?\/|\/avatars?\/|social[-_]icon|favicon|spinner|loading|cookielaw|cookiebot|trustarc/i;
|
||||
const candidate = imgs
|
||||
.filter((img) => {
|
||||
const src = img.src || img.getAttribute("data-src") || "";
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user