diff --git a/packages/scraper/src/scrapers/gbics.ts b/packages/scraper/src/scrapers/gbics.ts index ce6c9a7..472ca4f 100644 --- a/packages/scraper/src/scrapers/gbics.ts +++ b/packages/scraper/src/scrapers/gbics.ts @@ -117,7 +117,10 @@ function parseProductList(html: string, cat: typeof CATEGORIES[number]): Product // BigCommerce card pattern (attribute order varies by theme version): // Old: // New: - // Two-pass approach: find all product tags regardless of attribute order + // Two-pass approach: find all product tags regardless of attribute order. + // GBICS BigCommerce theme: product cards have aria-label BEFORE href ("aria-label first"), + // while navigation links have href BEFORE aria-label. Try pattern 2 when pattern 1 + // finds results but none contain GBP prices (£), which indicates only nav links were matched. const productRegex = /href="(https?:\/\/(?:www\.)?gbics\.com\/[^"]+)"[^>]*aria-label="([^"]+)"/gi; const productRegex2 = /aria-label="([^"]+)"[^>]*href="(https?:\/\/(?:www\.)?gbics\.com\/[^"]+)"/gi; let match; @@ -125,7 +128,11 @@ function parseProductList(html: string, cat: typeof CATEGORIES[number]): Product while ((match = productRegex.exec(collapsed)) !== null) { rawMatches.push({ url: match[1].trim(), label: match[2].trim(), index: match.index }); } - if (rawMatches.length === 0) { + // Fall back to pattern 2 when pattern 1 finds no results, OR when no results contain + // GBP prices (£) — indicating only navigation links were matched by pattern 1. + const hasPricesInP1 = rawMatches.some((m) => m.label.includes("£")); + if (rawMatches.length === 0 || !hasPricesInP1) { + rawMatches.length = 0; // clear nav-link pollution while ((match = productRegex2.exec(collapsed)) !== null) { rawMatches.push({ url: match[2].trim(), label: match[1].trim(), index: match.index }); }