fix(scrapers): fix ATGBics theme migration, NADDOD URL, disable VCELink

- ATGBics: update HTML parser from old card--product theme to new
  card__info theme (Shopify template changed April 2026); name now
  extracted from the product link's anchor text instead of aria-label
- NADDOD: correct ensureVendor shop URL from /collections/transceivers
  (404) to /collection/optical-transceivers
- VCELink: disable scraper — site pivoted from optical transceivers to
  audio/video/cable products; all collection URLs return 404
This commit is contained in:
Rene Fichtmueller 2026-04-20 22:11:24 +02:00
parent ca943f1f86
commit 1aba912a15
3 changed files with 11 additions and 10 deletions

View File

@ -144,19 +144,15 @@ function parseCategoryPage(html: string, cat: typeof CATEGORIES[number]): Atgbic
const products: AtgbicsProduct[] = [];
const seen = new Set<string>();
// Split by product cards — class="card card--product
const cardParts = html.split(/class="card card--product/);
// Split by product cards — class="card__info" (theme updated April 2026, was "card card--product")
const cardParts = html.split(/class="card__info"/);
for (const card of cardParts.slice(1)) {
// Name from aria-label (full descriptive name)
const nameM = card.match(/aria-label="([^"]{8,})"/);
if (!nameM) continue;
const name = nameM[1].replace(/®/g, "").replace(/\s+/g, " ").trim();
// Product handle from href
const hrefM = card.match(/href="\/(?:collections\/[^"]+\/)?products\/([^"?#]+)"/);
// Product handle + name from <a href="/products/..." class="card-link text-current">NAME</a>
const hrefM = card.match(/href="\/products\/([^"?#]+)"[^>]*>\s*([^<]{8,}?)\s*<\/a>/s);
if (!hrefM) continue;
const handle = hrefM[1];
const name = hrefM[2].replace(/®/g, "").replace(/\s+/g, " ").trim();
if (seen.has(handle)) continue;
seen.add(handle);

View File

@ -205,7 +205,7 @@ export async function scrapeNaddod(): Promise<void> {
"NADDOD",
"compatible",
"https://www.naddod.com",
"https://www.naddod.com/collections/transceivers",
"https://www.naddod.com/collection/optical-transceivers",
);
// ── Phase 1: Discover product URLs via sitemap ────────────────────────────

View File

@ -180,6 +180,11 @@ async function fetchPage(url: string): Promise<string> {
}
export async function scrapeVcelink(): Promise<void> {
// VCELink pivoted away from optical transceivers to audio/video/cable products (April 2026).
// All transceiver collection URLs return 404. Scraper disabled until site sells optics again.
console.warn("[vcelink] Scraper disabled — site no longer sells optical transceivers (pivoted to audio/video, April 2026)");
return;
console.log("=== Vcelink Scraper Starting ===\n");
const vendorId = await ensureVendor(