fix(scrapers): fix ATGBics theme migration, NADDOD URL, disable VCELink

- ATGBics: update HTML parser from old card--product theme to new
  card__info theme (Shopify template changed April 2026); name now
  extracted from the product link's anchor text instead of aria-label
- NADDOD: correct ensureVendor shop URL from /collections/transceivers
  (404) to /collection/optical-transceivers
- VCELink: disable scraper — site pivoted from optical transceivers to
  audio/video/cable products; all transceiver collection URLs return 404
This commit is contained in:
Rene Fichtmueller 2026-04-20 22:11:24 +02:00
parent ca943f1f86
commit 1aba912a15
3 changed files with 11 additions and 10 deletions

View File

@ -144,19 +144,15 @@ function parseCategoryPage(html: string, cat: typeof CATEGORIES[number]): Atgbic
const products: AtgbicsProduct[] = []; const products: AtgbicsProduct[] = [];
const seen = new Set<string>(); const seen = new Set<string>();
// Split by product cards — class="card card--product // Split by product cards — class="card__info" (theme updated April 2026, was "card card--product")
const cardParts = html.split(/class="card card--product/); const cardParts = html.split(/class="card__info"/);
for (const card of cardParts.slice(1)) { for (const card of cardParts.slice(1)) {
// Name from aria-label (full descriptive name) // Product handle + name from <a href="/products/..." class="card-link text-current">NAME</a>
const nameM = card.match(/aria-label="([^"]{8,})"/); const hrefM = card.match(/href="\/products\/([^"?#]+)"[^>]*>\s*([^<]{8,}?)\s*<\/a>/s);
if (!nameM) continue;
const name = nameM[1].replace(/®/g, "").replace(/\s+/g, " ").trim();
// Product handle from href
const hrefM = card.match(/href="\/(?:collections\/[^"]+\/)?products\/([^"?#]+)"/);
if (!hrefM) continue; if (!hrefM) continue;
const handle = hrefM[1]; const handle = hrefM[1];
const name = hrefM[2].replace(/®/g, "").replace(/\s+/g, " ").trim();
if (seen.has(handle)) continue; if (seen.has(handle)) continue;
seen.add(handle); seen.add(handle);

View File

@ -205,7 +205,7 @@ export async function scrapeNaddod(): Promise<void> {
"NADDOD", "NADDOD",
"compatible", "compatible",
"https://www.naddod.com", "https://www.naddod.com",
"https://www.naddod.com/collections/transceivers", "https://www.naddod.com/collection/optical-transceivers",
); );
// ── Phase 1: Discover product URLs via sitemap ──────────────────────────── // ── Phase 1: Discover product URLs via sitemap ────────────────────────────

View File

@ -180,6 +180,11 @@ async function fetchPage(url: string): Promise<string> {
} }
export async function scrapeVcelink(): Promise<void> { export async function scrapeVcelink(): Promise<void> {
// VCELink pivoted away from optical transceivers to audio/video/cable products (April 2026).
// All transceiver collection URLs return 404. Scraper disabled until site sells optics again.
console.warn("[vcelink] Scraper disabled — site no longer sells optical transceivers (pivoted to audio/video, April 2026)");
return;
console.log("=== Vcelink Scraper Starting ===\n"); console.log("=== Vcelink Scraper Starting ===\n");
const vendorId = await ensureVendor( const vendorId = await ensureVendor(