fix(scrapers): fix ATGBics theme migration, NADDOD URL, disable VCELink
- ATGBics: update HTML parser from old card--product theme to new card__info theme (Shopify template changed April 2026); name is now extracted from the product link's anchor text instead of aria-label
- NADDOD: correct ensureVendor shop URL from /collections/transceivers (404) to /collection/optical-transceivers
- VCELink: disable scraper — site pivoted from optical transceivers to audio/video/cable products; all collection URLs return 404
This commit is contained in:
parent
ca943f1f86
commit
1aba912a15
@ -144,19 +144,15 @@ function parseCategoryPage(html: string, cat: typeof CATEGORIES[number]): Atgbic
|
||||
const products: AtgbicsProduct[] = [];
|
||||
const seen = new Set<string>();
|
||||
|
||||
// Split by product cards — class="card card--product
|
||||
const cardParts = html.split(/class="card card--product/);
|
||||
// Split by product cards — class="card__info" (theme updated April 2026, was "card card--product")
|
||||
const cardParts = html.split(/class="card__info"/);
|
||||
|
||||
for (const card of cardParts.slice(1)) {
|
||||
// Name from aria-label (full descriptive name)
|
||||
const nameM = card.match(/aria-label="([^"]{8,})"/);
|
||||
if (!nameM) continue;
|
||||
const name = nameM[1].replace(/®/g, "").replace(/\s+/g, " ").trim();
|
||||
|
||||
// Product handle from href
|
||||
const hrefM = card.match(/href="\/(?:collections\/[^"]+\/)?products\/([^"?#]+)"/);
|
||||
// Product handle + name from <a href="/products/..." class="card-link text-current">NAME</a>
|
||||
const hrefM = card.match(/href="\/products\/([^"?#]+)"[^>]*>\s*([^<]{8,}?)\s*<\/a>/s);
|
||||
if (!hrefM) continue;
|
||||
const handle = hrefM[1];
|
||||
const name = hrefM[2].replace(/®/g, "").replace(/\s+/g, " ").trim();
|
||||
if (seen.has(handle)) continue;
|
||||
seen.add(handle);
|
||||
|
||||
|
||||
@ -205,7 +205,7 @@ export async function scrapeNaddod(): Promise<void> {
|
||||
"NADDOD",
|
||||
"compatible",
|
||||
"https://www.naddod.com",
|
||||
"https://www.naddod.com/collections/transceivers",
|
||||
"https://www.naddod.com/collection/optical-transceivers",
|
||||
);
|
||||
|
||||
// ── Phase 1: Discover product URLs via sitemap ────────────────────────────
|
||||
|
||||
@ -180,6 +180,11 @@ async function fetchPage(url: string): Promise<string> {
|
||||
}
|
||||
|
||||
export async function scrapeVcelink(): Promise<void> {
|
||||
// VCELink pivoted away from optical transceivers to audio/video/cable products (April 2026).
|
||||
// All transceiver collection URLs return 404. Scraper disabled until site sells optics again.
|
||||
console.warn("[vcelink] Scraper disabled — site no longer sells optical transceivers (pivoted to audio/video, April 2026)");
|
||||
return;
|
||||
|
||||
console.log("=== Vcelink Scraper Starting ===\n");
|
||||
|
||||
const vendorId = await ensureVendor(
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user