fix(scrapers): fix ATGBics theme migration, NADDOD URL, disable VCELink
- ATGBics: update the HTML parser from the old card--product theme to the new card__info theme (Shopify template changed April 2026); the product name is now extracted from the product link's text instead of the aria-label attribute
- NADDOD: correct the ensureVendor shop URL from /collections/transceivers (returns 404) to /collection/optical-transceivers
- VCELink: disable the scraper — the site pivoted from optical transceivers to audio/video/cable products, and all transceiver collection URLs now return 404
This commit is contained in:
parent
ca943f1f86
commit
1aba912a15
@ -144,19 +144,15 @@ function parseCategoryPage(html: string, cat: typeof CATEGORIES[number]): Atgbic
|
|||||||
const products: AtgbicsProduct[] = [];
|
const products: AtgbicsProduct[] = [];
|
||||||
const seen = new Set<string>();
|
const seen = new Set<string>();
|
||||||
|
|
||||||
// Split by product cards — class="card card--product
|
// Split by product cards — class="card__info" (theme updated April 2026, was "card card--product")
|
||||||
const cardParts = html.split(/class="card card--product/);
|
const cardParts = html.split(/class="card__info"/);
|
||||||
|
|
||||||
for (const card of cardParts.slice(1)) {
|
for (const card of cardParts.slice(1)) {
|
||||||
// Name from aria-label (full descriptive name)
|
// Product handle + name from <a href="/products/..." class="card-link text-current">NAME</a>
|
||||||
const nameM = card.match(/aria-label="([^"]{8,})"/);
|
const hrefM = card.match(/href="\/products\/([^"?#]+)"[^>]*>\s*([^<]{8,}?)\s*<\/a>/s);
|
||||||
if (!nameM) continue;
|
|
||||||
const name = nameM[1].replace(/®/g, "").replace(/\s+/g, " ").trim();
|
|
||||||
|
|
||||||
// Product handle from href
|
|
||||||
const hrefM = card.match(/href="\/(?:collections\/[^"]+\/)?products\/([^"?#]+)"/);
|
|
||||||
if (!hrefM) continue;
|
if (!hrefM) continue;
|
||||||
const handle = hrefM[1];
|
const handle = hrefM[1];
|
||||||
|
const name = hrefM[2].replace(/®/g, "").replace(/\s+/g, " ").trim();
|
||||||
if (seen.has(handle)) continue;
|
if (seen.has(handle)) continue;
|
||||||
seen.add(handle);
|
seen.add(handle);
|
||||||
|
|
||||||
|
|||||||
@ -205,7 +205,7 @@ export async function scrapeNaddod(): Promise<void> {
|
|||||||
"NADDOD",
|
"NADDOD",
|
||||||
"compatible",
|
"compatible",
|
||||||
"https://www.naddod.com",
|
"https://www.naddod.com",
|
||||||
"https://www.naddod.com/collections/transceivers",
|
"https://www.naddod.com/collection/optical-transceivers",
|
||||||
);
|
);
|
||||||
|
|
||||||
// ── Phase 1: Discover product URLs via sitemap ────────────────────────────
|
// ── Phase 1: Discover product URLs via sitemap ────────────────────────────
|
||||||
|
|||||||
@ -180,6 +180,11 @@ async function fetchPage(url: string): Promise<string> {
|
|||||||
}
|
}
|
||||||
|
|
||||||
export async function scrapeVcelink(): Promise<void> {
|
export async function scrapeVcelink(): Promise<void> {
|
||||||
|
// VCELink pivoted away from optical transceivers to audio/video/cable products (April 2026).
|
||||||
|
// All transceiver collection URLs return 404. Scraper disabled until site sells optics again.
|
||||||
|
console.warn("[vcelink] Scraper disabled — site no longer sells optical transceivers (pivoted to audio/video, April 2026)");
|
||||||
|
return;
|
||||||
|
|
||||||
console.log("=== Vcelink Scraper Starting ===\n");
|
console.log("=== Vcelink Scraper Starting ===\n");
|
||||||
|
|
||||||
const vendorId = await ensureVendor(
|
const vendorId = await ensureVendor(
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user