/**
 * ATGBICS Scraper — Prices, Stock, Product Catalog
 *
 * ATGBICS is a UK-based independent compatible optics vendor.
 * Site uses Shopify. Prices ARE present in static HTML on collection pages.
 *
 * Strategy:
 * 1. Fetch each collection page (correct handles discovered 2026-04-18)
 * 2. Parse product cards: name (product link text), handle, price (£X.XX), image
 * 3. Paginate via ?page=N until empty
 * 4. Upsert to DB
 *
 * No Playwright required — static HTML contains all needed data.
 * Rate limited: 1 req/1 sec. Runs from Mac or Erik (no IP issues with static pages).
 */
import { ensureVendor, upsertPriceObservation, findOrCreateScrapedTransceiver, pool } from "../utils/db";
import { contentHash } from "../utils/hash";

const BASE_URL = "https://www.atgbics.com";

const HEADERS = {
  "User-Agent":
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
  Accept: "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
  "Accept-Language": "en-GB,en;q=0.9",
  // Force GBP pricing regardless of visitor IP geolocation
  Cookie: "cart_currency=GBP",
};

/** Hard stop on pagination so a mis-detected "next page" link cannot loop forever. */
const MAX_PAGES_PER_CAT = 50;

/** Pause between HTTP requests (1 req/sec rate limit). */
const REQUEST_DELAY_MS = 1000;

/** Per-request timeout for collection page fetches. */
const FETCH_TIMEOUT_MS = 20000;

// Correct collection handles discovered 2026-04-18 by fetching /collections/
// Each collection has static-HTML-rendered prices (£X.XX in price__current span).
// formFactor / speed / speedGbps are only FALLBACKS: they are used when nothing
// more specific can be detected from the product name or product handle.
const CATEGORIES = [
  // === Core speeds by form factor ===
  { handle: "compatible-transceivers-sfp-1-25g", formFactor: "SFP", speed: "1G", speedGbps: 1 },
  // NOTE(review): a "100m" collection is mapped to a 1G fallback here — confirm this is intended.
  { handle: "compatible-transceivers-sfp-100m", formFactor: "SFP", speed: "1G", speedGbps: 1 },
  { handle: "compatible-transceiver-sfp-bidi-1-25g", formFactor: "SFP", speed: "1G", speedGbps: 1 },
  { handle: "compatible-transceivers-sfpp-10g", formFactor: "SFP+", speed: "10G", speedGbps: 10 },
  { handle: "compatible-transceivers-sfpp-bidi-10g", formFactor: "SFP+", speed: "10G", speedGbps: 10 },
  { handle: "compatible-transceivers-sfpp-cwdm-10g", formFactor: "SFP+", speed: "10G", speedGbps: 10 },
  { handle: "compatible-transceivers-sfp-dwdm-10g", formFactor: "SFP+", speed: "10G", speedGbps: 10 },
  { handle: "compatible-transceiver-sfp-25g", formFactor: "SFP28", speed: "25G", speedGbps: 25 },
  { handle: "high-speed-sfp-transceivers", formFactor: "QSFP28", speed: "100G", speedGbps: 100 },
  { handle: "high-speed-sfp-transceivers-1", formFactor: "QSFP-DD", speed: "400G", speedGbps: 400 },
  // NOTE(review): "tansceivers" spelling kept verbatim — presumably the store's real handle; confirm.
  { handle: "compatible-tansceivers-qsfp-bidi-100gbps", formFactor: "QSFP28", speed: "100G", speedGbps: 100 },
];

/** One product parsed from a collection page card, enriched with detected specs. */
interface AtgbicsProduct {
  partNumber: string;
  name: string;
  price: number;
  currency: string;
  stockLevel: string;
  url: string;
  formFactor: string;
  speed: string;
  speedGbps: number;
  reachLabel?: string;
  reachMeters?: number;
  fiberType?: string;
  wavelength?: string;
  imageUrl?: string;
}

/** Resolve after `ms` milliseconds. */
function sleep(ms: number): Promise<void> {
  return new Promise((resolve) => setTimeout(resolve, ms));
}

/** Best-effort message extraction for values caught in a `catch` clause. */
function errorMessage(err: unknown): string {
  return err instanceof Error ? err.message : String(err);
}

/**
 * Detect the transceiver form factor from free text.
 *
 * Checks go from most to least specific so that e.g. "QSFP28" is not
 * reported as "SFP28" or "SFP".
 *
 * @param text     Product name and/or handle.
 * @param fallback Value returned when no known form factor appears in `text`.
 */
function detectFormFactor(text: string, fallback: string): string {
  const lower = text.toLowerCase();
  if (lower.includes("qsfp-dd") || lower.includes("qsfp dd")) return "QSFP-DD";
  if (lower.includes("qsfp56")) return "QSFP56";
  if (lower.includes("qsfp28")) return "QSFP28";
  if (lower.includes("qsfp+") || lower.includes("qsfp plus")) return "QSFP+";
  if (lower.includes("sfp28")) return "SFP28";
  if (lower.includes("sfp+") || lower.includes("sfp-plus") || lower.includes("sfpplus")) return "SFP+";
  if (lower.includes("sfp") && !lower.includes("qsfp")) return "SFP";
  if (lower.includes("xfp")) return "XFP";
  if (lower.includes("cfp2")) return "CFP2";
  if (lower.includes("cfp")) return "CFP";
  return fallback;
}

// Speed patterns, tested in order (first match wins).
//  - `(?<![\d.])` stops a pattern from matching the tail of a longer number,
//    e.g. "25g" inside "1.25G" or "10g" inside "110g".
//  - `(?!hz)` keeps DWDM channel spacing such as "100GHz" from being read as a data rate.
const SPEED_PATTERNS: ReadonlyArray<readonly [RegExp, string, number]> = [
  [/(?<![\d.])800\s*g(?!hz)/i, "800G", 800],
  [/(?<![\d.])400\s*g(?!hz)/i, "400G", 400],
  [/(?<![\d.])200\s*g(?!hz)/i, "200G", 200],
  [/(?<![\d.])100\s*g(?!hz)/i, "100G", 100],
  [/(?<![\d.])40\s*g(?!hz)/i, "40G", 40],
  [/(?<![\d.])25\s*g(?!hz)/i, "25G", 25],
  [/(?<![\d.])10\s*g(?!hz)/i, "10G", 10],
  [/(?<![\d.])1000\s*base/i, "1G", 1],
  [/(?<![\d.])1\.25\s*g(?!hz)/i, "1G", 1],
  [/(?<![\d.])1\s*g\b/i, "1G", 1],
];

/**
 * Detect the line rate from free text.
 *
 * @param text         Product name and/or handle.
 * @param fallbackGbps Rate (in Gbps) used when no pattern matches.
 * @returns The speed label (e.g. "10G") and its numeric value in Gbps.
 */
function detectSpeed(text: string, fallbackGbps: number): { speed: string; speedGbps: number } {
  for (const [re, speed, gbps] of SPEED_PATTERNS) {
    if (re.test(text)) return { speed, speedGbps: gbps };
  }
  return { speed: fallbackGbps + "G", speedGbps: fallbackGbps };
}

// Reach patterns, tested in order. Explicit distances come first; the
// case-sensitive optic-type codes (LR/ER/ZR/SR/DR/FR, optionally with "4")
// are mapped to a nominal reach only when no explicit distance was found.
const REACH_PATTERNS: ReadonlyArray<readonly [RegExp, string, number]> = [
  [/\b120\s*km\b/i, "120km", 120000],
  [/\b80\s*km\b/i, "80km", 80000],
  [/\b40\s*km\b/i, "40km", 40000],
  [/\b20\s*km\b/i, "20km", 20000],
  [/\b15\s*km\b/i, "15km", 15000],
  [/\b10\s*km\b/i, "10km", 10000],
  [/\b2\s*km\b/i, "2km", 2000],
  [/\b550\s*m\b/i, "550m", 550],
  [/\b500\s*m\b/i, "500m", 500],
  [/\b300\s*m\b/i, "300m", 300],
  [/\b150\s*m\b/i, "150m", 150],
  [/\b100\s*m\b/i, "100m", 100],
  [/\b70\s*m\b/i, "70m", 70],
  [/\bLR4?\b/, "10km", 10000],
  [/\bER4?\b/, "40km", 40000],
  [/\bZR4?\b/, "80km", 80000],
  [/\bSR4?\b/, "300m", 300],
  [/\bDR4?\b/, "500m", 500],
  [/\bFR4?\b/, "2km", 2000],
];

/**
 * Detect the reach from free text.
 *
 * @returns Reach label plus distance in meters, or `undefined` when nothing matches.
 */
function detectReach(text: string): { label: string; meters: number } | undefined {
  for (const [re, label, meters] of REACH_PATTERNS) {
    if (re.test(text)) return { label, meters };
  }
  return undefined;
}

/**
 * Classify the medium as "SMF", "MMF" or "Copper" from free text.
 *
 * Two-letter optic codes (LX, LR, ER, ZR, SX, SR) must not be embedded in a
 * longer alphabetic word; lookarounds are used so a code at the very start or
 * end of the text still counts. Defaults to "SMF" when nothing matches.
 */
function detectFiber(text: string): string {
  if (/single.?mode|smf|(?<![a-z])(?:lx|lr|er|zr)(?![a-z])|bidi|cwdm|dwdm/i.test(text)) return "SMF";
  if (/multi.?mode|mmf|(?<![a-z])(?:sx|sr)(?![a-z])/i.test(text)) return "MMF";
  if (/copper|dac|twinax|rj.?45|base-t/i.test(text)) return "Copper";
  if (/aoc|active.?optical/i.test(text)) return "MMF";
  return "SMF";
}

/** Return the first 3–4 digit wavelength followed by "nm" (digits only), or "" if none. */
function detectWavelength(text: string): string {
  const m = text.match(/(\d{3,4})\s*nm/i);
  return m ? m[1] : "";
}

/**
 * Extract OEM part number from the ATGBICS product name.
 * Name format: "{OEM_PN} {Vendor}® Compatible Transceiver {Specs}"
 * e.g. "SFP-10G-SR Cisco® Compatible Transceiver SFP+ 10GBase-SR ..."
 *
 * @returns The upper-cased leading token (3–60 chars of letters, digits and
 *          `. _ - / +`); otherwise the first whitespace-separated token,
 *          upper-cased and capped at 60 characters.
 */
function extractPartNumber(name: string): string {
  // Leading run of part-number characters, up to the first space or other separator.
  const pnMatch = name.match(/^([A-Z0-9][A-Z0-9._\-/+]+)/i);
  if (pnMatch && pnMatch[1].length >= 3 && pnMatch[1].length <= 60) {
    return pnMatch[1].toUpperCase();
  }
  return name.split(/\s+/)[0]?.toUpperCase()?.slice(0, 60) || name.slice(0, 60);
}

/**
 * Parse a collection page HTML — returns array of products.
 *
 * Cards with no product link, no parsable price, or a price outside
 * (0, 100000] are skipped. Each product handle is emitted at most once per page.
 *
 * @param html Raw HTML of one collection page.
 * @param cat  Collection entry supplying fallback form factor and speed.
 */
function parseCategoryPage(html: string, cat: typeof CATEGORIES[number]): AtgbicsProduct[] {
  const products: AtgbicsProduct[] = [];
  const seen = new Set<string>();

  // Split by product cards — class="card__info" (theme updated 2025, was "card card--product")
  const cardParts = html.split(/class="card__info"/);

  for (const card of cardParts.slice(1)) {
    // Product handle + name from <a href="/products/{handle}" ...>NAME</a>
    const hrefM = card.match(/href="\/products\/([^"?#]+)"[^>]*>\s*([^<]{8,}?)\s*<\/a>/s);
    if (!hrefM) continue;
    const handle = hrefM[1];
    const name = hrefM[2].replace(/®/g, "").replace(/\s+/g, " ").trim();
    if (seen.has(handle)) continue;
    seen.add(handle);

    // Price — £X.XX in price__current (may have newline before £).
    // Strip EVERY thousands separator: removing only the first comma would
    // turn "1,234,567.00" into 1234 and let it slip past the sanity cap below.
    const priceM = card.match(/price__current"[^>]*>\s*£([\d,]+(?:\.\d{0,2})?)/s);
    const price = priceM ? parseFloat(priceM[1].replace(/,/g, "")) : 0;
    if (!Number.isFinite(price) || price <= 0 || price > 100000) continue;

    // Image from data-srcset (first URL; the capture already stops at whitespace)
    const imgM = card.match(/data-srcset="\/\/(atgbics\.com\/cdn\/shop\/files\/[^"\s]+)/);
    const imageUrl = imgM ? `https://${imgM[1]}` : undefined;

    // Specs are detected from the name and the product handle together.
    const fullText = `${name} ${handle}`;
    const speedInfo = detectSpeed(fullText, cat.speedGbps);
    const ff = detectFormFactor(fullText, cat.formFactor);
    const reach = detectReach(fullText);
    const partNumber = extractPartNumber(name);

    products.push({
      partNumber,
      name,
      price,
      currency: "GBP",
      stockLevel: "in_stock", // ATGBICS only lists available items
      url: `${BASE_URL}/products/${handle}`,
      formFactor: ff,
      speed: speedInfo.speed,
      speedGbps: speedInfo.speedGbps,
      reachLabel: reach?.label,
      reachMeters: reach?.meters,
      fiberType: detectFiber(fullText),
      wavelength: detectWavelength(fullText),
      // Placeholder artwork is not worth storing.
      imageUrl: imageUrl?.includes("no-image") ? undefined : imageUrl,
    });
  }

  return products;
}

/**
 * GET `url` with the shared browser-like headers and return the body text.
 *
 * @throws Error on a non-2xx status; the fetch itself rejects on network
 *         failure or when the timeout elapses.
 */
async function fetchPage(url: string): Promise<string> {
  const resp = await fetch(url, { headers: HEADERS, signal: AbortSignal.timeout(FETCH_TIMEOUT_MS) });
  if (!resp.ok) throw new Error(`HTTP ${resp.status} for ${url}`);
  return resp.text();
}

/**
 * Check if a page has pagination links pointing to the next page.
 *
 * Looks for `page=N` (or URL-encoded `page%3DN`) where N is exactly
 * `currentPage + 1`; a trailing digit is rejected so that looking for page 2
 * does not match `page=20`.
 */
function hasNextPage(html: string, currentPage: number): boolean {
  const nextPage = currentPage + 1;
  return new RegExp(`page(?:=|%3D)${nextPage}(?!\\d)`).test(html);
}

/** Build the URL of one (1-based) page of a collection, cheapest first, priced in GBP. */
function buildCollectionUrl(handle: string, page: number): string {
  const base = `${BASE_URL}/collections/${handle}?sort_by=price-ascending&currency=GBP`;
  return page === 1 ? base : `${base}&page=${page}`;
}

/**
 * Scrape every configured ATGBICS collection and persist the results.
 *
 * For each collection, pages are fetched sequentially (one request per second)
 * until a page yields no products, no next-page link is found, a fetch fails,
 * or MAX_PAGES_PER_CAT is reached. Each product — deduplicated across
 * collections by URL — is upserted as a transceiver plus a price observation,
 * and its image URL is stored only when the transceiver has none yet.
 *
 * Failures are logged and skipped: a DB error affects only that product, a
 * page error ends only that collection.
 */
export async function scrapeAtgbics(): Promise<void> {
  console.log("=== ATGBICS Scraper Starting (static HTML, correct collection handles) ===\n");

  const vendorId = await ensureVendor(
    "ATGBICS",
    "compatible",
    "https://www.atgbics.com",
    "https://www.atgbics.com/collections/compatible-transceivers-sfpp-10g",
  );

  let totalProducts = 0;
  let priceUpdates = 0;
  let imageUpdates = 0;
  const seenHandles = new Set<string>();

  for (const cat of CATEGORIES) {
    console.log(`\n--- ${cat.formFactor} (${cat.speed}) [${cat.handle}] ---`);
    let catTotal = 0;

    for (let page = 1; page <= MAX_PAGES_PER_CAT; page++) {
      const pageUrl = buildCollectionUrl(cat.handle, page);

      try {
        const html = await fetchPage(pageUrl);
        const pageProducts = parseCategoryPage(html, cat);

        if (pageProducts.length === 0) {
          console.log(`  Page ${page}: 0 products — stopping`);
          break;
        }

        console.log(`  Page ${page}: ${pageProducts.length} products`);

        for (const product of pageProducts) {
          // Skip cross-category duplicates (same product may appear in multiple collections)
          const dedupKey = `${product.url}`;
          if (seenHandles.has(dedupKey)) continue;
          seenHandles.add(dedupKey);

          try {
            const txId = await findOrCreateScrapedTransceiver({
              partNumber: product.partNumber,
              vendorId,
              formFactor: product.formFactor,
              speedGbps: product.speedGbps,
              speed: product.speed,
              reachMeters: product.reachMeters,
              reachLabel: product.reachLabel,
              fiberType: product.fiberType,
              wavelengths: product.wavelength,
              category: "Compatible",
            });

            // The hash covers only price + part number.
            const hash = contentHash(JSON.stringify({ price: product.price, part: product.partNumber }));
            const updated = await upsertPriceObservation({
              transceiverId: txId,
              sourceVendorId: vendorId,
              price: product.price,
              currency: product.currency,
              stockLevel: product.stockLevel,
              url: product.url,
              contentHash: hash,
            });
            if (updated) priceUpdates++;

            if (product.imageUrl) {
              // Only fill in a missing image; never overwrite an existing one.
              const res = await pool.query(
                `UPDATE transceivers SET image_url = $1, image_scraped_at = NOW(), has_image = true WHERE id = $2 AND (image_url IS NULL OR image_url = '') RETURNING id`,
                [product.imageUrl, txId],
              );
              if (res.rowCount && res.rowCount > 0) imageUpdates++;
            }

            totalProducts++;
            catTotal++;
          } catch (err) {
            console.warn(`  DB error ${product.partNumber}: ${errorMessage(err).slice(0, 80)}`);
          }
        }

        // Check pagination
        if (!hasNextPage(html, page)) {
          console.log(`  No page ${page + 1} — collection done`);
          break;
        }
      } catch (err) {
        // A failed page ends this collection; the next collection still runs.
        console.warn(`  Page ${page} error: ${errorMessage(err).slice(0, 80)}`);
        break;
      }

      await sleep(REQUEST_DELAY_MS);
    }

    console.log(`  Category total: ${catTotal} products`);
    await sleep(REQUEST_DELAY_MS);
  }

  console.log(`\n=== ATGBICS Complete: ${totalProducts} products, ${priceUpdates} price updates, ${imageUpdates} images ===`);
}

// Run the scraper when this file is executed directly (not when imported).
if (require.main === module) {
  scrapeAtgbics()
    .then(() => pool.end())
    .catch((err) => {
      console.error("Fatal:", err);
      pool.end();
      process.exit(1);
    });
}