/**
 * ATGBICS Scraper — Prices, Stock, Product Catalog
 *
 * ATGBICS is a UK-based independent compatible optics vendor.
 * Site uses Shopify. Uses the /products.json API for reliable, JS-free data access.
 *
 * Strategy:
 *   For each collection: GET /collections/{handle}/products.json?limit=250&page=N
 *   Parse JSON: title, handle, variants[0].price (GBP string), images
 *   Paginate until response returns < limit products.
 *
 * Rate limited: 1 req/1 sec. Runs from Mac or Erik.
 * Rewritten 2026-05-06: switched from HTML parsing to products.json API after
 * Shopify's static HTML stopped rendering per-collection results correctly.
 */

import {
  ensureVendor,
  upsertPriceObservation,
  findOrCreateScrapedTransceiver,
  markImageVerified,
  pool,
} from "../utils/db";
import { contentHash } from "../utils/hash";

const BASE_URL = "https://atgbics.com";
const HEADERS = {
  "User-Agent":
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
  Accept: "application/json",
};

const LIMIT = 250; // Shopify products.json max per page
const MAX_PAGES_PER_CAT = 40; // 40 × 250 = 10,000 products per collection

/** Collections for transceiver products — discovered 2026-05-06 via /collections.json */
const CATEGORIES = [
  // Core speeds by form factor
  { handle: "compatible-transceivers-sfp-1-25g", formFactor: "SFP", speed: "1G", speedGbps: 1 },
  { handle: "compatible-transceiver-sfp-bidi-1-25g", formFactor: "SFP", speed: "1G", speedGbps: 1 },
  { handle: "compatible-transceivers-sfp-100m", formFactor: "SFP", speed: "1G", speedGbps: 1 },
  { handle: "compatible-transceivers-sfpp-10g", formFactor: "SFP+", speed: "10G", speedGbps: 10 },
  { handle: "compatible-transceivers-sfpp-bidi-10g", formFactor: "SFP+", speed: "10G", speedGbps: 10 },
  { handle: "compatible-transceivers-sfpp-cwdm-10g", formFactor: "SFP+", speed: "10G", speedGbps: 10 },
  { handle: "compatible-transceivers-sfp-dwdm-10g", formFactor: "SFP+", speed: "10G", speedGbps: 10 },
  { handle: "compatible-transceiver-sfp-25g", formFactor: "SFP28", speed: "25G", speedGbps: 25 },
  { handle: "compatible-transceivers-qsfpp-40gbps", formFactor: "QSFP+", speed: "40G", speedGbps: 40 },
  { handle: "compatible-transceivers-qsfp28-100gbps", formFactor: "QSFP28", speed: "100G", speedGbps: 100 },
  { handle: "400gbase-products", formFactor: "QSFP-DD", speed: "400G", speedGbps: 400 },
];

/** Subset of a Shopify variant object that this scraper reads. */
interface ShopifyVariant {
  price: string;
  compare_at_price?: string | null;
  available?: boolean;
}

/** Subset of a Shopify product object (as returned by products.json) that this scraper reads. */
interface ShopifyProduct {
  title: string;
  handle: string;
  variants: ShopifyVariant[];
  images?: Array<{ src: string }>;
  tags?: string[];
}

/** Normalised product record built from one Shopify product. */
interface AtgbicsProduct {
  partNumber: string;
  name: string;
  price: number;
  currency: string;
  stockLevel: string;
  url: string;
  formFactor: string;
  speed: string;
  speedGbps: number;
  reachLabel?: string;
  reachMeters?: number;
  fiberType?: string;
  wavelength?: string;
  imageUrl?: string;
}

/** Resolve after `ms` milliseconds; used to pace requests. */
function sleep(ms: number): Promise<void> {
  return new Promise((resolve) => setTimeout(resolve, ms));
}

/** Best-effort message extraction from an unknown thrown value. */
function errorMessage(err: unknown): string {
  return err instanceof Error ? err.message : String(err);
}

/**
 * Detect the transceiver form factor from free text.
 *
 * Checks are ordered most-specific first because several names contain each
 * other as substrings (e.g. "qsfp28" contains "sfp28").
 *
 * @param text     Product title and/or handle.
 * @param fallback Form factor to return when nothing in `text` matches.
 */
function detectFormFactor(text: string, fallback: string): string {
  const lower = text.toLowerCase();
  if (lower.includes("qsfp-dd") || lower.includes("qsfp dd")) return "QSFP-DD";
  if (lower.includes("qsfp56")) return "QSFP56";
  if (lower.includes("qsfp28")) return "QSFP28";
  if (lower.includes("qsfp+") || lower.includes("qsfp plus")) return "QSFP+";
  if (lower.includes("sfp28")) return "SFP28";
  if (lower.includes("sfp+") || lower.includes("sfp-plus") || lower.includes("sfpplus")) return "SFP+";
  if (lower.includes("sfp") && !lower.includes("qsfp")) return "SFP";
  if (lower.includes("xfp")) return "XFP";
  if (lower.includes("cfp2")) return "CFP2";
  if (lower.includes("cfp")) return "CFP";
  return fallback;
}

/**
 * Speed patterns, tried in order (highest rate first).
 *
 * Guards on every numeric pattern:
 *  - `(?<![\d.])` stops a pattern from matching the tail of a longer or
 *    fractional number (previously "1.25G" was classified as 25G, which also
 *    made the dedicated 1.25G pattern unreachable).
 *  - `(?!hz)` stops "100GHz"-style frequency values (e.g. DWDM channel
 *    spacing) from being read as a data rate.
 * The 25G pattern additionally rejects the slug form "1-25g", which is how
 * "1.25G" appears in this site's handles (see the CATEGORIES handles).
 */
const SPEED_PATTERNS: ReadonlyArray<readonly [RegExp, string, number]> = [
  [/(?<![\d.])800\s*g(?!hz)/i, "800G", 800],
  [/(?<![\d.])400\s*g(?!hz)/i, "400G", 400],
  [/(?<![\d.])200\s*g(?!hz)/i, "200G", 200],
  [/(?<![\d.])100\s*g(?!hz)/i, "100G", 100],
  [/(?<![\d.])40\s*g(?!hz)/i, "40G", 40],
  [/(?<![\d.]|\b1-)25\s*g(?!hz)/i, "25G", 25],
  [/(?<![\d.])10\s*g(?!hz)/i, "10G", 10],
  [/(?<!\d)1000\s*base/i, "1G", 1],
  [/(?<!\d)1[.\-]25\s*g/i, "1G", 1],
  [/(?<![\d.])1\s*g\b/i, "1G", 1],
];

/**
 * Detect the line rate from free text.
 *
 * @param text         Product title and/or handle.
 * @param fallbackGbps Rate (in Gbps) to use when no pattern matches.
 * @returns The speed label (e.g. "10G") and the numeric rate in Gbps.
 */
function detectSpeed(text: string, fallbackGbps: number): { speed: string; speedGbps: number } {
  for (const [re, speed, gbps] of SPEED_PATTERNS) {
    if (re.test(text)) return { speed, speedGbps: gbps };
  }
  return { speed: fallbackGbps + "G", speedGbps: fallbackGbps };
}

/**
 * Reach patterns, tried in order: explicit distances first, then the
 * optical-standard suffixes (LR/ER/ZR/SR/DR/FR) mapped to a nominal reach.
 * The suffix patterns are case-sensitive, so they only match upper-case text.
 */
const REACH_PATTERNS: ReadonlyArray<readonly [RegExp, string, number]> = [
  [/\b120\s*km\b/i, "120km", 120000],
  [/\b80\s*km\b/i, "80km", 80000],
  [/\b40\s*km\b/i, "40km", 40000],
  [/\b20\s*km\b/i, "20km", 20000],
  [/\b15\s*km\b/i, "15km", 15000],
  [/\b10\s*km\b/i, "10km", 10000],
  [/\b2\s*km\b/i, "2km", 2000],
  [/\b550\s*m\b/i, "550m", 550],
  [/\b500\s*m\b/i, "500m", 500],
  [/\b300\s*m\b/i, "300m", 300],
  [/\b150\s*m\b/i, "150m", 150],
  [/\b100\s*m\b/i, "100m", 100],
  [/\b70\s*m\b/i, "70m", 70],
  [/\bLR4?\b/, "10km", 10000],
  [/\bER4?\b/, "40km", 40000],
  [/\bZR4?\b/, "80km", 80000],
  [/\bSR4?\b/, "300m", 300],
  [/\bDR4?\b/, "500m", 500],
  [/\bFR4?\b/, "2km", 2000],
];

/**
 * Detect the reach from free text.
 *
 * @returns A display label and the reach in metres, or `undefined` when
 *          nothing in `text` matches.
 */
function detectReach(text: string): { label: string; meters: number } | undefined {
  for (const [re, label, meters] of REACH_PATTERNS) {
    if (re.test(text)) return { label, meters };
  }
  return undefined;
}

/**
 * Classify the medium as "SMF", "MMF" or "Copper" from free text.
 *
 * Checks run in order: single-mode markers, multi-mode markers, copper
 * markers, then AOC (reported as "MMF"). Defaults to "SMF" when nothing matches.
 *
 * The two-letter markers (lx/lr/er/zr/sx/sr) must not be adjacent to another
 * letter. Lookarounds are used instead of `[^a-z]…[^a-z]` so a marker at the
 * very start or end of the text (e.g. a handle ending in "-sr") still counts.
 */
function detectFiber(text: string): string {
  if (/single.?mode|smf|(?<![a-z])(?:lx|lr|er|zr)(?![a-z])|bidi|cwdm|dwdm/i.test(text)) return "SMF";
  if (/multi.?mode|mmf|(?<![a-z])(?:sx|sr)(?![a-z])/i.test(text)) return "MMF";
  if (/copper|dac|twinax|rj.?45|base-t/i.test(text)) return "Copper";
  if (/aoc|active.?optical/i.test(text)) return "MMF";
  return "SMF";
}

/**
 * Extract the first wavelength in the text (3–4 digits followed by "nm").
 *
 * @returns The digits only (e.g. "1310"), or "" when none is found.
 */
function detectWavelength(text: string): string {
  const m = text.match(/(\d{3,4})\s*nm/i);
  return m?.[1] ?? "";
}

/**
 * Extract OEM part number from the ATGBICS product name.
 * Format: "{OEM_PN} {Vendor}® Compatible Transceiver {Specs}"
 * e.g. "SFP-10G-SR Cisco® Compatible Transceiver SFP+ 10GBase-SR ..."
 *
 * @returns The leading part-number token upper-cased; falls back to the first
 *          whitespace-delimited word (max 60 chars) when the token does not
 *          look like a part number.
 */
function extractPartNumber(name: string): string {
  const candidate = name.match(/^([A-Z0-9][A-Z0-9._\-/+]+)/i)?.[1];
  if (candidate && candidate.length >= 3 && candidate.length <= 60) {
    return candidate.toUpperCase();
  }
  return name.split(/\s+/)[0]?.toUpperCase()?.slice(0, 60) || name.slice(0, 60);
}

/**
 * Parse a single Shopify product JSON into our AtgbicsProduct format.
 *
 * @param sp  Product object from products.json.
 * @param cat Collection the product was listed under; supplies the fallback
 *            form factor and speed.
 * @returns The normalised product, or `null` when the product has no usable
 *          price (missing, non-positive, or above 100000), no handle, or a
 *          name shorter than 5 characters.
 */
function parseShopifyProduct(
  sp: ShopifyProduct,
  cat: typeof CATEGORIES[number]
): AtgbicsProduct | null {
  // Optional chaining: this runs outside the caller's try/catch, so one
  // malformed product must yield null rather than abort the whole scrape.
  const priceStr = sp.variants?.[0]?.price;
  const price = priceStr ? parseFloat(priceStr) : 0;
  if (!price || price <= 0 || price > 100000) return null;
  if (!sp.handle) return null;

  const name = (sp.title ?? "").replace(/®/g, "").replace(/\s+/g, " ").trim();
  if (name.length < 5) return null;

  const fullText = `${name} ${sp.handle}`;
  const speedInfo = detectSpeed(fullText, cat.speedGbps);
  const ff = detectFormFactor(fullText, cat.formFactor);
  const reach = detectReach(fullText);
  const partNumber = extractPartNumber(name);

  // Image URL — first non-placeholder image
  const rawImg = sp.images?.[0]?.src;
  const imageUrl = rawImg && !rawImg.includes("no-image") ? rawImg : undefined;

  return {
    partNumber,
    name,
    price,
    currency: "GBP",
    // NOTE(review): stock is always reported as "in_stock"; variants[0].available
    // is not consulted. Confirm which stock values the DB layer accepts before
    // mapping it.
    stockLevel: "in_stock",
    url: `${BASE_URL}/products/${sp.handle}`,
    formFactor: ff,
    speed: speedInfo.speed,
    speedGbps: speedInfo.speedGbps,
    reachLabel: reach?.label,
    reachMeters: reach?.meters,
    fiberType: detectFiber(fullText),
    wavelength: detectWavelength(fullText),
    imageUrl,
  };
}

/**
 * Fetch one products.json page.
 *
 * @param url Fully-qualified products.json URL.
 * @returns The `products` array from the response, or `[]` when the body has
 *          no such array.
 * @throws Error on a non-2xx status; fetch/JSON errors and the 20 s timeout
 *         abort propagate unchanged.
 */
async function fetchProductsJson(url: string): Promise<ShopifyProduct[]> {
  const resp = await fetch(url, { headers: HEADERS, signal: AbortSignal.timeout(20000) });
  if (!resp.ok) throw new Error(`HTTP ${resp.status} for ${url}`);
  const data = (await resp.json()) as { products?: ShopifyProduct[] } | null;
  return Array.isArray(data?.products) ? data.products : [];
}

/**
 * Scrape every collection in CATEGORIES and persist products, prices and images.
 *
 * For each collection, pages are fetched until an empty or short page is
 * returned, a fetch fails, or MAX_PAGES_PER_CAT is reached. Products already
 * seen under an earlier collection (by handle) are skipped. Per-product DB
 * errors are logged and do not stop the run.
 */
export async function scrapeAtgbics(): Promise<void> {
  console.log("=== ATGBICS Scraper Starting (products.json API) ===\n");

  const vendorId = await ensureVendor(
    "ATGBICS",
    "compatible",
    "https://atgbics.com",
    "https://atgbics.com/collections/compatible-transceivers-sfpp-10g",
  );

  let totalProducts = 0;
  let priceUpdates = 0;
  let imageUpdates = 0;
  const seenHandles = new Set<string>();

  for (const cat of CATEGORIES) {
    console.log(`\n--- ${cat.formFactor} (${cat.speed}) [${cat.handle}] ---`);
    let catTotal = 0;

    for (let page = 1; page <= MAX_PAGES_PER_CAT; page++) {
      const pageUrl = `${BASE_URL}/collections/${cat.handle}/products.json?limit=${LIMIT}&page=${page}`;

      let shopifyProducts: ShopifyProduct[];
      try {
        shopifyProducts = await fetchProductsJson(pageUrl);
      } catch (err) {
        // A failed page ends this collection; the next collection is still tried.
        console.warn(` Page ${page} error: ${errorMessage(err).slice(0, 80)}`);
        break;
      }

      if (shopifyProducts.length === 0) {
        if (page === 1) console.log(` Empty collection — skipping`);
        else console.log(` Page ${page}: 0 products — done`);
        break;
      }

      console.log(` Page ${page}: ${shopifyProducts.length} products`);

      for (const sp of shopifyProducts) {
        // Skip cross-category duplicates
        if (seenHandles.has(sp.handle)) continue;
        seenHandles.add(sp.handle);

        const product = parseShopifyProduct(sp, cat);
        if (!product) continue;

        try {
          const txId = await findOrCreateScrapedTransceiver({
            partNumber: product.partNumber,
            vendorId,
            productUrl: product.url,
            formFactor: product.formFactor,
            speedGbps: product.speedGbps,
            speed: product.speed,
            reachMeters: product.reachMeters,
            reachLabel: product.reachLabel,
            fiberType: product.fiberType,
            wavelengths: product.wavelength,
            category: "Compatible",
          });

          const hash = contentHash(JSON.stringify({ price: product.price, part: product.partNumber }));
          const updated = await upsertPriceObservation({
            transceiverId: txId,
            sourceVendorId: vendorId,
            price: product.price,
            currency: product.currency,
            stockLevel: product.stockLevel,
            url: product.url,
            contentHash: hash,
          });
          if (updated) priceUpdates++;

          if (product.imageUrl) {
            const updatedImage = await markImageVerified(txId, product.imageUrl);
            if (updatedImage) imageUpdates++;
          }

          totalProducts++;
          catTotal++;
        } catch (err) {
          console.warn(` DB error ${product.partNumber}: ${errorMessage(err).slice(0, 80)}`);
        }
      }

      // If we got fewer products than the limit, we're on the last page
      if (shopifyProducts.length < LIMIT) break;
      await sleep(1000);
    }

    console.log(` Category total: ${catTotal} new products saved`);
    await sleep(1000);
  }

  console.log(`\n=== ATGBICS Complete: ${totalProducts} products, ${priceUpdates} price updates, ${imageUpdates} images ===`);
}

// Run the scraper when this file is executed directly; close the DB pool on exit.
if (require.main === module) {
  scrapeAtgbics()
    .then(() => pool.end())
    .catch((err) => {
      console.error("Fatal:", err);
      pool.end();
      process.exit(1);
    });
}