/**
 * Optcore.net Scraper — Most transparent pricing in the industry.
 * Prices start at $5.50, fully public, no bot protection.
 *
 * Strategy: WP REST API to enumerate transceiver product URLs,
 * then PlaywrightCrawler to render each page and extract price.
 *
 * Optcore uses Flatsome WooCommerce with Cloudflare Rocket Loader
 * (JS lazy-loading) — static HTML has no product data.
 */

import { PlaywrightCrawler } from "crawlee";
import { ensureVendor, upsertPriceObservation, findOrCreateScrapedTransceiver, pool } from "../utils/db";
import { contentHash, parsePrice, parseStockLevel } from "../utils/hash";

const BASE_URL = "https://www.optcore.net";

// Transceiver category IDs from /wp-json/wp/v2/product_cat
// Filtered to optical transceiver categories with products
const TRANSCEIVER_CATEGORY_IDS = [
  309, // 10G SFP+
  173, // 1G SFP
  76, // 100G QSFP28
  79, // 25G SFP28
  73, // 40G QSFP+
  311, // 10G BiDi SFP+
  313, // 10G CWDM SFP+
  312, // 10G DWDM SFP+
  333, // 10G XFP
  1088, // 10GBase-T SFP+
  59, // 8G/10G/16G SFP+
  1102, // BiDi SFP
  4097, // 400G QSFP-DD
  77, // 100G CFP/CFP2/CFP4
  4101, // 200G QSFP56
  4092, // 50G SFP56
  6441, // 800G OSFP
];

/** One scraped product page, normalised for the DB write step. */
interface OptcoreProduct {
  partNumber: string;
  name: string;
  price: number;
  currency: string;
  stockLevel: string;
  url: string;
  formFactor?: string;
  speedGbps?: number;
  speed?: string;
  reachLabel?: string;
}

/** Product fields requested from the WP REST API via `_fields=slug,link,title`. */
interface WpProductSummary {
  slug: string;
  link: string;
  title: { rendered: string };
}

/**
 * Guess the transceiver form factor from free text (normally the product title).
 *
 * Checks run from most to least specific because the shorter names are
 * substrings of the longer ones ("sfp" is inside "qsfp28", "cfp" inside "cfp2").
 *
 * @param text Product title or similar description.
 * @returns Canonical form-factor label, or `undefined` when nothing matches.
 */
function detectFormFactor(text: string): string | undefined {
  const lower = text.toLowerCase();
  if (lower.includes("osfp") && !lower.includes("qsfp")) return "OSFP";
  if (lower.includes("qsfp-dd")) return "QSFP-DD";
  if (lower.includes("qsfp56")) return "QSFP56";
  if (lower.includes("qsfp28")) return "QSFP28";
  if (lower.includes("qsfp+") || lower.includes("qsfp plus")) return "QSFP+";
  if (lower.includes("sfp28")) return "SFP28";
  if (lower.includes("sfp56")) return "SFP56";
  if (lower.includes("sfp+") || lower.includes("sfp plus")) return "SFP+";
  if (lower.includes("cfp4")) return "CFP4";
  if (lower.includes("cfp2")) return "CFP2";
  if (lower.includes("cfp")) return "CFP";
  if (lower.includes("xfp")) return "XFP";
  if (lower.includes("sfp") && !lower.includes("qsfp")) return "SFP";
  return undefined;
}

/**
 * Speed tiers, highest first; the first matching pattern wins.
 *
 * Every "<n>G" pattern is guarded twice:
 *  - `(?<![\d.])` keeps the number from matching the tail of a longer or
 *    decimal number, so "1.25G" is not read as 25G;
 *  - `(?!hz)` skips frequencies, so DWDM channel spacing such as "100GHz"
 *    is not read as 100G.
 */
const SPEED_PATTERNS: ReadonlyArray<readonly [RegExp, string, number]> = [
  [/(?<![\d.])800\s*g(?!hz)/i, "800G", 800],
  [/(?<![\d.])400\s*g(?!hz)/i, "400G", 400],
  [/(?<![\d.])200\s*g(?!hz)/i, "200G", 200],
  [/(?<![\d.])100\s*g(?!hz)/i, "100G", 100],
  [/(?<![\d.])50\s*g(?!hz)/i, "50G", 50],
  [/(?<![\d.])40\s*g(?!hz)/i, "40G", 40],
  [/(?<![\d.])25\s*g(?!hz)/i, "25G", 25],
  [/(?<![\d.])16\s*g(?!hz)/i, "16G", 16],
  [/(?<![\d.])10\s*g(?!hz)/i, "10G", 10],
  [/(?<![\d.])1000\s*base/i, "1G", 1],
  // 1.25G is the line rate commonly quoted for Gigabit Ethernet optics.
  [/(?<![\d.])1\.25\s*g(?!hz)/i, "1G", 1],
  [/(?<![\d.])1\s*g\b/i, "1G", 1],
];

/**
 * Detect the nominal data rate mentioned in free text.
 *
 * @param text Product title or similar description.
 * @returns The speed label (e.g. "10G") with its numeric Gbps value, or
 *          `undefined` when no known tier is mentioned.
 */
function detectSpeed(text: string): { speed: string; speedGbps: number } | undefined {
  for (const [re, speed, gbps] of SPEED_PATTERNS) {
    if (re.test(text)) return { speed, speedGbps: gbps };
  }
  return undefined;
}

/**
 * Extract the first reach distance ("300m", "10km", "0.3km") from free text.
 *
 * The numeric part may carry a decimal fraction so "0.3km" is not truncated to
 * "3km". Wavelengths such as "1310nm" never match because the unit must start
 * directly after the number.
 *
 * @param text Product title or similar description.
 * @returns Number plus lower-cased unit with no space, or `undefined`.
 */
function detectReach(text: string): string | undefined {
  const match = text.match(/(\d+(?:\.\d+)?)\s*(km|m)\b/i);
  if (match) return `${match[1]}${match[2].toLowerCase()}`;
  return undefined;
}

/**
 * Fetch product URLs for transceiver categories via WP REST API.
 *
 * Walks every page of every category in TRANSCEIVER_CATEGORY_IDS (100 items
 * per request, page count read from the `X-WP-TotalPages` header) and
 * de-duplicates products by slug, since one product can be listed in several
 * categories. The slug doubles as the part number.
 *
 * Failures are best-effort: a non-OK response or a network/parse error is
 * logged and ends pagination for that category only.
 *
 * @returns One `{ url, title, partNumber }` entry per distinct product slug.
 */
async function fetchTransceiverUrls(): Promise<Array<{ url: string; title: string; partNumber: string }>> {
  const results: Array<{ url: string; title: string; partNumber: string }> = [];
  const seen = new Set<string>();

  for (const catId of TRANSCEIVER_CATEGORY_IDS) {
    let page = 1;
    let hasMore = true;

    while (hasMore) {
      const apiUrl = `${BASE_URL}/wp-json/wp/v2/product?product_cat=${catId}&per_page=100&page=${page}&_fields=slug,link,title`;
      try {
        const resp = await fetch(apiUrl, {
          headers: { "User-Agent": "Mozilla/5.0 (compatible; TIP-Scraper/1.0)" },
          signal: AbortSignal.timeout(15000),
        });
        if (!resp.ok) {
          console.warn(`  WP API returned HTTP ${resp.status} for category ${catId}, page ${page}`);
          break;
        }

        // A missing or unparsable header yields 1 or NaN; either way the
        // `page < totalPages` test below is false and the loop ends.
        const totalPages = Number.parseInt(resp.headers.get("X-WP-TotalPages") || "1", 10);
        const products = (await resp.json()) as WpProductSummary[];

        for (const p of products) {
          if (!seen.has(p.slug)) {
            seen.add(p.slug);
            results.push({
              url: p.link,
              title: p.title.rendered,
              partNumber: p.slug,
            });
          }
        }

        hasMore = page < totalPages;
        page++;

        // Rate limit: 10 req/sec max
        await new Promise((r) => setTimeout(r, 100));
      } catch (err: unknown) {
        const reason = err instanceof Error ? err.message : String(err);
        console.warn(`  WP API request failed for category ${catId}, page ${page}: ${reason}`);
        hasMore = false;
      }
    }
  }

  return results;
}

/**
 * Run the full Optcore scrape: enumerate product URLs, render each page with
 * Playwright, extract title/price/stock, then write one price observation per
 * distinct part number.
 *
 * Pages without a price element or with a non-positive parsed price are
 * skipped. DB errors for a single product are logged and do not abort the run.
 */
export async function scrapeOptcore(): Promise<void> {
  console.log("=== Optcore.net Scraper Starting ===\n");

  const vendorId = await ensureVendor(
    "Optcore",
    "compatible",
    "https://www.optcore.net",
    "https://www.optcore.net/product-category/optical-transceiver/"
  );
  console.log(`Vendor ID: ${vendorId}`);

  // Step 1: Enumerate transceiver product URLs via WP REST API
  console.log("Fetching product URLs via WP REST API...");
  const productMeta = await fetchTransceiverUrls();
  console.log(`Found ${productMeta.length} transceiver product URLs`);

  // Build a map for quick metadata lookup
  const metaByUrl = new Map(productMeta.map((p) => [p.url, p]));

  const products: OptcoreProduct[] = [];
  let pagesScraped = 0;

  // Step 2: Render each product page with Playwright to extract price
  const crawler = new PlaywrightCrawler({
    maxConcurrency: 3,
    maxRequestsPerMinute: 30,
    requestHandlerTimeoutSecs: 30,
    headless: true,
    launchContext: {
      launchOptions: {
        args: ["--disable-blink-features=AutomationControlled", "--no-sandbox"],
      },
    },
    async requestHandler({ page, request, log }) {
      const url = request.url;
      log.info(`Scraping: ${url}`);

      // Wait for WooCommerce price element to appear
      try {
        await page.waitForSelector(".woocommerce-Price-amount, .price .amount, [class*=\"price\"]", {
          timeout: 8000,
        });
      } catch {
        // Price element not found — might be out of stock or JS failed
        log.warning(`No price element found: ${url}`);
        pagesScraped++;
        return;
      }

      const data = await page.evaluate(() => {
        // querySelector with a comma-separated list returns the first match in
        // DOCUMENT order, not list order. Try each selector on its own so the
        // most specific one wins (e.g. the sale price inside <ins> rather than
        // the struck-through <del> price that precedes it in the markup).
        const firstText = (selectors: string[]): string => {
          for (const selector of selectors) {
            const text = document.querySelector(selector)?.textContent?.trim();
            if (text) return text;
          }
          return "";
        };

        // Product title
        const title = firstText(["h1.product_title", "h1.entry-title", "h1"]);

        // Price — WooCommerce wraps the amount in a .woocommerce-Price-amount element
        const priceText = firstText([
          ".price ins .woocommerce-Price-amount",
          ".price .woocommerce-Price-amount",
          ".woocommerce-Price-amount",
        ]);

        // Stock — prefer the dedicated elements before the broad substring match,
        // which can also hit wrapper elements whose class merely contains "stock".
        const stockText = firstText([".stock", ".availability", "[class*=\"stock\"]"]);

        return { title, priceText, stockText };
      });

      const meta = metaByUrl.get(url);
      const name = data.title || meta?.title || url.split("/").filter(Boolean).pop() || "";
      const partNumber = meta?.partNumber || url.split("/").filter(Boolean).pop() || "";
      const { price, currency } = parsePrice(data.priceText);

      if (price > 0) {
        const speedInfo = detectSpeed(name);
        products.push({
          partNumber,
          name,
          price,
          currency,
          // No stock text on the page is treated as available.
          stockLevel: data.stockText ? parseStockLevel(data.stockText) : "in_stock",
          url,
          formFactor: detectFormFactor(name),
          speedGbps: speedInfo?.speedGbps,
          speed: speedInfo?.speed,
          reachLabel: detectReach(name),
        });
      }

      pagesScraped++;
    },
  });

  const urls = productMeta.map((p) => p.url);
  await crawler.run(urls);

  console.log(`\nPages scraped: ${pagesScraped}`);
  console.log(`Products with price: ${products.length}`);

  // Deduplicate (first occurrence of each part number wins)
  const unique = new Map<string, OptcoreProduct>();
  for (const p of products) {
    if (!unique.has(p.partNumber)) unique.set(p.partNumber, p);
  }

  // Write to DB
  let written = 0;
  let skipped = 0;

  for (const p of unique.values()) {
    try {
      const transceiverId = await findOrCreateScrapedTransceiver({
        partNumber: p.partNumber,
        vendorId,
        formFactor: p.formFactor,
        speedGbps: p.speedGbps,
        speed: p.speed,
        reachLabel: p.reachLabel,
        category: "DataCenter",
      });

      const hash = contentHash({ price: p.price, stock: p.stockLevel });
      const isNew = await upsertPriceObservation({
        transceiverId,
        sourceVendorId: vendorId,
        price: p.price,
        currency: p.currency,
        stockLevel: p.stockLevel,
        url: p.url,
        contentHash: hash,
      });

      if (isNew) written++;
      else skipped++;
    } catch (err: unknown) {
      const reason = err instanceof Error ? err.message : String(err);
      console.error(`  Error: ${p.partNumber}:`, reason);
    }
  }

  console.log(`\nDatabase: ${written} new, ${skipped} unchanged (${unique.size} unique)`);
  console.log("=== Optcore.net Scraper Complete ===\n");
}

// Run the scraper when this file is executed directly rather than imported.
if (require.main === module) {
  scrapeOptcore()
    .then(() => pool.end())
    .catch((err) => {
      console.error("Fatal:", err);
      pool.end();
      process.exit(1);
    });
}