/**
 * Juniper HCT Scraper — OEM Hardware Compatibility Tool
 *
 * apps.juniper.net/hct — Next.js SSR app with product data embedded in
 * self.__next_f.push() payloads. Transceivers category = 100001.
 * Rich data: modelNumber, partNumber, distance, speedType, formFactor, EOL status.
 * No prices (OEM), but excellent compatibility + spec data.
 */

import { pool, findOrCreateScrapedTransceiver, ensureVendor } from "../utils/db";

const BASE = "https://apps.juniper.net/hct";

const HEADERS = {
  "User-Agent": "Mozilla/5.0 (compatible; TIP-Bot/1.0; research)",
  Accept: "text/html,application/xhtml+xml",
};

const CATEGORIES = [
  { id: 100001, name: "Transceivers" },
];

/** Abort a page fetch after this many milliseconds. */
const FETCH_TIMEOUT_MS = 60000;

/** Pause inserted between consecutive category requests. */
const CATEGORY_DELAY_MS = 2000;

/** Number of characters inspected after an object's opening brace when reading its fields. */
const OBJECT_WINDOW_CHARS = 2000;

/** One transceiver record as scraped from the embedded page payload. */
interface JuniperTransceiver {
  modelNumber: string;
  partNumber: string;
  description: string;
  cableType: string;
  distance: string;
  speedType: string;
  formFactor: string;
  connectorType: string;
  maxDistanceKm?: number;
  maxDistanceLabel?: string;
  isModelEol: boolean;
}

/** Resolves after `ms` milliseconds. */
function sleep(ms: number): Promise<void> {
  return new Promise((resolve) => setTimeout(resolve, ms));
}

/** Turns any thrown value into a printable message without assuming it is an `Error`. */
function errorMessage(err: unknown): string {
  return err instanceof Error ? err.message : String(err);
}

/**
 * Speed tiers, checked in order (highest first). A tier matches when any of
 * its needles occurs in the lower-cased input.
 */
const SPEED_TIERS: ReadonlyArray<{ needles: readonly string[]; speed: string; speedGbps: number }> = [
  { needles: ["800g"], speed: "800G", speedGbps: 800 },
  { needles: ["400g"], speed: "400G", speedGbps: 400 },
  { needles: ["200g"], speed: "200G", speedGbps: 200 },
  { needles: ["100g"], speed: "100G", speedGbps: 100 },
  { needles: ["50g"], speed: "50G", speedGbps: 50 },
  { needles: ["40g"], speed: "40G", speedGbps: 40 },
  { needles: ["25g"], speed: "25G", speedGbps: 25 },
  { needles: ["10g"], speed: "10G", speedGbps: 10 },
  { needles: ["1g", "1000"], speed: "1G", speedGbps: 1 },
];

/**
 * Maps a free-form speed string (or a description used as a fallback) to a
 * canonical label and a numeric Gbps value.
 *
 * @param speedType Text such as "100G" or "10GBASE-LR".
 * @returns The first matching tier; otherwise the raw input (or "Unknown"
 *          when empty) with `speedGbps` set to 0.
 */
function parseSpeedGbps(speedType: string): { speed: string; speedGbps: number } {
  const lower = speedType.toLowerCase();
  const tier = SPEED_TIERS.find((t) => t.needles.some((needle) => lower.includes(needle)));
  if (tier) return { speed: tier.speed, speedGbps: tier.speedGbps };
  return { speed: speedType || "Unknown", speedGbps: 0 };
}

/**
 * Normalizes a form-factor string to a canonical name.
 *
 * The QSFP family is resolved before any SFP check because every QSFP name
 * contains "SFP" as a substring: without that ordering "QSFP56" would be
 * reported as "SFP56" and an unrecognised QSFP variant as plain "SFP".
 *
 * @param ff Raw form-factor text; may be empty.
 * @returns The canonical name, the raw input when nothing matches, or "SFP"
 *          when the input is empty.
 */
function normalizeFormFactor(ff: string): string {
  const upper = ff.toUpperCase().trim();

  if (upper.includes("QSFP")) {
    // Covers QSFP-DD, QSFPDD, QSFP DD and speed-qualified forms such as QSFP56-DD.
    if (/QSFP\d*[-\s]?DD/.test(upper)) return "QSFP-DD";
    if (upper.includes("QSFP112")) return "QSFP112";
    if (upper.includes("QSFP56")) return "QSFP56";
    if (upper.includes("QSFP28")) return "QSFP28";
    // Bare "QSFP", "QSFP+" and any other QSFP variant.
    return "QSFP+";
  }

  if (upper.includes("OSFP")) return "OSFP";
  if (upper.includes("CFP2")) return "CFP2";
  if (upper.includes("CFP4")) return "CFP4";
  if (upper.includes("CFP")) return "CFP";
  if (upper.includes("SFP56")) return "SFP56";
  if (upper.includes("SFP28")) return "SFP28";
  if (upper.includes("SFP+")) return "SFP+";
  if (upper.includes("XFP")) return "XFP";
  if (upper.includes("SFP")) return "SFP";
  return ff || "SFP";
}

/**
 * Classifies the medium from the cable type and description text.
 *
 * @returns "SMF", "MMF", "Copper", or "" when no keyword is found.
 */
function detectFiber(cableType: string, description: string): string {
  const text = `${cableType} ${description}`.toLowerCase();
  if (/smf|single.?mode/.test(text)) return "SMF";
  if (/mmf|multi.?mode/.test(text)) return "MMF";
  if (/copper|dac|twinax|cat\s*[56]|rj.?45|base-t/.test(text)) return "Copper";
  return "";
}

/**
 * Parses a reach string such as "40 km" or "300 m".
 *
 * Kilometres are tried first. Values are parsed as floats and rounded, so a
 * fractional metre value is not truncated; a numeric token that does not
 * parse to a finite number is ignored.
 *
 * @returns A label (number + unit) and the reach in metres, or `undefined`
 *          when the input is empty or contains no recognisable distance.
 */
function parseDistance(distance: string): { label: string; meters: number } | undefined {
  if (!distance) return undefined;

  const kmText = distance.match(/([\d.]+)\s*km/i)?.[1];
  if (kmText !== undefined) {
    const km = parseFloat(kmText);
    if (Number.isFinite(km)) return { label: `${kmText}km`, meters: Math.round(km * 1000) };
  }

  const mText = distance.match(/([\d.]+)\s*m\b/i)?.[1];
  if (mText !== undefined) {
    const meters = parseFloat(mText);
    if (Number.isFinite(meters)) return { label: `${mText}m`, meters: Math.round(meters) };
  }

  return undefined;
}

/** Returns the first 3–4 digit number followed by "nm" in the description, or "". */
function detectWavelength(description: string): string {
  const match = description.match(/(\d{3,4})\s*nm/i);
  return match?.[1] ?? "";
}

/** Reads `"field":"value"` from `chunk`; returns "" when absent. */
function readStringField(chunk: string, field: string): string {
  const re = new RegExp(`"${field}"\\s*:\\s*"([^"]*)"`, "i");
  return chunk.match(re)?.[1] ?? "";
}

/**
 * Reads the first element of an array-valued field. Handles `["value"]` and
 * `[{"key":"value"}]`, then falls back to a plain string field.
 */
function readArrayFirst(chunk: string, field: string): string {
  // Try ["value"] pattern
  const arrRe = new RegExp(`"${field}"\\s*:\\s*\\[\\s*"([^"]*)"`, "i");
  const fromArray = chunk.match(arrRe)?.[1];
  if (fromArray !== undefined) return fromArray;

  // Try [{speed:"value"}] pattern
  const objRe = new RegExp(`"${field}"\\s*:\\s*\\[\\s*\\{\\s*"\\w+"\\s*:\\s*"([^"]*)"`, "i");
  const fromObject = chunk.match(objRe)?.[1];
  if (fromObject !== undefined) return fromObject;

  return readStringField(chunk, field);
}

/** Reads a boolean field; returns false when the field is absent. */
function readBoolField(chunk: string, field: string): boolean {
  const re = new RegExp(`"${field}"\\s*:\\s*(true|false)`, "i");
  return chunk.match(re)?.[1] === "true";
}

/** Reads a non-negative numeric field; returns `undefined` when the field is absent. */
function readNumberField(chunk: string, field: string): number | undefined {
  const re = new RegExp(`"${field}"\\s*:\\s*(\\d+(?:\\.\\d+)?)`, "i");
  const text = chunk.match(re)?.[1];
  return text === undefined ? undefined : parseFloat(text);
}

/**
 * Extract transceiver data from Next.js SSR payload.
 * Data is embedded in self.__next_f.push([...]) with escaped JSON (\" not ").
 * Strategy: unescape the HTML, confirm a categoryDetail array is present, then
 * locate each object by its modelNumber key and read its fields with regexes
 * from a fixed-size window starting at the nearest preceding "{".
 *
 * NOTE(review): the window is not bounded by the object's closing brace, so a
 * field missing from one record can be picked up from the record that follows.
 *
 * @param html Raw page HTML.
 * @returns One entry per distinct modelNumber, in document order; empty when
 *          the categoryDetail marker is not found.
 */
function parseNextJsData(html: string): JuniperTransceiver[] {
  const transceivers: JuniperTransceiver[] = [];

  // \" becomes "; a doubly-escaped quote that is left as \\" afterwards becomes \".
  const unescaped = html.replace(/\\"/g, '"').replace(/\\\\"/g, '\\"');

  // The marker is only a presence check; the scan below covers the whole document.
  if (!unescaped.includes('"categoryDetail":[')) {
    console.log(" Warning: categoryDetail not found in HTML");
    return transceivers;
  }

  const seen = new Set<string>();
  for (const match of unescaped.matchAll(/"modelNumber"\s*:\s*"([^"]+)"/g)) {
    const modelNumber = match[1];
    if (modelNumber === undefined || seen.has(modelNumber)) continue;
    seen.add(modelNumber);

    // Start the window at the nearest preceding "{"; if there is none, start
    // at the match itself rather than slicing from a negative index.
    const idx = match.index ?? 0;
    const braceIdx = unescaped.lastIndexOf("{", idx);
    const start = braceIdx === -1 ? idx : braceIdx;
    const chunk = unescaped.slice(start, start + OBJECT_WINDOW_CHARS);

    // Extract distance from array like ["40 km"] or from maxDistanceLabel
    const maxDistanceLabel = readStringField(chunk, "maxDistanceLabel");
    const distance = chunk.match(/"distance"\s*:\s*\[\s*"([^"]*)"/i)?.[1] ?? maxDistanceLabel;

    transceivers.push({
      modelNumber,
      partNumber:
        readStringField(chunk, "partNumber") || readStringField(chunk, "oldPartNumber") || modelNumber,
      description: readStringField(chunk, "description"),
      cableType: readArrayFirst(chunk, "cableType"),
      distance,
      speedType: readArrayFirst(chunk, "speedType"),
      formFactor: readArrayFirst(chunk, "formFactor"),
      connectorType: readStringField(chunk, "connectorType"),
      maxDistanceKm: readNumberField(chunk, "maxDistanceKm"),
      maxDistanceLabel,
      isModelEol: readBoolField(chunk, "isModelEol"),
    });
  }

  return transceivers;
}

/**
 * Fetches a page as text using the scraper's request headers.
 *
 * @throws Error when the response status is not OK, or when the request is
 *         aborted by the timeout signal.
 */
async function fetchPage(url: string): Promise<string> {
  const resp = await fetch(url, { headers: HEADERS, signal: AbortSignal.timeout(FETCH_TIMEOUT_MS) });
  if (!resp.ok) throw new Error(`HTTP ${resp.status} for ${url}`);
  return resp.text();
}

/**
 * Scrapes every configured category, normalizes each transceiver and passes
 * it to `findOrCreateScrapedTransceiver`.
 *
 * A failure on one transceiver or one category is logged and does not stop
 * the run. A failure of `ensureVendor` propagates to the caller.
 */
export async function scrapeJuniperHct(): Promise<void> {
  console.log("=== Juniper HCT Scraper Starting ===\n");

  const vendorId = await ensureVendor(
    "Juniper Networks",
    "oem",
    "https://www.juniper.net",
    "https://apps.juniper.net/hct/",
  );

  let totalProducts = 0;

  for (const [index, cat] of CATEGORIES.entries()) {
    // Pause between requests only, not after the final category.
    if (index > 0) await sleep(CATEGORY_DELAY_MS);

    console.log(`\n--- ${cat.name} (category ${cat.id}) ---`);
    try {
      const html = await fetchPage(`${BASE}/category/${cat.id}`);
      console.log(` Fetched ${(html.length / 1024).toFixed(0)}KB`);

      const transceivers = parseNextJsData(html);
      console.log(` Parsed ${transceivers.length} transceivers`);

      for (const tx of transceivers) {
        try {
          const speedInfo = parseSpeedGbps(tx.speedType || tx.description);
          // Prefer the numeric km field; otherwise parse the textual distance.
          const distInfo = tx.maxDistanceKm
            ? { label: `${tx.maxDistanceKm}km`, meters: Math.round(tx.maxDistanceKm * 1000) }
            : parseDistance(tx.distance);
          const formFactor = normalizeFormFactor(tx.formFactor);

          await findOrCreateScrapedTransceiver({
            partNumber: tx.modelNumber,
            vendorId,
            formFactor,
            speedGbps: speedInfo.speedGbps,
            speed: speedInfo.speed,
            reachMeters: distInfo?.meters,
            reachLabel: distInfo?.label,
            fiberType: detectFiber(tx.cableType, tx.description),
            wavelengths: detectWavelength(tx.description),
            category: "DataCenter",
          });
          totalProducts++;
        } catch (err) {
          console.warn(` Error [${tx.modelNumber}]: ${errorMessage(err).slice(0, 80)}`);
        }
      }
    } catch (err) {
      console.error(` Category failed: ${errorMessage(err)}`);
    }
  }

  console.log(`\n=== Juniper HCT Complete: ${totalProducts} transceivers (no prices - OEM) ===`);
}

// Run the scraper when this file is executed directly.
if (require.main === module) {
  scrapeJuniperHct()
    .then(() => pool.end())
    .catch((err) => {
      console.error("Fatal:", err);
      pool.end();
      process.exit(1);
    });
}