/**
 * SONiC Hardware Compatibility List Scraper
 *
 * Fetches the SONiC supported devices list from GitHub wiki (Markdown table)
 * and platform.json files from sonic-buildimage/device/ for port mappings.
 *
 * NOTE(review): nothing visible in this file reads SONIC_DEVICE_API or
 * PlatformJson yet, so the platform.json half looks unimplemented — confirm.
 *
 * Sources:
 * - https://github.com/sonic-net/SONiC/wiki/Supported-Devices-and-Platforms
 * - https://github.com/sonic-net/sonic-buildimage/tree/master/device
 */
import { pool, ensureWhiteboxVendor, findOrCreateSwitch } from "../utils/db";
import { createHash } from "crypto";

/** Raw Markdown source of the "Supported Devices and Platforms" wiki page. */
const SONIC_WIKI_URL = "https://raw.githubusercontent.com/wiki/sonic-net/SONiC/Supported-Devices-and-Platforms.md";

/** GitHub contents API endpoint for the sonic-buildimage `device/` directory. */
const SONIC_DEVICE_API = "https://api.github.com/repos/sonic-net/sonic-buildimage/contents/device";

/** One row of the wiki's supported-devices table; every field is free text taken from the page. */
interface SonicDevice {
  vendor: string;
  platform: string;
  hwsku: string;
  asic: string;
  ports: string;
  sonicVersion: string;
}

/** Shape of a platform.json document, as far as this file is concerned. */
interface PlatformJson {
  // The per-interface entries are not read anywhere in this file, so their
  // shape is deliberately left as `unknown` instead of being guessed.
  interfaces: Record<string, unknown>;
}

/**
 * Compute a short fingerprint of `data` for change detection.
 *
 * @param data - Text to fingerprint.
 * @returns The first 16 hex characters of the SHA-256 digest of `data`.
 */
function contentHash(data: string): string {
  const fullDigest = createHash("sha256").update(data).digest("hex");
  return fullDigest.slice(0, 16);
}

/**
 * Parse the SONiC wiki Markdown table into structured device records.
*/
// Parsing rules, applied to each trimmed line in this order:
//  - A line that does not start with "|" ends the current table, unless it is
//    blank (blank lines inside a table are tolerated).
//  - A pipe row mentioning both "Vendor" and "Platform" is a header row and
//    opens a table.
//  - Inside a table, delimiter rows (every cell made of dashes with optional
//    colons, or the legacy "|---" prefix) and entirely blank rows are skipped.
//  - Any other row with at least four cells becomes a device. Cells are mapped
//    by position (vendor, platform, hwsku, asic, ports, sonicVersion) and empty
//    cells are kept, so a blank column no longer shifts the ones after it.
// Markdown emphasis asterisks are stripped from every cell.
// Escaped pipes inside a cell are not handled.
function parseWikiTable(markdown: string): SonicDevice[] {
  const delimiterCell = /^:?-+:?$/;
  const emphasisMarks = /\*+/g;
  const clean = (cell: string | undefined): string =>
    (cell ?? "").replace(emphasisMarks, "").trim();

  const devices: SonicDevice[] = [];
  let inTable = false;

  for (const line of markdown.split("\n")) {
    const row = line.trim();

    if (!row.startsWith("|")) {
      if (row.length > 0) {
        inTable = false;
      }
      continue;
    }

    if (row.includes("Vendor") && row.includes("Platform")) {
      inTable = true;
      continue;
    }
    if (!inTable) {
      continue;
    }

    const cells = splitTableRow(row);
    const isBlankRow = cells.every((cell) => cell.length === 0);
    const isDelimiterRow =
      row.startsWith("|---") || (!isBlankRow && cells.every((cell) => delimiterCell.test(cell)));
    if (isBlankRow || isDelimiterRow || cells.length < 4) {
      continue;
    }

    devices.push({
      vendor: clean(cells[0]),
      platform: clean(cells[1]),
      hwsku: clean(cells[2]),
      asic: clean(cells[3]),
      ports: clean(cells[4]),
      sonicVersion: clean(cells[5]),
    });
  }

  return devices;
}

/**
 * Split one pipe-delimited Markdown row into trimmed cells, dropping only the
 * empty fragments produced by the leading and trailing pipe.
 */
function splitTableRow(row: string): string[] {
  const cells = row.split("|").map((cell) => cell.trim());
  if (row.startsWith("|")) {
    cells.shift();
  }
  if (row.endsWith("|") && cells.length > 0) {
    cells.pop();
  }
  return cells;
}

/**
 * Extract port configuration from a ports description string like "32x100G QSFP28".
*/
// Groups are separated by ",", "&" or "+". A "+" that directly follows "SFP"
// is part of the form-factor name ("SFP+", "QSFP+") and is not a separator.
// Only the first "<count>x<speed>" pattern in each group is used; a group
// without one is ignored. Ports with no recognised form factor are keyed as
// "<speed>G_<speed>G" and contribute nothing to `formFactors`.
function parsePortString(ports: string): {
  portsConfig: Record<string, number>;
  totalPorts: number;
  maxSpeedGbps: number;
  formFactors: string[];
} {
  const groupSeparator = /[,&]|(?<!SFP)\+/i;
  const portGroup =
    /(\d+)\s*x\s*(\d+)G?\s*(QSFP-DD|QSFP28|QSFP\+|QSFP56|SFP28|SFP\+|SFP56|OSFP|CFP2|RJ45)?/i;

  const portsConfig: Record<string, number> = {};
  const formFactors: string[] = [];
  let totalPorts = 0;
  let maxSpeedGbps = 0;

  for (const group of ports.split(groupSeparator)) {
    const match = portGroup.exec(group.trim());
    if (!match) {
      continue;
    }

    const count = Number.parseInt(match[1] ?? "", 10);
    const speed = Number.parseInt(match[2] ?? "", 10);
    const formFactor = match[3]?.toUpperCase();

    const key = `${speed}G_${formFactor ?? `${speed}G`}`;
    portsConfig[key] = (portsConfig[key] ?? 0) + count;
    totalPorts += count;
    maxSpeedGbps = Math.max(maxSpeedGbps, speed);

    if (formFactor !== undefined && !formFactors.includes(formFactor)) {
      formFactors.push(formFactor);
    }
  }

  return { portsConfig, totalPorts, maxSpeedGbps, formFactors };
}

/**
 * Map ASIC string from wiki to structured vendor/model.
 *
 * Matching is a case-insensitive substring test and the first rule that
 * matches wins, so the more specific generations are tested before the bare
 * family name.
 *
 * @param asic - Free-text ASIC description from the wiki table.
 * @returns The vendor, model and series; `vendor` is "Unknown" and `model` is
 *   the input string when no rule matches.
 */
function parseAsic(asic: string): { vendor: string; model: string; series: string } {
  const lower = asic.toLowerCase();
  const has = (...needles: string[]): boolean => needles.some((needle) => lower.includes(needle));

  // NOTE(review): "memory Memory" is the series value this file has always
  // returned for Broadcom parts; it looks like a corrupted placeholder. It is
  // kept unchanged because stored rows may depend on it — confirm the intended
  // series name.
  const broadcom = (model: string) => ({ vendor: "Broadcom", model, series: "memory Memory" });
  const nvidia = (model: string) => ({ vendor: "NVIDIA", model, series: "Spectrum" });

  if (has("tomahawk 5", "th5")) return broadcom("Tomahawk 5");
  if (has("tomahawk 4", "th4")) return broadcom("Tomahawk 4");
  if (has("tomahawk 3", "th3")) return broadcom("Tomahawk 3");
  if (has("tomahawk 2", "th2")) return broadcom("Tomahawk 2");
  if (has("tomahawk")) return broadcom("Tomahawk");
  if (has("trident 4", "td4")) return broadcom("Trident 4");
  // NOTE(review): "Trident III" is spelled differently from "Trident 4"; kept as-is.
  if (has("trident 3", "td3")) return broadcom("Trident III");
  // NOTE(review): the second matcher looks corrupted; its intended text is
  // unclear, so it is preserved verbatim.
  if (has("jericho2", "memory jericho")) return broadcom("Jericho2");

  if (has("spectrum-4", "spectrum4")) return nvidia("Spectrum-4");
  if (has("spectrum-3", "spectrum3")) return nvidia("Spectrum-3");
  if (has("spectrum-2", "spectrum2")) return nvidia("Spectrum-2");
  if (has("spectrum")) return nvidia("Spectrum");

  if (has("teralynx")) return { vendor: "Marvell", model: asic, series: "Teralynx" };
  if (has("prestera")) return { vendor: "Marvell", model: "Prestera", series: "Prestera" };
  if (has("tofino", "barefoot")) return { vendor: "Intel/Barefoot", model: asic, series: "Tofino" };

  return { vendor: "Unknown", model: asic, series: "" };
}

/**
 * Map vendor name from wiki to our canonical vendor names.
*/
// Matching is a case-insensitive substring test against the keys below, in
// declaration order; the first key contained in the wiki name wins. Unmatched
// vendors are returned unchanged with an empty website.
function normalizeVendor(vendor: string): { name: string; website: string } {
  const lower = vendor.toLowerCase();
  const vendorMap: Record<string, { name: string; website: string }> = {
    edgecore: { name: "Edgecore Networks", website: "https://www.edge-core.com" },
    accton: { name: "Edgecore Networks", website: "https://www.edge-core.com" },
    celestica: { name: "Celestica", website: "https://www.celestica.com" },
    delta: { name: "Delta Networks", website: "https://www.deltaww.com" },
    quanta: { name: "Quanta Cloud Technology", website: "https://www.qct.io" },
    inventec: { name: "Inventec", website: "https://www.inventec.com" },
    ufispace: { name: "UfiSpace", website: "https://www.ufispace.com" },
    asterfusion: { name: "Asterfusion", website: "https://www.asterfusion.com" },
    netberg: { name: "Netberg", website: "https://netbergtw.com" },
    ragile: { name: "Ragile Networks", website: "https://www.ragilenetworks.com" },
    mellanox: { name: "NVIDIA Networking", website: "https://www.nvidia.com/networking" },
    nvidia: { name: "NVIDIA Networking", website: "https://www.nvidia.com/networking" },
    dell: { name: "Dell Technologies", website: "https://www.dell.com" },
    arista: { name: "Arista Networks", website: "https://www.arista.com" },
    juniper: { name: "Juniper Networks", website: "https://www.juniper.net" },
    cisco: { name: "Cisco Systems", website: "https://www.cisco.com" },
    nokia: { name: "Nokia", website: "https://www.nokia.com" },
  };

  for (const [key, canonical] of Object.entries(vendorMap)) {
    if (lower.includes(key)) {
      return canonical;
    }
  }
  return { name: vendor, website: "" };
}

/**
 * Store one wiki device as a whitebox switch.
 *
 * Runs the same steps, in the same order, for every device: normalize the
 * vendor, ensure the vendor row exists, parse ports and ASIC, check whether a
 * switch with this model and vendor already exists, then call
 * `findOrCreateSwitch`.
 *
 * @returns `isNew` — whether no matching switch row existed before the call —
 *   and the canonical vendor name used.
 */
async function upsertSonicDevice(device: SonicDevice): Promise<{ isNew: boolean; vendorName: string }> {
  const { name: vendorName, website } = normalizeVendor(device.vendor);
  // Presumably resolves to the vendor's row id; it is only passed on as such here.
  const vendorId = await ensureWhiteboxVendor(vendorName, website, {
    isOdm: true,
    ocpMember: false,
    sonicContributor: true,
  });

  const portInfo = parsePortString(device.ports);
  const asicInfo = parseAsic(device.asic);

  // Looked up before the write so the caller can tell "created" from "updated".
  const existing = await pool.query(
    `SELECT id FROM switches WHERE model = $1 AND vendor_id = $2`,
    [device.platform, vendorId]
  );
  const isNew = existing.rows.length === 0;

  await findOrCreateSwitch({
    model: device.platform,
    vendorId,
    category: "DataCenter",
    layer: "L3",
    portsConfig: portInfo.portsConfig,
    totalPorts: portInfo.totalPorts,
    maxSpeedGbps: portInfo.maxSpeedGbps,
    asicVendor: asicInfo.vendor,
    asicModel: asicInfo.model,
    asicSeries: asicInfo.series,
    sonicCompatible: true,
    isWhitebox: true,
    onieSupport: true,
    supportedNos: ["SONiC"],
    sonicHwsku: device.hwsku || undefined,
    transceiverFormFactors: portInfo.formFactors,
    tags: [
      "whitebox",
      "SONiC",
      ...(portInfo.maxSpeedGbps > 0 ? [`${portInfo.maxSpeedGbps}G`] : []),
      asicInfo.vendor,
      asicInfo.model,
    ].filter(Boolean),
    scrapeSource: "sonic-hcl-wiki",
  });

  return { isNew, vendorName };
}

/**
 * Scrape the SONiC supported-devices wiki page and store its devices.
 *
 * Steps:
 * 1. Fetch the wiki Markdown; log and return on a non-OK HTTP status.
 * 2. Compare a content hash with the most recent `sonic-hcl` row in
 *    `news_articles`; return early when it is unchanged.
 * 3. Parse the table and store every device that has a vendor and a platform.
 *    A failure on one device is logged and counted as skipped; the loop goes on.
 * 4. Record the scrape (and the new hash) in `news_articles`.
 *
 * Errors from the fetch itself and from the hash lookup are not caught here
 * and reject the returned promise.
 *
 * NOTE(review): the hash is recorded even when some devices failed, so those
 * devices are not retried until the wiki content changes — confirm this is
 * intended.
 */
export async function scrapeSonicHcl(): Promise<void> {
  console.log("\n=== SONiC HCL Scraper ===\n");

  // 1. Fetch the wiki page
  console.log(" Fetching SONiC wiki: Supported Devices...");
  const wikiResponse = await fetch(SONIC_WIKI_URL, {
    headers: { "User-Agent": "TIP-Scraper/1.0 (transceiver-intelligence-platform)" },
  });
  if (!wikiResponse.ok) {
    console.error(` ! Wiki fetch failed: ${wikiResponse.status}`);
    return;
  }

  const wikiMarkdown = await wikiResponse.text();
  const hash = contentHash(wikiMarkdown);

  // Check if content changed
  const lastHash = await pool.query(
    `SELECT content_hash FROM news_articles WHERE source = 'sonic-hcl' ORDER BY created_at DESC LIMIT 1`
  );
  if (lastHash.rows.length > 0 && lastHash.rows[0].content_hash === hash) {
    console.log(" No changes detected in SONiC HCL. Skipping.");
    return;
  }

  // 2. Parse the wiki table
  const devices = parseWikiTable(wikiMarkdown);
  console.log(` Found ${devices.length} devices in SONiC HCL\n`);

  let created = 0;
  let updated = 0;
  let skipped = 0;

  for (const device of devices) {
    if (!device.platform || !device.vendor) {
      skipped++;
      continue;
    }

    try {
      const { isNew, vendorName } = await upsertSonicDevice(device);
      if (isNew) {
        created++;
        console.log(` + ${vendorName} ${device.platform} (${device.ports}, ${device.asic})`);
      } else {
        updated++;
      }
    } catch (err: unknown) {
      console.error(` ! Error processing ${device.vendor} ${device.platform}:`, err);
      skipped++;
    }
  }

  // Store scrape record
  try {
    await pool.query(
      `INSERT INTO news_articles (title, source, source_url, summary, content_hash, category, tags) VALUES ($1, $2, $3, $4, $5, $6, $7) ON CONFLICT (source_url) DO UPDATE SET content_hash = $5, summary = $4`,
      [
        `SONiC HCL Update: ${devices.length} devices`,
        "sonic-hcl",
        SONIC_WIKI_URL,
        `Scraped ${created} new, ${updated} updated, ${skipped} skipped from SONiC HCL wiki`,
        hash,
        "standard",
        ["SONiC", "HCL", "whitebox"],
      ]
    );
  } catch (err: unknown) {
    // Non-critical for the scrape itself, but worth surfacing: without this
    // row the new content hash is not stored.
    console.warn(" ! Could not store SONiC HCL scrape record (content hash not saved):", err);
  }

  console.log(`\n Created: ${created}, Updated: ${updated}, Skipped: ${skipped}\n`);
}