319 lines
11 KiB
TypeScript

/**
* SONiC Hardware Compatibility List Scraper
*
* Fetches the SONiC supported devices list from GitHub wiki (Markdown table)
* and platform.json files from sonic-buildimage/device/ for port mappings.
*
* Sources:
* - https://github.com/sonic-net/SONiC/wiki/Supported-Devices-and-Platforms
* - https://github.com/sonic-net/sonic-buildimage/tree/master/device
*/
import { pool, ensureWhiteboxVendor, findOrCreateSwitch } from "../utils/db";
import { createHash } from "crypto";
/** URL of the raw Markdown for the SONiC "Supported Devices and Platforms" wiki page; fetched by scrapeSonicHcl. */
const SONIC_WIKI_URL =
"https://raw.githubusercontent.com/wiki/sonic-net/SONiC/Supported-Devices-and-Platforms.md";
/**
 * GitHub contents-API URL for the `device` directory of sonic-buildimage.
 * NOTE(review): not referenced by any code visible in this part of the file.
 */
const SONIC_DEVICE_API =
"https://api.github.com/repos/sonic-net/sonic-buildimage/contents/device";
/**
 * One data row of the SONiC wiki device table, as produced by parseWikiTable.
 * Fields are filled positionally from the row's columns, with Markdown
 * asterisks removed; a missing column yields an empty string.
 */
interface SonicDevice {
// Column 1 of the table row.
vendor: string;
// Column 2; used as the switch model when the device is stored.
platform: string;
// Column 3 ("" when absent).
hwsku: string;
// Column 4: free-text ASIC description, interpreted by parseAsic.
asic: string;
// Column 5: free-text port description, interpreted by parsePortString.
ports: string;
// Column 6 ("" when absent).
sonicVersion: string;
}
/**
 * Shape of a parsed platform.json document.
 * NOTE(review): presumably the per-device platform.json files from
 * sonic-buildimage mentioned in the file header; nothing visible in this part
 * of the file reads or constructs this type — confirm before relying on it.
 */
interface PlatformJson {
// Keyed by a string per interface — presumably the interface name; TODO confirm against a real platform.json.
interfaces: Record<string, { index: number; lanes: string; speed: string; alias?: string }>;
}
/**
 * Compute a short fingerprint of a piece of text.
 *
 * @param data - Text to fingerprint.
 * @returns The first 16 hexadecimal characters of the SHA-256 digest of `data`.
 */
function contentHash(data: string): string {
  const hasher = createHash("sha256");
  hasher.update(data);
  const hexDigest = hasher.digest("hex");
  return hexDigest.substring(0, 16);
}
/**
 * Parse the SONiC wiki Markdown table into structured device records.
 *
 * A table starts at a pipe-prefixed line that mentions both "Vendor" and
 * "Platform" (the header row) and ends at the first non-blank line that does
 * not start with "|". Columns are read by position:
 * vendor, platform, hwsku, asic, ports, sonicVersion.
 *
 * Cell positions are preserved even when a cell is empty, so a blank column
 * does not shift the following fields. Delimiter rows (`|---|`, `| --- |`,
 * `|:---:|`, ...) are never emitted as devices. Rows with fewer than four
 * non-empty cells are ignored.
 *
 * @param markdown - Raw Markdown text of the wiki page.
 * @returns One record per data row, in document order; Markdown asterisks
 *          (bold/italic markers) are stripped from every field.
 */
function parseWikiTable(markdown: string): SonicDevice[] {
  const devices: SonicDevice[] = [];
  // A delimiter-row cell: dashes with optional alignment colons.
  const delimiterCell = /^:?-+:?$/;
  const clean = (cell: string | undefined): string =>
    (cell ?? "").replace(/\*+/g, "").trim();

  let inTable = false;
  for (const line of markdown.split("\n")) {
    const trimmed = line.trim();
    // Blank lines neither start nor end a table.
    if (trimmed.length === 0) continue;

    if (!trimmed.startsWith("|")) {
      inTable = false;
      continue;
    }
    if (trimmed.includes("Vendor") && trimmed.includes("Platform")) {
      inTable = true;
      continue;
    }
    if (!inTable || trimmed.startsWith("|---")) continue;

    // Drop only the outer pipes so empty inner cells keep their column index.
    const inner = trimmed.slice(1, trimmed.endsWith("|") ? -1 : undefined);
    const cells = inner.split("|").map((cell) => cell.trim());

    if (cells.every((cell) => delimiterCell.test(cell))) continue;

    const filledCells = cells.filter((cell) => cell.length > 0).length;
    if (filledCells < 4) continue;

    devices.push({
      vendor: clean(cells[0]),
      platform: clean(cells[1]),
      hwsku: clean(cells[2]),
      asic: clean(cells[3]),
      ports: clean(cells[4]),
      sonicVersion: clean(cells[5]),
    });
  }
  return devices;
}
/**
 * Extract port configuration from a ports description string like
 * "32x100G QSFP28" or "48x10G SFP+ + 6x40G QSFP+".
 *
 * The description is split into groups on ",", "&" and "+"; the first
 * `<count> x <speed>[G] [form factor]` occurrence in each group is counted.
 * A "+" that directly follows "SFP" is treated as part of the form-factor
 * name ("SFP+", "QSFP+") rather than as a group separator, so those form
 * factors are recognised.
 *
 * @param ports - Free-text port description.
 * @returns
 *  - `portsConfig`: port count keyed by `<speed>G_<FORM FACTOR>`; when no form
 *    factor is recognised the key is `<speed>G_<speed>G`.
 *  - `totalPorts`: sum of all counted ports.
 *  - `maxSpeedGbps`: highest speed seen (0 when nothing matched).
 *  - `formFactors`: distinct upper-cased form factors in first-seen order.
 */
function parsePortString(ports: string): { portsConfig: Record<string, number>; totalPorts: number; maxSpeedGbps: number; formFactors: string[] } {
  const portsConfig: Record<string, number> = {};
  const formFactors: string[] = [];
  let totalPorts = 0;
  let maxSpeedGbps = 0;

  // "+" separates groups unless it is the suffix of SFP+/QSFP+.
  const groupSeparator = /[,&]|(?<!SFP)\+/i;
  const groupPattern =
    /(\d+)\s*x\s*(\d+)G?\s*(QSFP-DD|QSFP28|QSFP\+|QSFP56|SFP28|SFP\+|SFP56|OSFP|CFP2|RJ45)?/i;

  for (const group of ports.split(groupSeparator)) {
    const match = groupPattern.exec(group);
    if (!match) continue;

    const count = parseInt(match[1], 10);
    const speed = parseInt(match[2], 10);
    // The form-factor capture is optional and may be undefined at runtime.
    const formFactor = match[3] ? match[3].toUpperCase() : undefined;

    const key = `${speed}G_${formFactor ?? `${speed}G`}`;
    portsConfig[key] = (portsConfig[key] || 0) + count;
    totalPorts += count;
    maxSpeedGbps = Math.max(maxSpeedGbps, speed);

    if (formFactor && !formFactors.includes(formFactor)) {
      formFactors.push(formFactor);
    }
  }
  return { portsConfig, totalPorts, maxSpeedGbps, formFactors };
}
/**
 * Map ASIC string from wiki to structured vendor/model.
 *
 * Matching is a case-insensitive substring search; rules are checked from
 * most specific to least specific, so "Tomahawk 3" is tested before the
 * generic "Tomahawk". Both spaced and unspaced spellings ("Tomahawk 3",
 * "Tomahawk3") and the common abbreviations ("th3", "td3") are accepted.
 *
 * @param asic - Free-text ASIC description from the wiki table.
 * @returns The ASIC vendor, a normalised model name (or the raw `asic` text
 *          where no fixed model name applies) and the product series.
 *          Unrecognised input yields vendor "Unknown", the raw text as model
 *          and an empty series.
 */
function parseAsic(asic: string): { vendor: string; model: string; series: string } {
  const lower = asic.toLowerCase();
  const has = (...needles: string[]): boolean =>
    needles.some((needle) => lower.includes(needle));

  // Broadcom StrataXGS (Tomahawk / Trident) — numbered generations first.
  if (has("tomahawk 5", "tomahawk5", "th5")) {
    return { vendor: "Broadcom", model: "Tomahawk 5", series: "StrataXGS" };
  }
  if (has("tomahawk 4", "tomahawk4", "th4")) {
    return { vendor: "Broadcom", model: "Tomahawk 4", series: "StrataXGS" };
  }
  if (has("tomahawk 3", "tomahawk3", "th3")) {
    return { vendor: "Broadcom", model: "Tomahawk 3", series: "StrataXGS" };
  }
  if (has("tomahawk 2", "tomahawk2", "th2")) {
    return { vendor: "Broadcom", model: "Tomahawk 2", series: "StrataXGS" };
  }
  if (has("tomahawk")) {
    return { vendor: "Broadcom", model: "Tomahawk", series: "StrataXGS" };
  }
  if (has("trident 4", "trident4", "td4")) {
    return { vendor: "Broadcom", model: "Trident 4", series: "StrataXGS" };
  }
  if (has("trident 3", "trident3", "td3")) {
    return { vendor: "Broadcom", model: "Trident III", series: "StrataXGS" };
  }

  // Broadcom StrataDNX (Jericho).
  if (has("jericho2", "jericho 2")) {
    return { vendor: "Broadcom", model: "Jericho2", series: "StrataDNX" };
  }
  if (has("jericho")) {
    // Other Jericho generations: keep the wiki text rather than guess a model.
    return { vendor: "Broadcom", model: asic, series: "StrataDNX" };
  }

  // NVIDIA Spectrum — numbered generations first.
  if (has("spectrum-4", "spectrum4")) {
    return { vendor: "NVIDIA", model: "Spectrum-4", series: "Spectrum" };
  }
  if (has("spectrum-3", "spectrum3")) {
    return { vendor: "NVIDIA", model: "Spectrum-3", series: "Spectrum" };
  }
  if (has("spectrum-2", "spectrum2")) {
    return { vendor: "NVIDIA", model: "Spectrum-2", series: "Spectrum" };
  }
  if (has("spectrum")) {
    return { vendor: "NVIDIA", model: "Spectrum", series: "Spectrum" };
  }

  // Marvell.
  if (has("teralynx")) {
    return { vendor: "Marvell", model: asic, series: "Teralynx" };
  }
  if (has("prestera")) {
    return { vendor: "Marvell", model: "Prestera", series: "Prestera" };
  }

  // Intel / Barefoot Tofino.
  if (has("tofino", "barefoot")) {
    return { vendor: "Intel/Barefoot", model: asic, series: "Tofino" };
  }

  return { vendor: "Unknown", model: asic, series: "" };
}
/**
 * Translate a vendor name as written on the wiki into the canonical vendor
 * name and website used by this project.
 *
 * The lookup is a case-insensitive substring test against a fixed list of
 * aliases, evaluated in list order; the first alias contained in `vendor`
 * wins.
 *
 * @param vendor - Vendor text from the wiki table.
 * @returns The canonical `{ name, website }`, or `{ name: vendor, website: "" }`
 *          when no alias matches.
 */
function normalizeVendor(vendor: string): { name: string; website: string } {
  const canonical = (name: string, website: string): { name: string; website: string } => ({
    name,
    website,
  });

  // Order matters: the first alias found inside the vendor text is used.
  const aliases: Array<[string, { name: string; website: string }]> = [
    ["edgecore", canonical("Edgecore Networks", "https://www.edge-core.com")],
    ["accton", canonical("Edgecore Networks", "https://www.edge-core.com")],
    ["celestica", canonical("Celestica", "https://www.celestica.com")],
    ["delta", canonical("Delta Networks", "https://www.deltaww.com")],
    ["quanta", canonical("Quanta Cloud Technology", "https://www.qct.io")],
    ["inventec", canonical("Inventec", "https://www.inventec.com")],
    ["ufispace", canonical("UfiSpace", "https://www.ufispace.com")],
    ["asterfusion", canonical("Asterfusion", "https://www.asterfusion.com")],
    ["netberg", canonical("Netberg", "https://netbergtw.com")],
    ["ragile", canonical("Ragile Networks", "https://www.ragilenetworks.com")],
    ["mellanox", canonical("NVIDIA Networking", "https://www.nvidia.com/networking")],
    ["nvidia", canonical("NVIDIA Networking", "https://www.nvidia.com/networking")],
    ["dell", canonical("Dell Technologies", "https://www.dell.com")],
    ["arista", canonical("Arista Networks", "https://www.arista.com")],
    ["juniper", canonical("Juniper Networks", "https://www.juniper.net")],
    ["cisco", canonical("Cisco Systems", "https://www.cisco.com")],
    ["nokia", canonical("Nokia", "https://www.nokia.com")],
  ];

  const haystack = vendor.toLowerCase();
  const hit = aliases.find(([alias]) => haystack.includes(alias));
  if (hit === undefined) {
    return { name: vendor, website: "" };
  }
  return hit[1];
}
/**
 * Scrape the SONiC supported-devices wiki page and upsert its devices.
 *
 * Steps:
 *  1. Fetch the wiki Markdown. A non-OK HTTP status is logged and ends the run.
 *  2. Fingerprint the page and compare it with the `content_hash` of the most
 *     recent `news_articles` row whose source is 'sonic-hcl'; if equal, the
 *     run ends without touching any switch rows.
 *  3. Parse the device table and, per device, ensure the vendor exists and
 *     call findOrCreateSwitch with the parsed port and ASIC details. A device
 *     without vendor or platform, or one whose processing throws, is counted
 *     as skipped; the remaining devices are still processed.
 *  4. Upsert a `news_articles` row carrying the new fingerprint and a summary.
 *     A failure here is logged but not rethrown.
 *
 * @returns Resolves when the run has finished. Rejects if the fetch, the
 *          reading of the response body, or the last-hash query throws.
 */
export async function scrapeSonicHcl(): Promise<void> {
  console.log("\n=== SONiC HCL Scraper ===\n");

  // 1. Fetch the wiki page
  console.log(" Fetching SONiC wiki: Supported Devices...");
  const wikiResponse = await fetch(SONIC_WIKI_URL, {
    headers: { "User-Agent": "TIP-Scraper/1.0 (transceiver-intelligence-platform)" },
  });
  if (!wikiResponse.ok) {
    console.error(` ! Wiki fetch failed: ${wikiResponse.status}`);
    return;
  }
  const wikiMarkdown = await wikiResponse.text();
  const hash = contentHash(wikiMarkdown);

  // Check if content changed since the last recorded scrape
  const lastHash = await pool.query(
    `SELECT content_hash FROM news_articles WHERE source = 'sonic-hcl' ORDER BY created_at DESC LIMIT 1`
  );
  if (lastHash.rows.length > 0 && lastHash.rows[0].content_hash === hash) {
    console.log(" No changes detected in SONiC HCL. Skipping.");
    return;
  }

  // 2. Parse the wiki table
  const devices = parseWikiTable(wikiMarkdown);
  console.log(` Found ${devices.length} devices in SONiC HCL\n`);

  let created = 0;
  let updated = 0;
  let skipped = 0;

  for (const device of devices) {
    if (!device.platform || !device.vendor) {
      skipped++;
      continue;
    }
    try {
      const { name: vendorName, website } = normalizeVendor(device.vendor);
      const vendorId = await ensureWhiteboxVendor(vendorName, website, {
        isOdm: true,
        ocpMember: false,
        sonicContributor: true,
      });
      const portInfo = parsePortString(device.ports);
      const asicInfo = parseAsic(device.asic);

      // Looked up before the upsert only to tell "created" from "updated".
      const existing = await pool.query(
        `SELECT id FROM switches WHERE model = $1 AND vendor_id = $2`,
        [device.platform, vendorId]
      );
      const isNew = existing.rows.length === 0;

      await findOrCreateSwitch({
        model: device.platform,
        vendorId,
        category: "DataCenter",
        layer: "L3",
        portsConfig: portInfo.portsConfig,
        totalPorts: portInfo.totalPorts,
        maxSpeedGbps: portInfo.maxSpeedGbps,
        asicVendor: asicInfo.vendor,
        asicModel: asicInfo.model,
        asicSeries: asicInfo.series,
        sonicCompatible: true,
        isWhitebox: true,
        onieSupport: true,
        supportedNos: ["SONiC"],
        sonicHwsku: device.hwsku || undefined,
        transceiverFormFactors: portInfo.formFactors,
        tags: [
          "whitebox",
          "SONiC",
          ...(portInfo.maxSpeedGbps > 0 ? [`${portInfo.maxSpeedGbps}G`] : []),
          asicInfo.vendor,
          asicInfo.model,
        ].filter(Boolean),
        scrapeSource: "sonic-hcl-wiki",
      });

      if (isNew) {
        created++;
        console.log(` + ${vendorName} ${device.platform} (${device.ports}, ${device.asic})`);
      } else {
        updated++;
      }
    } catch (err) {
      console.error(` ! Error processing ${device.vendor} ${device.platform}:`, err);
      skipped++;
    }
  }

  // Store scrape record. Its content_hash is what the change check above
  // reads on the next run, so a failure here means the next run will
  // re-process the same page; report it instead of hiding it.
  try {
    await pool.query(
      `INSERT INTO news_articles (title, source, source_url, summary, content_hash, category, tags)
VALUES ($1, $2, $3, $4, $5, $6, $7)
ON CONFLICT (source_url) DO UPDATE SET content_hash = $5, summary = $4`,
      [
        `SONiC HCL Update: ${devices.length} devices`,
        "sonic-hcl",
        SONIC_WIKI_URL,
        `Scraped ${created} new, ${updated} updated, ${skipped} skipped from SONiC HCL wiki`,
        hash,
        "standard",
        ["SONiC", "HCL", "whitebox"],
      ]
    );
  } catch (err) {
    // Non-critical for this run — the devices are already stored.
    console.warn(" ! Failed to store SONiC HCL scrape record:", err);
  }

  console.log(`\n Created: ${created}, Updated: ${updated}, Skipped: ${skipped}\n`);
}