201 lines
7.2 KiB
TypeScript

/**
* Edgecore Networks Product Catalog Scraper
*
* Scrapes switch product pages from edge-core.com for:
* - Product specs (ports, ASIC, power, dimensions)
* - Transceiver form factor compatibility
* - Datasheet URLs
*
* Source: https://www.edge-core.com/productsList.php?cls=1
*/
import { CheerioCrawler } from "crawlee";
import { pool, ensureWhiteboxVendor, findOrCreateSwitch } from "../utils/db";
/** Root URL of the Edgecore website. */
const BASE_URL = "https://www.edge-core.com";

/** Product listing page that the crawl starts from. */
const PRODUCT_LIST_URL = BASE_URL + "/productsList.php?cls=1";
/**
 * Extract port configuration from free-form spec text.
 *
 * Recognises groups of the form `<count> x <speed>G[bE|b/s|bps|b] [<form factor>]`,
 * e.g. "32x 100GbE QSFP28" or "48x25G SFP28 + 8x100G QSFP28". Matching is
 * case-insensitive and every group found in the text is accumulated.
 *
 * Groups written with an upper-case "GB" unit and no transceiver form factor
 * (e.g. "2 x 16GB DDR4") are treated as memory/storage sizes and ignored, so
 * they do not inflate the port count or the maximum speed.
 *
 * @param specText Raw text to scan (typically the whole product page body).
 * @returns
 *   - `portsConfig`: port count keyed by `"<speed>G_<FORMFACTOR>"`; when no form
 *     factor is present the key is `"<speed>G_<speed>G"`.
 *   - `totalPorts`: sum of all counted ports.
 *   - `maxSpeedGbps`: highest per-port speed seen (0 when nothing matched).
 *   - `formFactors`: distinct upper-cased form factors in order of first appearance.
 */
function extractPortsFromSpec(specText: string): {
  portsConfig: Record<string, number>;
  totalPorts: number;
  maxSpeedGbps: number;
  formFactors: string[];
} {
  // Capture groups: 1 = count, 2 = speed, 3 = unit suffix after "G", 4 = form factor.
  // The bare "b" alternative must not be followed by a letter so "10GBASE-T" still
  // parses as a plain "10G" group.
  const portPattern =
    /(\d+)\s*x\s*(\d+)\s*G(bE|b\/s|bps|b(?![a-z]))?\s*(QSFP-DD|QSFP28|QSFP\+|QSFP56|SFP28|SFP\+|SFP56|OSFP|CFP2)?/gi;

  const portsConfig: Record<string, number> = {};
  const formFactors: string[] = [];
  let totalPorts = 0;
  let maxSpeedGbps = 0;

  for (const match of specText.matchAll(portPattern)) {
    const unit: string | undefined = match[3];
    const rawFormFactor: string | undefined = match[4];
    const formFactor = rawFormFactor === undefined ? undefined : rawFormFactor.toUpperCase();

    // Upper-case "GB" with no transceiver type is gigabytes (RAM/SSD), not a port group.
    if (unit === "B" && formFactor === undefined) {
      continue;
    }

    const count = Number.parseInt(match[1] ?? "0", 10);
    const speed = Number.parseInt(match[2] ?? "0", 10);
    const key = `${speed}G_${formFactor ?? `${speed}G`}`;

    portsConfig[key] = (portsConfig[key] ?? 0) + count;
    totalPorts += count;
    maxSpeedGbps = Math.max(maxSpeedGbps, speed);

    if (formFactor !== undefined && !formFactors.includes(formFactor)) {
      formFactors.push(formFactor);
    }
  }

  return { portsConfig, totalPorts, maxSpeedGbps, formFactors };
}
/**
 * Ordered ASIC signature table used by `detectAsic`. The first matching entry
 * wins, so specific generations are listed before the generic family name.
 *
 * Arabic generation digits must not be followed by another digit or a decimal
 * fraction, so a bandwidth figure such as "Tomahawk 3.2 Tbps" is not mistaken
 * for "Tomahawk 3". Roman numerals are accepted for Tomahawk II-IV, mirroring
 * the Trident entries.
 */
const ASIC_SIGNATURES: ReadonlyArray<{
  pattern: RegExp;
  vendor: string;
  model: string;
  series: string;
}> = [
  // Broadcom Tomahawk / Trident belong to the StrataXGS line; Jericho is StrataDNX.
  { pattern: /tomahawk\s*5(?!\.?\d)/i, vendor: "Broadcom", model: "Tomahawk 5", series: "StrataXGS" },
  { pattern: /tomahawk\s*(?:4(?!\.?\d)|iv\b)/i, vendor: "Broadcom", model: "Tomahawk 4", series: "StrataXGS" },
  { pattern: /tomahawk\s*(?:3(?!\.?\d)|iii\b)/i, vendor: "Broadcom", model: "Tomahawk 3", series: "StrataXGS" },
  { pattern: /tomahawk\s*(?:2(?!\.?\d)|ii\b)/i, vendor: "Broadcom", model: "Tomahawk 2", series: "StrataXGS" },
  { pattern: /tomahawk\+/i, vendor: "Broadcom", model: "Tomahawk+", series: "StrataXGS" },
  { pattern: /tomahawk/i, vendor: "Broadcom", model: "Tomahawk", series: "StrataXGS" },
  { pattern: /trident\s*(4|iv)/i, vendor: "Broadcom", model: "Trident 4", series: "StrataXGS" },
  { pattern: /trident\s*(3|iii)/i, vendor: "Broadcom", model: "Trident III", series: "StrataXGS" },
  { pattern: /jericho\s*2/i, vendor: "Broadcom", model: "Jericho2", series: "StrataDNX" },
  { pattern: /spectrum-?4/i, vendor: "NVIDIA", model: "Spectrum-4", series: "Spectrum" },
  { pattern: /teralynx/i, vendor: "Marvell", model: "Teralynx", series: "Teralynx" },
  { pattern: /prestera/i, vendor: "Marvell", model: "Prestera", series: "Prestera" },
];

/**
 * Detect the switch ASIC from product page text.
 *
 * @param text Free-form text to search (case-insensitive).
 * @returns Vendor, model and series of the first signature that matches. When
 *   nothing matches, returns `{ vendor: "Broadcom", model: "Unknown", series: "" }`;
 *   note the vendor still defaults to "Broadcom" in that case.
 */
function detectAsic(text: string): { vendor: string; model: string; series: string } {
  const hit = ASIC_SIGNATURES.find(({ pattern }) => pattern.test(text));
  if (hit === undefined) {
    return { vendor: "Broadcom", model: "Unknown", series: "" };
  }
  return { vendor: hit.vendor, model: hit.model, series: hit.series };
}
/**
 * Crawl the Edgecore product catalog and record every switch found.
 *
 * Flow:
 *  1. Ensures the "Edgecore Networks" vendor row exists via `ensureWhiteboxVendor`.
 *  2. Fetches the product list page and enqueues each linked product page.
 *  3. For each product page, extracts model, ports, ASIC and auxiliary specs
 *     from the page text and passes them to `findOrCreateSwitch`.
 *
 * Pages without a title, without a recognisable model name, or without any
 * detected ports are skipped silently. Failed requests are logged and do not
 * abort the crawl.
 *
 * @returns Resolves once the crawl has finished; created/updated counts are
 *   printed to the console.
 */
export async function scrapeEdgecore(): Promise<void> {
  console.log("\n=== Edgecore Networks Scraper ===\n");

  const vendorId = await ensureWhiteboxVendor("Edgecore Networks", "https://www.edge-core.com", {
    isOdm: true,
    ocpMember: true,
    sonicContributor: true,
  });

  // Parse the first capture group of an optional regex match as a base-10 integer.
  const firstGroupAsInt = (m: RegExpMatchArray | null): number | undefined => {
    const group = m?.[1];
    return group === undefined ? undefined : Number.parseInt(group, 10);
  };

  let created = 0;
  let updated = 0;

  const crawler = new CheerioCrawler({
    maxConcurrency: 2,
    maxRequestsPerMinute: 20,
    requestHandlerTimeoutSecs: 30,

    async requestHandler({ request, $, enqueueLinks }) {
      // Product list page — enqueue individual product pages
      if (request.url.includes("productsList")) {
        console.log(" Parsing product list page...");

        // Resolve hrefs against the page that was actually loaded so that
        // relative, root-relative ("/x") and protocol-relative ("//host/x")
        // links all produce well-formed absolute URLs.
        const pageUrl = request.loadedUrl ?? request.url;
        const productLinks = new Set<string>();

        $("a[href*='product']").each((_i, el) => {
          const href = $(el).attr("href");
          if (!href || !(href.includes("productsInfo") || href.includes("product/"))) {
            return;
          }
          try {
            productLinks.add(new URL(href, pageUrl).href);
          } catch {
            // Malformed href: nothing crawlable, skip it.
          }
        });

        console.log(` Found ${productLinks.size} product links`);
        if (productLinks.size > 0) {
          // One batched call instead of one awaited call per link.
          await enqueueLinks({ urls: [...productLinks] });
        }
        return;
      }

      // Individual product page
      const pageText = $("body").text();
      const title = $("h1, .product-title, .prod-name").first().text().trim();
      if (!title) return;

      // Extract model name
      const modelMatch = title.match(/(AS\d{4}[A-Z0-9-]*|DCS\d{3}[A-Z0-9-]*|Minipack\d*|Wedge\d*)/i);
      const model = modelMatch?.[1];
      if (model === undefined) return;

      const portInfo = extractPortsFromSpec(pageText);
      if (portInfo.totalPorts === 0) return;
      const asicInfo = detectAsic(pageText);

      // Extract additional specs
      const powerMatch = pageText.match(/(?:max|maximum)\s*power[:\s]*(\d+)\s*W/i);
      const cpuMatch = pageText.match(/(Intel\s+(?:Xeon|Atom|Core)[^\n,;]+)/i);
      const ramMatch = pageText.match(/(\d+)\s*GB?\s*(?:DDR[34]|RAM|memory)/i);
      const storageMatch = pageText.match(/(\d+)\s*GB?\s*(?:SSD|eMMC|M\.2)/i);
      const switchCapMatch = pageText.match(/switching\s*capacity[:\s]*([\d.]+)\s*Tb/i);

      // The model regex is case-insensitive, so the series prefix must be too;
      // otherwise a lower-case model would yield an empty series.
      const seriesMatch = model.match(/^(AS\d{4}|DCS\d{3}|Minipack|Wedge)/i);
      const series = seriesMatch?.[1] ?? "";

      // Checked before the write only to decide which counter to bump. With
      // more than one concurrent request the counts are informational only.
      const existing = await pool.query(
        `SELECT id FROM switches WHERE model = $1 AND vendor_id = $2`,
        [model, vendorId]
      );
      const isNew = existing.rows.length === 0;

      const switchCapText = switchCapMatch?.[1];
      const cpuText = cpuMatch?.[1];

      await findOrCreateSwitch({
        model,
        vendorId,
        series,
        category: "DataCenter",
        layer: "L3",
        portsConfig: portInfo.portsConfig,
        totalPorts: portInfo.totalPorts,
        maxSpeedGbps: portInfo.maxSpeedGbps,
        switchingCapacityTbps: switchCapText === undefined ? undefined : parseFloat(switchCapText),
        asicVendor: asicInfo.vendor,
        asicModel: asicInfo.model,
        asicSeries: asicInfo.series,
        maxPowerW: firstGroupAsInt(powerMatch),
        cpu: cpuText === undefined ? undefined : cpuText.trim(),
        ramGb: firstGroupAsInt(ramMatch),
        storageGb: firstGroupAsInt(storageMatch),
        sonicCompatible: true,
        isWhitebox: true,
        onieSupport: true,
        supportedNos: ["SONiC", "ONL"],
        transceiverFormFactors: portInfo.formFactors,
        catalogUrl: request.url,
        tags: [
          "whitebox",
          "Edgecore",
          `${portInfo.maxSpeedGbps}G`,
          asicInfo.model,
        ],
        scrapeSource: "edgecore-catalog",
      });

      if (isNew) {
        created++;
        console.log(` + ${model} (${portInfo.maxSpeedGbps}G, ${asicInfo.vendor} ${asicInfo.model})`);
      } else {
        updated++;
      }
    },

    failedRequestHandler({ request }) {
      console.error(` ! Failed: ${request.url}`);
    },
  });

  await crawler.run([PRODUCT_LIST_URL]);
  console.log(`\n Created: ${created}, Updated: ${updated}\n`);
}