/**
 * Edgecore Networks Product Catalog Scraper
 *
 * Scrapes switch product pages from edge-core.com for:
 * - Product specs (ports, ASIC, power, dimensions)
 * - Transceiver form factor compatibility
 * - Datasheet URLs
 *
 * Source: https://www.edge-core.com/productsList.php?cls=1
 */

import { CheerioCrawler } from "crawlee";
import { pool, ensureWhiteboxVendor, findOrCreateSwitch } from "../utils/db";

/** Origin of the Edgecore public website. */
const BASE_URL = "https://www.edge-core.com";

/** Switch product listing page; the crawl starts here. */
const PRODUCT_LIST_URL = `${BASE_URL}/productsList.php?cls=1`;

/**
 * Extract port configuration from free-form spec text.
 *
 * Recognises `<count> x <speed>G` groups, optionally suffixed with `bE` or
 * `b/s` and followed by a transceiver form factor, e.g. "32x 100GbE QSFP28" or
 * "48x25G SFP28 + 8x100G QSFP28". Matching is case-insensitive.
 *
 * Capacities written in gigabytes (an upper-case "GB" that does not start
 * "GBASE", e.g. "2x 16GB DDR4") are skipped, so memory and storage figures in
 * the same text are not counted as ports.
 *
 * @param specText Text to scan; may be an entire page body.
 * @returns `portsConfig` maps `<speed>G_<formFactor>` to a port count (the
 *   form-factor part falls back to `<speed>G` when none follows the speed),
 *   `totalPorts` is the sum of all counts, `maxSpeedGbps` is the highest speed
 *   seen (0 when nothing matched) and `formFactors` lists the distinct
 *   upper-cased form factors in first-seen order.
 */
function extractPortsFromSpec(specText: string): {
  portsConfig: Record<string, number>;
  totalPorts: number;
  maxSpeedGbps: number;
  formFactors: string[];
} {
  const portsConfig: Record<string, number> = {};
  let totalPorts = 0;
  let maxSpeedGbps = 0;
  const formFactors: string[] = [];

  // Group 3 is a zero-width probe: it captures a "B"/"b" that directly follows
  // the unit (unless it begins "BASE") without consuming it. Captures keep the
  // source casing, which lets the loop tell gigabytes ("GB") from gigabits
  // ("Gb") even though the pattern as a whole is case-insensitive.
  const portPattern =
    /(\d+)\s*x\s*(\d+)\s*G(?:bE|b\/s)?(?=(B(?!ASE))?)\s*(QSFP-DD|QSFP28|QSFP\+|QSFP56|SFP28|SFP\+|SFP56|OSFP|CFP2)?/gi;
  let match: RegExpExecArray | null;

  while ((match = portPattern.exec(specText)) !== null) {
    const [, countText = "", speedText = "", byteSuffix, formFactorText] = match;

    // "16GB", "240GB", ... are capacities, not port speeds.
    if (byteSuffix === "B") continue;

    const count = parseInt(countText, 10);
    const speed = parseInt(speedText, 10);
    const formFactor = formFactorText?.toUpperCase();
    const key = `${speed}G_${formFactor ?? `${speed}G`}`;

    portsConfig[key] = (portsConfig[key] ?? 0) + count;
    totalPorts += count;
    maxSpeedGbps = Math.max(maxSpeedGbps, speed);

    if (formFactor !== undefined && !formFactors.includes(formFactor)) {
      formFactors.push(formFactor);
    }
  }

  return { portsConfig, totalPorts, maxSpeedGbps, formFactors };
}

/**
 * Known switching-ASIC signatures, tried in order; the first hit wins, so more
 * specific names (e.g. "Tomahawk 5", "Tomahawk+") must precede the generic
 * ones ("Tomahawk"). None of the patterns is global/sticky, so `test()` is
 * stateless and the table can be shared across calls.
 */
const ASIC_PATTERNS: ReadonlyArray<{ pattern: RegExp; vendor: string; model: string; series: string }> = [
  // Broadcom Tomahawk / Trident are part of the StrataXGS family.
  { pattern: /tomahawk\s*5/i, vendor: "Broadcom", model: "Tomahawk 5", series: "StrataXGS" },
  { pattern: /tomahawk\s*4/i, vendor: "Broadcom", model: "Tomahawk 4", series: "StrataXGS" },
  { pattern: /tomahawk\s*3/i, vendor: "Broadcom", model: "Tomahawk 3", series: "StrataXGS" },
  { pattern: /tomahawk\s*2/i, vendor: "Broadcom", model: "Tomahawk 2", series: "StrataXGS" },
  { pattern: /tomahawk\+/i, vendor: "Broadcom", model: "Tomahawk+", series: "StrataXGS" },
  { pattern: /tomahawk/i, vendor: "Broadcom", model: "Tomahawk", series: "StrataXGS" },
  { pattern: /trident\s*(4|iv)/i, vendor: "Broadcom", model: "Trident 4", series: "StrataXGS" },
  { pattern: /trident\s*(3|iii)/i, vendor: "Broadcom", model: "Trident III", series: "StrataXGS" },
  // Jericho is the StrataDNX family.
  { pattern: /jericho\s*2/i, vendor: "Broadcom", model: "Jericho2", series: "StrataDNX" },
  { pattern: /spectrum-?4/i, vendor: "NVIDIA", model: "Spectrum-4", series: "Spectrum" },
  { pattern: /teralynx/i, vendor: "Marvell", model: "Teralynx", series: "Teralynx" },
  { pattern: /prestera/i, vendor: "Marvell", model: "Prestera", series: "Prestera" },
  // Intel (Barefoot) Tofino, so these pages no longer fall through to the Broadcom default.
  { pattern: /tofino\s*2/i, vendor: "Intel", model: "Tofino 2", series: "Tofino" },
  { pattern: /tofino/i, vendor: "Intel", model: "Tofino", series: "Tofino" },
];

/**
 * Detect the switching ASIC mentioned in product page text.
 *
 * @param text Text to search (case-insensitive).
 * @returns Vendor, model and series of the first matching signature, or
 *   `{ vendor: "Broadcom", model: "Unknown", series: "" }` when none matches.
 */
function detectAsic(text: string): { vendor: string; model: string; series: string } {
  const hit = ASIC_PATTERNS.find(({ pattern }) => pattern.test(text));
  if (hit === undefined) {
    return { vendor: "Broadcom", model: "Unknown", series: "" };
  }
  return { vendor: hit.vendor, model: hit.model, series: hit.series };
}

/** Return capture group 1 of the first match of `pattern` in `text`, or `undefined` when absent. */
function firstCapture(text: string, pattern: RegExp): string | undefined {
  return text.match(pattern)?.[1];
}

/** Parse a base-10 integer from an optional capture, passing `undefined` through. */
function toInt(value: string | undefined): number | undefined {
  return value === undefined ? undefined : parseInt(value, 10);
}

/**
 * Crawl the Edgecore switch catalog and store every recognised switch.
 *
 * Starts at `PRODUCT_LIST_URL`. The listing page is only used to discover
 * product links; each product page is parsed for model, ports, ASIC and a few
 * hardware specs and handed to `findOrCreateSwitch`. A product page is skipped
 * when it has no title, when the title contains no recognised model name, or
 * when no port groups are found in its text.
 *
 * Logs one line per newly created switch and a created/updated summary at the
 * end. Requests that fail are logged and otherwise ignored.
 */
export async function scrapeEdgecore(): Promise<void> {
  console.log("\n=== Edgecore Networks Scraper ===\n");

  const vendorId = await ensureWhiteboxVendor("Edgecore Networks", "https://www.edge-core.com", {
    isOdm: true,
    ocpMember: true,
    sonicContributor: true,
  });

  let created = 0;
  let updated = 0;

  const crawler = new CheerioCrawler({
    maxConcurrency: 2,
    maxRequestsPerMinute: 20,
    requestHandlerTimeoutSecs: 30,

    async requestHandler({ request, $, enqueueLinks }) {
      // Product list page: enqueue individual product pages.
      if (request.url.includes("productsList")) {
        console.log("  Parsing product list page...");

        // Resolve against the page URL so relative ("productsInfo.php?..."),
        // root-relative ("/product/...") and absolute hrefs all become one
        // canonical absolute URL; the Set removes duplicates.
        const pageUrl = request.loadedUrl ?? request.url;
        const productLinks = new Set<string>();
        $("a[href*='product']").each((_i, el) => {
          const href = $(el).attr("href");
          if (!href || !(href.includes("productsInfo") || href.includes("product/"))) return;
          try {
            productLinks.add(new URL(href, pageUrl).href);
          } catch {
            // Malformed href: nothing to enqueue.
          }
        });

        console.log(`  Found ${productLinks.size} product links`);
        if (productLinks.size > 0) {
          await enqueueLinks({ urls: [...productLinks] });
        }
        return;
      }

      // Individual product page.
      const pageText = $("body").text();
      const title = $("h1, .product-title, .prod-name").first().text().trim();
      if (!title) return;

      // Model name. For Minipack/Wedge the suffix is kept when a digit follows
      // the family name, so e.g. "Wedge100BF-32X" is not truncated to
      // "Wedge100" (which would merge distinct products into one record).
      const model = firstCapture(
        title,
        /(AS\d{4}[A-Z0-9-]*|DCS\d{3}[A-Z0-9-]*|(?:Minipack|Wedge)(?:\d[A-Z0-9-]*)?)/i
      );
      if (model === undefined) return;

      const portInfo = extractPortsFromSpec(pageText);
      if (portInfo.totalPorts === 0) return;

      const asicInfo = detectAsic(pageText);

      // Additional specs; each stays undefined when the page does not state it.
      const maxPowerW = toInt(firstCapture(pageText, /(?:max|maximum)\s*power[:\s]*(\d+)\s*W/i));
      const cpu = firstCapture(pageText, /(Intel\s+(?:Xeon|Atom|Core)[^\n,;]+)/i)?.trim();
      const ramGb = toInt(firstCapture(pageText, /(\d+)\s*GB?\s*(?:DDR[34]|RAM|memory)/i));
      const storageGb = toInt(firstCapture(pageText, /(\d+)\s*GB?\s*(?:SSD|eMMC|M\.2)/i));
      const switchCapText = firstCapture(pageText, /switching\s*capacity[:\s]*([\d.]+)\s*Tb/i);
      const switchingCapacityTbps = switchCapText === undefined ? undefined : parseFloat(switchCapText);

      // Case-insensitive, like the model pattern above.
      const series = firstCapture(model, /^(AS\d{4}|DCS\d{3}|Minipack|Wedge)/i) ?? "";

      // Looked up before the write so the run can report created vs. updated.
      const existing = await pool.query(
        `SELECT id FROM switches WHERE model = $1 AND vendor_id = $2`,
        [model, vendorId]
      );
      const isNew = existing.rows.length === 0;

      await findOrCreateSwitch({
        model,
        vendorId,
        series,
        category: "DataCenter",
        layer: "L3",
        portsConfig: portInfo.portsConfig,
        totalPorts: portInfo.totalPorts,
        maxSpeedGbps: portInfo.maxSpeedGbps,
        switchingCapacityTbps,
        asicVendor: asicInfo.vendor,
        asicModel: asicInfo.model,
        asicSeries: asicInfo.series,
        maxPowerW,
        cpu,
        ramGb,
        storageGb,
        // The following are asserted for every Edgecore switch, not read from the page.
        sonicCompatible: true,
        isWhitebox: true,
        onieSupport: true,
        supportedNos: ["SONiC", "ONL"],
        transceiverFormFactors: portInfo.formFactors,
        catalogUrl: request.url,
        tags: ["whitebox", "Edgecore", `${portInfo.maxSpeedGbps}G`, asicInfo.model],
        scrapeSource: "edgecore-catalog",
      });

      if (isNew) {
        created++;
        console.log(`  + ${model} (${portInfo.maxSpeedGbps}G, ${asicInfo.vendor} ${asicInfo.model})`);
      } else {
        updated++;
      }
    },

    failedRequestHandler({ request }, error) {
      console.error(`  ! Failed: ${request.url} (${error.message})`);
    },
  });

  await crawler.run([PRODUCT_LIST_URL]);
  console.log(`\n  Created: ${created}, Updated: ${updated}\n`);
}