- switch-image-playwright.ts + switch-image-fetcher.ts: add filter patterns for /webimage-404/ (Netgear 404 hero), /Brand/ + /cybersecurity.png/ (Moxa brand marketing images not product photos) - sql/047: Moxa 4/4 models — CDN getattachment paths (hotlink-protected, Referer: moxa.com required; R2 proxy needed for production display) - sql/048: UfiSpace 6/6 models — ufispace.com/image/<hash>/ direct PNGs; Brocade G720+G730 — broadcom.com og:image; ICX 7850-48FS — CommScope/Ruckus vistancenetworks.com ImageServer (rand param is cache-bust only, not auth) - sql/049: NVIDIA SN-series 6/6 — docscontent.nvidia.com (SN2201/3700/4700) and S3 direct (SN5400/5600); SN3750-SX via uvation reseller CDN
460 lines
19 KiB
TypeScript
460 lines
19 KiB
TypeScript
/**
|
|
* Switch Image Fetcher — og:image based image discovery for all seeded switches
|
|
*
|
|
* Strategy:
|
|
* 1. For each switch without image_url, build the vendor product page URL
|
|
* 2. Fetch page HTML (plain HTTP) and extract og:image meta tag
|
|
* 3. Validate image URL (must be HTTP(S), not empty)
|
|
* 4. Write image_url + product_page_url to switches table
|
|
*
|
|
* Vendors covered:
|
|
* Cisco (Nexus 9000/9300, NCS 5500/5700, Catalyst 9300/9500, 8000 SP)
|
|
* Arista (7000 series)
|
|
* Juniper (QFX, EX series)
|
|
* NVIDIA Networking (Spectrum SN series — ConnectX skipped)
|
|
* Edgecore, Celestica, Asterfusion (whitebox)
|
|
* Fortinet (FortiSwitch series)
|
|
* Dell, HPE/Aruba, Huawei, Nokia, Extreme, MikroTik, Ubiquiti, FS.COM, Supermicro
|
|
* Alcatel-Lucent Enterprise, Allied Telesis, Netgear, Quanta Cloud Technology, Ufispace
|
|
*
|
|
 * Rate limit: switches are processed sequentially with a 3.5 s pause before every page fetch (no concurrency).
|
|
 * Crawler etiquette: the User-Agent identifies this client as a research bot; this module does not itself fetch or parse robots.txt.
|
|
*/
|
|
import { pool } from "../utils/db";
|
|
|
|
/**
 * Request headers sent with every product-page fetch.
 * The User-Agent announces the client as a research bot and links to the project site.
 */
const HEADERS = {
  "User-Agent": "Mozilla/5.0 (compatible; TIP-Bot/1.0; research; +https://transceiver-db.fichtmueller.org)",
  "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
  "Accept-Language": "en-US,en;q=0.9",
};
|
|
|
|
/**
 * Promise-based delay.
 *
 * @param ms Delay in milliseconds, passed straight to `setTimeout`.
 * @returns A promise that resolves (with no value) once the timer fires.
 */
function sleep(ms: number): Promise<void> {
  return new Promise<void>((resolve) => {
    setTimeout(resolve, ms);
  });
}
|
|
|
|
// ── Product page URL builders ───────────────────────────────────────────────
|
|
|
|
/**
 * Derives the cisco.com product page for a Cisco model number.
 *
 * Matching is case-insensitive and tried in this order:
 *  - `N9K-C…` (e.g. N9K-C9364C, N9K-C93600CD-GX) → Nexus switch page
 *  - `NCS-…`  (e.g. NCS-57C3-MOD, NCS-5504)      → Network Convergence System page
 *  - `C9…`    (e.g. C9300-48UXM, C9500-32C)      → Catalyst switch page
 *  - `8xxx…` chassis and `88-` / `84-` / `86-` line cards → shared 8000-series page
 *
 * @param model Vendor model number.
 * @returns The product page URL, or `null` for any other family. ASR 9000 / A900
 *          line cards are skipped on purpose: their og:image is only the Cisco logo.
 */
function buildCiscoUrl(model: string): string | null {
  const productsRoot = "https://www.cisco.com/c/en/us/products";
  const upper = model.toUpperCase();
  // Lowercase and turn every non-alphanumeric character into a dash.
  const slugify = (text: string): string => text.toLowerCase().replace(/[^a-z0-9]/g, "-");

  const nexusPrefix = "N9K-C";
  if (upper.startsWith(nexusPrefix)) {
    const nexusSlug = slugify(upper.slice(nexusPrefix.length));
    return `${productsRoot}/switches/nexus-${nexusSlug}-switch/index.html`;
  }

  const ncsPrefix = "NCS-";
  if (upper.startsWith(ncsPrefix)) {
    const ncsSlug = slugify(upper.slice(ncsPrefix.length));
    return `${productsRoot}/routers/network-convergence-system-${ncsSlug}/index.html`;
  }

  if (upper.startsWith("C9")) {
    return `${productsRoot}/switches/catalyst-${slugify(upper)}/index.html`;
  }

  // 8000 SP chassis (8101-32FH, 8608, …) and 8800 line cards share one family page.
  const isSeries8000Chassis = /^8[0-9]{3}/.test(upper);
  const isSeries8000LineCard = /^(88|84|86)-/.test(upper);
  if (isSeries8000Chassis || isSeries8000LineCard) {
    return `https://www.cisco.com/site/us/en/products/networking/sdwan-routers/8000-series/index.html`;
  }

  return null;
}
|
|
|
|
/**
 * Builds the al-enterprise.com product page URL for an OmniSwitch model
 * (e.g. "OmniSwitch 6900-X72", "OmniSwitch 9900-C32D").
 *
 * @param model Model name; lowercased, whitespace runs become a dash, and any
 *              remaining character outside `[a-z0-9-]` is dropped.
 * @returns The product page URL (this builder never returns `null`).
 */
function buildAlcatelLucentUrl(model: string): string | null {
  const dashed = model.toLowerCase().split(/\s+/).join("-");
  const cleaned = dashed.replace(/[^a-z0-9-]/g, "");
  return `https://www.al-enterprise.com/en/products/switches/${cleaned}`;
}
|
|
|
|
/**
 * Builds an arista.com product URL of the form `/en/products/{slug}`, where the
 * slug is the lowercased model with every character outside `[a-z0-9-]` replaced
 * by a dash (e.g. "7050CX3-32S" → "7050cx3-32s").
 *
 * NOTE(review): some Arista pages appear to live under a `<series>/` path segment
 * (e.g. `/en/products/7050x3-series/7050cx3-32s`); this builder does not emit
 * that segment — confirm the short form resolves.
 *
 * @param model Arista model number.
 * @returns The product page URL (this builder never returns `null`).
 */
function buildAristaUrl(model: string): string | null {
  const lowered = model.toLowerCase();
  const slug = lowered.replace(/[^a-z0-9-]/g, "-");
  return `https://www.arista.com/en/products/${slug}`;
}
|
|
|
|
/**
 * Builds the juniper.net product page URL for QFX and EX switches.
 * The lowercased model is used verbatim as the page name
 * (e.g. "QFX5130-32CD" → `.../qfx-series/qfx5130-32cd.html`).
 *
 * @param model Juniper model number; the family prefix is matched case-insensitively.
 * @returns The product page URL, or `null` when the model is neither QFX nor EX.
 */
function buildJuniperUrl(model: string): string | null {
  const family = model.toUpperCase();

  let seriesPath: string | null = null;
  if (family.startsWith("QFX")) {
    seriesPath = "qfx-series";
  } else if (family.startsWith("EX")) {
    seriesPath = "ex-series";
  }
  if (seriesPath === null) return null;

  const pageName = model.toLowerCase();
  return `https://www.juniper.net/us/en/products/switches/${seriesPath}/${pageName}.html`;
}
|
|
|
|
/**
 * Builds the nvidia.com Ethernet-switching page for a Spectrum SN-series switch,
 * e.g. "SN5600" → `https://www.nvidia.com/en-us/networking/ethernet-switching/sn5600/`.
 *
 * @param model NVIDIA model name; compared case-insensitively, with every
 *              non-alphanumeric character stripped before use.
 * @returns The product page URL, or `null` for ConnectX / BlueField (adapters,
 *          not switches) and for anything that does not start with "SN".
 */
function buildNvidiaUrl(model: string): string | null {
  const upper = model.toUpperCase();

  const adapterFamilies = ["CONNECTX", "BLUEFIELD"];
  if (adapterFamilies.some((family) => upper.includes(family))) {
    return null;
  }

  const compact = upper.replace(/[^A-Z0-9]/g, "");
  // Only the Spectrum switch series is covered.
  return compact.startsWith("SN")
    ? `https://www.nvidia.com/en-us/networking/ethernet-switching/${compact.toLowerCase()}/`
    : null;
}
|
|
|
|
/**
 * Builds the edge-core.com product page URL (models such as AS7726-32X, DCS810).
 *
 * @param model Edgecore model; lowercased, with every character outside
 *              `[a-z0-9-]` replaced by a dash.
 * @returns The product page URL (this builder never returns `null`).
 */
function buildEdgecoreUrl(model: string): string | null {
  const pageName = model.toLowerCase().replace(/[^a-z0-9-]/g, "-");
  const url = `https://www.edge-core.com/product/${pageName}.html`;
  return url;
}
|
|
|
|
/**
 * Builds the dell.com networking shop URL for a model.
 *
 * @param model Dell model name; lowercased, with every character outside
 *              `[a-z0-9-]` replaced by a dash.
 * @returns The shop page URL (this builder never returns `null`).
 */
function buildDellUrl(model: string): string | null {
  const lowered = model.toLowerCase();
  const slug = lowered.replace(/[^a-z0-9-]/g, "-");
  return `https://www.dell.com/en-us/shop/networking/sf/${slug}`;
}
|
|
|
|
/**
 * Builds the e.huawei.com enterprise switch page URL.
 * Unlike the other builders, the model's letter case is kept; only whitespace
 * runs are replaced by a single dash.
 *
 * @param model Huawei model name.
 * @returns The product page URL (this builder never returns `null`).
 */
function buildHuaweiUrl(model: string): string | null {
  const pageName = model.split(/\s+/).join("-");
  return `https://e.huawei.com/en/products/enterprise-networking/switches/${pageName}`;
}
|
|
|
|
/**
 * URL builder registered for the `nokia` vendor slug.
 * Always yields `null`: Nokia SROS product pages require authentication.
 *
 * NOTE(review): the function name looks like a typo for "Nokia"; it is kept
 * because the dispatcher table references it by this name.
 *
 * @param _model Ignored.
 * @returns Always `null`.
 */
function buildNobelUrl(_model: string): string | null {
  const noPublicPage: string | null = null;
  return noPublicPage;
}
|
|
|
|
/**
 * Builds the extremenetworks.com product page URL.
 *
 * @param model Extreme model name; lowercased, with every character outside
 *              `[a-z0-9-]` replaced by a dash.
 * @returns The product page URL, with trailing slash (this builder never returns `null`).
 */
function buildExtremeUrl(model: string): string | null {
  const productSlug = model.toLowerCase().replace(/[^a-z0-9-]/g, "-");
  const base = "https://www.extremenetworks.com/product/";
  return `${base}${productSlug}/`;
}
|
|
|
|
// MikroTik product-page slugs for models containing '+' cannot be derived from
// the model name — the website appends opaque suffixes (_in, _rm, …) — so the
// known ones are listed explicitly. Models without '+' follow a simple rule
// (lowercase, dashes/whitespace → underscore) implemented in buildMikroTikUrl.
const MIKROTIK_SLUG_MAP: Record<string, string> = {
  "CRS305-1G-4S+": "crs305_1g_4s_in",
  "CRS312-4C+8XG": "crs312_4c_8xg_rm",
  "CRS317-1G-16S+": "crs317_1g_16s_rm",
  "CRS326-24G-2S+": "crs326_24g_2s_in",
  // CRS354-48G-4S+2Q+: URL not discoverable — MikroTik's product listing is JS-rendered
};

/**
 * Builds the mikrotik.com product page URL for a model.
 *
 * Resolution order:
 *  1. Exact (case-sensitive) hit in MIKROTIK_SLUG_MAP.
 *  2. Any other model containing '+' → `null` (slug cannot be derived).
 *  3. Otherwise: lowercase, runs of dashes/whitespace → "_", then every
 *     character outside `[a-z0-9_]` is dropped.
 *
 * @param model MikroTik model name.
 * @returns The product page URL, or `null` when no slug can be determined
 *          (unknown '+' model, or a model that slugs down to the empty string).
 */
function buildMikroTikUrl(model: string): string | null {
  // Own-property lookup: the `in` operator would also accept inherited
  // Object.prototype keys such as "constructor" or "toString" and then
  // interpolate a function into the URL.
  const knownSlug = Object.prototype.hasOwnProperty.call(MIKROTIK_SLUG_MAP, model)
    ? MIKROTIK_SLUG_MAP[model]
    : undefined;
  if (knownSlug !== undefined) {
    return `https://mikrotik.com/product/${knownSlug}`;
  }

  if (model.includes("+")) return null; // other '+' models — URL unknown

  const slug = model.toLowerCase().replace(/[-\s]+/g, "_").replace(/[^a-z0-9_]/g, "");
  return slug ? `https://mikrotik.com/product/${slug}` : null;
}
|
|
|
|
/**
 * Builds the store.ui.com product page URL.
 *
 * @param model Ubiquiti model name; lowercased, with every character outside
 *              `[a-z0-9-]` replaced by a dash.
 * @returns The store page URL (this builder never returns `null`).
 */
function buildUbiquitiUrl(model: string): string | null {
  const lowered = model.toLowerCase();
  const productSlug = lowered.replace(/[^a-z0-9-]/g, "-");
  return `https://store.ui.com/us/en/products/${productSlug}`;
}
|
|
|
|
/**
 * Builds the fs.com product page URL.
 *
 * @param model FS.COM model name; lowercased, with every character outside
 *              `[a-z0-9-]` replaced by a dash.
 * @returns The product page URL (this builder never returns `null`).
 */
function buildFsComUrl(model: string): string | null {
  const pageName = model.toLowerCase().replace(/[^a-z0-9-]/g, "-");
  const url = `https://www.fs.com/products/${pageName}.html`;
  return url;
}
|
|
|
|
/**
 * Builds the supermicro.com switch page URL. The model is upper-cased and used
 * as-is; no other characters are replaced or removed.
 *
 * @param model Supermicro model number.
 * @returns The product page URL (this builder never returns `null`).
 */
function buildSupermicroUrl(model: string): string | null {
  const partNumber = model.toUpperCase();
  const base = "https://www.supermicro.com/en/products/switches/";
  return `${base}${partNumber}`;
}
|
|
|
|
/**
 * Builds the arubanetworks.com switch page URL (used for both the `hpe-aruba`
 * and `aruba` vendor slugs).
 *
 * @param model HPE/Aruba model name; lowercased, with every character outside
 *              `[a-z0-9-]` replaced by a dash.
 * @returns The product page URL, with trailing slash (this builder never returns `null`).
 */
function buildHpeArubaUrl(model: string): string | null {
  const lowered = model.toLowerCase();
  const productSlug = lowered.replace(/[^a-z0-9-]/g, "-");
  return `https://www.arubanetworks.com/products/switches/${productSlug}/`;
}
|
|
|
|
/**
 * Builds the celestica.com networking page URL.
 *
 * @param model Celestica model name; lowercased, with every character outside
 *              `[a-z0-9-]` replaced by a dash.
 * @returns The product page URL (this builder never returns `null`).
 */
function buildCelesticaUrl(model: string): string | null {
  const productSlug = model.toLowerCase().replace(/[^a-z0-9-]/g, "-");
  const base = "https://www.celestica.com/networking/";
  return base + productSlug;
}
|
|
|
|
/**
 * Builds the asterfusion.com product page URL.
 *
 * @param model Asterfusion model name; lowercased, with every character outside
 *              `[a-z0-9-]` replaced by a dash.
 * @returns The product page URL, with trailing slash (this builder never returns `null`).
 */
function buildAsterfusionUrl(model: string): string | null {
  const lowered = model.toLowerCase();
  const productSlug = lowered.replace(/[^a-z0-9-]/g, "-");
  return `https://www.asterfusion.com/products/${productSlug}/`;
}
|
|
|
|
/**
 * URL builder registered for the `fortinet` vendor slug. Always yields `null`.
 *
 * Why no URL: Fortinet product pages are JS-rendered, so og:image only returns
 * the brand icon, and every /products/fortiswitch/<model> URL redirects to the
 * generic /ethernet-switches page. Plain-HTTP image scraping is therefore not
 * possible for this vendor.
 *
 * @param _model Ignored.
 * @returns Always `null`.
 */
function buildFortinetUrl(_model: string): string | null {
  const notScrapable: string | null = null;
  return notScrapable;
}
|
|
|
|
/**
 * Builds the qct.io switch page URL (models such as "QuantaMesh T3048-LY8",
 * "T7032-IX1").
 *
 * @param model Quanta model name; lowercased, whitespace runs become a dash,
 *              and any remaining character outside `[a-z0-9-]` is dropped.
 * @returns The product page URL (this builder never returns `null`).
 */
function buildQuantaUrl(model: string): string | null {
  const dashed = model.toLowerCase().split(/\s+/).join("-");
  const productSlug = dashed.replace(/[^a-z0-9-]/g, "");
  return `https://www.qct.io/product/index/Infrastructure-Product/Networking/Switch/${productSlug}`;
}
|
|
|
|
/**
 * Builds the alliedtelesis.com product page URL,
 * e.g. "AT-x530-28GSX" → `https://www.alliedtelesis.com/us/en/products/at-x530-28gsx`.
 *
 * @param model Allied Telesis model name; lowercased, whitespace runs become a
 *              dash, and any remaining character outside `[a-z0-9-]` is dropped.
 * @returns The product page URL (this builder never returns `null`).
 */
function buildAlliedTelesisUrl(model: string): string | null {
  const dashed = model.toLowerCase().replace(/\s+/g, "-");
  const productSlug = dashed.replace(/[^a-z0-9-]/g, "");
  const base = "https://www.alliedtelesis.com/us/en/products/";
  return `${base}${productSlug}`;
}
|
|
|
|
/**
 * Builds the ufispace.com product page URL.
 *
 * @param model UfiSpace model name; lowercased, whitespace runs become a dash,
 *              and any remaining character outside `[a-z0-9-]` is dropped.
 * @returns The product page URL (this builder never returns `null`).
 */
function buildUfispaceUrl(model: string): string | null {
  const dashed = model.toLowerCase().split(/\s+/).join("-");
  const productSlug = dashed.replace(/[^a-z0-9-]/g, "");
  return `https://www.ufispace.com/products/${productSlug}`;
}
|
|
|
|
/**
 * Builds the netgear.com business switch page URL.
 *
 * @param model Netgear model name; lowercased, whitespace runs become a dash,
 *              and any remaining character outside `[a-z0-9-]` is dropped.
 * @returns The product page URL (this builder never returns `null`).
 */
function buildNetgearUrl(model: string): string | null {
  const dashed = model.toLowerCase().replace(/\s+/g, "-");
  const productSlug = dashed.replace(/[^a-z0-9-]/g, "");
  const base = "https://www.netgear.com/business/products/switches/";
  return base + productSlug;
}
|
|
|
|
// ── URL dispatcher by vendor slug ───────────────────────────────────────────
|
|
|
|
/**
 * Vendor slug → product-page URL builder.
 * Each builder receives the switch model and returns a URL, or `null` when no
 * page can be derived. Several slugs deliberately map to the same builder.
 */
const URL_BUILDERS: Record<string, (m: string) => string | null> = {
  // Major network vendors
  "cisco": buildCiscoUrl,
  "arista": buildAristaUrl,
  "juniper": buildJuniperUrl,
  "nvidia-networking": buildNvidiaUrl,
  "dell": buildDellUrl,
  "huawei": buildHuaweiUrl,
  "nokia": buildNobelUrl,
  "extreme": buildExtremeUrl,
  "fortinet": buildFortinetUrl,

  // HPE / Aruba (two slugs, one builder)
  "hpe-aruba": buildHpeArubaUrl,
  "aruba": buildHpeArubaUrl,

  // Alcatel-Lucent Enterprise (the DB uses the long slug; the others are aliases)
  "alcatel-lucent": buildAlcatelLucentUrl,
  "alcatel-lucent-enterprise": buildAlcatelLucentUrl,
  "ale": buildAlcatelLucentUrl,

  // Whitebox / ODM
  "edgecore": buildEdgecoreUrl,
  "celestica": buildCelesticaUrl,
  "asterfusion": buildAsterfusionUrl,
  "quanta-cloud-technology": buildQuantaUrl,
  "ufispace": buildUfispaceUrl,
  "wistron": (_model: string): string | null => null, // no public product pages

  // SMB / other
  "mikrotik": buildMikroTikUrl,
  "ubiquiti": buildUbiquitiUrl,
  "fs-com": buildFsComUrl,
  "supermicro": buildSupermicroUrl,
  "allied-telesis": buildAlliedTelesisUrl,
  "netgear": buildNetgearUrl,
};
|
|
|
|
// ── Generic marketing image detector ────────────────────────────────────────
|
|
// Rejects URLs that are clearly stock photos, homepages, lifestyle shots or
|
|
// any other non-product image. Patterns found from real-world scrapes.
|
|
|
|
/**
 * Patterns grouped by the kind of non-product image they reject.
 * Group order and the order inside each group define the order of the
 * flattened GENERIC_IMAGE_PATTERNS list.
 */
const GENERIC_IMAGE_PATTERN_GROUPS: Record<string, RegExp[]> = {
  // Logo / brand marks (never product photos); SVG logos often sit under /svg/ or end in .svg
  logos: [
    /[-/_]logo[-_.]|\/logos?\//i,
    /cisco[-_]?logo/i,
    /juniper[-_]networks[-_]logo/i,
    /arista[-_]?logo/i,
    /brand[-_]?logo/i,
    /company[-_]?logo/i,
    /\/svg\//i,
    /\.svg(\?|$)/i,
  ],
  // Alcatel-Lucent Enterprise generic hero images
  alcatelLucentHero: [
    /naas-homepag/i,
    /al-enterprise.*\/images\/naas/i,
  ],
  // Generic OG / social sharing defaults
  socialDefaults: [
    /og[-_]default/i,
    /default[-_](?:og|social|share|image)/i,
    /site[-_](?:default|image|og)/i,
    /social[-_](?:default|share)/i,
    /twitter[-_]default/i,
    /default[-_]thumbnail/i,
  ],
  // Homepage / banner / lifestyle
  marketing: [
    /\/homepage\//i,
    /hero[-_](?:banner|bg|background|image)/i,
    /banner[-_](?:bg|background)/i,
    /lifestyle/i,
    /stock[-_]?photo/i,
    /people[-_](?:at|in|with)/i,
  ],
  // Placeholder / fallback
  placeholders: [
    /placeholder/i,
    /no[-_]?image/i,
    /image[-_]?not[-_]?found/i,
    /\/fallback[/-]/i,
    /missing[-_]image/i,
  ],
  // Generic about/press/brand pages
  pressKits: [
    /\/press[-_]kit/i,
    /\/media[-_]kit/i,
  ],
  // Vendor error / 404 graphics
  errorGraphics: [
    /404[-_]error/i,
    /error[-_]graphic/i,
  ],
  // Navigation icon libraries
  iconLibraries: [
    /\/icon[-_]library\//i,
  ],
  // Diagrams and illustrations
  illustrations: [
    /[-_]illustration[._]/i,
  ],
  // Vendor 404 hero images
  notFoundHero: [
    /webimage-404/i,
  ],
  // Moxa brand/marketing images (not product photos)
  moxaBrand: [
    /\/Brand\//i,
    /cybersecurity\.png/i,
  ],
  // Cookie consent / GDPR overlay images
  cookieConsent: [
    /cdn\.cookielaw\.org/i,
    /cookiebot\.com/i,
    /trustarc\.com/i,
    /consent-manager/i,
  ],
};

/**
 * Flat list of URL patterns identifying stock photos, homepages, lifestyle
 * shots, logos and other non-product images. Patterns come from real-world scrapes.
 */
const GENERIC_IMAGE_PATTERNS: RegExp[] = Object.values(GENERIC_IMAGE_PATTERN_GROUPS).flat();
|
|
|
|
/**
 * Tells whether an image URL looks like a logo, marketing, placeholder or other
 * non-product image.
 *
 * @param url Image URL to classify.
 * @returns `true` as soon as any entry of GENERIC_IMAGE_PATTERNS matches, else `false`.
 */
function isGenericImage(url: string): boolean {
  for (const pattern of GENERIC_IMAGE_PATTERNS) {
    if (pattern.test(url)) {
      return true;
    }
  }
  return false;
}
|
|
|
|
// ── og:image extractor ──────────────────────────────────────────────────────
|
|
|
|
/** Decodes HTML-escaped ampersands (`&amp;`, `&#38;`, `&#x26;`) found in attribute values. */
function decodeHtmlAmpersands(value: string): string {
  return value.replace(/&(?:amp|#0*38|#x0*26);/gi, "&");
}

/** Parses one tag's attributes into a lowercase-name → raw-value map (first occurrence wins). */
function readTagAttributes(tag: string): Map<string, string> {
  const attrs = new Map<string, string>();
  // name = "double" | 'single' | unquoted
  const attrRe = /([^\s"'<>\/=]+)\s*=\s*(?:"([^"]*)"|'([^']*)'|([^\s"'>]+))/g;
  let match: RegExpExecArray | null;
  while ((match = attrRe.exec(tag)) !== null) {
    const name = (match[1] ?? "").toLowerCase();
    if (!attrs.has(name)) {
      attrs.set(name, match[2] ?? match[3] ?? match[4] ?? "");
    }
  }
  return attrs;
}

/** Collects, in document order, the non-empty `content` of every `<meta>` whose `property` or `name` equals `key`. */
function findMetaContents(html: string, key: string): string[] {
  const contents: string[] = [];
  const metaRe = /<meta\b[^>]*>/gi;
  let tag: RegExpExecArray | null;
  while ((tag = metaRe.exec(html)) !== null) {
    const attrs = readTagAttributes(tag[0]);
    const identifiers = [attrs.get("property"), attrs.get("name")];
    const content = attrs.get("content");
    if (content && identifiers.some((id) => id?.toLowerCase() === key)) {
      contents.push(content);
    }
  }
  return contents;
}

/**
 * Extracts the best product image URL from a page's HTML.
 *
 * Candidates are tried in this order and the first one that survives
 * validation is returned:
 *  1. `og:image` meta tags
 *  2. `twitter:image` meta tags
 *  3. the first `<img src="…">` whose path contains product/switch/router/hardware
 *     and ends in .jpg/.jpeg/.png/.webp
 *
 * Meta tags are matched regardless of attribute order, quote style or extra
 * attributes. Escaped ampersands in the URL are decoded. Absolute http(s) URLs
 * are returned unchanged; anything else is resolved against `baseUrl`.
 * A candidate is rejected when it is empty, not http(s), unparsable, or
 * flagged by `isGenericImage` (logos / marketing images).
 *
 * @param html    Raw HTML of the product page.
 * @param baseUrl URL the HTML was fetched from; used to resolve relative image paths.
 * @returns An absolute http(s) image URL, or `null` when no acceptable image is found.
 */
function extractOgImage(html: string, baseUrl: string): string | null {
  const resolve = (raw: string): string | null => {
    const url = decodeHtmlAmpersands(raw.trim());
    if (!url) return null;

    let abs: string;
    if (/^https?:\/\//i.test(url)) {
      abs = url; // already absolute — keep the vendor's exact spelling
    } else {
      // Root-relative, protocol-relative and document-relative paths.
      try {
        const parsed = new URL(url, baseUrl);
        if (parsed.protocol !== "http:" && parsed.protocol !== "https:") return null;
        abs = parsed.toString();
      } catch {
        return null;
      }
    }

    if (isGenericImage(abs)) return null; // reject logos/marketing images
    return abs;
  };

  const candidates: string[] = [
    ...findMetaContents(html, "og:image"),
    ...findMetaContents(html, "twitter:image"),
  ];

  // Last resort: product-looking <img> in the page body.
  const imgMatch = html.match(
    /<img[^>]+src="([^"]+(?:product|switch|router|hardware)[^"]*\.(?:jpg|jpeg|png|webp))"/i,
  );
  if (imgMatch?.[1]) candidates.push(imgMatch[1]);

  for (const candidate of candidates) {
    const resolved = resolve(candidate);
    if (resolved) return resolved;
  }
  return null;
}
|
|
|
|
// ── HTTP fetch with timeout ─────────────────────────────────────────────────
|
|
|
|
/**
 * Downloads a page and returns its body as text.
 *
 * The request uses the shared HEADERS, follows redirects and is aborted after
 * 20 seconds. Failures are deliberately collapsed into `null` so the caller can
 * treat every kind of fetch problem the same way.
 *
 * @param url Page URL to fetch.
 * @returns The response body, or `null` on a non-2xx status, a timeout, or any
 *          other error thrown while fetching or reading the body.
 */
async function fetchPageHtml(url: string): Promise<string | null> {
  const timeoutMs = 20_000;
  try {
    const response = await fetch(url, {
      redirect: "follow",
      headers: HEADERS,
      signal: AbortSignal.timeout(timeoutMs),
    });
    return response.ok ? await response.text() : null;
  } catch {
    return null;
  }
}
|
|
|
|
// ── Main scraper ────────────────────────────────────────────────────────────
|
|
|
|
/**
 * Finds and stores product images for every switch that has none.
 *
 * For each row of `switches` whose `image_url` is NULL or empty (optionally
 * limited to one vendor), the function:
 *  1. takes the stored `product_page_url`, or derives one via URL_BUILDERS;
 *  2. waits 3.5 s, then fetches the page;
 *  3. extracts an image URL with `extractOgImage`;
 *  4. writes `image_url`, `product_page_url` (only if not already set) and
 *     `assets_scraped_at` back to the row.
 *
 * When a page yields no image, a freshly derived `product_page_url` is still
 * saved. NOTE(review): the SELECT filters on `image_url` only, so such rows are
 * picked up again on the next run — confirm whether that is intended.
 *
 * Progress and a final summary are printed to the console.
 *
 * @param targetVendorSlug Optional vendor slug; when given, only that vendor's switches are processed.
 * @returns Resolves when all candidate rows have been processed. Database errors propagate.
 */
export async function fetchSwitchImages(targetVendorSlug?: string): Promise<void> {
  type PendingSwitchRow = {
    id: string;
    model: string;
    series: string;
    vendor_slug: string;
    vendor_name: string;
    product_page_url: string | null;
    image_url: string | null;
  };

  console.log("=== Switch Image Fetcher ===\n");

  // Optional vendor restriction, bound as $1.
  let vendorFilter = "";
  const params: string[] = [];
  if (targetVendorSlug) {
    vendorFilter = `AND v.slug = $1`;
    params.push(targetVendorSlug);
  }

  const result = await pool.query<PendingSwitchRow>(
    `SELECT sw.id, sw.model, sw.series, sw.product_page_url, sw.image_url,
            v.slug AS vendor_slug, v.name AS vendor_name
     FROM switches sw
     JOIN vendors v ON v.id = sw.vendor_id
     WHERE (sw.image_url IS NULL OR sw.image_url = '')
     ${vendorFilter}
     ORDER BY v.slug, sw.model`,
    params,
  );
  const pending = result.rows;

  if (pending.length === 0) {
    console.log(" All switches already have images — nothing to do.");
    return;
  }

  console.log(` ${pending.length} switches need images\n`);

  const tally = { found: 0, skipped: 0, errors: 0 };

  for (const row of pending) {
    const label = `${row.vendor_name} ${row.model}`;

    // A stored page URL wins over a derived one.
    const buildUrl = URL_BUILDERS[row.vendor_slug];
    const derivedUrl = buildUrl ? buildUrl(row.model) : null;
    const productUrl = row.product_page_url || derivedUrl;

    if (!productUrl) {
      console.log(` [SKIP] ${label} — no URL pattern`);
      tally.skipped += 1;
      continue;
    }

    await sleep(3500); // 1 req/3.5s — server-friendly rate limit

    const html = await fetchPageHtml(productUrl);
    if (!html) {
      console.log(` [FAIL] ${label} — HTTP error`);
      tally.errors += 1;
      continue;
    }

    const imageUrl = extractOgImage(html, productUrl);

    if (imageUrl) {
      await pool.query(
        `UPDATE switches
         SET image_url = $2, product_page_url = COALESCE(product_page_url, $3), assets_scraped_at = NOW()
         WHERE id = $1`,
        [row.id, imageUrl, productUrl],
      );
      console.log(` [OK] ${label} → ${imageUrl.slice(0, 80)}`);
      tally.found += 1;
      continue;
    }

    console.log(` [MISS] ${label} — no og:image on ${productUrl}`);
    tally.skipped += 1;
    // Record the derived page URL even though no image was found.
    if (!row.product_page_url) {
      await pool.query(
        `UPDATE switches SET product_page_url = $2, assets_scraped_at = NOW() WHERE id = $1`,
        [row.id, productUrl],
      );
    }
  }

  console.log(`\n=== Switch Image Fetcher Complete ===`);
  console.log(` Images found: ${tally.found}`);
  console.log(` Skipped/miss: ${tally.skipped}`);
  if (tally.errors > 0) {
    console.warn(` Errors: ${tally.errors}`);
  }
}
|
|
|
|
// CLI entry point: runs only when this file is executed directly.
// Usage: pass `--vendor=<slug>` to restrict the run to a single vendor.
if (require.main === module) {
  const vendorFlag = process.argv.find((arg) => arg.startsWith("--vendor="));
  const vendor = vendorFlag?.split("=")[1];

  // Log the failure, release the pool, and exit with a non-zero status.
  const onFatal = (err: unknown): void => {
    console.error("Fatal:", err);
    pool.end();
    process.exit(1);
  };

  fetchSwitchImages(vendor)
    .then(() => pool.end())
    .catch(onFatal);
}
|