From a0a7a97d8331aad6858772069fbd2159274016aa Mon Sep 17 00:00:00 2001 From: Rene Fichtmueller Date: Mon, 20 Apr 2026 22:44:08 +0200 Subject: [PATCH] feat: switch image fetcher + og:image scheduler job + dashboard thumbnail column - Add switch-image-fetcher.ts: og:image-based image discovery for all 86 seeded switches (covers Cisco, Arista, Juniper, NVIDIA, Edgecore, Celestica, Asterfusion, Dell, HPE/Aruba, Huawei, Nokia, Extreme, MikroTik, Ubiquiti, FS.COM, Supermicro) - Wire fetchSwitchImages() into scheduler as scrape:images:switches (daily 08:30 UTC) - Dashboard: add 48px thumbnail column to switch table (lazy img with gear icon fallback) --- packages/dashboard/index.html | 14 +- packages/scraper/src/scheduler.ts | 14 +- .../src/scrapers/switch-image-fetcher.ts | 312 ++++++++++++++++++ 3 files changed, 334 insertions(+), 6 deletions(-) create mode 100644 packages/scraper/src/scrapers/switch-image-fetcher.ts diff --git a/packages/dashboard/index.html b/packages/dashboard/index.html index 42e80da..ba50c14 100644 --- a/packages/dashboard/index.html +++ b/packages/dashboard/index.html @@ -1172,7 +1172,7 @@
- +
ModelVendorSeriesCategoryPortsMax SpeedCapacityASICStatus
🖼ModelVendorSeriesCategoryPortsMax SpeedCapacityASICStatus
@@ -3825,8 +3825,14 @@ function searchSwitches() { var statusColors = { Active: 'b-green', 'EoS_Announced': 'b-yellow', EoL: 'b-red', Legacy: 'b-neutral' }; var maxSpd = s.max_speed_gbps >= 1000 ? (s.max_speed_gbps/1000) + 'T' : s.max_speed_gbps + 'G'; var cap = s.switching_capacity_tbps ? s.switching_capacity_tbps + ' Tbps' : '—'; + // Thumbnail — show image if available, otherwise a switch icon + var thumb = s.image_url + ? '' + : ''; + var modelTitle = s.description ? ' title="' + esc(s.description.slice(0, 120)) + '"' : ''; return '' - + '' + esc(s.model) + '' + + '' + thumb + '' + + '' + esc(s.model) + '' + '' + esc(s.vendor_name || '') + '' + '' + esc(s.series || '') + '' + '' + esc(s.category || '') + '' @@ -3836,13 +3842,13 @@ function searchSwitches() { + '' + esc(s.asic_vendor ? s.asic_vendor + (s.asic_model ? ' ' + s.asic_model : '') : '—') + '' + '' + esc(s.lifecycle_status || 'Active') + '' + ''; - }).join('') || 'No switches found'); + }).join('') || 'No switches found'); el('sw-table').querySelectorAll('tr.clickable').forEach(function(row) { row.addEventListener('click', function() { openSwitchDetail(this.getAttribute('data-swid')); }); }); }).catch(function(err) { - buildDOM(el('sw-table'), 'Error loading switches'); + buildDOM(el('sw-table'), 'Error loading switches'); }); } diff --git a/packages/scraper/src/scheduler.ts b/packages/scraper/src/scheduler.ts index e4cb51a..de8e070 100644 --- a/packages/scraper/src/scheduler.ts +++ b/packages/scraper/src/scheduler.ts @@ -83,6 +83,8 @@ export async function registerSchedules(boss: PgBoss): Promise { "scrape:compat:edgecore", // ── Switch enrichment (every 12h) ───────────────────────────────── "scrape:assets:switches", + // ── Switch og:image fetcher (daily, after switch-assets) ────────── + "scrape:images:switches", // ── eBay enrichment (every 6h) ──────────────────────────────────── "enrich:ebay-transceivers", "enrich:ebay-switches", @@ -219,6 +221,8 @@ export async function registerSchedules(boss: PgBoss): Promise { // ══════════════════════════════════════════════════════════════════════ await boss.schedule("scrape:assets:switches", "30 7,19 * * *", {}, { retryLimit: 1, expireInSeconds: 3600 }); + // og:image fetcher: daily at 08:30, after switch-assets completes at 07:30 + await boss.schedule("scrape:images:switches", "30 8 * * *", {}, { retryLimit: 1, expireInSeconds: 7200 }); // ══════════════════════════════════════════════════════════════════════ // EBAY ENRICHMENT — every 6h @@ -292,7 +296,7 @@ export async function registerSchedules(boss: PgBoss): Promise { // Re-research approved equivalences: daily at 03:00 UTC, processes 200 items per run await boss.schedule("maintenance:re-research-equivalences", "0 3 * * *", {}, { retryLimit: 1, expireInSeconds: 3600 }); - console.log("All schedules registered — 24/7 continuous scraping (57 jobs)"); + console.log("All schedules registered — 24/7 continuous scraping (58 jobs)"); } export async function registerWorkers(boss: PgBoss): Promise { @@ -314,6 +318,7 @@ export async function registerWorkers(boss: PgBoss): Promise { const { scrapeUfiSpace } = await import("./scrapers/ufispace"); const { scrapeEdgecore } = await import("./scrapers/edgecore"); const { scrapeSwitchAssets } = await import("./scrapers/switch-assets"); + const { fetchSwitchImages } = await import("./scrapers/switch-image-fetcher"); // ── Prediction signal scrapers ──────────────────────────────────────── const { scrapeSecEdgar } = await import("./scrapers/sec-edgar"); const { scrapeGithubSignals } = await import("./scrapers/github-signals"); @@ -495,6 +500,11 @@ export async function registerWorkers(boss: PgBoss): Promise { await scrapeSwitchAssets(); }); + await boss.work("scrape:images:switches", async () => { + console.log(`[${new Date().toISOString()}] Running: Switch og:image fetcher`); + await fetchSwitchImages(); + }); + // ── eBay enrichment ─────────────────────────────────────────────────── await boss.work("enrich:ebay-transceivers", async () => { @@ -1116,5 +1126,5 @@ export async function registerWorkers(boss: PgBoss): Promise { console.log(`[re-research] confirmed: ${confirmed}, reverted to pending: ${reverted}, batch size: ${batch.rows.length}`); }); - console.log("All workers registered (76 jobs, 24/7 continuous)"); + console.log("All workers registered (77 jobs, 24/7 continuous)"); } diff --git a/packages/scraper/src/scrapers/switch-image-fetcher.ts b/packages/scraper/src/scrapers/switch-image-fetcher.ts new file mode 100644 index 0000000..ccd5603 --- /dev/null +++ b/packages/scraper/src/scrapers/switch-image-fetcher.ts @@ -0,0 +1,312 @@ +/** + * Switch Image Fetcher — og:image based image discovery for all seeded switches + * + * Strategy: + * 1. For each switch without image_url, build the vendor product page URL + * 2. Fetch page HTML (plain HTTP) and extract og:image meta tag + * 3. Validate image URL (must be HTTP(S), not empty) + * 4. Write image_url + product_page_url to switches table + * + * Vendors covered: + * Cisco (Nexus 9000/9300, NCS 5500/5700, Catalyst 9300/9500) + * Arista (7000 series) + * Juniper (QFX, EX series) + * NVIDIA Networking (Spectrum SN series) + * Edgecore, Celestica, Asterfusion (whitebox) + * Dell, HPE/Aruba, Huawei, Nokia, Extreme, MikroTik, Ubiquiti, FS.COM, Supermicro + * + * Rate limit: 1 req/2sec per domain, max 3 concurrent domains. + * Respects robots.txt: User-Agent identifies as research bot. + */ +import { pool } from "../utils/db"; + +const HEADERS = { + "User-Agent": "Mozilla/5.0 (compatible; TIP-Bot/1.0; research; +https://transceiver-db.fichtmueller.org)", + Accept: "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", + "Accept-Language": "en-US,en;q=0.9", +}; + +function sleep(ms: number): Promise { + return new Promise((r) => setTimeout(r, ms)); +} + +// ── Product page URL builders ─────────────────────────────────────────────── + +function buildCiscoUrl(model: string): string | null { + const m = model.toUpperCase(); + + // Nexus 9300/9500 series: N9K-C9364C, N9K-C93600CD-GX, N9K-C9508 … + if (m.startsWith("N9K-C")) { + const slug = m.replace("N9K-C", "").toLowerCase().replace(/[^a-z0-9]/g, "-"); + return `https://www.cisco.com/c/en/us/products/switches/nexus-${slug}-switch/index.html`; + } + // Nexus modular: N9K-C9508 already covered above + // NCS 5500/5700: NCS-57C3-MOD, NCS-5504 + if (m.startsWith("NCS-")) { + const num = m.replace("NCS-", "").toLowerCase().replace(/[^a-z0-9]/g, "-"); + return `https://www.cisco.com/c/en/us/products/routers/network-convergence-system-${num}/index.html`; + } + // Catalyst: C9300-48UXM, C9500-32C + if (m.startsWith("C9")) { + const slug = m.toLowerCase().replace(/[^a-z0-9]/g, "-"); + return `https://www.cisco.com/c/en/us/products/switches/catalyst-${slug}/index.html`; + } + return null; +} + +function buildAristaUrl(model: string): string | null { + // 7060X6-64PE → https://www.arista.com/en/products/7060x6-series/7060cx6-64pe + // 7050CX3-32S → https://www.arista.com/en/products/7050x3-series/7050cx3-32s + // All arista models follow: /en/products/{model-lowercase} + const slug = model.toLowerCase().replace(/[^a-z0-9-]/g, "-"); + return `https://www.arista.com/en/products/${slug}`; +} + +function buildJuniperUrl(model: string): string | null { + const m = model.toUpperCase(); + if (m.startsWith("QFX")) { + // QFX5130-32CD → qfx5130-32cd + const slug = model.toLowerCase(); + return `https://www.juniper.net/us/en/products/switches/qfx-series/${slug}.html`; + } + if (m.startsWith("EX")) { + const slug = model.toLowerCase(); + return `https://www.juniper.net/us/en/products/switches/ex-series/${slug}.html`; + } + return null; +} + +function buildNvidiaUrl(model: string): string | null { + // SN5600 → https://www.nvidia.com/en-us/networking/ethernet-switching/sn5600/ + // SN4700 → https://www.nvidia.com/en-us/networking/ethernet-switching/sn4700/ + const slug = model.toUpperCase().replace(/[^A-Z0-9]/g, ""); + return `https://www.nvidia.com/en-us/networking/ethernet-switching/${slug.toLowerCase()}/`; +} + +function buildEdgecoreUrl(model: string): string | null { + // AS7726-32X, DCS810 + const slug = model.toLowerCase().replace(/[^a-z0-9-]/g, "-"); + return `https://www.edge-core.com/product/${slug}.html`; +} + +function buildDellUrl(model: string): string | null { + const slug = model.toLowerCase().replace(/[^a-z0-9-]/g, "-"); + return `https://www.dell.com/en-us/shop/networking/sf/${slug}`; +} + +function buildHuaweiUrl(model: string): string | null { + const slug = model.replace(/\s+/g, "-"); + return `https://e.huawei.com/en/products/enterprise-networking/switches/${slug}`; +} + +function buildNobelUrl(_model: string): string | null { + return null; // Nokia SROS pages require auth +} + +function buildExtremeUrl(model: string): string | null { + const slug = model.toLowerCase().replace(/[^a-z0-9-]/g, "-"); + return `https://www.extremenetworks.com/product/${slug}/`; +} + +function buildMikroTikUrl(model: string): string | null { + // CRS504-4XQ-IN → https://mikrotik.com/product/CRS504_4XQ_IN + const slug = model.replace(/[-\s]+/g, "_"); + return `https://mikrotik.com/product/${slug}`; +} + +function buildUbiquitiUrl(model: string): string | null { + const slug = model.toLowerCase().replace(/[^a-z0-9-]/g, "-"); + return `https://store.ui.com/us/en/products/${slug}`; +} + +function buildFsComUrl(model: string): string | null { + const slug = model.toLowerCase().replace(/[^a-z0-9-]/g, "-"); + return `https://www.fs.com/products/${slug}.html`; +} + +function buildSupermicroUrl(model: string): string | null { + const slug = model.toUpperCase(); + return `https://www.supermicro.com/en/products/switches/${slug}`; +} + +function buildHpeArubaUrl(model: string): string | null { + const slug = model.toLowerCase().replace(/[^a-z0-9-]/g, "-"); + return `https://www.arubanetworks.com/products/switches/${slug}/`; +} + +function buildCelesticaUrl(model: string): string | null { + const slug = model.toLowerCase().replace(/[^a-z0-9-]/g, "-"); + return `https://www.celestica.com/networking/${slug}`; +} + +function buildAsterfusionUrl(model: string): string | null { + const slug = model.toLowerCase().replace(/[^a-z0-9-]/g, "-"); + return `https://www.asterfusion.com/products/${slug}/`; +} + +// ── URL dispatcher by vendor slug ─────────────────────────────────────────── + +const URL_BUILDERS: Record string | null> = { + cisco: buildCiscoUrl, + arista: buildAristaUrl, + juniper: buildJuniperUrl, + "nvidia-networking": buildNvidiaUrl, + edgecore: buildEdgecoreUrl, + celestica: buildCelesticaUrl, + asterfusion: buildAsterfusionUrl, + dell: buildDellUrl, + "hpe-aruba": buildHpeArubaUrl, + huawei: buildHuaweiUrl, + nokia: buildNobelUrl, + extreme: buildExtremeUrl, + mikrotik: buildMikroTikUrl, + ubiquiti: buildUbiquitiUrl, + "fs-com": buildFsComUrl, + supermicro: buildSupermicroUrl, + wistron: (_m) => null, // no public product pages +}; + +// ── og:image extractor ────────────────────────────────────────────────────── + +function extractOgImage(html: string, baseUrl: string): string | null { + // Primary: og:image + const ogM = html.match(/ with product hint + const imgM = html.match(/]+src="([^"]+(?:product|hero|switch|router)[^"]*\.(?:jpg|jpeg|png|webp))"/i); + if (imgM?.[1]) { + try { + const abs = new URL(imgM[1], baseUrl).toString(); + if (abs.startsWith("http")) return abs; + } catch { /* ignore */ } + } + + return null; +} + +// ── HTTP fetch with timeout ───────────────────────────────────────────────── + +async function fetchPageHtml(url: string): Promise { + try { + const resp = await fetch(url, { + headers: HEADERS, + signal: AbortSignal.timeout(20_000), + redirect: "follow", + }); + if (!resp.ok) return null; + const html = await resp.text(); + return html; + } catch { + return null; + } +} + +// ── Main scraper ──────────────────────────────────────────────────────────── + +export async function fetchSwitchImages(targetVendorSlug?: string): Promise { + console.log("=== Switch Image Fetcher ===\n"); + + const vendorFilter = targetVendorSlug ? `AND v.slug = $1` : ""; + const params = targetVendorSlug ? [targetVendorSlug] : []; + + const { rows } = await pool.query<{ + id: string; + model: string; + series: string; + vendor_slug: string; + vendor_name: string; + product_page_url: string | null; + image_url: string | null; + }>( + `SELECT sw.id, sw.model, sw.series, sw.product_page_url, sw.image_url, + v.slug AS vendor_slug, v.name AS vendor_name + FROM switches sw + JOIN vendors v ON v.id = sw.vendor_id + WHERE (sw.image_url IS NULL OR sw.image_url = '') + ${vendorFilter} + ORDER BY v.slug, sw.model`, + params, + ); + + if (rows.length === 0) { + console.log(" All switches already have images — nothing to do."); + return; + } + + console.log(` ${rows.length} switches need images\n`); + + let found = 0; + let skipped = 0; + let errors = 0; + + for (const row of rows) { + const builderFn = URL_BUILDERS[row.vendor_slug]; + const productUrl = row.product_page_url || (builderFn ? builderFn(row.model) : null); + + if (!productUrl) { + console.log(` [SKIP] ${row.vendor_name} ${row.model} — no URL pattern`); + skipped++; + continue; + } + + await sleep(2000); // 1 req/2sec + + const html = await fetchPageHtml(productUrl); + if (!html) { + console.log(` [FAIL] ${row.vendor_name} ${row.model} — HTTP error`); + errors++; + continue; + } + + const imageUrl = extractOgImage(html, productUrl); + + if (!imageUrl) { + console.log(` [MISS] ${row.vendor_name} ${row.model} — no og:image on ${productUrl}`); + skipped++; + // Still save the product_page_url so we don't retry the same miss endlessly + if (!row.product_page_url) { + await pool.query( + `UPDATE switches SET product_page_url = $2, assets_scraped_at = NOW() WHERE id = $1`, + [row.id, productUrl], + ); + } + continue; + } + + await pool.query( + `UPDATE switches + SET image_url = $2, product_page_url = COALESCE(product_page_url, $3), assets_scraped_at = NOW() + WHERE id = $1`, + [row.id, imageUrl, productUrl], + ); + + console.log(` [OK] ${row.vendor_name} ${row.model} → ${imageUrl.slice(0, 80)}`); + found++; + } + + console.log(`\n=== Switch Image Fetcher Complete ===`); + console.log(` Images found: ${found}`); + console.log(` Skipped/miss: ${skipped}`); + if (errors > 0) console.warn(` Errors: ${errors}`); +} + +if (require.main === module) { + const vendor = process.argv.find((a) => a.startsWith("--vendor="))?.split("=")[1]; + fetchSwitchImages(vendor) + .then(() => pool.end()) + .catch((err) => { console.error("Fatal:", err); pool.end(); process.exit(1); }); +}