From 1d50fd1c8fc45e74f406d9332f39b93ce0ba60aa Mon Sep 17 00:00:00 2001 From: Rene Fichtmueller Date: Mon, 20 Apr 2026 23:31:36 +0200 Subject: [PATCH] feat: Flexoptix order section per switch + reject generic/logo images --- .claude/launch.json | 1 + lama2/01_health.l2 | 1 + lama2/02_transceivers_search.l2 | 1 + lama2/03_vendors.l2 | 1 + lama2/04_semantic_search.l2 | 1 + lama2/05_hype_cycle.l2 | 1 + lama2/06_competitor_alerts.l2 | 1 + lama2/07_hot_topics.l2 | 1 + lama2/08_blog.l2 | 1 + lama2/09_news.l2 | 1 + lama2/10_finder.l2 | 9 + package-lock.json | 1 + packages/api/src/db/queries.ts | 54 +++++ packages/api/src/routes/switches.ts | 15 +- packages/dashboard/index.html | 215 +++++++++--------- .../src/scrapers/switch-image-fetcher.ts | 127 ++++++++--- sql/043-clear-generic-switch-images.sql | 29 +++ 17 files changed, 322 insertions(+), 138 deletions(-) create mode 100644 .claude/launch.json create mode 100644 lama2/01_health.l2 create mode 100644 lama2/02_transceivers_search.l2 create mode 100644 lama2/03_vendors.l2 create mode 100644 lama2/04_semantic_search.l2 create mode 100644 lama2/05_hype_cycle.l2 create mode 100644 lama2/06_competitor_alerts.l2 create mode 100644 lama2/07_hot_topics.l2 create mode 100644 lama2/08_blog.l2 create mode 100644 lama2/09_news.l2 create mode 100644 lama2/10_finder.l2 create mode 100644 sql/043-clear-generic-switch-images.sql diff --git a/.claude/launch.json b/.claude/launch.json new file mode 100644 index 0000000..6c440e8 --- /dev/null +++ b/.claude/launch.json @@ -0,0 +1 @@ +{"version":"0.0.1","configurations":[{"name":"dashboard","runtimeExecutable":"npx","runtimeArgs":["serve","-p","5555","packages/dashboard"],"port":5555}]} diff --git a/lama2/01_health.l2 b/lama2/01_health.l2 new file mode 100644 index 0000000..f1d363a --- /dev/null +++ b/lama2/01_health.l2 @@ -0,0 +1 @@ +GET https://tip.fichtmueller.org/api/health diff --git a/lama2/02_transceivers_search.l2 b/lama2/02_transceivers_search.l2 new file mode 100644 index 0000000..5c99f16 --- /dev/null +++ b/lama2/02_transceivers_search.l2 @@ -0,0 +1 @@ +GET https://tip.fichtmueller.org/api/transceivers?q=100G&limit=10 diff --git a/lama2/03_vendors.l2 b/lama2/03_vendors.l2 new file mode 100644 index 0000000..90b5ad0 --- /dev/null +++ b/lama2/03_vendors.l2 @@ -0,0 +1 @@ +GET https://tip.fichtmueller.org/api/vendors diff --git a/lama2/04_semantic_search.l2 b/lama2/04_semantic_search.l2 new file mode 100644 index 0000000..a0fc23e --- /dev/null +++ b/lama2/04_semantic_search.l2 @@ -0,0 +1 @@ +GET https://tip.fichtmueller.org/api/search?q=coherent+400G&limit=10 diff --git a/lama2/05_hype_cycle.l2 b/lama2/05_hype_cycle.l2 new file mode 100644 index 0000000..bd3fdd1 --- /dev/null +++ b/lama2/05_hype_cycle.l2 @@ -0,0 +1 @@ +GET https://tip.fichtmueller.org/api/hype-cycle?technology=400G diff --git a/lama2/06_competitor_alerts.l2 b/lama2/06_competitor_alerts.l2 new file mode 100644 index 0000000..c009c5e --- /dev/null +++ b/lama2/06_competitor_alerts.l2 @@ -0,0 +1 @@ +GET https://tip.fichtmueller.org/api/competitor-alerts?limit=20 diff --git a/lama2/07_hot_topics.l2 b/lama2/07_hot_topics.l2 new file mode 100644 index 0000000..f63874d --- /dev/null +++ b/lama2/07_hot_topics.l2 @@ -0,0 +1 @@ +GET https://tip.fichtmueller.org/api/hot-topics?limit=10 diff --git a/lama2/08_blog.l2 b/lama2/08_blog.l2 new file mode 100644 index 0000000..481a613 --- /dev/null +++ b/lama2/08_blog.l2 @@ -0,0 +1 @@ +GET https://tip.fichtmueller.org/api/blog?limit=10 diff --git a/lama2/09_news.l2 b/lama2/09_news.l2 new file mode 100644 index 0000000..49eb4b8 --- /dev/null +++ b/lama2/09_news.l2 @@ -0,0 +1 @@ +GET https://tip.fichtmueller.org/api/news?limit=20 diff --git a/lama2/10_finder.l2 b/lama2/10_finder.l2 new file mode 100644 index 0000000..fe25d3e --- /dev/null +++ b/lama2/10_finder.l2 @@ -0,0 +1,9 @@ +POST https://tip.fichtmueller.org/api/finder +Content-Type: application/json + +{ + "switch_vendor": "Cisco", + "switch_model": "Nexus 9300", + "speed_gbps": 100, + "fiber_type": "SM" +} diff --git a/package-lock.json b/package-lock.json index 9fe9553..8df50b2 100644 --- a/package-lock.json +++ b/package-lock.json @@ -6386,6 +6386,7 @@ "pg": "^8.13.1", "pg-boss": "^10.1.5", "playwright": "^1.50.0", + "socks-proxy-agent": "^8.0.5", "xml2js": "^0.6.2" }, "devDependencies": { diff --git a/packages/api/src/db/queries.ts b/packages/api/src/db/queries.ts index 4135625..dd04787 100644 --- a/packages/api/src/db/queries.ts +++ b/packages/api/src/db/queries.ts @@ -307,6 +307,60 @@ export async function getCompatibleTransceivers(switchId: string) { return result.rows; } +/** + * getFlexoptixSuggestions — returns Flexoptix transceivers that physically fit + * the switch's port slots, derived from ports_config JSONB keys. + * Works even before the compat scraper has processed the switch. + */ +export async function getFlexoptixSuggestions(switchId: string) { + const result = await pool.query( + `WITH switch_form_factors AS ( + SELECT DISTINCT + CASE + WHEN k ILIKE '%QSFP-DD800%' THEN 'QSFP-DD800' + WHEN k ILIKE '%QSFP-DD%' THEN 'QSFP-DD' + WHEN k ILIKE '%OSFP224%' THEN 'OSFP224' + WHEN k ILIKE '%OSFP%' THEN 'OSFP' + WHEN k ILIKE '%QSFP28%' THEN 'QSFP28' + WHEN k ILIKE '%QSFP+%' THEN 'QSFP+' + WHEN k ILIKE '%QSFP%' THEN 'QSFP+' + WHEN k ILIKE '%SFP28%' THEN 'SFP28' + WHEN k ILIKE '%SFP+%' THEN 'SFP+' + WHEN k ILIKE '%SFP%' THEN 'SFP+' + WHEN k ILIKE '%CFP2%' THEN 'CFP2' + WHEN k ILIKE '%CFP4%' THEN 'CFP4' + WHEN k ILIKE '%CFP%' THEN 'CFP' + END AS form_factor + FROM switches sw, + jsonb_object_keys(sw.ports_config) AS k + WHERE sw.id = $1 AND sw.ports_config IS NOT NULL + ) + SELECT t.id, t.slug, t.part_number, t.standard_name, t.form_factor, + t.speed, t.speed_gbps, t.reach_meters, t.reach_label, + t.fiber_type, t.wavelength_nm, t.market_status, + t.product_page_url, t.image_url, + t.price_verified_eur, t.price_verified_at, t.price_verified_usd, + v.name AS vendor_name, v.website AS vendor_website, + COALESCE(t.price_verified_eur, + (SELECT po.price FROM price_observations po + WHERE po.transceiver_id = t.id ORDER BY po.time DESC LIMIT 1) + ) AS latest_price, + CASE WHEN t.price_verified_eur IS NOT NULL THEN 'EUR' + ELSE (SELECT po.currency FROM price_observations po + WHERE po.transceiver_id = t.id ORDER BY po.time DESC LIMIT 1) + END AS latest_currency + FROM transceivers t + JOIN vendors v ON t.vendor_id = v.id + WHERE LOWER(v.name) = 'flexoptix' + AND t.form_factor IN ( + SELECT form_factor FROM switch_form_factors WHERE form_factor IS NOT NULL + ) + ORDER BY t.speed_gbps DESC NULLS LAST, t.reach_meters ASC NULLS LAST`, + [switchId] + ); + return result.rows; +} + export async function listVendors(type?: string) { const query = type ? `SELECT v.*, diff --git a/packages/api/src/routes/switches.ts b/packages/api/src/routes/switches.ts index d481f62..7c5bb54 100644 --- a/packages/api/src/routes/switches.ts +++ b/packages/api/src/routes/switches.ts @@ -1,5 +1,5 @@ import { Router, Request, Response } from "express"; -import { searchSwitches, getSwitchById, getCompatibleTransceivers, getSwitchDocuments, getSwitchIssues } from "../db/queries"; +import { searchSwitches, getSwitchById, getCompatibleTransceivers, getFlexoptixSuggestions, getSwitchDocuments, getSwitchIssues } from "../db/queries"; export const switchRouter = Router(); @@ -73,3 +73,16 @@ switchRouter.get("/:id/compatibility", async (req: Request, res: Response) => { res.status(500).json({ success: false, error: "Internal server error" }); } }); + +// GET /api/switches/:id/flexoptix — Flexoptix transceivers by form factor (always available) +// Returns Flexoptix catalog items that physically fit the switch's port slots, +// derived from ports_config keys — works before the compat scraper has run. +switchRouter.get("/:id/flexoptix", async (req: Request, res: Response) => { + try { + const suggestions = await getFlexoptixSuggestions(String(req.params.id)); + res.json({ success: true, data: suggestions, total: suggestions.length }); + } catch (err) { + console.error("Get Flexoptix suggestions error:", err); + res.status(500).json({ success: false, error: "Internal server error" }); + } +}); diff --git a/packages/dashboard/index.html b/packages/dashboard/index.html index 4e7c8db..a7702e9 100644 --- a/packages/dashboard/index.html +++ b/packages/dashboard/index.html @@ -4064,135 +4064,134 @@ async function openSwitchDetail(id) { buildDOM(body, dh); }).catch(function() {}); + // ── Load Flexoptix orderable transceivers (form-factor based, always works) ── + api('/api/switches/' + id + '/flexoptix').then(function(foData) { + var foAll = foData.data || []; + if (foAll.length === 0) return; + + var fch = ''; + fch += '
' + + 'Bei Flexoptix bestellen' + + '' + foAll.length + '' + + '
'; + fch += '
Passend für diesen Switch — FlexBox-Codierung möglich
'; + + // Group by speed class + var foGroups = {}; + foAll.forEach(function(t) { + var key = (t.speed_gbps ? t.speed_gbps + 'G' : (t.speed || '?')) + ' ' + (t.form_factor || '?'); + if (!foGroups[key]) foGroups[key] = []; + foGroups[key].push(t); + }); + + // Sort speed groups descending (highest speed first) + var foKeys = Object.keys(foGroups).sort(function(a, b) { + var ga = parseFloat(a) || 0, gb = parseFloat(b) || 0; + return gb - ga; + }); + + foKeys.forEach(function(key) { + var items = foGroups[key]; + fch += '
' + + esc(key) + '(' + items.length + ')
'; + fch += '
'; + + items.slice(0, 10).forEach(function(t) { + var priceStr = ''; + if (t.latest_price) { + var _pAmt = parseFloat(t.latest_price); + var _pCur = (t.latest_currency || 'EUR').toUpperCase(); + var _pEUR = toEUR(_pAmt, _pCur); + var _pUSD = toUSD(_pAmt, _pCur); + priceStr = _pEUR !== null ? fmtEUR(_pEUR) : (_pUSD !== null ? fmtUSD(_pUSD) : _pCur + ' ' + _pAmt.toFixed(2)); + } + var shopHref = t.product_page_url || ('https://www.flexoptix.net/en/search/ajax/suggest/?q=' + encodeURIComponent(t.part_number || t.standard_name || '')); + var reach = t.reach_label ? '' + esc(t.reach_label) + '' : ''; + + fch += '
' + + '
' + + '' + esc(t.part_number || t.standard_name || t.slug) + '' + + reach + + '
' + + (priceStr + ? '' + priceStr + '' + : 'Preis anfragen') + + 'Bestellen ↗' + + '
'; + }); + + if (items.length > 10) { + fch += '
+' + (items.length - 10) + ' weitere Flexoptix-Optionen
'; + } + fch += '
'; + }); + + el('panel-content').insertAdjacentHTML('beforeend', fch); + }).catch(function() {}); + + // ── Load compatibility table (vendor-tested + competitor data) ──────────── api('/api/switches/' + id + '/compatibility').then(function(cdata) { var txList = cdata.data || cdata.transceivers || []; if (txList.length === 0) return; - // Split: Flexoptix vs others - var foList = txList.filter(function(t) { return (t.vendor_name || '').toLowerCase() === 'flexoptix'; }); + // Only show non-Flexoptix here — Flexoptix already shown via /flexoptix var otherList = txList.filter(function(t) { return (t.vendor_name || '').toLowerCase() !== 'flexoptix'; }); + if (otherList.length === 0) return; + + var verifiedOthers = otherList.filter(function(t) { + return t.verification_method === 'vendor_matrix' || t.verification_method === 'vendor_compat'; + }); + var specOthers = otherList.filter(function(t) { + return t.verification_method !== 'vendor_matrix' && t.verification_method !== 'vendor_compat'; + }); var ch = ''; + ch += '
Competitor Transceivers ' + otherList.length + '' + + (verifiedOthers.length > 0 ? '(' + verifiedOthers.length + ' vendor-tested)' : '') + '
'; - // ── FLEXOPTIX RECOMMENDED ────────────────────────────────────────────── - if (foList.length > 0) { - ch += '
Flexoptix Recommended ' + foList.length + '
'; - ch += '
Directly available from Flexoptix — FlexBox coding supported
'; - - // Split Flexoptix by verification method - var foVendorVerified = foList.filter(function(t) { return t.verification_method === 'vendor_compat' || t.verification_method === 'vendor_matrix'; }); - var foSpecMatch = foList.filter(function(t) { return t.verification_method !== 'vendor_compat' && t.verification_method !== 'vendor_matrix'; }); - - // Show explicitly verified first - var foToShow = foVendorVerified.length > 0 ? foVendorVerified : foList; - if (foVendorVerified.length > 0 && foSpecMatch.length > 0) { - ch += '
✓ Vendor-tested compatibility (' + foVendorVerified.length + ')
'; - } - - // Group by speed class - var foGroups = {}; - foToShow.forEach(function(t) { - var key = (t.speed || '?') + ' ' + (t.form_factor || '?'); - if (!foGroups[key]) foGroups[key] = []; - foGroups[key].push(t); + // Vendor-tested with price + if (verifiedOthers.length > 0) { + var groups = {}; + verifiedOthers.forEach(function(t) { + var key = (t.form_factor || '?') + ' ' + (t.speed || '?'); + if (!groups[key]) groups[key] = []; + groups[key].push(t); }); - - Object.keys(foGroups).sort().forEach(function(key) { - var items = foGroups[key]; - ch += '
' + esc(key) + ' (' + items.length + ')
'; - ch += '
'; - items.slice(0, 8).forEach(function(t) { + Object.keys(groups).sort().forEach(function(key) { + var items = groups[key]; + ch += '
' + esc(key) + ' (' + items.length + ')
'; + ch += '
'; + items.slice(0, 6).forEach(function(t) { var priceStr = ''; if (t.latest_price) { var _pAmt = parseFloat(t.latest_price); var _pCur = (t.latest_currency || 'USD').toUpperCase(); var _pUSD = toUSD(_pAmt, _pCur); - var _pEUR = toEUR(_pAmt, _pCur); - priceStr = ' — ' + (_pUSD !== null ? fmtUSD(_pUSD) : _pCur + ' ' + _pAmt.toFixed(2)) - + (_pEUR !== null ? ' / ' + fmtEUR(_pEUR) : ''); + priceStr = _pUSD !== null ? fmtUSD(_pUSD) : _pCur + ' ' + _pAmt.toFixed(2); } - var verBadge = (t.price_verified === true) - ? '✓ Verified' : ''; - var fullyBadge = (t.fully_verified === true) - ? '★ 100%' : ''; - var foUrl = t.product_page_url - ? 'Shop ↗' : ''; - ch += '
' - + '' + esc(t.part_number || t.standard_name || t.slug) + '' - + '' + esc(t.reach_label || '') + priceStr + '' - + fullyBadge + verBadge + foUrl + ch += '
' + + '' + esc(t.part_number || t.standard_name) + '' + + '' + esc(t.vendor_name || '') + '' + + (priceStr ? '' + priceStr + '' : '') + '
'; }); - if (items.length > 8) ch += '
+' + (items.length - 8) + ' more Flexoptix options
'; + if (items.length > 6) ch += '
+' + (items.length - 6) + ' more
'; ch += '
'; }); - - // Form-factor matches (spec_match) — collapsed summary - if (foSpecMatch.length > 0) { - ch += '
' - + '+ ' + foSpecMatch.length + ' more by form factor match' - + '
' - + foSpecMatch.slice(0, 20).map(function(t) { - return '' - + esc(t.standard_name || t.part_number) + ''; - }).join('') - + (foSpecMatch.length > 20 ? '+' + (foSpecMatch.length - 20) + ' more' : '') - + '
'; - } } - // ── ALL COMPATIBLE (other vendors) ──────────────────────────────────── - if (otherList.length > 0) { - var verifiedOthers = otherList.filter(function(t) { return t.verification_method === 'vendor_matrix'; }); - var specOthers = otherList.filter(function(t) { return t.verification_method !== 'vendor_matrix'; }); - - ch += '
Competitor Transceivers ' + otherList.length + '' - + (verifiedOthers.length > 0 ? '(' + verifiedOthers.length + ' vendor-tested)' : '') + '
'; - - // Show vendor-tested ones with price info - if (verifiedOthers.length > 0) { - var groups = {}; - verifiedOthers.forEach(function(t) { - var key = (t.form_factor || '?') + ' ' + (t.speed || '?'); - if (!groups[key]) groups[key] = []; - groups[key].push(t); - }); - Object.keys(groups).sort().forEach(function(key) { - var items = groups[key]; - ch += '
' + esc(key) + ' (' + items.length + ')
'; - ch += '
'; - items.slice(0, 6).forEach(function(t) { - var priceStr = ''; - if (t.latest_price) { - var _pAmt = parseFloat(t.latest_price); - var _pCur = (t.latest_currency || 'USD').toUpperCase(); - var _pUSD = toUSD(_pAmt, _pCur); - priceStr = _pUSD !== null ? fmtUSD(_pUSD) : _pCur + ' ' + _pAmt.toFixed(2); - } - ch += '
' - + '' + esc(t.part_number || t.standard_name) + '' - + '' + esc(t.vendor_name || '') + '' - + (priceStr ? '' + priceStr + '' : '') - + '
'; - }); - if (items.length > 6) ch += '
+' + (items.length - 6) + ' more
'; - ch += '
'; - }); - } - - // Show spec-match ones as compact chips - if (specOthers.length > 0) { - ch += '
'; - ch += '
Form factor compatible
'; - ch += '
'; - specOthers.slice(0, 20).forEach(function(t) { - var fullyBadge = (t.fully_verified === true) ? '★ ' : ''; - ch += '' - + fullyBadge + esc(t.standard_name || t.slug || t.part_number) + ''; - }); - if (specOthers.length > 20) ch += '+' + (specOthers.length - 20) + ' more'; - ch += '
'; - } + // Spec-match as compact chips + if (specOthers.length > 0) { + ch += '
'; + ch += '
Form factor compatible
'; + ch += '
'; + specOthers.slice(0, 20).forEach(function(t) { + var fullyBadge = (t.fully_verified === true) ? '★ ' : ''; + ch += '' + + fullyBadge + esc(t.standard_name || t.slug || t.part_number) + ''; + }); + if (specOthers.length > 20) ch += '+' + (specOthers.length - 20) + ' more'; + ch += '
'; } el('panel-content').insertAdjacentHTML('beforeend', ch); diff --git a/packages/scraper/src/scrapers/switch-image-fetcher.ts b/packages/scraper/src/scrapers/switch-image-fetcher.ts index ccd5603..4f96917 100644 --- a/packages/scraper/src/scrapers/switch-image-fetcher.ts +++ b/packages/scraper/src/scrapers/switch-image-fetcher.ts @@ -40,7 +40,6 @@ function buildCiscoUrl(model: string): string | null { const slug = m.replace("N9K-C", "").toLowerCase().replace(/[^a-z0-9]/g, "-"); return `https://www.cisco.com/c/en/us/products/switches/nexus-${slug}-switch/index.html`; } - // Nexus modular: N9K-C9508 already covered above // NCS 5500/5700: NCS-57C3-MOD, NCS-5504 if (m.startsWith("NCS-")) { const num = m.replace("NCS-", "").toLowerCase().replace(/[^a-z0-9]/g, "-"); @@ -51,9 +50,20 @@ function buildCiscoUrl(model: string): string | null { const slug = m.toLowerCase().replace(/[^a-z0-9]/g, "-"); return `https://www.cisco.com/c/en/us/products/switches/catalyst-${slug}/index.html`; } + // Cisco 8000 SP series: 8101-32FH, 8202-32FH, 8608 + if (/^8[0-9]{3}/.test(m)) { + const slug = m.toLowerCase().replace(/[^a-z0-9]/g, "-"); + return `https://www.cisco.com/c/en/us/products/routers/8000-series-routers/${slug}/index.html`; + } return null; } +function buildAlcatelLucentUrl(model: string): string | null { + // OmniSwitch 6900-X72, OmniSwitch 9900-C32D + const slug = model.toLowerCase().replace(/\s+/g, "-").replace(/[^a-z0-9-]/g, ""); + return `https://www.al-enterprise.com/en/products/switches/${slug}`; +} + function buildAristaUrl(model: string): string | null { // 7060X6-64PE → https://www.arista.com/en/products/7060x6-series/7060cx6-64pe // 7050CX3-32S → https://www.arista.com/en/products/7050x3-series/7050cx3-32s @@ -147,52 +157,111 @@ function buildAsterfusionUrl(model: string): string | null { // ── URL dispatcher by vendor slug ─────────────────────────────────────────── const URL_BUILDERS: Record string | null> = { - cisco: buildCiscoUrl, - arista: buildAristaUrl, - juniper: buildJuniperUrl, - "nvidia-networking": buildNvidiaUrl, - edgecore: buildEdgecoreUrl, - celestica: buildCelesticaUrl, - asterfusion: buildAsterfusionUrl, - dell: buildDellUrl, - "hpe-aruba": buildHpeArubaUrl, - huawei: buildHuaweiUrl, - nokia: buildNobelUrl, - extreme: buildExtremeUrl, - mikrotik: buildMikroTikUrl, - ubiquiti: buildUbiquitiUrl, - "fs-com": buildFsComUrl, - supermicro: buildSupermicroUrl, - wistron: (_m) => null, // no public product pages + cisco: buildCiscoUrl, + arista: buildAristaUrl, + juniper: buildJuniperUrl, + "nvidia-networking": buildNvidiaUrl, + edgecore: buildEdgecoreUrl, + celestica: buildCelesticaUrl, + asterfusion: buildAsterfusionUrl, + dell: buildDellUrl, + "hpe-aruba": buildHpeArubaUrl, + huawei: buildHuaweiUrl, + nokia: buildNobelUrl, + extreme: buildExtremeUrl, + mikrotik: buildMikroTikUrl, + ubiquiti: buildUbiquitiUrl, + "fs-com": buildFsComUrl, + supermicro: buildSupermicroUrl, + "alcatel-lucent": buildAlcatelLucentUrl, + "ale": buildAlcatelLucentUrl, + wistron: (_m) => null, // no public product pages }; +// ── Generic marketing image detector ──────────────────────────────────────── +// Rejects URLs that are clearly stock photos, homepages, lifestyle shots or +// any other non-product image. Patterns found from real-world scrapes. + +const GENERIC_IMAGE_PATTERNS: RegExp[] = [ + // ── Logo / brand marks (never product photos) ──────────────────────────── + /[-/_]logo[-_.]|\/logos?\//i, + /cisco[-_]?logo/i, + /juniper[-_]networks[-_]logo/i, + /arista[-_]?logo/i, + /brand[-_]?logo/i, + /company[-_]?logo/i, + // SVG logos often have these in path + /\/svg\//i, + /\.svg(\?|$)/i, + // ── Alcatel-Lucent Enterprise generic hero images ──────────────────────── + /naas-homepag/i, + /al-enterprise.*\/images\/naas/i, + // ── Generic OG / social sharing defaults ───────────────────────────────── + /og[-_]default/i, + /default[-_](?:og|social|share|image)/i, + /site[-_](?:default|image|og)/i, + /social[-_](?:default|share)/i, + /twitter[-_]default/i, + /default[-_]thumbnail/i, + // ── Homepage / banner / lifestyle ──────────────────────────────────────── + /\/homepage\//i, + /hero[-_](?:banner|bg|background|image)/i, + /banner[-_](?:bg|background)/i, + /lifestyle/i, + /stock[-_]?photo/i, + /people[-_](?:at|in|with)/i, + // ── Placeholder / fallback ──────────────────────────────────────────────── + /placeholder/i, + /no[-_]?image/i, + /image[-_]?not[-_]?found/i, + /\/fallback[/-]/i, + /missing[-_]image/i, + // ── Generic about/press/brand pages ────────────────────────────────────── + /\/press[-_]kit/i, + /\/media[-_]kit/i, +]; + +function isGenericImage(url: string): boolean { + return GENERIC_IMAGE_PATTERNS.some((re) => re.test(url)); +} + // ── og:image extractor ────────────────────────────────────────────────────── function extractOgImage(html: string, baseUrl: string): string | null { + const resolve = (url: string): string | null => { + if (!url) return null; + let abs = url; + if (url.startsWith("/")) { + try { abs = new URL(url, baseUrl).toString(); } catch { return null; } + } + if (!abs.startsWith("http")) return null; + if (isGenericImage(abs)) return null; // ← reject logos/marketing images + return abs; + }; + // Primary: og:image const ogM = html.match(/ with product hint - const imgM = html.match(/]+src="([^"]+(?:product|hero|switch|router)[^"]*\.(?:jpg|jpeg|png|webp))"/i); + // Fallback: large product image in with product keyword in path + const imgM = html.match(/]+src="([^"]+(?:product|switch|router|hardware)[^"]*\.(?:jpg|jpeg|png|webp))"/i); if (imgM?.[1]) { - try { - const abs = new URL(imgM[1], baseUrl).toString(); - if (abs.startsWith("http")) return abs; - } catch { /* ignore */ } + const resolved = resolve(imgM[1]); + if (resolved) return resolved; } return null; diff --git a/sql/043-clear-generic-switch-images.sql b/sql/043-clear-generic-switch-images.sql new file mode 100644 index 0000000..1969ffd --- /dev/null +++ b/sql/043-clear-generic-switch-images.sql @@ -0,0 +1,29 @@ +-- Migration 043 — Clear generic / logo images from switches +-- Removes image_url entries that are clearly logos or marketing photos, +-- not actual product hardware images. The scraper will re-fetch with the +-- improved isGenericImage() filter on the next daily run. + +UPDATE switches +SET image_url = NULL, assets_scraped_at = NULL +WHERE image_url IS NOT NULL + AND ( + -- Generic logos (Cisco, vendor brand marks) + image_url ILIKE '%logo%' + OR image_url ILIKE '%.svg' + -- Alcatel-Lucent Enterprise lifestyle / naas hero images + OR image_url ILIKE '%naas-homepag%' + OR image_url ILIKE '%al-enterprise%naas%' + -- Generic OG defaults + OR image_url ILIKE '%og-default%' + OR image_url ILIKE '%default-social%' + OR image_url ILIKE '%og_default%' + -- Placeholder/fallback images + OR image_url ILIKE '%placeholder%' + OR image_url ILIKE '%no-image%' + OR image_url ILIKE '%noimage%' + -- Generic homepage images + OR image_url ILIKE '%/homepage/%' + -- Lifestyle / stock photos (common CDN path patterns) + OR image_url ILIKE '%lifestyle%' + OR image_url ILIKE '%stock-photo%' + );