From 1c8dec52c9ae3ec18dd23b56ee569758912e0a14 Mon Sep 17 00:00:00 2001 From: Rene Fichtmueller Date: Sat, 18 Apr 2026 01:02:08 +0200 Subject: [PATCH] feat: Price Comparison dashboard + Eoptolink OEM scraper MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Add public /api/price-comparison API (summary, top-50, per-SKU detail) β€” no auth required, 3 Express routes, DISTINCT ON latest-price logic - Add 'πŸ’² Price Comparison' dashboard tab: stat cards, form-factor breakdown, top-50 SKU table (clickable rows β†’ SKU detail), per-vendor price + stock + spread% lookup panel - Add Eoptolink OEM catalog scraper (93 product-solution pages, part-number regex EOLO-*/EOLQ-* etc., no prices, seeds transceivers table as manufacturer entries) - Register scrape:catalog:eoptolink in scheduler: schedule every 4h (40 */4 * * *), lazy-import worker, added to known-jobs array --- CHANGELOG_PENDING.md | 3 + packages/api/src/index.ts | 3 + packages/api/src/routes/price-comparison.ts | 262 +++++++++++++++++ packages/dashboard/index.html | 303 +++++++++++++++++++- packages/scraper/src/scheduler.ts | 8 + packages/scraper/src/scrapers/eoptolink.ts | 237 +++++++++++++++ 6 files changed, 809 insertions(+), 7 deletions(-) create mode 100644 packages/api/src/routes/price-comparison.ts create mode 100644 packages/scraper/src/scrapers/eoptolink.ts diff --git a/CHANGELOG_PENDING.md b/CHANGELOG_PENDING.md index 2187ea6..10c5002 100644 --- a/CHANGELOG_PENDING.md +++ b/CHANGELOG_PENDING.md @@ -3,6 +3,9 @@ Format: `{"d":"YYYY-MM-DD","t":"TYPE","m":"Description"}` Types: FEAT Β· FIX Β· UI Β· DATA Β· AI Β· INFRA +{"d":"2026-04-18","t":"FEAT","m":"Price Comparison Dashboard: public /api/price-comparison (summary, list top-50 SKUs by vendor coverage, per-SKU detail). Express Router, no auth required. 
New '💲 Price Comparison' dashboard tab with stat cards, form-factor breakdown table, top-50 SKU table (clickable rows), and SKU detail lookup with per-vendor prices + stock + spread %."} +{"d":"2026-04-18","t":"DATA","m":"Eoptolink OEM catalog scraper: harvests 93 product-solution pages from eoptolink.com, extracts part numbers (EOLO-*/EOLQ-* format), seeds transceivers table as manufacturer=Eoptolink entries with form_factor/speed/fiber/category. No prices (B2B OEM). Scheduled every 4h (40 */4 * * *)."} +{"d":"2026-04-18","t":"FIX","m":"stock_observations repopulated after TRUNCATE: storage-fs/request_queues/default/ directory re-created on Erik; NADDOD scraper manual-triggered; 4+ prices confirmed written within 20s."} {"d":"2026-04-17","t":"FEAT","m":"MCP Server v0.2.0: wired finder.ts (find_flexoptix_for_switch, get_competitor_alerts), switch-docs (get_switch_docs, get_switch_image), analyze_market_with_llm (qwen2.5:14b via Ollama, enriched with live hype cycle + pricing + news), generate_blog_post (fo-blog-v5 fine-tuned model with qwen2.5:14b fallback + live pricing enrichment). OLLAMA_BASE_URL env var for Ollama endpoint."} {"d":"2026-04-17","t":"UI","m":"Stock dashboard: 6th stat card (Multi-Vendor SKUs), confidence quality badge column in vendor breakdown (🟢 L3 per-warehouse / 🟡 L2 aggregated / ⚪ L1 boolean), new Multi-Vendor Price Comparison table with min/max/avg per SKU. 
Subtitle updated to mention QSFPTEK + NADDOD."} {"d":"2026-04-17","t":"FEAT","m":"/api/stock/summary enhanced: vendor_breakdown adds avg_confidence + currencies + confidence breakdown (conf_per_warehouse/aggregated/boolean); new price_comparison endpoint (top 50 SKUs tracked by 2+ vendors with price spread); totals adds multi_vendor_skus count."} diff --git a/packages/api/src/index.ts b/packages/api/src/index.ts index 9b68a60..6c4cd61 100644 --- a/packages/api/src/index.ts +++ b/packages/api/src/index.ts @@ -30,6 +30,7 @@ import { newsRouter } from "./routes/news"; import { proxyRouter } from "./routes/proxy"; import { reviewRouter } from "./routes/review"; import { stockRouter } from "./routes/stock"; +import { priceComparisonRouter } from "./routes/price-comparison"; const app = express(); @@ -61,6 +62,7 @@ app.use("/api", (req, res, next) => { if (req.path.startsWith("/health") || req.path.startsWith("/auth")) return next(); if (req.path.startsWith("/proxy")) return next(); if (req.path.startsWith("/hot-topics")) return next(); + if (req.path.startsWith("/price-comparison")) return next(); requireAuth(req, res, next); }); @@ -88,6 +90,7 @@ app.use("/api/changelog", changelogRouter); app.use("/api/news", newsRouter); app.use("/api/review", reviewRouter); app.use("/api/stock", stockRouter); +app.use("/api/price-comparison", priceComparisonRouter); // Dashboard (static HTML) app.use("/dashboard", express.static(join(__dirname, "..", "..", "dashboard"))); diff --git a/packages/api/src/routes/price-comparison.ts b/packages/api/src/routes/price-comparison.ts new file mode 100644 index 0000000..3b05690 --- /dev/null +++ b/packages/api/src/routes/price-comparison.ts @@ -0,0 +1,262 @@ +/** + * Price Comparison Dashboard β€” Public API + * + * Public-facing endpoints powering the "Octopart for optical transceivers" + * price comparison page. No authentication required. 
+ *
+ * Routes:
+ *   GET /api/price-comparison           — Top 50 SKUs by vendor coverage
+ *   GET /api/price-comparison/summary   — Market-level aggregate stats
+ *   GET /api/price-comparison/:sku      — Per-SKU price breakdown
+ */
+import { Router, Request, Response } from "express";
+import { pool } from "../db/client";
+
+export const priceComparisonRouter = Router();
+
+// ─── GET /api/price-comparison/summary ───────────────────────────────────────
+// MUST be registered before /:sku to avoid route conflict
+/**
+ * Market summary (JSON: { success, data }), built from the latest observation per (transceiver, vendor):
+ *   - Total unique SKUs tracked
+ *   - Total price observations
+ *   - Number of active vendors
+ *   - Average prices broken down by form_factor (data.by_form_factor)
+ */
+priceComparisonRouter.get("/summary", async (_req: Request, res: Response) => {
+  try {
+    const [overview, byFormFactor] = await Promise.all([
+      // Overall market counts. NOTE(review): AVG(l.price) mixes currencies — no FX normalisation is applied here.
+      pool.query(`
+        WITH latest AS (
+          SELECT DISTINCT ON (po.transceiver_id, po.source_vendor_id)
+            po.transceiver_id,
+            po.source_vendor_id,
+            po.price,
+            po.currency
+          FROM price_observations po
+          ORDER BY po.transceiver_id, po.source_vendor_id, po.time DESC
+        )
+        SELECT
+          COUNT(DISTINCT l.transceiver_id) AS total_skus_tracked,
+          (SELECT COUNT(*) FROM price_observations)::bigint AS total_observations,
+          COUNT(DISTINCT l.source_vendor_id) AS active_vendor_count,
+          ROUND(AVG(l.price)::numeric, 2) AS overall_avg_price
+        FROM latest l
+      `),
+
+      // Avg price per form_factor (using latest price per vendor per transceiver)
+      pool.query(`
+        WITH latest AS (
+          SELECT DISTINCT ON (po.transceiver_id, po.source_vendor_id)
+            po.transceiver_id,
+            po.source_vendor_id,
+            po.price,
+            po.currency
+          FROM price_observations po
+          ORDER BY po.transceiver_id, po.source_vendor_id, po.time DESC
+        )
+        SELECT
+          t.form_factor,
+          COUNT(DISTINCT l.transceiver_id) AS sku_count,
+          COUNT(DISTINCT l.source_vendor_id) AS vendor_count,
+          ROUND(MIN(l.price)::numeric, 2) AS min_price,
+          ROUND(MAX(l.price)::numeric, 2) AS max_price,
+          ROUND(AVG(l.price)::numeric, 2) AS avg_price,
+          -- Most common currency for this form factor (counted over ALL observations, not only the latest ones)
+          (
+            SELECT currency
+            FROM price_observations po2
+            JOIN transceivers t2 ON t2.id = po2.transceiver_id
+            WHERE t2.form_factor = t.form_factor
+            GROUP BY currency
+            ORDER BY COUNT(*) DESC
+            LIMIT 1
+          ) AS currency
+        FROM latest l
+        JOIN transceivers t ON t.id = l.transceiver_id
+        GROUP BY t.form_factor
+        ORDER BY sku_count DESC
+      `),
+    ]);
+
+    res.json({
+      success: true,
+      data: {
+        ...overview.rows[0],
+        by_form_factor: byFormFactor.rows,
+      },
+    });
+  } catch (err) {
+    console.error("GET /api/price-comparison/summary error:", err);
+    res.status(500).json({ success: false, error: "Internal server error" });
+  }
+});
+
+// ─── GET /api/price-comparison ───────────────────────────────────────────────
+/**
+ * Top 50 transceivers ranked by number of vendors tracking them (ties: cheapest average first).
+ * Shows price spread across vendors — the more vendors, the better the comparison.
+ */
+priceComparisonRouter.get("/", async (_req: Request, res: Response) => {
+  try {
+    const result = await pool.query(`
+      WITH latest AS (
+        SELECT DISTINCT ON (po.transceiver_id, po.source_vendor_id)
+          po.transceiver_id,
+          po.source_vendor_id,
+          po.price,
+          po.currency
+        FROM price_observations po
+        ORDER BY po.transceiver_id, po.source_vendor_id, po.time DESC
+      )
+      SELECT
+        t.form_factor,
+        t.speed,
+        t.standard_name,
+        COUNT(DISTINCT l.source_vendor_id) AS vendor_count,
+        ROUND(MIN(l.price)::numeric, 2) AS min_price,
+        ROUND(MAX(l.price)::numeric, 2) AS max_price,
+        ROUND(AVG(l.price)::numeric, 2) AS avg_price,
+        -- Dominant currency (most common for this SKU)
+        (
+          SELECT currency
+          FROM latest l2
+          WHERE l2.transceiver_id = t.id
+          GROUP BY currency
+          ORDER BY COUNT(*) DESC
+          LIMIT 1
+        ) AS currency,
+        ROUND(
+          ((MAX(l.price) - MIN(l.price)) / NULLIF(MIN(l.price), 0) * 100)::numeric,
+          1
+        ) AS spread_pct
+      FROM latest l
+      JOIN transceivers t ON t.id = l.transceiver_id
+      GROUP BY t.id, t.form_factor, t.speed, t.standard_name
+      ORDER BY vendor_count DESC, avg_price ASC
+      LIMIT 50
+    `);
+
+    res.json({
+      success: true,
+      data: result.rows,
+    });
+  } catch (err) {
+    console.error("GET /api/price-comparison error:", err);
+    res.status(500).json({ success: false, error: "Internal server error" });
+  }
+});
+
+// ─── GET /api/price-comparison/:sku ─────────────────────────────────────────
+/**
+ * Full price breakdown for a single SKU; responds 404 when no transceiver matches.
+ * :sku matches standard_name OR part_number case-insensitively; LIKE wildcards in the input are escaped.
+ * Returns per-vendor prices, stock status, and aggregate stats.
+ */
+priceComparisonRouter.get("/:sku", async (req: Request, res: Response) => {
+  try {
+    const sku = String(req.params.sku).trim();
+
+    // Resolve transceiver (ORDER BY makes the LIMIT 1 pick deterministic when several rows match)
+    const transceiverResult = await pool.query(
+      `SELECT id, standard_name, form_factor, speed, reach_label, fiber_type, part_number
+       FROM transceivers
+       WHERE standard_name ILIKE $1 OR part_number ILIKE $1
+       ORDER BY id LIMIT 1`,
+      [sku.replace(/[\\%_]/g, "\\$&")] // escape \ % _ so user input cannot act as a LIKE pattern
+    );
+
+    if (transceiverResult.rows.length === 0) {
+      res.status(404).json({ success: false, error: "Transceiver not found" });
+      return;
+    }
+
+    const transceiver = transceiverResult.rows[0];
+
+    // Latest price per vendor + stock level from stock_observations (if available)
+    const pricesResult = await pool.query(`
+      SELECT
+        v.name AS vendor,
+        po.price,
+        po.currency,
+        -- stock_level is emitted once, by the COALESCE below (no duplicate column name)
+        -- Prefer stock_observations for latest stock info
+        COALESCE(
+          (
+            SELECT so.stock_level
+            FROM stock_observations so
+            WHERE so.transceiver_id = po.transceiver_id
+              AND so.source_vendor_id = po.source_vendor_id
+            ORDER BY so.time DESC
+            LIMIT 1
+          ),
+          po.stock_level
+        ) AS stock_level,
+        -- Vendor-level link only: search_url_template if set, else website_url (no per-product URL is selected)
+        COALESCE(
+          v.search_url_template,
+          v.website_url
+        ) AS url,
+        po.time AS observed_at
+      FROM (
+        SELECT DISTINCT ON (po.transceiver_id, po.source_vendor_id)
+          po.transceiver_id,
+          po.source_vendor_id,
+          po.price,
+          po.currency,
+          po.stock_level,
+          po.time
+        FROM price_observations po
+        WHERE po.transceiver_id = $1
+        ORDER BY po.transceiver_id, po.source_vendor_id, po.time DESC
+      ) po
+      JOIN vendors v ON v.id = po.source_vendor_id
+      ORDER BY po.price ASC
+    `, [transceiver.id]);
+
+    const prices = pricesResult.rows;
+
+    // Compute aggregate stats (rows whose price does not parse to a finite number are ignored)
+    const priceValues = prices.map((r) => parseFloat(r.price)).filter((v) => Number.isFinite(v));
+
+    let stats: Record<string, number | null> = {
+      vendor_count: prices.length,
+      min: null,
+      max: null,
+      avg: null,
+      spread_pct: null,
+    };
+
+    if (priceValues.length > 0) {
+      const min = Math.min(...priceValues);
+      const max = Math.max(...priceValues);
+      const avg = priceValues.reduce((a, b) => a + b, 0) / priceValues.length;
+      const spreadPct = min > 0 ? Math.round(((max - min) / min) * 1000) / 10 : null; // percent, 1 decimal
+
+      stats = {
+        vendor_count: prices.length,
+        min: Math.round(min * 100) / 100,
+        max: Math.round(max * 100) / 100,
+        avg: Math.round(avg * 100) / 100,
+        spread_pct: spreadPct,
+      };
+    }
+
+    res.json({
+      success: true,
+      transceiver: {
+        standard_name: transceiver.standard_name,
+        form_factor: transceiver.form_factor,
+        speed: transceiver.speed,
+        reach_label: transceiver.reach_label,
+        fiber_type: transceiver.fiber_type,
+      },
+      prices,
+      stats,
+    });
+  } catch (err) {
+    console.error("GET /api/price-comparison/:sku error:", err);
+    res.status(500).json({ success: false, error: "Internal server error" });
+  }
+});
diff --git a/packages/dashboard/index.html b/packages/dashboard/index.html
index adf15e3..a6c1ec4 100644
--- a/packages/dashboard/index.html
+++ b/packages/dashboard/index.html
@@ -803,6 +803,7 @@
🌐 Network
✎ Review
🏭 Stock
+
💲 Price Comparison
@@ -902,6 +903,7 @@
Norton-Bass Multigenerational Diffusion Model — click any technology for details
+ Innovation Peak Trough @@ -921,6 +923,8 @@ Adoption Peak To Plateau + OEM ASP + R² @@ -1844,6 +1848,100 @@
+ + + @@ -2223,6 +2321,7 @@ function goToTab(tabName) { if (tabName === 'network') loadProxyNetwork(); if (tabName === 'review') loadReview(); if (tabName === 'stock') loadStock(); + if (tabName === 'prices') loadPriceComparison(); } document.querySelectorAll('.tab').forEach(function(tab) { @@ -2775,16 +2874,63 @@ async function loadRegionalData(techs) { buildDOM(body, html); } +// Map DB snake_case phase β†’ UI display label +var DB_PHASE_LABEL = { + 'innovation_trigger': 'Innovation Trigger', + 'peak_inflated_expectations': 'Peak of Inflated Expectations', + 'trough_disillusionment': 'Trough of Disillusionment', + 'slope_enlightenment': 'Slope of Enlightenment', + 'plateau_productivity': 'Plateau of Productivity' +}; + async function loadHypeCycle() { - // Use enriched endpoint for forecast data - var data; + var techs = [], dataSource = 'static'; + + // 1) Try DB-fitted Bass model results (freshest β€” computed daily 04:30) try { - data = await api('/api/hype-cycle/enriched'); - } catch(e) { - data = await api('/api/hype-cycle'); + var dbRes = await api('/api/hype-cycle/analysis'); + if (dbRes.success && Array.isArray(dbRes.data) && dbRes.data.length > 0) { + var now = new Date().getFullYear(); + techs = dbRes.data.map(function(r) { + var phaseLabel = DB_PHASE_LABEL[r.hype_phase] || r.hype_phase; + // Estimate years to plateau: rough heuristic from phase + years_to_next_phase + var phasesLeft = { innovation_trigger:4, peak_inflated_expectations:3, trough_disillusionment:2, slope_enlightenment:1, plateau_productivity:0 }; + var ytp = (phasesLeft[r.hype_phase] || 0) * (r.years_to_next_phase || 2); + return { + technology: r.technology, + phase: phaseLabel, + positionPct: Math.round(r.hype_score || 0), + adoptionPct: Math.round((r.current_share || 0) * 100), + peakYear: r.t_peak_year ? Math.round(r.t_peak_year) : null, + yearsToPlateauFromNow: ytp > 0 ? 
Math.round(ytp) : null, + // extra DB fields for tooltip + aspCurrentUsd: r.asp_current_usd, + aspDecline3y: r.asp_decline_pct_3y, + rSquared: r.r_squared, + computedAt: r.computed_at + }; + }); + dataSource = 'db'; + var computedAt = dbRes.data[0] && dbRes.data[0].computed_at ? new Date(dbRes.data[0].computed_at).toLocaleDateString() : ''; + el('hype-year').textContent = now + (computedAt ? ' Β· computed ' + computedAt : ''); + } + } catch(e) { /* fall through to static */ } + + // 2) Fallback: static enriched/base endpoint + if (techs.length === 0) { + var data; + try { + data = await api('/api/hype-cycle/enriched'); + } catch(e) { + data = await api('/api/hype-cycle'); + } + techs = data.technologies || []; + el('hype-year').textContent = data.year; } - var techs = data.technologies || []; - el('hype-year').textContent = data.year; + + // Badge showing data source + var srcBadge = el('hype-data-source'); + if (srcBadge) srcBadge.textContent = dataSource === 'db' ? '● Live DB' : '● Static'; var c = el('hype-svg-container'); buildDOM(c, renderHypeSvg(techs)); @@ -2803,6 +2949,8 @@ async function loadHypeCycle() { + '' + adoptionDisplay + '' + '' + esc(t.peakYear || 'β€”') + '' + '' + (t.yearsToPlateauFromNow != null ? t.yearsToPlateauFromNow + 'y' : 'β€”') + '' + + '' + (t.aspCurrentUsd != null ? '$' + Number(t.aspCurrentUsd).toLocaleString() : 'β€”') + '' + + '' + (t.rSquared != null ? 
Number(t.rSquared).toFixed(2) : 'β€”') + '' + ''; }).join('')); @@ -6533,6 +6681,147 @@ async function lookupStock() { resultEl.textContent = 'Error: ' + e.message; } } + +// ── Price Comparison ────────────────────────────────────────────────────────── + +var pricesLoaded = false; + +async function loadPriceComparison() { + if (pricesLoaded) return; + try { + // Load summary + top SKUs in parallel + var [sumData, listData] = await Promise.all([ + api('/api/price-comparison/summary'), + api('/api/price-comparison') + ]); + + // ── Stat cards ────────────────────────────────────────────────────────── + if (sumData.success && sumData.data) { + var s = sumData.data; + setEl('pc-stat-skus', Number(s.total_skus_tracked || 0).toLocaleString()); + setEl('pc-stat-vendors', Number(s.active_vendor_count || 0).toLocaleString()); + setEl('pc-stat-obs', Number(s.total_observations || 0).toLocaleString()); + setEl('pc-stat-avg', s.overall_avg_price != null + ? 'USD ' + Number(s.overall_avg_price).toLocaleString('en-US', {minimumFractionDigits:2, maximumFractionDigits:2}) + : 'β€”'); + + // ── Form factor table ───────────────────────────────────────────────── + var ffBody = el('pc-ff-body'); + if (ffBody) { + var ffs = s.by_form_factor || []; + if (ffs.length === 0) { + ffBody.innerHTML = 'No data yet'; + } else { + ffBody.innerHTML = ffs.map(function(r) { + var cur = r.currency || 'USD'; + return '' + + '' + esc(r.form_factor || 'β€”') + '' + + '' + Number(r.sku_count).toLocaleString() + '' + + '' + Number(r.vendor_count).toLocaleString() + '' + + '' + (r.min_price != null ? cur + '\u00a0' + Number(r.min_price).toFixed(2) : 'β€”') + '' + + '' + (r.avg_price != null ? cur + '\u00a0' + Number(r.avg_price).toFixed(2) : 'β€”') + '' + + '' + (r.max_price != null ? 
cur + '\u00a0' + Number(r.max_price).toFixed(2) : 'β€”') + '' + + ''; + }).join(''); + } + } + } + + // ── Top SKUs table ──────────────────────────────────────────────────────── + var topBody = el('pc-top-body'); + if (topBody && listData.success && Array.isArray(listData.data)) { + var rows = listData.data; + if (rows.length === 0) { + topBody.innerHTML = 'No price data yet β€” waiting for first scrape run'; + } else { + topBody.innerHTML = rows.map(function(r) { + var spread = r.spread_pct != null ? Number(r.spread_pct).toFixed(1) + '%' : 'β€”'; + var spreadColor = r.spread_pct != null && r.spread_pct > 30 ? '#f87171' : r.spread_pct > 10 ? '#f59e0b' : '#22c55e'; + var cur = r.currency || 'USD'; + return '' + + '' + esc(r.standard_name || 'β€”') + '' + + '' + esc(r.form_factor || 'β€”') + '' + + '' + esc(r.speed || 'β€”') + '' + + '' + r.vendor_count + '' + + '' + (r.min_price != null ? cur + '\u00a0' + Number(r.min_price).toFixed(2) : 'β€”') + '' + + '' + (r.avg_price != null ? cur + '\u00a0' + Number(r.avg_price).toFixed(2) : 'β€”') + '' + + '' + spread + '' + + ''; + }).join(''); + } + } + + pricesLoaded = true; + } catch(e) { + console.error('loadPriceComparison error', e); + } +} + +async function lookupPriceComparison() { + var input = el('pc-lookup-input'); + var resultEl = el('pc-lookup-result'); + if (!input || !resultEl) return; + var q = (input.value || '').trim(); + if (!q) return; + resultEl.innerHTML = 'Looking up…'; + try { + var data = await api('/api/price-comparison/' + encodeURIComponent(q)); + if (!data.success || !data.transceiver) { + resultEl.textContent = 'Not found: ' + q; + return; + } + var tx = data.transceiver; + var stats = data.stats || {}; + var prices = data.prices || []; + var cur = (prices[0] && prices[0].currency) ? prices[0].currency : 'USD'; + + var statsHtml = '
' + + '' + esc(tx.standard_name) + '' + + '  Β·  ' + esc(tx.form_factor || '') + ' ' + esc(tx.speed || '') + + (tx.fiber_type ? '  Β·  ' + esc(tx.fiber_type) : '') + + (tx.reach_label ? '  Β·  ' + esc(tx.reach_label) : '') + + '
' + + '
' + + 'πŸ“Š ' + stats.vendor_count + ' vendors' + + (stats.min != null ? 'Min: ' + cur + '\u00a0' + Number(stats.min).toFixed(2) + '' : '') + + (stats.avg != null ? 'Avg: ' + cur + '\u00a0' + Number(stats.avg).toFixed(2) + '' : '') + + (stats.max != null ? 'Max: ' + cur + '\u00a0' + Number(stats.max).toFixed(2) + '' : '') + + (stats.spread_pct != null ? 'Spread: ' + Number(stats.spread_pct).toFixed(1) + '%' : '') + + '
'; + + var tableHtml = ''; + if (prices.length > 0) { + tableHtml = '' + + '' + + '' + + '' + + '' + + '' + + '' + + prices.map(function(p, i) { + var stock = p.stock_level || 'β€”'; + var stockColor = /in.stock|available/i.test(stock) ? '#22c55e' : /out|unavail/i.test(stock) ? '#f87171' : 'var(--text-dim)'; + var vendorHtml = p.url + ? '' + esc(p.vendor) + '' + : esc(p.vendor); + var rowBg = i % 2 === 0 ? '' : 'background:var(--surface2)'; + return '' + + '' + + '' + + '' + + '' + + ''; + }).join('') + + '
VendorPriceStockObserved
' + vendorHtml + '' + (p.price != null ? esc(p.currency || cur) + '\u00a0' + Number(p.price).toFixed(2) : 'β€”') + '' + esc(stock) + '' + (p.observed_at ? new Date(p.observed_at).toLocaleDateString('en-US') : 'β€”') + '
'; + } else { + tableHtml = '

No price observations found.

'; + } + + resultEl.innerHTML = '
' + statsHtml + tableHtml + '
'; + } catch(e) { + resultEl.textContent = 'Error: ' + e.message; + } +} diff --git a/packages/scraper/src/scheduler.ts b/packages/scraper/src/scheduler.ts index 6365dd5..7e56bf2 100644 --- a/packages/scraper/src/scheduler.ts +++ b/packages/scraper/src/scheduler.ts @@ -94,6 +94,7 @@ export async function registerSchedules(boss: PgBoss): Promise { // ── Manufacturer catalogs (every 8h, no prices) ──────────────────── "scrape:catalog:smartoptics", "scrape:catalog:hubersuhner", + "scrape:catalog:eoptolink", // ── Vendor lists ─────────────────────────────────────────────────── "scrape:vendors:flexoptix", "scrape:vendors:flexoptix-supported", @@ -217,6 +218,7 @@ export async function registerSchedules(boss: PgBoss): Promise { await boss.schedule("scrape:catalog:smartoptics", "10 */4 * * *", {}, { retryLimit: 2, expireInSeconds: 3600 }); await boss.schedule("scrape:catalog:hubersuhner", "25 */4 * * *", {}, { retryLimit: 2, expireInSeconds: 3600 }); + await boss.schedule("scrape:catalog:eoptolink", "40 */4 * * *", {}, { retryLimit: 2, expireInSeconds: 3600 }); // ══════════════════════════════════════════════════════════════════════ // VENDOR LISTS β€” every 12h @@ -450,6 +452,12 @@ export async function registerWorkers(boss: PgBoss): Promise { await scrapeHuberSuhner(); }); + await boss.work("scrape:catalog:eoptolink", async () => { + console.log(`[${new Date().toISOString()}] Running: Eoptolink OEM catalog`); + const { scrapeEoptolink } = await import("./scrapers/eoptolink"); + await scrapeEoptolink(); + }); + // ── Vendor lists ────────────────────────────────────────────────────── await boss.work("scrape:vendors:flexoptix", async () => { diff --git a/packages/scraper/src/scrapers/eoptolink.ts b/packages/scraper/src/scrapers/eoptolink.ts new file mode 100644 index 0000000..8c75726 --- /dev/null +++ b/packages/scraper/src/scrapers/eoptolink.ts @@ -0,0 +1,237 @@ +/** + * Eoptolink Manufacturer Catalog Scraper + * + * Source: www.eoptolink.com β€” One of China's top-3 
optical transceiver OEMs.
+ *         (Finisar competitor, supplies tier-1 cloud hyperscalers)
+ * Target: Discover all product families + part numbers, seed transceivers table
+ *         as manufacturer=Eoptolink entries.
+ *
+ * Strategy:
+ *   Phase 1: Fetch homepage → extract all /product-solutions/* category URLs (≈90)
+ *   Phase 2: Fetch each category page → parse product name + Eoptolink part numbers
+ *            (format: E[A-Z]{2,5}-\d{2,3}[A-Z0-9]{1,3}(?:-\d{1,3})?(?:-[A-Z0-9]{1,6})*)
+ *
+ * Note: Eoptolink does NOT publish retail prices (B2B OEM manufacturer).
+ *       This scraper adds manufacturer catalog entries — no price_observations.
+ *
+ * Rate limit: 1 req/2s — polite crawl of OEM's website.
+ */
+
+import { pool, findOrCreateScrapedTransceiver, ensureVendor } from "../utils/db";
+
+const BASE = "https://www.eoptolink.com";
+const HEADERS = {
+  "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0.0.0 Safari/537.36",
+  Accept: "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
+  "Accept-Language": "en-US,en;q=0.9",
+};
+const DELAY_MS = 2000;
+
+function sleep(ms: number): Promise<void> { // resolves after `ms` milliseconds
+  return new Promise((r) => setTimeout(r, ms));
+}
+
+async function fetchHtml(url: string): Promise<string> { // GET `url` as text; throws on a non-2xx status, aborts after 20 s
+  const resp = await fetch(url, { headers: HEADERS, signal: AbortSignal.timeout(20_000) });
+  if (!resp.ok) throw new Error(`HTTP ${resp.status} for ${url}`);
+  return resp.text();
+}
+
+// ── Classification helpers ──────────────────────────────────────────────────
+
+function speedFromSlug(slug: string): { speed: string; speedGbps: number } { // checked fastest-first; { "Unknown", 0 } when no token matches
+  if (/\b1\.?6t\b/i.test(slug)) return { speed: "1.6T", speedGbps: 1600 };
+  if (/\b800g\b/i.test(slug)) return { speed: "800G", speedGbps: 800 };
+  if (/\b400g\b/i.test(slug)) return { speed: "400G", speedGbps: 400 };
+  if (/\b200g\b/i.test(slug)) return { speed: "200G", speedGbps: 200 };
+  if (/\b100g\b/i.test(slug)) return { speed: "100G", speedGbps: 100 };
+  if (/\b50g\b/i.test(slug)) return { speed: "50G", speedGbps: 50 };
+  if (/\b40g\b/i.test(slug)) return { speed: "40G", speedGbps: 40 };
+  if (/\b32g\b/i.test(slug)) return { speed: "32G", speedGbps: 32 };
+  if (/\b25g\b/i.test(slug)) return { speed: "25G", speedGbps: 25 };
+  if (/\b16g\b/i.test(slug)) return { speed: "16G", speedGbps: 16 };
+  if (/\b10g\b/i.test(slug)) return { speed: "10G", speedGbps: 10 };
+  if (/\b8g\b/i.test(slug)) return { speed: "8G", speedGbps: 8 };
+  if (/\b4g\b/i.test(slug)) return { speed: "4G", speedGbps: 4 };
+  if (/\b1g\b/i.test(slug)) return { speed: "1G", speedGbps: 1 };
+  return { speed: "Unknown", speedGbps: 0 };
+}
+
+function formFactorFromText(text: string): string { // most specific names first; plain "SFP" is also the fallback when nothing matches
+  const t = text.toUpperCase();
+  if (/\bOSFP\b/.test(t)) return "OSFP";
+  if (/\bQSFP.?DD800\b|\bQSFP-DD800\b/.test(t)) return "QSFP-DD800";
+  if (/\bQSFP.?DD\b/.test(t)) return "QSFP-DD";
+  if (/\bQSFP56\b/.test(t)) return "QSFP56";
+  if (/\bQSFP112\b/.test(t)) return "QSFP112";
+  if (/\bQSFP28\b/.test(t)) return "QSFP28";
+  if (/\bQSFP\+|\bQSFP PLUS\b/.test(t)) return "QSFP+";
+  if (/\bSFP56.DD\b/.test(t)) return "SFP56-DD";
+  if (/\bSFP56\b/.test(t)) return "SFP56";
+  if (/\bSFP28\b/.test(t)) return "SFP28";
+  if (/\bSFP\+|SFP-PLUS|SFP PLUS\b/.test(t)) return "SFP+";
+  if (/\bXFP\b/.test(t)) return "XFP";
+  if (/\bCFP4\b/.test(t)) return "CFP4";
+  if (/\bCFP2\b/.test(t)) return "CFP2";
+  if (/\bCFP\b/.test(t)) return "CFP";
+  if (/\bSFP\b/.test(t)) return "SFP";
+  return "SFP";
+}
+
+function fiberFromText(text: string): string { // "MMF" on a multimode hint, otherwise "SMF"
+  const t = text.toLowerCase();
+  if (/multimode|mmf|sr|om[1-5]/i.test(t)) return "MMF";
+  if (/single.?mode|smf|lr|er|zr|fr|dr|bidi|cwdm|dwdm|coherent/i.test(t)) return "SMF";
+  return "SMF"; // OEM products default to SMF
+}
+
+function categoryFromText(text: string): string { // first matching family wins; "DataCenter" is the fallback
+  const t = text.toLowerCase();
+  if (/coherent|zr|dpsk/.test(t)) return "Coherent";
+  if (/dwdm/.test(t)) return "DWDM";
+  if (/cwdm/.test(t)) return "CWDM";
+  if (/aoc/.test(t)) return "AOC";
+  if (/dac/.test(t)) return "DAC";
+  if (/\b(?:xgs|xg|ng|ge|g|e)?-?pon\d?\b/.test(t)) return "PON"; // word-anchored: bare /pon/ also hit "transponder", "component"
+  return "DataCenter";
+}
+
+// ── Phase 1: Discover product solution URLs ──────────────────────────────────
+
+async function fetchProductSolutionUrls(): Promise<string[]> { // unique site-relative /product-solutions/ paths linked from the homepage
+  console.log(` Fetching Eoptolink homepage for product solution links...`);
+  const html = await fetchHtml(`${BASE}/`);
+  const links = html.match(/href="(\/product-solutions\/[^"#?]+)"/gi) ?? [];
+  const unique = [...new Set(links.map((l) => l.match(/href="([^"]+)"/)?.[1] ?? "").filter(Boolean))];
+  // Skip OSA (optical sub-assemblies) and test-board entries — no transceiver catalog
+  const filtered = unique.filter((u) =>
+    !u.includes("/osa/") &&
+    !u.includes("/other/") &&
+    !u.endsWith("/400g/") &&
+    !u.endsWith("/800g/") &&
+    !u.endsWith("/product-solutions/")
+  );
+  console.log(` Found ${filtered.length} product solution pages`);
+  return filtered;
+}
+
+// ── Phase 2: Parse product detail page ──────────────────────────────────────
+
+interface EoptolinkProduct {
+  pageTitle: string;
+  partNumbers: string[];
+  speed: string;
+  speedGbps: number;
+  formFactor: string;
+  fiberType: string;
+  category: string;
+  pageUrl: string;
+}
+
+function parseProductPage(html: string, pageUrl: string): EoptolinkProduct | null { // null when no usable title (< 3 chars) is found
+  // Page title: <title> text first, else a 5–80 character <h1>; anything after a "|" separator is dropped
+  const titleMatch = html.match(/<title>([^<]+)/i) || html.match(/<h1[^>]*>([^<]{5,80})</i);
+  const pageTitle = (titleMatch?.[1] ?? "").replace(/\s*\|.*$/, "").replace(/[||]+[^||]*$/, "").trim();
+  if (!pageTitle || pageTitle.length < 3) return null;
+
+  // Eoptolink part numbers: format like EOLO-168HG-10-XDX, EOLQ-128HG-02-PX (de-duplicated, page order kept)
+  const pnRegex = /E[A-Z]{2,5}-\d{2,3}[A-Z0-9]{1,3}(?:-\d{1,3})?(?:-[A-Z0-9]{1,6})*/g;
+  const partNumbers = [...new Set([...(html.matchAll(pnRegex) ?? [])].map((m) => m[0].trim()))];
+
+  const slug = pageUrl.split("/").slice(-2).join("-");
+  const { speed, speedGbps } = speedFromSlug(slug + " " + pageTitle);
+  const formFactor = formFactorFromText(pageTitle + " " + slug);
+  const fiberType = fiberFromText(pageTitle + " " + slug);
+  const category = categoryFromText(pageTitle + " " + slug);
+
+  return { pageTitle, partNumbers, speed, speedGbps, formFactor, fiberType, category, pageUrl };
+}
+
+// ── Main ────────────────────────────────────────────────────────────────────
+
+export async function scrapeEoptolink(): Promise<void> { // crawls every discovered page sequentially and seeds catalog rows; logs a summary
+  console.log("=== Eoptolink Manufacturer Catalog Scraper ===\n");
+
+  const vendorId = await ensureVendor(
+    "Eoptolink",
+    "manufacturer",
+    "https://www.eoptolink.com",
+    "https://www.eoptolink.com/product-solutions/"
+  );
+  console.log(` Vendor ID: ${vendorId}`);
+
+  // Phase 1: Collect product solution URLs
+  let productUrls: string[];
+  try {
+    productUrls = await fetchProductSolutionUrls();
+  } catch (err) {
+    console.error(` Homepage fetch failed: ${(err as Error).message}`);
+    return; // NOTE(review): resolves normally, so a job-level retry would not fire on this failure — confirm intended
+  }
+
+  console.log(`\n[Phase 2] Fetching ${productUrls.length} product detail pages...\n`);
+
+  let added = 0;
+  let skipped = 0;
+  let errors = 0;
+
+  for (const relPath of productUrls) {
+    await sleep(DELAY_MS);
+    const url = `${BASE}${relPath}`;
+    try {
+      const html = await fetchHtml(url);
+      const product = parseProductPage(html, relPath);
+      if (!product || product.speedGbps === 0) {
+        skipped++;
+        continue;
+      }
+
+      // Seed one row per part number found on the page; fall back to the page title when none was found
+      const namesToSeed: string[] = product.partNumbers.length > 0
+        ? product.partNumbers.slice(0, 10) // max 10 part numbers per product family page
+        : [product.pageTitle];
+
+      for (const partNumber of namesToSeed) {
+        try {
+          await findOrCreateScrapedTransceiver({
+            partNumber: partNumber.slice(0, 80),
+            vendorId,
+            formFactor: product.formFactor,
+            speedGbps: product.speedGbps,
+            speed: product.speed,
+            fiberType: product.fiberType,
+            category: product.category,
+          });
+          added++;
+        } catch (dbErr) {
+          console.warn(` ! DB error seeding ${partNumber}: ${(dbErr as Error).message.slice(0, 80)}`); // best-effort: keep going, but no longer silent
+        }
+      }
+
+      console.log(
+        ` ✓ ${product.pageTitle.padEnd(45)} ff=${product.formFactor.padEnd(8)} speed=${product.speed.padEnd(5)} pn=${product.partNumbers.length}`
+      );
+    } catch (err: unknown) {
+      errors++;
+      if (errors <= 10) console.warn(` ✗ Error ${relPath}: ${(err as Error).message.slice(0, 60)}`);
+    }
+  }
+
+  console.log(`\n=== Eoptolink Catalog Scraper Complete ===`);
+  console.log(` Pages processed: ${productUrls.length - errors}`);
+  console.log(` Transceivers seeded: ${added}`);
+  console.log(` Skipped (no speed): ${skipped}`);
+  console.log(` Errors: ${errors}`);
+}
+
+// ── CLI ────────────────────────────────────────────────────────────────────
+
+if (require.main === module) {
+  scrapeEoptolink()
+    .then(() => pool.end())
+    .catch((err: unknown) => {
+      console.error("Fatal:", err);
+      pool.end();
+      process.exit(1);
+    });
+}