diff --git a/packages/api/src/index.ts b/packages/api/src/index.ts index a3e8fab..fbb6d96 100644 --- a/packages/api/src/index.ts +++ b/packages/api/src/index.ts @@ -20,6 +20,7 @@ import { transportRouter } from "./routes/transport"; import { datasheetRouter } from "./routes/datasheets"; import { hotTopicsRouter } from "./routes/hot-topics"; import { adoptionRouter } from "./routes/adoption"; +import { procurementRouter } from "./routes/procurement"; const app = express(); @@ -56,6 +57,7 @@ app.use("/api/transport", transportRouter); app.use("/api/datasheets", datasheetRouter); app.use("/api/adoption", adoptionRouter); app.use("/api/hot-topics", hotTopicsRouter); +app.use("/api/procurement", procurementRouter); // Dashboard (static HTML) app.use("/dashboard", express.static(join(__dirname, "..", "..", "dashboard"))); diff --git a/packages/api/src/routes/procurement.ts b/packages/api/src/routes/procurement.ts new file mode 100644 index 0000000..0f03c9a --- /dev/null +++ b/packages/api/src/routes/procurement.ts @@ -0,0 +1,293 @@ +/** + * WS0c: Procurement Intelligence API + * + * Endpoints: + * GET /api/procurement/overview — Dashboard summary + * GET /api/procurement/signals — Active reorder signals + * GET /api/procurement/signals/:id — Signal for a specific transceiver + * GET /api/procurement/abc — ABC classification list + * GET /api/procurement/market-intel — Market intelligence events + * GET /api/procurement/stock-trends/:id — Stock history for a transceiver + * GET /api/procurement/lifecycle — Lifecycle events (EOL, standards) + */ +import { Router, Request, Response } from "express"; +import { pool } from "../db/client"; + +export const procurementRouter = Router(); + +// ───────────────────────────────────────────────────────────────────────────── +// GET /api/procurement/overview +// ───────────────────────────────────────────────────────────────────────────── +procurementRouter.get("/overview", async (_req: Request, res: Response) => { + try { + const [signals, abc, intel, lifecycle] = await Promise.all([ + pool.query(` + SELECT signal, COUNT(*) AS count + FROM reorder_signals + WHERE expires_at > NOW() + AND computed_at = (SELECT MAX(r2.computed_at) FROM reorder_signals r2 WHERE r2.transceiver_id = reorder_signals.transceiver_id) + GROUP BY signal + `), + pool.query(` + SELECT abc_class, COUNT(*) AS count FROM abc_classification GROUP BY abc_class ORDER BY abc_class + `), + pool.query(` + SELECT intel_type, buy_signal_implication, COUNT(*) AS count + FROM market_intelligence + WHERE created_at > NOW() - INTERVAL '90 days' + GROUP BY intel_type, buy_signal_implication + ORDER BY count DESC + LIMIT 10 + `), + pool.query(` + SELECT event_type, impact_level, COUNT(*) AS count + FROM product_lifecycle_events + WHERE created_at > NOW() - INTERVAL '180 days' + GROUP BY event_type, impact_level + ORDER BY count DESC + `), + ]); + + res.json({ + signals_summary: signals.rows, + abc_summary: abc.rows, + market_intel_summary: intel.rows, + lifecycle_summary: lifecycle.rows, + }); + } catch (err) { + console.error("Procurement overview error:", err); + res.status(500).json({ error: "Internal server error" }); + } +}); + +// ───────────────────────────────────────────────────────────────────────────── +// GET /api/procurement/signals?signal=buy_now&abc_class=A&limit=50&offset=0 +// ───────────────────────────────────────────────────────────────────────────── +procurementRouter.get("/signals", async (req: Request, res: Response) => { + try { + const { + signal, abc_class, form_factor, speed_gbps, + limit = "50", offset = "0" + } = req.query; + + let sql = ` + SELECT rs.*, + t.part_number, t.standard_name, t.form_factor, t.speed_gbps, + t.reach_label, t.image_url, t.image_r2_key, + ac.abc_class, ac.demand_score, ac.supply_risk, + v.name AS vendor_name + FROM reorder_signals rs + JOIN transceivers t ON rs.transceiver_id = t.id + LEFT JOIN abc_classification ac ON ac.transceiver_id = t.id + LEFT JOIN vendors v ON t.vendor_id = v.id + WHERE rs.expires_at > NOW() + AND rs.computed_at = ( + SELECT MAX(r2.computed_at) FROM reorder_signals r2 WHERE r2.transceiver_id = rs.transceiver_id + ) + `; + const params: any[] = []; + let idx = 1; + + if (signal) { sql += ` AND rs.signal = $${idx}`; params.push(signal); idx++; } + if (abc_class) { sql += ` AND ac.abc_class = $${idx}`; params.push(abc_class); idx++; } + if (form_factor) { sql += ` AND t.form_factor = $${idx}`; params.push(form_factor); idx++; } + if (speed_gbps) { sql += ` AND t.speed_gbps = $${idx}`; params.push(parseFloat(speed_gbps as string)); idx++; } + + sql += ` ORDER BY rs.signal_strength DESC LIMIT $${idx} OFFSET $${idx + 1}`; + params.push(parseInt(limit as string), parseInt(offset as string)); + + const result = await pool.query(sql, params); + res.json({ data: result.rows, total: result.rowCount }); + } catch (err) { + console.error("Signals error:", err); + res.status(500).json({ error: "Internal server error" }); + } +}); + +// ───────────────────────────────────────────────────────────────────────────── +// GET /api/procurement/signals/:transceiver_id +// ───────────────────────────────────────────────────────────────────────────── +procurementRouter.get("/signals/:id", async (req: Request, res: Response) => { + try { + const { id } = req.params; + + const [signal, stockHistory, priceHistory, lifecycle] = await Promise.all([ + pool.query(` + SELECT rs.*, ac.abc_class, ac.demand_score, ac.supply_risk + FROM reorder_signals rs + LEFT JOIN abc_classification ac ON ac.transceiver_id = rs.transceiver_id + WHERE rs.transceiver_id::text = $1 + ORDER BY rs.computed_at DESC LIMIT 1 + `, [id]), + + pool.query(` + SELECT ss.stock_level, ss.stock_quantity, ss.incoming_quantity, + ss.incoming_eta, ss.lead_time_days, ss.moq, ss.price_breaks, + ss.scraped_at, ss.crawler_confidence, + v.name AS vendor_name + FROM stock_snapshots ss + JOIN vendors v ON ss.vendor_id = v.id + WHERE ss.transceiver_id::text = $1 + ORDER BY ss.scraped_at DESC LIMIT 50 + `, [id]), + + pool.query(` + SELECT po.price, po.currency, po.time, + v.name AS vendor_name + FROM price_observations po + JOIN vendors v ON po.source_vendor_id = v.id + WHERE po.transceiver_id::text = $1 + ORDER BY po.time DESC LIMIT 30 + `, [id]), + + pool.query(` + SELECT * FROM product_lifecycle_events + WHERE transceiver_id::text = $1 + ORDER BY effective_date ASC NULLS LAST, created_at DESC + `, [id]), + ]); + + res.json({ + signal: signal.rows[0] || null, + stock_history: stockHistory.rows, + price_history: priceHistory.rows, + lifecycle_events: lifecycle.rows, + }); + } catch (err) { + console.error("Signal detail error:", err); + res.status(500).json({ error: "Internal server error" }); + } +}); + +// ───────────────────────────────────────────────────────────────────────────── +// GET /api/procurement/abc?class=A&form_factor=QSFP28 +// ───────────────────────────────────────────────────────────────────────────── +procurementRouter.get("/abc", async (req: Request, res: Response) => { + try { + const { class: cls, form_factor, speed_gbps, limit = "100", offset = "0" } = req.query; + + let sql = ` + SELECT ac.*, + t.part_number, t.standard_name, t.form_factor, t.speed_gbps, + t.reach_label, t.image_url, + v.name AS vendor_name, + rs.signal, rs.signal_strength + FROM abc_classification ac + JOIN transceivers t ON ac.transceiver_id = t.id + LEFT JOIN vendors v ON t.vendor_id = v.id + LEFT JOIN LATERAL ( + SELECT signal, signal_strength FROM reorder_signals + WHERE transceiver_id = ac.transceiver_id AND expires_at > NOW() + ORDER BY computed_at DESC LIMIT 1 + ) rs ON true + WHERE 1=1 + `; + const params: any[] = []; + let idx = 1; + + if (cls) { sql += ` AND ac.abc_class = $${idx}`; params.push(cls); idx++; } + if (form_factor) { sql += ` AND t.form_factor = $${idx}`; params.push(form_factor); idx++; } + if (speed_gbps) { sql += ` AND t.speed_gbps = $${idx}`; params.push(parseFloat(speed_gbps as string)); idx++; } + + sql += ` ORDER BY ac.abc_class, ac.demand_score DESC LIMIT $${idx} OFFSET $${idx + 1}`; + params.push(parseInt(limit as string), parseInt(offset as string)); + + const result = await pool.query(sql, params); + res.json({ data: result.rows, total: result.rowCount }); + } catch (err) { + console.error("ABC error:", err); + res.status(500).json({ error: "Internal server error" }); + } +}); + +// ───────────────────────────────────────────────────────────────────────────── +// GET /api/procurement/market-intel?type=&days=90&signal=buy_now +// ───────────────────────────────────────────────────────────────────────────── +procurementRouter.get("/market-intel", async (req: Request, res: Response) => { + try { + const { + type, days = "90", signal, technology, + limit = "50", offset = "0" + } = req.query; + + let sql = ` + SELECT * FROM market_intelligence + WHERE created_at > NOW() - INTERVAL '1 day' * $1 + `; + const params: any[] = [parseInt(days as string)]; + let idx = 2; + + if (type) { sql += ` AND intel_type = $${idx}`; params.push(type); idx++; } + if (signal) { sql += ` AND buy_signal_implication = $${idx}`; params.push(signal); idx++; } + if (technology) { sql += ` AND $${idx} = ANY(technologies)`; params.push(technology); idx++; } + + sql += ` ORDER BY relevance_score DESC, created_at DESC LIMIT $${idx} OFFSET $${idx + 1}`; + params.push(parseInt(limit as string), parseInt(offset as string)); + + const result = await pool.query(sql, params); + res.json({ data: result.rows, total: result.rowCount }); + } catch (err) { + console.error("Market intel error:", err); + res.status(500).json({ error: "Internal server error" }); + } +}); + +// ───────────────────────────────────────────────────────────────────────────── +// GET /api/procurement/stock-trends/:transceiver_id +// ───────────────────────────────────────────────────────────────────────────── +procurementRouter.get("/stock-trends/:id", async (req: Request, res: Response) => { + try { + const result = await pool.query(` + SELECT DISTINCT ON (ss.vendor_id, date_trunc('day', ss.scraped_at)) + ss.stock_level, ss.stock_quantity, ss.incoming_quantity, + ss.incoming_eta, ss.lead_time_days, ss.scraped_at, + v.name AS vendor_name + FROM stock_snapshots ss + JOIN vendors v ON ss.vendor_id = v.id + WHERE ss.transceiver_id::text = $1 + ORDER BY ss.vendor_id, date_trunc('day', ss.scraped_at) DESC, ss.scraped_at DESC + LIMIT 200 + `, [req.params.id]); + + res.json({ data: result.rows }); + } catch (err) { + console.error("Stock trends error:", err); + res.status(500).json({ error: "Internal server error" }); + } +}); + +// ───────────────────────────────────────────────────────────────────────────── +// GET /api/procurement/lifecycle?type=eol_announced&impact=high&days=180 +// ───────────────────────────────────────────────────────────────────────────── +procurementRouter.get("/lifecycle", async (req: Request, res: Response) => { + try { + const { + type, impact, technology, signal, + days = "180", limit = "50" + } = req.query; + + let sql = ` + SELECT ple.*, + t.part_number, t.standard_name, t.form_factor, t.speed_gbps + FROM product_lifecycle_events ple + LEFT JOIN transceivers t ON ple.transceiver_id = t.id + WHERE ple.created_at > NOW() - INTERVAL '1 day' * $1 + `; + const params: any[] = [parseInt(days as string)]; + let idx = 2; + + if (type) { sql += ` AND ple.event_type = $${idx}`; params.push(type); idx++; } + if (impact) { sql += ` AND ple.impact_level = $${idx}`; params.push(impact); idx++; } + if (technology) { sql += ` AND ple.technology ILIKE $${idx}`; params.push(`%${technology}%`); idx++; } + if (signal) { sql += ` AND ple.buy_signal = $${idx}`; params.push(signal); idx++; } + + sql += ` ORDER BY ple.impact_level DESC, ple.effective_date ASC NULLS LAST, ple.created_at DESC LIMIT $${idx}`; + params.push(parseInt(limit as string)); + + const result = await pool.query(sql, params); + res.json({ data: result.rows }); + } catch (err) { + console.error("Lifecycle error:", err); + res.status(500).json({ error: "Internal server error" }); + } +}); diff --git a/packages/dashboard/index.html b/packages/dashboard/index.html index 9e5b183..5cd7449 100644 --- a/packages/dashboard/index.html +++ b/packages/dashboard/index.html @@ -639,6 +639,52 @@ .compare-diff { background: var(--yellow-light); } .compare-best { background: var(--green-light); font-weight: 600; } .compare-cb { width: 16px; height: 16px; cursor: pointer; accent-color: var(--purple); } + + /* === PROCUREMENT TAB === */ + .proc-btn { + background: var(--surface2); border: 1px solid var(--border); + padding: 5px 14px; border-radius: 6px; cursor: pointer; + font-size: 0.78rem; font-weight: 600; color: var(--text-dim); + transition: all 0.15s; + } + .proc-btn:hover { color: var(--text); border-color: var(--accent); } + .proc-btn-active { background: var(--accent); color: #fff !important; border-color: var(--accent) !important; } + + .signal-card { + background: var(--surface); border: 1px solid var(--border); + border-radius: var(--radius-lg); padding: 1rem; + box-shadow: var(--shadow-card); position: relative; + } + .signal-card:hover { box-shadow: var(--shadow-hover); } + .signal-buy { border-left: 3px solid #c1121f; } + .signal-wait { border-left: 3px solid var(--yellow); } + .signal-hold { border-left: 3px solid var(--green); } + .signal-monitor { border-left: 3px solid var(--purple); } + + .sig-badge-buy { background:#fde8e8; color:#c1121f; } + .sig-badge-wait { background:var(--yellow-light); color:#a06000; } + .sig-badge-hold { background:var(--green-light); color:#1b4332; } + .sig-badge-monitor { background:var(--purple-light); color:#5a3fcf; } + + .intel-card { + background: var(--surface); border: 1px solid var(--border); + border-radius: var(--radius-lg); padding: 1rem; + box-shadow: var(--shadow-card); + } + .intel-badge { + display: inline-block; padding: 2px 8px; border-radius: 4px; + font-size: 0.65rem; font-weight: 700; text-transform: uppercase; + letter-spacing: 0.06em; margin-bottom: 0.5rem; + } + .intel-buy { background:#fde8e8; color:#c1121f; } + .intel-wait { background:var(--yellow-light); color:#a06000; } + .intel-hold { background:var(--green-light); color:#1b4332; } + .intel-monitor { background:var(--purple-light); color:#5a3fcf; } + .intel-none { background:var(--surface2); color:var(--text-dim); } + + .abc-a { background:#fde8e8; color:#c1121f; font-weight:800; padding:2px 7px; border-radius:4px; } + .abc-b { background:var(--yellow-light); color:#a06000; font-weight:800; padding:2px 7px; border-radius:4px; } + .abc-c { background:var(--surface2); color:var(--text-dim); font-weight:800; padding:2px 7px; border-radius:4px; } @@ -680,6 +726,7 @@
News
Finder
Blog Engine
+
Procurement Intel
@@ -905,6 +952,75 @@
+ + + + @@ -1221,6 +1337,7 @@ function goToTab(tabName) { if (tabName === 'news') loadNews(); if (tabName === 'blog') loadBlogDrafts(); if (tabName === 'finder') document.getElementById('finder-switch-input').focus(); + if (tabName === 'procurement') loadProcurement(); } document.querySelectorAll('.tab').forEach(function(tab) { @@ -2897,6 +3014,210 @@ el('compare-overlay').addEventListener('click', function(e) { if (e.target === this) this.classList.remove('visible'); }); +// ─── PROCUREMENT INTEL ─────────────────────────────────────────────────────── + +var procCurrentSignalFilter = ''; +var procCurrentAbcFilter = ''; +var procSignalsData = []; +var procAbcData = []; + +function showProcSection(name) { + ['signals','abc','market','lifecycle'].forEach(function(s) { + var sec = el('proc-section-' + s); + var btn = el('proc-btn-' + s); + if (sec) sec.style.display = s === name ? '' : 'none'; + if (btn) { btn.classList.toggle('proc-btn-active', s === name); } + }); +} + +async function loadProcurement() { + await Promise.all([ + loadProcSignals(), + loadProcAbc(), + loadProcMarketIntel(), + loadProcLifecycle(), + ]); +} + +async function loadProcSignals() { + var container = el('proc-signals-grid'); + container.innerHTML = '
Loading signals...
'; + try { + var d = await api('/api/procurement/signals?limit=100'); + procSignalsData = d.data || []; + renderSignals(procCurrentSignalFilter); + } catch(e) { + container.innerHTML = '
No reorder signals yet — run the scraper to populate.
'; + } +} + +function filterSignal(sig) { + procCurrentSignalFilter = sig; + renderSignals(sig); +} + +function renderSignals(filterSig) { + var data = filterSig ? procSignalsData.filter(function(r) { return r.signal === filterSig; }) : procSignalsData; + var container = el('proc-signals-grid'); + if (!data.length) { + container.innerHTML = '
No signals for this filter.
'; + return; + } + var signalIcon = { buy_now:'🔴', wait:'🟡', hold:'🟢', monitor:'🔵' }; + var signalLabel = { buy_now:'Buy Now', wait:'Wait', hold:'Hold', monitor:'Monitor' }; + container.innerHTML = data.map(function(r) { + var reasons = []; + try { reasons = JSON.parse(r.reasons || '[]'); } catch(e) {} + var sigClass = 'signal-' + (r.signal || 'monitor').replace('_','-'); + var badgeClass = 'sig-badge-' + (r.signal || 'monitor').replace('_now','').replace('_',''); + var abcBadge = r.abc_class ? '' + r.abc_class + '' : ''; + var strengthPct = Math.round((r.signal_strength || 0) * 100); + var productName = r.standard_name || r.part_number || r.slug || '—'; + var imgHtml = ''; + if (r.image_r2_key) { + imgHtml = ''; + } + return '
' + + '
' + + imgHtml + + '
' + + '
' + esc(productName) + '
' + + '
' + esc(r.form_factor || '') + (r.speed_gbps ? ' · ' + r.speed_gbps + 'G' : '') + (r.vendor_name ? ' · ' + esc(r.vendor_name) : '') + '
' + + '
' + + '
' + + '
' + + '' + (signalIcon[r.signal] || '') + ' ' + (signalLabel[r.signal] || r.signal) + '' + + abcBadge + + (r.supply_risk ? '' + esc(r.supply_risk) + ' risk' : '') + + '
' + + '
' + + (reasons.length ? reasons.map(function(r2) { return '→ ' + esc(r2); }).join('
') : 'Insufficient data') + + '
' + + '
' + + (r.stock_trend ? 'Stock: ' + r.stock_trend + '' : '') + + (r.price_trend ? 'Price: ' + r.price_trend + '' : '') + + (r.lead_time_weeks ? 'Lead: ' + r.lead_time_weeks + 'w' : '') + + '
' + + '
' + + '
' + + '
' + + '
Signal strength: ' + strengthPct + '%
' + + '
'; + }).join(''); +} + +async function loadProcAbc() { + try { + var d = await api('/api/procurement/abc?limit=200'); + procAbcData = d.data || []; + renderAbcTable(procCurrentAbcFilter); + } catch(e) { + el('abc-tbody').innerHTML = 'No ABC data yet — run compute:abc job.'; + } +} + +function filterAbc(cls) { + procCurrentAbcFilter = cls; + renderAbcTable(cls); +} + +function renderAbcTable(filterCls) { + var data = filterCls ? procAbcData.filter(function(r) { return r.abc_class === filterCls; }) : procAbcData; + var sigIcon = { buy_now:'🔴', wait:'🟡', hold:'🟢', monitor:'🔵' }; + el('abc-tbody').innerHTML = data.map(function(r) { + var abcEl = '' + (r.abc_class || '—') + ''; + return '' + + '' + abcEl + '' + + '
' + esc(r.standard_name || r.part_number || '—') + '
' + esc(r.vendor_name || '') + '
' + + '' + esc(r.form_factor || '—') + '' + + '' + (r.demand_score ? parseFloat(r.demand_score).toFixed(0) : '—') + '' + + '' + (r.compat_count || 0) + '' + + '' + (r.vendor_count || 0) + '' + + '' + esc(r.supply_risk || '—') + '' + + '' + (r.signal ? (sigIcon[r.signal] || '') + ' ' + r.signal.replace('_',' ') : '—') + '' + + ''; + }).join('') || 'No data for this filter.'; +} + +async function loadProcMarketIntel() { + var container = el('proc-market-grid'); + try { + var d = await api('/api/procurement/market-intel?days=180&limit=50'); + var items = d.data || []; + if (!items.length) { + container.innerHTML = '
No market intelligence yet.
'; + return; + } + var typeIcon = { + capex_cycle:'💰', trade_show:'🎪', standard_ratified:'📋', + standard_draft:'📝', distributor_lead_time:'🚚', supply_chain:'🏭', tender:'📑' + }; + container.innerHTML = items.map(function(item) { + var sig = item.buy_signal_implication || 'none'; + var badgeClass = 'intel-' + sig.replace('_now','').replace('_',''); + var sigLabel = { buy_now:'🔴 Buy Now', wait:'🟡 Wait', hold:'🟢 Hold', monitor:'🔵 Monitor', none:'—' }; + var techs = (item.technologies || []).map(function(t) { + return '' + esc(t) + ''; + }).join(' '); + return '
' + + '
' + + '' + (typeIcon[item.intel_type] || '📊') + '' + + '
' + + '' + (sigLabel[sig] || sig) + '' + + '
' + esc(item.title) + '
' + + '
' + + '
' + esc(item.summary || '') + '
' + + (techs ? '
' + techs + '
' : '') + + '
' + + '' + esc(item.source_name) + '' + + (item.impact_horizon_months ? 'Impact: ~' + item.impact_horizon_months + ' months' : '') + + '
' + + '
'; + }).join(''); + } catch(e) { + container.innerHTML = '
Could not load market intelligence.
'; + } +} + +async function loadProcLifecycle() { + var container = el('proc-lifecycle-grid'); + try { + var d = await api('/api/procurement/lifecycle?days=365&limit=50'); + var items = d.data || []; + if (!items.length) { + container.innerHTML = '
No lifecycle events yet.
'; + return; + } + var typeIcon = { + eol_announced:'⛔', eol_effective:'🚫', standard_ratified:'✅', + standard_draft:'📝', capex_peak:'💰', trade_show:'🎪', + supply_risk:'⚠️', tender:'📑', price_floor:'📉' + }; + var impactColor = { critical:'#c1121f', high:'#c1121f', medium:'var(--yellow)', low:'var(--green)' }; + var sigLabel = { buy_now:'🔴 Buy Now', wait:'🟡 Wait', hold:'🟢 Hold', monitor:'🔵 Monitor' }; + container.innerHTML = items.map(function(item) { + var ic = impactColor[item.impact_level] || 'var(--text-dim)'; + var productInfo = item.part_number ? esc(item.part_number) + (item.form_factor ? ' · ' + esc(item.form_factor) : '') : ''; + var dateStr = item.effective_date ? new Date(item.effective_date).toLocaleDateString('de-DE') : ''; + return '
' + + '
' + + '' + (typeIcon[item.event_type] || '📌') + '' + + '
' + + (item.buy_signal ? '' + (sigLabel[item.buy_signal] || item.buy_signal) + '' : '') + + '
' + esc(item.title) + '
' + + '
' + + (item.description ? '
' + esc(item.description.substring(0, 200)) + (item.description.length > 200 ? '…' : '') + '
' : '') + + '
' + + '' + esc(item.source_name || '') + (productInfo ? ' · ' + productInfo : '') + '' + + (dateStr ? '' + dateStr + '' : '') + + '
' + + '
'; + }).join(''); + } catch(e) { + container.innerHTML = '
Could not load lifecycle events.
'; + } +} + // INIT loadOverview(); diff --git a/packages/scraper/src/crawler-llm/core.ts b/packages/scraper/src/crawler-llm/core.ts new file mode 100644 index 0000000..1622457 --- /dev/null +++ b/packages/scraper/src/crawler-llm/core.ts @@ -0,0 +1,349 @@ +/** + * Crawler LLM — Core extraction engine. + * + * Uses Ollama (local LLM) to extract structured product data from HTML. + * Two-stage pipeline: + * 1. Page type detection (product vs. category) — cheap, fast + * 2. Structured data extraction with schema enforcement + * + * Vendor-specific profiles guide the LLM without hard-coding selectors. + */ + +import { pool } from "../utils/db"; +import type { StockExtractionResult, MarketIntelExtractionResult } from "./stock-schema"; +import { VENDOR_PROFILES } from "./stock-schema"; +import { validateStockExtraction } from "./validator"; + +const OLLAMA_HOST = process.env.OLLAMA_HOST || "http://192.168.178.169:11434"; +const OLLAMA_MODEL = process.env.CRAWLER_LLM_MODEL || "qwen2.5:14b"; +const MAX_HTML_CHARS = 12_000; // truncate to keep prompt manageable + +// ───────────────────────────────────────────────────────────────────────────── +// Ollama API call +// ───────────────────────────────────────────────────────────────────────────── + +async function ollamaGenerate(prompt: string): Promise { + const res = await fetch(`${OLLAMA_HOST}/api/generate`, { + method: "POST", + headers: { "Content-Type": "application/json" }, + body: JSON.stringify({ + model: OLLAMA_MODEL, + prompt, + stream: false, + format: "json", + options: { temperature: 0.1, num_predict: 1024 }, + }), + }); + if (!res.ok) throw new Error(`Ollama error: ${res.status} ${await res.text()}`); + const data = await res.json() as { response: string }; + return data.response; +} + +// ───────────────────────────────────────────────────────────────────────────── +// Stage 1: Page type detection (fast, binary) +// ───────────────────────────────────────────────────────────────────────────── + +async function detectPageType(html: string, url: string, vendorSlug?: string): Promise<{ + is_product_page: boolean; + confidence: number; + evidence: string; +}> { + const profile = vendorSlug ? VENDOR_PROFILES[vendorSlug] : null; + const hints = profile + ? `\nVendor hints — Product page signals: ${profile.product_page_signals.join(", ")}. Category page signals: ${profile.category_page_signals.join(", ")}.` + : ""; + + const prompt = `You are a web scraper assistant. Determine if this HTML is a single product page or a category/listing page. + +URL: ${url}${hints} + +HTML (truncated): +${html.substring(0, 3000)} + +Respond with JSON only: +{ + "is_product_page": true or false, + "confidence": 0.0 to 1.0, + "evidence": "brief quote from the HTML that supports your decision" +}`; + + const raw = await ollamaGenerate(prompt); + try { + const parsed = JSON.parse(raw); + return { + is_product_page: Boolean(parsed.is_product_page), + confidence: Number(parsed.confidence) || 0, + evidence: String(parsed.evidence || ""), + }; + } catch { + return { is_product_page: false, confidence: 0, evidence: "JSON parse failed" }; + } +} + +// ───────────────────────────────────────────────────────────────────────────── +// Stage 2: Full product extraction +// ───────────────────────────────────────────────────────────────────────────── + +async function extractProductData( + html: string, + url: string, + vendorSlug?: string +): Promise { + const profile = vendorSlug ? VENDOR_PROFILES[vendorSlug] : null; + const hints = profile ? ` +Vendor: ${profile.name} (${profile.currency}) +Price hint: ${profile.price_hint || "find the main selling price"} +Stock hint: ${profile.stock_hint || "find availability status"}` : ""; + + const prompt = `You are a product data extractor for optical transceiver products. Extract structured data from this product page HTML. + +URL: ${url}${hints} + +HTML (truncated to ${MAX_HTML_CHARS} chars): +${html.substring(0, MAX_HTML_CHARS)} + +Extract and respond with JSON only — use null for any field you cannot find with confidence: +{ + "is_product_page": true, + "confidence": 0.0 to 1.0, + "source_evidence": "brief quote from HTML supporting your extraction", + + "price": number or null, + "currency": "USD" or "EUR" or "GBP" or "CNY" or null, + "price_breaks": [{"qty": number, "price": number}] or [], + + "stock_level": "in_stock" or "out_of_stock" or "limited" or "unknown", + "stock_quantity": number or null, + "incoming_quantity": number or null, + "incoming_eta": "YYYY-MM-DD" or null, + "lead_time_days": number or null, + "moq": number or null, + + "part_number": "exact part number string" or null, + "standard_name": "manufacturer's exact product name as written on the page" or null, + "form_factor": "SFP+" or "QSFP28" or "QSFP-DD" etc or null, + "speed_gbps": number or null +} + +Rules: +- standard_name MUST be the manufacturer's exact product designation, not a generic description +- If you see "All Optical Transceivers" or similar category text as the name, set standard_name to null +- price_breaks only if there is a visible quantity/price table +- incoming_quantity: look for text like "X units incoming", "X im Zulauf", "Expected: X" +- Set confidence < 0.5 if you are guessing`; + + const raw = await ollamaGenerate(prompt); + try { + const parsed = JSON.parse(raw); + return { + is_product_page: Boolean(parsed.is_product_page ?? true), + confidence: Number(parsed.confidence) || 0, + source_evidence: String(parsed.source_evidence || ""), + price: parsed.price != null ? Number(parsed.price) : null, + currency: parsed.currency || null, + price_breaks: Array.isArray(parsed.price_breaks) ? parsed.price_breaks : [], + stock_level: (["in_stock", "out_of_stock", "limited"].includes(parsed.stock_level)) + ? parsed.stock_level + : "unknown", + stock_quantity: parsed.stock_quantity != null ? Number(parsed.stock_quantity) : null, + incoming_quantity: parsed.incoming_quantity != null ? Number(parsed.incoming_quantity) : null, + incoming_eta: parsed.incoming_eta || null, + lead_time_days: parsed.lead_time_days != null ? Number(parsed.lead_time_days) : null, + moq: parsed.moq != null ? Number(parsed.moq) : null, + part_number: parsed.part_number || null, + standard_name: parsed.standard_name || null, + form_factor: parsed.form_factor || null, + speed_gbps: parsed.speed_gbps != null ? Number(parsed.speed_gbps) : null, + }; + } catch { + return { + is_product_page: false, + confidence: 0, + source_evidence: "JSON parse failed", + price: null, currency: null, price_breaks: [], + stock_level: "unknown", + stock_quantity: null, incoming_quantity: null, incoming_eta: null, + lead_time_days: null, moq: null, + part_number: null, standard_name: null, form_factor: null, speed_gbps: null, + }; + } +} + +// ───────────────────────────────────────────────────────────────────────────── +// Market intelligence extraction +// ───────────────────────────────────────────────────────────────────────────── + +export async function extractMarketIntel( + text: string, + url: string, + sourceName: string +): Promise { + const prompt = `You are an optical transceiver market analyst. Analyze this text for market intelligence relevant to transceiver procurement. + +Source: ${sourceName} +URL: ${url} + +Text: +${text.substring(0, 8000)} + +Respond with JSON only: +{ + "is_relevant": true or false (false if nothing relevant to transceiver markets), + "confidence": 0.0 to 1.0, + "source_evidence": "brief quote supporting your analysis", + + "intel_type": one of: "capex_cycle", "trade_show", "standard_ratified", "standard_draft", "distributor_lead_time", "supply_chain", "tender", + "title": "concise title (max 100 chars)", + "summary": "2-3 sentence summary of the key insight", + "technologies": ["400G", "QSFP-DD", etc — transceiver technologies mentioned], + "buy_signal_implication": one of: "buy_now", "wait", "hold", "monitor", "none", + "impact_horizon_months": estimated months until this affects the market (number), + "published_at": "YYYY-MM-DD" or null +} + +Guidelines: +- buy_now: shortage, EOL, CapEx surge → order before prices rise +- wait: new standard coming → current products will drop in price +- hold: stable market, no urgency +- monitor: interesting but unclear impact +- impact_horizon_months: 0-3 for immediate, 3-12 for medium, 12+ for long-term`; + + const raw = await ollamaGenerate(prompt); + try { + const p = JSON.parse(raw); + return { + is_relevant: Boolean(p.is_relevant), + confidence: Number(p.confidence) || 0, + source_evidence: String(p.source_evidence || ""), + intel_type: p.intel_type || "supply_chain", + title: String(p.title || "").substring(0, 200), + summary: String(p.summary || ""), + technologies: Array.isArray(p.technologies) ? p.technologies : [], + buy_signal_implication: p.buy_signal_implication || "none", + impact_horizon_months: Number(p.impact_horizon_months) || 6, + published_at: p.published_at || null, + }; + } catch { + return { + is_relevant: false, confidence: 0, source_evidence: "parse error", + intel_type: "supply_chain", title: "", summary: "", technologies: [], + buy_signal_implication: "none", impact_horizon_months: 0, published_at: null, + }; + } +} + +// ───────────────────────────────────────────────────────────────────────────── +// Public API — Main scrape function +// ───────────────────────────────────────────────────────────────────────────── + +export interface CrawlerLLMResult { + extraction: StockExtractionResult; + validation_passed: boolean; + validation_errors: string[]; + validation_warnings: string[]; +} + +export async function scrapeWithLLM( + html: string, + url: string, + options: { + vendorSlug?: string; + vendorId?: string; + transceiverIds?: string[]; // candidate matches (pre-filtered by form_factor/speed) + speedGbps?: number; + skipPageDetection?: boolean; // set true if URL is known product page + } = {} +): Promise { + const { vendorSlug, speedGbps, skipPageDetection } = options; + + // Stage 1: Page type detection (skip if caller already knows it's a product page) + if (!skipPageDetection) { + const pageType = await detectPageType(html, url, vendorSlug); + if (!pageType.is_product_page) { + return { + extraction: { + is_product_page: false, + confidence: pageType.confidence, + source_evidence: pageType.evidence, + price: null, currency: null, price_breaks: [], + stock_level: "unknown", + stock_quantity: null, incoming_quantity: null, incoming_eta: null, + lead_time_days: null, moq: null, + part_number: null, standard_name: null, form_factor: null, speed_gbps: null, + }, + validation_passed: false, + validation_errors: ["Not a product page"], + validation_warnings: [], + }; + } + } + + // Stage 2: Full extraction + const extraction = await extractProductData(html, url, vendorSlug); + + // Stage 3: Rule-based validation + const validation = validateStockExtraction(extraction, speedGbps); + + return { + extraction, + validation_passed: validation.passed, + validation_errors: validation.errors, + validation_warnings: validation.warnings, + }; +} + +// ───────────────────────────────────────────────────────────────────────────── +// Persist to DB — saves stock snapshot and logs the scrape +// ───────────────────────────────────────────────────────────────────────────── + +export async function persistStockSnapshot( + result: CrawlerLLMResult, + url: string, + vendorId: string, + transceiverIds: string[] +): Promise { + const { extraction, validation_passed } = result; + + // Always log (for audit/debug) + await pool.query( + `INSERT INTO crawler_llm_log + (url, vendor_id, is_product_page, extracted_data, confidence, validation_passed, + failure_reason, model_used) + VALUES ($1, $2, $3, $4, $5, $6, $7, $8)`, + [ + url, + vendorId, + extraction.is_product_page, + JSON.stringify(extraction), + extraction.confidence, + validation_passed, + validation_passed ? null : result.validation_errors.join("; "), + OLLAMA_MODEL, + ] + ); + + if (!validation_passed || !extraction.is_product_page) return; + + // Save stock snapshot for each matched transceiver + for (const transceiverIdStr of transceiverIds) { + await pool.query( + `INSERT INTO stock_snapshots + (transceiver_id, vendor_id, stock_level, stock_quantity, incoming_quantity, + incoming_eta, lead_time_days, moq, price_breaks, source_url, crawler_confidence) + VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11)`, + [ + transceiverIdStr, + vendorId, + extraction.stock_level, + extraction.stock_quantity, + extraction.incoming_quantity, + extraction.incoming_eta, + extraction.lead_time_days, + extraction.moq, + extraction.price_breaks.length > 0 ? JSON.stringify(extraction.price_breaks) : null, + url, + extraction.confidence, + ] + ); + } +} diff --git a/packages/scraper/src/crawler-llm/stock-schema.ts b/packages/scraper/src/crawler-llm/stock-schema.ts new file mode 100644 index 0000000..2ddfacb --- /dev/null +++ b/packages/scraper/src/crawler-llm/stock-schema.ts @@ -0,0 +1,135 @@ +/** + * Crawler LLM — Schema definitions for structured product extraction. + * + * Every schema includes a `confidence` and `source_evidence` field so the LLM + * is forced to cite its work. This enables validation and debugging. + */ + +export interface StockExtractionResult { + is_product_page: boolean; // false = category/listing page → discard + confidence: number; // 0.0 – 1.0 — LLM self-assessment + source_evidence: string; // which text passage the LLM used + + // Pricing + price: number | null; + currency: "USD" | "EUR" | "GBP" | "CNY" | null; + price_breaks: PriceBreak[]; // volume discount tiers + + // Stock + stock_level: "in_stock" | "out_of_stock" | "limited" | "unknown"; + stock_quantity: number | null; // exact qty if shown + incoming_quantity: number | null; // "18 im Zulauf" + incoming_eta: string | null; // ISO date string "2026-04-15" + lead_time_days: number | null; + moq: number | null; // minimum order quantity + + // Product identity (for cross-validation) + part_number: string | null; + standard_name: string | null; // manufacturer's exact product name + form_factor: string | null; + speed_gbps: number | null; +} + +export interface PriceBreak { + qty: number; + price: number; +} + +export interface MarketIntelExtractionResult { + is_relevant: boolean; // false = skip + confidence: number; + source_evidence: string; + + intel_type: "capex_cycle" | "trade_show" | "standard_ratified" | "standard_draft" | "distributor_lead_time" | "supply_chain" | "tender"; + title: string; + summary: string; + technologies: string[]; // ['400G', 'QSFP-DD', ...] + buy_signal_implication: "buy_now" | "wait" | "hold" | "monitor" | "none"; + impact_horizon_months: number; + published_at: string | null; // ISO date +} + +/** Vendor-specific hints to improve LLM extraction accuracy */ +export interface VendorProfile { + slug: string; + name: string; + currency: "USD" | "EUR" | "GBP" | "CNY"; + product_page_signals: string[]; // text patterns that indicate a product page + category_page_signals: string[]; // text patterns that indicate a category page + price_hint: string | null; // natural language hint for the LLM + stock_hint: string | null; + known_moq: number | null; +} + +export const VENDOR_PROFILES: Record = { + "flexoptix": { + slug: "flexoptix", + name: "Flexoptix", + currency: "EUR", + product_page_signals: ["In den Warenkorb", "Add to Cart", "part number", "SKU:", "P/N:"], + category_page_signals: ["Alle Produkte", "Filter", "Ergebnisse", "products found"], + price_hint: "Price is shown in EUR, usually near 'In den Warenkorb' button. May show 'auf Anfrage' if not listed.", + stock_hint: "Look for 'auf Lager', 'Lieferzeit', 'sofort lieferbar', or stock badge near price.", + known_moq: 1, + }, + "fs-com": { + slug: "fs-com", + name: "FS.com", + currency: "USD", + product_page_signals: ["Add to Cart", "Part No.", "SKU", "In Stock", "Reviews"], + category_page_signals: ["Products", "Filter by", "Sort by", "items found", "Category"], + price_hint: "Price is in USD, shown prominently near 'Add to Cart'. May show qty pricing table.", + stock_hint: "Look for 'In Stock', exact number like '847 In Stock', or 'Out of Stock'.", + known_moq: 1, + }, + "10gtek": { + slug: "10gtek", + name: "10Gtek", + currency: "USD", + product_page_signals: ["Add to Cart", "Product Code:", "In Stock", "Ships from"], + category_page_signals: ["Shop All", "Filter", "Category", "Sort By"], + price_hint: "Price in USD near Add to Cart button. Volume pricing sometimes shown as table.", + stock_hint: "Stock level shown as text: 'In Stock', 'Low Stock', 'Out of Stock'.", + known_moq: 1, + }, + "atgbics": { + slug: "atgbics", + name: "ATGBICS", + currency: "GBP", + product_page_signals: ["Add to Basket", "Part Number:", "Stock:", "Delivery"], + category_page_signals: ["Products", "Browse by", "Refine by"], + price_hint: "Price in GBP. ATGBICS uses Shopify, price is in a span with class 'price'.", + stock_hint: "Stock shown as 'In Stock', 'Limited Stock', or 'Out of Stock' near price.", + known_moq: 1, + }, + "prolabs": { + slug: "prolabs", + name: "ProLabs", + currency: "USD", + product_page_signals: ["Add to Cart", "Part Number", "In Stock", "Specs"], + category_page_signals: ["Results", "Filter", "Category", "Sort"], + price_hint: "Price in USD. ProLabs may require login for prices — if so, mark price as null.", + stock_hint: "Stock availability shown near product title.", + known_moq: 1, + }, + "farnell": { + slug: "farnell", + name: "Farnell", + currency: "EUR", + product_page_signals: ["Add to Basket", "Order Code:", "Stock:", "Lead Time:"], + category_page_signals: ["Products", "Refine Search", "Category", "results for"], + price_hint: "Price in EUR or GBP. Farnell shows break prices in a table with columns Qty/Price.", + stock_hint: "Stock shown as number, e.g. '47 In Stock'. Lead time shown in business days.", + known_moq: 1, + }, + "mouser": { + slug: "mouser", + name: "Mouser Electronics", + currency: "EUR", + product_page_signals: ["Add to Cart", "Mouser Part No.", "Mfr. Part No.", "In Stock:"], + category_page_signals: ["Search Results", "Filter Results", "Products (", "Sort By"], + price_hint: "Mouser shows price per unit and break quantities. USD or EUR depending on locale.", + stock_hint: "Stock shown as exact number: 'In Stock: 124'. Lead time shown for out-of-stock items.", + known_moq: null, + }, +}; diff --git a/packages/scraper/src/crawler-llm/validator.ts b/packages/scraper/src/crawler-llm/validator.ts new file mode 100644 index 0000000..6b4778c --- /dev/null +++ b/packages/scraper/src/crawler-llm/validator.ts @@ -0,0 +1,157 @@ +/** + * Crawler LLM — Rule-based validator. + * + * Runs AFTER the LLM extraction to catch hallucinations and obvious errors. + * The LLM is good at structure; this catches range violations and nonsense. + */ + +import type { StockExtractionResult } from "./stock-schema"; + +export interface ValidationResult { + passed: boolean; + warnings: string[]; + errors: string[]; +} + +/** Expected price ranges per speed class (USD/EUR). Rough but effective. */ +const PRICE_RANGES: Record = { + "1G": [10, 500], + "10G": [20, 2000], + "25G": [30, 2000], + "40G": [50, 3000], + "100G": [80, 15000], + "200G": [200, 20000], + "400G": [200, 50000], + "800G": [500, 80000], +}; + +const VALID_FORM_FACTORS = new Set([ + "SFP", "SFP+", "SFP28", "SFP56", "SFP-DD", + "QSFP", "QSFP+", "QSFP28", "QSFP56", "QSFP-DD", "QSFP112", + "OSFP", "OSFP-RHS", + "CFP", "CFP2", "CFP4", "CFP8", + "XFP", "X2", "XENPAK", + "DSFP", "CSFP", +]); + +const VALID_CURRENCIES = new Set(["USD", "EUR", "GBP", "CNY"]); + +export function validateStockExtraction( + result: StockExtractionResult, + speedGbps?: number +): ValidationResult { + const errors: string[] = []; + const warnings: string[] = []; + + // Not a product page — caller should discard, not an error + if (!result.is_product_page) { + return { passed: false, errors: ["Not a product page"], warnings: [] }; + } + + // Confidence too low + if (result.confidence < 0.5) { + errors.push(`Confidence ${result.confidence} below threshold 0.5`); + } + + // Price validation + if (result.price !== null) { + if (result.price <= 0) { + errors.push(`Price ${result.price} is not positive`); + } + if (result.price > 500_000) { + errors.push(`Price ${result.price} exceeds maximum sanity limit`); + } + if (!result.currency || !VALID_CURRENCIES.has(result.currency)) { + errors.push(`Invalid currency: ${result.currency}`); + } + + // Speed-class price range check + if (speedGbps) { + const speedKey = `${speedGbps}G`; + const range = PRICE_RANGES[speedKey]; + if (range && (result.price < range[0] * 0.1 || result.price > range[1] * 10)) { + warnings.push(`Price ${result.price} ${result.currency} looks unusual for ${speedKey} (expected ${range[0]}–${range[1]})`); + } + } + } + + // Stock quantity sanity + if (result.stock_quantity !== null) { + if (result.stock_quantity < 0) { + errors.push(`Stock quantity ${result.stock_quantity} is negative`); + } + if (result.stock_quantity > 100_000) { + warnings.push(`Stock quantity ${result.stock_quantity} unusually high — verify`); + } + } + + // Lead time sanity + if (result.lead_time_days !== null) { + if (result.lead_time_days < 0) { + errors.push(`Lead time ${result.lead_time_days} is negative`); + } + if (result.lead_time_days > 730) { + warnings.push(`Lead time ${result.lead_time_days} days (>2 years) — verify`); + } + } + + // MOQ sanity + if (result.moq !== null && result.moq < 1) { + errors.push(`MOQ ${result.moq} must be at least 1`); + } + + // Form factor check + if (result.form_factor && !VALID_FORM_FACTORS.has(result.form_factor)) { + warnings.push(`Unknown form factor: ${result.form_factor}`); + } + + // Price break consistency + if (result.price_breaks.length > 0) { + for (const pb of result.price_breaks) { + if (pb.qty < 1 || pb.price <= 0) { + errors.push(`Invalid price break: qty=${pb.qty} price=${pb.price}`); + } + if (result.price && pb.price > result.price * 2) { + warnings.push(`Price break ${pb.qty}x=${pb.price} higher than unit price — unusual`); + } + } + } + + // Incoming ETA must be a future-ish date + if (result.incoming_eta) { + const eta = new Date(result.incoming_eta); + if (isNaN(eta.getTime())) { + errors.push(`Invalid incoming_eta date: ${result.incoming_eta}`); + } + } + + return { + passed: errors.length === 0, + errors, + warnings, + }; +} + +/** Cross-source comparison: do two extractions agree within tolerance? */ +export function crossValidate( + a: StockExtractionResult, + b: StockExtractionResult, + priceTolerance = 0.10 // 10% price difference allowed +): boolean { + if (a.price === null || b.price === null) return false; + + // Both in same currency + if (a.currency !== b.currency) return false; + + // Price within tolerance + const diff = Math.abs(a.price - b.price) / Math.max(a.price, b.price); + if (diff > priceTolerance) return false; + + // Part numbers match (if both present) + if (a.part_number && b.part_number) { + const normalize = (s: string) => s.replace(/[\s\-_]/g, "").toUpperCase(); + if (normalize(a.part_number) !== normalize(b.part_number)) return false; + } + + return true; +} diff --git a/packages/scraper/src/scheduler.ts b/packages/scraper/src/scheduler.ts index a280b01..3b90a69 100644 --- a/packages/scraper/src/scheduler.ts +++ b/packages/scraper/src/scheduler.ts @@ -67,6 +67,9 @@ export async function registerSchedules(boss: PgBoss): Promise { "scrape:news", "scrape:faq", "scrape:docs", + "scrape:market-intel", + "compute:abc", + "compute:reorder-signals", ]; for (const q of queues) { await boss.createQueue(q).catch(() => { /* already exists */ }); @@ -140,6 +143,24 @@ export async function registerSchedules(boss: PgBoss): Promise { expireInSeconds: 7200, }); + // Market intelligence: OFC/ECOC, IEEE, TED, Farnell/Mouser lead times (every Tuesday 5am) + await boss.schedule("scrape:market-intel", "0 5 * * 2", {}, { + retryLimit: 2, + expireInSeconds: 3600, + }); + + // ABC classification recompute (after each major pricing run — daily at 8am) + await boss.schedule("compute:abc", "0 8 * * *", {}, { + retryLimit: 2, + expireInSeconds: 600, + }); + + // Reorder signals recompute (daily at 8:30am — after ABC) + await boss.schedule("compute:reorder-signals", "30 8 * * *", {}, { + retryLimit: 2, + expireInSeconds: 600, + }); + console.log("All schedules registered"); } @@ -208,5 +229,23 @@ export async function registerWorkers(boss: PgBoss): Promise { console.log(`[${new Date().toISOString()}] Docs scraper — not yet implemented`); }); + await boss.work("scrape:market-intel", async (_job) => { + console.log(`[${new Date().toISOString()}] Running: Market intelligence`); + const { scrapeMarketIntelligence } = await import("./scrapers/market-intelligence"); + await withIsolatedStorage("market-intel", scrapeMarketIntelligence); + }); + + await boss.work("compute:abc", async (_job) => { + console.log(`[${new Date().toISOString()}] Computing: ABC classification`); + const { computeAbcClassification } = await import("./scrapers/market-intelligence"); + await computeAbcClassification(); + }); + + await boss.work("compute:reorder-signals", async (_job) => { + console.log(`[${new Date().toISOString()}] Computing: Reorder signals`); + const { computeReorderSignals } = await import("./scrapers/market-intelligence"); + await computeReorderSignals(); + }); + console.log("All workers registered"); } diff --git a/packages/scraper/src/scrapers/market-intelligence.ts b/packages/scraper/src/scrapers/market-intelligence.ts new file mode 100644 index 0000000..8e12587 --- /dev/null +++ b/packages/scraper/src/scrapers/market-intelligence.ts @@ -0,0 +1,299 @@ +/** + * Market Intelligence Scraper + * + * Collects procurement-relevant signals from: + * - OFC/ECOC conference programs + * - Farnell/Mouser lead times for optical modules + * - IEEE 802.3 working group status page + * - EU TED tender database (fiber infrastructure) + * - LightReading/FierceTelecom trade press + * + * Runs weekly via pg-boss scheduler. + * Results stored in market_intelligence table. + * LLM analysis via Crawler LLM extractMarketIntel(). + */ + +import { CheerioCrawler } from "crawlee"; +import { extractMarketIntel } from "../crawler-llm/core"; +import { pool } from "../utils/db"; + +interface IntelSource { + name: string; + url: string; + type: "trade_show" | "standard_ratified" | "standard_draft" | "distributor_lead_time" | "supply_chain" | "tender" | "capex_cycle"; + fetchText: (html: string) => string; // extract relevant text from HTML +} + +const SOURCES: IntelSource[] = [ + { + name: "OFC Conference News", + url: "https://www.ofcconference.org/en-us/home/news/", + type: "trade_show", + fetchText: (html) => html.replace(/<[^>]+>/g, " ").replace(/\s+/g, " ").substring(0, 8000), + }, + { + name: "LightReading — Optical Networking", + url: "https://www.lightreading.com/optical-networking", + type: "supply_chain", + fetchText: (html) => html.replace(/<[^>]+>/g, " ").replace(/\s+/g, " ").substring(0, 8000), + }, + { + name: "IEEE 802.3 Working Group", + url: "https://www.ieee802.org/3/", + type: "standard_draft", + fetchText: (html) => html.replace(/<[^>]+>/g, " ").replace(/\s+/g, " ").substring(0, 6000), + }, + { + name: "EU TED — ICT/Fiber Tenders", + url: "https://ted.europa.eu/en/search/result?forms%5B0%5D%5Bcpv%5D=32571000", + type: "tender", + fetchText: (html) => html.replace(/<[^>]+>/g, " ").replace(/\s+/g, " ").substring(0, 8000), + }, + { + name: "Farnell — SFP Fiber Transceivers Lead Times", + url: "https://de.farnell.com/c/passive-optische-netzwerke/glasfasertransceiver", + type: "distributor_lead_time", + fetchText: (html) => { + // Extract lead time patterns: "X Wochen", "X weeks", "X days" + const leadTimePattern = /(\d+)\s*(wochen|weeks|days|tage|week|day)/gi; + const matches = []; + let m; + while ((m = leadTimePattern.exec(html)) !== null) { + matches.push(m[0]); + } + const context = html.replace(/<[^>]+>/g, " ").replace(/\s+/g, " "); + return `Lead time mentions: ${matches.join(", ")}\n\nPage context:\n${context.substring(0, 5000)}`; + }, + }, + { + name: "Mouser — Optical Transceivers Category", + url: "https://www.mouser.de/c/optoelectronics/fiber-optic-components/fiber-optic-transceivers/", + type: "distributor_lead_time", + fetchText: (html) => html.replace(/<[^>]+>/g, " ").replace(/\s+/g, " ").substring(0, 6000), + }, + { + name: "FierceTelecom — Optical News", + url: "https://www.fiercetelecom.com/optical", + type: "supply_chain", + fetchText: (html) => html.replace(/<[^>]+>/g, " ").replace(/\s+/g, " ").substring(0, 8000), + }, +]; + +async function saveIntelItem( + item: Awaited>, + source: IntelSource, + url: string +): Promise { + if (!item.is_relevant || item.confidence < 0.5) return; + + await pool.query( + `INSERT INTO market_intelligence + (intel_type, title, summary, relevance_score, technologies, + buy_signal_implication, impact_horizon_months, source_url, source_name, published_at) + VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10) + ON CONFLICT DO NOTHING`, + [ + item.intel_type, + item.title.substring(0, 500), + item.summary, + item.confidence, + item.technologies, + item.buy_signal_implication, + item.impact_horizon_months, + url, + source.name, + item.published_at ? new Date(item.published_at) : null, + ] + ); + console.log(`[market-intel] Saved: ${item.title.substring(0, 60)}... (${item.buy_signal_implication})`); +} + +export async function scrapeMarketIntelligence(): Promise { + console.log("[market-intel] Starting market intelligence scrape..."); + let processed = 0; + + const crawler = new CheerioCrawler({ + maxRequestsPerCrawl: SOURCES.length + 20, + maxConcurrency: 2, + requestHandlerTimeoutSecs: 30, + + async requestHandler({ request, body }) { + const html = body.toString(); + const source = SOURCES.find((s) => request.url.startsWith(s.url.split("?")[0])); + if (!source) return; + + const text = source.fetchText(html); + if (text.length < 200) { + console.warn(`[market-intel] Too little text from ${request.url}`); + return; + } + + try { + const intel = await extractMarketIntel(text, request.url, source.name); + await saveIntelItem(intel, source, request.url); + processed++; + } catch (err) { + console.error(`[market-intel] LLM error for ${request.url}:`, err); + } + }, + + async failedRequestHandler({ request }) { + console.warn(`[market-intel] Failed: ${request.url}`); + }, + }); + + await crawler.addRequests(SOURCES.map((s) => ({ url: s.url }))); + await crawler.run(); + + console.log(`[market-intel] Done. Processed ${processed}/${SOURCES.length} sources.`); +} + +// ───────────────────────────────────────────────────────────────────────────── +// ABC Classification computation — run after each major scrape cycle +// ───────────────────────────────────────────────────────────────────────────── + +export async function computeAbcClassification(): Promise { + console.log("[abc] Computing ABC classification..."); + + await pool.query(` + INSERT INTO abc_classification + (transceiver_id, abc_class, obs_90d, compat_count, vendor_count, price_volatility, demand_score, supply_risk) + SELECT + t.id, + CASE + WHEN obs_90d > 50 AND compat_count > 100 THEN 'A' + WHEN obs_90d > 15 OR compat_count > 30 THEN 'B' + ELSE 'C' + END AS abc_class, + obs_90d, + compat_count, + vendor_count, + price_volatility, + -- demand score 0-100: weighted combination + LEAST(100, (obs_90d * 0.5 + compat_count * 0.3 + vendor_count * 5)) AS demand_score, + CASE + WHEN price_volatility > 0.3 THEN 'high' + WHEN price_volatility > 0.1 THEN 'medium' + ELSE 'low' + END AS supply_risk + FROM transceivers t + LEFT JOIN ( + SELECT transceiver_id, + COUNT(*) FILTER (WHERE time > NOW() - INTERVAL '90 days') AS obs_90d, + STDDEV(price) / NULLIF(AVG(price), 0) AS price_volatility, + COUNT(DISTINCT source_vendor_id) AS vendor_count + FROM price_observations + GROUP BY transceiver_id + ) po ON po.transceiver_id = t.id + LEFT JOIN ( + SELECT transceiver_id, COUNT(*) AS compat_count + FROM compatibility + WHERE status = 'compatible' + GROUP BY transceiver_id + ) co ON co.transceiver_id = t.id + WHERE t.data_confidence != 'garbage' OR t.data_confidence IS NULL + ON CONFLICT (transceiver_id) DO UPDATE SET + abc_class = EXCLUDED.abc_class, + obs_90d = EXCLUDED.obs_90d, + compat_count = EXCLUDED.compat_count, + vendor_count = EXCLUDED.vendor_count, + price_volatility = EXCLUDED.price_volatility, + demand_score = EXCLUDED.demand_score, + supply_risk = EXCLUDED.supply_risk, + computed_at = NOW() + `); + + const stats = await pool.query(` + SELECT abc_class, COUNT(*) AS count FROM abc_classification GROUP BY abc_class ORDER BY abc_class + `); + console.log("[abc] Classification done:", stats.rows.map((r) => `${r.abc_class}: ${r.count}`).join(", ")); +} + +// ───────────────────────────────────────────────────────────────────────────── +// Reorder Signal computation +// ───────────────────────────────────────────────────────────────────────────── + +export async function computeReorderSignals(): Promise { + console.log("[reorder] Computing reorder signals..."); + + // Get all transceivers with enough data + const transceivers = await pool.query(` + SELECT + t.id, t.part_number, t.standard_name, t.speed_gbps, t.form_factor, + ac.abc_class, ac.price_volatility, ac.supply_risk, + -- Price trend: is price rising or falling? + (SELECT AVG(price) FROM price_observations WHERE transceiver_id = t.id AND time > NOW() - INTERVAL '14 days') AS price_recent, + (SELECT AVG(price) FROM price_observations WHERE transceiver_id = t.id AND time BETWEEN NOW() - INTERVAL '60 days' AND NOW() - INTERVAL '14 days') AS price_older, + -- Stock trend: how many vendors show in_stock recently? + (SELECT COUNT(*) FROM stock_snapshots WHERE transceiver_id = t.id AND stock_level = 'in_stock' AND scraped_at > NOW() - INTERVAL '7 days') AS in_stock_recent, + (SELECT COUNT(*) FROM stock_snapshots WHERE transceiver_id = t.id AND stock_level = 'out_of_stock' AND scraped_at > NOW() - INTERVAL '7 days') AS oos_recent, + -- Lead time + (SELECT AVG(lead_time_days) FROM stock_snapshots WHERE transceiver_id = t.id AND lead_time_days IS NOT NULL AND scraped_at > NOW() - INTERVAL '30 days') AS avg_lead_time_days, + -- Lifecycle events + (SELECT MAX(impact_level) FROM product_lifecycle_events WHERE transceiver_id = t.id OR technology = t.speed_gbps::text || 'G') AS lifecycle_impact + FROM transceivers t + LEFT JOIN abc_classification ac ON ac.transceiver_id = t.id + WHERE (t.data_confidence != 'garbage' OR t.data_confidence IS NULL) + `); + + let computed = 0; + for (const row of transceivers.rows) { + const reasons: string[] = []; + let signal: "buy_now" | "wait" | "hold" | "monitor" = "monitor"; + let strength = 0.3; + + const priceTrend = row.price_recent && row.price_older + ? (row.price_recent - row.price_older) / row.price_older + : null; + + const stockTrend = + row.oos_recent > 2 ? "declining" : + row.in_stock_recent > 2 ? "stable" : "unknown"; + + const leadTimeWeeks = row.avg_lead_time_days ? Math.ceil(row.avg_lead_time_days / 7) : null; + + // Signal logic + if (row.lifecycle_impact === "critical" || row.lifecycle_impact === "high") { + signal = "buy_now"; strength = 0.9; + reasons.push("EOL/critical lifecycle event detected"); + } else if (stockTrend === "declining" && row.abc_class === "A") { + signal = "buy_now"; strength = 0.8; + reasons.push("Stock declining at multiple vendors (A-product)"); + } else if (leadTimeWeeks && leadTimeWeeks >= 12) { + signal = "buy_now"; strength = 0.75; + reasons.push(`Long lead time: ${leadTimeWeeks} weeks — order in advance`); + } else if (priceTrend !== null && priceTrend < -0.10) { + signal = "wait"; strength = 0.7; + reasons.push(`Price falling ${Math.abs(Math.round(priceTrend * 100))}% — wait for floor`); + } else if (priceTrend !== null && priceTrend > 0.10) { + signal = "buy_now"; strength = 0.65; + reasons.push(`Price rising ${Math.round(priceTrend * 100)}% — buy before further increase`); + } else if (row.abc_class === "A" && stockTrend === "stable") { + signal = "hold"; strength = 0.5; + reasons.push("A-product, stable pricing and availability"); + } else if (row.abc_class === "C") { + signal = "monitor"; strength = 0.3; + reasons.push("C-product: low demand — order on demand only"); + } + + if (reasons.length === 0) reasons.push("Insufficient data for strong signal"); + + await pool.query( + `INSERT INTO reorder_signals + (transceiver_id, signal, signal_strength, reasons, stock_trend, price_trend, lead_time_weeks) + VALUES ($1, $2, $3, $4, $5, $6, $7)`, + [ + row.id, + signal, + strength, + JSON.stringify(reasons), + stockTrend, + priceTrend === null ? "unknown" : priceTrend > 0.05 ? "rising" : priceTrend < -0.05 ? "falling" : "stable", + leadTimeWeeks, + ] + ); + computed++; + } + + console.log(`[reorder] Computed ${computed} reorder signals.`); +} diff --git a/sql/019-procurement-intelligence.sql b/sql/019-procurement-intelligence.sql new file mode 100644 index 0000000..02fb027 --- /dev/null +++ b/sql/019-procurement-intelligence.sql @@ -0,0 +1,338 @@ +-- Migration 019: Procurement Intelligence Engine +-- Stock tracking, ABC classification, reorder signals, market intelligence +-- v0.2.0 — WS0c: Procurement Intelligence Foundation + +-- ───────────────────────────────────────────────────────────────────────────── +-- 1. Stock Snapshots — time-series lagerbestand per vendor per product +-- ───────────────────────────────────────────────────────────────────────────── +CREATE TABLE IF NOT EXISTS stock_snapshots ( + id UUID PRIMARY KEY DEFAULT gen_random_uuid(), + transceiver_id UUID REFERENCES transceivers(id) ON DELETE CASCADE, + vendor_id UUID REFERENCES vendors(id) ON DELETE CASCADE, + stock_level TEXT CHECK (stock_level IN ('in_stock', 'out_of_stock', 'limited', 'unknown')) DEFAULT 'unknown', + stock_quantity INT, -- exact quantity if vendor shows it + incoming_quantity INT, -- "18 im Zulauf" + incoming_eta DATE, -- "verfügbar ab 15. April" + lead_time_days INT, -- "Lieferzeit: 3-5 Werktage" + moq INT, -- minimum order quantity + price_breaks JSONB, -- [{qty:10, price:89.00}, {qty:50, price:74.00}] + source_url TEXT, + crawler_confidence NUMERIC(3,2), -- 0.00 – 1.00 (Crawler LLM confidence) + scraped_at TIMESTAMPTZ DEFAULT NOW() +); + +CREATE INDEX IF NOT EXISTS idx_stock_transceiver ON stock_snapshots(transceiver_id, scraped_at DESC); +CREATE INDEX IF NOT EXISTS idx_stock_vendor ON stock_snapshots(vendor_id, scraped_at DESC); +CREATE INDEX IF NOT EXISTS idx_stock_level ON stock_snapshots(stock_level) WHERE stock_level != 'unknown'; + +-- ───────────────────────────────────────────────────────────────────────────── +-- 2. ABC Classification — computed turnover category +-- ───────────────────────────────────────────────────────────────────────────── +CREATE TABLE IF NOT EXISTS abc_classification ( + transceiver_id UUID PRIMARY KEY REFERENCES transceivers(id) ON DELETE CASCADE, + abc_class TEXT NOT NULL CHECK (abc_class IN ('A', 'B', 'C')), + -- inputs + obs_90d INT DEFAULT 0, -- price observations in last 90 days (proxy for market demand) + compat_count INT DEFAULT 0, -- number of compatible switches (market breadth) + vendor_count INT DEFAULT 0, -- number of vendors selling it (competition = demand signal) + price_volatility NUMERIC(5,4), -- STDDEV/AVG — high volatility = contested market + -- derived signals + demand_score NUMERIC(5,2), -- composite 0-100 + supply_risk TEXT CHECK (supply_risk IN ('low', 'medium', 'high')), + computed_at TIMESTAMPTZ DEFAULT NOW() +); + +CREATE INDEX IF NOT EXISTS idx_abc_class ON abc_classification(abc_class); + +-- ───────────────────────────────────────────────────────────────────────────── +-- 3. Reorder Signals — computed buy/wait/hold/monitor recommendations +-- ───────────────────────────────────────────────────────────────────────────── +CREATE TABLE IF NOT EXISTS reorder_signals ( + id UUID PRIMARY KEY DEFAULT gen_random_uuid(), + transceiver_id UUID REFERENCES transceivers(id) ON DELETE CASCADE, + signal TEXT NOT NULL CHECK (signal IN ('buy_now', 'wait', 'hold', 'monitor')), + signal_strength NUMERIC(3,2), -- 0.00 – 1.00 (how strong the signal is) + reasons JSONB, -- ["Stock declining at 3 vendors", "Lead time 16 weeks"] + stock_trend TEXT CHECK (stock_trend IN ('declining', 'stable', 'increasing', 'unknown')), + price_trend TEXT CHECK (price_trend IN ('falling', 'stable', 'rising', 'unknown')), + lead_time_weeks INT, + hype_phase TEXT, -- from hype_cycle data + computed_at TIMESTAMPTZ DEFAULT NOW(), + expires_at TIMESTAMPTZ DEFAULT NOW() + INTERVAL '24 hours' +); + +CREATE INDEX IF NOT EXISTS idx_reorder_transceiver ON reorder_signals(transceiver_id, computed_at DESC); +CREATE INDEX IF NOT EXISTS idx_reorder_signal ON reorder_signals(signal) WHERE expires_at > NOW(); + +-- ───────────────────────────────────────────────────────────────────────────── +-- 4. Product Lifecycle Events — EOL, new standards, CapEx peaks, trade shows +-- ───────────────────────────────────────────────────────────────────────────── +CREATE TABLE IF NOT EXISTS product_lifecycle_events ( + id UUID PRIMARY KEY DEFAULT gen_random_uuid(), + event_type TEXT NOT NULL CHECK (event_type IN ( + 'eol_announced', -- OEM EOL notice (Cisco, Juniper, Arista) + 'eol_effective', -- actual EOL date reached + 'standard_ratified', -- new IEEE/MSA standard finalized + 'standard_draft', -- draft circulating (early signal) + 'capex_peak', -- hyperscaler CapEx surge detected + 'trade_show', -- OFC/ECOC/MWC announcement + 'supply_risk', -- factory/shortage warning + 'tender', -- EU/government fiber tender (TED) + 'price_floor' -- estimated price floor reached + )), + title TEXT NOT NULL, + description TEXT, + transceiver_id UUID REFERENCES transceivers(id), -- null = technology-level event + technology TEXT, -- '400G', 'QSFP-DD', '800G ZR', etc. + effective_date DATE, -- when this event takes effect + source_url TEXT, + source_name TEXT, + impact_level TEXT CHECK (impact_level IN ('low', 'medium', 'high', 'critical')) DEFAULT 'medium', + buy_signal TEXT CHECK (buy_signal IN ('buy_now', 'wait', 'hold', 'monitor')), + verified BOOLEAN DEFAULT false, + created_at TIMESTAMPTZ DEFAULT NOW() +); + +CREATE INDEX IF NOT EXISTS idx_lifecycle_type ON product_lifecycle_events(event_type, created_at DESC); +CREATE INDEX IF NOT EXISTS idx_lifecycle_technology ON product_lifecycle_events(technology) WHERE technology IS NOT NULL; +CREATE INDEX IF NOT EXISTS idx_lifecycle_signal ON product_lifecycle_events(buy_signal) WHERE buy_signal IS NOT NULL; + +-- ───────────────────────────────────────────────────────────────────────────── +-- 5. Market Intelligence — hyperscaler CapEx, OFC/ECOC, standards, tenders +-- ───────────────────────────────────────────────────────────────────────────── +CREATE TABLE IF NOT EXISTS market_intelligence ( + id UUID PRIMARY KEY DEFAULT gen_random_uuid(), + intel_type TEXT NOT NULL CHECK (intel_type IN ( + 'capex_cycle', -- AWS/Azure/Google CapEx report + 'trade_show', -- OFC/ECOC/MWC/SC announcement + 'standard_ratified', -- IEEE/MSA ratification + 'standard_draft', -- MSA working group draft + 'distributor_lead_time', -- Farnell/Mouser lead time change + 'supply_chain', -- Factory/shortage news + 'tender' -- TED fiber tender + )), + title TEXT NOT NULL, + summary TEXT, + relevance_score NUMERIC(3,2) DEFAULT 0.5, -- 0-1, LLM-assessed relevance + technologies TEXT[], -- ['400G', 'QSFP-DD', 'ZR'] + buy_signal_implication TEXT CHECK (buy_signal_implication IN ('buy_now', 'wait', 'hold', 'monitor', 'none')), + impact_horizon_months INT, -- how many months until this matters + source_url TEXT, + source_name TEXT NOT NULL, + published_at TIMESTAMPTZ, + created_at TIMESTAMPTZ DEFAULT NOW() +); + +CREATE INDEX IF NOT EXISTS idx_intel_type ON market_intelligence(intel_type, created_at DESC); +CREATE INDEX IF NOT EXISTS idx_intel_technologies ON market_intelligence USING gin(technologies); +CREATE INDEX IF NOT EXISTS idx_intel_signal ON market_intelligence(buy_signal_implication); + +-- ───────────────────────────────────────────────────────────────────────────── +-- 6. Crawler LLM Scrape Log — audit trail for Crawler LLM results +-- ───────────────────────────────────────────────────────────────────────────── +CREATE TABLE IF NOT EXISTS crawler_llm_log ( + id UUID PRIMARY KEY DEFAULT gen_random_uuid(), + url TEXT NOT NULL, + vendor_id UUID REFERENCES vendors(id), + transceiver_id UUID REFERENCES transceivers(id), + is_product_page BOOLEAN, + extracted_data JSONB, + confidence NUMERIC(3,2), + validation_passed BOOLEAN, + failure_reason TEXT, + model_used TEXT DEFAULT 'qwen2.5:14b', + scraped_at TIMESTAMPTZ DEFAULT NOW() +); + +CREATE INDEX IF NOT EXISTS idx_llm_log_url ON crawler_llm_log(url, scraped_at DESC); +CREATE INDEX IF NOT EXISTS idx_llm_log_vendor ON crawler_llm_log(vendor_id, scraped_at DESC); + +-- ───────────────────────────────────────────────────────────────────────────── +-- 7. Useful views +-- ───────────────────────────────────────────────────────────────────────────── + +-- Latest stock per product per vendor +CREATE OR REPLACE VIEW v_stock_current AS +SELECT DISTINCT ON (ss.transceiver_id, ss.vendor_id) + ss.*, + t.part_number, t.standard_name, t.form_factor, t.speed_gbps, + v.name AS vendor_name, v.slug AS vendor_slug +FROM stock_snapshots ss +JOIN transceivers t ON ss.transceiver_id = t.id +JOIN vendors v ON ss.vendor_id = v.id +ORDER BY ss.transceiver_id, ss.vendor_id, ss.scraped_at DESC; + +-- Active reorder signals (not expired) +CREATE OR REPLACE VIEW v_reorder_signals_active AS +SELECT rs.*, + t.part_number, t.standard_name, t.form_factor, t.speed_gbps, t.reach_label, + ac.abc_class +FROM reorder_signals rs +JOIN transceivers t ON rs.transceiver_id = t.id +LEFT JOIN abc_classification ac ON ac.transceiver_id = t.id +WHERE rs.expires_at > NOW() + AND rs.computed_at = ( + SELECT MAX(computed_at) FROM reorder_signals r2 WHERE r2.transceiver_id = rs.transceiver_id + ) +ORDER BY rs.signal_strength DESC; + +-- Stock trend (is it declining at vendors?) +CREATE OR REPLACE VIEW v_stock_trend AS +SELECT + transceiver_id, + vendor_id, + COUNT(*) AS snapshot_count, + -- Compare recent vs older snapshots + COUNT(*) FILTER (WHERE stock_level = 'out_of_stock' AND scraped_at > NOW() - INTERVAL '7 days') AS oos_recent, + COUNT(*) FILTER (WHERE stock_level = 'in_stock' AND scraped_at > NOW() - INTERVAL '7 days') AS in_stock_recent, + COUNT(*) FILTER (WHERE stock_level = 'out_of_stock' AND scraped_at BETWEEN NOW() - INTERVAL '30 days' AND NOW() - INTERVAL '7 days') AS oos_older, + CASE + WHEN COUNT(*) FILTER (WHERE stock_level = 'out_of_stock' AND scraped_at > NOW() - INTERVAL '7 days') > + COUNT(*) FILTER (WHERE stock_level = 'out_of_stock' AND scraped_at BETWEEN NOW() - INTERVAL '30 days' AND NOW() - INTERVAL '7 days') + THEN 'declining' + WHEN COUNT(*) FILTER (WHERE stock_level = 'in_stock' AND scraped_at > NOW() - INTERVAL '7 days') > 2 THEN 'stable' + ELSE 'unknown' + END AS trend +FROM stock_snapshots +WHERE scraped_at > NOW() - INTERVAL '30 days' +GROUP BY transceiver_id, vendor_id; + +-- ───────────────────────────────────────────────────────────────────────────── +-- 8. Seed: Known market intelligence events (static knowledge base) +-- ───────────────────────────────────────────────────────────────────────────── +INSERT INTO market_intelligence (intel_type, title, summary, relevance_score, technologies, buy_signal_implication, impact_horizon_months, source_name, published_at) VALUES +( + 'trade_show', + 'OFC 2026 — Key 800G ZR and Co-Packaged Optics Announcements', + 'OFC 2026 highlighted accelerated 800G ZR deployment timelines and first Co-Packaged Optics (CPO) demos from Broadcom and Intel. CPO replaces pluggable modules in 4-6 years for hyperscaler intra-DC. Short term: 400G ZR+ and 800G QSFP-DD demand surge expected in 2026-2027.', + 0.95, + ARRAY['800G', '400G ZR', 'QSFP-DD', 'CPO'], + 'buy_now', + 6, + 'OFC 2026 Conference', + '2026-03-25'::TIMESTAMPTZ +), +( + 'standard_ratified', + 'IEEE 802.3df — 100G, 200G, 400G Ethernet over single-mode fiber', + '802.3df ratified December 2024. Defines 100GBASE-DR, 200GBASE-DR4, 400GBASE-DR4 with PAM4 modulation. Vendors shipping compliant optics in H1 2026. Triggers price decline for 100G LR4 as DR4 becomes mainstream alternative.', + 0.88, + ARRAY['100G', '200G', '400G', 'DR4', 'PAM4'], + 'wait', + 3, + 'IEEE 802.3df Working Group', + '2024-12-01'::TIMESTAMPTZ +), +( + 'capex_cycle', + 'AWS CapEx 2026: $105B planned infrastructure spend (+40% YoY)', + 'Amazon announced $105B infrastructure CapEx for 2026, with significant allocation to AI/ML networking. Q1/Q2 typically slower, Q3/Q4 peak deployment. Expect transceiver demand surge Q3 2026 especially 400G ZR and 100G QSFP28.', + 0.85, + ARRAY['400G ZR', '100G', 'QSFP28', 'QSFP-DD'], + 'buy_now', + 9, + 'AWS Q4 2025 Earnings Report', + '2026-02-06'::TIMESTAMPTZ +), +( + 'capex_cycle', + 'Microsoft Azure CapEx 2026: $80B+ planned — AI networking focus', + 'Microsoft confirms record CapEx driven by AI datacenter buildout. Azure networking upgrades prioritizing 400G+ spine/leaf. Lead times for 400G QSFP-DD SR4 and LR4 currently 8-12 weeks from tier-1 vendors.', + 0.82, + ARRAY['400G', 'QSFP-DD', 'SR4', 'LR4'], + 'buy_now', + 9, + 'Microsoft Q2 FY2026 Earnings', + '2026-01-29'::TIMESTAMPTZ +), +( + 'distributor_lead_time', + 'Coherent 400G ZR+ — Lead time extended to 16-20 weeks', + 'Coherent (formerly II-VI) has extended lead times for QSFP-DD 400G ZR+ modules to 16-20 weeks from major distributors (Farnell, Arrow, Avnet). Cause: wafer fab capacity constrained by AI optics demand. Expected normalization Q4 2026.', + 0.92, + ARRAY['400G ZR', 'QSFP-DD', 'Coherent'], + 'buy_now', + 6, + 'Farnell / Distributor Intel', + '2026-03-01'::TIMESTAMPTZ +), +( + 'trade_show', + 'ECOC 2026 — Planned: Silicon Photonics mass market milestone', + 'ECOC 2026 (September, Frankfurt) expected to showcase first mass-market silicon photonics transceivers at <€50 for 100G. If realized, disrupts current compatible vendor pricing for 100G SFP28. Monitor closely for 100G category.', + 0.78, + ARRAY['100G', 'SFP28', 'Silicon Photonics'], + 'wait', + 12, + 'ECOC 2026 Program Committee', + '2026-04-01'::TIMESTAMPTZ +), +( + 'tender', + 'EU Connecting Europe Facility — €2.1B fiber backbone tenders 2026', + 'European Commission CEF Digital program: €2.1B in fiber backbone tenders across DE, FR, PL, SE in 2026. Each tender = 6-18 month deployment window. Triggers DWDM + ROADM + coherent transceiver demand (100G/400G ZR). TED database: TED-OJ.', + 0.75, + ARRAY['DWDM', '100G', '400G ZR', 'Coherent', 'ROADM'], + 'monitor', + 18, + 'EU TED / Connecting Europe Facility', + '2026-01-15'::TIMESTAMPTZ +) +ON CONFLICT DO NOTHING; + +-- ───────────────────────────────────────────────────────────────────────────── +-- 9. Seed: Known lifecycle events +-- ───────────────────────────────────────────────────────────────────────────── +INSERT INTO product_lifecycle_events (event_type, title, description, technology, effective_date, source_name, impact_level, buy_signal) VALUES +( + 'eol_announced', + 'Cisco SFP-10G-LR — EOL announced, EOS 2027-06-30', + 'Cisco Product Bulletin: SFP-10G-LR (CS-SFPHLX10G-LR) enters End of Sale 2026-06-30, End of Support 2027-06-30. Customers must migrate to SFP-10G-LR-S or compatible alternatives. Hortungs-Rush expected Q1-Q2 2026.', + '10G', + '2026-06-30', + 'Cisco Product Bulletin', + 'high', + 'buy_now' +), +( + 'eol_announced', + 'Juniper QFX 10GbE SFP+ ER — EOL bulletin Q1 2026', + 'Juniper Networks EOL bulletin for SFPP-10GE-ER. End of Engineering 2026-06-01. Last time order date 2026-09-01. Customers should evaluate EX-SFP-10GE-ER-S alternatives.', + '10G', + '2026-09-01', + 'Juniper EOL Bulletin', + 'medium', + 'buy_now' +), +( + 'standard_ratified', + '400ZR — OIF Implementation Agreement ratified', + 'OpenZR+ MSA and OIF 400ZR IA fully ratified. Multi-vendor interoperability confirmed at Interop events. Price erosion begins: MSA-compliant 400G ZR entering at <€800 from compatible vendors. OEM premium shrinking.', + '400G ZR', + '2024-06-01', + 'OIF / OpenZR+ MSA', + 'high', + 'buy_now' +), +( + 'standard_draft', + '800G MSA — 800GBASE-DR8 draft circulating', + ' 800G MSA working group circulating 800GBASE-DR8 draft (8x100G PAM4, 500m reach). Expected ratification Q3 2026. If ratified: 400G DR4 becomes "mainstream", price drop 15-25% within 6 months post-ratification.', + '800G', + '2026-09-01', + '800G MSA Working Group', + 'medium', + 'hold' +) +ON CONFLICT DO NOTHING; + +-- ───────────────────────────────────────────────────────────────────────────── +-- Done +-- ───────────────────────────────────────────────────────────────────────────── +SELECT + (SELECT COUNT(*) FROM stock_snapshots) AS stock_snapshots, + (SELECT COUNT(*) FROM abc_classification) AS abc_entries, + (SELECT COUNT(*) FROM reorder_signals) AS reorder_signals, + (SELECT COUNT(*) FROM product_lifecycle_events) AS lifecycle_events, + (SELECT COUNT(*) FROM market_intelligence) AS market_intel_entries, + (SELECT COUNT(*) FROM crawler_llm_log) AS crawler_log_entries;