diff --git a/packages/api/src/index.ts b/packages/api/src/index.ts
index a3e8fab..fbb6d96 100644
--- a/packages/api/src/index.ts
+++ b/packages/api/src/index.ts
@@ -20,6 +20,7 @@ import { transportRouter } from "./routes/transport";
import { datasheetRouter } from "./routes/datasheets";
import { hotTopicsRouter } from "./routes/hot-topics";
import { adoptionRouter } from "./routes/adoption";
+import { procurementRouter } from "./routes/procurement";
const app = express();
@@ -56,6 +57,7 @@ app.use("/api/transport", transportRouter);
app.use("/api/datasheets", datasheetRouter);
app.use("/api/adoption", adoptionRouter);
app.use("/api/hot-topics", hotTopicsRouter);
+app.use("/api/procurement", procurementRouter);
// Dashboard (static HTML)
app.use("/dashboard", express.static(join(__dirname, "..", "..", "dashboard")));
diff --git a/packages/api/src/routes/procurement.ts b/packages/api/src/routes/procurement.ts
new file mode 100644
index 0000000..0f03c9a
--- /dev/null
+++ b/packages/api/src/routes/procurement.ts
@@ -0,0 +1,293 @@
+/**
+ * WS0c: Procurement Intelligence API
+ *
+ * Endpoints:
+ * GET /api/procurement/overview — Dashboard summary
+ * GET /api/procurement/signals — Active reorder signals
+ * GET /api/procurement/signals/:id — Signal for a specific transceiver
+ * GET /api/procurement/abc — ABC classification list
+ * GET /api/procurement/market-intel — Market intelligence events
+ * GET /api/procurement/stock-trends/:id — Stock history for a transceiver
+ * GET /api/procurement/lifecycle — Lifecycle events (EOL, standards)
+ */
+import { Router, Request, Response } from "express";
+import { pool } from "../db/client";
+
+export const procurementRouter = Router();
+
+// ─────────────────────────────────────────────────────────────────────────────
+// GET /api/procurement/overview
+// ─────────────────────────────────────────────────────────────────────────────
+procurementRouter.get("/overview", async (_req: Request, res: Response) => {
+ try {
+ const [signals, abc, intel, lifecycle] = await Promise.all([
+ pool.query(`
+ SELECT signal, COUNT(*) AS count
+ FROM reorder_signals
+ WHERE expires_at > NOW()
+ AND computed_at = (SELECT MAX(r2.computed_at) FROM reorder_signals r2 WHERE r2.transceiver_id = reorder_signals.transceiver_id)
+ GROUP BY signal
+ `),
+ pool.query(`
+ SELECT abc_class, COUNT(*) AS count FROM abc_classification GROUP BY abc_class ORDER BY abc_class
+ `),
+ pool.query(`
+ SELECT intel_type, buy_signal_implication, COUNT(*) AS count
+ FROM market_intelligence
+ WHERE created_at > NOW() - INTERVAL '90 days'
+ GROUP BY intel_type, buy_signal_implication
+ ORDER BY count DESC
+ LIMIT 10
+ `),
+ pool.query(`
+ SELECT event_type, impact_level, COUNT(*) AS count
+ FROM product_lifecycle_events
+ WHERE created_at > NOW() - INTERVAL '180 days'
+ GROUP BY event_type, impact_level
+ ORDER BY count DESC
+ `),
+ ]);
+
+ res.json({
+ signals_summary: signals.rows,
+ abc_summary: abc.rows,
+ market_intel_summary: intel.rows,
+ lifecycle_summary: lifecycle.rows,
+ });
+ } catch (err) {
+ console.error("Procurement overview error:", err);
+ res.status(500).json({ error: "Internal server error" });
+ }
+});
+
+// ─────────────────────────────────────────────────────────────────────────────
+// GET /api/procurement/signals?signal=buy_now&abc_class=A&limit=50&offset=0
+// ─────────────────────────────────────────────────────────────────────────────
+procurementRouter.get("/signals", async (req: Request, res: Response) => {
+ try {
+ const {
+ signal, abc_class, form_factor, speed_gbps,
+ limit = "50", offset = "0"
+ } = req.query;
+
+ let sql = `
+ SELECT rs.*,
+ t.part_number, t.standard_name, t.form_factor, t.speed_gbps,
+ t.reach_label, t.image_url, t.image_r2_key,
+ ac.abc_class, ac.demand_score, ac.supply_risk,
+ v.name AS vendor_name
+ FROM reorder_signals rs
+ JOIN transceivers t ON rs.transceiver_id = t.id
+ LEFT JOIN abc_classification ac ON ac.transceiver_id = t.id
+ LEFT JOIN vendors v ON t.vendor_id = v.id
+ WHERE rs.expires_at > NOW()
+ AND rs.computed_at = (
+ SELECT MAX(r2.computed_at) FROM reorder_signals r2 WHERE r2.transceiver_id = rs.transceiver_id
+ )
+ `;
+ const params: any[] = [];
+ let idx = 1;
+
+ if (signal) { sql += ` AND rs.signal = $${idx}`; params.push(signal); idx++; }
+ if (abc_class) { sql += ` AND ac.abc_class = $${idx}`; params.push(abc_class); idx++; }
+ if (form_factor) { sql += ` AND t.form_factor = $${idx}`; params.push(form_factor); idx++; }
+ if (speed_gbps) { sql += ` AND t.speed_gbps = $${idx}`; params.push(parseFloat(speed_gbps as string)); idx++; }
+
+ sql += ` ORDER BY rs.signal_strength DESC LIMIT $${idx} OFFSET $${idx + 1}`;
+ params.push(parseInt(limit as string), parseInt(offset as string));
+
+ const result = await pool.query(sql, params);
+ res.json({ data: result.rows, total: result.rowCount });
+ } catch (err) {
+ console.error("Signals error:", err);
+ res.status(500).json({ error: "Internal server error" });
+ }
+});
+
+// ─────────────────────────────────────────────────────────────────────────────
+// GET /api/procurement/signals/:transceiver_id
+// ─────────────────────────────────────────────────────────────────────────────
+procurementRouter.get("/signals/:id", async (req: Request, res: Response) => {
+ try {
+ const { id } = req.params;
+
+ const [signal, stockHistory, priceHistory, lifecycle] = await Promise.all([
+ pool.query(`
+ SELECT rs.*, ac.abc_class, ac.demand_score, ac.supply_risk
+ FROM reorder_signals rs
+ LEFT JOIN abc_classification ac ON ac.transceiver_id = rs.transceiver_id
+ WHERE rs.transceiver_id::text = $1
+ ORDER BY rs.computed_at DESC LIMIT 1
+ `, [id]),
+
+ pool.query(`
+ SELECT ss.stock_level, ss.stock_quantity, ss.incoming_quantity,
+ ss.incoming_eta, ss.lead_time_days, ss.moq, ss.price_breaks,
+ ss.scraped_at, ss.crawler_confidence,
+ v.name AS vendor_name
+ FROM stock_snapshots ss
+ JOIN vendors v ON ss.vendor_id = v.id
+ WHERE ss.transceiver_id::text = $1
+ ORDER BY ss.scraped_at DESC LIMIT 50
+ `, [id]),
+
+ pool.query(`
+ SELECT po.price, po.currency, po.time,
+ v.name AS vendor_name
+ FROM price_observations po
+ JOIN vendors v ON po.source_vendor_id = v.id
+ WHERE po.transceiver_id::text = $1
+ ORDER BY po.time DESC LIMIT 30
+ `, [id]),
+
+ pool.query(`
+ SELECT * FROM product_lifecycle_events
+ WHERE transceiver_id::text = $1
+ ORDER BY effective_date ASC NULLS LAST, created_at DESC
+ `, [id]),
+ ]);
+
+ res.json({
+ signal: signal.rows[0] || null,
+ stock_history: stockHistory.rows,
+ price_history: priceHistory.rows,
+ lifecycle_events: lifecycle.rows,
+ });
+ } catch (err) {
+ console.error("Signal detail error:", err);
+ res.status(500).json({ error: "Internal server error" });
+ }
+});
+
+// ─────────────────────────────────────────────────────────────────────────────
+// GET /api/procurement/abc?class=A&form_factor=QSFP28
+// ─────────────────────────────────────────────────────────────────────────────
+procurementRouter.get("/abc", async (req: Request, res: Response) => {
+ try {
+ const { class: cls, form_factor, speed_gbps, limit = "100", offset = "0" } = req.query;
+
+ let sql = `
+ SELECT ac.*,
+ t.part_number, t.standard_name, t.form_factor, t.speed_gbps,
+ t.reach_label, t.image_url,
+ v.name AS vendor_name,
+ rs.signal, rs.signal_strength
+ FROM abc_classification ac
+ JOIN transceivers t ON ac.transceiver_id = t.id
+ LEFT JOIN vendors v ON t.vendor_id = v.id
+ LEFT JOIN LATERAL (
+ SELECT signal, signal_strength FROM reorder_signals
+ WHERE transceiver_id = ac.transceiver_id AND expires_at > NOW()
+ ORDER BY computed_at DESC LIMIT 1
+ ) rs ON true
+ WHERE 1=1
+ `;
+ const params: any[] = [];
+ let idx = 1;
+
+ if (cls) { sql += ` AND ac.abc_class = $${idx}`; params.push(cls); idx++; }
+ if (form_factor) { sql += ` AND t.form_factor = $${idx}`; params.push(form_factor); idx++; }
+ if (speed_gbps) { sql += ` AND t.speed_gbps = $${idx}`; params.push(parseFloat(speed_gbps as string)); idx++; }
+
+ sql += ` ORDER BY ac.abc_class, ac.demand_score DESC LIMIT $${idx} OFFSET $${idx + 1}`;
+ params.push(parseInt(limit as string), parseInt(offset as string));
+
+ const result = await pool.query(sql, params);
+ res.json({ data: result.rows, total: result.rowCount });
+ } catch (err) {
+ console.error("ABC error:", err);
+ res.status(500).json({ error: "Internal server error" });
+ }
+});
+
+// ─────────────────────────────────────────────────────────────────────────────
+// GET /api/procurement/market-intel?type=&days=90&signal=buy_now
+// ─────────────────────────────────────────────────────────────────────────────
+procurementRouter.get("/market-intel", async (req: Request, res: Response) => {
+ try {
+ const {
+ type, days = "90", signal, technology,
+ limit = "50", offset = "0"
+ } = req.query;
+
+ let sql = `
+ SELECT * FROM market_intelligence
+ WHERE created_at > NOW() - INTERVAL '1 day' * $1
+ `;
+ const params: any[] = [parseInt(days as string)];
+ let idx = 2;
+
+ if (type) { sql += ` AND intel_type = $${idx}`; params.push(type); idx++; }
+ if (signal) { sql += ` AND buy_signal_implication = $${idx}`; params.push(signal); idx++; }
+ if (technology) { sql += ` AND $${idx} = ANY(technologies)`; params.push(technology); idx++; }
+
+ sql += ` ORDER BY relevance_score DESC, created_at DESC LIMIT $${idx} OFFSET $${idx + 1}`;
+ params.push(parseInt(limit as string), parseInt(offset as string));
+
+ const result = await pool.query(sql, params);
+ res.json({ data: result.rows, total: result.rowCount });
+ } catch (err) {
+ console.error("Market intel error:", err);
+ res.status(500).json({ error: "Internal server error" });
+ }
+});
+
+// ─────────────────────────────────────────────────────────────────────────────
+// GET /api/procurement/stock-trends/:transceiver_id
+// ─────────────────────────────────────────────────────────────────────────────
+procurementRouter.get("/stock-trends/:id", async (req: Request, res: Response) => {
+ try {
+ const result = await pool.query(`
+ SELECT DISTINCT ON (ss.vendor_id, date_trunc('day', ss.scraped_at))
+ ss.stock_level, ss.stock_quantity, ss.incoming_quantity,
+ ss.incoming_eta, ss.lead_time_days, ss.scraped_at,
+ v.name AS vendor_name
+ FROM stock_snapshots ss
+ JOIN vendors v ON ss.vendor_id = v.id
+ WHERE ss.transceiver_id::text = $1
+ ORDER BY ss.vendor_id, date_trunc('day', ss.scraped_at) DESC, ss.scraped_at DESC
+ LIMIT 200
+ `, [req.params.id]);
+
+ res.json({ data: result.rows });
+ } catch (err) {
+ console.error("Stock trends error:", err);
+ res.status(500).json({ error: "Internal server error" });
+ }
+});
+
+// ─────────────────────────────────────────────────────────────────────────────
+// GET /api/procurement/lifecycle?type=eol_announced&impact=high&days=180
+// ─────────────────────────────────────────────────────────────────────────────
+procurementRouter.get("/lifecycle", async (req: Request, res: Response) => {
+ try {
+ const {
+ type, impact, technology, signal,
+ days = "180", limit = "50"
+ } = req.query;
+
+ let sql = `
+ SELECT ple.*,
+ t.part_number, t.standard_name, t.form_factor, t.speed_gbps
+ FROM product_lifecycle_events ple
+ LEFT JOIN transceivers t ON ple.transceiver_id = t.id
+ WHERE ple.created_at > NOW() - INTERVAL '1 day' * $1
+ `;
+ const params: any[] = [parseInt(days as string)];
+ let idx = 2;
+
+ if (type) { sql += ` AND ple.event_type = $${idx}`; params.push(type); idx++; }
+ if (impact) { sql += ` AND ple.impact_level = $${idx}`; params.push(impact); idx++; }
+ if (technology) { sql += ` AND ple.technology ILIKE $${idx}`; params.push(`%${technology}%`); idx++; }
+ if (signal) { sql += ` AND ple.buy_signal = $${idx}`; params.push(signal); idx++; }
+
+ sql += ` ORDER BY ple.impact_level DESC, ple.effective_date ASC NULLS LAST, ple.created_at DESC LIMIT $${idx}`;
+ params.push(parseInt(limit as string));
+
+ const result = await pool.query(sql, params);
+ res.json({ data: result.rows });
+ } catch (err) {
+ console.error("Lifecycle error:", err);
+ res.status(500).json({ error: "Internal server error" });
+ }
+});
diff --git a/packages/dashboard/index.html b/packages/dashboard/index.html
index 9e5b183..5cd7449 100644
--- a/packages/dashboard/index.html
+++ b/packages/dashboard/index.html
@@ -639,6 +639,52 @@
.compare-diff { background: var(--yellow-light); }
.compare-best { background: var(--green-light); font-weight: 600; }
.compare-cb { width: 16px; height: 16px; cursor: pointer; accent-color: var(--purple); }
+
+ /* === PROCUREMENT TAB === */
+ .proc-btn {
+ background: var(--surface2); border: 1px solid var(--border);
+ padding: 5px 14px; border-radius: 6px; cursor: pointer;
+ font-size: 0.78rem; font-weight: 600; color: var(--text-dim);
+ transition: all 0.15s;
+ }
+ .proc-btn:hover { color: var(--text); border-color: var(--accent); }
+ .proc-btn-active { background: var(--accent); color: #fff !important; border-color: var(--accent) !important; }
+
+ .signal-card {
+ background: var(--surface); border: 1px solid var(--border);
+ border-radius: var(--radius-lg); padding: 1rem;
+ box-shadow: var(--shadow-card); position: relative;
+ }
+ .signal-card:hover { box-shadow: var(--shadow-hover); }
+ .signal-buy { border-left: 3px solid #c1121f; }
+ .signal-wait { border-left: 3px solid var(--yellow); }
+ .signal-hold { border-left: 3px solid var(--green); }
+ .signal-monitor { border-left: 3px solid var(--purple); }
+
+ .sig-badge-buy { background:#fde8e8; color:#c1121f; }
+ .sig-badge-wait { background:var(--yellow-light); color:#a06000; }
+ .sig-badge-hold { background:var(--green-light); color:#1b4332; }
+ .sig-badge-monitor { background:var(--purple-light); color:#5a3fcf; }
+
+ .intel-card {
+ background: var(--surface); border: 1px solid var(--border);
+ border-radius: var(--radius-lg); padding: 1rem;
+ box-shadow: var(--shadow-card);
+ }
+ .intel-badge {
+ display: inline-block; padding: 2px 8px; border-radius: 4px;
+ font-size: 0.65rem; font-weight: 700; text-transform: uppercase;
+ letter-spacing: 0.06em; margin-bottom: 0.5rem;
+ }
+ .intel-buy { background:#fde8e8; color:#c1121f; }
+ .intel-wait { background:var(--yellow-light); color:#a06000; }
+ .intel-hold { background:var(--green-light); color:#1b4332; }
+ .intel-monitor { background:var(--purple-light); color:#5a3fcf; }
+ .intel-none { background:var(--surface2); color:var(--text-dim); }
+
+ .abc-a { background:#fde8e8; color:#c1121f; font-weight:800; padding:2px 7px; border-radius:4px; }
+ .abc-b { background:var(--yellow-light); color:#a06000; font-weight:800; padding:2px 7px; border-radius:4px; }
+ .abc-c { background:var(--surface2); color:var(--text-dim); font-weight:800; padding:2px 7px; border-radius:4px; }
@@ -680,6 +726,7 @@
News
Finder
Blog Engine
+ Procurement Intel
@@ -905,6 +952,75 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
Loading reorder signals...
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ | Class |
+ Product |
+ Form Factor |
+ Demand Score |
+ Compat. |
+ Vendors |
+ Supply Risk |
+ Signal |
+
+ | Loading... |
+
+
+
+
+
+
+
+
Loading market intelligence...
+
+
+
+
+
+
+
Loading lifecycle events...
+
+
+
+
+
@@ -1221,6 +1337,7 @@ function goToTab(tabName) {
if (tabName === 'news') loadNews();
if (tabName === 'blog') loadBlogDrafts();
if (tabName === 'finder') document.getElementById('finder-switch-input').focus();
+ if (tabName === 'procurement') loadProcurement();
}
document.querySelectorAll('.tab').forEach(function(tab) {
@@ -2897,6 +3014,210 @@ el('compare-overlay').addEventListener('click', function(e) {
if (e.target === this) this.classList.remove('visible');
});
+// ─── PROCUREMENT INTEL ───────────────────────────────────────────────────────
+
+var procCurrentSignalFilter = '';
+var procCurrentAbcFilter = '';
+var procSignalsData = [];
+var procAbcData = [];
+
+function showProcSection(name) {
+ ['signals','abc','market','lifecycle'].forEach(function(s) {
+ var sec = el('proc-section-' + s);
+ var btn = el('proc-btn-' + s);
+ if (sec) sec.style.display = s === name ? '' : 'none';
+ if (btn) { btn.classList.toggle('proc-btn-active', s === name); }
+ });
+}
+
+async function loadProcurement() {
+ await Promise.all([
+ loadProcSignals(),
+ loadProcAbc(),
+ loadProcMarketIntel(),
+ loadProcLifecycle(),
+ ]);
+}
+
+async function loadProcSignals() {
+ var container = el('proc-signals-grid');
+ container.innerHTML = 'Loading signals...
';
+ try {
+ var d = await api('/api/procurement/signals?limit=100');
+ procSignalsData = d.data || [];
+ renderSignals(procCurrentSignalFilter);
+ } catch(e) {
+ container.innerHTML = 'No reorder signals yet — run the scraper to populate.
';
+ }
+}
+
+function filterSignal(sig) {
+ procCurrentSignalFilter = sig;
+ renderSignals(sig);
+}
+
+function renderSignals(filterSig) {
+ var data = filterSig ? procSignalsData.filter(function(r) { return r.signal === filterSig; }) : procSignalsData;
+ var container = el('proc-signals-grid');
+ if (!data.length) {
+ container.innerHTML = 'No signals for this filter.
';
+ return;
+ }
+ var signalIcon = { buy_now:'🔴', wait:'🟡', hold:'🟢', monitor:'🔵' };
+ var signalLabel = { buy_now:'Buy Now', wait:'Wait', hold:'Hold', monitor:'Monitor' };
+ container.innerHTML = data.map(function(r) {
+ var reasons = [];
+ try { reasons = JSON.parse(r.reasons || '[]'); } catch(e) {}
+ var sigClass = 'signal-' + (r.signal || 'monitor').replace('_','-');
+ var badgeClass = 'sig-badge-' + (r.signal || 'monitor').replace('_now','').replace('_','');
+ var abcBadge = r.abc_class ? '' + r.abc_class + '' : '';
+ var strengthPct = Math.round((r.signal_strength || 0) * 100);
+ var productName = r.standard_name || r.part_number || r.slug || '—';
+ var imgHtml = '';
+ if (r.image_r2_key) {
+ imgHtml = '
';
+ }
+ return ''
+ + '
'
+ + imgHtml
+ + '
'
+ + '
' + esc(productName) + '
'
+ + '
' + esc(r.form_factor || '') + (r.speed_gbps ? ' · ' + r.speed_gbps + 'G' : '') + (r.vendor_name ? ' · ' + esc(r.vendor_name) : '') + '
'
+ + '
'
+ + '
'
+ + '
'
+ + '' + (signalIcon[r.signal] || '') + ' ' + (signalLabel[r.signal] || r.signal) + ''
+ + abcBadge
+ + (r.supply_risk ? '' + esc(r.supply_risk) + ' risk' : '')
+ + '
'
+ + '
'
+ + (reasons.length ? reasons.map(function(r2) { return '→ ' + esc(r2); }).join('
') : 'Insufficient data')
+ + '
'
+ + '
'
+ + (r.stock_trend ? 'Stock: ' + r.stock_trend + '' : '')
+ + (r.price_trend ? 'Price: ' + r.price_trend + '' : '')
+ + (r.lead_time_weeks ? 'Lead: ' + r.lead_time_weeks + 'w' : '')
+ + '
'
+ + '
'
+ + '
Signal strength: ' + strengthPct + '%
'
+ + '
';
+ }).join('');
+}
+
+async function loadProcAbc() {
+ try {
+ var d = await api('/api/procurement/abc?limit=200');
+ procAbcData = d.data || [];
+ renderAbcTable(procCurrentAbcFilter);
+ } catch(e) {
+ el('abc-tbody').innerHTML = '| No ABC data yet — run compute:abc job. |
';
+ }
+}
+
+function filterAbc(cls) {
+ procCurrentAbcFilter = cls;
+ renderAbcTable(cls);
+}
+
+function renderAbcTable(filterCls) {
+ var data = filterCls ? procAbcData.filter(function(r) { return r.abc_class === filterCls; }) : procAbcData;
+ var sigIcon = { buy_now:'🔴', wait:'🟡', hold:'🟢', monitor:'🔵' };
+ el('abc-tbody').innerHTML = data.map(function(r) {
+ var abcEl = '' + (r.abc_class || '—') + '';
+ return ''
+ + '| ' + abcEl + ' | '
+ + '' + esc(r.standard_name || r.part_number || '—') + ' ' + esc(r.vendor_name || '') + ' | '
+ + '' + esc(r.form_factor || '—') + ' | '
+ + '' + (r.demand_score ? parseFloat(r.demand_score).toFixed(0) : '—') + ' | '
+ + '' + (r.compat_count || 0) + ' | '
+ + '' + (r.vendor_count || 0) + ' | '
+ + '' + esc(r.supply_risk || '—') + ' | '
+ + '' + (r.signal ? (sigIcon[r.signal] || '') + ' ' + r.signal.replace('_',' ') : '—') + ' | '
+ + '
';
+ }).join('') || '| No data for this filter. |
';
+}
+
+async function loadProcMarketIntel() {
+ var container = el('proc-market-grid');
+ try {
+ var d = await api('/api/procurement/market-intel?days=180&limit=50');
+ var items = d.data || [];
+ if (!items.length) {
+ container.innerHTML = 'No market intelligence yet.
';
+ return;
+ }
+ var typeIcon = {
+ capex_cycle:'💰', trade_show:'🎪', standard_ratified:'📋',
+ standard_draft:'📝', distributor_lead_time:'🚚', supply_chain:'🏭', tender:'📑'
+ };
+ container.innerHTML = items.map(function(item) {
+ var sig = item.buy_signal_implication || 'none';
+ var badgeClass = 'intel-' + sig.replace('_now','').replace('_','');
+ var sigLabel = { buy_now:'🔴 Buy Now', wait:'🟡 Wait', hold:'🟢 Hold', monitor:'🔵 Monitor', none:'—' };
+ var techs = (item.technologies || []).map(function(t) {
+ return '' + esc(t) + '';
+ }).join(' ');
+ return ''
+ + '
'
+ + '
' + (typeIcon[item.intel_type] || '📊') + ''
+ + '
'
+ + '
' + (sigLabel[sig] || sig) + ''
+ + '
' + esc(item.title) + '
'
+ + '
'
+ + '
' + esc(item.summary || '') + '
'
+ + (techs ? '
' + techs + '
' : '')
+ + '
'
+ + '' + esc(item.source_name) + ''
+ + (item.impact_horizon_months ? 'Impact: ~' + item.impact_horizon_months + ' months' : '')
+ + '
'
+ + '
';
+ }).join('');
+ } catch(e) {
+ container.innerHTML = 'Could not load market intelligence.
';
+ }
+}
+
+async function loadProcLifecycle() {
+ var container = el('proc-lifecycle-grid');
+ try {
+ var d = await api('/api/procurement/lifecycle?days=365&limit=50');
+ var items = d.data || [];
+ if (!items.length) {
+ container.innerHTML = 'No lifecycle events yet.
';
+ return;
+ }
+ var typeIcon = {
+ eol_announced:'⛔', eol_effective:'🚫', standard_ratified:'✅',
+ standard_draft:'📝', capex_peak:'💰', trade_show:'🎪',
+ supply_risk:'⚠️', tender:'📑', price_floor:'📉'
+ };
+ var impactColor = { critical:'#c1121f', high:'#c1121f', medium:'var(--yellow)', low:'var(--green)' };
+ var sigLabel = { buy_now:'🔴 Buy Now', wait:'🟡 Wait', hold:'🟢 Hold', monitor:'🔵 Monitor' };
+ container.innerHTML = items.map(function(item) {
+ var ic = impactColor[item.impact_level] || 'var(--text-dim)';
+ var productInfo = item.part_number ? esc(item.part_number) + (item.form_factor ? ' · ' + esc(item.form_factor) : '') : '';
+ var dateStr = item.effective_date ? new Date(item.effective_date).toLocaleDateString('de-DE') : '';
+ return ''
+ + '
'
+ + '
' + (typeIcon[item.event_type] || '📌') + ''
+ + '
'
+ + (item.buy_signal ? '
' + (sigLabel[item.buy_signal] || item.buy_signal) + '' : '')
+ + '
' + esc(item.title) + '
'
+ + '
'
+ + (item.description ? '
' + esc(item.description.substring(0, 200)) + (item.description.length > 200 ? '…' : '') + '
' : '')
+ + '
'
+ + '' + esc(item.source_name || '') + (productInfo ? ' · ' + productInfo : '') + ''
+ + (dateStr ? '' + dateStr + '' : '')
+ + '
'
+ + '
';
+ }).join('');
+ } catch(e) {
+ container.innerHTML = 'Could not load lifecycle events.
';
+ }
+}
+
// INIT
loadOverview();
diff --git a/packages/scraper/src/crawler-llm/core.ts b/packages/scraper/src/crawler-llm/core.ts
new file mode 100644
index 0000000..1622457
--- /dev/null
+++ b/packages/scraper/src/crawler-llm/core.ts
@@ -0,0 +1,349 @@
+/**
+ * Crawler LLM — Core extraction engine.
+ *
+ * Uses Ollama (local LLM) to extract structured product data from HTML.
+ * Two-stage pipeline:
+ * 1. Page type detection (product vs. category) — cheap, fast
+ * 2. Structured data extraction with schema enforcement
+ *
+ * Vendor-specific profiles guide the LLM without hard-coding selectors.
+ */
+
+import { pool } from "../utils/db";
+import type { StockExtractionResult, MarketIntelExtractionResult } from "./stock-schema";
+import { VENDOR_PROFILES } from "./stock-schema";
+import { validateStockExtraction } from "./validator";
+
+const OLLAMA_HOST = process.env.OLLAMA_HOST || "http://192.168.178.169:11434";
+const OLLAMA_MODEL = process.env.CRAWLER_LLM_MODEL || "qwen2.5:14b";
+const MAX_HTML_CHARS = 12_000; // truncate to keep prompt manageable
+
+// ─────────────────────────────────────────────────────────────────────────────
+// Ollama API call
+// ─────────────────────────────────────────────────────────────────────────────
+
+async function ollamaGenerate(prompt: string): Promise {
+ const res = await fetch(`${OLLAMA_HOST}/api/generate`, {
+ method: "POST",
+ headers: { "Content-Type": "application/json" },
+ body: JSON.stringify({
+ model: OLLAMA_MODEL,
+ prompt,
+ stream: false,
+ format: "json",
+ options: { temperature: 0.1, num_predict: 1024 },
+ }),
+ });
+ if (!res.ok) throw new Error(`Ollama error: ${res.status} ${await res.text()}`);
+ const data = await res.json() as { response: string };
+ return data.response;
+}
+
+// ─────────────────────────────────────────────────────────────────────────────
+// Stage 1: Page type detection (fast, binary)
+// ─────────────────────────────────────────────────────────────────────────────
+
+async function detectPageType(html: string, url: string, vendorSlug?: string): Promise<{
+ is_product_page: boolean;
+ confidence: number;
+ evidence: string;
+}> {
+ const profile = vendorSlug ? VENDOR_PROFILES[vendorSlug] : null;
+ const hints = profile
+ ? `\nVendor hints — Product page signals: ${profile.product_page_signals.join(", ")}. Category page signals: ${profile.category_page_signals.join(", ")}.`
+ : "";
+
+ const prompt = `You are a web scraper assistant. Determine if this HTML is a single product page or a category/listing page.
+
+URL: ${url}${hints}
+
+HTML (truncated):
+${html.substring(0, 3000)}
+
+Respond with JSON only:
+{
+ "is_product_page": true or false,
+ "confidence": 0.0 to 1.0,
+ "evidence": "brief quote from the HTML that supports your decision"
+}`;
+
+ const raw = await ollamaGenerate(prompt);
+ try {
+ const parsed = JSON.parse(raw);
+ return {
+ is_product_page: Boolean(parsed.is_product_page),
+ confidence: Number(parsed.confidence) || 0,
+ evidence: String(parsed.evidence || ""),
+ };
+ } catch {
+ return { is_product_page: false, confidence: 0, evidence: "JSON parse failed" };
+ }
+}
+
+// ─────────────────────────────────────────────────────────────────────────────
+// Stage 2: Full product extraction
+// ─────────────────────────────────────────────────────────────────────────────
+
+async function extractProductData(
+ html: string,
+ url: string,
+ vendorSlug?: string
+): Promise {
+ const profile = vendorSlug ? VENDOR_PROFILES[vendorSlug] : null;
+ const hints = profile ? `
+Vendor: ${profile.name} (${profile.currency})
+Price hint: ${profile.price_hint || "find the main selling price"}
+Stock hint: ${profile.stock_hint || "find availability status"}` : "";
+
+ const prompt = `You are a product data extractor for optical transceiver products. Extract structured data from this product page HTML.
+
+URL: ${url}${hints}
+
+HTML (truncated to ${MAX_HTML_CHARS} chars):
+${html.substring(0, MAX_HTML_CHARS)}
+
+Extract and respond with JSON only — use null for any field you cannot find with confidence:
+{
+ "is_product_page": true,
+ "confidence": 0.0 to 1.0,
+ "source_evidence": "brief quote from HTML supporting your extraction",
+
+ "price": number or null,
+ "currency": "USD" or "EUR" or "GBP" or "CNY" or null,
+ "price_breaks": [{"qty": number, "price": number}] or [],
+
+ "stock_level": "in_stock" or "out_of_stock" or "limited" or "unknown",
+ "stock_quantity": number or null,
+ "incoming_quantity": number or null,
+ "incoming_eta": "YYYY-MM-DD" or null,
+ "lead_time_days": number or null,
+ "moq": number or null,
+
+ "part_number": "exact part number string" or null,
+ "standard_name": "manufacturer's exact product name as written on the page" or null,
+ "form_factor": "SFP+" or "QSFP28" or "QSFP-DD" etc or null,
+ "speed_gbps": number or null
+}
+
+Rules:
+- standard_name MUST be the manufacturer's exact product designation, not a generic description
+- If you see "All Optical Transceivers" or similar category text as the name, set standard_name to null
+- price_breaks only if there is a visible quantity/price table
+- incoming_quantity: look for text like "X units incoming", "X im Zulauf", "Expected: X"
+- Set confidence < 0.5 if you are guessing`;
+
+ const raw = await ollamaGenerate(prompt);
+ try {
+ const parsed = JSON.parse(raw);
+ return {
+ is_product_page: Boolean(parsed.is_product_page ?? true),
+ confidence: Number(parsed.confidence) || 0,
+ source_evidence: String(parsed.source_evidence || ""),
+ price: parsed.price != null ? Number(parsed.price) : null,
+ currency: parsed.currency || null,
+ price_breaks: Array.isArray(parsed.price_breaks) ? parsed.price_breaks : [],
+ stock_level: (["in_stock", "out_of_stock", "limited"].includes(parsed.stock_level))
+ ? parsed.stock_level
+ : "unknown",
+ stock_quantity: parsed.stock_quantity != null ? Number(parsed.stock_quantity) : null,
+ incoming_quantity: parsed.incoming_quantity != null ? Number(parsed.incoming_quantity) : null,
+ incoming_eta: parsed.incoming_eta || null,
+ lead_time_days: parsed.lead_time_days != null ? Number(parsed.lead_time_days) : null,
+ moq: parsed.moq != null ? Number(parsed.moq) : null,
+ part_number: parsed.part_number || null,
+ standard_name: parsed.standard_name || null,
+ form_factor: parsed.form_factor || null,
+ speed_gbps: parsed.speed_gbps != null ? Number(parsed.speed_gbps) : null,
+ };
+ } catch {
+ return {
+ is_product_page: false,
+ confidence: 0,
+ source_evidence: "JSON parse failed",
+ price: null, currency: null, price_breaks: [],
+ stock_level: "unknown",
+ stock_quantity: null, incoming_quantity: null, incoming_eta: null,
+ lead_time_days: null, moq: null,
+ part_number: null, standard_name: null, form_factor: null, speed_gbps: null,
+ };
+ }
+}
+
+// ─────────────────────────────────────────────────────────────────────────────
+// Market intelligence extraction
+// ─────────────────────────────────────────────────────────────────────────────
+
+export async function extractMarketIntel(
+ text: string,
+ url: string,
+ sourceName: string
+): Promise {
+ const prompt = `You are an optical transceiver market analyst. Analyze this text for market intelligence relevant to transceiver procurement.
+
+Source: ${sourceName}
+URL: ${url}
+
+Text:
+${text.substring(0, 8000)}
+
+Respond with JSON only:
+{
+ "is_relevant": true or false (false if nothing relevant to transceiver markets),
+ "confidence": 0.0 to 1.0,
+ "source_evidence": "brief quote supporting your analysis",
+
+ "intel_type": one of: "capex_cycle", "trade_show", "standard_ratified", "standard_draft", "distributor_lead_time", "supply_chain", "tender",
+ "title": "concise title (max 100 chars)",
+ "summary": "2-3 sentence summary of the key insight",
+ "technologies": ["400G", "QSFP-DD", etc — transceiver technologies mentioned],
+ "buy_signal_implication": one of: "buy_now", "wait", "hold", "monitor", "none",
+ "impact_horizon_months": estimated months until this affects the market (number),
+ "published_at": "YYYY-MM-DD" or null
+}
+
+Guidelines:
+- buy_now: shortage, EOL, CapEx surge → order before prices rise
+- wait: new standard coming → current products will drop in price
+- hold: stable market, no urgency
+- monitor: interesting but unclear impact
+- impact_horizon_months: 0-3 for immediate, 3-12 for medium, 12+ for long-term`;
+
+ const raw = await ollamaGenerate(prompt);
+ try {
+ const p = JSON.parse(raw);
+ return {
+ is_relevant: Boolean(p.is_relevant),
+ confidence: Number(p.confidence) || 0,
+ source_evidence: String(p.source_evidence || ""),
+ intel_type: p.intel_type || "supply_chain",
+ title: String(p.title || "").substring(0, 200),
+ summary: String(p.summary || ""),
+ technologies: Array.isArray(p.technologies) ? p.technologies : [],
+ buy_signal_implication: p.buy_signal_implication || "none",
+ impact_horizon_months: Number(p.impact_horizon_months) || 6,
+ published_at: p.published_at || null,
+ };
+ } catch {
+ return {
+ is_relevant: false, confidence: 0, source_evidence: "parse error",
+ intel_type: "supply_chain", title: "", summary: "", technologies: [],
+ buy_signal_implication: "none", impact_horizon_months: 0, published_at: null,
+ };
+ }
+}
+
+// ─────────────────────────────────────────────────────────────────────────────
+// Public API — Main scrape function
+// ─────────────────────────────────────────────────────────────────────────────
+
+export interface CrawlerLLMResult {
+ extraction: StockExtractionResult;
+ validation_passed: boolean;
+ validation_errors: string[];
+ validation_warnings: string[];
+}
+
+export async function scrapeWithLLM(
+ html: string,
+ url: string,
+ options: {
+ vendorSlug?: string;
+ vendorId?: string;
+ transceiverIds?: string[]; // candidate matches (pre-filtered by form_factor/speed)
+ speedGbps?: number;
+ skipPageDetection?: boolean; // set true if URL is known product page
+ } = {}
+): Promise {
+ const { vendorSlug, speedGbps, skipPageDetection } = options;
+
+ // Stage 1: Page type detection (skip if caller already knows it's a product page)
+ if (!skipPageDetection) {
+ const pageType = await detectPageType(html, url, vendorSlug);
+ if (!pageType.is_product_page) {
+ return {
+ extraction: {
+ is_product_page: false,
+ confidence: pageType.confidence,
+ source_evidence: pageType.evidence,
+ price: null, currency: null, price_breaks: [],
+ stock_level: "unknown",
+ stock_quantity: null, incoming_quantity: null, incoming_eta: null,
+ lead_time_days: null, moq: null,
+ part_number: null, standard_name: null, form_factor: null, speed_gbps: null,
+ },
+ validation_passed: false,
+ validation_errors: ["Not a product page"],
+ validation_warnings: [],
+ };
+ }
+ }
+
+ // Stage 2: Full extraction
+ const extraction = await extractProductData(html, url, vendorSlug);
+
+ // Stage 3: Rule-based validation
+ const validation = validateStockExtraction(extraction, speedGbps);
+
+ return {
+ extraction,
+ validation_passed: validation.passed,
+ validation_errors: validation.errors,
+ validation_warnings: validation.warnings,
+ };
+}
+
+// ─────────────────────────────────────────────────────────────────────────────
+// Persist to DB — saves stock snapshot and logs the scrape
+// ─────────────────────────────────────────────────────────────────────────────
+
+export async function persistStockSnapshot(
+ result: CrawlerLLMResult,
+ url: string,
+ vendorId: string,
+ transceiverIds: string[]
+): Promise {
+ const { extraction, validation_passed } = result;
+
+ // Always log (for audit/debug)
+ await pool.query(
+ `INSERT INTO crawler_llm_log
+ (url, vendor_id, is_product_page, extracted_data, confidence, validation_passed,
+ failure_reason, model_used)
+ VALUES ($1, $2, $3, $4, $5, $6, $7, $8)`,
+ [
+ url,
+ vendorId,
+ extraction.is_product_page,
+ JSON.stringify(extraction),
+ extraction.confidence,
+ validation_passed,
+ validation_passed ? null : result.validation_errors.join("; "),
+ OLLAMA_MODEL,
+ ]
+ );
+
+ if (!validation_passed || !extraction.is_product_page) return;
+
+ // Save stock snapshot for each matched transceiver
+ for (const transceiverIdStr of transceiverIds) {
+ await pool.query(
+ `INSERT INTO stock_snapshots
+ (transceiver_id, vendor_id, stock_level, stock_quantity, incoming_quantity,
+ incoming_eta, lead_time_days, moq, price_breaks, source_url, crawler_confidence)
+ VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11)`,
+ [
+ transceiverIdStr,
+ vendorId,
+ extraction.stock_level,
+ extraction.stock_quantity,
+ extraction.incoming_quantity,
+ extraction.incoming_eta,
+ extraction.lead_time_days,
+ extraction.moq,
+ extraction.price_breaks.length > 0 ? JSON.stringify(extraction.price_breaks) : null,
+ url,
+ extraction.confidence,
+ ]
+ );
+ }
+}
diff --git a/packages/scraper/src/crawler-llm/stock-schema.ts b/packages/scraper/src/crawler-llm/stock-schema.ts
new file mode 100644
index 0000000..2ddfacb
--- /dev/null
+++ b/packages/scraper/src/crawler-llm/stock-schema.ts
@@ -0,0 +1,135 @@
+/**
+ * Crawler LLM — Schema definitions for structured product extraction.
+ *
+ * Every schema includes a `confidence` and `source_evidence` field so the LLM
+ * is forced to cite its work. This enables validation and debugging.
+ */
+
+export interface StockExtractionResult {
+ is_product_page: boolean; // false = category/listing page → discard
+ confidence: number; // 0.0 – 1.0 — LLM self-assessment
+ source_evidence: string; // which text passage the LLM used
+
+ // Pricing
+ price: number | null;
+ currency: "USD" | "EUR" | "GBP" | "CNY" | null;
+ price_breaks: PriceBreak[]; // volume discount tiers
+
+ // Stock
+ stock_level: "in_stock" | "out_of_stock" | "limited" | "unknown";
+ stock_quantity: number | null; // exact qty if shown
+ incoming_quantity: number | null; // "18 im Zulauf"
+ incoming_eta: string | null; // ISO date string "2026-04-15"
+ lead_time_days: number | null;
+ moq: number | null; // minimum order quantity
+
+ // Product identity (for cross-validation)
+ part_number: string | null;
+ standard_name: string | null; // manufacturer's exact product name
+ form_factor: string | null;
+ speed_gbps: number | null;
+}
+
+export interface PriceBreak {
+ qty: number;
+ price: number;
+}
+
+export interface MarketIntelExtractionResult {
+ is_relevant: boolean; // false = skip
+ confidence: number;
+ source_evidence: string;
+
+ intel_type: "capex_cycle" | "trade_show" | "standard_ratified" | "standard_draft" | "distributor_lead_time" | "supply_chain" | "tender";
+ title: string;
+ summary: string;
+ technologies: string[]; // ['400G', 'QSFP-DD', ...]
+ buy_signal_implication: "buy_now" | "wait" | "hold" | "monitor" | "none";
+ impact_horizon_months: number;
+ published_at: string | null; // ISO date
+}
+
+/** Vendor-specific hints to improve LLM extraction accuracy */
+export interface VendorProfile {
+ slug: string;
+ name: string;
+ currency: "USD" | "EUR" | "GBP" | "CNY";
+ product_page_signals: string[]; // text patterns that indicate a product page
+ category_page_signals: string[]; // text patterns that indicate a category page
+ price_hint: string | null; // natural language hint for the LLM
+ stock_hint: string | null;
+ known_moq: number | null;
+}
+
+export const VENDOR_PROFILES: Record = {
+ "flexoptix": {
+ slug: "flexoptix",
+ name: "Flexoptix",
+ currency: "EUR",
+ product_page_signals: ["In den Warenkorb", "Add to Cart", "part number", "SKU:", "P/N:"],
+ category_page_signals: ["Alle Produkte", "Filter", "Ergebnisse", "products found"],
+ price_hint: "Price is shown in EUR, usually near 'In den Warenkorb' button. May show 'auf Anfrage' if not listed.",
+ stock_hint: "Look for 'auf Lager', 'Lieferzeit', 'sofort lieferbar', or stock badge near price.",
+ known_moq: 1,
+ },
+ "fs-com": {
+ slug: "fs-com",
+ name: "FS.com",
+ currency: "USD",
+ product_page_signals: ["Add to Cart", "Part No.", "SKU", "In Stock", "Reviews"],
+ category_page_signals: ["Products", "Filter by", "Sort by", "items found", "Category"],
+ price_hint: "Price is in USD, shown prominently near 'Add to Cart'. May show qty pricing table.",
+ stock_hint: "Look for 'In Stock', exact number like '847 In Stock', or 'Out of Stock'.",
+ known_moq: 1,
+ },
+ "10gtek": {
+ slug: "10gtek",
+ name: "10Gtek",
+ currency: "USD",
+ product_page_signals: ["Add to Cart", "Product Code:", "In Stock", "Ships from"],
+ category_page_signals: ["Shop All", "Filter", "Category", "Sort By"],
+ price_hint: "Price in USD near Add to Cart button. Volume pricing sometimes shown as table.",
+ stock_hint: "Stock level shown as text: 'In Stock', 'Low Stock', 'Out of Stock'.",
+ known_moq: 1,
+ },
+ "atgbics": {
+ slug: "atgbics",
+ name: "ATGBICS",
+ currency: "GBP",
+ product_page_signals: ["Add to Basket", "Part Number:", "Stock:", "Delivery"],
+ category_page_signals: ["Products", "Browse by", "Refine by"],
+ price_hint: "Price in GBP. ATGBICS uses Shopify, price is in a span with class 'price'.",
+ stock_hint: "Stock shown as 'In Stock', 'Limited Stock', or 'Out of Stock' near price.",
+ known_moq: 1,
+ },
+ "prolabs": {
+ slug: "prolabs",
+ name: "ProLabs",
+ currency: "USD",
+ product_page_signals: ["Add to Cart", "Part Number", "In Stock", "Specs"],
+ category_page_signals: ["Results", "Filter", "Category", "Sort"],
+ price_hint: "Price in USD. ProLabs may require login for prices — if so, mark price as null.",
+ stock_hint: "Stock availability shown near product title.",
+ known_moq: 1,
+ },
+ "farnell": {
+ slug: "farnell",
+ name: "Farnell",
+ currency: "EUR",
+ product_page_signals: ["Add to Basket", "Order Code:", "Stock:", "Lead Time:"],
+ category_page_signals: ["Products", "Refine Search", "Category", "results for"],
+ price_hint: "Price in EUR or GBP. Farnell shows break prices in a table with columns Qty/Price.",
+ stock_hint: "Stock shown as number, e.g. '47 In Stock'. Lead time shown in business days.",
+ known_moq: 1,
+ },
+ "mouser": {
+ slug: "mouser",
+ name: "Mouser Electronics",
+ currency: "EUR",
+ product_page_signals: ["Add to Cart", "Mouser Part No.", "Mfr. Part No.", "In Stock:"],
+ category_page_signals: ["Search Results", "Filter Results", "Products (", "Sort By"],
+ price_hint: "Mouser shows price per unit and break quantities. USD or EUR depending on locale.",
+ stock_hint: "Stock shown as exact number: 'In Stock: 124'. Lead time shown for out-of-stock items.",
+ known_moq: null,
+ },
+};
diff --git a/packages/scraper/src/crawler-llm/validator.ts b/packages/scraper/src/crawler-llm/validator.ts
new file mode 100644
index 0000000..6b4778c
--- /dev/null
+++ b/packages/scraper/src/crawler-llm/validator.ts
@@ -0,0 +1,157 @@
+/**
+ * Crawler LLM — Rule-based validator.
+ *
+ * Runs AFTER the LLM extraction to catch hallucinations and obvious errors.
+ * The LLM is good at structure; this catches range violations and nonsense.
+ */
+
+import type { StockExtractionResult } from "./stock-schema";
+
+export interface ValidationResult {
+ passed: boolean;
+ warnings: string[];
+ errors: string[];
+}
+
+/** Expected price ranges per speed class (USD/EUR). Rough but effective. */
+const PRICE_RANGES: Record = {
+ "1G": [10, 500],
+ "10G": [20, 2000],
+ "25G": [30, 2000],
+ "40G": [50, 3000],
+ "100G": [80, 15000],
+ "200G": [200, 20000],
+ "400G": [200, 50000],
+ "800G": [500, 80000],
+};
+
+const VALID_FORM_FACTORS = new Set([
+ "SFP", "SFP+", "SFP28", "SFP56", "SFP-DD",
+ "QSFP", "QSFP+", "QSFP28", "QSFP56", "QSFP-DD", "QSFP112",
+ "OSFP", "OSFP-RHS",
+ "CFP", "CFP2", "CFP4", "CFP8",
+ "XFP", "X2", "XENPAK",
+ "DSFP", "CSFP",
+]);
+
+const VALID_CURRENCIES = new Set(["USD", "EUR", "GBP", "CNY"]);
+
+export function validateStockExtraction(
+ result: StockExtractionResult,
+ speedGbps?: number
+): ValidationResult {
+ const errors: string[] = [];
+ const warnings: string[] = [];
+
+ // Not a product page — caller should discard, not an error
+ if (!result.is_product_page) {
+ return { passed: false, errors: ["Not a product page"], warnings: [] };
+ }
+
+ // Confidence too low
+ if (result.confidence < 0.5) {
+ errors.push(`Confidence ${result.confidence} below threshold 0.5`);
+ }
+
+ // Price validation
+ if (result.price !== null) {
+ if (result.price <= 0) {
+ errors.push(`Price ${result.price} is not positive`);
+ }
+ if (result.price > 500_000) {
+ errors.push(`Price ${result.price} exceeds maximum sanity limit`);
+ }
+ if (!result.currency || !VALID_CURRENCIES.has(result.currency)) {
+ errors.push(`Invalid currency: ${result.currency}`);
+ }
+
+ // Speed-class price range check
+ if (speedGbps) {
+ const speedKey = `${speedGbps}G`;
+ const range = PRICE_RANGES[speedKey];
+ if (range && (result.price < range[0] * 0.1 || result.price > range[1] * 10)) {
+ warnings.push(`Price ${result.price} ${result.currency} looks unusual for ${speedKey} (expected ${range[0]}–${range[1]})`);
+ }
+ }
+ }
+
+ // Stock quantity sanity
+ if (result.stock_quantity !== null) {
+ if (result.stock_quantity < 0) {
+ errors.push(`Stock quantity ${result.stock_quantity} is negative`);
+ }
+ if (result.stock_quantity > 100_000) {
+ warnings.push(`Stock quantity ${result.stock_quantity} unusually high — verify`);
+ }
+ }
+
+ // Lead time sanity
+ if (result.lead_time_days !== null) {
+ if (result.lead_time_days < 0) {
+ errors.push(`Lead time ${result.lead_time_days} is negative`);
+ }
+ if (result.lead_time_days > 730) {
+ warnings.push(`Lead time ${result.lead_time_days} days (>2 years) — verify`);
+ }
+ }
+
+ // MOQ sanity
+ if (result.moq !== null && result.moq < 1) {
+ errors.push(`MOQ ${result.moq} must be at least 1`);
+ }
+
+ // Form factor check
+ if (result.form_factor && !VALID_FORM_FACTORS.has(result.form_factor)) {
+ warnings.push(`Unknown form factor: ${result.form_factor}`);
+ }
+
+ // Price break consistency
+ if (result.price_breaks.length > 0) {
+ for (const pb of result.price_breaks) {
+ if (pb.qty < 1 || pb.price <= 0) {
+ errors.push(`Invalid price break: qty=${pb.qty} price=${pb.price}`);
+ }
+ if (result.price && pb.price > result.price * 2) {
+ warnings.push(`Price break ${pb.qty}x=${pb.price} higher than unit price — unusual`);
+ }
+ }
+ }
+
+ // Incoming ETA must be a future-ish date
+ if (result.incoming_eta) {
+ const eta = new Date(result.incoming_eta);
+ if (isNaN(eta.getTime())) {
+ errors.push(`Invalid incoming_eta date: ${result.incoming_eta}`);
+ }
+ }
+
+ return {
+ passed: errors.length === 0,
+ errors,
+ warnings,
+ };
+}
+
+/** Cross-source comparison: do two extractions agree within tolerance? */
+export function crossValidate(
+ a: StockExtractionResult,
+ b: StockExtractionResult,
+ priceTolerance = 0.10 // 10% price difference allowed
+): boolean {
+ if (a.price === null || b.price === null) return false;
+
+ // Both in same currency
+ if (a.currency !== b.currency) return false;
+
+ // Price within tolerance
+ const diff = Math.abs(a.price - b.price) / Math.max(a.price, b.price);
+ if (diff > priceTolerance) return false;
+
+ // Part numbers match (if both present)
+ if (a.part_number && b.part_number) {
+ const normalize = (s: string) => s.replace(/[\s\-_]/g, "").toUpperCase();
+ if (normalize(a.part_number) !== normalize(b.part_number)) return false;
+ }
+
+ return true;
+}
diff --git a/packages/scraper/src/scheduler.ts b/packages/scraper/src/scheduler.ts
index a280b01..3b90a69 100644
--- a/packages/scraper/src/scheduler.ts
+++ b/packages/scraper/src/scheduler.ts
@@ -67,6 +67,9 @@ export async function registerSchedules(boss: PgBoss): Promise {
"scrape:news",
"scrape:faq",
"scrape:docs",
+ "scrape:market-intel",
+ "compute:abc",
+ "compute:reorder-signals",
];
for (const q of queues) {
await boss.createQueue(q).catch(() => { /* already exists */ });
@@ -140,6 +143,24 @@ export async function registerSchedules(boss: PgBoss): Promise {
expireInSeconds: 7200,
});
+ // Market intelligence: OFC/ECOC, IEEE, TED, Farnell/Mouser lead times (every Tuesday 5am)
+ await boss.schedule("scrape:market-intel", "0 5 * * 2", {}, {
+ retryLimit: 2,
+ expireInSeconds: 3600,
+ });
+
+ // ABC classification recompute (after each major pricing run — daily at 8am)
+ await boss.schedule("compute:abc", "0 8 * * *", {}, {
+ retryLimit: 2,
+ expireInSeconds: 600,
+ });
+
+ // Reorder signals recompute (daily at 8:30am — after ABC)
+ await boss.schedule("compute:reorder-signals", "30 8 * * *", {}, {
+ retryLimit: 2,
+ expireInSeconds: 600,
+ });
+
console.log("All schedules registered");
}
@@ -208,5 +229,23 @@ export async function registerWorkers(boss: PgBoss): Promise {
console.log(`[${new Date().toISOString()}] Docs scraper — not yet implemented`);
});
+ await boss.work("scrape:market-intel", async (_job) => {
+ console.log(`[${new Date().toISOString()}] Running: Market intelligence`);
+ const { scrapeMarketIntelligence } = await import("./scrapers/market-intelligence");
+ await withIsolatedStorage("market-intel", scrapeMarketIntelligence);
+ });
+
+ await boss.work("compute:abc", async (_job) => {
+ console.log(`[${new Date().toISOString()}] Computing: ABC classification`);
+ const { computeAbcClassification } = await import("./scrapers/market-intelligence");
+ await computeAbcClassification();
+ });
+
+ await boss.work("compute:reorder-signals", async (_job) => {
+ console.log(`[${new Date().toISOString()}] Computing: Reorder signals`);
+ const { computeReorderSignals } = await import("./scrapers/market-intelligence");
+ await computeReorderSignals();
+ });
+
console.log("All workers registered");
}
diff --git a/packages/scraper/src/scrapers/market-intelligence.ts b/packages/scraper/src/scrapers/market-intelligence.ts
new file mode 100644
index 0000000..8e12587
--- /dev/null
+++ b/packages/scraper/src/scrapers/market-intelligence.ts
@@ -0,0 +1,299 @@
+/**
+ * Market Intelligence Scraper
+ *
+ * Collects procurement-relevant signals from:
+ * - OFC/ECOC conference programs
+ * - Farnell/Mouser lead times for optical modules
+ * - IEEE 802.3 working group status page
+ * - EU TED tender database (fiber infrastructure)
+ * - LightReading/FierceTelecom trade press
+ *
+ * Runs weekly via pg-boss scheduler.
+ * Results stored in market_intelligence table.
+ * LLM analysis via Crawler LLM extractMarketIntel().
+ */
+
+import { CheerioCrawler } from "crawlee";
+import { extractMarketIntel } from "../crawler-llm/core";
+import { pool } from "../utils/db";
+
+interface IntelSource {
+ name: string;
+ url: string;
+ type: "trade_show" | "standard_ratified" | "standard_draft" | "distributor_lead_time" | "supply_chain" | "tender" | "capex_cycle";
+ fetchText: (html: string) => string; // extract relevant text from HTML
+}
+
+const SOURCES: IntelSource[] = [
+ {
+ name: "OFC Conference News",
+ url: "https://www.ofcconference.org/en-us/home/news/",
+ type: "trade_show",
+ fetchText: (html) => html.replace(/<[^>]+>/g, " ").replace(/\s+/g, " ").substring(0, 8000),
+ },
+ {
+ name: "LightReading — Optical Networking",
+ url: "https://www.lightreading.com/optical-networking",
+ type: "supply_chain",
+ fetchText: (html) => html.replace(/<[^>]+>/g, " ").replace(/\s+/g, " ").substring(0, 8000),
+ },
+ {
+ name: "IEEE 802.3 Working Group",
+ url: "https://www.ieee802.org/3/",
+ type: "standard_draft",
+ fetchText: (html) => html.replace(/<[^>]+>/g, " ").replace(/\s+/g, " ").substring(0, 6000),
+ },
+ {
+ name: "EU TED — ICT/Fiber Tenders",
+ url: "https://ted.europa.eu/en/search/result?forms%5B0%5D%5Bcpv%5D=32571000",
+ type: "tender",
+ fetchText: (html) => html.replace(/<[^>]+>/g, " ").replace(/\s+/g, " ").substring(0, 8000),
+ },
+ {
+ name: "Farnell — SFP Fiber Transceivers Lead Times",
+ url: "https://de.farnell.com/c/passive-optische-netzwerke/glasfasertransceiver",
+ type: "distributor_lead_time",
+ fetchText: (html) => {
+ // Extract lead time patterns: "X Wochen", "X weeks", "X days"
+ const leadTimePattern = /(\d+)\s*(wochen|weeks|days|tage|week|day)/gi;
+ const matches = [];
+ let m;
+ while ((m = leadTimePattern.exec(html)) !== null) {
+ matches.push(m[0]);
+ }
+ const context = html.replace(/<[^>]+>/g, " ").replace(/\s+/g, " ");
+ return `Lead time mentions: ${matches.join(", ")}\n\nPage context:\n${context.substring(0, 5000)}`;
+ },
+ },
+ {
+ name: "Mouser — Optical Transceivers Category",
+ url: "https://www.mouser.de/c/optoelectronics/fiber-optic-components/fiber-optic-transceivers/",
+ type: "distributor_lead_time",
+ fetchText: (html) => html.replace(/<[^>]+>/g, " ").replace(/\s+/g, " ").substring(0, 6000),
+ },
+ {
+ name: "FierceTelecom — Optical News",
+ url: "https://www.fiercetelecom.com/optical",
+ type: "supply_chain",
+ fetchText: (html) => html.replace(/<[^>]+>/g, " ").replace(/\s+/g, " ").substring(0, 8000),
+ },
+];
+
+async function saveIntelItem(
+ item: Awaited>,
+ source: IntelSource,
+ url: string
+): Promise {
+ if (!item.is_relevant || item.confidence < 0.5) return;
+
+ await pool.query(
+ `INSERT INTO market_intelligence
+ (intel_type, title, summary, relevance_score, technologies,
+ buy_signal_implication, impact_horizon_months, source_url, source_name, published_at)
+ VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10)
+ ON CONFLICT DO NOTHING`,
+ [
+ item.intel_type,
+ item.title.substring(0, 500),
+ item.summary,
+ item.confidence,
+ item.technologies,
+ item.buy_signal_implication,
+ item.impact_horizon_months,
+ url,
+ source.name,
+ item.published_at ? new Date(item.published_at) : null,
+ ]
+ );
+ console.log(`[market-intel] Saved: ${item.title.substring(0, 60)}... (${item.buy_signal_implication})`);
+}
+
+export async function scrapeMarketIntelligence(): Promise {
+ console.log("[market-intel] Starting market intelligence scrape...");
+ let processed = 0;
+
+ const crawler = new CheerioCrawler({
+ maxRequestsPerCrawl: SOURCES.length + 20,
+ maxConcurrency: 2,
+ requestHandlerTimeoutSecs: 30,
+
+ async requestHandler({ request, body }) {
+ const html = body.toString();
+ const source = SOURCES.find((s) => request.url.startsWith(s.url.split("?")[0]));
+ if (!source) return;
+
+ const text = source.fetchText(html);
+ if (text.length < 200) {
+ console.warn(`[market-intel] Too little text from ${request.url}`);
+ return;
+ }
+
+ try {
+ const intel = await extractMarketIntel(text, request.url, source.name);
+ await saveIntelItem(intel, source, request.url);
+ processed++;
+ } catch (err) {
+ console.error(`[market-intel] LLM error for ${request.url}:`, err);
+ }
+ },
+
+ async failedRequestHandler({ request }) {
+ console.warn(`[market-intel] Failed: ${request.url}`);
+ },
+ });
+
+ await crawler.addRequests(SOURCES.map((s) => ({ url: s.url })));
+ await crawler.run();
+
+ console.log(`[market-intel] Done. Processed ${processed}/${SOURCES.length} sources.`);
+}
+
+// ─────────────────────────────────────────────────────────────────────────────
+// ABC Classification computation — run after each major scrape cycle
+// ─────────────────────────────────────────────────────────────────────────────
+
+export async function computeAbcClassification(): Promise {
+ console.log("[abc] Computing ABC classification...");
+
+ await pool.query(`
+ INSERT INTO abc_classification
+ (transceiver_id, abc_class, obs_90d, compat_count, vendor_count, price_volatility, demand_score, supply_risk)
+ SELECT
+ t.id,
+ CASE
+ WHEN obs_90d > 50 AND compat_count > 100 THEN 'A'
+ WHEN obs_90d > 15 OR compat_count > 30 THEN 'B'
+ ELSE 'C'
+ END AS abc_class,
+ obs_90d,
+ compat_count,
+ vendor_count,
+ price_volatility,
+ -- demand score 0-100: weighted combination
+ LEAST(100, (obs_90d * 0.5 + compat_count * 0.3 + vendor_count * 5)) AS demand_score,
+ CASE
+ WHEN price_volatility > 0.3 THEN 'high'
+ WHEN price_volatility > 0.1 THEN 'medium'
+ ELSE 'low'
+ END AS supply_risk
+ FROM transceivers t
+ LEFT JOIN (
+ SELECT transceiver_id,
+ COUNT(*) FILTER (WHERE time > NOW() - INTERVAL '90 days') AS obs_90d,
+ STDDEV(price) / NULLIF(AVG(price), 0) AS price_volatility,
+ COUNT(DISTINCT source_vendor_id) AS vendor_count
+ FROM price_observations
+ GROUP BY transceiver_id
+ ) po ON po.transceiver_id = t.id
+ LEFT JOIN (
+ SELECT transceiver_id, COUNT(*) AS compat_count
+ FROM compatibility
+ WHERE status = 'compatible'
+ GROUP BY transceiver_id
+ ) co ON co.transceiver_id = t.id
+ WHERE t.data_confidence != 'garbage' OR t.data_confidence IS NULL
+ ON CONFLICT (transceiver_id) DO UPDATE SET
+ abc_class = EXCLUDED.abc_class,
+ obs_90d = EXCLUDED.obs_90d,
+ compat_count = EXCLUDED.compat_count,
+ vendor_count = EXCLUDED.vendor_count,
+ price_volatility = EXCLUDED.price_volatility,
+ demand_score = EXCLUDED.demand_score,
+ supply_risk = EXCLUDED.supply_risk,
+ computed_at = NOW()
+ `);
+
+ const stats = await pool.query(`
+ SELECT abc_class, COUNT(*) AS count FROM abc_classification GROUP BY abc_class ORDER BY abc_class
+ `);
+ console.log("[abc] Classification done:", stats.rows.map((r) => `${r.abc_class}: ${r.count}`).join(", "));
+}
+
+// ─────────────────────────────────────────────────────────────────────────────
+// Reorder Signal computation
+// ─────────────────────────────────────────────────────────────────────────────
+
+export async function computeReorderSignals(): Promise {
+ console.log("[reorder] Computing reorder signals...");
+
+ // Get all transceivers with enough data
+ const transceivers = await pool.query(`
+ SELECT
+ t.id, t.part_number, t.standard_name, t.speed_gbps, t.form_factor,
+ ac.abc_class, ac.price_volatility, ac.supply_risk,
+ -- Price trend: is price rising or falling?
+ (SELECT AVG(price) FROM price_observations WHERE transceiver_id = t.id AND time > NOW() - INTERVAL '14 days') AS price_recent,
+ (SELECT AVG(price) FROM price_observations WHERE transceiver_id = t.id AND time BETWEEN NOW() - INTERVAL '60 days' AND NOW() - INTERVAL '14 days') AS price_older,
+ -- Stock trend: how many vendors show in_stock recently?
+ (SELECT COUNT(*) FROM stock_snapshots WHERE transceiver_id = t.id AND stock_level = 'in_stock' AND scraped_at > NOW() - INTERVAL '7 days') AS in_stock_recent,
+ (SELECT COUNT(*) FROM stock_snapshots WHERE transceiver_id = t.id AND stock_level = 'out_of_stock' AND scraped_at > NOW() - INTERVAL '7 days') AS oos_recent,
+ -- Lead time
+ (SELECT AVG(lead_time_days) FROM stock_snapshots WHERE transceiver_id = t.id AND lead_time_days IS NOT NULL AND scraped_at > NOW() - INTERVAL '30 days') AS avg_lead_time_days,
+ -- Lifecycle events
+ (SELECT MAX(impact_level) FROM product_lifecycle_events WHERE transceiver_id = t.id OR technology = t.speed_gbps::text || 'G') AS lifecycle_impact
+ FROM transceivers t
+ LEFT JOIN abc_classification ac ON ac.transceiver_id = t.id
+ WHERE (t.data_confidence != 'garbage' OR t.data_confidence IS NULL)
+ `);
+
+ let computed = 0;
+ for (const row of transceivers.rows) {
+ const reasons: string[] = [];
+ let signal: "buy_now" | "wait" | "hold" | "monitor" = "monitor";
+ let strength = 0.3;
+
+ const priceTrend = row.price_recent && row.price_older
+ ? (row.price_recent - row.price_older) / row.price_older
+ : null;
+
+ const stockTrend =
+ row.oos_recent > 2 ? "declining" :
+ row.in_stock_recent > 2 ? "stable" : "unknown";
+
+ const leadTimeWeeks = row.avg_lead_time_days ? Math.ceil(row.avg_lead_time_days / 7) : null;
+
+ // Signal logic
+ if (row.lifecycle_impact === "critical" || row.lifecycle_impact === "high") {
+ signal = "buy_now"; strength = 0.9;
+ reasons.push("EOL/critical lifecycle event detected");
+ } else if (stockTrend === "declining" && row.abc_class === "A") {
+ signal = "buy_now"; strength = 0.8;
+ reasons.push("Stock declining at multiple vendors (A-product)");
+ } else if (leadTimeWeeks && leadTimeWeeks >= 12) {
+ signal = "buy_now"; strength = 0.75;
+ reasons.push(`Long lead time: ${leadTimeWeeks} weeks — order in advance`);
+ } else if (priceTrend !== null && priceTrend < -0.10) {
+ signal = "wait"; strength = 0.7;
+ reasons.push(`Price falling ${Math.abs(Math.round(priceTrend * 100))}% — wait for floor`);
+ } else if (priceTrend !== null && priceTrend > 0.10) {
+ signal = "buy_now"; strength = 0.65;
+ reasons.push(`Price rising ${Math.round(priceTrend * 100)}% — buy before further increase`);
+ } else if (row.abc_class === "A" && stockTrend === "stable") {
+ signal = "hold"; strength = 0.5;
+ reasons.push("A-product, stable pricing and availability");
+ } else if (row.abc_class === "C") {
+ signal = "monitor"; strength = 0.3;
+ reasons.push("C-product: low demand — order on demand only");
+ }
+
+ if (reasons.length === 0) reasons.push("Insufficient data for strong signal");
+
+ await pool.query(
+ `INSERT INTO reorder_signals
+ (transceiver_id, signal, signal_strength, reasons, stock_trend, price_trend, lead_time_weeks)
+ VALUES ($1, $2, $3, $4, $5, $6, $7)`,
+ [
+ row.id,
+ signal,
+ strength,
+ JSON.stringify(reasons),
+ stockTrend,
+ priceTrend === null ? "unknown" : priceTrend > 0.05 ? "rising" : priceTrend < -0.05 ? "falling" : "stable",
+ leadTimeWeeks,
+ ]
+ );
+ computed++;
+ }
+
+ console.log(`[reorder] Computed ${computed} reorder signals.`);
+}
diff --git a/sql/019-procurement-intelligence.sql b/sql/019-procurement-intelligence.sql
new file mode 100644
index 0000000..02fb027
--- /dev/null
+++ b/sql/019-procurement-intelligence.sql
@@ -0,0 +1,338 @@
+-- Migration 019: Procurement Intelligence Engine
+-- Stock tracking, ABC classification, reorder signals, market intelligence
+-- v0.2.0 — WS0c: Procurement Intelligence Foundation
+
+-- ─────────────────────────────────────────────────────────────────────────────
+-- 1. Stock Snapshots — time-series lagerbestand per vendor per product
+-- ─────────────────────────────────────────────────────────────────────────────
+CREATE TABLE IF NOT EXISTS stock_snapshots (
+ id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
+ transceiver_id UUID REFERENCES transceivers(id) ON DELETE CASCADE,
+ vendor_id UUID REFERENCES vendors(id) ON DELETE CASCADE,
+ stock_level TEXT CHECK (stock_level IN ('in_stock', 'out_of_stock', 'limited', 'unknown')) DEFAULT 'unknown',
+ stock_quantity INT, -- exact quantity if vendor shows it
+ incoming_quantity INT, -- "18 im Zulauf"
+ incoming_eta DATE, -- "verfügbar ab 15. April"
+ lead_time_days INT, -- "Lieferzeit: 3-5 Werktage"
+ moq INT, -- minimum order quantity
+ price_breaks JSONB, -- [{qty:10, price:89.00}, {qty:50, price:74.00}]
+ source_url TEXT,
+ crawler_confidence NUMERIC(3,2), -- 0.00 – 1.00 (Crawler LLM confidence)
+ scraped_at TIMESTAMPTZ DEFAULT NOW()
+);
+
+CREATE INDEX IF NOT EXISTS idx_stock_transceiver ON stock_snapshots(transceiver_id, scraped_at DESC);
+CREATE INDEX IF NOT EXISTS idx_stock_vendor ON stock_snapshots(vendor_id, scraped_at DESC);
+CREATE INDEX IF NOT EXISTS idx_stock_level ON stock_snapshots(stock_level) WHERE stock_level != 'unknown';
+
+-- ─────────────────────────────────────────────────────────────────────────────
+-- 2. ABC Classification — computed turnover category
+-- ─────────────────────────────────────────────────────────────────────────────
+CREATE TABLE IF NOT EXISTS abc_classification (
+ transceiver_id UUID PRIMARY KEY REFERENCES transceivers(id) ON DELETE CASCADE,
+ abc_class TEXT NOT NULL CHECK (abc_class IN ('A', 'B', 'C')),
+ -- inputs
+ obs_90d INT DEFAULT 0, -- price observations in last 90 days (proxy for market demand)
+ compat_count INT DEFAULT 0, -- number of compatible switches (market breadth)
+ vendor_count INT DEFAULT 0, -- number of vendors selling it (competition = demand signal)
+ price_volatility NUMERIC(5,4), -- STDDEV/AVG — high volatility = contested market
+ -- derived signals
+ demand_score NUMERIC(5,2), -- composite 0-100
+ supply_risk TEXT CHECK (supply_risk IN ('low', 'medium', 'high')),
+ computed_at TIMESTAMPTZ DEFAULT NOW()
+);
+
+CREATE INDEX IF NOT EXISTS idx_abc_class ON abc_classification(abc_class);
+
+-- ─────────────────────────────────────────────────────────────────────────────
+-- 3. Reorder Signals — computed buy/wait/hold/monitor recommendations
+-- ─────────────────────────────────────────────────────────────────────────────
+CREATE TABLE IF NOT EXISTS reorder_signals (
+ id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
+ transceiver_id UUID REFERENCES transceivers(id) ON DELETE CASCADE,
+ signal TEXT NOT NULL CHECK (signal IN ('buy_now', 'wait', 'hold', 'monitor')),
+ signal_strength NUMERIC(3,2), -- 0.00 – 1.00 (how strong the signal is)
+ reasons JSONB, -- ["Stock declining at 3 vendors", "Lead time 16 weeks"]
+ stock_trend TEXT CHECK (stock_trend IN ('declining', 'stable', 'increasing', 'unknown')),
+ price_trend TEXT CHECK (price_trend IN ('falling', 'stable', 'rising', 'unknown')),
+ lead_time_weeks INT,
+ hype_phase TEXT, -- from hype_cycle data
+ computed_at TIMESTAMPTZ DEFAULT NOW(),
+ expires_at TIMESTAMPTZ DEFAULT NOW() + INTERVAL '24 hours'
+);
+
+CREATE INDEX IF NOT EXISTS idx_reorder_transceiver ON reorder_signals(transceiver_id, computed_at DESC);
+CREATE INDEX IF NOT EXISTS idx_reorder_signal ON reorder_signals(signal) WHERE expires_at > NOW();
+
+-- ─────────────────────────────────────────────────────────────────────────────
+-- 4. Product Lifecycle Events — EOL, new standards, CapEx peaks, trade shows
+-- ─────────────────────────────────────────────────────────────────────────────
+CREATE TABLE IF NOT EXISTS product_lifecycle_events (
+ id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
+ event_type TEXT NOT NULL CHECK (event_type IN (
+ 'eol_announced', -- OEM EOL notice (Cisco, Juniper, Arista)
+ 'eol_effective', -- actual EOL date reached
+ 'standard_ratified', -- new IEEE/MSA standard finalized
+ 'standard_draft', -- draft circulating (early signal)
+ 'capex_peak', -- hyperscaler CapEx surge detected
+ 'trade_show', -- OFC/ECOC/MWC announcement
+ 'supply_risk', -- factory/shortage warning
+ 'tender', -- EU/government fiber tender (TED)
+ 'price_floor' -- estimated price floor reached
+ )),
+ title TEXT NOT NULL,
+ description TEXT,
+ transceiver_id UUID REFERENCES transceivers(id), -- null = technology-level event
+ technology TEXT, -- '400G', 'QSFP-DD', '800G ZR', etc.
+ effective_date DATE, -- when this event takes effect
+ source_url TEXT,
+ source_name TEXT,
+ impact_level TEXT CHECK (impact_level IN ('low', 'medium', 'high', 'critical')) DEFAULT 'medium',
+ buy_signal TEXT CHECK (buy_signal IN ('buy_now', 'wait', 'hold', 'monitor')),
+ verified BOOLEAN DEFAULT false,
+ created_at TIMESTAMPTZ DEFAULT NOW()
+);
+
+CREATE INDEX IF NOT EXISTS idx_lifecycle_type ON product_lifecycle_events(event_type, created_at DESC);
+CREATE INDEX IF NOT EXISTS idx_lifecycle_technology ON product_lifecycle_events(technology) WHERE technology IS NOT NULL;
+CREATE INDEX IF NOT EXISTS idx_lifecycle_signal ON product_lifecycle_events(buy_signal) WHERE buy_signal IS NOT NULL;
+
+-- ─────────────────────────────────────────────────────────────────────────────
+-- 5. Market Intelligence — hyperscaler CapEx, OFC/ECOC, standards, tenders
+-- ─────────────────────────────────────────────────────────────────────────────
+CREATE TABLE IF NOT EXISTS market_intelligence (
+ id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
+ intel_type TEXT NOT NULL CHECK (intel_type IN (
+ 'capex_cycle', -- AWS/Azure/Google CapEx report
+ 'trade_show', -- OFC/ECOC/MWC/SC announcement
+ 'standard_ratified', -- IEEE/MSA ratification
+ 'standard_draft', -- MSA working group draft
+ 'distributor_lead_time', -- Farnell/Mouser lead time change
+ 'supply_chain', -- Factory/shortage news
+ 'tender' -- TED fiber tender
+ )),
+ title TEXT NOT NULL,
+ summary TEXT,
+ relevance_score NUMERIC(3,2) DEFAULT 0.5, -- 0-1, LLM-assessed relevance
+ technologies TEXT[], -- ['400G', 'QSFP-DD', 'ZR']
+ buy_signal_implication TEXT CHECK (buy_signal_implication IN ('buy_now', 'wait', 'hold', 'monitor', 'none')),
+ impact_horizon_months INT, -- how many months until this matters
+ source_url TEXT,
+ source_name TEXT NOT NULL,
+ published_at TIMESTAMPTZ,
+ created_at TIMESTAMPTZ DEFAULT NOW()
+);
+
+CREATE INDEX IF NOT EXISTS idx_intel_type ON market_intelligence(intel_type, created_at DESC);
+CREATE INDEX IF NOT EXISTS idx_intel_technologies ON market_intelligence USING gin(technologies);
+CREATE INDEX IF NOT EXISTS idx_intel_signal ON market_intelligence(buy_signal_implication);
+
+-- ─────────────────────────────────────────────────────────────────────────────
+-- 6. Crawler LLM Scrape Log — audit trail for Crawler LLM results
+-- ─────────────────────────────────────────────────────────────────────────────
+CREATE TABLE IF NOT EXISTS crawler_llm_log (
+ id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
+ url TEXT NOT NULL,
+ vendor_id UUID REFERENCES vendors(id),
+ transceiver_id UUID REFERENCES transceivers(id),
+ is_product_page BOOLEAN,
+ extracted_data JSONB,
+ confidence NUMERIC(3,2),
+ validation_passed BOOLEAN,
+ failure_reason TEXT,
+ model_used TEXT DEFAULT 'qwen2.5:14b',
+ scraped_at TIMESTAMPTZ DEFAULT NOW()
+);
+
+CREATE INDEX IF NOT EXISTS idx_llm_log_url ON crawler_llm_log(url, scraped_at DESC);
+CREATE INDEX IF NOT EXISTS idx_llm_log_vendor ON crawler_llm_log(vendor_id, scraped_at DESC);
+
+-- ─────────────────────────────────────────────────────────────────────────────
+-- 7. Useful views
+-- ─────────────────────────────────────────────────────────────────────────────
+
+-- Latest stock per product per vendor
+CREATE OR REPLACE VIEW v_stock_current AS
+SELECT DISTINCT ON (ss.transceiver_id, ss.vendor_id)
+ ss.*,
+ t.part_number, t.standard_name, t.form_factor, t.speed_gbps,
+ v.name AS vendor_name, v.slug AS vendor_slug
+FROM stock_snapshots ss
+JOIN transceivers t ON ss.transceiver_id = t.id
+JOIN vendors v ON ss.vendor_id = v.id
+ORDER BY ss.transceiver_id, ss.vendor_id, ss.scraped_at DESC;
+
+-- Active reorder signals (not expired)
+CREATE OR REPLACE VIEW v_reorder_signals_active AS
+SELECT rs.*,
+ t.part_number, t.standard_name, t.form_factor, t.speed_gbps, t.reach_label,
+ ac.abc_class
+FROM reorder_signals rs
+JOIN transceivers t ON rs.transceiver_id = t.id
+LEFT JOIN abc_classification ac ON ac.transceiver_id = t.id
+WHERE rs.expires_at > NOW()
+ AND rs.computed_at = (
+ SELECT MAX(computed_at) FROM reorder_signals r2 WHERE r2.transceiver_id = rs.transceiver_id
+ )
+ORDER BY rs.signal_strength DESC;
+
+-- Stock trend (is it declining at vendors?)
+CREATE OR REPLACE VIEW v_stock_trend AS
+SELECT
+ transceiver_id,
+ vendor_id,
+ COUNT(*) AS snapshot_count,
+ -- Compare recent vs older snapshots
+ COUNT(*) FILTER (WHERE stock_level = 'out_of_stock' AND scraped_at > NOW() - INTERVAL '7 days') AS oos_recent,
+ COUNT(*) FILTER (WHERE stock_level = 'in_stock' AND scraped_at > NOW() - INTERVAL '7 days') AS in_stock_recent,
+ COUNT(*) FILTER (WHERE stock_level = 'out_of_stock' AND scraped_at BETWEEN NOW() - INTERVAL '30 days' AND NOW() - INTERVAL '7 days') AS oos_older,
+ CASE
+ WHEN COUNT(*) FILTER (WHERE stock_level = 'out_of_stock' AND scraped_at > NOW() - INTERVAL '7 days') >
+ COUNT(*) FILTER (WHERE stock_level = 'out_of_stock' AND scraped_at BETWEEN NOW() - INTERVAL '30 days' AND NOW() - INTERVAL '7 days')
+ THEN 'declining'
+ WHEN COUNT(*) FILTER (WHERE stock_level = 'in_stock' AND scraped_at > NOW() - INTERVAL '7 days') > 2 THEN 'stable'
+ ELSE 'unknown'
+ END AS trend
+FROM stock_snapshots
+WHERE scraped_at > NOW() - INTERVAL '30 days'
+GROUP BY transceiver_id, vendor_id;
+
+-- ─────────────────────────────────────────────────────────────────────────────
+-- 8. Seed: Known market intelligence events (static knowledge base)
+-- ─────────────────────────────────────────────────────────────────────────────
+INSERT INTO market_intelligence (intel_type, title, summary, relevance_score, technologies, buy_signal_implication, impact_horizon_months, source_name, published_at) VALUES
+(
+ 'trade_show',
+ 'OFC 2026 — Key 800G ZR and Co-Packaged Optics Announcements',
+ 'OFC 2026 highlighted accelerated 800G ZR deployment timelines and first Co-Packaged Optics (CPO) demos from Broadcom and Intel. CPO replaces pluggable modules in 4-6 years for hyperscaler intra-DC. Short term: 400G ZR+ and 800G QSFP-DD demand surge expected in 2026-2027.',
+ 0.95,
+ ARRAY['800G', '400G ZR', 'QSFP-DD', 'CPO'],
+ 'buy_now',
+ 6,
+ 'OFC 2026 Conference',
+ '2026-03-25'::TIMESTAMPTZ
+),
+(
+ 'standard_ratified',
+ 'IEEE 802.3df — 100G, 200G, 400G Ethernet over single-mode fiber',
+ '802.3df ratified December 2024. Defines 100GBASE-DR, 200GBASE-DR4, 400GBASE-DR4 with PAM4 modulation. Vendors shipping compliant optics in H1 2026. Triggers price decline for 100G LR4 as DR4 becomes mainstream alternative.',
+ 0.88,
+ ARRAY['100G', '200G', '400G', 'DR4', 'PAM4'],
+ 'wait',
+ 3,
+ 'IEEE 802.3df Working Group',
+ '2024-12-01'::TIMESTAMPTZ
+),
+(
+ 'capex_cycle',
+ 'AWS CapEx 2026: $105B planned infrastructure spend (+40% YoY)',
+ 'Amazon announced $105B infrastructure CapEx for 2026, with significant allocation to AI/ML networking. Q1/Q2 typically slower, Q3/Q4 peak deployment. Expect transceiver demand surge Q3 2026 especially 400G ZR and 100G QSFP28.',
+ 0.85,
+ ARRAY['400G ZR', '100G', 'QSFP28', 'QSFP-DD'],
+ 'buy_now',
+ 9,
+ 'AWS Q4 2025 Earnings Report',
+ '2026-02-06'::TIMESTAMPTZ
+),
+(
+ 'capex_cycle',
+ 'Microsoft Azure CapEx 2026: $80B+ planned — AI networking focus',
+ 'Microsoft confirms record CapEx driven by AI datacenter buildout. Azure networking upgrades prioritizing 400G+ spine/leaf. Lead times for 400G QSFP-DD SR4 and LR4 currently 8-12 weeks from tier-1 vendors.',
+ 0.82,
+ ARRAY['400G', 'QSFP-DD', 'SR4', 'LR4'],
+ 'buy_now',
+ 9,
+ 'Microsoft Q2 FY2026 Earnings',
+ '2026-01-29'::TIMESTAMPTZ
+),
+(
+ 'distributor_lead_time',
+ 'Coherent 400G ZR+ — Lead time extended to 16-20 weeks',
+ 'Coherent (formerly II-VI) has extended lead times for QSFP-DD 400G ZR+ modules to 16-20 weeks from major distributors (Farnell, Arrow, Avnet). Cause: wafer fab capacity constrained by AI optics demand. Expected normalization Q4 2026.',
+ 0.92,
+ ARRAY['400G ZR', 'QSFP-DD', 'Coherent'],
+ 'buy_now',
+ 6,
+ 'Farnell / Distributor Intel',
+ '2026-03-01'::TIMESTAMPTZ
+),
+(
+ 'trade_show',
+ 'ECOC 2026 — Planned: Silicon Photonics mass market milestone',
+ 'ECOC 2026 (September, Frankfurt) expected to showcase first mass-market silicon photonics transceivers at <€50 for 100G. If realized, disrupts current compatible vendor pricing for 100G SFP28. Monitor closely for 100G category.',
+ 0.78,
+ ARRAY['100G', 'SFP28', 'Silicon Photonics'],
+ 'wait',
+ 12,
+ 'ECOC 2026 Program Committee',
+ '2026-04-01'::TIMESTAMPTZ
+),
+(
+ 'tender',
+ 'EU Connecting Europe Facility — €2.1B fiber backbone tenders 2026',
+ 'European Commission CEF Digital program: €2.1B in fiber backbone tenders across DE, FR, PL, SE in 2026. Each tender = 6-18 month deployment window. Triggers DWDM + ROADM + coherent transceiver demand (100G/400G ZR). TED database: TED-OJ.',
+ 0.75,
+ ARRAY['DWDM', '100G', '400G ZR', 'Coherent', 'ROADM'],
+ 'monitor',
+ 18,
+ 'EU TED / Connecting Europe Facility',
+ '2026-01-15'::TIMESTAMPTZ
+)
+ON CONFLICT DO NOTHING;
+
+-- ─────────────────────────────────────────────────────────────────────────────
+-- 9. Seed: Known lifecycle events
+-- ─────────────────────────────────────────────────────────────────────────────
+INSERT INTO product_lifecycle_events (event_type, title, description, technology, effective_date, source_name, impact_level, buy_signal) VALUES
+(
+ 'eol_announced',
+ 'Cisco SFP-10G-LR — EOL announced, EOS 2027-06-30',
+ 'Cisco Product Bulletin: SFP-10G-LR (CS-SFPHLX10G-LR) enters End of Sale 2026-06-30, End of Support 2027-06-30. Customers must migrate to SFP-10G-LR-S or compatible alternatives. Hortungs-Rush expected Q1-Q2 2026.',
+ '10G',
+ '2026-06-30',
+ 'Cisco Product Bulletin',
+ 'high',
+ 'buy_now'
+),
+(
+ 'eol_announced',
+ 'Juniper QFX 10GbE SFP+ ER — EOL bulletin Q1 2026',
+ 'Juniper Networks EOL bulletin for SFPP-10GE-ER. End of Engineering 2026-06-01. Last time order date 2026-09-01. Customers should evaluate EX-SFP-10GE-ER-S alternatives.',
+ '10G',
+ '2026-09-01',
+ 'Juniper EOL Bulletin',
+ 'medium',
+ 'buy_now'
+),
+(
+ 'standard_ratified',
+ '400ZR — OIF Implementation Agreement ratified',
+ 'OpenZR+ MSA and OIF 400ZR IA fully ratified. Multi-vendor interoperability confirmed at Interop events. Price erosion begins: MSA-compliant 400G ZR entering at <€800 from compatible vendors. OEM premium shrinking.',
+ '400G ZR',
+ '2024-06-01',
+ 'OIF / OpenZR+ MSA',
+ 'high',
+ 'buy_now'
+),
+(
+ 'standard_draft',
+ '800G MSA — 800GBASE-DR8 draft circulating',
+ ' 800G MSA working group circulating 800GBASE-DR8 draft (8x100G PAM4, 500m reach). Expected ratification Q3 2026. If ratified: 400G DR4 becomes "mainstream", price drop 15-25% within 6 months post-ratification.',
+ '800G',
+ '2026-09-01',
+ '800G MSA Working Group',
+ 'medium',
+ 'hold'
+)
+ON CONFLICT DO NOTHING;
+
+-- ─────────────────────────────────────────────────────────────────────────────
+-- Done
+-- ─────────────────────────────────────────────────────────────────────────────
+SELECT
+ (SELECT COUNT(*) FROM stock_snapshots) AS stock_snapshots,
+ (SELECT COUNT(*) FROM abc_classification) AS abc_entries,
+ (SELECT COUNT(*) FROM reorder_signals) AS reorder_signals,
+ (SELECT COUNT(*) FROM product_lifecycle_events) AS lifecycle_events,
+ (SELECT COUNT(*) FROM market_intelligence) AS market_intel_entries,
+ (SELECT COUNT(*) FROM crawler_llm_log) AS crawler_log_entries;