feat: Procurement Intelligence Engine (WS0c)

- Migration 019: stock_snapshots, abc_classification, reorder_signals,
  product_lifecycle_events, market_intelligence, crawler_llm_log tables
- Seeded 7 market intel events (OFC 2026, AWS/Azure CapEx, Coherent lead times,
  EU TED tenders, ECOC 2026, IEEE 802.3df)
- Seeded 4 lifecycle events (Cisco SFP-10G-LR EOL, Juniper EOL,
  400ZR ratified, 800G MSA draft)
- Crawler LLM: core.ts (Ollama-based extractor), stock-schema.ts (typed schemas
  + vendor profiles for Flexoptix/FS.com/10Gtek/ATGBICS/ProLabs/Farnell/Mouser),
  validator.ts (rule-based sanity checks + cross-validation)
- market-intelligence.ts scraper: OFC/ECOC, LightReading, IEEE 802.3, EU TED,
  Farnell/Mouser lead times, FierceTelecom — weekly via pg-boss
- computeAbcClassification(): dynamic A/B/C classification from price obs +
  compat count + vendor breadth
- computeReorderSignals(): buy_now/wait/hold/monitor with reasons + signal strength
- API: GET /api/procurement/overview|signals|signals/:id|abc|market-intel|
  stock-trends/:id|lifecycle
- Dashboard: Procurement Intel tab with Reorder Signals, ABC table,
  Market Intel cards, Lifecycle Events
This commit is contained in:
Rene Fichtmueller 2026-04-01 22:04:33 +02:00
parent 480decd307
commit 681da54523
9 changed files with 1933 additions and 0 deletions

View File

@ -20,6 +20,7 @@ import { transportRouter } from "./routes/transport";
import { datasheetRouter } from "./routes/datasheets"; import { datasheetRouter } from "./routes/datasheets";
import { hotTopicsRouter } from "./routes/hot-topics"; import { hotTopicsRouter } from "./routes/hot-topics";
import { adoptionRouter } from "./routes/adoption"; import { adoptionRouter } from "./routes/adoption";
import { procurementRouter } from "./routes/procurement";
const app = express(); const app = express();
@ -56,6 +57,7 @@ app.use("/api/transport", transportRouter);
app.use("/api/datasheets", datasheetRouter); app.use("/api/datasheets", datasheetRouter);
app.use("/api/adoption", adoptionRouter); app.use("/api/adoption", adoptionRouter);
app.use("/api/hot-topics", hotTopicsRouter); app.use("/api/hot-topics", hotTopicsRouter);
app.use("/api/procurement", procurementRouter);
// Dashboard (static HTML) // Dashboard (static HTML)
app.use("/dashboard", express.static(join(__dirname, "..", "..", "dashboard"))); app.use("/dashboard", express.static(join(__dirname, "..", "..", "dashboard")));

View File

@ -0,0 +1,293 @@
/**
* WS0c: Procurement Intelligence API
*
* Endpoints:
* GET /api/procurement/overview Dashboard summary
* GET /api/procurement/signals Active reorder signals
* GET /api/procurement/signals/:id Signal for a specific transceiver
* GET /api/procurement/abc ABC classification list
* GET /api/procurement/market-intel Market intelligence events
* GET /api/procurement/stock-trends/:id Stock history for a transceiver
* GET /api/procurement/lifecycle Lifecycle events (EOL, standards)
*/
import { Router, Request, Response } from "express";
import { pool } from "../db/client";
export const procurementRouter = Router();
// ─────────────────────────────────────────────────────────────────────────────
// GET /api/procurement/overview
// ─────────────────────────────────────────────────────────────────────────────
/**
 * GET /overview — dashboard summary.
 *
 * Runs four aggregate queries in parallel and returns their raw result rows in
 * one JSON object with the keys `signals_summary`, `abc_summary`,
 * `market_intel_summary` and `lifecycle_summary`.
 *
 * If any query rejects, the error is logged and a generic 500 JSON body is sent.
 */
procurementRouter.get("/overview", async (_req: Request, res: Response) => {
try {
const [signals, abc, intel, lifecycle] = await Promise.all([
// Count of unexpired signals per signal value, looking only at each
// transceiver's most recently computed row.
pool.query(`
SELECT signal, COUNT(*) AS count
FROM reorder_signals
WHERE expires_at > NOW()
AND computed_at = (SELECT MAX(r2.computed_at) FROM reorder_signals r2 WHERE r2.transceiver_id = reorder_signals.transceiver_id)
GROUP BY signal
`),
// Number of classified rows per ABC class.
pool.query(`
SELECT abc_class, COUNT(*) AS count FROM abc_classification GROUP BY abc_class ORDER BY abc_class
`),
// Ten largest (intel_type, buy_signal_implication) groups created in the last 90 days.
pool.query(`
SELECT intel_type, buy_signal_implication, COUNT(*) AS count
FROM market_intelligence
WHERE created_at > NOW() - INTERVAL '90 days'
GROUP BY intel_type, buy_signal_implication
ORDER BY count DESC
LIMIT 10
`),
// Lifecycle events created in the last 180 days, grouped by type and impact level.
pool.query(`
SELECT event_type, impact_level, COUNT(*) AS count
FROM product_lifecycle_events
WHERE created_at > NOW() - INTERVAL '180 days'
GROUP BY event_type, impact_level
ORDER BY count DESC
`),
]);
res.json({
signals_summary: signals.rows,
abc_summary: abc.rows,
market_intel_summary: intel.rows,
lifecycle_summary: lifecycle.rows,
});
} catch (err) {
// Details go to the server log only; the client gets a generic message.
console.error("Procurement overview error:", err);
res.status(500).json({ error: "Internal server error" });
}
});
// ─────────────────────────────────────────────────────────────────────────────
// GET /api/procurement/signals?signal=buy_now&abc_class=A&limit=50&offset=0
// ─────────────────────────────────────────────────────────────────────────────
/**
 * GET /signals — most recent, unexpired reorder signal per transceiver.
 *
 * Optional equality filters (query string): `signal`, `abc_class`,
 * `form_factor`, `speed_gbps`.
 * Pagination: `limit` (default 50, clamped to 0..1000) and `offset`
 * (default 0, never negative). Missing or malformed numbers fall back to the
 * default instead of reaching the database as NaN.
 *
 * Responds with `{ data, total }`, where `total` is the row count of the
 * returned page (pg `rowCount`), not the size of the unpaginated result.
 * Sends 400 when `speed_gbps` is present but not numeric, 500 on query errors.
 */
procurementRouter.get("/signals", async (req: Request, res: Response) => {
  // Parses an integer query value; falls back when it is absent or not a number.
  const toInt = (raw: unknown, fallback: number, min: number, max: number): number => {
    const parsed = Number.parseInt(String(raw), 10);
    return Number.isNaN(parsed) ? fallback : Math.min(Math.max(parsed, min), max);
  };

  try {
    const { signal, abc_class, form_factor, speed_gbps } = req.query;
    const limit = toInt(req.query.limit, 50, 0, 1000);
    const offset = toInt(req.query.offset, 0, 0, Number.MAX_SAFE_INTEGER);

    let speed: number | undefined;
    if (speed_gbps) {
      speed = Number.parseFloat(String(speed_gbps));
      if (!Number.isFinite(speed)) {
        res.status(400).json({ error: "speed_gbps must be a number" });
        return;
      }
    }

    const params: unknown[] = [];
    // Registers a bound value and returns its positional placeholder ($1, $2, ...).
    const bind = (value: unknown): string => {
      params.push(value);
      return `$${params.length}`;
    };

    let sql = `
      SELECT rs.*,
        t.part_number, t.standard_name, t.form_factor, t.speed_gbps,
        t.reach_label, t.image_url, t.image_r2_key,
        ac.abc_class, ac.demand_score, ac.supply_risk,
        v.name AS vendor_name
      FROM reorder_signals rs
      JOIN transceivers t ON rs.transceiver_id = t.id
      LEFT JOIN abc_classification ac ON ac.transceiver_id = t.id
      LEFT JOIN vendors v ON t.vendor_id = v.id
      WHERE rs.expires_at > NOW()
        AND rs.computed_at = (
          SELECT MAX(r2.computed_at) FROM reorder_signals r2 WHERE r2.transceiver_id = rs.transceiver_id
        )
    `;
    if (signal) sql += ` AND rs.signal = ${bind(signal)}`;
    if (abc_class) sql += ` AND ac.abc_class = ${bind(abc_class)}`;
    if (form_factor) sql += ` AND t.form_factor = ${bind(form_factor)}`;
    if (speed !== undefined) sql += ` AND t.speed_gbps = ${bind(speed)}`;
    sql += ` ORDER BY rs.signal_strength DESC LIMIT ${bind(limit)} OFFSET ${bind(offset)}`;

    const result = await pool.query(sql, params);
    res.json({ data: result.rows, total: result.rowCount });
  } catch (err) {
    console.error("Signals error:", err);
    res.status(500).json({ error: "Internal server error" });
  }
});
// ─────────────────────────────────────────────────────────────────────────────
// GET /api/procurement/signals/:id (id = transceiver id)
// ─────────────────────────────────────────────────────────────────────────────
/**
 * GET /signals/:id — detail view for one transceiver.
 *
 * Runs four queries in parallel, all keyed on the `:id` path parameter, and
 * responds with:
 *  - `signal`: the most recently computed reorder signal row (with ABC fields), or null when none exists
 *  - `stock_history`: up to 50 newest stock snapshots with the vendor name
 *  - `price_history`: up to 30 newest price observations with the vendor name
 *  - `lifecycle_events`: all lifecycle events for the transceiver
 *
 * The id is not validated here; an unknown id produces a 200 with empty lists.
 * NOTE(review): every query compares `transceiver_id::text` to the parameter.
 * Casting the column probably prevents a plain index on transceiver_id from
 * being used — confirm the column type and consider casting the parameter instead.
 */
procurementRouter.get("/signals/:id", async (req: Request, res: Response) => {
try {
const { id } = req.params;
const [signal, stockHistory, priceHistory, lifecycle] = await Promise.all([
// Latest signal row; unlike the list endpoint this does not filter on expires_at.
pool.query(`
SELECT rs.*, ac.abc_class, ac.demand_score, ac.supply_risk
FROM reorder_signals rs
LEFT JOIN abc_classification ac ON ac.transceiver_id = rs.transceiver_id
WHERE rs.transceiver_id::text = $1
ORDER BY rs.computed_at DESC LIMIT 1
`, [id]),
// Newest 50 stock snapshots across all vendors.
pool.query(`
SELECT ss.stock_level, ss.stock_quantity, ss.incoming_quantity,
ss.incoming_eta, ss.lead_time_days, ss.moq, ss.price_breaks,
ss.scraped_at, ss.crawler_confidence,
v.name AS vendor_name
FROM stock_snapshots ss
JOIN vendors v ON ss.vendor_id = v.id
WHERE ss.transceiver_id::text = $1
ORDER BY ss.scraped_at DESC LIMIT 50
`, [id]),
// Newest 30 price observations across all source vendors.
pool.query(`
SELECT po.price, po.currency, po.time,
v.name AS vendor_name
FROM price_observations po
JOIN vendors v ON po.source_vendor_id = v.id
WHERE po.transceiver_id::text = $1
ORDER BY po.time DESC LIMIT 30
`, [id]),
// Lifecycle events, earliest effective date first; undated events last.
pool.query(`
SELECT * FROM product_lifecycle_events
WHERE transceiver_id::text = $1
ORDER BY effective_date ASC NULLS LAST, created_at DESC
`, [id]),
]);
res.json({
// No matching row yields null rather than undefined so the key is always present.
signal: signal.rows[0] || null,
stock_history: stockHistory.rows,
price_history: priceHistory.rows,
lifecycle_events: lifecycle.rows,
});
} catch (err) {
console.error("Signal detail error:", err);
res.status(500).json({ error: "Internal server error" });
}
});
// ─────────────────────────────────────────────────────────────────────────────
// GET /api/procurement/abc?class=A&form_factor=QSFP28
// ─────────────────────────────────────────────────────────────────────────────
/**
 * GET /abc — ABC classification list, each row joined with its transceiver,
 * vendor name and the newest unexpired reorder signal (if any).
 *
 * Optional equality filters (query string): `class`, `form_factor`, `speed_gbps`.
 * Pagination: `limit` (default 100, clamped to 0..1000) and `offset`
 * (default 0, never negative). Missing or malformed numbers fall back to the
 * default instead of reaching the database as NaN.
 *
 * Responds with `{ data, total }`, where `total` is the row count of the
 * returned page (pg `rowCount`), not the size of the unpaginated result.
 * Sends 400 when `speed_gbps` is present but not numeric, 500 on query errors.
 */
procurementRouter.get("/abc", async (req: Request, res: Response) => {
  // Parses an integer query value; falls back when it is absent or not a number.
  const toInt = (raw: unknown, fallback: number, min: number, max: number): number => {
    const parsed = Number.parseInt(String(raw), 10);
    return Number.isNaN(parsed) ? fallback : Math.min(Math.max(parsed, min), max);
  };

  try {
    const { class: cls, form_factor, speed_gbps } = req.query;
    const limit = toInt(req.query.limit, 100, 0, 1000);
    const offset = toInt(req.query.offset, 0, 0, Number.MAX_SAFE_INTEGER);

    let speed: number | undefined;
    if (speed_gbps) {
      speed = Number.parseFloat(String(speed_gbps));
      if (!Number.isFinite(speed)) {
        res.status(400).json({ error: "speed_gbps must be a number" });
        return;
      }
    }

    const params: unknown[] = [];
    // Registers a bound value and returns its positional placeholder ($1, $2, ...).
    const bind = (value: unknown): string => {
      params.push(value);
      return `$${params.length}`;
    };

    let sql = `
      SELECT ac.*,
        t.part_number, t.standard_name, t.form_factor, t.speed_gbps,
        t.reach_label, t.image_url,
        v.name AS vendor_name,
        rs.signal, rs.signal_strength
      FROM abc_classification ac
      JOIN transceivers t ON ac.transceiver_id = t.id
      LEFT JOIN vendors v ON t.vendor_id = v.id
      LEFT JOIN LATERAL (
        SELECT signal, signal_strength FROM reorder_signals
        WHERE transceiver_id = ac.transceiver_id AND expires_at > NOW()
        ORDER BY computed_at DESC LIMIT 1
      ) rs ON true
      WHERE 1=1
    `;
    if (cls) sql += ` AND ac.abc_class = ${bind(cls)}`;
    if (form_factor) sql += ` AND t.form_factor = ${bind(form_factor)}`;
    if (speed !== undefined) sql += ` AND t.speed_gbps = ${bind(speed)}`;
    sql += ` ORDER BY ac.abc_class, ac.demand_score DESC LIMIT ${bind(limit)} OFFSET ${bind(offset)}`;

    const result = await pool.query(sql, params);
    res.json({ data: result.rows, total: result.rowCount });
  } catch (err) {
    console.error("ABC error:", err);
    res.status(500).json({ error: "Internal server error" });
  }
});
// ─────────────────────────────────────────────────────────────────────────────
// GET /api/procurement/market-intel?type=&days=90&signal=buy_now
// ─────────────────────────────────────────────────────────────────────────────
/**
 * GET /market-intel — market intelligence events created within the last
 * `days` days, ordered by relevance score and then recency.
 *
 * Query string:
 *  - `days` (default 90, clamped to 0..3650)
 *  - `type` matches `intel_type`; `signal` matches `buy_signal_implication`;
 *    `technology` must be an element of the `technologies` array
 *  - `limit` (default 50, clamped to 0..1000) and `offset` (default 0, never negative)
 * Missing or malformed numbers fall back to the default instead of reaching
 * the database as NaN.
 *
 * Responds with `{ data, total }`, where `total` is the row count of the
 * returned page (pg `rowCount`). Sends 500 on query errors.
 */
procurementRouter.get("/market-intel", async (req: Request, res: Response) => {
  // Parses an integer query value; falls back when it is absent or not a number.
  const toInt = (raw: unknown, fallback: number, min: number, max: number): number => {
    const parsed = Number.parseInt(String(raw), 10);
    return Number.isNaN(parsed) ? fallback : Math.min(Math.max(parsed, min), max);
  };

  try {
    const { type, signal, technology } = req.query;
    const days = toInt(req.query.days, 90, 0, 3650);
    const limit = toInt(req.query.limit, 50, 0, 1000);
    const offset = toInt(req.query.offset, 0, 0, Number.MAX_SAFE_INTEGER);

    // $1 is always the look-back window in days.
    const params: unknown[] = [days];
    // Registers a bound value and returns its positional placeholder ($2, $3, ...).
    const bind = (value: unknown): string => {
      params.push(value);
      return `$${params.length}`;
    };

    let sql = `
      SELECT * FROM market_intelligence
      WHERE created_at > NOW() - INTERVAL '1 day' * $1
    `;
    if (type) sql += ` AND intel_type = ${bind(type)}`;
    if (signal) sql += ` AND buy_signal_implication = ${bind(signal)}`;
    if (technology) sql += ` AND ${bind(technology)} = ANY(technologies)`;
    sql += ` ORDER BY relevance_score DESC, created_at DESC LIMIT ${bind(limit)} OFFSET ${bind(offset)}`;

    const result = await pool.query(sql, params);
    res.json({ data: result.rows, total: result.rowCount });
  } catch (err) {
    console.error("Market intel error:", err);
    res.status(500).json({ error: "Internal server error" });
  }
});
// ─────────────────────────────────────────────────────────────────────────────
// GET /api/procurement/stock-trends/:id (id = transceiver id)
// ─────────────────────────────────────────────────────────────────────────────
/**
 * GET /stock-trends/:id — daily stock history for one transceiver.
 *
 * DISTINCT ON (vendor, calendar day) combined with the trailing
 * `scraped_at DESC` sort key keeps only the latest snapshot per vendor per day.
 * Responds with `{ data }`; sends 500 on query errors.
 *
 * NOTE(review): rows are ordered by vendor_id first and LIMIT 200 is applied
 * to that ordering, so when more than 200 vendor-days exist the cut-off looks
 * like it drops whole vendors rather than the oldest days — confirm intent.
 * NOTE(review): `transceiver_id::text = $1` casts the column; this probably
 * prevents a plain index on transceiver_id from being used — confirm.
 */
procurementRouter.get("/stock-trends/:id", async (req: Request, res: Response) => {
try {
const result = await pool.query(`
SELECT DISTINCT ON (ss.vendor_id, date_trunc('day', ss.scraped_at))
ss.stock_level, ss.stock_quantity, ss.incoming_quantity,
ss.incoming_eta, ss.lead_time_days, ss.scraped_at,
v.name AS vendor_name
FROM stock_snapshots ss
JOIN vendors v ON ss.vendor_id = v.id
WHERE ss.transceiver_id::text = $1
ORDER BY ss.vendor_id, date_trunc('day', ss.scraped_at) DESC, ss.scraped_at DESC
LIMIT 200
`, [req.params.id]);
res.json({ data: result.rows });
} catch (err) {
console.error("Stock trends error:", err);
res.status(500).json({ error: "Internal server error" });
}
});
// ─────────────────────────────────────────────────────────────────────────────
// GET /api/procurement/lifecycle?type=eol_announced&impact=high&days=180
// ─────────────────────────────────────────────────────────────────────────────
/**
 * GET /lifecycle — product lifecycle events created within the last `days`
 * days, joined with the transceiver (when one is linked).
 *
 * Query string:
 *  - `days` (default 180, clamped to 0..3650)
 *  - `type` matches `event_type`; `impact` matches `impact_level`;
 *    `signal` matches `buy_signal`
 *  - `technology` is a case-insensitive substring match; `%`, `_` and `\` in
 *    the value are matched literally rather than as LIKE wildcards
 *  - `limit` (default 50, clamped to 0..1000)
 * Missing or malformed numbers fall back to the default instead of reaching
 * the database as NaN.
 *
 * Results are ordered by severity (critical, high, medium, low, then anything
 * else), then by effective date ascending with undated events last, then by
 * newest creation time. Severity is ranked explicitly because sorting the
 * label itself descending is alphabetical for a text column, which would put
 * "medium" first and "critical" last.
 *
 * Responds with `{ data }`; sends 500 on query errors.
 */
procurementRouter.get("/lifecycle", async (req: Request, res: Response) => {
  // Parses an integer query value; falls back when it is absent or not a number.
  const toInt = (raw: unknown, fallback: number, min: number, max: number): number => {
    const parsed = Number.parseInt(String(raw), 10);
    return Number.isNaN(parsed) ? fallback : Math.min(Math.max(parsed, min), max);
  };

  try {
    const { type, impact, technology, signal } = req.query;
    const days = toInt(req.query.days, 180, 0, 3650);
    const limit = toInt(req.query.limit, 50, 0, 1000);

    // $1 is always the look-back window in days.
    const params: unknown[] = [days];
    // Registers a bound value and returns its positional placeholder ($2, $3, ...).
    const bind = (value: unknown): string => {
      params.push(value);
      return `$${params.length}`;
    };

    let sql = `
      SELECT ple.*,
        t.part_number, t.standard_name, t.form_factor, t.speed_gbps
      FROM product_lifecycle_events ple
      LEFT JOIN transceivers t ON ple.transceiver_id = t.id
      WHERE ple.created_at > NOW() - INTERVAL '1 day' * $1
    `;
    if (type) sql += ` AND ple.event_type = ${bind(type)}`;
    if (impact) sql += ` AND ple.impact_level = ${bind(impact)}`;
    if (technology) {
      // Backslash is ILIKE's default escape character.
      const literal = String(technology).replace(/[\\%_]/g, "\\$&");
      sql += ` AND ple.technology ILIKE ${bind(`%${literal}%`)}`;
    }
    if (signal) sql += ` AND ple.buy_signal = ${bind(signal)}`;
    sql += `
      ORDER BY
        CASE ple.impact_level::text
          WHEN 'critical' THEN 0
          WHEN 'high' THEN 1
          WHEN 'medium' THEN 2
          WHEN 'low' THEN 3
          ELSE 4
        END,
        ple.effective_date ASC NULLS LAST,
        ple.created_at DESC
      LIMIT ${bind(limit)}`;

    const result = await pool.query(sql, params);
    res.json({ data: result.rows });
  } catch (err) {
    console.error("Lifecycle error:", err);
    res.status(500).json({ error: "Internal server error" });
  }
});

View File

@ -639,6 +639,52 @@
.compare-diff { background: var(--yellow-light); } .compare-diff { background: var(--yellow-light); }
.compare-best { background: var(--green-light); font-weight: 600; } .compare-best { background: var(--green-light); font-weight: 600; }
.compare-cb { width: 16px; height: 16px; cursor: pointer; accent-color: var(--purple); } .compare-cb { width: 16px; height: 16px; cursor: pointer; accent-color: var(--purple); }
/* === PROCUREMENT TAB === */
.proc-btn {
background: var(--surface2); border: 1px solid var(--border);
padding: 5px 14px; border-radius: 6px; cursor: pointer;
font-size: 0.78rem; font-weight: 600; color: var(--text-dim);
transition: all 0.15s;
}
.proc-btn:hover { color: var(--text); border-color: var(--accent); }
.proc-btn-active { background: var(--accent); color: #fff !important; border-color: var(--accent) !important; }
.signal-card {
background: var(--surface); border: 1px solid var(--border);
border-radius: var(--radius-lg); padding: 1rem;
box-shadow: var(--shadow-card); position: relative;
}
.signal-card:hover { box-shadow: var(--shadow-hover); }
.signal-buy { border-left: 3px solid #c1121f; }
.signal-wait { border-left: 3px solid var(--yellow); }
.signal-hold { border-left: 3px solid var(--green); }
.signal-monitor { border-left: 3px solid var(--purple); }
.sig-badge-buy { background:#fde8e8; color:#c1121f; }
.sig-badge-wait { background:var(--yellow-light); color:#a06000; }
.sig-badge-hold { background:var(--green-light); color:#1b4332; }
.sig-badge-monitor { background:var(--purple-light); color:#5a3fcf; }
.intel-card {
background: var(--surface); border: 1px solid var(--border);
border-radius: var(--radius-lg); padding: 1rem;
box-shadow: var(--shadow-card);
}
.intel-badge {
display: inline-block; padding: 2px 8px; border-radius: 4px;
font-size: 0.65rem; font-weight: 700; text-transform: uppercase;
letter-spacing: 0.06em; margin-bottom: 0.5rem;
}
.intel-buy { background:#fde8e8; color:#c1121f; }
.intel-wait { background:var(--yellow-light); color:#a06000; }
.intel-hold { background:var(--green-light); color:#1b4332; }
.intel-monitor { background:var(--purple-light); color:#5a3fcf; }
.intel-none { background:var(--surface2); color:var(--text-dim); }
.abc-a { background:#fde8e8; color:#c1121f; font-weight:800; padding:2px 7px; border-radius:4px; }
.abc-b { background:var(--yellow-light); color:#a06000; font-weight:800; padding:2px 7px; border-radius:4px; }
.abc-c { background:var(--surface2); color:var(--text-dim); font-weight:800; padding:2px 7px; border-radius:4px; }
</style> </style>
</head> </head>
<body> <body>
@ -680,6 +726,7 @@
<div class="tab" data-tab="news">News</div> <div class="tab" data-tab="news">News</div>
<div class="tab" data-tab="finder">Finder</div> <div class="tab" data-tab="finder">Finder</div>
<div class="tab" data-tab="blog">Blog Engine</div> <div class="tab" data-tab="blog">Blog Engine</div>
<div class="tab" data-tab="procurement">Procurement Intel</div>
</div> </div>
<div class="main"> <div class="main">
@ -905,6 +952,75 @@
<div id="blog-pipeline-status"></div> <div id="blog-pipeline-status"></div>
<div style="margin-bottom:0.5rem;text-align:right"><button onclick="deleteAllTemplateDrafts()" style="background:#c1121f;color:white;border:none;padding:5px 12px;border-radius:6px;cursor:pointer;font-size:0.7rem">Delete All Templates</button></div><div class="card"><div id="blog-list"></div></div> <div style="margin-bottom:0.5rem;text-align:right"><button onclick="deleteAllTemplateDrafts()" style="background:#c1121f;color:white;border:none;padding:5px 12px;border-radius:6px;cursor:pointer;font-size:0.7rem">Delete All Templates</button></div><div class="card"><div id="blog-list"></div></div>
</div> </div>
<!-- PROCUREMENT INTEL TAB -->
<div id="tab-procurement" class="hidden">
<!-- Sub-nav -->
<div style="display:flex;gap:0.5rem;margin-bottom:1.25rem;flex-wrap:wrap;align-items:center">
<button onclick="showProcSection('signals')" id="proc-btn-signals" class="proc-btn proc-btn-active">Reorder Signals</button>
<button onclick="showProcSection('abc')" id="proc-btn-abc" class="proc-btn">ABC Classes</button>
<button onclick="showProcSection('market')" id="proc-btn-market" class="proc-btn">Market Intel</button>
<button onclick="showProcSection('lifecycle')" id="proc-btn-lifecycle" class="proc-btn">Lifecycle Events</button>
<div style="flex:1"></div>
<button onclick="loadProcurement()" style="background:var(--surface2);border:1px solid var(--border);padding:4px 12px;border-radius:6px;cursor:pointer;font-size:0.75rem;color:var(--text)">↻ Refresh</button>
</div>
<!-- Reorder Signals section -->
<div id="proc-section-signals">
<div style="display:flex;gap:0.5rem;margin-bottom:1rem;flex-wrap:wrap">
<button onclick="filterSignal('')" id="sig-all" style="background:var(--accent);color:white;border:none;padding:3px 10px;border-radius:4px;cursor:pointer;font-size:0.75rem">All</button>
<button onclick="filterSignal('buy_now')" style="background:rgba(193,18,31,0.1);border:1px solid rgba(193,18,31,0.3);color:#c1121f;padding:3px 10px;border-radius:4px;cursor:pointer;font-size:0.75rem">🔴 Buy Now</button>
<button onclick="filterSignal('wait')" style="background:rgba(255,160,0,0.1);border:1px solid rgba(255,160,0,0.3);color:#c07000;padding:3px 10px;border-radius:4px;cursor:pointer;font-size:0.75rem">🟡 Wait</button>
<button onclick="filterSignal('hold')" style="background:rgba(45,106,79,0.1);border:1px solid rgba(45,106,79,0.3);color:#2d6a4f;padding:3px 10px;border-radius:4px;cursor:pointer;font-size:0.75rem">🟢 Hold</button>
<button onclick="filterSignal('monitor')" style="background:rgba(124,92,252,0.1);border:1px solid rgba(124,92,252,0.3);color:#7c5cfc;padding:3px 10px;border-radius:4px;cursor:pointer;font-size:0.75rem">🔵 Monitor</button>
</div>
<div id="proc-signals-grid" style="display:grid;gap:0.75rem;grid-template-columns:repeat(auto-fill,minmax(320px,1fr))">
<div class="loading pulse">Loading reorder signals...</div>
</div>
</div>
<!-- ABC Classification section -->
<div id="proc-section-abc" style="display:none">
<div style="display:flex;gap:0.5rem;margin-bottom:1rem">
<button onclick="filterAbc('')" style="background:var(--accent);color:white;border:none;padding:3px 10px;border-radius:4px;cursor:pointer;font-size:0.75rem">All</button>
<button onclick="filterAbc('A')" style="background:rgba(193,18,31,0.1);border:1px solid rgba(193,18,31,0.3);color:#c1121f;padding:3px 10px;border-radius:4px;cursor:pointer;font-size:0.75rem">A — High Turnover</button>
<button onclick="filterAbc('B')" style="background:rgba(255,160,0,0.1);border:1px solid rgba(255,160,0,0.3);color:#c07000;padding:3px 10px;border-radius:4px;cursor:pointer;font-size:0.75rem">B — Medium</button>
<button onclick="filterAbc('C')" style="background:rgba(136,136,136,0.12);border:1px solid #ddd;color:var(--text-dim);padding:3px 10px;border-radius:4px;cursor:pointer;font-size:0.75rem">C — Low</button>
</div>
<div class="card" style="overflow-x:auto">
<table style="width:100%;border-collapse:collapse;font-size:0.8rem" id="abc-table">
<thead><tr style="border-bottom:2px solid var(--border);color:var(--text-dim);font-size:0.7rem;font-weight:700;text-transform:uppercase">
<th style="text-align:left;padding:8px 6px">Class</th>
<th style="text-align:left;padding:8px 6px">Product</th>
<th style="text-align:left;padding:8px 6px">Form Factor</th>
<th style="text-align:right;padding:8px 6px">Demand Score</th>
<th style="text-align:right;padding:8px 6px">Compat.</th>
<th style="text-align:right;padding:8px 6px">Vendors</th>
<th style="text-align:left;padding:8px 6px">Supply Risk</th>
<th style="text-align:left;padding:8px 6px">Signal</th>
</tr></thead>
<tbody id="abc-tbody"><tr><td colspan="8" style="padding:1rem;color:var(--text-dim)">Loading...</td></tr></tbody>
</table>
</div>
</div>
<!-- Market Intelligence section -->
<div id="proc-section-market" style="display:none">
<div id="proc-market-grid" style="display:grid;gap:0.75rem;grid-template-columns:repeat(auto-fill,minmax(400px,1fr))">
<div class="loading pulse">Loading market intelligence...</div>
</div>
</div>
<!-- Lifecycle Events section -->
<div id="proc-section-lifecycle" style="display:none">
<div id="proc-lifecycle-grid" style="display:grid;gap:0.75rem;grid-template-columns:repeat(auto-fill,minmax(400px,1fr))">
<div class="loading pulse">Loading lifecycle events...</div>
</div>
</div>
</div><!-- end tab-procurement -->
</div> </div>
</div><!-- .app --> </div><!-- .app -->
@ -1221,6 +1337,7 @@ function goToTab(tabName) {
if (tabName === 'news') loadNews(); if (tabName === 'news') loadNews();
if (tabName === 'blog') loadBlogDrafts(); if (tabName === 'blog') loadBlogDrafts();
if (tabName === 'finder') document.getElementById('finder-switch-input').focus(); if (tabName === 'finder') document.getElementById('finder-switch-input').focus();
if (tabName === 'procurement') loadProcurement();
} }
document.querySelectorAll('.tab').forEach(function(tab) { document.querySelectorAll('.tab').forEach(function(tab) {
@ -2897,6 +3014,210 @@ el('compare-overlay').addEventListener('click', function(e) {
if (e.target === this) this.classList.remove('visible'); if (e.target === this) this.classList.remove('visible');
}); });
// ─── PROCUREMENT INTEL ───────────────────────────────────────────────────────
// Reorder-signal filter currently applied to the grid ('' shows everything); set by filterSignal().
var procCurrentSignalFilter = '';
// ABC class filter currently applied to the table ('' shows everything); set by filterAbc().
var procCurrentAbcFilter = '';
// Rows from the last /api/procurement/signals response; filtered client-side by renderSignals().
var procSignalsData = [];
// Rows from the last /api/procurement/abc response; filtered client-side by renderAbcTable().
var procAbcData = [];
// Shows the procurement sub-section called `name` ('signals', 'abc', 'market'
// or 'lifecycle'), hides the other three, and moves the active style to the
// matching sub-nav button. Elements that are missing from the page are skipped.
function showProcSection(name) {
  var sectionKeys = ['signals', 'abc', 'market', 'lifecycle'];
  for (var i = 0; i < sectionKeys.length; i++) {
    var key = sectionKeys[i];
    var isSelected = key === name;
    var panel = el('proc-section-' + key);
    var navButton = el('proc-btn-' + key);
    if (panel) {
      // An empty value restores the stylesheet's default display.
      panel.style.display = isSelected ? '' : 'none';
    }
    if (navButton) {
      navButton.classList.toggle('proc-btn-active', isSelected);
    }
  }
}
// Starts all four procurement loaders at once and resolves when every one of
// them has settled its own request.
async function loadProcurement() {
  var loaders = [loadProcSignals, loadProcAbc, loadProcMarketIntel, loadProcLifecycle];
  var pending = loaders.map(function (startLoader) {
    return startLoader();
  });
  await Promise.all(pending);
}
// Fetches up to 100 reorder signals, caches them in procSignalsData and
// renders them with the currently selected filter.
// A failed request (or a render error) is logged and reported as a load
// failure, matching the market-intel and lifecycle loaders, instead of being
// presented as "no data yet". An empty result is handled by renderSignals().
async function loadProcSignals() {
  var container = el('proc-signals-grid');
  container.innerHTML = '<div class="loading pulse">Loading signals...</div>';
  try {
    var payload = await api('/api/procurement/signals?limit=100');
    procSignalsData = (payload && payload.data) || [];
    renderSignals(procCurrentSignalFilter);
  } catch (e) {
    console.error('Failed to load reorder signals:', e);
    container.innerHTML = '<div style="color:var(--text-dim);padding:1rem;grid-column:1/-1">Could not load reorder signals.</div>';
  }
}
// Click handler for the signal filter buttons: stores the choice so a later
// reload re-applies it, then redraws the grid from the cached rows.
function filterSignal(sig) {
  procCurrentSignalFilter = sig;
  renderSignals(procCurrentSignalFilter);
}
// Renders the reorder-signal cards into #proc-signals-grid.
//
// filterSig: a signal value ('buy_now', 'wait', 'hold', 'monitor') to show
//            only matching rows from procSignalsData, or '' for all rows.
//
// Every value that originates from the API is HTML-escaped before insertion,
// and CSS class suffixes come from a fixed map so that they always match the
// stylesheet (.signal-buy / .sig-badge-buy, ...). Unknown signals are styled
// like 'monitor'.
function renderSignals(filterSig) {
  var rows = filterSig
    ? procSignalsData.filter(function (row) { return row.signal === filterSig; })
    : procSignalsData;
  var container = el('proc-signals-grid');
  if (!rows.length) {
    container.innerHTML = '<div style="color:var(--text-dim);padding:1rem;grid-column:1/-1">'
      + (filterSig ? 'No signals for this filter.' : 'No reorder signals yet — run the scraper to populate.')
      + '</div>';
    return;
  }

  var signalIcon = { buy_now: '🔴', wait: '🟡', hold: '🟢', monitor: '🔵' };
  var signalLabel = { buy_now: 'Buy Now', wait: 'Wait', hold: 'Hold', monitor: 'Monitor' };
  // Signal value -> suffix used by the .signal-* and .sig-badge-* rules.
  var signalCss = { buy_now: 'buy', wait: 'wait', hold: 'hold', monitor: 'monitor' };
  var owns = function (map, key) { return Object.prototype.hasOwnProperty.call(map, key); };
  var text = function (value) { return esc(String(value)); };

  container.innerHTML = rows.map(function (r) {
    // `reasons` may arrive as a JSON string or as an already-parsed array.
    var reasons = r.reasons;
    if (typeof reasons === 'string') {
      try { reasons = JSON.parse(reasons); } catch (e) { reasons = []; }
    }
    if (!Array.isArray(reasons)) reasons = [];

    var known = owns(signalCss, r.signal);
    var cssKey = known ? signalCss[r.signal] : 'monitor';
    var badgeText = known
      ? signalIcon[r.signal] + ' ' + signalLabel[r.signal]
      : text(r.signal == null ? '' : r.signal);

    var abcBadge = '';
    if (r.abc_class) {
      var abcKey = String(r.abc_class).toLowerCase();
      abcBadge = '<span class="abc-' + (/^[abc]$/.test(abcKey) ? abcKey : 'c') + '">' + text(r.abc_class) + '</span>';
    }

    // Clamp to 0..100 so a bad value cannot break the bar's width style.
    var strength = Number(r.signal_strength);
    var strengthPct = isFinite(strength) ? Math.min(100, Math.max(0, Math.round(strength * 100))) : 0;

    var productName = r.standard_name || r.part_number || r.slug || '—';

    var imgHtml = '';
    if (r.image_r2_key) {
      // NOTE(review): the host looks like a placeholder — confirm the real public bucket URL.
      imgHtml = '<img src="https://pub-placeholder.r2.dev/' + text(r.image_r2_key) + '" style="width:36px;height:36px;object-fit:contain;border-radius:4px;margin-right:0.5rem;flex-shrink:0" onerror="this.style.display=\'none\'">';
    }

    var subtitle = text(r.form_factor || '')
      + (r.speed_gbps ? ' · ' + text(r.speed_gbps) + 'G' : '')
      + (r.vendor_name ? ' · ' + text(r.vendor_name) : '');

    var reasonsHtml = reasons.length
      ? reasons.map(function (reason) { return '→ ' + text(reason); }).join('<br>')
      : 'Insufficient data';

    return '<div class="signal-card signal-' + cssKey + '">'
      + '<div style="display:flex;align-items:flex-start;gap:0.25rem;margin-bottom:0.5rem">'
      + imgHtml
      + '<div style="flex:1;min-width:0">'
      + '<div style="font-weight:700;font-size:0.82rem;white-space:nowrap;overflow:hidden;text-overflow:ellipsis">' + text(productName) + '</div>'
      + '<div style="font-size:0.7rem;color:var(--text-dim)">' + subtitle + '</div>'
      + '</div>'
      + '</div>'
      + '<div style="display:flex;gap:0.4rem;align-items:center;margin-bottom:0.6rem;flex-wrap:wrap">'
      + '<span class="intel-badge sig-badge-' + cssKey + '">' + badgeText + '</span>'
      + abcBadge
      + (r.supply_risk ? '<span style="font-size:0.65rem;padding:2px 6px;border-radius:3px;background:var(--surface2);color:var(--text-dim)">' + text(r.supply_risk) + ' risk</span>' : '')
      + '</div>'
      + '<div style="font-size:0.7rem;color:var(--text-dim);margin-bottom:0.5rem">'
      + reasonsHtml
      + '</div>'
      + '<div style="display:flex;gap:1rem;font-size:0.7rem;color:var(--text-dim)">'
      + (r.stock_trend ? '<span>Stock: <b style="color:var(--text)">' + text(r.stock_trend) + '</b></span>' : '')
      + (r.price_trend ? '<span>Price: <b style="color:var(--text)">' + text(r.price_trend) + '</b></span>' : '')
      + (r.lead_time_weeks ? '<span>Lead: <b style="color:var(--text)">' + text(r.lead_time_weeks) + 'w</b></span>' : '')
      + '</div>'
      + '<div style="margin-top:0.6rem;background:var(--surface2);border-radius:3px;height:4px">'
      + '<div style="height:4px;border-radius:3px;width:' + strengthPct + '%;background:var(--accent)"></div>'
      + '</div>'
      + '<div style="font-size:0.65rem;color:var(--text-dim);text-align:right;margin-top:2px">Signal strength: ' + strengthPct + '%</div>'
      + '</div>';
  }).join('');
}
// Fetches up to 200 ABC classification rows, caches them in procAbcData and
// renders the table with the currently selected class filter.
// A failed request (or a render error) is logged and reported as a load
// failure, matching the market-intel and lifecycle loaders, instead of being
// presented as "no data yet". An empty result is handled by renderAbcTable().
async function loadProcAbc() {
  try {
    var payload = await api('/api/procurement/abc?limit=200');
    procAbcData = (payload && payload.data) || [];
    renderAbcTable(procCurrentAbcFilter);
  } catch (e) {
    console.error('Failed to load ABC classification:', e);
    el('abc-tbody').innerHTML = '<tr><td colspan="8" style="padding:1rem;color:var(--text-dim)">Could not load ABC classification.</td></tr>';
  }
}
// Click handler for the ABC class buttons: stores the choice so a later
// reload re-applies it, then redraws the table from the cached rows.
function filterAbc(cls) {
  procCurrentAbcFilter = cls;
  renderAbcTable(procCurrentAbcFilter);
}
// Renders the ABC classification table body (#abc-tbody).
//
// filterCls: 'A', 'B' or 'C' to show only that class from procAbcData, or ''
//            for all rows.
//
// Every value that originates from the API is HTML-escaped. A demand score of
// zero is shown as "0" (only a missing or non-numeric score shows "—"), and a
// missing supply risk is rendered in the dim colour rather than green.
function renderAbcTable(filterCls) {
  var rows = filterCls
    ? procAbcData.filter(function (row) { return row.abc_class === filterCls; })
    : procAbcData;
  var sigIcon = { buy_now: '🔴', wait: '🟡', hold: '🟢', monitor: '🔵' };
  var riskColor = { high: 'var(--red)', medium: 'var(--yellow)', low: 'var(--green)' };
  var owns = function (map, key) { return Object.prototype.hasOwnProperty.call(map, key); };
  var text = function (value) { return esc(String(value)); };

  if (!rows.length) {
    el('abc-tbody').innerHTML = '<tr><td colspan="8" style="padding:1rem;color:var(--text-dim)">'
      + (filterCls ? 'No data for this filter.' : 'No ABC data yet — run compute:abc job.')
      + '</td></tr>';
    return;
  }

  el('abc-tbody').innerHTML = rows.map(function (r) {
    var abcKey = String(r.abc_class || 'c').toLowerCase();
    var abcEl = '<span class="abc-' + (/^[abc]$/.test(abcKey) ? abcKey : 'c') + '">' + text(r.abc_class || '—') + '</span>';

    var demand = parseFloat(r.demand_score);
    var demandText = isFinite(demand) ? demand.toFixed(0) : '—';

    var color = owns(riskColor, r.supply_risk) ? riskColor[r.supply_risk] : 'var(--text-dim)';

    var signalText = '—';
    if (r.signal) {
      signalText = (owns(sigIcon, r.signal) ? sigIcon[r.signal] : '') + ' ' + text(String(r.signal).replace('_', ' '));
    }

    return '<tr style="border-bottom:1px solid var(--border)">'
      + '<td style="padding:7px 6px">' + abcEl + '</td>'
      + '<td style="padding:7px 6px"><div style="font-weight:600">' + text(r.standard_name || r.part_number || '—') + '</div><div style="font-size:0.68rem;color:var(--text-dim)">' + text(r.vendor_name || '') + '</div></td>'
      + '<td style="padding:7px 6px;font-family:var(--mono);font-size:0.75rem">' + text(r.form_factor || '—') + '</td>'
      + '<td style="padding:7px 6px;text-align:right;font-family:var(--mono)">' + demandText + '</td>'
      + '<td style="padding:7px 6px;text-align:right;font-family:var(--mono)">' + text(r.compat_count || 0) + '</td>'
      + '<td style="padding:7px 6px;text-align:right;font-family:var(--mono)">' + text(r.vendor_count || 0) + '</td>'
      + '<td style="padding:7px 6px;font-size:0.75rem;color:' + color + '">' + text(r.supply_risk || '—') + '</td>'
      + '<td style="padding:7px 6px">' + signalText + '</td>'
      + '</tr>';
  }).join('');
}
// Fetches market intelligence for the last 180 days (max 50 items) and renders
// one card per item into #proc-market-grid.
//
// The items come from scraped sources, so every value is HTML-escaped and the
// badge CSS class is taken from a fixed map (unknown implications are styled
// as 'none'). Failures are logged and shown as a load error.
async function loadProcMarketIntel() {
  var container = el('proc-market-grid');
  try {
    var payload = await api('/api/procurement/market-intel?days=180&limit=50');
    var items = (payload && payload.data) || [];
    if (!items.length) {
      container.innerHTML = '<div style="color:var(--text-dim);padding:1rem;grid-column:1/-1">No market intelligence yet.</div>';
      return;
    }

    var typeIcon = {
      capex_cycle: '💰', trade_show: '🎪', standard_ratified: '📋',
      standard_draft: '📝', distributor_lead_time: '🚚', supply_chain: '🏭', tender: '📑'
    };
    var sigLabel = { buy_now: '🔴 Buy Now', wait: '🟡 Wait', hold: '🟢 Hold', monitor: '🔵 Monitor', none: '—' };
    // Implication value -> suffix used by the .intel-* badge rules.
    var sigCss = { buy_now: 'buy', wait: 'wait', hold: 'hold', monitor: 'monitor', none: 'none' };
    var owns = function (map, key) { return Object.prototype.hasOwnProperty.call(map, key); };
    var text = function (value) { return esc(String(value)); };

    container.innerHTML = items.map(function (item) {
      var sig = item.buy_signal_implication || 'none';
      var badgeClass = 'intel-' + (owns(sigCss, sig) ? sigCss[sig] : 'none');
      var badgeText = owns(sigLabel, sig) ? sigLabel[sig] : text(sig);
      var icon = owns(typeIcon, item.intel_type) ? typeIcon[item.intel_type] : '📊';

      var techList = Array.isArray(item.technologies) ? item.technologies : [];
      var techs = techList.map(function (tech) {
        return '<span style="font-size:0.65rem;padding:1px 6px;border-radius:3px;background:var(--surface2);color:var(--text-dim)">' + text(tech) + '</span>';
      }).join(' ');

      return '<div class="intel-card">'
        + '<div style="display:flex;gap:0.5rem;align-items:flex-start;margin-bottom:0.4rem">'
        + '<span style="font-size:1.2rem">' + icon + '</span>'
        + '<div style="flex:1">'
        + '<span class="intel-badge ' + badgeClass + '">' + badgeText + '</span>'
        + '<div style="font-weight:700;font-size:0.82rem;line-height:1.3;margin-top:0.2rem">' + text(item.title || '') + '</div>'
        + '</div></div>'
        + '<div style="font-size:0.75rem;color:var(--text-dim);margin-bottom:0.6rem;line-height:1.5">' + text(item.summary || '') + '</div>'
        + (techs ? '<div style="display:flex;gap:0.3rem;flex-wrap:wrap;margin-bottom:0.5rem">' + techs + '</div>' : '')
        + '<div style="display:flex;justify-content:space-between;font-size:0.68rem;color:var(--text-dim)">'
        + '<span>' + text(item.source_name || '') + '</span>'
        + (item.impact_horizon_months ? '<span>Impact: ~' + text(item.impact_horizon_months) + ' months</span>' : '')
        + '</div>'
        + '</div>';
    }).join('');
  } catch (e) {
    console.error('Failed to load market intelligence:', e);
    container.innerHTML = '<div style="color:var(--text-dim);padding:1rem;grid-column:1/-1">Could not load market intelligence.</div>';
  }
}
// Fetches lifecycle events for the last 365 days (max 50 items) and renders
// one card per event into #proc-lifecycle-grid.
//
// Every value from the API is HTML-escaped, the badge CSS class is taken from
// a fixed map, descriptions are truncated to 200 characters, and an
// unparseable effective date is omitted instead of printing "Invalid Date".
// Failures are logged and shown as a load error.
async function loadProcLifecycle() {
  var container = el('proc-lifecycle-grid');
  try {
    var payload = await api('/api/procurement/lifecycle?days=365&limit=50');
    var items = (payload && payload.data) || [];
    if (!items.length) {
      container.innerHTML = '<div style="color:var(--text-dim);padding:1rem;grid-column:1/-1">No lifecycle events yet.</div>';
      return;
    }

    var typeIcon = {
      eol_announced: '⛔', eol_effective: '🚫', standard_ratified: '✅',
      standard_draft: '📝', capex_peak: '💰', trade_show: '🎪',
      supply_risk: '⚠️', tender: '📑', price_floor: '📉'
    };
    var impactColor = { critical: '#c1121f', high: '#c1121f', medium: 'var(--yellow)', low: 'var(--green)' };
    var sigLabel = { buy_now: '🔴 Buy Now', wait: '🟡 Wait', hold: '🟢 Hold', monitor: '🔵 Monitor' };
    // Buy-signal value -> suffix used by the .intel-* badge rules.
    var sigCss = { buy_now: 'buy', wait: 'wait', hold: 'hold', monitor: 'monitor' };
    var owns = function (map, key) { return Object.prototype.hasOwnProperty.call(map, key); };
    var text = function (value) { return esc(String(value)); };

    container.innerHTML = items.map(function (item) {
      var ic = owns(impactColor, item.impact_level) ? impactColor[item.impact_level] : 'var(--text-dim)';
      var icon = owns(typeIcon, item.event_type) ? typeIcon[item.event_type] : '📌';

      var productInfo = item.part_number
        ? text(item.part_number) + (item.form_factor ? ' · ' + text(item.form_factor) : '')
        : '';

      var dateStr = '';
      if (item.effective_date) {
        var effective = new Date(item.effective_date);
        if (!isNaN(effective.getTime())) dateStr = effective.toLocaleDateString('de-DE');
      }

      var badgeHtml = '';
      if (item.buy_signal) {
        var sig = item.buy_signal;
        badgeHtml = '<span class="intel-badge intel-' + (owns(sigCss, sig) ? sigCss[sig] : 'none') + '">'
          + (owns(sigLabel, sig) ? sigLabel[sig] : text(sig)) + '</span>';
      }

      var descHtml = '';
      if (item.description) {
        var description = String(item.description);
        descHtml = '<div style="font-size:0.75rem;color:var(--text-dim);margin-bottom:0.5rem;line-height:1.5">'
          + esc(description.substring(0, 200)) + (description.length > 200 ? '…' : '') + '</div>';
      }

      return '<div class="intel-card" style="border-left:3px solid ' + ic + '">'
        + '<div style="display:flex;gap:0.5rem;align-items:flex-start;margin-bottom:0.4rem">'
        + '<span style="font-size:1.2rem">' + icon + '</span>'
        + '<div style="flex:1">'
        + badgeHtml
        + '<div style="font-weight:700;font-size:0.82rem;line-height:1.3;margin-top:0.2rem">' + text(item.title || '') + '</div>'
        + '</div></div>'
        + descHtml
        + '<div style="display:flex;justify-content:space-between;font-size:0.68rem;color:var(--text-dim)">'
        + '<span>' + text(item.source_name || '') + (productInfo ? ' · ' + productInfo : '') + '</span>'
        + (dateStr ? '<span style="color:' + ic + ';font-weight:600">' + esc(dateStr) + '</span>' : '')
        + '</div>'
        + '</div>';
    }).join('');
  } catch (e) {
    console.error('Failed to load lifecycle events:', e);
    container.innerHTML = '<div style="color:var(--text-dim);padding:1rem;grid-column:1/-1">Could not load lifecycle events.</div>';
  }
}
// INIT // INIT
loadOverview(); loadOverview();
</script> </script>

View File

@ -0,0 +1,349 @@
/**
 * Crawler LLM — Core extraction engine.
 *
 * Uses Ollama (local LLM) to extract structured product data from HTML.
 * Two-stage pipeline:
 * 1. Page type detection (product vs. category) — cheap, fast
 * 2. Structured data extraction with schema enforcement
 *
 * Vendor-specific profiles guide the LLM without hard-coding selectors.
 */
import { pool } from "../utils/db";
import type { StockExtractionResult, MarketIntelExtractionResult } from "./stock-schema";
import { VENDOR_PROFILES } from "./stock-schema";
import { validateStockExtraction } from "./validator";
const OLLAMA_HOST = process.env.OLLAMA_HOST || "http://192.168.178.169:11434";
const OLLAMA_MODEL = process.env.CRAWLER_LLM_MODEL || "qwen2.5:14b";
const MAX_HTML_CHARS = 12_000; // truncate to keep prompt manageable
// ─────────────────────────────────────────────────────────────────────────────
// Ollama API call
// ─────────────────────────────────────────────────────────────────────────────
/**
 * Send one non-streaming generation request to the configured Ollama server and
 * return the text of its `response` field.
 *
 * The request asks for JSON-formatted output with temperature 0.1 and at most
 * 1024 predicted tokens.
 *
 * @param prompt Full prompt text to send to the model.
 * @returns The `response` field of the server's JSON reply.
 * @throws Error when the server answers with a non-2xx status; the message
 *         contains the status code and the response body.
 */
async function ollamaGenerate(prompt: string): Promise<string> {
  const payload = {
    model: OLLAMA_MODEL,
    prompt,
    stream: false,
    format: "json",
    options: { temperature: 0.1, num_predict: 1024 },
  };

  const reply = await fetch(`${OLLAMA_HOST}/api/generate`, {
    method: "POST",
    headers: { "Content-Type": "application/json" },
    body: JSON.stringify(payload),
  });

  if (!reply.ok) {
    const status = reply.status;
    const detail = await reply.text();
    throw new Error(`Ollama error: ${status} ${detail}`);
  }

  const { response: completion } = (await reply.json()) as { response: string };
  return completion;
}
// ─────────────────────────────────────────────────────────────────────────────
// Stage 1: Page type detection (fast, binary)
// ─────────────────────────────────────────────────────────────────────────────
/**
 * Stage 1 of the pipeline: ask the LLM whether the page is a single product page
 * or a category/listing page.
 *
 * Only the first 3000 characters of the HTML are sent. When a vendor profile
 * exists for `vendorSlug`, its product/category page signals are appended to the
 * prompt as hints.
 *
 * @param html       Raw page HTML.
 * @param url        Page URL, included in the prompt.
 * @param vendorSlug Optional key into VENDOR_PROFILES.
 * @returns The LLM's verdict. If the reply cannot be parsed, a negative verdict
 *          with confidence 0 and evidence "JSON parse failed".
 */
async function detectPageType(html: string, url: string, vendorSlug?: string): Promise<{
  is_product_page: boolean;
  confidence: number;
  evidence: string;
}> {
  let hints = "";
  const profile = vendorSlug ? VENDOR_PROFILES[vendorSlug] : null;
  if (profile) {
    const productSignals = profile.product_page_signals.join(", ");
    const categorySignals = profile.category_page_signals.join(", ");
    hints = `\nVendor hints — Product page signals: ${productSignals}. Category page signals: ${categorySignals}.`;
  }

  const htmlHead = html.substring(0, 3000);
  const prompt = `You are a web scraper assistant. Determine if this HTML is a single product page or a category/listing page.
URL: ${url}${hints}
HTML (truncated):
${htmlHead}
Respond with JSON only:
{
"is_product_page": true or false,
"confidence": 0.0 to 1.0,
"evidence": "brief quote from the HTML that supports your decision"
}`;

  const reply = await ollamaGenerate(prompt);
  try {
    // Destructuring stays inside the try: a reply of `null` must end up in the
    // fallback branch just like malformed JSON does.
    const { is_product_page, confidence, evidence } = JSON.parse(reply);
    return {
      is_product_page: Boolean(is_product_page),
      confidence: Number(confidence) || 0,
      evidence: String(evidence || ""),
    };
  } catch {
    return { is_product_page: false, confidence: 0, evidence: "JSON parse failed" };
  }
}
// ─────────────────────────────────────────────────────────────────────────────
// Stage 2: Full product extraction
// ─────────────────────────────────────────────────────────────────────────────
/**
 * Coerce an LLM-supplied value to a finite number.
 * Numbers and non-empty numeric strings are accepted; everything else
 * (including NaN and Infinity) becomes null.
 */
function toFiniteNumberOrNull(value: unknown): number | null {
  if (typeof value === "number") return Number.isFinite(value) ? value : null;
  if (typeof value === "string" && value.trim() !== "") {
    const parsed = Number(value);
    return Number.isFinite(parsed) ? parsed : null;
  }
  return null;
}

/** Keep only price-break entries that are objects with a numeric qty and price. */
function sanitizePriceBreaks(value: unknown): { qty: number; price: number }[] {
  if (!Array.isArray(value)) return [];
  const breaks: { qty: number; price: number }[] = [];
  for (const entry of value as unknown[]) {
    if (entry === null || typeof entry !== "object") continue;
    const { qty, price } = entry as { qty?: unknown; price?: unknown };
    const cleanQty = toFiniteNumberOrNull(qty);
    const cleanPrice = toFiniteNumberOrNull(price);
    if (cleanQty !== null && cleanPrice !== null) {
      breaks.push({ qty: cleanQty, price: cleanPrice });
    }
  }
  return breaks;
}

/**
 * Stage 2 of the pipeline: ask the LLM to extract price, stock and identity
 * fields from a product page.
 *
 * At most MAX_HTML_CHARS characters of the HTML are sent. When a vendor profile
 * exists for `vendorSlug`, its name, currency and price/stock hints are added to
 * the prompt.
 *
 * The LLM reply is untrusted: numeric fields that are not finite numbers are
 * returned as null (rather than NaN, which would slip through range checks), and
 * malformed price-break entries are dropped.
 *
 * @param html       Raw page HTML.
 * @param url        Page URL, included in the prompt.
 * @param vendorSlug Optional key into VENDOR_PROFILES.
 * @returns The normalized extraction. If the reply cannot be parsed, an empty
 *          result with is_product_page=false, confidence 0 and
 *          source_evidence "JSON parse failed".
 */
async function extractProductData(
  html: string,
  url: string,
  vendorSlug?: string
): Promise<StockExtractionResult> {
  const profile = vendorSlug ? VENDOR_PROFILES[vendorSlug] : null;
  const hints = profile ? `
Vendor: ${profile.name} (${profile.currency})
Price hint: ${profile.price_hint || "find the main selling price"}
Stock hint: ${profile.stock_hint || "find availability status"}` : "";
  const prompt = `You are a product data extractor for optical transceiver products. Extract structured data from this product page HTML.
URL: ${url}${hints}
HTML (truncated to ${MAX_HTML_CHARS} chars):
${html.substring(0, MAX_HTML_CHARS)}
Extract and respond with JSON only use null for any field you cannot find with confidence:
{
"is_product_page": true,
"confidence": 0.0 to 1.0,
"source_evidence": "brief quote from HTML supporting your extraction",
"price": number or null,
"currency": "USD" or "EUR" or "GBP" or "CNY" or null,
"price_breaks": [{"qty": number, "price": number}] or [],
"stock_level": "in_stock" or "out_of_stock" or "limited" or "unknown",
"stock_quantity": number or null,
"incoming_quantity": number or null,
"incoming_eta": "YYYY-MM-DD" or null,
"lead_time_days": number or null,
"moq": number or null,
"part_number": "exact part number string" or null,
"standard_name": "manufacturer's exact product name as written on the page" or null,
"form_factor": "SFP+" or "QSFP28" or "QSFP-DD" etc or null,
"speed_gbps": number or null
}
Rules:
- standard_name MUST be the manufacturer's exact product designation, not a generic description
- If you see "All Optical Transceivers" or similar category text as the name, set standard_name to null
- price_breaks only if there is a visible quantity/price table
- incoming_quantity: look for text like "X units incoming", "X im Zulauf", "Expected: X"
- Set confidence < 0.5 if you are guessing`;
  const raw = await ollamaGenerate(prompt);
  try {
    const parsed = JSON.parse(raw);
    return {
      // A missing flag defaults to true: this stage only runs on presumed product pages.
      is_product_page: Boolean(parsed.is_product_page ?? true),
      confidence: Number(parsed.confidence) || 0,
      source_evidence: String(parsed.source_evidence || ""),
      price: toFiniteNumberOrNull(parsed.price),
      currency: parsed.currency || null,
      price_breaks: sanitizePriceBreaks(parsed.price_breaks),
      // Anything outside the three known levels collapses to "unknown".
      stock_level: (["in_stock", "out_of_stock", "limited"].includes(parsed.stock_level))
        ? parsed.stock_level
        : "unknown",
      stock_quantity: toFiniteNumberOrNull(parsed.stock_quantity),
      incoming_quantity: toFiniteNumberOrNull(parsed.incoming_quantity),
      incoming_eta: parsed.incoming_eta || null,
      lead_time_days: toFiniteNumberOrNull(parsed.lead_time_days),
      moq: toFiniteNumberOrNull(parsed.moq),
      part_number: parsed.part_number || null,
      standard_name: parsed.standard_name || null,
      form_factor: parsed.form_factor || null,
      speed_gbps: toFiniteNumberOrNull(parsed.speed_gbps),
    };
  } catch {
    // Malformed JSON (or a non-object reply such as `null`) yields an empty result.
    return {
      is_product_page: false,
      confidence: 0,
      source_evidence: "JSON parse failed",
      price: null, currency: null, price_breaks: [],
      stock_level: "unknown",
      stock_quantity: null, incoming_quantity: null, incoming_eta: null,
      lead_time_days: null, moq: null,
      part_number: null, standard_name: null, form_factor: null, speed_gbps: null,
    };
  }
}
// ─────────────────────────────────────────────────────────────────────────────
// Market intelligence extraction
// ─────────────────────────────────────────────────────────────────────────────
/**
 * Ask the LLM to analyse a piece of text for transceiver-procurement market
 * intelligence and return a normalized result.
 *
 * Only the first 8000 characters of `text` are sent. The reply is untrusted:
 * `intel_type` and `buy_signal_implication` are checked against the values the
 * result type allows (falling back to "supply_chain" / "none"), non-string
 * technology entries are dropped, and `impact_horizon_months` defaults to 6 only
 * when the value is missing, non-numeric or negative — an explicit 0
 * ("immediate") is preserved.
 *
 * @param text       Plain text to analyse.
 * @param url        Source URL, included in the prompt.
 * @param sourceName Human-readable source name, included in the prompt.
 * @returns The normalized analysis. If the reply cannot be parsed, a
 *          non-relevant result with confidence 0 and source_evidence "parse error".
 */
export async function extractMarketIntel(
  text: string,
  url: string,
  sourceName: string
): Promise<MarketIntelExtractionResult> {
  const intelTypes: readonly MarketIntelExtractionResult["intel_type"][] = [
    "capex_cycle", "trade_show", "standard_ratified", "standard_draft",
    "distributor_lead_time", "supply_chain", "tender",
  ];
  const buySignals: readonly MarketIntelExtractionResult["buy_signal_implication"][] = [
    "buy_now", "wait", "hold", "monitor", "none",
  ];
  const prompt = `You are an optical transceiver market analyst. Analyze this text for market intelligence relevant to transceiver procurement.
Source: ${sourceName}
URL: ${url}
Text:
${text.substring(0, 8000)}
Respond with JSON only:
{
"is_relevant": true or false (false if nothing relevant to transceiver markets),
"confidence": 0.0 to 1.0,
"source_evidence": "brief quote supporting your analysis",
"intel_type": one of: "capex_cycle", "trade_show", "standard_ratified", "standard_draft", "distributor_lead_time", "supply_chain", "tender",
"title": "concise title (max 100 chars)",
"summary": "2-3 sentence summary of the key insight",
"technologies": ["400G", "QSFP-DD", etc transceiver technologies mentioned],
"buy_signal_implication": one of: "buy_now", "wait", "hold", "monitor", "none",
"impact_horizon_months": estimated months until this affects the market (number),
"published_at": "YYYY-MM-DD" or null
}
Guidelines:
- buy_now: shortage, EOL, CapEx surge order before prices rise
- wait: new standard coming current products will drop in price
- hold: stable market, no urgency
- monitor: interesting but unclear impact
- impact_horizon_months: 0-3 for immediate, 3-12 for medium, 12+ for long-term`;
  const raw = await ollamaGenerate(prompt);
  try {
    const p = JSON.parse(raw);

    // Accept numbers and non-empty numeric strings; null/""/garbage count as "missing".
    const rawHorizon: unknown = p.impact_horizon_months;
    const horizon =
      typeof rawHorizon === "number" ? rawHorizon :
      typeof rawHorizon === "string" && rawHorizon.trim() !== "" ? Number(rawHorizon) :
      Number.NaN;

    const technologies: string[] = Array.isArray(p.technologies)
      ? (p.technologies as unknown[]).filter((t): t is string => typeof t === "string")
      : [];

    return {
      is_relevant: Boolean(p.is_relevant),
      confidence: Number(p.confidence) || 0,
      source_evidence: String(p.source_evidence || ""),
      intel_type: intelTypes.includes(p.intel_type) ? p.intel_type : "supply_chain",
      title: String(p.title || "").substring(0, 200),
      summary: String(p.summary || ""),
      technologies,
      buy_signal_implication: buySignals.includes(p.buy_signal_implication)
        ? p.buy_signal_implication
        : "none",
      impact_horizon_months: Number.isFinite(horizon) && horizon >= 0 ? horizon : 6,
      published_at: typeof p.published_at === "string" && p.published_at !== "" ? p.published_at : null,
    };
  } catch {
    // Malformed JSON (or a non-object reply such as `null`) is treated as "not relevant".
    return {
      is_relevant: false, confidence: 0, source_evidence: "parse error",
      intel_type: "supply_chain", title: "", summary: "", technologies: [],
      buy_signal_implication: "none", impact_horizon_months: 0, published_at: null,
    };
  }
}
// ─────────────────────────────────────────────────────────────────────────────
// Public API — Main scrape function
// ─────────────────────────────────────────────────────────────────────────────
/**
 * Outcome of one LLM scrape as returned by scrapeWithLLM(): the extracted data
 * together with the verdict of the rule-based validation.
 */
export interface CrawlerLLMResult {
// Structured data extracted from the page (an empty result for non-product pages)
extraction: StockExtractionResult;
// false when the page was rejected or validation reported at least one error
validation_passed: boolean;
// Human-readable reasons why validation failed
validation_errors: string[];
// Non-fatal findings that do not affect validation_passed
validation_warnings: string[];
}
/**
 * Run the LLM scrape pipeline on one page: optional page-type detection, full
 * extraction, then rule-based validation.
 *
 * @param html    Raw page HTML.
 * @param url     Page URL.
 * @param options `vendorSlug` selects a vendor profile for prompt hints,
 *                `speedGbps` enables the speed-class price check in validation,
 *                `skipPageDetection` skips stage 1 when the caller already knows
 *                the URL is a product page. `vendorId` and `transceiverIds` are
 *                accepted but not read by this function.
 * @returns The extraction plus validation verdict. Pages detected as
 *          non-product pages are returned as an empty extraction with the single
 *          error "Not a product page".
 */
export async function scrapeWithLLM(
  html: string,
  url: string,
  options: {
    vendorSlug?: string;
    vendorId?: string;
    transceiverIds?: string[]; // candidate matches (pre-filtered by form_factor/speed)
    speedGbps?: number;
    skipPageDetection?: boolean; // set true if URL is known product page
  } = {}
): Promise<CrawlerLLMResult> {
  const vendor = options.vendorSlug;
  const expectedSpeed = options.speedGbps;
  const runDetection = !options.skipPageDetection;

  // Stage 1: cheap page-type check; bail out early on category/listing pages.
  if (runDetection) {
    const verdict = await detectPageType(html, url, vendor);
    if (!verdict.is_product_page) {
      const emptyExtraction: StockExtractionResult = {
        is_product_page: false,
        confidence: verdict.confidence,
        source_evidence: verdict.evidence,
        price: null,
        currency: null,
        price_breaks: [],
        stock_level: "unknown",
        stock_quantity: null,
        incoming_quantity: null,
        incoming_eta: null,
        lead_time_days: null,
        moq: null,
        part_number: null,
        standard_name: null,
        form_factor: null,
        speed_gbps: null,
      };
      return {
        extraction: emptyExtraction,
        validation_passed: false,
        validation_errors: ["Not a product page"],
        validation_warnings: [],
      };
    }
  }

  // Stage 2: full structured extraction.
  const extraction = await extractProductData(html, url, vendor);

  // Stage 3: rule-based sanity checks on what the LLM returned.
  const { passed, errors, warnings } = validateStockExtraction(extraction, expectedSpeed);
  return {
    extraction,
    validation_passed: passed,
    validation_errors: errors,
    validation_warnings: warnings,
  };
}
// ─────────────────────────────────────────────────────────────────────────────
// Persist to DB — saves stock snapshot and logs the scrape
// ─────────────────────────────────────────────────────────────────────────────
/**
 * Persist the outcome of one LLM scrape.
 *
 * A row is always written to crawler_llm_log (for audit/debugging). A
 * stock_snapshots row per transceiver id is written only when validation passed
 * and the page is a product page. Inserts are issued one after another.
 *
 * @param result         Extraction and validation verdict to persist.
 * @param url            URL the data was scraped from.
 * @param vendorId       Vendor the page belongs to.
 * @param transceiverIds Transceivers the snapshot applies to.
 */
export async function persistStockSnapshot(
  result: CrawlerLLMResult,
  url: string,
  vendorId: string,
  transceiverIds: string[]
): Promise<void> {
  const data = result.extraction;
  const passed = result.validation_passed;

  // Audit log entry: written regardless of the validation outcome.
  const failureReason = passed ? null : result.validation_errors.join("; ");
  const logParams = [
    url,
    vendorId,
    data.is_product_page,
    JSON.stringify(data),
    data.confidence,
    passed,
    failureReason,
    OLLAMA_MODEL,
  ];
  await pool.query(
    `INSERT INTO crawler_llm_log
(url, vendor_id, is_product_page, extracted_data, confidence, validation_passed,
failure_reason, model_used)
VALUES ($1, $2, $3, $4, $5, $6, $7, $8)`,
    logParams
  );

  const storeSnapshot = passed && data.is_product_page;
  if (!storeSnapshot) return;

  // One snapshot row per matched transceiver.
  for (const transceiverId of transceiverIds) {
    // Empty price-break lists are stored as NULL rather than "[]".
    const priceBreaksJson = data.price_breaks.length > 0 ? JSON.stringify(data.price_breaks) : null;
    await pool.query(
      `INSERT INTO stock_snapshots
(transceiver_id, vendor_id, stock_level, stock_quantity, incoming_quantity,
incoming_eta, lead_time_days, moq, price_breaks, source_url, crawler_confidence)
VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11)`,
      [
        transceiverId,
        vendorId,
        data.stock_level,
        data.stock_quantity,
        data.incoming_quantity,
        data.incoming_eta,
        data.lead_time_days,
        data.moq,
        priceBreaksJson,
        url,
        data.confidence,
      ]
    );
  }
}

View File

@ -0,0 +1,135 @@
/**
 * Crawler LLM — Schema definitions for structured product extraction.
 *
 * Every schema includes a `confidence` and a `source_evidence` field so the LLM
 * is forced to cite its work. This enables validation and debugging.
 */
/**
 * Structured result of extracting price, stock and identity data from one
 * product page. Fields the LLM could not determine are null (or "unknown" for
 * stock_level, or an empty list for price_breaks).
 */
export interface StockExtractionResult {
is_product_page: boolean; // false = category/listing page → discard
confidence: number; // 0.0–1.0 — LLM self-assessment
source_evidence: string; // which text passage the LLM used
// Pricing
price: number | null;
currency: "USD" | "EUR" | "GBP" | "CNY" | null;
price_breaks: PriceBreak[]; // volume discount tiers
// Stock
stock_level: "in_stock" | "out_of_stock" | "limited" | "unknown";
stock_quantity: number | null; // exact qty if shown
incoming_quantity: number | null; // units announced as incoming, e.g. the German "18 im Zulauf"
incoming_eta: string | null; // ISO date string "2026-04-15"
lead_time_days: number | null;
moq: number | null; // minimum order quantity
// Product identity (for cross-validation)
part_number: string | null;
standard_name: string | null; // manufacturer's exact product name
form_factor: string | null;
speed_gbps: number | null;
}
/** One tier of a volume-discount table: a quantity and the price listed for it. */
export interface PriceBreak {
// Quantity of the tier
qty: number;
// Price listed for that tier
price: number;
}
/**
 * Structured result of analysing one text for market intelligence relevant to
 * transceiver procurement.
 */
export interface MarketIntelExtractionResult {
is_relevant: boolean; // false = skip
confidence: number; // LLM self-assessment
source_evidence: string; // quote the LLM gives in support of its analysis
intel_type: "capex_cycle" | "trade_show" | "standard_ratified" | "standard_draft" | "distributor_lead_time" | "supply_chain" | "tender";
title: string;
summary: string;
technologies: string[]; // ['400G', 'QSFP-DD', ...]
buy_signal_implication: "buy_now" | "wait" | "hold" | "monitor" | "none";
impact_horizon_months: number; // estimated months until the market is affected
published_at: string | null; // ISO date
}
/**
 * Vendor-specific hints to improve LLM extraction accuracy.
 * The hints are natural-language text and text patterns, not CSS selectors.
 */
export interface VendorProfile {
slug: string; // same value as the profile's key in VENDOR_PROFILES
name: string; // display name of the vendor
currency: "USD" | "EUR" | "GBP" | "CNY";
product_page_signals: string[]; // text patterns that indicate a product page
category_page_signals: string[]; // text patterns that indicate a category page
price_hint: string | null; // natural language hint for the LLM
stock_hint: string | null; // natural language hint for the LLM
known_moq: number | null; // minimum order quantity, null when not known
}
/**
 * Vendor profiles keyed by vendor slug.
 *
 * NOTE: the declared type is Record<string, VendorProfile>, but a lookup with a
 * slug that is not listed here yields undefined at runtime — callers must check
 * the result before using it.
 */
export const VENDOR_PROFILES: Record<string, VendorProfile> = {
"flexoptix": {
slug: "flexoptix",
name: "Flexoptix",
currency: "EUR",
product_page_signals: ["In den Warenkorb", "Add to Cart", "part number", "SKU:", "P/N:"],
category_page_signals: ["Alle Produkte", "Filter", "Ergebnisse", "products found"],
price_hint: "Price is shown in EUR, usually near 'In den Warenkorb' button. May show 'auf Anfrage' if not listed.",
stock_hint: "Look for 'auf Lager', 'Lieferzeit', 'sofort lieferbar', or stock badge near price.",
known_moq: 1,
},
"fs-com": {
slug: "fs-com",
name: "FS.com",
currency: "USD",
product_page_signals: ["Add to Cart", "Part No.", "SKU", "In Stock", "Reviews"],
category_page_signals: ["Products", "Filter by", "Sort by", "items found", "Category"],
price_hint: "Price is in USD, shown prominently near 'Add to Cart'. May show qty pricing table.",
stock_hint: "Look for 'In Stock', exact number like '847 In Stock', or 'Out of Stock'.",
known_moq: 1,
},
"10gtek": {
slug: "10gtek",
name: "10Gtek",
currency: "USD",
product_page_signals: ["Add to Cart", "Product Code:", "In Stock", "Ships from"],
category_page_signals: ["Shop All", "Filter", "Category", "Sort By"],
price_hint: "Price in USD near Add to Cart button. Volume pricing sometimes shown as table.",
stock_hint: "Stock level shown as text: 'In Stock', 'Low Stock', 'Out of Stock'.",
known_moq: 1,
},
"atgbics": {
slug: "atgbics",
name: "ATGBICS",
currency: "GBP",
product_page_signals: ["Add to Basket", "Part Number:", "Stock:", "Delivery"],
category_page_signals: ["Products", "Browse by", "Refine by"],
price_hint: "Price in GBP. ATGBICS uses Shopify, price is in a span with class 'price'.",
stock_hint: "Stock shown as 'In Stock', 'Limited Stock', or 'Out of Stock' near price.",
known_moq: 1,
},
"prolabs": {
slug: "prolabs",
name: "ProLabs",
currency: "USD",
product_page_signals: ["Add to Cart", "Part Number", "In Stock", "Specs"],
category_page_signals: ["Results", "Filter", "Category", "Sort"],
price_hint: "Price in USD. ProLabs may require login for prices — if so, mark price as null.",
stock_hint: "Stock availability shown near product title.",
known_moq: 1,
},
"farnell": {
slug: "farnell",
name: "Farnell",
currency: "EUR",
product_page_signals: ["Add to Basket", "Order Code:", "Stock:", "Lead Time:"],
category_page_signals: ["Products", "Refine Search", "Category", "results for"],
price_hint: "Price in EUR or GBP. Farnell shows break prices in a table with columns Qty/Price.",
stock_hint: "Stock shown as number, e.g. '47 In Stock'. Lead time shown in business days.",
known_moq: 1,
},
"mouser": {
slug: "mouser",
name: "Mouser Electronics",
currency: "EUR",
product_page_signals: ["Add to Cart", "Mouser Part No.", "Mfr. Part No.", "In Stock:"],
category_page_signals: ["Search Results", "Filter Results", "Products (", "Sort By"],
price_hint: "Mouser shows price per unit and break quantities. USD or EUR depending on locale.",
stock_hint: "Stock shown as exact number: 'In Stock: 124'. Lead time shown for out-of-stock items.",
// The only profile without a known minimum order quantity.
known_moq: null,
},
};

View File

@ -0,0 +1,157 @@
/**
 * Crawler LLM — Rule-based validator.
 *
 * Runs AFTER the LLM extraction to catch hallucinations and obvious errors.
 * The LLM is good at structure; this catches range violations and nonsense.
 */
import type { StockExtractionResult } from "./stock-schema";
/** Verdict of the rule-based checks on one extraction. */
export interface ValidationResult {
// true when no errors were recorded; warnings alone do not fail validation
passed: boolean;
// Suspicious but non-fatal findings
warnings: string[];
// Findings that make the extraction unusable
errors: string[];
}
/** Expected price ranges per speed class (USD/EUR). Rough but effective. */
const PRICE_RANGES: Record<string, [number, number]> = {
  "1G": [10, 500],
  "10G": [20, 2000],
  "25G": [30, 2000],
  "40G": [50, 3000],
  "100G": [80, 15000],
  "200G": [200, 20000],
  "400G": [200, 50000],
  "800G": [500, 80000],
};

/** Form factors accepted without a warning. */
const VALID_FORM_FACTORS = new Set([
  "SFP", "SFP+", "SFP28", "SFP56", "SFP-DD",
  "QSFP", "QSFP+", "QSFP28", "QSFP56", "QSFP-DD", "QSFP112",
  "OSFP", "OSFP-RHS",
  "CFP", "CFP2", "CFP4", "CFP8",
  "XFP", "X2", "XENPAK",
  "DSFP", "CSFP",
]);

/** Currency codes accepted for a price. */
const VALID_CURRENCIES = new Set(["USD", "EUR", "GBP", "CNY"]);

/**
 * Run rule-based sanity checks on an LLM extraction.
 *
 * Errors make the extraction fail; warnings are informational only. Numeric
 * fields that are present but not finite (NaN, Infinity) are treated as errors,
 * because they would otherwise pass every range comparison.
 *
 * @param result    Extraction to check.
 * @param speedGbps Optional speed class of the product; when it maps to an entry
 *                  in PRICE_RANGES, a price below 10% of the lower bound or above
 *                  10x the upper bound produces a warning.
 * @returns `passed` is true exactly when no errors were recorded. Non-product
 *          pages short-circuit with the single error "Not a product page".
 */
export function validateStockExtraction(
  result: StockExtractionResult,
  speedGbps?: number
): ValidationResult {
  const errors: string[] = [];
  const warnings: string[] = [];

  // Not a product page — caller should discard, no further checks are useful.
  if (!result.is_product_page) {
    return { passed: false, errors: ["Not a product page"], warnings: [] };
  }

  // Written as `!(x >= 0.5)` so that a NaN confidence is rejected as well.
  if (!(result.confidence >= 0.5)) {
    errors.push(`Confidence ${result.confidence} below threshold 0.5`);
  }

  // Price validation
  if (result.price !== null) {
    if (!Number.isFinite(result.price)) {
      errors.push(`Price ${result.price} is not a finite number`);
    } else {
      if (result.price <= 0) {
        errors.push(`Price ${result.price} is not positive`);
      }
      if (result.price > 500_000) {
        errors.push(`Price ${result.price} exceeds maximum sanity limit`);
      }
      // Speed-class price range check (warning only; the ranges are rough).
      if (speedGbps) {
        const speedKey = `${speedGbps}G`;
        const range = PRICE_RANGES[speedKey];
        if (range) {
          const [low, high] = range;
          if (result.price < low * 0.1 || result.price > high * 10) {
            warnings.push(`Price ${result.price} ${result.currency} looks unusual for ${speedKey} (expected ${low}–${high})`);
          }
        }
      }
    }
    // A price without a recognised currency is unusable.
    if (!result.currency || !VALID_CURRENCIES.has(result.currency)) {
      errors.push(`Invalid currency: ${result.currency}`);
    }
  }

  // Stock quantity sanity
  if (result.stock_quantity !== null) {
    if (!Number.isFinite(result.stock_quantity)) {
      errors.push(`Stock quantity ${result.stock_quantity} is not a finite number`);
    } else if (result.stock_quantity < 0) {
      errors.push(`Stock quantity ${result.stock_quantity} is negative`);
    } else if (result.stock_quantity > 100_000) {
      warnings.push(`Stock quantity ${result.stock_quantity} unusually high — verify`);
    }
  }

  // Lead time sanity
  if (result.lead_time_days !== null) {
    if (!Number.isFinite(result.lead_time_days)) {
      errors.push(`Lead time ${result.lead_time_days} is not a finite number`);
    } else if (result.lead_time_days < 0) {
      errors.push(`Lead time ${result.lead_time_days} is negative`);
    } else if (result.lead_time_days > 730) {
      warnings.push(`Lead time ${result.lead_time_days} days (>2 years) — verify`);
    }
  }

  // MOQ sanity (NaN fails the finite check and is reported the same way).
  if (result.moq !== null && (!Number.isFinite(result.moq) || result.moq < 1)) {
    errors.push(`MOQ ${result.moq} must be at least 1`);
  }

  // Form factor check
  if (result.form_factor && !VALID_FORM_FACTORS.has(result.form_factor)) {
    warnings.push(`Unknown form factor: ${result.form_factor}`);
  }

  // Price break consistency
  for (const pb of result.price_breaks) {
    // Entries come from LLM output; guard against null/non-object items.
    if (pb === null || typeof pb !== "object") {
      errors.push(`Invalid price break: ${JSON.stringify(pb)}`);
      continue;
    }
    if (!Number.isFinite(pb.qty) || !Number.isFinite(pb.price) || pb.qty < 1 || pb.price <= 0) {
      errors.push(`Invalid price break: qty=${pb.qty} price=${pb.price}`);
    }
    // Only tiers above twice the unit price are flagged.
    if (result.price && pb.price > result.price * 2) {
      warnings.push(`Price break ${pb.qty}x=${pb.price} higher than unit price — unusual`);
    }
  }

  // Incoming ETA must be a parseable date (it is not required to lie in the future).
  if (result.incoming_eta) {
    const eta = new Date(result.incoming_eta);
    if (isNaN(eta.getTime())) {
      errors.push(`Invalid incoming_eta date: ${result.incoming_eta}`);
    }
  }

  return {
    passed: errors.length === 0,
    errors,
    warnings,
  };
}
/**
 * Cross-source comparison: decide whether two extractions describe the same
 * offer within tolerance.
 *
 * They agree when both carry a price in the same currency, the relative price
 * difference (measured against the larger price) does not exceed
 * `priceTolerance`, and — if both have a part number — the part numbers are
 * equal after removing whitespace, hyphens and underscores and upper-casing.
 *
 * @param a              First extraction.
 * @param b              Second extraction.
 * @param priceTolerance Allowed relative price difference (default 0.10 = 10%).
 * @returns true when the two extractions agree.
 */
export function crossValidate(
  a: StockExtractionResult,
  b: StockExtractionResult,
  priceTolerance = 0.10 // 10% price difference allowed
): boolean {
  const priceA = a.price;
  const priceB = b.price;

  // Without two prices in one currency there is nothing to compare.
  if (priceA === null || priceB === null || a.currency !== b.currency) {
    return false;
  }

  const relativeGap = Math.abs(priceA - priceB) / Math.max(priceA, priceB);
  if (relativeGap > priceTolerance) {
    return false;
  }

  // Part numbers are only compared when both sides report one.
  const partA = a.part_number;
  const partB = b.part_number;
  if (!partA || !partB) {
    return true;
  }

  const canonical = (partNumber: string): string =>
    partNumber.replace(/[\s\-_]/g, "").toUpperCase();
  return canonical(partA) === canonical(partB);
}

View File

@ -67,6 +67,9 @@ export async function registerSchedules(boss: PgBoss): Promise<void> {
"scrape:news", "scrape:news",
"scrape:faq", "scrape:faq",
"scrape:docs", "scrape:docs",
"scrape:market-intel",
"compute:abc",
"compute:reorder-signals",
]; ];
for (const q of queues) { for (const q of queues) {
await boss.createQueue(q).catch(() => { /* already exists */ }); await boss.createQueue(q).catch(() => { /* already exists */ });
@ -140,6 +143,24 @@ export async function registerSchedules(boss: PgBoss): Promise<void> {
expireInSeconds: 7200, expireInSeconds: 7200,
}); });
// Market intelligence: OFC/ECOC, IEEE, TED, Farnell/Mouser lead times (every Tuesday 5am)
await boss.schedule("scrape:market-intel", "0 5 * * 2", {}, {
retryLimit: 2,
expireInSeconds: 3600,
});
// ABC classification recompute (after each major pricing run — daily at 8am)
await boss.schedule("compute:abc", "0 8 * * *", {}, {
retryLimit: 2,
expireInSeconds: 600,
});
// Reorder signals recompute (daily at 8:30am — after ABC)
await boss.schedule("compute:reorder-signals", "30 8 * * *", {}, {
retryLimit: 2,
expireInSeconds: 600,
});
console.log("All schedules registered"); console.log("All schedules registered");
} }
@ -208,5 +229,23 @@ export async function registerWorkers(boss: PgBoss): Promise<void> {
console.log(`[${new Date().toISOString()}] Docs scraper — not yet implemented`); console.log(`[${new Date().toISOString()}] Docs scraper — not yet implemented`);
}); });
await boss.work("scrape:market-intel", async (_job) => {
console.log(`[${new Date().toISOString()}] Running: Market intelligence`);
const { scrapeMarketIntelligence } = await import("./scrapers/market-intelligence");
await withIsolatedStorage("market-intel", scrapeMarketIntelligence);
});
await boss.work("compute:abc", async (_job) => {
console.log(`[${new Date().toISOString()}] Computing: ABC classification`);
const { computeAbcClassification } = await import("./scrapers/market-intelligence");
await computeAbcClassification();
});
await boss.work("compute:reorder-signals", async (_job) => {
console.log(`[${new Date().toISOString()}] Computing: Reorder signals`);
const { computeReorderSignals } = await import("./scrapers/market-intelligence");
await computeReorderSignals();
});
console.log("All workers registered"); console.log("All workers registered");
} }

View File

@ -0,0 +1,299 @@
/**
* Market Intelligence Scraper
*
* Collects procurement-relevant signals from:
* - OFC/ECOC conference programs
* - Farnell/Mouser lead times for optical modules
* - IEEE 802.3 working group status page
* - EU TED tender database (fiber infrastructure)
* - LightReading/FierceTelecom trade press
*
* Runs weekly via pg-boss scheduler.
* Results stored in market_intelligence table.
* LLM analysis via Crawler LLM extractMarketIntel().
*/
import { CheerioCrawler } from "crawlee";
import { extractMarketIntel } from "../crawler-llm/core";
import { pool } from "../utils/db";
/** One web page that is crawled for market intelligence. */
interface IntelSource {
// Human-readable name of the source
name: string;
// Page to fetch
url: string;
// Kind of intelligence associated with this source
type: "trade_show" | "standard_ratified" | "standard_draft" | "distributor_lead_time" | "supply_chain" | "tender" | "capex_cycle";
fetchText: (html: string) => string; // extract relevant text from HTML
}
/** Replace every HTML tag with a space and collapse whitespace runs to one space. */
function stripTagsToText(html: string): string {
  return html.replace(/<[^>]+>/g, " ").replace(/\s+/g, " ");
}

/** Build a fetchText callback that returns the first `limit` characters of the tag-stripped page. */
function clippedPageText(limit: number): (html: string) => string {
  return (html: string) => stripTagsToText(html).substring(0, limit);
}

/**
 * Pages crawled for market intelligence. Each entry pairs a URL with a function
 * that reduces the fetched HTML to the text handed to the LLM.
 */
const SOURCES: IntelSource[] = [
  {
    name: "OFC Conference News",
    url: "https://www.ofcconference.org/en-us/home/news/",
    type: "trade_show",
    fetchText: clippedPageText(8000),
  },
  {
    name: "LightReading — Optical Networking",
    url: "https://www.lightreading.com/optical-networking",
    type: "supply_chain",
    fetchText: clippedPageText(8000),
  },
  {
    name: "IEEE 802.3 Working Group",
    url: "https://www.ieee802.org/3/",
    type: "standard_draft",
    fetchText: clippedPageText(6000),
  },
  {
    name: "EU TED — ICT/Fiber Tenders",
    url: "https://ted.europa.eu/en/search/result?forms%5B0%5D%5Bcpv%5D=32571000",
    type: "tender",
    fetchText: clippedPageText(8000),
  },
  {
    name: "Farnell — SFP Fiber Transceivers Lead Times",
    url: "https://de.farnell.com/c/passive-optische-netzwerke/glasfasertransceiver",
    type: "distributor_lead_time",
    fetchText: (html: string) => {
      // Collect lead time mentions such as "3 Wochen", "2 weeks" or "10 days"
      // from the raw HTML and put them in front of the page text.
      const mentions = html.match(/(\d+)\s*(wochen|weeks|days|tage|week|day)/gi) ?? [];
      const pageText = stripTagsToText(html);
      return `Lead time mentions: ${mentions.join(", ")}\n\nPage context:\n${pageText.substring(0, 5000)}`;
    },
  },
  {
    name: "Mouser — Optical Transceivers Category",
    url: "https://www.mouser.de/c/optoelectronics/fiber-optic-components/fiber-optic-transceivers/",
    type: "distributor_lead_time",
    fetchText: clippedPageText(6000),
  },
  {
    name: "FierceTelecom — Optical News",
    url: "https://www.fiercetelecom.com/optical",
    type: "supply_chain",
    fetchText: clippedPageText(8000),
  },
];
/**
 * Store one analysed intel item in market_intelligence.
 *
 * Items that the LLM marked as not relevant, or whose confidence is below 0.5,
 * are skipped. The LLM confidence is stored as relevance_score. Conflicting rows
 * are left untouched (ON CONFLICT DO NOTHING).
 *
 * `published_at` is LLM output: if it cannot be parsed as a date, NULL is stored
 * instead of handing an Invalid Date to the database driver.
 *
 * @param item   Result of extractMarketIntel().
 * @param source Source the item was derived from; its name is stored.
 * @param url    URL the text was fetched from.
 */
async function saveIntelItem(
  item: Awaited<ReturnType<typeof extractMarketIntel>>,
  source: IntelSource,
  url: string
): Promise<void> {
  if (!item.is_relevant || item.confidence < 0.5) return;

  const parsedDate = item.published_at ? new Date(item.published_at) : null;
  const publishedAt =
    parsedDate !== null && !Number.isNaN(parsedDate.getTime()) ? parsedDate : null;

  await pool.query(
    `INSERT INTO market_intelligence
(intel_type, title, summary, relevance_score, technologies,
buy_signal_implication, impact_horizon_months, source_url, source_name, published_at)
VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10)
ON CONFLICT DO NOTHING`,
    [
      item.intel_type,
      item.title.substring(0, 500),
      item.summary,
      item.confidence,
      item.technologies,
      item.buy_signal_implication,
      item.impact_horizon_months,
      url,
      source.name,
      publishedAt,
    ]
  );
  console.log(`[market-intel] Saved: ${item.title.substring(0, 60)}... (${item.buy_signal_implication})`);
}
/**
 * Crawl every entry of SOURCES, run the fetched text through
 * extractMarketIntel() and store the result via saveIntelItem().
 *
 * Pages yielding fewer than 200 characters of text are skipped with a warning.
 * Errors from the LLM/save step are logged per page and do not abort the crawl.
 * The final log line reports how many pages went through the LLM/save step
 * without throwing.
 */
export async function scrapeMarketIntelligence(): Promise<void> {
  console.log("[market-intel] Starting market intelligence scrape...");

  let processed = 0;
  const startRequests = SOURCES.map((source) => ({ url: source.url }));

  const crawler = new CheerioCrawler({
    maxRequestsPerCrawl: SOURCES.length + 20,
    maxConcurrency: 2,
    requestHandlerTimeoutSecs: 30,
    async requestHandler({ request, body }) {
      const html = body.toString();
      const pageUrl = request.url;

      // Match the page to its source by URL prefix, ignoring the source's query string.
      const source = SOURCES.find((candidate) =>
        pageUrl.startsWith(candidate.url.split("?")[0])
      );
      if (!source) {
        return;
      }

      const text = source.fetchText(html);
      if (text.length < 200) {
        console.warn(`[market-intel] Too little text from ${pageUrl}`);
        return;
      }

      try {
        const intel = await extractMarketIntel(text, pageUrl, source.name);
        await saveIntelItem(intel, source, pageUrl);
        processed += 1;
      } catch (err) {
        console.error(`[market-intel] LLM error for ${pageUrl}:`, err);
      }
    },
    async failedRequestHandler({ request }) {
      console.warn(`[market-intel] Failed: ${request.url}`);
    },
  });

  await crawler.addRequests(startRequests);
  await crawler.run();

  console.log(`[market-intel] Done. Processed ${processed}/${SOURCES.length} sources.`);
}
// ─────────────────────────────────────────────────────────────────────────────
// ABC Classification computation — run after each major scrape cycle
// ─────────────────────────────────────────────────────────────────────────────
/**
 * Recompute the ABC classification for every transceiver that is not marked as
 * 'garbage' and upsert it into abc_classification.
 *
 * Inputs per transceiver: price observations of the last 90 days (obs_90d),
 * number of distinct vendors and price volatility (stddev / mean) over all
 * price observations, and the number of 'compatible' compatibility rows.
 *
 * Transceivers without price observations or without compatibility rows come
 * out of the LEFT JOINs with NULL counts. These are coalesced to 0: PostgreSQL's
 * LEAST() ignores NULL arguments, so LEAST(100, NULL) would otherwise give such
 * items the maximum demand score of 100.
 *
 * Afterwards the number of items per class is logged.
 */
export async function computeAbcClassification(): Promise<void> {
  console.log("[abc] Computing ABC classification...");
  await pool.query(`
INSERT INTO abc_classification
(transceiver_id, abc_class, obs_90d, compat_count, vendor_count, price_volatility, demand_score, supply_risk)
SELECT
t.id,
CASE
WHEN COALESCE(obs_90d, 0) > 50 AND COALESCE(compat_count, 0) > 100 THEN 'A'
WHEN COALESCE(obs_90d, 0) > 15 OR COALESCE(compat_count, 0) > 30 THEN 'B'
ELSE 'C'
END AS abc_class,
COALESCE(obs_90d, 0) AS obs_90d,
COALESCE(compat_count, 0) AS compat_count,
COALESCE(vendor_count, 0) AS vendor_count,
price_volatility,
-- demand score 0-100: weighted combination (missing counts treated as 0)
LEAST(100, (COALESCE(obs_90d, 0) * 0.5 + COALESCE(compat_count, 0) * 0.3 + COALESCE(vendor_count, 0) * 5)) AS demand_score,
CASE
WHEN price_volatility > 0.3 THEN 'high'
WHEN price_volatility > 0.1 THEN 'medium'
ELSE 'low'
END AS supply_risk
FROM transceivers t
LEFT JOIN (
SELECT transceiver_id,
COUNT(*) FILTER (WHERE time > NOW() - INTERVAL '90 days') AS obs_90d,
STDDEV(price) / NULLIF(AVG(price), 0) AS price_volatility,
COUNT(DISTINCT source_vendor_id) AS vendor_count
FROM price_observations
GROUP BY transceiver_id
) po ON po.transceiver_id = t.id
LEFT JOIN (
SELECT transceiver_id, COUNT(*) AS compat_count
FROM compatibility
WHERE status = 'compatible'
GROUP BY transceiver_id
) co ON co.transceiver_id = t.id
WHERE t.data_confidence != 'garbage' OR t.data_confidence IS NULL
ON CONFLICT (transceiver_id) DO UPDATE SET
abc_class = EXCLUDED.abc_class,
obs_90d = EXCLUDED.obs_90d,
compat_count = EXCLUDED.compat_count,
vendor_count = EXCLUDED.vendor_count,
price_volatility = EXCLUDED.price_volatility,
demand_score = EXCLUDED.demand_score,
supply_risk = EXCLUDED.supply_risk,
computed_at = NOW()
`);
  // Summary for the log: number of classified transceivers per class.
  const stats = await pool.query(`
SELECT abc_class, COUNT(*) AS count FROM abc_classification GROUP BY abc_class ORDER BY abc_class
`);
  console.log("[abc] Classification done:", stats.rows.map((r) => `${r.abc_class}: ${r.count}`).join(", "));
}
// ─────────────────────────────────────────────────────────────────────────────
// Reorder Signal computation
// ─────────────────────────────────────────────────────────────────────────────
/**
 * Computes one reorder signal ('buy_now' | 'wait' | 'hold' | 'monitor') per
 * non-garbage transceiver and appends it to `reorder_signals`.
 *
 * Per transceiver the query gathers:
 *  - average price over the last 14 days vs. the 14-60 day window before it
 *  - number of 'in_stock' / 'out_of_stock' snapshots in the last 7 days
 *  - average lead time (days) over the last 30 days
 *  - the most severe lifecycle `impact_level` attached to the transceiver
 *    itself or to its speed class (`<speed_gbps>G`)
 *
 * Rules are evaluated in priority order; the first match wins:
 *  1. critical/high lifecycle event        -> buy_now (0.9)
 *  2. declining stock on an A-product      -> buy_now (0.8)
 *  3. lead time >= 12 weeks                -> buy_now (0.75)
 *  4. price fell more than 10%             -> wait    (0.7)
 *  5. price rose more than 10%             -> buy_now (0.65)
 *  6. A-product with stable stock          -> hold    (0.5)
 *  7. C-product                            -> monitor (0.3)
 *  otherwise                               -> monitor (0.3), "insufficient data"
 *
 * Every run INSERTs a new row per transceiver; earlier rows are not updated
 * or deleted here.
 */
export async function computeReorderSignals(): Promise<void> {
  console.log("[reorder] Computing reorder signals...");
  // The severity subquery ranks impact_level explicitly. MAX() on the TEXT
  // column would compare alphabetically ('medium' > 'low' > 'high' > 'critical'),
  // letting a 'medium' event hide a 'critical' one.
  // NOTE(review): the technology match assumes speed_gbps::text renders like
  // '10' (not '10.00'); confirm against the transceivers column type.
  const transceivers = await pool.query(`
    SELECT
      t.id, t.part_number, t.standard_name, t.speed_gbps, t.form_factor,
      ac.abc_class, ac.price_volatility, ac.supply_risk,
      -- Price trend: is price rising or falling?
      (SELECT AVG(price) FROM price_observations WHERE transceiver_id = t.id AND time > NOW() - INTERVAL '14 days') AS price_recent,
      (SELECT AVG(price) FROM price_observations WHERE transceiver_id = t.id AND time BETWEEN NOW() - INTERVAL '60 days' AND NOW() - INTERVAL '14 days') AS price_older,
      -- Stock trend: how many snapshots show in_stock / out_of_stock recently?
      (SELECT COUNT(*) FROM stock_snapshots WHERE transceiver_id = t.id AND stock_level = 'in_stock' AND scraped_at > NOW() - INTERVAL '7 days') AS in_stock_recent,
      (SELECT COUNT(*) FROM stock_snapshots WHERE transceiver_id = t.id AND stock_level = 'out_of_stock' AND scraped_at > NOW() - INTERVAL '7 days') AS oos_recent,
      -- Lead time
      (SELECT AVG(lead_time_days) FROM stock_snapshots WHERE transceiver_id = t.id AND lead_time_days IS NOT NULL AND scraped_at > NOW() - INTERVAL '30 days') AS avg_lead_time_days,
      -- Lifecycle events: most severe impact level, ranked explicitly
      (SELECT ple.impact_level
         FROM product_lifecycle_events ple
        WHERE ple.transceiver_id = t.id OR ple.technology = t.speed_gbps::text || 'G'
        ORDER BY CASE ple.impact_level
                   WHEN 'critical' THEN 4
                   WHEN 'high' THEN 3
                   WHEN 'medium' THEN 2
                   WHEN 'low' THEN 1
                   ELSE 0
                 END DESC
        LIMIT 1) AS lifecycle_impact
    FROM transceivers t
    LEFT JOIN abc_classification ac ON ac.transceiver_id = t.id
    WHERE (t.data_confidence != 'garbage' OR t.data_confidence IS NULL)
  `);

  // Aggregates may arrive as strings (the driver can return NUMERIC/BIGINT that
  // way); convert explicitly and map NULL / non-numeric values to null.
  const toNumber = (value: unknown): number | null => {
    if (value === null || value === undefined) return null;
    const parsed = Number(value);
    return Number.isFinite(parsed) ? parsed : null;
  };

  let computed = 0;
  for (const row of transceivers.rows) {
    const reasons: string[] = [];
    let signal: "buy_now" | "wait" | "hold" | "monitor" = "monitor";
    let strength = 0.3;

    const priceRecent = toNumber(row.price_recent);
    const priceOlder = toNumber(row.price_older);
    // Relative change vs. the older window; needs both averages and a
    // non-zero baseline (avoids Infinity/NaN from dividing by zero).
    const priceTrend =
      priceRecent !== null && priceOlder !== null && priceOlder > 0
        ? (priceRecent - priceOlder) / priceOlder
        : null;

    const oosRecent = toNumber(row.oos_recent) ?? 0;
    const inStockRecent = toNumber(row.in_stock_recent) ?? 0;
    const stockTrend =
      oosRecent > 2 ? "declining" :
      inStockRecent > 2 ? "stable" : "unknown";

    const avgLeadTimeDays = toNumber(row.avg_lead_time_days);
    const leadTimeWeeks = avgLeadTimeDays !== null ? Math.ceil(avgLeadTimeDays / 7) : null;

    // Signal logic: first matching rule wins
    if (row.lifecycle_impact === "critical" || row.lifecycle_impact === "high") {
      signal = "buy_now"; strength = 0.9;
      reasons.push("EOL/critical lifecycle event detected");
    } else if (stockTrend === "declining" && row.abc_class === "A") {
      signal = "buy_now"; strength = 0.8;
      reasons.push("Stock declining at multiple vendors (A-product)");
    } else if (leadTimeWeeks !== null && leadTimeWeeks >= 12) {
      signal = "buy_now"; strength = 0.75;
      reasons.push(`Long lead time: ${leadTimeWeeks} weeks — order in advance`);
    } else if (priceTrend !== null && priceTrend < -0.10) {
      signal = "wait"; strength = 0.7;
      reasons.push(`Price falling ${Math.abs(Math.round(priceTrend * 100))}% — wait for floor`);
    } else if (priceTrend !== null && priceTrend > 0.10) {
      signal = "buy_now"; strength = 0.65;
      reasons.push(`Price rising ${Math.round(priceTrend * 100)}% — buy before further increase`);
    } else if (row.abc_class === "A" && stockTrend === "stable") {
      signal = "hold"; strength = 0.5;
      reasons.push("A-product, stable pricing and availability");
    } else if (row.abc_class === "C") {
      signal = "monitor"; strength = 0.3;
      reasons.push("C-product: low demand — order on demand only");
    }
    if (reasons.length === 0) reasons.push("Insufficient data for strong signal");

    // The stored price_trend label uses a tighter ±5% band than the ±10% signal rules.
    const priceTrendLabel =
      priceTrend === null ? "unknown" :
      priceTrend > 0.05 ? "rising" :
      priceTrend < -0.05 ? "falling" : "stable";

    await pool.query(
      `INSERT INTO reorder_signals
        (transceiver_id, signal, signal_strength, reasons, stock_trend, price_trend, lead_time_weeks)
      VALUES ($1, $2, $3, $4, $5, $6, $7)`,
      [
        row.id,
        signal,
        strength,
        // serialized explicitly so the driver sends JSON text for the JSONB column
        JSON.stringify(reasons),
        stockTrend,
        priceTrendLabel,
        leadTimeWeeks,
      ]
    );
    computed++;
  }
  console.log(`[reorder] Computed ${computed} reorder signals.`);
}

View File

@ -0,0 +1,338 @@
-- Migration 019: Procurement Intelligence Engine
-- Stock tracking, ABC classification, reorder signals, market intelligence
-- v0.2.0 — WS0c: Procurement Intelligence Foundation
-- ─────────────────────────────────────────────────────────────────────────────
-- 1. Stock Snapshots — time-series inventory levels per vendor per product
-- ─────────────────────────────────────────────────────────────────────────────
-- One row per scraped stock observation of a product at a vendor.
-- There is no uniqueness constraint on (transceiver_id, vendor_id), so each
-- scrape can add a new row; scraped_at orders the history.
-- Rows are removed automatically when the referenced transceiver or vendor is deleted.
CREATE TABLE IF NOT EXISTS stock_snapshots (
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
transceiver_id UUID REFERENCES transceivers(id) ON DELETE CASCADE,
vendor_id UUID REFERENCES vendors(id) ON DELETE CASCADE,
stock_level TEXT CHECK (stock_level IN ('in_stock', 'out_of_stock', 'limited', 'unknown')) DEFAULT 'unknown',
stock_quantity INT, -- exact quantity if vendor shows it
incoming_quantity INT, -- units on their way to the vendor, e.g. "18 incoming"
incoming_eta DATE, -- expected arrival of incoming stock, e.g. "available from April 15"
lead_time_days INT, -- delivery time in days, e.g. "delivery time: 3-5 business days"
moq INT, -- minimum order quantity
price_breaks JSONB, -- [{qty:10, price:89.00}, {qty:50, price:74.00}]
source_url TEXT,
crawler_confidence NUMERIC(3,2), -- 0.00–1.00 (Crawler LLM confidence)
scraped_at TIMESTAMPTZ DEFAULT NOW()
);
-- Newest-first history lookup per product
CREATE INDEX IF NOT EXISTS idx_stock_transceiver ON stock_snapshots(transceiver_id, scraped_at DESC);
-- Newest-first history lookup per vendor
CREATE INDEX IF NOT EXISTS idx_stock_vendor ON stock_snapshots(vendor_id, scraped_at DESC);
-- Partial index: rows with stock_level 'unknown' are excluded
CREATE INDEX IF NOT EXISTS idx_stock_level ON stock_snapshots(stock_level) WHERE stock_level != 'unknown';
-- ─────────────────────────────────────────────────────────────────────────────
-- 2. ABC Classification — computed turnover category
-- ─────────────────────────────────────────────────────────────────────────────
-- At most one classification row per transceiver (transceiver_id is the primary key),
-- removed automatically when the transceiver is deleted.
CREATE TABLE IF NOT EXISTS abc_classification (
transceiver_id UUID PRIMARY KEY REFERENCES transceivers(id) ON DELETE CASCADE,
abc_class TEXT NOT NULL CHECK (abc_class IN ('A', 'B', 'C')),
-- inputs
obs_90d INT DEFAULT 0, -- price observations in last 90 days (proxy for market demand)
compat_count INT DEFAULT 0, -- number of compatible switches (market breadth)
vendor_count INT DEFAULT 0, -- number of vendors selling it (competition = demand signal)
price_volatility NUMERIC(5,4), -- STDDEV/AVG — high volatility = contested market; NUMERIC(5,4) holds at most 9.9999
-- derived signals
demand_score NUMERIC(5,2), -- composite 0-100
supply_risk TEXT CHECK (supply_risk IN ('low', 'medium', 'high')),
computed_at TIMESTAMPTZ DEFAULT NOW()
);
-- Supports filtering/grouping by class
CREATE INDEX IF NOT EXISTS idx_abc_class ON abc_classification(abc_class);
-- ─────────────────────────────────────────────────────────────────────────────
-- 3. Reorder Signals — computed buy/wait/hold/monitor recommendations
-- ─────────────────────────────────────────────────────────────────────────────
-- Append-only history of computed recommendations: there is no uniqueness
-- constraint on transceiver_id, so a product can have many rows over time.
-- A row counts as current while expires_at (default: 24 hours after insert)
-- lies in the future.
CREATE TABLE IF NOT EXISTS reorder_signals (
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
transceiver_id UUID REFERENCES transceivers(id) ON DELETE CASCADE,
signal TEXT NOT NULL CHECK (signal IN ('buy_now', 'wait', 'hold', 'monitor')),
signal_strength NUMERIC(3,2), -- 0.00–1.00 (how strong the signal is)
reasons JSONB, -- ["Stock declining at 3 vendors", "Lead time 16 weeks"]
stock_trend TEXT CHECK (stock_trend IN ('declining', 'stable', 'increasing', 'unknown')),
price_trend TEXT CHECK (price_trend IN ('falling', 'stable', 'rising', 'unknown')),
lead_time_weeks INT,
hype_phase TEXT, -- from hype_cycle data
computed_at TIMESTAMPTZ DEFAULT NOW(),
expires_at TIMESTAMPTZ DEFAULT NOW() + INTERVAL '24 hours'
);
-- Newest-first lookup of a product's signals
CREATE INDEX IF NOT EXISTS idx_reorder_transceiver ON reorder_signals(transceiver_id, computed_at DESC);
-- Index predicates may only use IMMUTABLE functions, so a partial index with
-- "WHERE expires_at > NOW()" is rejected by PostgreSQL and would abort this
-- migration. A composite index serves "signal = ? AND expires_at > NOW()" instead.
CREATE INDEX IF NOT EXISTS idx_reorder_signal ON reorder_signals(signal, expires_at);
-- ─────────────────────────────────────────────────────────────────────────────
-- 4. Product Lifecycle Events — EOL, new standards, CapEx peaks, trade shows
-- ─────────────────────────────────────────────────────────────────────────────
-- Lifecycle and market events that may influence buying decisions.
-- An event is tied either to one product (transceiver_id) or to a whole
-- technology class (technology); both columns are nullable.
-- Note: the transceiver_id foreign key has no ON DELETE action, unlike the
-- CASCADE used by the stock/ABC/reorder tables in this migration.
CREATE TABLE IF NOT EXISTS product_lifecycle_events (
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
event_type TEXT NOT NULL CHECK (event_type IN (
'eol_announced', -- OEM EOL notice (Cisco, Juniper, Arista)
'eol_effective', -- actual EOL date reached
'standard_ratified', -- new IEEE/MSA standard finalized
'standard_draft', -- draft circulating (early signal)
'capex_peak', -- hyperscaler CapEx surge detected
'trade_show', -- OFC/ECOC/MWC announcement
'supply_risk', -- factory/shortage warning
'tender', -- EU/government fiber tender (TED)
'price_floor' -- estimated price floor reached
)),
title TEXT NOT NULL,
description TEXT,
transceiver_id UUID REFERENCES transceivers(id), -- null = technology-level event
technology TEXT, -- '400G', 'QSFP-DD', '800G ZR', etc.
effective_date DATE, -- when this event takes effect
source_url TEXT,
source_name TEXT,
-- Stored as text: alphabetical order does NOT match severity order
-- (low < medium < high < critical), so rank explicitly when comparing.
impact_level TEXT CHECK (impact_level IN ('low', 'medium', 'high', 'critical')) DEFAULT 'medium',
buy_signal TEXT CHECK (buy_signal IN ('buy_now', 'wait', 'hold', 'monitor')),
verified BOOLEAN DEFAULT false,
created_at TIMESTAMPTZ DEFAULT NOW()
);
-- Newest-first listing per event type
CREATE INDEX IF NOT EXISTS idx_lifecycle_type ON product_lifecycle_events(event_type, created_at DESC);
-- Partial indexes: only rows where the indexed column is set
CREATE INDEX IF NOT EXISTS idx_lifecycle_technology ON product_lifecycle_events(technology) WHERE technology IS NOT NULL;
CREATE INDEX IF NOT EXISTS idx_lifecycle_signal ON product_lifecycle_events(buy_signal) WHERE buy_signal IS NOT NULL;
-- ─────────────────────────────────────────────────────────────────────────────
-- 5. Market Intelligence — hyperscaler CapEx, OFC/ECOC, standards, tenders
-- ─────────────────────────────────────────────────────────────────────────────
-- Market-level intelligence items (not tied to a single product).
-- Items reference technologies through the free-text technologies array.
-- Besides the generated id there is no unique constraint on this table.
CREATE TABLE IF NOT EXISTS market_intelligence (
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
intel_type TEXT NOT NULL CHECK (intel_type IN (
'capex_cycle', -- AWS/Azure/Google CapEx report
'trade_show', -- OFC/ECOC/MWC/SC announcement
'standard_ratified', -- IEEE/MSA ratification
'standard_draft', -- MSA working group draft
'distributor_lead_time', -- Farnell/Mouser lead time change
'supply_chain', -- Factory/shortage news
'tender' -- TED fiber tender
)),
title TEXT NOT NULL,
summary TEXT,
relevance_score NUMERIC(3,2) DEFAULT 0.5, -- 0-1, LLM-assessed relevance
technologies TEXT[], -- ['400G', 'QSFP-DD', 'ZR']
buy_signal_implication TEXT CHECK (buy_signal_implication IN ('buy_now', 'wait', 'hold', 'monitor', 'none')),
impact_horizon_months INT, -- how many months until this matters
source_url TEXT,
source_name TEXT NOT NULL,
published_at TIMESTAMPTZ,
created_at TIMESTAMPTZ DEFAULT NOW()
);
-- Newest-first listing per intel type
CREATE INDEX IF NOT EXISTS idx_intel_type ON market_intelligence(intel_type, created_at DESC);
-- GIN index for array containment/overlap queries on technologies
CREATE INDEX IF NOT EXISTS idx_intel_technologies ON market_intelligence USING gin(technologies);
CREATE INDEX IF NOT EXISTS idx_intel_signal ON market_intelligence(buy_signal_implication);
-- ─────────────────────────────────────────────────────────────────────────────
-- 6. Crawler LLM Scrape Log — audit trail for Crawler LLM results
-- ─────────────────────────────────────────────────────────────────────────────
-- One row per page processed by the Crawler LLM: what was fetched, what was
-- extracted, and whether the result passed validation.
-- vendor_id and transceiver_id are optional references with no ON DELETE action.
CREATE TABLE IF NOT EXISTS crawler_llm_log (
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
url TEXT NOT NULL,
vendor_id UUID REFERENCES vendors(id),
transceiver_id UUID REFERENCES transceivers(id),
is_product_page BOOLEAN,
extracted_data JSONB, -- raw extraction result
confidence NUMERIC(3,2), -- NUMERIC(3,2): values up to 9.99 fit; presumably 0.00–1.00 like crawler_confidence — confirm
validation_passed BOOLEAN,
failure_reason TEXT,
model_used TEXT DEFAULT 'qwen2.5:14b', -- default model tag recorded when none is supplied
scraped_at TIMESTAMPTZ DEFAULT NOW()
);
-- Newest-first log lookup per URL and per vendor
CREATE INDEX IF NOT EXISTS idx_llm_log_url ON crawler_llm_log(url, scraped_at DESC);
CREATE INDEX IF NOT EXISTS idx_llm_log_vendor ON crawler_llm_log(vendor_id, scraped_at DESC);
-- ─────────────────────────────────────────────────────────────────────────────
-- 7. Useful views
-- ─────────────────────────────────────────────────────────────────────────────
-- Latest stock per product per vendor
-- Returns the most recent snapshot for each (transceiver, vendor) pair,
-- enriched with product and vendor display columns.
-- DISTINCT ON keeps the first row per pair under the ORDER BY below, i.e. the
-- one with the latest scraped_at. The inner joins drop snapshots whose
-- transceiver_id or vendor_id is NULL.
CREATE OR REPLACE VIEW v_stock_current AS
SELECT DISTINCT ON (ss.transceiver_id, ss.vendor_id)
ss.*,
t.part_number, t.standard_name, t.form_factor, t.speed_gbps,
v.name AS vendor_name, v.slug AS vendor_slug
FROM stock_snapshots ss
JOIN transceivers t ON ss.transceiver_id = t.id
JOIN vendors v ON ss.vendor_id = v.id
ORDER BY ss.transceiver_id, ss.vendor_id, ss.scraped_at DESC;
-- Active reorder signals (not expired)
-- Returns, per transceiver, the most recently computed reorder signal, provided
-- it has not expired, together with product columns and the ABC class (NULL
-- when the product has no classification row). Strongest signals come first.
-- If a transceiver has several rows sharing the same latest computed_at, all
-- of them are returned.
CREATE OR REPLACE VIEW v_reorder_signals_active AS
SELECT rs.*,
t.part_number, t.standard_name, t.form_factor, t.speed_gbps, t.reach_label,
ac.abc_class
FROM reorder_signals rs
JOIN transceivers t ON rs.transceiver_id = t.id
LEFT JOIN abc_classification ac ON ac.transceiver_id = t.id
WHERE rs.expires_at > NOW()
-- keep only the latest row for this transceiver
AND rs.computed_at = (
SELECT MAX(computed_at) FROM reorder_signals r2 WHERE r2.transceiver_id = rs.transceiver_id
)
ORDER BY rs.signal_strength DESC;
-- Stock trend (is it declining at vendors?)
-- Per (transceiver, vendor) stock trend over the trailing 30 days.
-- The inner query counts snapshots per window; the outer query derives the label:
--   'declining' when the last 7 days have more out-of-stock snapshots than the 7-30 day window,
--   'stable'    when the last 7 days have more than two in-stock snapshots,
--   'unknown'   otherwise.
CREATE OR REPLACE VIEW v_stock_trend AS
SELECT
  w.transceiver_id,
  w.vendor_id,
  w.snapshot_count,
  w.oos_recent,
  w.in_stock_recent,
  w.oos_older,
  CASE
    WHEN w.oos_recent > w.oos_older THEN 'declining'
    WHEN w.in_stock_recent > 2 THEN 'stable'
    ELSE 'unknown'
  END AS trend
FROM (
  SELECT
    transceiver_id,
    vendor_id,
    COUNT(*) AS snapshot_count,
    COUNT(*) FILTER (WHERE stock_level = 'out_of_stock' AND scraped_at > NOW() - INTERVAL '7 days') AS oos_recent,
    COUNT(*) FILTER (WHERE stock_level = 'in_stock' AND scraped_at > NOW() - INTERVAL '7 days') AS in_stock_recent,
    COUNT(*) FILTER (WHERE stock_level = 'out_of_stock' AND scraped_at BETWEEN NOW() - INTERVAL '30 days' AND NOW() - INTERVAL '7 days') AS oos_older
  FROM stock_snapshots
  WHERE scraped_at > NOW() - INTERVAL '30 days'
  GROUP BY transceiver_id, vendor_id
) w;
-- ─────────────────────────────────────────────────────────────────────────────
-- 8. Seed: Known market intelligence events (static knowledge base)
-- ─────────────────────────────────────────────────────────────────────────────
-- Seed rows are inserted only when no row with the same title and source_name
-- exists yet. market_intelligence has no unique constraint besides its random
-- UUID id, so "ON CONFLICT DO NOTHING" could never fire and re-running this
-- migration would duplicate every seed row.
INSERT INTO market_intelligence (intel_type, title, summary, relevance_score, technologies, buy_signal_implication, impact_horizon_months, source_name, published_at)
SELECT
  seed.intel_type, seed.title, seed.summary, seed.relevance_score, seed.technologies,
  seed.buy_signal_implication, seed.impact_horizon_months, seed.source_name, seed.published_at
FROM (VALUES
(
'trade_show',
'OFC 2026 — Key 800G ZR and Co-Packaged Optics Announcements',
'OFC 2026 highlighted accelerated 800G ZR deployment timelines and first Co-Packaged Optics (CPO) demos from Broadcom and Intel. CPO replaces pluggable modules in 4-6 years for hyperscaler intra-DC. Short term: 400G ZR+ and 800G QSFP-DD demand surge expected in 2026-2027.',
0.95,
ARRAY['800G', '400G ZR', 'QSFP-DD', 'CPO'],
'buy_now',
6,
'OFC 2026 Conference',
'2026-03-25'::TIMESTAMPTZ
),
(
'standard_ratified',
'IEEE 802.3df — 100G, 200G, 400G Ethernet over single-mode fiber',
'802.3df ratified December 2024. Defines 100GBASE-DR, 200GBASE-DR4, 400GBASE-DR4 with PAM4 modulation. Vendors shipping compliant optics in H1 2026. Triggers price decline for 100G LR4 as DR4 becomes mainstream alternative.',
0.88,
ARRAY['100G', '200G', '400G', 'DR4', 'PAM4'],
'wait',
3,
'IEEE 802.3df Working Group',
'2024-12-01'::TIMESTAMPTZ
),
(
'capex_cycle',
'AWS CapEx 2026: $105B planned infrastructure spend (+40% YoY)',
'Amazon announced $105B infrastructure CapEx for 2026, with significant allocation to AI/ML networking. Q1/Q2 typically slower, Q3/Q4 peak deployment. Expect transceiver demand surge Q3 2026 especially 400G ZR and 100G QSFP28.',
0.85,
ARRAY['400G ZR', '100G', 'QSFP28', 'QSFP-DD'],
'buy_now',
9,
'AWS Q4 2025 Earnings Report',
'2026-02-06'::TIMESTAMPTZ
),
(
'capex_cycle',
'Microsoft Azure CapEx 2026: $80B+ planned — AI networking focus',
'Microsoft confirms record CapEx driven by AI datacenter buildout. Azure networking upgrades prioritizing 400G+ spine/leaf. Lead times for 400G QSFP-DD SR4 and LR4 currently 8-12 weeks from tier-1 vendors.',
0.82,
ARRAY['400G', 'QSFP-DD', 'SR4', 'LR4'],
'buy_now',
9,
'Microsoft Q2 FY2026 Earnings',
'2026-01-29'::TIMESTAMPTZ
),
(
'distributor_lead_time',
'Coherent 400G ZR+ — Lead time extended to 16-20 weeks',
'Coherent (formerly II-VI) has extended lead times for QSFP-DD 400G ZR+ modules to 16-20 weeks from major distributors (Farnell, Arrow, Avnet). Cause: wafer fab capacity constrained by AI optics demand. Expected normalization Q4 2026.',
0.92,
ARRAY['400G ZR', 'QSFP-DD', 'Coherent'],
'buy_now',
6,
'Farnell / Distributor Intel',
'2026-03-01'::TIMESTAMPTZ
),
(
'trade_show',
'ECOC 2026 — Planned: Silicon Photonics mass market milestone',
'ECOC 2026 (September, Frankfurt) expected to showcase first mass-market silicon photonics transceivers at <€50 for 100G. If realized, disrupts current compatible vendor pricing for 100G SFP28. Monitor closely for 100G category.',
0.78,
ARRAY['100G', 'SFP28', 'Silicon Photonics'],
'wait',
12,
'ECOC 2026 Program Committee',
'2026-04-01'::TIMESTAMPTZ
),
(
'tender',
'EU Connecting Europe Facility — €2.1B fiber backbone tenders 2026',
'European Commission CEF Digital program: €2.1B in fiber backbone tenders across DE, FR, PL, SE in 2026. Each tender = 6-18 month deployment window. Triggers DWDM + ROADM + coherent transceiver demand (100G/400G ZR). TED database: TED-OJ.',
0.75,
ARRAY['DWDM', '100G', '400G ZR', 'Coherent', 'ROADM'],
'monitor',
18,
'EU TED / Connecting Europe Facility',
'2026-01-15'::TIMESTAMPTZ
)
) AS seed (intel_type, title, summary, relevance_score, technologies, buy_signal_implication, impact_horizon_months, source_name, published_at)
WHERE NOT EXISTS (
  SELECT 1
  FROM market_intelligence mi
  WHERE mi.title = seed.title
    AND mi.source_name = seed.source_name
);
-- ─────────────────────────────────────────────────────────────────────────────
-- 9. Seed: Known lifecycle events
-- ─────────────────────────────────────────────────────────────────────────────
-- Seed rows are inserted only when no event with the same event_type and title
-- exists yet. product_lifecycle_events has no unique constraint besides its
-- random UUID id, so "ON CONFLICT DO NOTHING" could never fire and re-running
-- this migration would duplicate every seed row.
-- All seeds are technology-level events (transceiver_id stays NULL).
INSERT INTO product_lifecycle_events (event_type, title, description, technology, effective_date, source_name, impact_level, buy_signal)
SELECT
  seed.event_type, seed.title, seed.description, seed.technology,
  -- VALUES resolves the untyped date literals to text; cast explicitly for the DATE column
  seed.effective_date::DATE,
  seed.source_name, seed.impact_level, seed.buy_signal
FROM (VALUES
(
'eol_announced',
'Cisco SFP-10G-LR — EOL announced, EOS 2027-06-30',
'Cisco Product Bulletin: SFP-10G-LR (CS-SFPHLX10G-LR) enters End of Sale 2026-06-30, End of Support 2027-06-30. Customers must migrate to SFP-10G-LR-S or compatible alternatives. Hortungs-Rush expected Q1-Q2 2026.',
'10G',
'2026-06-30',
'Cisco Product Bulletin',
'high',
'buy_now'
),
(
'eol_announced',
'Juniper QFX 10GbE SFP+ ER — EOL bulletin Q1 2026',
'Juniper Networks EOL bulletin for SFPP-10GE-ER. End of Engineering 2026-06-01. Last time order date 2026-09-01. Customers should evaluate EX-SFP-10GE-ER-S alternatives.',
'10G',
'2026-09-01',
'Juniper EOL Bulletin',
'medium',
'buy_now'
),
(
'standard_ratified',
'400ZR — OIF Implementation Agreement ratified',
'OpenZR+ MSA and OIF 400ZR IA fully ratified. Multi-vendor interoperability confirmed at Interop events. Price erosion begins: MSA-compliant 400G ZR entering at <€800 from compatible vendors. OEM premium shrinking.',
'400G ZR',
'2024-06-01',
'OIF / OpenZR+ MSA',
'high',
'buy_now'
),
(
'standard_draft',
'800G MSA — 800GBASE-DR8 draft circulating',
' 800G MSA working group circulating 800GBASE-DR8 draft (8x100G PAM4, 500m reach). Expected ratification Q3 2026. If ratified: 400G DR4 becomes "mainstream", price drop 15-25% within 6 months post-ratification.',
'800G',
'2026-09-01',
'800G MSA Working Group',
'medium',
'hold'
)
) AS seed (event_type, title, description, technology, effective_date, source_name, impact_level, buy_signal)
WHERE NOT EXISTS (
  SELECT 1
  FROM product_lifecycle_events ple
  WHERE ple.event_type = seed.event_type
    AND ple.title = seed.title
);
-- ─────────────────────────────────────────────────────────────────────────────
-- Done
-- ─────────────────────────────────────────────────────────────────────────────
-- Post-migration sanity check: returns a single row holding the current row
-- count of each table this migration creates.
SELECT
(SELECT COUNT(*) FROM stock_snapshots) AS stock_snapshots,
(SELECT COUNT(*) FROM abc_classification) AS abc_entries,
(SELECT COUNT(*) FROM reorder_signals) AS reorder_signals,
(SELECT COUNT(*) FROM product_lifecycle_events) AS lifecycle_events,
(SELECT COUNT(*) FROM market_intelligence) AS market_intel_entries,
(SELECT COUNT(*) FROM crawler_llm_log) AS crawler_log_entries;