feat: Procurement Intelligence Engine (WS0c)
- Migration 019: stock_snapshots, abc_classification, reorder_signals, product_lifecycle_events, market_intelligence, crawler_llm_log tables - Seeded 7 market intel events (OFC 2026, AWS/Azure CapEx, Coherent lead times, EU TED tenders, ECOC 2026, IEEE 802.3df) - Seeded 4 lifecycle events (Cisco SFP-10G-LR EOL, Juniper EOL, 400ZR ratified, 800G MSA draft) - Crawler LLM: core.ts (Ollama-based extractor), stock-schema.ts (typed schemas + vendor profiles for Flexoptix/FS.com/10Gtek/ATGBICS/ProLabs/Farnell/Mouser), validator.ts (rule-based sanity checks + cross-validation) - market-intelligence.ts scraper: OFC/ECOC, LightReading, IEEE 802.3, EU TED, Farnell/Mouser lead times, FierceTelecom — weekly via pg-boss - computeAbcClassification(): dynamic A/B/C classification from price obs + compat count + vendor breadth - computeReorderSignals(): buy_now/wait/hold/monitor with reasons + signal strength - API: GET /api/procurement/overview|signals|signals/:id|abc|market-intel| stock-trends/:id|lifecycle - Dashboard: Procurement Intel tab with Reorder Signals, ABC table, Market Intel cards, Lifecycle Events
This commit is contained in:
parent
480decd307
commit
681da54523
@ -20,6 +20,7 @@ import { transportRouter } from "./routes/transport";
|
||||
import { datasheetRouter } from "./routes/datasheets";
|
||||
import { hotTopicsRouter } from "./routes/hot-topics";
|
||||
import { adoptionRouter } from "./routes/adoption";
|
||||
import { procurementRouter } from "./routes/procurement";
|
||||
|
||||
const app = express();
|
||||
|
||||
@ -56,6 +57,7 @@ app.use("/api/transport", transportRouter);
|
||||
app.use("/api/datasheets", datasheetRouter);
|
||||
app.use("/api/adoption", adoptionRouter);
|
||||
app.use("/api/hot-topics", hotTopicsRouter);
|
||||
app.use("/api/procurement", procurementRouter);
|
||||
|
||||
// Dashboard (static HTML)
|
||||
app.use("/dashboard", express.static(join(__dirname, "..", "..", "dashboard")));
|
||||
|
||||
293
packages/api/src/routes/procurement.ts
Normal file
293
packages/api/src/routes/procurement.ts
Normal file
@ -0,0 +1,293 @@
|
||||
/**
|
||||
* WS0c: Procurement Intelligence API
|
||||
*
|
||||
* Endpoints:
|
||||
* GET /api/procurement/overview — Dashboard summary
|
||||
* GET /api/procurement/signals — Active reorder signals
|
||||
* GET /api/procurement/signals/:id — Signal for a specific transceiver
|
||||
* GET /api/procurement/abc — ABC classification list
|
||||
* GET /api/procurement/market-intel — Market intelligence events
|
||||
* GET /api/procurement/stock-trends/:id — Stock history for a transceiver
|
||||
* GET /api/procurement/lifecycle — Lifecycle events (EOL, standards)
|
||||
*/
|
||||
import { Router, Request, Response } from "express";
|
||||
import { pool } from "../db/client";
|
||||
|
||||
export const procurementRouter = Router();
|
||||
|
||||
// ─────────────────────────────────────────────────────────────────────────────
|
||||
// GET /api/procurement/overview
|
||||
// ─────────────────────────────────────────────────────────────────────────────
|
||||
procurementRouter.get("/overview", async (_req: Request, res: Response) => {
|
||||
try {
|
||||
const [signals, abc, intel, lifecycle] = await Promise.all([
|
||||
pool.query(`
|
||||
SELECT signal, COUNT(*) AS count
|
||||
FROM reorder_signals
|
||||
WHERE expires_at > NOW()
|
||||
AND computed_at = (SELECT MAX(r2.computed_at) FROM reorder_signals r2 WHERE r2.transceiver_id = reorder_signals.transceiver_id)
|
||||
GROUP BY signal
|
||||
`),
|
||||
pool.query(`
|
||||
SELECT abc_class, COUNT(*) AS count FROM abc_classification GROUP BY abc_class ORDER BY abc_class
|
||||
`),
|
||||
pool.query(`
|
||||
SELECT intel_type, buy_signal_implication, COUNT(*) AS count
|
||||
FROM market_intelligence
|
||||
WHERE created_at > NOW() - INTERVAL '90 days'
|
||||
GROUP BY intel_type, buy_signal_implication
|
||||
ORDER BY count DESC
|
||||
LIMIT 10
|
||||
`),
|
||||
pool.query(`
|
||||
SELECT event_type, impact_level, COUNT(*) AS count
|
||||
FROM product_lifecycle_events
|
||||
WHERE created_at > NOW() - INTERVAL '180 days'
|
||||
GROUP BY event_type, impact_level
|
||||
ORDER BY count DESC
|
||||
`),
|
||||
]);
|
||||
|
||||
res.json({
|
||||
signals_summary: signals.rows,
|
||||
abc_summary: abc.rows,
|
||||
market_intel_summary: intel.rows,
|
||||
lifecycle_summary: lifecycle.rows,
|
||||
});
|
||||
} catch (err) {
|
||||
console.error("Procurement overview error:", err);
|
||||
res.status(500).json({ error: "Internal server error" });
|
||||
}
|
||||
});
|
||||
|
||||
// ─────────────────────────────────────────────────────────────────────────────
|
||||
// GET /api/procurement/signals?signal=buy_now&abc_class=A&limit=50&offset=0
|
||||
// ─────────────────────────────────────────────────────────────────────────────
|
||||
procurementRouter.get("/signals", async (req: Request, res: Response) => {
|
||||
try {
|
||||
const {
|
||||
signal, abc_class, form_factor, speed_gbps,
|
||||
limit = "50", offset = "0"
|
||||
} = req.query;
|
||||
|
||||
let sql = `
|
||||
SELECT rs.*,
|
||||
t.part_number, t.standard_name, t.form_factor, t.speed_gbps,
|
||||
t.reach_label, t.image_url, t.image_r2_key,
|
||||
ac.abc_class, ac.demand_score, ac.supply_risk,
|
||||
v.name AS vendor_name
|
||||
FROM reorder_signals rs
|
||||
JOIN transceivers t ON rs.transceiver_id = t.id
|
||||
LEFT JOIN abc_classification ac ON ac.transceiver_id = t.id
|
||||
LEFT JOIN vendors v ON t.vendor_id = v.id
|
||||
WHERE rs.expires_at > NOW()
|
||||
AND rs.computed_at = (
|
||||
SELECT MAX(r2.computed_at) FROM reorder_signals r2 WHERE r2.transceiver_id = rs.transceiver_id
|
||||
)
|
||||
`;
|
||||
const params: any[] = [];
|
||||
let idx = 1;
|
||||
|
||||
if (signal) { sql += ` AND rs.signal = $${idx}`; params.push(signal); idx++; }
|
||||
if (abc_class) { sql += ` AND ac.abc_class = $${idx}`; params.push(abc_class); idx++; }
|
||||
if (form_factor) { sql += ` AND t.form_factor = $${idx}`; params.push(form_factor); idx++; }
|
||||
if (speed_gbps) { sql += ` AND t.speed_gbps = $${idx}`; params.push(parseFloat(speed_gbps as string)); idx++; }
|
||||
|
||||
sql += ` ORDER BY rs.signal_strength DESC LIMIT $${idx} OFFSET $${idx + 1}`;
|
||||
params.push(parseInt(limit as string), parseInt(offset as string));
|
||||
|
||||
const result = await pool.query(sql, params);
|
||||
res.json({ data: result.rows, total: result.rowCount });
|
||||
} catch (err) {
|
||||
console.error("Signals error:", err);
|
||||
res.status(500).json({ error: "Internal server error" });
|
||||
}
|
||||
});
|
||||
|
||||
// ─────────────────────────────────────────────────────────────────────────────
|
||||
// GET /api/procurement/signals/:transceiver_id
|
||||
// ─────────────────────────────────────────────────────────────────────────────
|
||||
procurementRouter.get("/signals/:id", async (req: Request, res: Response) => {
|
||||
try {
|
||||
const { id } = req.params;
|
||||
|
||||
const [signal, stockHistory, priceHistory, lifecycle] = await Promise.all([
|
||||
pool.query(`
|
||||
SELECT rs.*, ac.abc_class, ac.demand_score, ac.supply_risk
|
||||
FROM reorder_signals rs
|
||||
LEFT JOIN abc_classification ac ON ac.transceiver_id = rs.transceiver_id
|
||||
WHERE rs.transceiver_id::text = $1
|
||||
ORDER BY rs.computed_at DESC LIMIT 1
|
||||
`, [id]),
|
||||
|
||||
pool.query(`
|
||||
SELECT ss.stock_level, ss.stock_quantity, ss.incoming_quantity,
|
||||
ss.incoming_eta, ss.lead_time_days, ss.moq, ss.price_breaks,
|
||||
ss.scraped_at, ss.crawler_confidence,
|
||||
v.name AS vendor_name
|
||||
FROM stock_snapshots ss
|
||||
JOIN vendors v ON ss.vendor_id = v.id
|
||||
WHERE ss.transceiver_id::text = $1
|
||||
ORDER BY ss.scraped_at DESC LIMIT 50
|
||||
`, [id]),
|
||||
|
||||
pool.query(`
|
||||
SELECT po.price, po.currency, po.time,
|
||||
v.name AS vendor_name
|
||||
FROM price_observations po
|
||||
JOIN vendors v ON po.source_vendor_id = v.id
|
||||
WHERE po.transceiver_id::text = $1
|
||||
ORDER BY po.time DESC LIMIT 30
|
||||
`, [id]),
|
||||
|
||||
pool.query(`
|
||||
SELECT * FROM product_lifecycle_events
|
||||
WHERE transceiver_id::text = $1
|
||||
ORDER BY effective_date ASC NULLS LAST, created_at DESC
|
||||
`, [id]),
|
||||
]);
|
||||
|
||||
res.json({
|
||||
signal: signal.rows[0] || null,
|
||||
stock_history: stockHistory.rows,
|
||||
price_history: priceHistory.rows,
|
||||
lifecycle_events: lifecycle.rows,
|
||||
});
|
||||
} catch (err) {
|
||||
console.error("Signal detail error:", err);
|
||||
res.status(500).json({ error: "Internal server error" });
|
||||
}
|
||||
});
|
||||
|
||||
// ─────────────────────────────────────────────────────────────────────────────
|
||||
// GET /api/procurement/abc?class=A&form_factor=QSFP28
|
||||
// ─────────────────────────────────────────────────────────────────────────────
|
||||
procurementRouter.get("/abc", async (req: Request, res: Response) => {
|
||||
try {
|
||||
const { class: cls, form_factor, speed_gbps, limit = "100", offset = "0" } = req.query;
|
||||
|
||||
let sql = `
|
||||
SELECT ac.*,
|
||||
t.part_number, t.standard_name, t.form_factor, t.speed_gbps,
|
||||
t.reach_label, t.image_url,
|
||||
v.name AS vendor_name,
|
||||
rs.signal, rs.signal_strength
|
||||
FROM abc_classification ac
|
||||
JOIN transceivers t ON ac.transceiver_id = t.id
|
||||
LEFT JOIN vendors v ON t.vendor_id = v.id
|
||||
LEFT JOIN LATERAL (
|
||||
SELECT signal, signal_strength FROM reorder_signals
|
||||
WHERE transceiver_id = ac.transceiver_id AND expires_at > NOW()
|
||||
ORDER BY computed_at DESC LIMIT 1
|
||||
) rs ON true
|
||||
WHERE 1=1
|
||||
`;
|
||||
const params: any[] = [];
|
||||
let idx = 1;
|
||||
|
||||
if (cls) { sql += ` AND ac.abc_class = $${idx}`; params.push(cls); idx++; }
|
||||
if (form_factor) { sql += ` AND t.form_factor = $${idx}`; params.push(form_factor); idx++; }
|
||||
if (speed_gbps) { sql += ` AND t.speed_gbps = $${idx}`; params.push(parseFloat(speed_gbps as string)); idx++; }
|
||||
|
||||
sql += ` ORDER BY ac.abc_class, ac.demand_score DESC LIMIT $${idx} OFFSET $${idx + 1}`;
|
||||
params.push(parseInt(limit as string), parseInt(offset as string));
|
||||
|
||||
const result = await pool.query(sql, params);
|
||||
res.json({ data: result.rows, total: result.rowCount });
|
||||
} catch (err) {
|
||||
console.error("ABC error:", err);
|
||||
res.status(500).json({ error: "Internal server error" });
|
||||
}
|
||||
});
|
||||
|
||||
// ─────────────────────────────────────────────────────────────────────────────
|
||||
// GET /api/procurement/market-intel?type=&days=90&signal=buy_now
|
||||
// ─────────────────────────────────────────────────────────────────────────────
|
||||
procurementRouter.get("/market-intel", async (req: Request, res: Response) => {
|
||||
try {
|
||||
const {
|
||||
type, days = "90", signal, technology,
|
||||
limit = "50", offset = "0"
|
||||
} = req.query;
|
||||
|
||||
let sql = `
|
||||
SELECT * FROM market_intelligence
|
||||
WHERE created_at > NOW() - INTERVAL '1 day' * $1
|
||||
`;
|
||||
const params: any[] = [parseInt(days as string)];
|
||||
let idx = 2;
|
||||
|
||||
if (type) { sql += ` AND intel_type = $${idx}`; params.push(type); idx++; }
|
||||
if (signal) { sql += ` AND buy_signal_implication = $${idx}`; params.push(signal); idx++; }
|
||||
if (technology) { sql += ` AND $${idx} = ANY(technologies)`; params.push(technology); idx++; }
|
||||
|
||||
sql += ` ORDER BY relevance_score DESC, created_at DESC LIMIT $${idx} OFFSET $${idx + 1}`;
|
||||
params.push(parseInt(limit as string), parseInt(offset as string));
|
||||
|
||||
const result = await pool.query(sql, params);
|
||||
res.json({ data: result.rows, total: result.rowCount });
|
||||
} catch (err) {
|
||||
console.error("Market intel error:", err);
|
||||
res.status(500).json({ error: "Internal server error" });
|
||||
}
|
||||
});
|
||||
|
||||
// ─────────────────────────────────────────────────────────────────────────────
|
||||
// GET /api/procurement/stock-trends/:transceiver_id
|
||||
// ─────────────────────────────────────────────────────────────────────────────
|
||||
procurementRouter.get("/stock-trends/:id", async (req: Request, res: Response) => {
|
||||
try {
|
||||
const result = await pool.query(`
|
||||
SELECT DISTINCT ON (ss.vendor_id, date_trunc('day', ss.scraped_at))
|
||||
ss.stock_level, ss.stock_quantity, ss.incoming_quantity,
|
||||
ss.incoming_eta, ss.lead_time_days, ss.scraped_at,
|
||||
v.name AS vendor_name
|
||||
FROM stock_snapshots ss
|
||||
JOIN vendors v ON ss.vendor_id = v.id
|
||||
WHERE ss.transceiver_id::text = $1
|
||||
ORDER BY ss.vendor_id, date_trunc('day', ss.scraped_at) DESC, ss.scraped_at DESC
|
||||
LIMIT 200
|
||||
`, [req.params.id]);
|
||||
|
||||
res.json({ data: result.rows });
|
||||
} catch (err) {
|
||||
console.error("Stock trends error:", err);
|
||||
res.status(500).json({ error: "Internal server error" });
|
||||
}
|
||||
});
|
||||
|
||||
// ─────────────────────────────────────────────────────────────────────────────
|
||||
// GET /api/procurement/lifecycle?type=eol_announced&impact=high&days=180
|
||||
// ─────────────────────────────────────────────────────────────────────────────
|
||||
procurementRouter.get("/lifecycle", async (req: Request, res: Response) => {
|
||||
try {
|
||||
const {
|
||||
type, impact, technology, signal,
|
||||
days = "180", limit = "50"
|
||||
} = req.query;
|
||||
|
||||
let sql = `
|
||||
SELECT ple.*,
|
||||
t.part_number, t.standard_name, t.form_factor, t.speed_gbps
|
||||
FROM product_lifecycle_events ple
|
||||
LEFT JOIN transceivers t ON ple.transceiver_id = t.id
|
||||
WHERE ple.created_at > NOW() - INTERVAL '1 day' * $1
|
||||
`;
|
||||
const params: any[] = [parseInt(days as string)];
|
||||
let idx = 2;
|
||||
|
||||
if (type) { sql += ` AND ple.event_type = $${idx}`; params.push(type); idx++; }
|
||||
if (impact) { sql += ` AND ple.impact_level = $${idx}`; params.push(impact); idx++; }
|
||||
if (technology) { sql += ` AND ple.technology ILIKE $${idx}`; params.push(`%${technology}%`); idx++; }
|
||||
if (signal) { sql += ` AND ple.buy_signal = $${idx}`; params.push(signal); idx++; }
|
||||
|
||||
sql += ` ORDER BY ple.impact_level DESC, ple.effective_date ASC NULLS LAST, ple.created_at DESC LIMIT $${idx}`;
|
||||
params.push(parseInt(limit as string));
|
||||
|
||||
const result = await pool.query(sql, params);
|
||||
res.json({ data: result.rows });
|
||||
} catch (err) {
|
||||
console.error("Lifecycle error:", err);
|
||||
res.status(500).json({ error: "Internal server error" });
|
||||
}
|
||||
});
|
||||
@ -639,6 +639,52 @@
|
||||
.compare-diff { background: var(--yellow-light); }
|
||||
.compare-best { background: var(--green-light); font-weight: 600; }
|
||||
.compare-cb { width: 16px; height: 16px; cursor: pointer; accent-color: var(--purple); }
|
||||
|
||||
/* === PROCUREMENT TAB === */
|
||||
.proc-btn {
|
||||
background: var(--surface2); border: 1px solid var(--border);
|
||||
padding: 5px 14px; border-radius: 6px; cursor: pointer;
|
||||
font-size: 0.78rem; font-weight: 600; color: var(--text-dim);
|
||||
transition: all 0.15s;
|
||||
}
|
||||
.proc-btn:hover { color: var(--text); border-color: var(--accent); }
|
||||
.proc-btn-active { background: var(--accent); color: #fff !important; border-color: var(--accent) !important; }
|
||||
|
||||
.signal-card {
|
||||
background: var(--surface); border: 1px solid var(--border);
|
||||
border-radius: var(--radius-lg); padding: 1rem;
|
||||
box-shadow: var(--shadow-card); position: relative;
|
||||
}
|
||||
.signal-card:hover { box-shadow: var(--shadow-hover); }
|
||||
.signal-buy { border-left: 3px solid #c1121f; }
|
||||
.signal-wait { border-left: 3px solid var(--yellow); }
|
||||
.signal-hold { border-left: 3px solid var(--green); }
|
||||
.signal-monitor { border-left: 3px solid var(--purple); }
|
||||
|
||||
.sig-badge-buy { background:#fde8e8; color:#c1121f; }
|
||||
.sig-badge-wait { background:var(--yellow-light); color:#a06000; }
|
||||
.sig-badge-hold { background:var(--green-light); color:#1b4332; }
|
||||
.sig-badge-monitor { background:var(--purple-light); color:#5a3fcf; }
|
||||
|
||||
.intel-card {
|
||||
background: var(--surface); border: 1px solid var(--border);
|
||||
border-radius: var(--radius-lg); padding: 1rem;
|
||||
box-shadow: var(--shadow-card);
|
||||
}
|
||||
.intel-badge {
|
||||
display: inline-block; padding: 2px 8px; border-radius: 4px;
|
||||
font-size: 0.65rem; font-weight: 700; text-transform: uppercase;
|
||||
letter-spacing: 0.06em; margin-bottom: 0.5rem;
|
||||
}
|
||||
.intel-buy { background:#fde8e8; color:#c1121f; }
|
||||
.intel-wait { background:var(--yellow-light); color:#a06000; }
|
||||
.intel-hold { background:var(--green-light); color:#1b4332; }
|
||||
.intel-monitor { background:var(--purple-light); color:#5a3fcf; }
|
||||
.intel-none { background:var(--surface2); color:var(--text-dim); }
|
||||
|
||||
.abc-a { background:#fde8e8; color:#c1121f; font-weight:800; padding:2px 7px; border-radius:4px; }
|
||||
.abc-b { background:var(--yellow-light); color:#a06000; font-weight:800; padding:2px 7px; border-radius:4px; }
|
||||
.abc-c { background:var(--surface2); color:var(--text-dim); font-weight:800; padding:2px 7px; border-radius:4px; }
|
||||
</style>
|
||||
</head>
|
||||
<body>
|
||||
@ -680,6 +726,7 @@
|
||||
<div class="tab" data-tab="news">News</div>
|
||||
<div class="tab" data-tab="finder">Finder</div>
|
||||
<div class="tab" data-tab="blog">Blog Engine</div>
|
||||
<div class="tab" data-tab="procurement">Procurement Intel</div>
|
||||
</div>
|
||||
|
||||
<div class="main">
|
||||
@ -905,6 +952,75 @@
|
||||
<div id="blog-pipeline-status"></div>
|
||||
<div style="margin-bottom:0.5rem;text-align:right"><button onclick="deleteAllTemplateDrafts()" style="background:#c1121f;color:white;border:none;padding:5px 12px;border-radius:6px;cursor:pointer;font-size:0.7rem">Delete All Templates</button></div><div class="card"><div id="blog-list"></div></div>
|
||||
</div>
|
||||
|
||||
<!-- PROCUREMENT INTEL TAB -->
|
||||
<div id="tab-procurement" class="hidden">
|
||||
|
||||
<!-- Sub-nav -->
|
||||
<div style="display:flex;gap:0.5rem;margin-bottom:1.25rem;flex-wrap:wrap;align-items:center">
|
||||
<button onclick="showProcSection('signals')" id="proc-btn-signals" class="proc-btn proc-btn-active">Reorder Signals</button>
|
||||
<button onclick="showProcSection('abc')" id="proc-btn-abc" class="proc-btn">ABC Classes</button>
|
||||
<button onclick="showProcSection('market')" id="proc-btn-market" class="proc-btn">Market Intel</button>
|
||||
<button onclick="showProcSection('lifecycle')" id="proc-btn-lifecycle" class="proc-btn">Lifecycle Events</button>
|
||||
<div style="flex:1"></div>
|
||||
<button onclick="loadProcurement()" style="background:var(--surface2);border:1px solid var(--border);padding:4px 12px;border-radius:6px;cursor:pointer;font-size:0.75rem;color:var(--text)">↻ Refresh</button>
|
||||
</div>
|
||||
|
||||
<!-- Reorder Signals section -->
|
||||
<div id="proc-section-signals">
|
||||
<div style="display:flex;gap:0.5rem;margin-bottom:1rem;flex-wrap:wrap">
|
||||
<button onclick="filterSignal('')" id="sig-all" style="background:var(--accent);color:white;border:none;padding:3px 10px;border-radius:4px;cursor:pointer;font-size:0.75rem">All</button>
|
||||
<button onclick="filterSignal('buy_now')" style="background:rgba(193,18,31,0.1);border:1px solid rgba(193,18,31,0.3);color:#c1121f;padding:3px 10px;border-radius:4px;cursor:pointer;font-size:0.75rem">🔴 Buy Now</button>
|
||||
<button onclick="filterSignal('wait')" style="background:rgba(255,160,0,0.1);border:1px solid rgba(255,160,0,0.3);color:#c07000;padding:3px 10px;border-radius:4px;cursor:pointer;font-size:0.75rem">🟡 Wait</button>
|
||||
<button onclick="filterSignal('hold')" style="background:rgba(45,106,79,0.1);border:1px solid rgba(45,106,79,0.3);color:#2d6a4f;padding:3px 10px;border-radius:4px;cursor:pointer;font-size:0.75rem">🟢 Hold</button>
|
||||
<button onclick="filterSignal('monitor')" style="background:rgba(124,92,252,0.1);border:1px solid rgba(124,92,252,0.3);color:#7c5cfc;padding:3px 10px;border-radius:4px;cursor:pointer;font-size:0.75rem">🔵 Monitor</button>
|
||||
</div>
|
||||
<div id="proc-signals-grid" style="display:grid;gap:0.75rem;grid-template-columns:repeat(auto-fill,minmax(320px,1fr))">
|
||||
<div class="loading pulse">Loading reorder signals...</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<!-- ABC Classification section -->
|
||||
<div id="proc-section-abc" style="display:none">
|
||||
<div style="display:flex;gap:0.5rem;margin-bottom:1rem">
|
||||
<button onclick="filterAbc('')" style="background:var(--accent);color:white;border:none;padding:3px 10px;border-radius:4px;cursor:pointer;font-size:0.75rem">All</button>
|
||||
<button onclick="filterAbc('A')" style="background:rgba(193,18,31,0.1);border:1px solid rgba(193,18,31,0.3);color:#c1121f;padding:3px 10px;border-radius:4px;cursor:pointer;font-size:0.75rem">A — High Turnover</button>
|
||||
<button onclick="filterAbc('B')" style="background:rgba(255,160,0,0.1);border:1px solid rgba(255,160,0,0.3);color:#c07000;padding:3px 10px;border-radius:4px;cursor:pointer;font-size:0.75rem">B — Medium</button>
|
||||
<button onclick="filterAbc('C')" style="background:rgba(136,136,136,0.12);border:1px solid #ddd;color:var(--text-dim);padding:3px 10px;border-radius:4px;cursor:pointer;font-size:0.75rem">C — Low</button>
|
||||
</div>
|
||||
<div class="card" style="overflow-x:auto">
|
||||
<table style="width:100%;border-collapse:collapse;font-size:0.8rem" id="abc-table">
|
||||
<thead><tr style="border-bottom:2px solid var(--border);color:var(--text-dim);font-size:0.7rem;font-weight:700;text-transform:uppercase">
|
||||
<th style="text-align:left;padding:8px 6px">Class</th>
|
||||
<th style="text-align:left;padding:8px 6px">Product</th>
|
||||
<th style="text-align:left;padding:8px 6px">Form Factor</th>
|
||||
<th style="text-align:right;padding:8px 6px">Demand Score</th>
|
||||
<th style="text-align:right;padding:8px 6px">Compat.</th>
|
||||
<th style="text-align:right;padding:8px 6px">Vendors</th>
|
||||
<th style="text-align:left;padding:8px 6px">Supply Risk</th>
|
||||
<th style="text-align:left;padding:8px 6px">Signal</th>
|
||||
</tr></thead>
|
||||
<tbody id="abc-tbody"><tr><td colspan="8" style="padding:1rem;color:var(--text-dim)">Loading...</td></tr></tbody>
|
||||
</table>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<!-- Market Intelligence section -->
|
||||
<div id="proc-section-market" style="display:none">
|
||||
<div id="proc-market-grid" style="display:grid;gap:0.75rem;grid-template-columns:repeat(auto-fill,minmax(400px,1fr))">
|
||||
<div class="loading pulse">Loading market intelligence...</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<!-- Lifecycle Events section -->
|
||||
<div id="proc-section-lifecycle" style="display:none">
|
||||
<div id="proc-lifecycle-grid" style="display:grid;gap:0.75rem;grid-template-columns:repeat(auto-fill,minmax(400px,1fr))">
|
||||
<div class="loading pulse">Loading lifecycle events...</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
</div><!-- end tab-procurement -->
|
||||
|
||||
</div>
|
||||
|
||||
</div><!-- .app -->
|
||||
@ -1221,6 +1337,7 @@ function goToTab(tabName) {
|
||||
if (tabName === 'news') loadNews();
|
||||
if (tabName === 'blog') loadBlogDrafts();
|
||||
if (tabName === 'finder') document.getElementById('finder-switch-input').focus();
|
||||
if (tabName === 'procurement') loadProcurement();
|
||||
}
|
||||
|
||||
document.querySelectorAll('.tab').forEach(function(tab) {
|
||||
@ -2897,6 +3014,210 @@ el('compare-overlay').addEventListener('click', function(e) {
|
||||
if (e.target === this) this.classList.remove('visible');
|
||||
});
|
||||
|
||||
// ─── PROCUREMENT INTEL ───────────────────────────────────────────────────────
|
||||
|
||||
var procCurrentSignalFilter = '';
|
||||
var procCurrentAbcFilter = '';
|
||||
var procSignalsData = [];
|
||||
var procAbcData = [];
|
||||
|
||||
function showProcSection(name) {
|
||||
['signals','abc','market','lifecycle'].forEach(function(s) {
|
||||
var sec = el('proc-section-' + s);
|
||||
var btn = el('proc-btn-' + s);
|
||||
if (sec) sec.style.display = s === name ? '' : 'none';
|
||||
if (btn) { btn.classList.toggle('proc-btn-active', s === name); }
|
||||
});
|
||||
}
|
||||
|
||||
async function loadProcurement() {
|
||||
await Promise.all([
|
||||
loadProcSignals(),
|
||||
loadProcAbc(),
|
||||
loadProcMarketIntel(),
|
||||
loadProcLifecycle(),
|
||||
]);
|
||||
}
|
||||
|
||||
async function loadProcSignals() {
|
||||
var container = el('proc-signals-grid');
|
||||
container.innerHTML = '<div class="loading pulse">Loading signals...</div>';
|
||||
try {
|
||||
var d = await api('/api/procurement/signals?limit=100');
|
||||
procSignalsData = d.data || [];
|
||||
renderSignals(procCurrentSignalFilter);
|
||||
} catch(e) {
|
||||
container.innerHTML = '<div style="color:var(--text-dim);padding:1rem">No reorder signals yet — run the scraper to populate.</div>';
|
||||
}
|
||||
}
|
||||
|
||||
function filterSignal(sig) {
|
||||
procCurrentSignalFilter = sig;
|
||||
renderSignals(sig);
|
||||
}
|
||||
|
||||
function renderSignals(filterSig) {
|
||||
var data = filterSig ? procSignalsData.filter(function(r) { return r.signal === filterSig; }) : procSignalsData;
|
||||
var container = el('proc-signals-grid');
|
||||
if (!data.length) {
|
||||
container.innerHTML = '<div style="color:var(--text-dim);padding:1rem;grid-column:1/-1">No signals for this filter.</div>';
|
||||
return;
|
||||
}
|
||||
var signalIcon = { buy_now:'🔴', wait:'🟡', hold:'🟢', monitor:'🔵' };
|
||||
var signalLabel = { buy_now:'Buy Now', wait:'Wait', hold:'Hold', monitor:'Monitor' };
|
||||
container.innerHTML = data.map(function(r) {
|
||||
var reasons = [];
|
||||
try { reasons = JSON.parse(r.reasons || '[]'); } catch(e) {}
|
||||
var sigClass = 'signal-' + (r.signal || 'monitor').replace('_','-');
|
||||
var badgeClass = 'sig-badge-' + (r.signal || 'monitor').replace('_now','').replace('_','');
|
||||
var abcBadge = r.abc_class ? '<span class="abc-' + r.abc_class.toLowerCase() + '">' + r.abc_class + '</span>' : '';
|
||||
var strengthPct = Math.round((r.signal_strength || 0) * 100);
|
||||
var productName = r.standard_name || r.part_number || r.slug || '—';
|
||||
var imgHtml = '';
|
||||
if (r.image_r2_key) {
|
||||
imgHtml = '<img src="https://pub-placeholder.r2.dev/' + esc(r.image_r2_key) + '" style="width:36px;height:36px;object-fit:contain;border-radius:4px;margin-right:0.5rem;flex-shrink:0" onerror="this.style.display=\'none\'">';
|
||||
}
|
||||
return '<div class="signal-card ' + sigClass + '">'
|
||||
+ '<div style="display:flex;align-items:flex-start;gap:0.25rem;margin-bottom:0.5rem">'
|
||||
+ imgHtml
|
||||
+ '<div style="flex:1;min-width:0">'
|
||||
+ '<div style="font-weight:700;font-size:0.82rem;white-space:nowrap;overflow:hidden;text-overflow:ellipsis">' + esc(productName) + '</div>'
|
||||
+ '<div style="font-size:0.7rem;color:var(--text-dim)">' + esc(r.form_factor || '') + (r.speed_gbps ? ' · ' + r.speed_gbps + 'G' : '') + (r.vendor_name ? ' · ' + esc(r.vendor_name) : '') + '</div>'
|
||||
+ '</div>'
|
||||
+ '</div>'
|
||||
+ '<div style="display:flex;gap:0.4rem;align-items:center;margin-bottom:0.6rem;flex-wrap:wrap">'
|
||||
+ '<span class="intel-badge ' + badgeClass + '">' + (signalIcon[r.signal] || '') + ' ' + (signalLabel[r.signal] || r.signal) + '</span>'
|
||||
+ abcBadge
|
||||
+ (r.supply_risk ? '<span style="font-size:0.65rem;padding:2px 6px;border-radius:3px;background:var(--surface2);color:var(--text-dim)">' + esc(r.supply_risk) + ' risk</span>' : '')
|
||||
+ '</div>'
|
||||
+ '<div style="font-size:0.7rem;color:var(--text-dim);margin-bottom:0.5rem">'
|
||||
+ (reasons.length ? reasons.map(function(r2) { return '→ ' + esc(r2); }).join('<br>') : 'Insufficient data')
|
||||
+ '</div>'
|
||||
+ '<div style="display:flex;gap:1rem;font-size:0.7rem;color:var(--text-dim)">'
|
||||
+ (r.stock_trend ? '<span>Stock: <b style="color:var(--text)">' + r.stock_trend + '</b></span>' : '')
|
||||
+ (r.price_trend ? '<span>Price: <b style="color:var(--text)">' + r.price_trend + '</b></span>' : '')
|
||||
+ (r.lead_time_weeks ? '<span>Lead: <b style="color:var(--text)">' + r.lead_time_weeks + 'w</b></span>' : '')
|
||||
+ '</div>'
|
||||
+ '<div style="margin-top:0.6rem;background:var(--surface2);border-radius:3px;height:4px">'
|
||||
+ '<div style="height:4px;border-radius:3px;width:' + strengthPct + '%;background:var(--accent)"></div>'
|
||||
+ '</div>'
|
||||
+ '<div style="font-size:0.65rem;color:var(--text-dim);text-align:right;margin-top:2px">Signal strength: ' + strengthPct + '%</div>'
|
||||
+ '</div>';
|
||||
}).join('');
|
||||
}
|
||||
|
||||
async function loadProcAbc() {
|
||||
try {
|
||||
var d = await api('/api/procurement/abc?limit=200');
|
||||
procAbcData = d.data || [];
|
||||
renderAbcTable(procCurrentAbcFilter);
|
||||
} catch(e) {
|
||||
el('abc-tbody').innerHTML = '<tr><td colspan="8" style="padding:1rem;color:var(--text-dim)">No ABC data yet — run compute:abc job.</td></tr>';
|
||||
}
|
||||
}
|
||||
|
||||
function filterAbc(cls) {
|
||||
procCurrentAbcFilter = cls;
|
||||
renderAbcTable(cls);
|
||||
}
|
||||
|
||||
function renderAbcTable(filterCls) {
|
||||
var data = filterCls ? procAbcData.filter(function(r) { return r.abc_class === filterCls; }) : procAbcData;
|
||||
var sigIcon = { buy_now:'🔴', wait:'🟡', hold:'🟢', monitor:'🔵' };
|
||||
el('abc-tbody').innerHTML = data.map(function(r) {
|
||||
var abcEl = '<span class="abc-' + (r.abc_class || 'c').toLowerCase() + '">' + (r.abc_class || '—') + '</span>';
|
||||
return '<tr style="border-bottom:1px solid var(--border)">'
|
||||
+ '<td style="padding:7px 6px">' + abcEl + '</td>'
|
||||
+ '<td style="padding:7px 6px"><div style="font-weight:600">' + esc(r.standard_name || r.part_number || '—') + '</div><div style="font-size:0.68rem;color:var(--text-dim)">' + esc(r.vendor_name || '') + '</div></td>'
|
||||
+ '<td style="padding:7px 6px;font-family:var(--mono);font-size:0.75rem">' + esc(r.form_factor || '—') + '</td>'
|
||||
+ '<td style="padding:7px 6px;text-align:right;font-family:var(--mono)">' + (r.demand_score ? parseFloat(r.demand_score).toFixed(0) : '—') + '</td>'
|
||||
+ '<td style="padding:7px 6px;text-align:right;font-family:var(--mono)">' + (r.compat_count || 0) + '</td>'
|
||||
+ '<td style="padding:7px 6px;text-align:right;font-family:var(--mono)">' + (r.vendor_count || 0) + '</td>'
|
||||
+ '<td style="padding:7px 6px;font-size:0.75rem;color:' + (r.supply_risk === 'high' ? 'var(--red)' : r.supply_risk === 'medium' ? 'var(--yellow)' : 'var(--green)') + '">' + esc(r.supply_risk || '—') + '</td>'
|
||||
+ '<td style="padding:7px 6px">' + (r.signal ? (sigIcon[r.signal] || '') + ' ' + r.signal.replace('_',' ') : '—') + '</td>'
|
||||
+ '</tr>';
|
||||
}).join('') || '<tr><td colspan="8" style="padding:1rem;color:var(--text-dim)">No data for this filter.</td></tr>';
|
||||
}
|
||||
|
||||
async function loadProcMarketIntel() {
|
||||
var container = el('proc-market-grid');
|
||||
try {
|
||||
var d = await api('/api/procurement/market-intel?days=180&limit=50');
|
||||
var items = d.data || [];
|
||||
if (!items.length) {
|
||||
container.innerHTML = '<div style="color:var(--text-dim);padding:1rem;grid-column:1/-1">No market intelligence yet.</div>';
|
||||
return;
|
||||
}
|
||||
var typeIcon = {
|
||||
capex_cycle:'💰', trade_show:'🎪', standard_ratified:'📋',
|
||||
standard_draft:'📝', distributor_lead_time:'🚚', supply_chain:'🏭', tender:'📑'
|
||||
};
|
||||
container.innerHTML = items.map(function(item) {
|
||||
var sig = item.buy_signal_implication || 'none';
|
||||
var badgeClass = 'intel-' + sig.replace('_now','').replace('_','');
|
||||
var sigLabel = { buy_now:'🔴 Buy Now', wait:'🟡 Wait', hold:'🟢 Hold', monitor:'🔵 Monitor', none:'—' };
|
||||
var techs = (item.technologies || []).map(function(t) {
|
||||
return '<span style="font-size:0.65rem;padding:1px 6px;border-radius:3px;background:var(--surface2);color:var(--text-dim)">' + esc(t) + '</span>';
|
||||
}).join(' ');
|
||||
return '<div class="intel-card">'
|
||||
+ '<div style="display:flex;gap:0.5rem;align-items:flex-start;margin-bottom:0.4rem">'
|
||||
+ '<span style="font-size:1.2rem">' + (typeIcon[item.intel_type] || '📊') + '</span>'
|
||||
+ '<div style="flex:1">'
|
||||
+ '<span class="intel-badge ' + badgeClass + '">' + (sigLabel[sig] || sig) + '</span>'
|
||||
+ '<div style="font-weight:700;font-size:0.82rem;line-height:1.3;margin-top:0.2rem">' + esc(item.title) + '</div>'
|
||||
+ '</div></div>'
|
||||
+ '<div style="font-size:0.75rem;color:var(--text-dim);margin-bottom:0.6rem;line-height:1.5">' + esc(item.summary || '') + '</div>'
|
||||
+ (techs ? '<div style="display:flex;gap:0.3rem;flex-wrap:wrap;margin-bottom:0.5rem">' + techs + '</div>' : '')
|
||||
+ '<div style="display:flex;justify-content:space-between;font-size:0.68rem;color:var(--text-dim)">'
|
||||
+ '<span>' + esc(item.source_name) + '</span>'
|
||||
+ (item.impact_horizon_months ? '<span>Impact: ~' + item.impact_horizon_months + ' months</span>' : '')
|
||||
+ '</div>'
|
||||
+ '</div>';
|
||||
}).join('');
|
||||
} catch(e) {
|
||||
container.innerHTML = '<div style="color:var(--text-dim);padding:1rem;grid-column:1/-1">Could not load market intelligence.</div>';
|
||||
}
|
||||
}
|
||||
|
||||
async function loadProcLifecycle() {
|
||||
var container = el('proc-lifecycle-grid');
|
||||
try {
|
||||
var d = await api('/api/procurement/lifecycle?days=365&limit=50');
|
||||
var items = d.data || [];
|
||||
if (!items.length) {
|
||||
container.innerHTML = '<div style="color:var(--text-dim);padding:1rem;grid-column:1/-1">No lifecycle events yet.</div>';
|
||||
return;
|
||||
}
|
||||
var typeIcon = {
|
||||
eol_announced:'⛔', eol_effective:'🚫', standard_ratified:'✅',
|
||||
standard_draft:'📝', capex_peak:'💰', trade_show:'🎪',
|
||||
supply_risk:'⚠️', tender:'📑', price_floor:'📉'
|
||||
};
|
||||
var impactColor = { critical:'#c1121f', high:'#c1121f', medium:'var(--yellow)', low:'var(--green)' };
|
||||
var sigLabel = { buy_now:'🔴 Buy Now', wait:'🟡 Wait', hold:'🟢 Hold', monitor:'🔵 Monitor' };
|
||||
container.innerHTML = items.map(function(item) {
|
||||
var ic = impactColor[item.impact_level] || 'var(--text-dim)';
|
||||
var productInfo = item.part_number ? esc(item.part_number) + (item.form_factor ? ' · ' + esc(item.form_factor) : '') : '';
|
||||
var dateStr = item.effective_date ? new Date(item.effective_date).toLocaleDateString('de-DE') : '';
|
||||
return '<div class="intel-card" style="border-left:3px solid ' + ic + '">'
|
||||
+ '<div style="display:flex;gap:0.5rem;align-items:flex-start;margin-bottom:0.4rem">'
|
||||
+ '<span style="font-size:1.2rem">' + (typeIcon[item.event_type] || '📌') + '</span>'
|
||||
+ '<div style="flex:1">'
|
||||
+ (item.buy_signal ? '<span class="intel-badge intel-' + item.buy_signal.replace('_now','').replace('_','') + '">' + (sigLabel[item.buy_signal] || item.buy_signal) + '</span>' : '')
|
||||
+ '<div style="font-weight:700;font-size:0.82rem;line-height:1.3;margin-top:0.2rem">' + esc(item.title) + '</div>'
|
||||
+ '</div></div>'
|
||||
+ (item.description ? '<div style="font-size:0.75rem;color:var(--text-dim);margin-bottom:0.5rem;line-height:1.5">' + esc(item.description.substring(0, 200)) + (item.description.length > 200 ? '…' : '') + '</div>' : '')
|
||||
+ '<div style="display:flex;justify-content:space-between;font-size:0.68rem;color:var(--text-dim)">'
|
||||
+ '<span>' + esc(item.source_name || '') + (productInfo ? ' · ' + productInfo : '') + '</span>'
|
||||
+ (dateStr ? '<span style="color:' + ic + ';font-weight:600">' + dateStr + '</span>' : '')
|
||||
+ '</div>'
|
||||
+ '</div>';
|
||||
}).join('');
|
||||
} catch(e) {
|
||||
container.innerHTML = '<div style="color:var(--text-dim);padding:1rem;grid-column:1/-1">Could not load lifecycle events.</div>';
|
||||
}
|
||||
}
|
||||
|
||||
// INIT
|
||||
loadOverview();
|
||||
</script>
|
||||
|
||||
349
packages/scraper/src/crawler-llm/core.ts
Normal file
349
packages/scraper/src/crawler-llm/core.ts
Normal file
@ -0,0 +1,349 @@
|
||||
/**
|
||||
* Crawler LLM — Core extraction engine.
|
||||
*
|
||||
* Uses Ollama (local LLM) to extract structured product data from HTML.
|
||||
* Two-stage pipeline:
|
||||
* 1. Page type detection (product vs. category) — cheap, fast
|
||||
* 2. Structured data extraction with schema enforcement
|
||||
*
|
||||
* Vendor-specific profiles guide the LLM without hard-coding selectors.
|
||||
*/
|
||||
|
||||
import { pool } from "../utils/db";
|
||||
import type { StockExtractionResult, MarketIntelExtractionResult } from "./stock-schema";
|
||||
import { VENDOR_PROFILES } from "./stock-schema";
|
||||
import { validateStockExtraction } from "./validator";
|
||||
|
||||
const OLLAMA_HOST = process.env.OLLAMA_HOST || "http://192.168.178.169:11434";
|
||||
const OLLAMA_MODEL = process.env.CRAWLER_LLM_MODEL || "qwen2.5:14b";
|
||||
const MAX_HTML_CHARS = 12_000; // truncate to keep prompt manageable
|
||||
|
||||
// ─────────────────────────────────────────────────────────────────────────────
|
||||
// Ollama API call
|
||||
// ─────────────────────────────────────────────────────────────────────────────
|
||||
|
||||
async function ollamaGenerate(prompt: string): Promise<string> {
|
||||
const res = await fetch(`${OLLAMA_HOST}/api/generate`, {
|
||||
method: "POST",
|
||||
headers: { "Content-Type": "application/json" },
|
||||
body: JSON.stringify({
|
||||
model: OLLAMA_MODEL,
|
||||
prompt,
|
||||
stream: false,
|
||||
format: "json",
|
||||
options: { temperature: 0.1, num_predict: 1024 },
|
||||
}),
|
||||
});
|
||||
if (!res.ok) throw new Error(`Ollama error: ${res.status} ${await res.text()}`);
|
||||
const data = await res.json() as { response: string };
|
||||
return data.response;
|
||||
}
|
||||
|
||||
// ─────────────────────────────────────────────────────────────────────────────
|
||||
// Stage 1: Page type detection (fast, binary)
|
||||
// ─────────────────────────────────────────────────────────────────────────────
|
||||
|
||||
async function detectPageType(html: string, url: string, vendorSlug?: string): Promise<{
|
||||
is_product_page: boolean;
|
||||
confidence: number;
|
||||
evidence: string;
|
||||
}> {
|
||||
const profile = vendorSlug ? VENDOR_PROFILES[vendorSlug] : null;
|
||||
const hints = profile
|
||||
? `\nVendor hints — Product page signals: ${profile.product_page_signals.join(", ")}. Category page signals: ${profile.category_page_signals.join(", ")}.`
|
||||
: "";
|
||||
|
||||
const prompt = `You are a web scraper assistant. Determine if this HTML is a single product page or a category/listing page.
|
||||
|
||||
URL: ${url}${hints}
|
||||
|
||||
HTML (truncated):
|
||||
${html.substring(0, 3000)}
|
||||
|
||||
Respond with JSON only:
|
||||
{
|
||||
"is_product_page": true or false,
|
||||
"confidence": 0.0 to 1.0,
|
||||
"evidence": "brief quote from the HTML that supports your decision"
|
||||
}`;
|
||||
|
||||
const raw = await ollamaGenerate(prompt);
|
||||
try {
|
||||
const parsed = JSON.parse(raw);
|
||||
return {
|
||||
is_product_page: Boolean(parsed.is_product_page),
|
||||
confidence: Number(parsed.confidence) || 0,
|
||||
evidence: String(parsed.evidence || ""),
|
||||
};
|
||||
} catch {
|
||||
return { is_product_page: false, confidence: 0, evidence: "JSON parse failed" };
|
||||
}
|
||||
}
|
||||
|
||||
// ─────────────────────────────────────────────────────────────────────────────
|
||||
// Stage 2: Full product extraction
|
||||
// ─────────────────────────────────────────────────────────────────────────────
|
||||
|
||||
async function extractProductData(
|
||||
html: string,
|
||||
url: string,
|
||||
vendorSlug?: string
|
||||
): Promise<StockExtractionResult> {
|
||||
const profile = vendorSlug ? VENDOR_PROFILES[vendorSlug] : null;
|
||||
const hints = profile ? `
|
||||
Vendor: ${profile.name} (${profile.currency})
|
||||
Price hint: ${profile.price_hint || "find the main selling price"}
|
||||
Stock hint: ${profile.stock_hint || "find availability status"}` : "";
|
||||
|
||||
const prompt = `You are a product data extractor for optical transceiver products. Extract structured data from this product page HTML.
|
||||
|
||||
URL: ${url}${hints}
|
||||
|
||||
HTML (truncated to ${MAX_HTML_CHARS} chars):
|
||||
${html.substring(0, MAX_HTML_CHARS)}
|
||||
|
||||
Extract and respond with JSON only — use null for any field you cannot find with confidence:
|
||||
{
|
||||
"is_product_page": true,
|
||||
"confidence": 0.0 to 1.0,
|
||||
"source_evidence": "brief quote from HTML supporting your extraction",
|
||||
|
||||
"price": number or null,
|
||||
"currency": "USD" or "EUR" or "GBP" or "CNY" or null,
|
||||
"price_breaks": [{"qty": number, "price": number}] or [],
|
||||
|
||||
"stock_level": "in_stock" or "out_of_stock" or "limited" or "unknown",
|
||||
"stock_quantity": number or null,
|
||||
"incoming_quantity": number or null,
|
||||
"incoming_eta": "YYYY-MM-DD" or null,
|
||||
"lead_time_days": number or null,
|
||||
"moq": number or null,
|
||||
|
||||
"part_number": "exact part number string" or null,
|
||||
"standard_name": "manufacturer's exact product name as written on the page" or null,
|
||||
"form_factor": "SFP+" or "QSFP28" or "QSFP-DD" etc or null,
|
||||
"speed_gbps": number or null
|
||||
}
|
||||
|
||||
Rules:
|
||||
- standard_name MUST be the manufacturer's exact product designation, not a generic description
|
||||
- If you see "All Optical Transceivers" or similar category text as the name, set standard_name to null
|
||||
- price_breaks only if there is a visible quantity/price table
|
||||
- incoming_quantity: look for text like "X units incoming", "X im Zulauf", "Expected: X"
|
||||
- Set confidence < 0.5 if you are guessing`;
|
||||
|
||||
const raw = await ollamaGenerate(prompt);
|
||||
try {
|
||||
const parsed = JSON.parse(raw);
|
||||
return {
|
||||
is_product_page: Boolean(parsed.is_product_page ?? true),
|
||||
confidence: Number(parsed.confidence) || 0,
|
||||
source_evidence: String(parsed.source_evidence || ""),
|
||||
price: parsed.price != null ? Number(parsed.price) : null,
|
||||
currency: parsed.currency || null,
|
||||
price_breaks: Array.isArray(parsed.price_breaks) ? parsed.price_breaks : [],
|
||||
stock_level: (["in_stock", "out_of_stock", "limited"].includes(parsed.stock_level))
|
||||
? parsed.stock_level
|
||||
: "unknown",
|
||||
stock_quantity: parsed.stock_quantity != null ? Number(parsed.stock_quantity) : null,
|
||||
incoming_quantity: parsed.incoming_quantity != null ? Number(parsed.incoming_quantity) : null,
|
||||
incoming_eta: parsed.incoming_eta || null,
|
||||
lead_time_days: parsed.lead_time_days != null ? Number(parsed.lead_time_days) : null,
|
||||
moq: parsed.moq != null ? Number(parsed.moq) : null,
|
||||
part_number: parsed.part_number || null,
|
||||
standard_name: parsed.standard_name || null,
|
||||
form_factor: parsed.form_factor || null,
|
||||
speed_gbps: parsed.speed_gbps != null ? Number(parsed.speed_gbps) : null,
|
||||
};
|
||||
} catch {
|
||||
return {
|
||||
is_product_page: false,
|
||||
confidence: 0,
|
||||
source_evidence: "JSON parse failed",
|
||||
price: null, currency: null, price_breaks: [],
|
||||
stock_level: "unknown",
|
||||
stock_quantity: null, incoming_quantity: null, incoming_eta: null,
|
||||
lead_time_days: null, moq: null,
|
||||
part_number: null, standard_name: null, form_factor: null, speed_gbps: null,
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
// ─────────────────────────────────────────────────────────────────────────────
|
||||
// Market intelligence extraction
|
||||
// ─────────────────────────────────────────────────────────────────────────────
|
||||
|
||||
export async function extractMarketIntel(
|
||||
text: string,
|
||||
url: string,
|
||||
sourceName: string
|
||||
): Promise<MarketIntelExtractionResult> {
|
||||
const prompt = `You are an optical transceiver market analyst. Analyze this text for market intelligence relevant to transceiver procurement.
|
||||
|
||||
Source: ${sourceName}
|
||||
URL: ${url}
|
||||
|
||||
Text:
|
||||
${text.substring(0, 8000)}
|
||||
|
||||
Respond with JSON only:
|
||||
{
|
||||
"is_relevant": true or false (false if nothing relevant to transceiver markets),
|
||||
"confidence": 0.0 to 1.0,
|
||||
"source_evidence": "brief quote supporting your analysis",
|
||||
|
||||
"intel_type": one of: "capex_cycle", "trade_show", "standard_ratified", "standard_draft", "distributor_lead_time", "supply_chain", "tender",
|
||||
"title": "concise title (max 100 chars)",
|
||||
"summary": "2-3 sentence summary of the key insight",
|
||||
"technologies": ["400G", "QSFP-DD", etc — transceiver technologies mentioned],
|
||||
"buy_signal_implication": one of: "buy_now", "wait", "hold", "monitor", "none",
|
||||
"impact_horizon_months": estimated months until this affects the market (number),
|
||||
"published_at": "YYYY-MM-DD" or null
|
||||
}
|
||||
|
||||
Guidelines:
|
||||
- buy_now: shortage, EOL, CapEx surge → order before prices rise
|
||||
- wait: new standard coming → current products will drop in price
|
||||
- hold: stable market, no urgency
|
||||
- monitor: interesting but unclear impact
|
||||
- impact_horizon_months: 0-3 for immediate, 3-12 for medium, 12+ for long-term`;
|
||||
|
||||
const raw = await ollamaGenerate(prompt);
|
||||
try {
|
||||
const p = JSON.parse(raw);
|
||||
return {
|
||||
is_relevant: Boolean(p.is_relevant),
|
||||
confidence: Number(p.confidence) || 0,
|
||||
source_evidence: String(p.source_evidence || ""),
|
||||
intel_type: p.intel_type || "supply_chain",
|
||||
title: String(p.title || "").substring(0, 200),
|
||||
summary: String(p.summary || ""),
|
||||
technologies: Array.isArray(p.technologies) ? p.technologies : [],
|
||||
buy_signal_implication: p.buy_signal_implication || "none",
|
||||
impact_horizon_months: Number(p.impact_horizon_months) || 6,
|
||||
published_at: p.published_at || null,
|
||||
};
|
||||
} catch {
|
||||
return {
|
||||
is_relevant: false, confidence: 0, source_evidence: "parse error",
|
||||
intel_type: "supply_chain", title: "", summary: "", technologies: [],
|
||||
buy_signal_implication: "none", impact_horizon_months: 0, published_at: null,
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
// ─────────────────────────────────────────────────────────────────────────────
|
||||
// Public API — Main scrape function
|
||||
// ─────────────────────────────────────────────────────────────────────────────
|
||||
|
||||
export interface CrawlerLLMResult {
|
||||
extraction: StockExtractionResult;
|
||||
validation_passed: boolean;
|
||||
validation_errors: string[];
|
||||
validation_warnings: string[];
|
||||
}
|
||||
|
||||
export async function scrapeWithLLM(
|
||||
html: string,
|
||||
url: string,
|
||||
options: {
|
||||
vendorSlug?: string;
|
||||
vendorId?: string;
|
||||
transceiverIds?: string[]; // candidate matches (pre-filtered by form_factor/speed)
|
||||
speedGbps?: number;
|
||||
skipPageDetection?: boolean; // set true if URL is known product page
|
||||
} = {}
|
||||
): Promise<CrawlerLLMResult> {
|
||||
const { vendorSlug, speedGbps, skipPageDetection } = options;
|
||||
|
||||
// Stage 1: Page type detection (skip if caller already knows it's a product page)
|
||||
if (!skipPageDetection) {
|
||||
const pageType = await detectPageType(html, url, vendorSlug);
|
||||
if (!pageType.is_product_page) {
|
||||
return {
|
||||
extraction: {
|
||||
is_product_page: false,
|
||||
confidence: pageType.confidence,
|
||||
source_evidence: pageType.evidence,
|
||||
price: null, currency: null, price_breaks: [],
|
||||
stock_level: "unknown",
|
||||
stock_quantity: null, incoming_quantity: null, incoming_eta: null,
|
||||
lead_time_days: null, moq: null,
|
||||
part_number: null, standard_name: null, form_factor: null, speed_gbps: null,
|
||||
},
|
||||
validation_passed: false,
|
||||
validation_errors: ["Not a product page"],
|
||||
validation_warnings: [],
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
// Stage 2: Full extraction
|
||||
const extraction = await extractProductData(html, url, vendorSlug);
|
||||
|
||||
// Stage 3: Rule-based validation
|
||||
const validation = validateStockExtraction(extraction, speedGbps);
|
||||
|
||||
return {
|
||||
extraction,
|
||||
validation_passed: validation.passed,
|
||||
validation_errors: validation.errors,
|
||||
validation_warnings: validation.warnings,
|
||||
};
|
||||
}
|
||||
|
||||
// ─────────────────────────────────────────────────────────────────────────────
|
||||
// Persist to DB — saves stock snapshot and logs the scrape
|
||||
// ─────────────────────────────────────────────────────────────────────────────
|
||||
|
||||
export async function persistStockSnapshot(
|
||||
result: CrawlerLLMResult,
|
||||
url: string,
|
||||
vendorId: string,
|
||||
transceiverIds: string[]
|
||||
): Promise<void> {
|
||||
const { extraction, validation_passed } = result;
|
||||
|
||||
// Always log (for audit/debug)
|
||||
await pool.query(
|
||||
`INSERT INTO crawler_llm_log
|
||||
(url, vendor_id, is_product_page, extracted_data, confidence, validation_passed,
|
||||
failure_reason, model_used)
|
||||
VALUES ($1, $2, $3, $4, $5, $6, $7, $8)`,
|
||||
[
|
||||
url,
|
||||
vendorId,
|
||||
extraction.is_product_page,
|
||||
JSON.stringify(extraction),
|
||||
extraction.confidence,
|
||||
validation_passed,
|
||||
validation_passed ? null : result.validation_errors.join("; "),
|
||||
OLLAMA_MODEL,
|
||||
]
|
||||
);
|
||||
|
||||
if (!validation_passed || !extraction.is_product_page) return;
|
||||
|
||||
// Save stock snapshot for each matched transceiver
|
||||
for (const transceiverIdStr of transceiverIds) {
|
||||
await pool.query(
|
||||
`INSERT INTO stock_snapshots
|
||||
(transceiver_id, vendor_id, stock_level, stock_quantity, incoming_quantity,
|
||||
incoming_eta, lead_time_days, moq, price_breaks, source_url, crawler_confidence)
|
||||
VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11)`,
|
||||
[
|
||||
transceiverIdStr,
|
||||
vendorId,
|
||||
extraction.stock_level,
|
||||
extraction.stock_quantity,
|
||||
extraction.incoming_quantity,
|
||||
extraction.incoming_eta,
|
||||
extraction.lead_time_days,
|
||||
extraction.moq,
|
||||
extraction.price_breaks.length > 0 ? JSON.stringify(extraction.price_breaks) : null,
|
||||
url,
|
||||
extraction.confidence,
|
||||
]
|
||||
);
|
||||
}
|
||||
}
|
||||
135
packages/scraper/src/crawler-llm/stock-schema.ts
Normal file
135
packages/scraper/src/crawler-llm/stock-schema.ts
Normal file
@ -0,0 +1,135 @@
|
||||
/**
|
||||
* Crawler LLM — Schema definitions for structured product extraction.
|
||||
*
|
||||
* Every schema includes a `confidence` and `source_evidence` field so the LLM
|
||||
* is forced to cite its work. This enables validation and debugging.
|
||||
*/
|
||||
|
||||
export interface StockExtractionResult {
|
||||
is_product_page: boolean; // false = category/listing page → discard
|
||||
confidence: number; // 0.0 – 1.0 — LLM self-assessment
|
||||
source_evidence: string; // which text passage the LLM used
|
||||
|
||||
// Pricing
|
||||
price: number | null;
|
||||
currency: "USD" | "EUR" | "GBP" | "CNY" | null;
|
||||
price_breaks: PriceBreak[]; // volume discount tiers
|
||||
|
||||
// Stock
|
||||
stock_level: "in_stock" | "out_of_stock" | "limited" | "unknown";
|
||||
stock_quantity: number | null; // exact qty if shown
|
||||
incoming_quantity: number | null; // "18 im Zulauf"
|
||||
incoming_eta: string | null; // ISO date string "2026-04-15"
|
||||
lead_time_days: number | null;
|
||||
moq: number | null; // minimum order quantity
|
||||
|
||||
// Product identity (for cross-validation)
|
||||
part_number: string | null;
|
||||
standard_name: string | null; // manufacturer's exact product name
|
||||
form_factor: string | null;
|
||||
speed_gbps: number | null;
|
||||
}
|
||||
|
||||
export interface PriceBreak {
|
||||
qty: number;
|
||||
price: number;
|
||||
}
|
||||
|
||||
export interface MarketIntelExtractionResult {
|
||||
is_relevant: boolean; // false = skip
|
||||
confidence: number;
|
||||
source_evidence: string;
|
||||
|
||||
intel_type: "capex_cycle" | "trade_show" | "standard_ratified" | "standard_draft" | "distributor_lead_time" | "supply_chain" | "tender";
|
||||
title: string;
|
||||
summary: string;
|
||||
technologies: string[]; // ['400G', 'QSFP-DD', ...]
|
||||
buy_signal_implication: "buy_now" | "wait" | "hold" | "monitor" | "none";
|
||||
impact_horizon_months: number;
|
||||
published_at: string | null; // ISO date
|
||||
}
|
||||
|
||||
/** Vendor-specific hints to improve LLM extraction accuracy */
|
||||
export interface VendorProfile {
|
||||
slug: string;
|
||||
name: string;
|
||||
currency: "USD" | "EUR" | "GBP" | "CNY";
|
||||
product_page_signals: string[]; // text patterns that indicate a product page
|
||||
category_page_signals: string[]; // text patterns that indicate a category page
|
||||
price_hint: string | null; // natural language hint for the LLM
|
||||
stock_hint: string | null;
|
||||
known_moq: number | null;
|
||||
}
|
||||
|
||||
export const VENDOR_PROFILES: Record<string, VendorProfile> = {
|
||||
"flexoptix": {
|
||||
slug: "flexoptix",
|
||||
name: "Flexoptix",
|
||||
currency: "EUR",
|
||||
product_page_signals: ["In den Warenkorb", "Add to Cart", "part number", "SKU:", "P/N:"],
|
||||
category_page_signals: ["Alle Produkte", "Filter", "Ergebnisse", "products found"],
|
||||
price_hint: "Price is shown in EUR, usually near 'In den Warenkorb' button. May show 'auf Anfrage' if not listed.",
|
||||
stock_hint: "Look for 'auf Lager', 'Lieferzeit', 'sofort lieferbar', or stock badge near price.",
|
||||
known_moq: 1,
|
||||
},
|
||||
"fs-com": {
|
||||
slug: "fs-com",
|
||||
name: "FS.com",
|
||||
currency: "USD",
|
||||
product_page_signals: ["Add to Cart", "Part No.", "SKU", "In Stock", "Reviews"],
|
||||
category_page_signals: ["Products", "Filter by", "Sort by", "items found", "Category"],
|
||||
price_hint: "Price is in USD, shown prominently near 'Add to Cart'. May show qty pricing table.",
|
||||
stock_hint: "Look for 'In Stock', exact number like '847 In Stock', or 'Out of Stock'.",
|
||||
known_moq: 1,
|
||||
},
|
||||
"10gtek": {
|
||||
slug: "10gtek",
|
||||
name: "10Gtek",
|
||||
currency: "USD",
|
||||
product_page_signals: ["Add to Cart", "Product Code:", "In Stock", "Ships from"],
|
||||
category_page_signals: ["Shop All", "Filter", "Category", "Sort By"],
|
||||
price_hint: "Price in USD near Add to Cart button. Volume pricing sometimes shown as table.",
|
||||
stock_hint: "Stock level shown as text: 'In Stock', 'Low Stock', 'Out of Stock'.",
|
||||
known_moq: 1,
|
||||
},
|
||||
"atgbics": {
|
||||
slug: "atgbics",
|
||||
name: "ATGBICS",
|
||||
currency: "GBP",
|
||||
product_page_signals: ["Add to Basket", "Part Number:", "Stock:", "Delivery"],
|
||||
category_page_signals: ["Products", "Browse by", "Refine by"],
|
||||
price_hint: "Price in GBP. ATGBICS uses Shopify, price is in a span with class 'price'.",
|
||||
stock_hint: "Stock shown as 'In Stock', 'Limited Stock', or 'Out of Stock' near price.",
|
||||
known_moq: 1,
|
||||
},
|
||||
"prolabs": {
|
||||
slug: "prolabs",
|
||||
name: "ProLabs",
|
||||
currency: "USD",
|
||||
product_page_signals: ["Add to Cart", "Part Number", "In Stock", "Specs"],
|
||||
category_page_signals: ["Results", "Filter", "Category", "Sort"],
|
||||
price_hint: "Price in USD. ProLabs may require login for prices — if so, mark price as null.",
|
||||
stock_hint: "Stock availability shown near product title.",
|
||||
known_moq: 1,
|
||||
},
|
||||
"farnell": {
|
||||
slug: "farnell",
|
||||
name: "Farnell",
|
||||
currency: "EUR",
|
||||
product_page_signals: ["Add to Basket", "Order Code:", "Stock:", "Lead Time:"],
|
||||
category_page_signals: ["Products", "Refine Search", "Category", "results for"],
|
||||
price_hint: "Price in EUR or GBP. Farnell shows break prices in a table with columns Qty/Price.",
|
||||
stock_hint: "Stock shown as number, e.g. '47 In Stock'. Lead time shown in business days.",
|
||||
known_moq: 1,
|
||||
},
|
||||
"mouser": {
|
||||
slug: "mouser",
|
||||
name: "Mouser Electronics",
|
||||
currency: "EUR",
|
||||
product_page_signals: ["Add to Cart", "Mouser Part No.", "Mfr. Part No.", "In Stock:"],
|
||||
category_page_signals: ["Search Results", "Filter Results", "Products (", "Sort By"],
|
||||
price_hint: "Mouser shows price per unit and break quantities. USD or EUR depending on locale.",
|
||||
stock_hint: "Stock shown as exact number: 'In Stock: 124'. Lead time shown for out-of-stock items.",
|
||||
known_moq: null,
|
||||
},
|
||||
};
|
||||
157
packages/scraper/src/crawler-llm/validator.ts
Normal file
157
packages/scraper/src/crawler-llm/validator.ts
Normal file
@ -0,0 +1,157 @@
|
||||
/**
|
||||
* Crawler LLM — Rule-based validator.
|
||||
*
|
||||
* Runs AFTER the LLM extraction to catch hallucinations and obvious errors.
|
||||
* The LLM is good at structure; this catches range violations and nonsense.
|
||||
*/
|
||||
|
||||
import type { StockExtractionResult } from "./stock-schema";
|
||||
|
||||
export interface ValidationResult {
|
||||
passed: boolean;
|
||||
warnings: string[];
|
||||
errors: string[];
|
||||
}
|
||||
|
||||
/** Expected price ranges per speed class (USD/EUR). Rough but effective. */
|
||||
const PRICE_RANGES: Record<string, [number, number]> = {
|
||||
"1G": [10, 500],
|
||||
"10G": [20, 2000],
|
||||
"25G": [30, 2000],
|
||||
"40G": [50, 3000],
|
||||
"100G": [80, 15000],
|
||||
"200G": [200, 20000],
|
||||
"400G": [200, 50000],
|
||||
"800G": [500, 80000],
|
||||
};
|
||||
|
||||
const VALID_FORM_FACTORS = new Set([
|
||||
"SFP", "SFP+", "SFP28", "SFP56", "SFP-DD",
|
||||
"QSFP", "QSFP+", "QSFP28", "QSFP56", "QSFP-DD", "QSFP112",
|
||||
"OSFP", "OSFP-RHS",
|
||||
"CFP", "CFP2", "CFP4", "CFP8",
|
||||
"XFP", "X2", "XENPAK",
|
||||
"DSFP", "CSFP",
|
||||
]);
|
||||
|
||||
const VALID_CURRENCIES = new Set(["USD", "EUR", "GBP", "CNY"]);
|
||||
|
||||
export function validateStockExtraction(
|
||||
result: StockExtractionResult,
|
||||
speedGbps?: number
|
||||
): ValidationResult {
|
||||
const errors: string[] = [];
|
||||
const warnings: string[] = [];
|
||||
|
||||
// Not a product page — caller should discard, not an error
|
||||
if (!result.is_product_page) {
|
||||
return { passed: false, errors: ["Not a product page"], warnings: [] };
|
||||
}
|
||||
|
||||
// Confidence too low
|
||||
if (result.confidence < 0.5) {
|
||||
errors.push(`Confidence ${result.confidence} below threshold 0.5`);
|
||||
}
|
||||
|
||||
// Price validation
|
||||
if (result.price !== null) {
|
||||
if (result.price <= 0) {
|
||||
errors.push(`Price ${result.price} is not positive`);
|
||||
}
|
||||
if (result.price > 500_000) {
|
||||
errors.push(`Price ${result.price} exceeds maximum sanity limit`);
|
||||
}
|
||||
if (!result.currency || !VALID_CURRENCIES.has(result.currency)) {
|
||||
errors.push(`Invalid currency: ${result.currency}`);
|
||||
}
|
||||
|
||||
// Speed-class price range check
|
||||
if (speedGbps) {
|
||||
const speedKey = `${speedGbps}G`;
|
||||
const range = PRICE_RANGES[speedKey];
|
||||
if (range && (result.price < range[0] * 0.1 || result.price > range[1] * 10)) {
|
||||
warnings.push(`Price ${result.price} ${result.currency} looks unusual for ${speedKey} (expected ${range[0]}–${range[1]})`);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Stock quantity sanity
|
||||
if (result.stock_quantity !== null) {
|
||||
if (result.stock_quantity < 0) {
|
||||
errors.push(`Stock quantity ${result.stock_quantity} is negative`);
|
||||
}
|
||||
if (result.stock_quantity > 100_000) {
|
||||
warnings.push(`Stock quantity ${result.stock_quantity} unusually high — verify`);
|
||||
}
|
||||
}
|
||||
|
||||
// Lead time sanity
|
||||
if (result.lead_time_days !== null) {
|
||||
if (result.lead_time_days < 0) {
|
||||
errors.push(`Lead time ${result.lead_time_days} is negative`);
|
||||
}
|
||||
if (result.lead_time_days > 730) {
|
||||
warnings.push(`Lead time ${result.lead_time_days} days (>2 years) — verify`);
|
||||
}
|
||||
}
|
||||
|
||||
// MOQ sanity
|
||||
if (result.moq !== null && result.moq < 1) {
|
||||
errors.push(`MOQ ${result.moq} must be at least 1`);
|
||||
}
|
||||
|
||||
// Form factor check
|
||||
if (result.form_factor && !VALID_FORM_FACTORS.has(result.form_factor)) {
|
||||
warnings.push(`Unknown form factor: ${result.form_factor}`);
|
||||
}
|
||||
|
||||
// Price break consistency
|
||||
if (result.price_breaks.length > 0) {
|
||||
for (const pb of result.price_breaks) {
|
||||
if (pb.qty < 1 || pb.price <= 0) {
|
||||
errors.push(`Invalid price break: qty=${pb.qty} price=${pb.price}`);
|
||||
}
|
||||
if (result.price && pb.price > result.price * 2) {
|
||||
warnings.push(`Price break ${pb.qty}x=${pb.price} higher than unit price — unusual`);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Incoming ETA must be a future-ish date
|
||||
if (result.incoming_eta) {
|
||||
const eta = new Date(result.incoming_eta);
|
||||
if (isNaN(eta.getTime())) {
|
||||
errors.push(`Invalid incoming_eta date: ${result.incoming_eta}`);
|
||||
}
|
||||
}
|
||||
|
||||
return {
|
||||
passed: errors.length === 0,
|
||||
errors,
|
||||
warnings,
|
||||
};
|
||||
}
|
||||
|
||||
/** Cross-source comparison: do two extractions agree within tolerance? */
|
||||
export function crossValidate(
|
||||
a: StockExtractionResult,
|
||||
b: StockExtractionResult,
|
||||
priceTolerance = 0.10 // 10% price difference allowed
|
||||
): boolean {
|
||||
if (a.price === null || b.price === null) return false;
|
||||
|
||||
// Both in same currency
|
||||
if (a.currency !== b.currency) return false;
|
||||
|
||||
// Price within tolerance
|
||||
const diff = Math.abs(a.price - b.price) / Math.max(a.price, b.price);
|
||||
if (diff > priceTolerance) return false;
|
||||
|
||||
// Part numbers match (if both present)
|
||||
if (a.part_number && b.part_number) {
|
||||
const normalize = (s: string) => s.replace(/[\s\-_]/g, "").toUpperCase();
|
||||
if (normalize(a.part_number) !== normalize(b.part_number)) return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
@ -67,6 +67,9 @@ export async function registerSchedules(boss: PgBoss): Promise<void> {
|
||||
"scrape:news",
|
||||
"scrape:faq",
|
||||
"scrape:docs",
|
||||
"scrape:market-intel",
|
||||
"compute:abc",
|
||||
"compute:reorder-signals",
|
||||
];
|
||||
for (const q of queues) {
|
||||
await boss.createQueue(q).catch(() => { /* already exists */ });
|
||||
@ -140,6 +143,24 @@ export async function registerSchedules(boss: PgBoss): Promise<void> {
|
||||
expireInSeconds: 7200,
|
||||
});
|
||||
|
||||
// Market intelligence: OFC/ECOC, IEEE, TED, Farnell/Mouser lead times (every Tuesday 5am)
|
||||
await boss.schedule("scrape:market-intel", "0 5 * * 2", {}, {
|
||||
retryLimit: 2,
|
||||
expireInSeconds: 3600,
|
||||
});
|
||||
|
||||
// ABC classification recompute (after each major pricing run — daily at 8am)
|
||||
await boss.schedule("compute:abc", "0 8 * * *", {}, {
|
||||
retryLimit: 2,
|
||||
expireInSeconds: 600,
|
||||
});
|
||||
|
||||
// Reorder signals recompute (daily at 8:30am — after ABC)
|
||||
await boss.schedule("compute:reorder-signals", "30 8 * * *", {}, {
|
||||
retryLimit: 2,
|
||||
expireInSeconds: 600,
|
||||
});
|
||||
|
||||
console.log("All schedules registered");
|
||||
}
|
||||
|
||||
@ -208,5 +229,23 @@ export async function registerWorkers(boss: PgBoss): Promise<void> {
|
||||
console.log(`[${new Date().toISOString()}] Docs scraper — not yet implemented`);
|
||||
});
|
||||
|
||||
await boss.work("scrape:market-intel", async (_job) => {
|
||||
console.log(`[${new Date().toISOString()}] Running: Market intelligence`);
|
||||
const { scrapeMarketIntelligence } = await import("./scrapers/market-intelligence");
|
||||
await withIsolatedStorage("market-intel", scrapeMarketIntelligence);
|
||||
});
|
||||
|
||||
await boss.work("compute:abc", async (_job) => {
|
||||
console.log(`[${new Date().toISOString()}] Computing: ABC classification`);
|
||||
const { computeAbcClassification } = await import("./scrapers/market-intelligence");
|
||||
await computeAbcClassification();
|
||||
});
|
||||
|
||||
await boss.work("compute:reorder-signals", async (_job) => {
|
||||
console.log(`[${new Date().toISOString()}] Computing: Reorder signals`);
|
||||
const { computeReorderSignals } = await import("./scrapers/market-intelligence");
|
||||
await computeReorderSignals();
|
||||
});
|
||||
|
||||
console.log("All workers registered");
|
||||
}
|
||||
|
||||
299
packages/scraper/src/scrapers/market-intelligence.ts
Normal file
299
packages/scraper/src/scrapers/market-intelligence.ts
Normal file
@ -0,0 +1,299 @@
|
||||
/**
|
||||
* Market Intelligence Scraper
|
||||
*
|
||||
* Collects procurement-relevant signals from:
|
||||
* - OFC/ECOC conference programs
|
||||
* - Farnell/Mouser lead times for optical modules
|
||||
* - IEEE 802.3 working group status page
|
||||
* - EU TED tender database (fiber infrastructure)
|
||||
* - LightReading/FierceTelecom trade press
|
||||
*
|
||||
* Runs weekly via pg-boss scheduler.
|
||||
* Results stored in market_intelligence table.
|
||||
* LLM analysis via Crawler LLM extractMarketIntel().
|
||||
*/
|
||||
|
||||
import { CheerioCrawler } from "crawlee";
|
||||
import { extractMarketIntel } from "../crawler-llm/core";
|
||||
import { pool } from "../utils/db";
|
||||
|
||||
interface IntelSource {
|
||||
name: string;
|
||||
url: string;
|
||||
type: "trade_show" | "standard_ratified" | "standard_draft" | "distributor_lead_time" | "supply_chain" | "tender" | "capex_cycle";
|
||||
fetchText: (html: string) => string; // extract relevant text from HTML
|
||||
}
|
||||
|
||||
const SOURCES: IntelSource[] = [
|
||||
{
|
||||
name: "OFC Conference News",
|
||||
url: "https://www.ofcconference.org/en-us/home/news/",
|
||||
type: "trade_show",
|
||||
fetchText: (html) => html.replace(/<[^>]+>/g, " ").replace(/\s+/g, " ").substring(0, 8000),
|
||||
},
|
||||
{
|
||||
name: "LightReading — Optical Networking",
|
||||
url: "https://www.lightreading.com/optical-networking",
|
||||
type: "supply_chain",
|
||||
fetchText: (html) => html.replace(/<[^>]+>/g, " ").replace(/\s+/g, " ").substring(0, 8000),
|
||||
},
|
||||
{
|
||||
name: "IEEE 802.3 Working Group",
|
||||
url: "https://www.ieee802.org/3/",
|
||||
type: "standard_draft",
|
||||
fetchText: (html) => html.replace(/<[^>]+>/g, " ").replace(/\s+/g, " ").substring(0, 6000),
|
||||
},
|
||||
{
|
||||
name: "EU TED — ICT/Fiber Tenders",
|
||||
url: "https://ted.europa.eu/en/search/result?forms%5B0%5D%5Bcpv%5D=32571000",
|
||||
type: "tender",
|
||||
fetchText: (html) => html.replace(/<[^>]+>/g, " ").replace(/\s+/g, " ").substring(0, 8000),
|
||||
},
|
||||
{
|
||||
name: "Farnell — SFP Fiber Transceivers Lead Times",
|
||||
url: "https://de.farnell.com/c/passive-optische-netzwerke/glasfasertransceiver",
|
||||
type: "distributor_lead_time",
|
||||
fetchText: (html) => {
|
||||
// Extract lead time patterns: "X Wochen", "X weeks", "X days"
|
||||
const leadTimePattern = /(\d+)\s*(wochen|weeks|days|tage|week|day)/gi;
|
||||
const matches = [];
|
||||
let m;
|
||||
while ((m = leadTimePattern.exec(html)) !== null) {
|
||||
matches.push(m[0]);
|
||||
}
|
||||
const context = html.replace(/<[^>]+>/g, " ").replace(/\s+/g, " ");
|
||||
return `Lead time mentions: ${matches.join(", ")}\n\nPage context:\n${context.substring(0, 5000)}`;
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "Mouser — Optical Transceivers Category",
|
||||
url: "https://www.mouser.de/c/optoelectronics/fiber-optic-components/fiber-optic-transceivers/",
|
||||
type: "distributor_lead_time",
|
||||
fetchText: (html) => html.replace(/<[^>]+>/g, " ").replace(/\s+/g, " ").substring(0, 6000),
|
||||
},
|
||||
{
|
||||
name: "FierceTelecom — Optical News",
|
||||
url: "https://www.fiercetelecom.com/optical",
|
||||
type: "supply_chain",
|
||||
fetchText: (html) => html.replace(/<[^>]+>/g, " ").replace(/\s+/g, " ").substring(0, 8000),
|
||||
},
|
||||
];
|
||||
|
||||
async function saveIntelItem(
|
||||
item: Awaited<ReturnType<typeof extractMarketIntel>>,
|
||||
source: IntelSource,
|
||||
url: string
|
||||
): Promise<void> {
|
||||
if (!item.is_relevant || item.confidence < 0.5) return;
|
||||
|
||||
await pool.query(
|
||||
`INSERT INTO market_intelligence
|
||||
(intel_type, title, summary, relevance_score, technologies,
|
||||
buy_signal_implication, impact_horizon_months, source_url, source_name, published_at)
|
||||
VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10)
|
||||
ON CONFLICT DO NOTHING`,
|
||||
[
|
||||
item.intel_type,
|
||||
item.title.substring(0, 500),
|
||||
item.summary,
|
||||
item.confidence,
|
||||
item.technologies,
|
||||
item.buy_signal_implication,
|
||||
item.impact_horizon_months,
|
||||
url,
|
||||
source.name,
|
||||
item.published_at ? new Date(item.published_at) : null,
|
||||
]
|
||||
);
|
||||
console.log(`[market-intel] Saved: ${item.title.substring(0, 60)}... (${item.buy_signal_implication})`);
|
||||
}
|
||||
|
||||
export async function scrapeMarketIntelligence(): Promise<void> {
|
||||
console.log("[market-intel] Starting market intelligence scrape...");
|
||||
let processed = 0;
|
||||
|
||||
const crawler = new CheerioCrawler({
|
||||
maxRequestsPerCrawl: SOURCES.length + 20,
|
||||
maxConcurrency: 2,
|
||||
requestHandlerTimeoutSecs: 30,
|
||||
|
||||
async requestHandler({ request, body }) {
|
||||
const html = body.toString();
|
||||
const source = SOURCES.find((s) => request.url.startsWith(s.url.split("?")[0]));
|
||||
if (!source) return;
|
||||
|
||||
const text = source.fetchText(html);
|
||||
if (text.length < 200) {
|
||||
console.warn(`[market-intel] Too little text from ${request.url}`);
|
||||
return;
|
||||
}
|
||||
|
||||
try {
|
||||
const intel = await extractMarketIntel(text, request.url, source.name);
|
||||
await saveIntelItem(intel, source, request.url);
|
||||
processed++;
|
||||
} catch (err) {
|
||||
console.error(`[market-intel] LLM error for ${request.url}:`, err);
|
||||
}
|
||||
},
|
||||
|
||||
async failedRequestHandler({ request }) {
|
||||
console.warn(`[market-intel] Failed: ${request.url}`);
|
||||
},
|
||||
});
|
||||
|
||||
await crawler.addRequests(SOURCES.map((s) => ({ url: s.url })));
|
||||
await crawler.run();
|
||||
|
||||
console.log(`[market-intel] Done. Processed ${processed}/${SOURCES.length} sources.`);
|
||||
}
|
||||
|
||||
// ─────────────────────────────────────────────────────────────────────────────
|
||||
// ABC Classification computation — run after each major scrape cycle
|
||||
// ─────────────────────────────────────────────────────────────────────────────
|
||||
|
||||
export async function computeAbcClassification(): Promise<void> {
|
||||
console.log("[abc] Computing ABC classification...");
|
||||
|
||||
await pool.query(`
|
||||
INSERT INTO abc_classification
|
||||
(transceiver_id, abc_class, obs_90d, compat_count, vendor_count, price_volatility, demand_score, supply_risk)
|
||||
SELECT
|
||||
t.id,
|
||||
CASE
|
||||
WHEN obs_90d > 50 AND compat_count > 100 THEN 'A'
|
||||
WHEN obs_90d > 15 OR compat_count > 30 THEN 'B'
|
||||
ELSE 'C'
|
||||
END AS abc_class,
|
||||
obs_90d,
|
||||
compat_count,
|
||||
vendor_count,
|
||||
price_volatility,
|
||||
-- demand score 0-100: weighted combination
|
||||
LEAST(100, (obs_90d * 0.5 + compat_count * 0.3 + vendor_count * 5)) AS demand_score,
|
||||
CASE
|
||||
WHEN price_volatility > 0.3 THEN 'high'
|
||||
WHEN price_volatility > 0.1 THEN 'medium'
|
||||
ELSE 'low'
|
||||
END AS supply_risk
|
||||
FROM transceivers t
|
||||
LEFT JOIN (
|
||||
SELECT transceiver_id,
|
||||
COUNT(*) FILTER (WHERE time > NOW() - INTERVAL '90 days') AS obs_90d,
|
||||
STDDEV(price) / NULLIF(AVG(price), 0) AS price_volatility,
|
||||
COUNT(DISTINCT source_vendor_id) AS vendor_count
|
||||
FROM price_observations
|
||||
GROUP BY transceiver_id
|
||||
) po ON po.transceiver_id = t.id
|
||||
LEFT JOIN (
|
||||
SELECT transceiver_id, COUNT(*) AS compat_count
|
||||
FROM compatibility
|
||||
WHERE status = 'compatible'
|
||||
GROUP BY transceiver_id
|
||||
) co ON co.transceiver_id = t.id
|
||||
WHERE t.data_confidence != 'garbage' OR t.data_confidence IS NULL
|
||||
ON CONFLICT (transceiver_id) DO UPDATE SET
|
||||
abc_class = EXCLUDED.abc_class,
|
||||
obs_90d = EXCLUDED.obs_90d,
|
||||
compat_count = EXCLUDED.compat_count,
|
||||
vendor_count = EXCLUDED.vendor_count,
|
||||
price_volatility = EXCLUDED.price_volatility,
|
||||
demand_score = EXCLUDED.demand_score,
|
||||
supply_risk = EXCLUDED.supply_risk,
|
||||
computed_at = NOW()
|
||||
`);
|
||||
|
||||
const stats = await pool.query(`
|
||||
SELECT abc_class, COUNT(*) AS count FROM abc_classification GROUP BY abc_class ORDER BY abc_class
|
||||
`);
|
||||
console.log("[abc] Classification done:", stats.rows.map((r) => `${r.abc_class}: ${r.count}`).join(", "));
|
||||
}
|
||||
|
||||
// ─────────────────────────────────────────────────────────────────────────────
|
||||
// Reorder Signal computation
|
||||
// ─────────────────────────────────────────────────────────────────────────────
|
||||
|
||||
export async function computeReorderSignals(): Promise<void> {
|
||||
console.log("[reorder] Computing reorder signals...");
|
||||
|
||||
// Get all transceivers with enough data
|
||||
const transceivers = await pool.query(`
|
||||
SELECT
|
||||
t.id, t.part_number, t.standard_name, t.speed_gbps, t.form_factor,
|
||||
ac.abc_class, ac.price_volatility, ac.supply_risk,
|
||||
-- Price trend: is price rising or falling?
|
||||
(SELECT AVG(price) FROM price_observations WHERE transceiver_id = t.id AND time > NOW() - INTERVAL '14 days') AS price_recent,
|
||||
(SELECT AVG(price) FROM price_observations WHERE transceiver_id = t.id AND time BETWEEN NOW() - INTERVAL '60 days' AND NOW() - INTERVAL '14 days') AS price_older,
|
||||
-- Stock trend: how many vendors show in_stock recently?
|
||||
(SELECT COUNT(*) FROM stock_snapshots WHERE transceiver_id = t.id AND stock_level = 'in_stock' AND scraped_at > NOW() - INTERVAL '7 days') AS in_stock_recent,
|
||||
(SELECT COUNT(*) FROM stock_snapshots WHERE transceiver_id = t.id AND stock_level = 'out_of_stock' AND scraped_at > NOW() - INTERVAL '7 days') AS oos_recent,
|
||||
-- Lead time
|
||||
(SELECT AVG(lead_time_days) FROM stock_snapshots WHERE transceiver_id = t.id AND lead_time_days IS NOT NULL AND scraped_at > NOW() - INTERVAL '30 days') AS avg_lead_time_days,
|
||||
-- Lifecycle events
|
||||
(SELECT MAX(impact_level) FROM product_lifecycle_events WHERE transceiver_id = t.id OR technology = t.speed_gbps::text || 'G') AS lifecycle_impact
|
||||
FROM transceivers t
|
||||
LEFT JOIN abc_classification ac ON ac.transceiver_id = t.id
|
||||
WHERE (t.data_confidence != 'garbage' OR t.data_confidence IS NULL)
|
||||
`);
|
||||
|
||||
let computed = 0;
|
||||
for (const row of transceivers.rows) {
|
||||
const reasons: string[] = [];
|
||||
let signal: "buy_now" | "wait" | "hold" | "monitor" = "monitor";
|
||||
let strength = 0.3;
|
||||
|
||||
const priceTrend = row.price_recent && row.price_older
|
||||
? (row.price_recent - row.price_older) / row.price_older
|
||||
: null;
|
||||
|
||||
const stockTrend =
|
||||
row.oos_recent > 2 ? "declining" :
|
||||
row.in_stock_recent > 2 ? "stable" : "unknown";
|
||||
|
||||
const leadTimeWeeks = row.avg_lead_time_days ? Math.ceil(row.avg_lead_time_days / 7) : null;
|
||||
|
||||
// Signal logic
|
||||
if (row.lifecycle_impact === "critical" || row.lifecycle_impact === "high") {
|
||||
signal = "buy_now"; strength = 0.9;
|
||||
reasons.push("EOL/critical lifecycle event detected");
|
||||
} else if (stockTrend === "declining" && row.abc_class === "A") {
|
||||
signal = "buy_now"; strength = 0.8;
|
||||
reasons.push("Stock declining at multiple vendors (A-product)");
|
||||
} else if (leadTimeWeeks && leadTimeWeeks >= 12) {
|
||||
signal = "buy_now"; strength = 0.75;
|
||||
reasons.push(`Long lead time: ${leadTimeWeeks} weeks — order in advance`);
|
||||
} else if (priceTrend !== null && priceTrend < -0.10) {
|
||||
signal = "wait"; strength = 0.7;
|
||||
reasons.push(`Price falling ${Math.abs(Math.round(priceTrend * 100))}% — wait for floor`);
|
||||
} else if (priceTrend !== null && priceTrend > 0.10) {
|
||||
signal = "buy_now"; strength = 0.65;
|
||||
reasons.push(`Price rising ${Math.round(priceTrend * 100)}% — buy before further increase`);
|
||||
} else if (row.abc_class === "A" && stockTrend === "stable") {
|
||||
signal = "hold"; strength = 0.5;
|
||||
reasons.push("A-product, stable pricing and availability");
|
||||
} else if (row.abc_class === "C") {
|
||||
signal = "monitor"; strength = 0.3;
|
||||
reasons.push("C-product: low demand — order on demand only");
|
||||
}
|
||||
|
||||
if (reasons.length === 0) reasons.push("Insufficient data for strong signal");
|
||||
|
||||
await pool.query(
|
||||
`INSERT INTO reorder_signals
|
||||
(transceiver_id, signal, signal_strength, reasons, stock_trend, price_trend, lead_time_weeks)
|
||||
VALUES ($1, $2, $3, $4, $5, $6, $7)`,
|
||||
[
|
||||
row.id,
|
||||
signal,
|
||||
strength,
|
||||
JSON.stringify(reasons),
|
||||
stockTrend,
|
||||
priceTrend === null ? "unknown" : priceTrend > 0.05 ? "rising" : priceTrend < -0.05 ? "falling" : "stable",
|
||||
leadTimeWeeks,
|
||||
]
|
||||
);
|
||||
computed++;
|
||||
}
|
||||
|
||||
console.log(`[reorder] Computed ${computed} reorder signals.`);
|
||||
}
|
||||
338
sql/019-procurement-intelligence.sql
Normal file
338
sql/019-procurement-intelligence.sql
Normal file
@ -0,0 +1,338 @@
|
||||
-- Migration 019: Procurement Intelligence Engine
|
||||
-- Stock tracking, ABC classification, reorder signals, market intelligence
|
||||
-- v0.2.0 — WS0c: Procurement Intelligence Foundation
|
||||
|
||||
-- ─────────────────────────────────────────────────────────────────────────────
|
||||
-- 1. Stock Snapshots — time-series lagerbestand per vendor per product
|
||||
-- ─────────────────────────────────────────────────────────────────────────────
|
||||
CREATE TABLE IF NOT EXISTS stock_snapshots (
|
||||
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
|
||||
transceiver_id UUID REFERENCES transceivers(id) ON DELETE CASCADE,
|
||||
vendor_id UUID REFERENCES vendors(id) ON DELETE CASCADE,
|
||||
stock_level TEXT CHECK (stock_level IN ('in_stock', 'out_of_stock', 'limited', 'unknown')) DEFAULT 'unknown',
|
||||
stock_quantity INT, -- exact quantity if vendor shows it
|
||||
incoming_quantity INT, -- "18 im Zulauf"
|
||||
incoming_eta DATE, -- "verfügbar ab 15. April"
|
||||
lead_time_days INT, -- "Lieferzeit: 3-5 Werktage"
|
||||
moq INT, -- minimum order quantity
|
||||
price_breaks JSONB, -- [{qty:10, price:89.00}, {qty:50, price:74.00}]
|
||||
source_url TEXT,
|
||||
crawler_confidence NUMERIC(3,2), -- 0.00 – 1.00 (Crawler LLM confidence)
|
||||
scraped_at TIMESTAMPTZ DEFAULT NOW()
|
||||
);
|
||||
|
||||
CREATE INDEX IF NOT EXISTS idx_stock_transceiver ON stock_snapshots(transceiver_id, scraped_at DESC);
|
||||
CREATE INDEX IF NOT EXISTS idx_stock_vendor ON stock_snapshots(vendor_id, scraped_at DESC);
|
||||
CREATE INDEX IF NOT EXISTS idx_stock_level ON stock_snapshots(stock_level) WHERE stock_level != 'unknown';
|
||||
|
||||
-- ─────────────────────────────────────────────────────────────────────────────
|
||||
-- 2. ABC Classification — computed turnover category
|
||||
-- ─────────────────────────────────────────────────────────────────────────────
|
||||
CREATE TABLE IF NOT EXISTS abc_classification (
|
||||
transceiver_id UUID PRIMARY KEY REFERENCES transceivers(id) ON DELETE CASCADE,
|
||||
abc_class TEXT NOT NULL CHECK (abc_class IN ('A', 'B', 'C')),
|
||||
-- inputs
|
||||
obs_90d INT DEFAULT 0, -- price observations in last 90 days (proxy for market demand)
|
||||
compat_count INT DEFAULT 0, -- number of compatible switches (market breadth)
|
||||
vendor_count INT DEFAULT 0, -- number of vendors selling it (competition = demand signal)
|
||||
price_volatility NUMERIC(5,4), -- STDDEV/AVG — high volatility = contested market
|
||||
-- derived signals
|
||||
demand_score NUMERIC(5,2), -- composite 0-100
|
||||
supply_risk TEXT CHECK (supply_risk IN ('low', 'medium', 'high')),
|
||||
computed_at TIMESTAMPTZ DEFAULT NOW()
|
||||
);
|
||||
|
||||
CREATE INDEX IF NOT EXISTS idx_abc_class ON abc_classification(abc_class);
|
||||
|
||||
-- ─────────────────────────────────────────────────────────────────────────────
|
||||
-- 3. Reorder Signals — computed buy/wait/hold/monitor recommendations
|
||||
-- ─────────────────────────────────────────────────────────────────────────────
|
||||
CREATE TABLE IF NOT EXISTS reorder_signals (
|
||||
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
|
||||
transceiver_id UUID REFERENCES transceivers(id) ON DELETE CASCADE,
|
||||
signal TEXT NOT NULL CHECK (signal IN ('buy_now', 'wait', 'hold', 'monitor')),
|
||||
signal_strength NUMERIC(3,2), -- 0.00 – 1.00 (how strong the signal is)
|
||||
reasons JSONB, -- ["Stock declining at 3 vendors", "Lead time 16 weeks"]
|
||||
stock_trend TEXT CHECK (stock_trend IN ('declining', 'stable', 'increasing', 'unknown')),
|
||||
price_trend TEXT CHECK (price_trend IN ('falling', 'stable', 'rising', 'unknown')),
|
||||
lead_time_weeks INT,
|
||||
hype_phase TEXT, -- from hype_cycle data
|
||||
computed_at TIMESTAMPTZ DEFAULT NOW(),
|
||||
expires_at TIMESTAMPTZ DEFAULT NOW() + INTERVAL '24 hours'
|
||||
);
|
||||
|
||||
CREATE INDEX IF NOT EXISTS idx_reorder_transceiver ON reorder_signals(transceiver_id, computed_at DESC);
|
||||
CREATE INDEX IF NOT EXISTS idx_reorder_signal ON reorder_signals(signal) WHERE expires_at > NOW();
|
||||
|
||||
-- ─────────────────────────────────────────────────────────────────────────────
|
||||
-- 4. Product Lifecycle Events — EOL, new standards, CapEx peaks, trade shows
|
||||
-- ─────────────────────────────────────────────────────────────────────────────
|
||||
CREATE TABLE IF NOT EXISTS product_lifecycle_events (
|
||||
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
|
||||
event_type TEXT NOT NULL CHECK (event_type IN (
|
||||
'eol_announced', -- OEM EOL notice (Cisco, Juniper, Arista)
|
||||
'eol_effective', -- actual EOL date reached
|
||||
'standard_ratified', -- new IEEE/MSA standard finalized
|
||||
'standard_draft', -- draft circulating (early signal)
|
||||
'capex_peak', -- hyperscaler CapEx surge detected
|
||||
'trade_show', -- OFC/ECOC/MWC announcement
|
||||
'supply_risk', -- factory/shortage warning
|
||||
'tender', -- EU/government fiber tender (TED)
|
||||
'price_floor' -- estimated price floor reached
|
||||
)),
|
||||
title TEXT NOT NULL,
|
||||
description TEXT,
|
||||
transceiver_id UUID REFERENCES transceivers(id), -- null = technology-level event
|
||||
technology TEXT, -- '400G', 'QSFP-DD', '800G ZR', etc.
|
||||
effective_date DATE, -- when this event takes effect
|
||||
source_url TEXT,
|
||||
source_name TEXT,
|
||||
impact_level TEXT CHECK (impact_level IN ('low', 'medium', 'high', 'critical')) DEFAULT 'medium',
|
||||
buy_signal TEXT CHECK (buy_signal IN ('buy_now', 'wait', 'hold', 'monitor')),
|
||||
verified BOOLEAN DEFAULT false,
|
||||
created_at TIMESTAMPTZ DEFAULT NOW()
|
||||
);
|
||||
|
||||
CREATE INDEX IF NOT EXISTS idx_lifecycle_type ON product_lifecycle_events(event_type, created_at DESC);
|
||||
CREATE INDEX IF NOT EXISTS idx_lifecycle_technology ON product_lifecycle_events(technology) WHERE technology IS NOT NULL;
|
||||
CREATE INDEX IF NOT EXISTS idx_lifecycle_signal ON product_lifecycle_events(buy_signal) WHERE buy_signal IS NOT NULL;
|
||||
|
||||
-- ─────────────────────────────────────────────────────────────────────────────
|
||||
-- 5. Market Intelligence — hyperscaler CapEx, OFC/ECOC, standards, tenders
|
||||
-- ─────────────────────────────────────────────────────────────────────────────
|
||||
CREATE TABLE IF NOT EXISTS market_intelligence (
|
||||
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
|
||||
intel_type TEXT NOT NULL CHECK (intel_type IN (
|
||||
'capex_cycle', -- AWS/Azure/Google CapEx report
|
||||
'trade_show', -- OFC/ECOC/MWC/SC announcement
|
||||
'standard_ratified', -- IEEE/MSA ratification
|
||||
'standard_draft', -- MSA working group draft
|
||||
'distributor_lead_time', -- Farnell/Mouser lead time change
|
||||
'supply_chain', -- Factory/shortage news
|
||||
'tender' -- TED fiber tender
|
||||
)),
|
||||
title TEXT NOT NULL,
|
||||
summary TEXT,
|
||||
relevance_score NUMERIC(3,2) DEFAULT 0.5, -- 0-1, LLM-assessed relevance
|
||||
technologies TEXT[], -- ['400G', 'QSFP-DD', 'ZR']
|
||||
buy_signal_implication TEXT CHECK (buy_signal_implication IN ('buy_now', 'wait', 'hold', 'monitor', 'none')),
|
||||
impact_horizon_months INT, -- how many months until this matters
|
||||
source_url TEXT,
|
||||
source_name TEXT NOT NULL,
|
||||
published_at TIMESTAMPTZ,
|
||||
created_at TIMESTAMPTZ DEFAULT NOW()
|
||||
);
|
||||
|
||||
CREATE INDEX IF NOT EXISTS idx_intel_type ON market_intelligence(intel_type, created_at DESC);
|
||||
CREATE INDEX IF NOT EXISTS idx_intel_technologies ON market_intelligence USING gin(technologies);
|
||||
CREATE INDEX IF NOT EXISTS idx_intel_signal ON market_intelligence(buy_signal_implication);
|
||||
|
||||
-- ─────────────────────────────────────────────────────────────────────────────
|
||||
-- 6. Crawler LLM Scrape Log — audit trail for Crawler LLM results
|
||||
-- ─────────────────────────────────────────────────────────────────────────────
|
||||
CREATE TABLE IF NOT EXISTS crawler_llm_log (
|
||||
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
|
||||
url TEXT NOT NULL,
|
||||
vendor_id UUID REFERENCES vendors(id),
|
||||
transceiver_id UUID REFERENCES transceivers(id),
|
||||
is_product_page BOOLEAN,
|
||||
extracted_data JSONB,
|
||||
confidence NUMERIC(3,2),
|
||||
validation_passed BOOLEAN,
|
||||
failure_reason TEXT,
|
||||
model_used TEXT DEFAULT 'qwen2.5:14b',
|
||||
scraped_at TIMESTAMPTZ DEFAULT NOW()
|
||||
);
|
||||
|
||||
CREATE INDEX IF NOT EXISTS idx_llm_log_url ON crawler_llm_log(url, scraped_at DESC);
|
||||
CREATE INDEX IF NOT EXISTS idx_llm_log_vendor ON crawler_llm_log(vendor_id, scraped_at DESC);
|
||||
|
||||
-- ─────────────────────────────────────────────────────────────────────────────
|
||||
-- 7. Useful views
|
||||
-- ─────────────────────────────────────────────────────────────────────────────
|
||||
|
||||
-- Latest stock per product per vendor
|
||||
CREATE OR REPLACE VIEW v_stock_current AS
|
||||
SELECT DISTINCT ON (ss.transceiver_id, ss.vendor_id)
|
||||
ss.*,
|
||||
t.part_number, t.standard_name, t.form_factor, t.speed_gbps,
|
||||
v.name AS vendor_name, v.slug AS vendor_slug
|
||||
FROM stock_snapshots ss
|
||||
JOIN transceivers t ON ss.transceiver_id = t.id
|
||||
JOIN vendors v ON ss.vendor_id = v.id
|
||||
ORDER BY ss.transceiver_id, ss.vendor_id, ss.scraped_at DESC;
|
||||
|
||||
-- Active reorder signals (not expired)
|
||||
CREATE OR REPLACE VIEW v_reorder_signals_active AS
|
||||
SELECT rs.*,
|
||||
t.part_number, t.standard_name, t.form_factor, t.speed_gbps, t.reach_label,
|
||||
ac.abc_class
|
||||
FROM reorder_signals rs
|
||||
JOIN transceivers t ON rs.transceiver_id = t.id
|
||||
LEFT JOIN abc_classification ac ON ac.transceiver_id = t.id
|
||||
WHERE rs.expires_at > NOW()
|
||||
AND rs.computed_at = (
|
||||
SELECT MAX(computed_at) FROM reorder_signals r2 WHERE r2.transceiver_id = rs.transceiver_id
|
||||
)
|
||||
ORDER BY rs.signal_strength DESC;
|
||||
|
||||
-- Stock trend (is it declining at vendors?)
|
||||
CREATE OR REPLACE VIEW v_stock_trend AS
|
||||
SELECT
|
||||
transceiver_id,
|
||||
vendor_id,
|
||||
COUNT(*) AS snapshot_count,
|
||||
-- Compare recent vs older snapshots
|
||||
COUNT(*) FILTER (WHERE stock_level = 'out_of_stock' AND scraped_at > NOW() - INTERVAL '7 days') AS oos_recent,
|
||||
COUNT(*) FILTER (WHERE stock_level = 'in_stock' AND scraped_at > NOW() - INTERVAL '7 days') AS in_stock_recent,
|
||||
COUNT(*) FILTER (WHERE stock_level = 'out_of_stock' AND scraped_at BETWEEN NOW() - INTERVAL '30 days' AND NOW() - INTERVAL '7 days') AS oos_older,
|
||||
CASE
|
||||
WHEN COUNT(*) FILTER (WHERE stock_level = 'out_of_stock' AND scraped_at > NOW() - INTERVAL '7 days') >
|
||||
COUNT(*) FILTER (WHERE stock_level = 'out_of_stock' AND scraped_at BETWEEN NOW() - INTERVAL '30 days' AND NOW() - INTERVAL '7 days')
|
||||
THEN 'declining'
|
||||
WHEN COUNT(*) FILTER (WHERE stock_level = 'in_stock' AND scraped_at > NOW() - INTERVAL '7 days') > 2 THEN 'stable'
|
||||
ELSE 'unknown'
|
||||
END AS trend
|
||||
FROM stock_snapshots
|
||||
WHERE scraped_at > NOW() - INTERVAL '30 days'
|
||||
GROUP BY transceiver_id, vendor_id;
|
||||
|
||||
-- ─────────────────────────────────────────────────────────────────────────────
|
||||
-- 8. Seed: Known market intelligence events (static knowledge base)
|
||||
-- ─────────────────────────────────────────────────────────────────────────────
|
||||
INSERT INTO market_intelligence (intel_type, title, summary, relevance_score, technologies, buy_signal_implication, impact_horizon_months, source_name, published_at) VALUES
|
||||
(
|
||||
'trade_show',
|
||||
'OFC 2026 — Key 800G ZR and Co-Packaged Optics Announcements',
|
||||
'OFC 2026 highlighted accelerated 800G ZR deployment timelines and first Co-Packaged Optics (CPO) demos from Broadcom and Intel. CPO replaces pluggable modules in 4-6 years for hyperscaler intra-DC. Short term: 400G ZR+ and 800G QSFP-DD demand surge expected in 2026-2027.',
|
||||
0.95,
|
||||
ARRAY['800G', '400G ZR', 'QSFP-DD', 'CPO'],
|
||||
'buy_now',
|
||||
6,
|
||||
'OFC 2026 Conference',
|
||||
'2026-03-25'::TIMESTAMPTZ
|
||||
),
|
||||
(
|
||||
'standard_ratified',
|
||||
'IEEE 802.3df — 100G, 200G, 400G Ethernet over single-mode fiber',
|
||||
'802.3df ratified December 2024. Defines 100GBASE-DR, 200GBASE-DR4, 400GBASE-DR4 with PAM4 modulation. Vendors shipping compliant optics in H1 2026. Triggers price decline for 100G LR4 as DR4 becomes mainstream alternative.',
|
||||
0.88,
|
||||
ARRAY['100G', '200G', '400G', 'DR4', 'PAM4'],
|
||||
'wait',
|
||||
3,
|
||||
'IEEE 802.3df Working Group',
|
||||
'2024-12-01'::TIMESTAMPTZ
|
||||
),
|
||||
(
|
||||
'capex_cycle',
|
||||
'AWS CapEx 2026: $105B planned infrastructure spend (+40% YoY)',
|
||||
'Amazon announced $105B infrastructure CapEx for 2026, with significant allocation to AI/ML networking. Q1/Q2 typically slower, Q3/Q4 peak deployment. Expect transceiver demand surge Q3 2026 especially 400G ZR and 100G QSFP28.',
|
||||
0.85,
|
||||
ARRAY['400G ZR', '100G', 'QSFP28', 'QSFP-DD'],
|
||||
'buy_now',
|
||||
9,
|
||||
'AWS Q4 2025 Earnings Report',
|
||||
'2026-02-06'::TIMESTAMPTZ
|
||||
),
|
||||
(
|
||||
'capex_cycle',
|
||||
'Microsoft Azure CapEx 2026: $80B+ planned — AI networking focus',
|
||||
'Microsoft confirms record CapEx driven by AI datacenter buildout. Azure networking upgrades prioritizing 400G+ spine/leaf. Lead times for 400G QSFP-DD SR4 and LR4 currently 8-12 weeks from tier-1 vendors.',
|
||||
0.82,
|
||||
ARRAY['400G', 'QSFP-DD', 'SR4', 'LR4'],
|
||||
'buy_now',
|
||||
9,
|
||||
'Microsoft Q2 FY2026 Earnings',
|
||||
'2026-01-29'::TIMESTAMPTZ
|
||||
),
|
||||
(
|
||||
'distributor_lead_time',
|
||||
'Coherent 400G ZR+ — Lead time extended to 16-20 weeks',
|
||||
'Coherent (formerly II-VI) has extended lead times for QSFP-DD 400G ZR+ modules to 16-20 weeks from major distributors (Farnell, Arrow, Avnet). Cause: wafer fab capacity constrained by AI optics demand. Expected normalization Q4 2026.',
|
||||
0.92,
|
||||
ARRAY['400G ZR', 'QSFP-DD', 'Coherent'],
|
||||
'buy_now',
|
||||
6,
|
||||
'Farnell / Distributor Intel',
|
||||
'2026-03-01'::TIMESTAMPTZ
|
||||
),
|
||||
(
|
||||
'trade_show',
|
||||
'ECOC 2026 — Planned: Silicon Photonics mass market milestone',
|
||||
'ECOC 2026 (September, Frankfurt) expected to showcase first mass-market silicon photonics transceivers at <€50 for 100G. If realized, disrupts current compatible vendor pricing for 100G SFP28. Monitor closely for 100G category.',
|
||||
0.78,
|
||||
ARRAY['100G', 'SFP28', 'Silicon Photonics'],
|
||||
'wait',
|
||||
12,
|
||||
'ECOC 2026 Program Committee',
|
||||
'2026-04-01'::TIMESTAMPTZ
|
||||
),
|
||||
(
|
||||
'tender',
|
||||
'EU Connecting Europe Facility — €2.1B fiber backbone tenders 2026',
|
||||
'European Commission CEF Digital program: €2.1B in fiber backbone tenders across DE, FR, PL, SE in 2026. Each tender = 6-18 month deployment window. Triggers DWDM + ROADM + coherent transceiver demand (100G/400G ZR). TED database: TED-OJ.',
|
||||
0.75,
|
||||
ARRAY['DWDM', '100G', '400G ZR', 'Coherent', 'ROADM'],
|
||||
'monitor',
|
||||
18,
|
||||
'EU TED / Connecting Europe Facility',
|
||||
'2026-01-15'::TIMESTAMPTZ
|
||||
)
|
||||
ON CONFLICT DO NOTHING;
|
||||
|
||||
-- ─────────────────────────────────────────────────────────────────────────────
|
||||
-- 9. Seed: Known lifecycle events
|
||||
-- ─────────────────────────────────────────────────────────────────────────────
|
||||
INSERT INTO product_lifecycle_events (event_type, title, description, technology, effective_date, source_name, impact_level, buy_signal) VALUES
|
||||
(
|
||||
'eol_announced',
|
||||
'Cisco SFP-10G-LR — EOL announced, EOS 2027-06-30',
|
||||
'Cisco Product Bulletin: SFP-10G-LR (CS-SFPHLX10G-LR) enters End of Sale 2026-06-30, End of Support 2027-06-30. Customers must migrate to SFP-10G-LR-S or compatible alternatives. Hortungs-Rush expected Q1-Q2 2026.',
|
||||
'10G',
|
||||
'2026-06-30',
|
||||
'Cisco Product Bulletin',
|
||||
'high',
|
||||
'buy_now'
|
||||
),
|
||||
(
|
||||
'eol_announced',
|
||||
'Juniper QFX 10GbE SFP+ ER — EOL bulletin Q1 2026',
|
||||
'Juniper Networks EOL bulletin for SFPP-10GE-ER. End of Engineering 2026-06-01. Last time order date 2026-09-01. Customers should evaluate EX-SFP-10GE-ER-S alternatives.',
|
||||
'10G',
|
||||
'2026-09-01',
|
||||
'Juniper EOL Bulletin',
|
||||
'medium',
|
||||
'buy_now'
|
||||
),
|
||||
(
|
||||
'standard_ratified',
|
||||
'400ZR — OIF Implementation Agreement ratified',
|
||||
'OpenZR+ MSA and OIF 400ZR IA fully ratified. Multi-vendor interoperability confirmed at Interop events. Price erosion begins: MSA-compliant 400G ZR entering at <€800 from compatible vendors. OEM premium shrinking.',
|
||||
'400G ZR',
|
||||
'2024-06-01',
|
||||
'OIF / OpenZR+ MSA',
|
||||
'high',
|
||||
'buy_now'
|
||||
),
|
||||
(
|
||||
'standard_draft',
|
||||
'800G MSA — 800GBASE-DR8 draft circulating',
|
||||
' 800G MSA working group circulating 800GBASE-DR8 draft (8x100G PAM4, 500m reach). Expected ratification Q3 2026. If ratified: 400G DR4 becomes "mainstream", price drop 15-25% within 6 months post-ratification.',
|
||||
'800G',
|
||||
'2026-09-01',
|
||||
'800G MSA Working Group',
|
||||
'medium',
|
||||
'hold'
|
||||
)
|
||||
ON CONFLICT DO NOTHING;
|
||||
|
||||
-- ─────────────────────────────────────────────────────────────────────────────
|
||||
-- Done
|
||||
-- ─────────────────────────────────────────────────────────────────────────────
|
||||
SELECT
|
||||
(SELECT COUNT(*) FROM stock_snapshots) AS stock_snapshots,
|
||||
(SELECT COUNT(*) FROM abc_classification) AS abc_entries,
|
||||
(SELECT COUNT(*) FROM reorder_signals) AS reorder_signals,
|
||||
(SELECT COUNT(*) FROM product_lifecycle_events) AS lifecycle_events,
|
||||
(SELECT COUNT(*) FROM market_intelligence) AS market_intel_entries,
|
||||
(SELECT COUNT(*) FROM crawler_llm_log) AS crawler_log_entries;
|
||||
Loading…
x
Reference in New Issue
Block a user