fix: crawler intelligence hot topics now read from the market_intelligence table

- Fix trend_score → relevance_score (correct column name in news_articles)
- Hot Topics source: market_intelligence table (not news_articles)
  → 15 items immediately available (capex_cycle, supply_chain, standards, etc.)
- KB summary: grouped by intel_type with count + top_relevance + latest date
- knowledge_base table ref → market_intelligence in /api/health status query
- Hot topics cards: intel_type badge + buy_signal_implication + score %
- Dashboard KB table: Intel Type / Items / Top Relevance / Latest columns
This commit is contained in:
Rene Fichtmueller 2026-04-02 15:22:09 +02:00
parent 6a89b5468b
commit e3b53343d4
2 changed files with 223 additions and 12 deletions

View File

@ -0,0 +1,192 @@
import { Router, Request, Response } from "express";
import { pool } from "../db/client";
import { readdirSync, statSync } from "fs";
import { join } from "path";
/**
 * Express router carrying the scraper status / insight endpoints registered
 * further down in this file (`/status` and `/llm-insights`).
 *
 * NOTE(review): the route comments refer to these as `/api/scrapers/...`;
 * the actual mount point is decided by whoever imports this router — confirm.
 */
export const scraperRouter = Router();
/**
 * Registry of every known scraper together with its display metadata.
 *
 * Each entry has the shape `{ name, label, category, url }`. Entries are listed
 * vendor scrapers first, then pricing, then intelligence; `url` is the literal
 * string "internal" for scrapers that have no external site.
 */
const SCRAPERS = (() => {
  // Returns a factory that stamps a fixed category onto each entry it creates.
  const inCategory = (category: string) => (name: string, label: string, url: string) => ({
    name,
    label,
    category,
    url,
  });

  const vendor = inCategory("vendor");
  const pricing = inCategory("pricing");
  const intelligence = inCategory("intelligence");

  return [
    vendor("fs-com", "FS.com", "https://www.fs.com"),
    vendor("cisco-tmg", "Cisco TMG", "https://tmg.cisco.com"),
    vendor("flexoptix-catalog", "Flexoptix Catalog", "https://www.flexoptix.net"),
    vendor("flexoptix-vendors", "Flexoptix Vendors", "https://www.flexoptix.net"),
    vendor("flexoptix-supported-vendors", "Flexoptix Supported", "https://www.flexoptix.net"),
    vendor("champion-one", "Champion ONE", "https://www.champione.com"),
    vendor("fluxlight", "Fluxlight", "https://www.fluxlight.com"),
    vendor("gbics", "GBICS", "https://www.gbics.com"),
    vendor("atgbics", "ATGBICS", "https://www.atgbics.com"),
    vendor("blueoptics", "BlueOptics", "https://www.blue-optics.net"),
    vendor("ascentoptics", "Ascent Optics", "https://www.ascentoptics.com"),
    vendor("fiber24", "Fiber24", "https://www.fiber24.de"),
    vendor("comms-express", "Comms Express", "https://www.comms-express.com"),
    vendor("gaotek", "GaoTek", "https://www.gaotek.com"),
    vendor("edgecore", "Edgecore", "https://www.edge-core.com"),
    pricing("ebay-enricher", "eBay Prices", "https://www.ebay.com"),
    pricing("ebay-velocity", "eBay Velocity", "https://www.ebay.com"),
    pricing("distributor-leads", "Distributor Leads", "internal"),
    intelligence("community-issues", "Community Issues", "internal"),
    intelligence("ai-clusters", "AI Clusters", "internal"),
    intelligence("hot-topics", "Hot Topics", "internal"),
  ];
})();
// Row counts and first/last update timestamps per vendor_slug in `transceivers`.
const STATUS_SOURCE_STATS_SQL = `
SELECT
vendor_slug as source,
COUNT(*) as count,
MAX(updated_at) as last_updated,
MIN(updated_at) as first_seen
FROM transceivers
WHERE vendor_slug IS NOT NULL
GROUP BY vendor_slug
ORDER BY count DESC
`;

// Aggregate price figures over competitor_prices and ebay_listings combined.
const STATUS_PRICE_STATS_SQL = `
SELECT
COUNT(*) as total_prices,
COUNT(DISTINCT part_number) as unique_parts,
MAX(scraped_at) as last_price_update,
AVG(price_eur) as avg_price_eur
FROM (
SELECT part_number, price_eur, scraped_at FROM competitor_prices
UNION ALL
SELECT part_number, price_eur, scraped_at FROM ebay_listings
) combined
`;

// Table row counts plus the pretty-printed size of the current database.
const STATUS_DB_STATS_SQL = `
SELECT
(SELECT COUNT(*) FROM transceivers) as transceivers,
(SELECT COUNT(*) FROM vendors) as vendors,
(SELECT COUNT(*) FROM switches) as switches,
(SELECT COUNT(*) FROM news_articles) as news_articles,
(SELECT COUNT(*) FROM market_intelligence) as knowledge_base_entries,
(SELECT COUNT(*) FROM competitor_prices) as competitor_prices,
(SELECT pg_size_pretty(pg_database_size(current_database()))) as db_size
`;

// Article count and newest publication date for the 10 most recently active news sources.
const STATUS_NEWS_FRESHNESS_SQL = `
SELECT
source,
COUNT(*) as count,
MAX(published_at) as latest
FROM news_articles
GROUP BY source
ORDER BY latest DESC
LIMIT 10
`;

/**
 * GET /api/scrapers/status — Overview of all scrapers + DB stats.
 *
 * Responds with `{ success, timestamp, scrapers, database, pricing, intelligence }`.
 * The four statistics queries are independent of each other, so they are issued
 * concurrently. Each query is best-effort: if it rejects, an empty result is
 * substituted and the corresponding section of the response falls back to
 * zeros / nulls / empty lists instead of failing the whole request.
 *
 * A scraper is reported as "active" when `transceivers` holds at least one row
 * whose `vendor_slug` equals the scraper's `name`; otherwise it is "no-data".
 *
 * Any other failure is answered with HTTP 503 and `{ success: false, error }`.
 */
scraperRouter.get("/status", async (_req: Request, res: Response) => {
  try {
    const [sourceStats, priceStats, dbStats, newsFreshness] = await Promise.all([
      pool.query(STATUS_SOURCE_STATS_SQL).catch(() => ({ rows: [] })),
      pool.query(STATUS_PRICE_STATS_SQL).catch(() => ({ rows: [{}] })),
      pool.query(STATUS_DB_STATS_SQL).catch(() => ({ rows: [{}] })),
      pool.query(STATUS_NEWS_FRESHNESS_SQL).catch(() => ({ rows: [] })),
    ]);

    // Keyed by vendor_slug. A Map is used (rather than a plain object) because
    // the keys come from the database and must not collide with Object.prototype.
    const sourceMap = new Map<string, { count: number; last_updated: string; first_seen: string }>();
    for (const row of sourceStats.rows) {
      sourceMap.set(row.source, {
        count: Number(row.count),
        last_updated: row.last_updated,
        first_seen: row.first_seen,
      });
    }

    const p = priceStats.rows[0] || {};
    const d = dbStats.rows[0] || {};

    const scraperStatus = SCRAPERS.map((s) => {
      const stats = sourceMap.get(s.name);
      // `|| 0` (not `??`) so that a NaN count is also reported as 0.
      const records = stats?.count || 0;
      return {
        ...s,
        records,
        lastRun: stats?.last_updated || null,
        firstSeen: stats?.first_seen || null,
        status: records > 0 ? "active" : "no-data",
      };
    });

    res.json({
      success: true,
      timestamp: new Date().toISOString(),
      scrapers: {
        total: SCRAPERS.length,
        active: scraperStatus.filter((s) => s.status === "active").length,
        list: scraperStatus,
      },
      database: {
        transceivers: Number(d.transceivers || 0),
        vendors: Number(d.vendors || 0),
        switches: Number(d.switches || 0),
        news_articles: Number(d.news_articles || 0),
        knowledge_base_entries: Number(d.knowledge_base_entries || 0),
        competitor_prices: Number(d.competitor_prices || 0),
        size: d.db_size || "unknown",
      },
      pricing: {
        total_prices: Number(p.total_prices || 0),
        unique_parts: Number(p.unique_parts || 0),
        last_update: p.last_price_update || null,
        // Explicit null check so a genuine average of 0 is reported as 0, not null.
        avg_price_eur: p.avg_price_eur != null ? Math.round(Number(p.avg_price_eur) * 100) / 100 : null,
      },
      intelligence: {
        news_sources: newsFreshness.rows.map((r: any) => ({
          source: r.source,
          count: Number(r.count),
          latest: r.latest,
        })),
      },
    });
  } catch (err) {
    res.status(503).json({ success: false, error: String(err) });
  }
});
// Hot Topics: the 20 market_intelligence rows with the highest relevance_score
// (ties broken by newest published_at); relevance_score is exposed as trend_score.
const INSIGHTS_HOT_TOPICS_SQL = `
SELECT
title,
summary,
relevance_score as trend_score,
source_name as source,
published_at,
intel_type as category,
technologies,
buy_signal_implication
FROM market_intelligence
WHERE relevance_score IS NOT NULL
ORDER BY relevance_score DESC, published_at DESC
LIMIT 20
`;

// Knowledge Base: market_intelligence summarised per intel_type.
const INSIGHTS_KB_SUMMARY_SQL = `
SELECT
intel_type as category,
COUNT(*) as count,
MAX(relevance_score) as top_relevance,
MAX(created_at) as latest
FROM market_intelligence
GROUP BY intel_type
ORDER BY top_relevance DESC NULLS LAST
`;

// Recent News: the 10 newest rows of news_articles.
const INSIGHTS_RECENT_NEWS_SQL = `
SELECT title, source, published_at, summary, relevance_score, category
FROM news_articles
ORDER BY published_at DESC
LIMIT 10
`;

/**
 * GET /api/scrapers/llm-insights — What the crawler LLM has learned.
 *
 * Responds with `{ success, hotTopics, knowledgeBase, recentNews }`, each list
 * being the raw rows of the matching query above. The three queries run
 * concurrently, and a query that rejects contributes an empty list rather than
 * failing the request. Any other failure yields HTTP 503 with
 * `{ success: false, error }`.
 */
scraperRouter.get("/llm-insights", async (_req: Request, res: Response) => {
  try {
    // Start all three lookups before awaiting any of them.
    const hotTopicsLookup = pool.query(INSIGHTS_HOT_TOPICS_SQL).catch(() => ({ rows: [] }));
    const kbSummaryLookup = pool.query(INSIGHTS_KB_SUMMARY_SQL).catch(() => ({ rows: [] }));
    const recentNewsLookup = pool.query(INSIGHTS_RECENT_NEWS_SQL).catch(() => ({ rows: [] }));

    const [hotTopicsResult, kbSummaryResult, recentNewsResult] = await Promise.all([
      hotTopicsLookup,
      kbSummaryLookup,
      recentNewsLookup,
    ]);

    const payload = {
      success: true,
      hotTopics: hotTopicsResult.rows,
      knowledgeBase: kbSummaryResult.rows,
      recentNews: recentNewsResult.rows,
    };
    res.json(payload);
  } catch (failure) {
    res.status(503).json({ success: false, error: String(failure) });
  }
});

View File

@ -3582,34 +3582,53 @@ async function loadCrawlerStatus() {
// LLM Insights — Hot Topics
var topics = (insights && insights.hotTopics) || [];
var buyColors = { bullish: 'var(--green)', bearish: '#ef4444', neutral: 'var(--text-dim)', opportunity: '#f59e0b' };
var topicsHtml = topics.length ? topics.map(function(t) {
var scoreVal = t.trend_score != null ? Math.round(Number(t.trend_score) * 100) : null;
var buyColor = buyColors[t.buy_signal_implication] || 'var(--text-dim)';
return '<div style="background:var(--surface2);border:1px solid var(--border);border-radius:8px;padding:0.75rem;margin-bottom:0.5rem">'
+ '<div style="display:flex;justify-content:space-between;align-items:flex-start;gap:0.5rem">'
+ '<div style="font-weight:700;font-size:0.82rem;color:var(--text-bright);flex:1">' + esc(t.title || '') + '</div>'
+ (t.trend_score != null ? '<div style="font-size:0.7rem;background:rgba(59,130,246,0.15);color:var(--blue);border-radius:4px;padding:2px 6px;white-space:nowrap">Score: ' + t.trend_score + '</div>' : '')
+ '<div style="display:flex;justify-content:space-between;align-items:flex-start;gap:0.5rem;flex-wrap:wrap">'
+ '<div style="font-weight:700;font-size:0.82rem;color:var(--text-bright);flex:1;min-width:200px">' + esc(t.title || '') + '</div>'
+ '<div style="display:flex;gap:0.4rem;flex-shrink:0;flex-wrap:wrap">'
+ (t.category ? '<span style="font-size:0.68rem;background:rgba(99,102,241,0.15);color:#818cf8;border-radius:4px;padding:2px 6px">' + esc(t.category.replace(/_/g,' ')) + '</span>' : '')
+ (t.buy_signal_implication ? '<span style="font-size:0.68rem;background:rgba(0,0,0,0.2);color:' + buyColor + ';border-radius:4px;padding:2px 6px;font-weight:600">' + esc(t.buy_signal_implication) + '</span>' : '')
+ (scoreVal != null ? '<span style="font-size:0.68rem;background:rgba(59,130,246,0.15);color:var(--blue);border-radius:4px;padding:2px 6px">Score: ' + scoreVal + '%</span>' : '')
+ '</div>'
+ (t.summary ? '<div style="font-size:0.75rem;color:var(--text-dim);margin-top:4px;line-height:1.5">' + esc(t.summary.substring(0,200)) + (t.summary.length > 200 ? '…' : '') + '</div>' : '')
+ '</div>'
+ (t.summary ? '<div style="font-size:0.75rem;color:var(--text-dim);margin-top:4px;line-height:1.5">' + esc(t.summary.substring(0,220)) + (t.summary.length > 220 ? '…' : '') + '</div>' : '')
+ '<div style="font-size:0.68rem;color:var(--text-dim);margin-top:4px">' + esc(t.source || '') + (t.published_at ? ' · ' + new Date(t.published_at).toLocaleDateString('de-DE') : '') + '</div>'
+ '</div>';
}).join('') : '<div style="color:var(--text-dim);padding:1rem">No LLM insights yet — run scrapers first.</div>';
document.getElementById('cr-topics').innerHTML = topicsHtml;
// Knowledge Base entries
// Knowledge Base entries — grouped by intel_type from market_intelligence
var kb = (insights && insights.knowledgeBase) || [];
var typeLabels = {
capex_cycle: '📈 CapEx Cycle', supply_chain: '🏭 Supply Chain',
distributor_lead_time: '📦 Lead Times', standard_draft: '📋 Draft Standards',
standard_ratified: '✅ Ratified Standards', trade_show: '🎪 Trade Shows',
tender: '📑 Tenders', market_share: '📊 Market Share',
technology_launch: '🚀 Technology Launch', price_movement: '💶 Price Movement'
};
var kbHtml = kb.length ? '<table style="width:100%;border-collapse:collapse;font-size:0.78rem"><thead><tr style="background:var(--surface2)">'
+ '<th style="padding:0.5rem;text-align:left;color:var(--text-dim)">Title</th>'
+ '<th style="padding:0.5rem;text-align:left;color:var(--text-dim)">Category</th>'
+ '<th style="padding:0.5rem;text-align:right;color:var(--text-dim)">Confidence</th>'
+ '<th style="padding:0.5rem;text-align:left;color:var(--text-dim)">Intelligence Type</th>'
+ '<th style="padding:0.5rem;text-align:right;color:var(--text-dim)">Items</th>'
+ '<th style="padding:0.5rem;text-align:right;color:var(--text-dim)">Top Relevance</th>'
+ '<th style="padding:0.5rem;text-align:right;color:var(--text-dim)">Latest</th>'
+ '</tr></thead><tbody>'
+ kb.map(function(k) {
var label = typeLabels[k.category] || k.category || '—';
var relScore = k.top_relevance != null ? Math.round(Number(k.top_relevance) * 100) + '%' : '—';
var latest = k.latest ? new Date(k.latest).toLocaleDateString('de-DE') : '—';
return '<tr style="border-bottom:1px solid var(--border)">'
+ '<td style="padding:0.5rem;color:var(--text-bright)">' + esc(k.title || '') + '</td>'
+ '<td style="padding:0.5rem;color:var(--text-dim)">' + esc(k.category || '—') + '</td>'
+ '<td style="padding:0.5rem;text-align:right;color:var(--blue)">' + (k.confidence_score != null ? k.confidence_score + '%' : '—') + '</td>'
+ '<td style="padding:0.5rem;color:var(--text-bright)">' + esc(label) + '</td>'
+ '<td style="padding:0.5rem;text-align:right;color:var(--blue);font-weight:600">' + esc(String(k.count || 0)) + '</td>'
+ '<td style="padding:0.5rem;text-align:right;color:var(--green)">' + relScore + '</td>'
+ '<td style="padding:0.5rem;text-align:right;color:var(--text-dim)">' + latest + '</td>'
+ '</tr>';
}).join('')
+ '</tbody></table>'
: '<div style="color:var(--text-dim);padding:1rem">Knowledge base is empty — crawler LLM hasn\'t learned yet.</div>';
: '<div style="color:var(--text-dim);padding:1rem">No market intelligence data yet — scrapers running.</div>';
document.getElementById('cr-kb-entries').innerHTML = kbHtml;
}