fix: crawler intelligence hot topics now reads market_intelligence table
- Fix trend_score → relevance_score (correct column name in news_articles)
- Hot Topics source: market_intelligence table (not news_articles) → 15 items immediately available (capex_cycle, supply_chain, standards, etc.)
- KB summary: grouped by intel_type with count + top_relevance + latest date
- knowledge_base table ref → market_intelligence in /api/health status query
- Hot topics cards: intel_type badge + buy_signal_implication + score %
- Dashboard KB table: Intel Type / Items / Top Relevance / Latest columns
This commit is contained in:
parent
6a89b5468b
commit
e3b53343d4
192
packages/api/src/routes/scrapers.ts
Normal file
192
packages/api/src/routes/scrapers.ts
Normal file
@ -0,0 +1,192 @@
|
|||||||
|
import { Router, Request, Response } from "express";
|
||||||
|
import { pool } from "../db/client";
|
||||||
|
import { readdirSync, statSync } from "fs";
|
||||||
|
import { join } from "path";
|
||||||
|
|
||||||
|
export const scraperRouter = Router();
|
||||||
|
|
||||||
|
/** Functional grouping of a scraper, as used in the `category` field below. */
type ScraperCategory = "vendor" | "pricing" | "intelligence";

/** Static metadata describing one known scraper. */
interface ScraperMeta {
  /** Stable identifier; the status endpoint looks it up against `transceivers.vendor_slug`. */
  readonly name: string;
  /** Human-readable display name. */
  readonly label: string;
  /** Grouping bucket for the scraper. */
  readonly category: ScraperCategory;
  /** URL of the scraped site, or the literal "internal". */
  readonly url: string;
}

/**
 * List of all known scrapers with metadata.
 *
 * Typed explicitly so that a typo in `category` or a missing field is a
 * compile error, and marked readonly because the list is never mutated.
 */
const SCRAPERS: readonly ScraperMeta[] = [
  // Vendor catalogues
  { name: "fs-com", label: "FS.com", category: "vendor", url: "https://www.fs.com" },
  { name: "cisco-tmg", label: "Cisco TMG", category: "vendor", url: "https://tmg.cisco.com" },
  { name: "flexoptix-catalog", label: "Flexoptix Catalog", category: "vendor", url: "https://www.flexoptix.net" },
  { name: "flexoptix-vendors", label: "Flexoptix Vendors", category: "vendor", url: "https://www.flexoptix.net" },
  { name: "flexoptix-supported-vendors", label: "Flexoptix Supported", category: "vendor", url: "https://www.flexoptix.net" },
  { name: "champion-one", label: "Champion ONE", category: "vendor", url: "https://www.champione.com" },
  { name: "fluxlight", label: "Fluxlight", category: "vendor", url: "https://www.fluxlight.com" },
  { name: "gbics", label: "GBICS", category: "vendor", url: "https://www.gbics.com" },
  { name: "atgbics", label: "ATGBICS", category: "vendor", url: "https://www.atgbics.com" },
  { name: "blueoptics", label: "BlueOptics", category: "vendor", url: "https://www.blue-optics.net" },
  { name: "ascentoptics", label: "Ascent Optics", category: "vendor", url: "https://www.ascentoptics.com" },
  { name: "fiber24", label: "Fiber24", category: "vendor", url: "https://www.fiber24.de" },
  { name: "comms-express", label: "Comms Express", category: "vendor", url: "https://www.comms-express.com" },
  { name: "gaotek", label: "GaoTek", category: "vendor", url: "https://www.gaotek.com" },
  { name: "edgecore", label: "Edgecore", category: "vendor", url: "https://www.edge-core.com" },
  // Pricing sources
  { name: "ebay-enricher", label: "eBay Prices", category: "pricing", url: "https://www.ebay.com" },
  { name: "ebay-velocity", label: "eBay Velocity", category: "pricing", url: "https://www.ebay.com" },
  { name: "distributor-leads", label: "Distributor Leads", category: "pricing", url: "internal" },
  // Intelligence sources
  { name: "community-issues", label: "Community Issues", category: "intelligence", url: "internal" },
  { name: "ai-clusters", label: "AI Clusters", category: "intelligence", url: "internal" },
  { name: "hot-topics", label: "Hot Topics", category: "intelligence", url: "internal" },
];
|
||||||
|
|
||||||
|
/**
 * GET /api/scrapers/status — Overview of all scrapers + DB stats.
 *
 * Responds with JSON containing:
 *  - `scrapers`: every entry of SCRAPERS enriched with record count, first/last
 *    `updated_at` and an "active" / "no-data" status, derived from rows in
 *    `transceivers` whose `vendor_slug` equals the scraper name;
 *  - `database`: row counts of the main tables plus the database size;
 *  - `pricing`: aggregate price statistics over `competitor_prices` and `ebay_listings`;
 *  - `intelligence.news_sources`: the 10 most recently updated news sources.
 *
 * Each query is best-effort: if it fails, its section falls back to empty /
 * zero values instead of failing the whole request. Unexpected errors outside
 * the queries produce a 503 with `{ success: false, error }`.
 */
scraperRouter.get("/status", async (_req: Request, res: Response) => {
  // Runs one statement; on failure logs the error (so schema mistakes such as a
  // wrong column name are not silently hidden) and resolves to the fallback rows.
  const queryOrFallback = (label: string, sql: string, fallbackRows: Record<string, unknown>[]) =>
    pool.query(sql).catch((err: unknown) => {
      console.warn(`[scrapers/status] ${label} query failed:`, err);
      return { rows: fallbackRows };
    });

  try {
    // The four queries are independent, so run them concurrently.
    const [sourceStats, priceStats, dbStats, newsFreshness] = await Promise.all([
      // DB counts per source
      queryOrFallback(
        "source stats",
        `
        SELECT
          vendor_slug as source,
          COUNT(*) as count,
          MAX(updated_at) as last_updated,
          MIN(updated_at) as first_seen
        FROM transceivers
        WHERE vendor_slug IS NOT NULL
        GROUP BY vendor_slug
        ORDER BY count DESC
        `,
        [],
      ),
      // Price data stats
      queryOrFallback(
        "price stats",
        `
        SELECT
          COUNT(*) as total_prices,
          COUNT(DISTINCT part_number) as unique_parts,
          MAX(scraped_at) as last_price_update,
          AVG(price_eur) as avg_price_eur
        FROM (
          SELECT part_number, price_eur, scraped_at FROM competitor_prices
          UNION ALL
          SELECT part_number, price_eur, scraped_at FROM ebay_listings
        ) combined
        `,
        [{}],
      ),
      // Overall DB stats
      queryOrFallback(
        "db stats",
        `
        SELECT
          (SELECT COUNT(*) FROM transceivers) as transceivers,
          (SELECT COUNT(*) FROM vendors) as vendors,
          (SELECT COUNT(*) FROM switches) as switches,
          (SELECT COUNT(*) FROM news_articles) as news_articles,
          (SELECT COUNT(*) FROM market_intelligence) as knowledge_base_entries,
          (SELECT COUNT(*) FROM competitor_prices) as competitor_prices,
          (SELECT pg_size_pretty(pg_database_size(current_database()))) as db_size
        `,
        [{}],
      ),
      // News / intelligence freshness.
      // NULLS LAST: PostgreSQL sorts NULLs first for DESC, which would let
      // sources without any published_at crowd out the freshest ones.
      queryOrFallback(
        "news freshness",
        `
        SELECT
          source,
          COUNT(*) as count,
          MAX(published_at) as latest
        FROM news_articles
        GROUP BY source
        ORDER BY latest DESC NULLS LAST
        LIMIT 10
        `,
        [],
      ),
    ]);

    // Per-slug statistics. A Map is used because the keys come from the database.
    const statsBySlug = new Map<string, { count: number; last_updated: string; first_seen: string }>();
    for (const row of sourceStats.rows) {
      statsBySlug.set(row.source, {
        count: Number(row.count),
        last_updated: row.last_updated,
        first_seen: row.first_seen,
      });
    }

    const p = priceStats.rows[0] || {};
    const d = dbStats.rows[0] || {};

    const scraperStatus = SCRAPERS.map((s) => {
      const stats = statsBySlug.get(s.name);
      const records = stats?.count ?? 0;
      return {
        ...s,
        records,
        lastRun: stats?.last_updated ?? null,
        firstSeen: stats?.first_seen ?? null,
        status: records > 0 ? "active" : "no-data",
      };
    });

    res.json({
      success: true,
      timestamp: new Date().toISOString(),
      scrapers: {
        total: SCRAPERS.length,
        active: scraperStatus.filter((s) => s.status === "active").length,
        list: scraperStatus,
      },
      database: {
        transceivers: Number(d.transceivers ?? 0),
        vendors: Number(d.vendors ?? 0),
        switches: Number(d.switches ?? 0),
        news_articles: Number(d.news_articles ?? 0),
        knowledge_base_entries: Number(d.knowledge_base_entries ?? 0),
        competitor_prices: Number(d.competitor_prices ?? 0),
        size: d.db_size || "unknown",
      },
      pricing: {
        total_prices: Number(p.total_prices ?? 0),
        unique_parts: Number(p.unique_parts ?? 0),
        last_update: p.last_price_update ?? null,
        // Explicit null check so that a genuine average of 0 is not reported as
        // "no data"; the value is rounded to two decimals.
        avg_price_eur: p.avg_price_eur != null ? Math.round(Number(p.avg_price_eur) * 100) / 100 : null,
      },
      intelligence: {
        news_sources: newsFreshness.rows.map((r: any) => ({
          source: r.source,
          count: Number(r.count),
          latest: r.latest,
        })),
      },
    });
  } catch (err) {
    res.status(503).json({ success: false, error: String(err) });
  }
});
|
||||||
|
|
||||||
|
/**
 * GET /api/scrapers/llm-insights — What the crawler LLM has learned.
 *
 * Responds with JSON containing:
 *  - `hotTopics`: the 20 `market_intelligence` rows with the highest
 *    `relevance_score` (returned as `trend_score`; `intel_type` is returned as
 *    `category` and `source_name` as `source`);
 *  - `knowledgeBase`: `market_intelligence` grouped by `intel_type`, with item
 *    count, highest relevance and most recent `created_at` per group;
 *  - `recentNews`: the 10 most recently published `news_articles`.
 *
 * Each query is best-effort: if it fails, its section is returned as an empty
 * list. Unexpected errors outside the queries produce a 503 with
 * `{ success: false, error }`.
 */
scraperRouter.get("/llm-insights", async (_req: Request, res: Response) => {
  // Runs one statement; on failure logs the error (so schema mistakes such as a
  // wrong column name are not silently hidden) and resolves to an empty result.
  const queryOrEmpty = (label: string, sql: string) =>
    pool.query(sql).catch((err: unknown) => {
      console.warn(`[scrapers/llm-insights] ${label} query failed:`, err);
      return { rows: [] as Record<string, unknown>[] };
    });

  try {
    const [topics, kb, recentNews] = await Promise.all([
      // Hot Topics: top market intelligence items by relevance.
      // NULLS LAST on the tie-breaker: PostgreSQL sorts NULLs first for DESC,
      // which would rank undated items above dated ones of equal relevance.
      queryOrEmpty(
        "hot topics",
        `
        SELECT
          title,
          summary,
          relevance_score as trend_score,
          source_name as source,
          published_at,
          intel_type as category,
          technologies,
          buy_signal_implication
        FROM market_intelligence
        WHERE relevance_score IS NOT NULL
        ORDER BY relevance_score DESC, published_at DESC NULLS LAST
        LIMIT 20
        `,
      ),
      // Knowledge Base: market intelligence grouped by type
      queryOrEmpty(
        "knowledge base",
        `
        SELECT
          intel_type as category,
          COUNT(*) as count,
          MAX(relevance_score) as top_relevance,
          MAX(created_at) as latest
        FROM market_intelligence
        GROUP BY intel_type
        ORDER BY top_relevance DESC NULLS LAST
        `,
      ),
      // Recent News.
      // NULLS LAST so that articles without a publication date do not occupy
      // the "most recent" slots.
      queryOrEmpty(
        "recent news",
        `
        SELECT title, source, published_at, summary, relevance_score, category
        FROM news_articles
        ORDER BY published_at DESC NULLS LAST
        LIMIT 10
        `,
      ),
    ]);

    res.json({
      success: true,
      hotTopics: topics.rows,
      knowledgeBase: kb.rows,
      recentNews: recentNews.rows,
    });
  } catch (err) {
    res.status(503).json({ success: false, error: String(err) });
  }
});
|
||||||
@ -3582,34 +3582,53 @@ async function loadCrawlerStatus() {
|
|||||||
|
|
||||||
// LLM Insights — Hot Topics
|
// LLM Insights — Hot Topics
|
||||||
var topics = (insights && insights.hotTopics) || [];
|
var topics = (insights && insights.hotTopics) || [];
|
||||||
|
var buyColors = { bullish: 'var(--green)', bearish: '#ef4444', neutral: 'var(--text-dim)', opportunity: '#f59e0b' };
|
||||||
var topicsHtml = topics.length ? topics.map(function(t) {
|
var topicsHtml = topics.length ? topics.map(function(t) {
|
||||||
|
var scoreVal = t.trend_score != null ? Math.round(Number(t.trend_score) * 100) : null;
|
||||||
|
var buyColor = buyColors[t.buy_signal_implication] || 'var(--text-dim)';
|
||||||
return '<div style="background:var(--surface2);border:1px solid var(--border);border-radius:8px;padding:0.75rem;margin-bottom:0.5rem">'
|
return '<div style="background:var(--surface2);border:1px solid var(--border);border-radius:8px;padding:0.75rem;margin-bottom:0.5rem">'
|
||||||
+ '<div style="display:flex;justify-content:space-between;align-items:flex-start;gap:0.5rem">'
|
+ '<div style="display:flex;justify-content:space-between;align-items:flex-start;gap:0.5rem;flex-wrap:wrap">'
|
||||||
+ '<div style="font-weight:700;font-size:0.82rem;color:var(--text-bright);flex:1">' + esc(t.title || '') + '</div>'
|
+ '<div style="font-weight:700;font-size:0.82rem;color:var(--text-bright);flex:1;min-width:200px">' + esc(t.title || '') + '</div>'
|
||||||
+ (t.trend_score != null ? '<div style="font-size:0.7rem;background:rgba(59,130,246,0.15);color:var(--blue);border-radius:4px;padding:2px 6px;white-space:nowrap">Score: ' + t.trend_score + '</div>' : '')
|
+ '<div style="display:flex;gap:0.4rem;flex-shrink:0;flex-wrap:wrap">'
|
||||||
|
+ (t.category ? '<span style="font-size:0.68rem;background:rgba(99,102,241,0.15);color:#818cf8;border-radius:4px;padding:2px 6px">' + esc(t.category.replace(/_/g,' ')) + '</span>' : '')
|
||||||
|
+ (t.buy_signal_implication ? '<span style="font-size:0.68rem;background:rgba(0,0,0,0.2);color:' + buyColor + ';border-radius:4px;padding:2px 6px;font-weight:600">' + esc(t.buy_signal_implication) + '</span>' : '')
|
||||||
|
+ (scoreVal != null ? '<span style="font-size:0.68rem;background:rgba(59,130,246,0.15);color:var(--blue);border-radius:4px;padding:2px 6px">Score: ' + scoreVal + '%</span>' : '')
|
||||||
+ '</div>'
|
+ '</div>'
|
||||||
+ (t.summary ? '<div style="font-size:0.75rem;color:var(--text-dim);margin-top:4px;line-height:1.5">' + esc(t.summary.substring(0,200)) + (t.summary.length > 200 ? '…' : '') + '</div>' : '')
|
+ '</div>'
|
||||||
|
+ (t.summary ? '<div style="font-size:0.75rem;color:var(--text-dim);margin-top:4px;line-height:1.5">' + esc(t.summary.substring(0,220)) + (t.summary.length > 220 ? '…' : '') + '</div>' : '')
|
||||||
+ '<div style="font-size:0.68rem;color:var(--text-dim);margin-top:4px">' + esc(t.source || '') + (t.published_at ? ' · ' + new Date(t.published_at).toLocaleDateString('de-DE') : '') + '</div>'
|
+ '<div style="font-size:0.68rem;color:var(--text-dim);margin-top:4px">' + esc(t.source || '') + (t.published_at ? ' · ' + new Date(t.published_at).toLocaleDateString('de-DE') : '') + '</div>'
|
||||||
+ '</div>';
|
+ '</div>';
|
||||||
}).join('') : '<div style="color:var(--text-dim);padding:1rem">No LLM insights yet — run scrapers first.</div>';
|
}).join('') : '<div style="color:var(--text-dim);padding:1rem">No LLM insights yet — run scrapers first.</div>';
|
||||||
document.getElementById('cr-topics').innerHTML = topicsHtml;
|
document.getElementById('cr-topics').innerHTML = topicsHtml;
|
||||||
|
|
||||||
// Knowledge Base entries
|
// Knowledge Base entries — grouped by intel_type from market_intelligence
|
||||||
var kb = (insights && insights.knowledgeBase) || [];
|
var kb = (insights && insights.knowledgeBase) || [];
|
||||||
|
var typeLabels = {
|
||||||
|
capex_cycle: '📈 CapEx Cycle', supply_chain: '🏭 Supply Chain',
|
||||||
|
distributor_lead_time: '📦 Lead Times', standard_draft: '📋 Draft Standards',
|
||||||
|
standard_ratified: '✅ Ratified Standards', trade_show: '🎪 Trade Shows',
|
||||||
|
tender: '📑 Tenders', market_share: '📊 Market Share',
|
||||||
|
technology_launch: '🚀 Technology Launch', price_movement: '💶 Price Movement'
|
||||||
|
};
|
||||||
var kbHtml = kb.length ? '<table style="width:100%;border-collapse:collapse;font-size:0.78rem"><thead><tr style="background:var(--surface2)">'
|
var kbHtml = kb.length ? '<table style="width:100%;border-collapse:collapse;font-size:0.78rem"><thead><tr style="background:var(--surface2)">'
|
||||||
+ '<th style="padding:0.5rem;text-align:left;color:var(--text-dim)">Title</th>'
|
+ '<th style="padding:0.5rem;text-align:left;color:var(--text-dim)">Intelligence Type</th>'
|
||||||
+ '<th style="padding:0.5rem;text-align:left;color:var(--text-dim)">Category</th>'
|
+ '<th style="padding:0.5rem;text-align:right;color:var(--text-dim)">Items</th>'
|
||||||
+ '<th style="padding:0.5rem;text-align:right;color:var(--text-dim)">Confidence</th>'
|
+ '<th style="padding:0.5rem;text-align:right;color:var(--text-dim)">Top Relevance</th>'
|
||||||
|
+ '<th style="padding:0.5rem;text-align:right;color:var(--text-dim)">Latest</th>'
|
||||||
+ '</tr></thead><tbody>'
|
+ '</tr></thead><tbody>'
|
||||||
+ kb.map(function(k) {
|
+ kb.map(function(k) {
|
||||||
|
var label = typeLabels[k.category] || k.category || '—';
|
||||||
|
var relScore = k.top_relevance != null ? Math.round(Number(k.top_relevance) * 100) + '%' : '—';
|
||||||
|
var latest = k.latest ? new Date(k.latest).toLocaleDateString('de-DE') : '—';
|
||||||
return '<tr style="border-bottom:1px solid var(--border)">'
|
return '<tr style="border-bottom:1px solid var(--border)">'
|
||||||
+ '<td style="padding:0.5rem;color:var(--text-bright)">' + esc(k.title || '') + '</td>'
|
+ '<td style="padding:0.5rem;color:var(--text-bright)">' + esc(label) + '</td>'
|
||||||
+ '<td style="padding:0.5rem;color:var(--text-dim)">' + esc(k.category || '—') + '</td>'
|
+ '<td style="padding:0.5rem;text-align:right;color:var(--blue);font-weight:600">' + esc(String(k.count || 0)) + '</td>'
|
||||||
+ '<td style="padding:0.5rem;text-align:right;color:var(--blue)">' + (k.confidence_score != null ? k.confidence_score + '%' : '—') + '</td>'
|
+ '<td style="padding:0.5rem;text-align:right;color:var(--green)">' + relScore + '</td>'
|
||||||
|
+ '<td style="padding:0.5rem;text-align:right;color:var(--text-dim)">' + latest + '</td>'
|
||||||
+ '</tr>';
|
+ '</tr>';
|
||||||
}).join('')
|
}).join('')
|
||||||
+ '</tbody></table>'
|
+ '</tbody></table>'
|
||||||
: '<div style="color:var(--text-dim);padding:1rem">Knowledge base is empty — crawler LLM hasn\'t learned yet.</div>';
|
: '<div style="color:var(--text-dim);padding:1rem">No market intelligence data yet — scrapers running.</div>';
|
||||||
document.getElementById('cr-kb-entries').innerHTML = kbHtml;
|
document.getElementById('cr-kb-entries').innerHTML = kbHtml;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user