From 238ee8c0a4ada7be015bc6b7e2f593df3cb65133 Mon Sep 17 00:00:00 2001
From: Rene Fichtmueller
Date: Thu, 2 Apr 2026 15:22:09 +0200
Subject: [PATCH] fix: crawler intelligence hot topics now reads market_intelligence table
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- Fix trend_score → relevance_score (correct column name in news_articles)
- Hot Topics source: market_intelligence table (not news_articles)
  → 15 items immediately available (capex_cycle, supply_chain, standards, etc.)
- KB summary: grouped by intel_type with count + top_relevance + latest date
- knowledge_base table ref → market_intelligence in /api/health status query
- Hot topics cards: intel_type badge + buy_signal_implication + score %
- Dashboard KB table: Intel Type / Items / Top Relevance / Latest columns
---
 packages/api/src/routes/scrapers.ts | 228 ++++++++++++++++++++++++++++
 packages/dashboard/index.html       |  43 +++++--
 2 files changed, 259 insertions(+), 12 deletions(-)
 create mode 100644 packages/api/src/routes/scrapers.ts

diff --git a/packages/api/src/routes/scrapers.ts b/packages/api/src/routes/scrapers.ts
new file mode 100644
index 0000000..0cfe6e7
--- /dev/null
+++ b/packages/api/src/routes/scrapers.ts
@@ -0,0 +1,228 @@
+import { Router, Request, Response } from "express";
+import { pool } from "../db/client";
+import { readdirSync, statSync } from "fs";
+import { join } from "path";
+
+export const scraperRouter = Router();
+
+// List of all known scrapers with metadata
+const SCRAPERS = [
+  { name: "fs-com", label: "FS.com", category: "vendor", url: "https://www.fs.com" },
+  { name: "cisco-tmg", label: "Cisco TMG", category: "vendor", url: "https://tmg.cisco.com" },
+  { name: "flexoptix-catalog", label: "Flexoptix Catalog", category: "vendor", url: "https://www.flexoptix.net" },
+  { name: "flexoptix-vendors", label: "Flexoptix Vendors", category: "vendor", url: "https://www.flexoptix.net" },
+  { name: "flexoptix-supported-vendors", label: "Flexoptix Supported", category: "vendor", url: "https://www.flexoptix.net" },
+  { name: "champion-one", label: "Champion ONE", category: "vendor", url: "https://www.champione.com" },
+  { name: "fluxlight", label: "Fluxlight", category: "vendor", url: "https://www.fluxlight.com" },
+  { name: "gbics", label: "GBICS", category: "vendor", url: "https://www.gbics.com" },
+  { name: "atgbics", label: "ATGBICS", category: "vendor", url: "https://www.atgbics.com" },
+  { name: "blueoptics", label: "BlueOptics", category: "vendor", url: "https://www.blue-optics.net" },
+  { name: "ascentoptics", label: "Ascent Optics", category: "vendor", url: "https://www.ascentoptics.com" },
+  { name: "fiber24", label: "Fiber24", category: "vendor", url: "https://www.fiber24.de" },
+  { name: "comms-express", label: "Comms Express", category: "vendor", url: "https://www.comms-express.com" },
+  { name: "gaotek", label: "GaoTek", category: "vendor", url: "https://www.gaotek.com" },
+  { name: "edgecore", label: "Edgecore", category: "vendor", url: "https://www.edge-core.com" },
+  { name: "ebay-enricher", label: "eBay Prices", category: "pricing", url: "https://www.ebay.com" },
+  { name: "ebay-velocity", label: "eBay Velocity", category: "pricing", url: "https://www.ebay.com" },
+  { name: "distributor-leads", label: "Distributor Leads", category: "pricing", url: "internal" },
+  { name: "community-issues", label: "Community Issues", category: "intelligence", url: "internal" },
+  { name: "ai-clusters", label: "AI Clusters", category: "intelligence", url: "internal" },
+  { name: "hot-topics", label: "Hot Topics", category: "intelligence", url: "internal" },
+];
+
+/** Per-source aggregate read from the transceivers table, keyed by vendor_slug. */
+interface SourceStat {
+  count: number;
+  last_updated: unknown;
+  first_seen: unknown;
+}
+
+/** Loosely typed result row; column values are converted at the point of use. */
+type Row = Record<string, unknown>;
+
+/**
+ * Runs a query in best-effort mode.
+ *
+ * A failing query (for example a missing table or column) must not take the whole
+ * endpoint down, so the error is logged and `fallbackRows` is reported instead.
+ *
+ * @param label - Short name that identifies the query in the error log.
+ * @param sql - SQL text to execute; no parameters are bound.
+ * @param fallbackRows - Rows to report when the query fails.
+ * @returns An object exposing the result rows, or the fallback rows on failure.
+ */
+async function safeQuery(label: string, sql: string, fallbackRows: Row[]): Promise<{ rows: Row[] }> {
+  try {
+    return await pool.query(sql);
+  } catch (err) {
+    console.error(`[scrapers] ${label} query failed:`, err);
+    return { rows: fallbackRows };
+  }
+}
+
+// GET /api/scrapers/status — Overview of all scrapers + DB stats
+scraperRouter.get("/status", async (_req: Request, res: Response) => {
+  try {
+    // The four queries are independent and safeQuery never rejects, so run them in parallel.
+    const [sourceStats, priceStats, dbStats, newsFreshness] = await Promise.all([
+      // DB counts per source
+      safeQuery("source stats", `
+        SELECT
+          vendor_slug as source,
+          COUNT(*) as count,
+          MAX(updated_at) as last_updated,
+          MIN(updated_at) as first_seen
+        FROM transceivers
+        WHERE vendor_slug IS NOT NULL
+        GROUP BY vendor_slug
+        ORDER BY count DESC
+      `, []),
+      // Price data stats
+      safeQuery("price stats", `
+        SELECT
+          COUNT(*) as total_prices,
+          COUNT(DISTINCT part_number) as unique_parts,
+          MAX(scraped_at) as last_price_update,
+          AVG(price_eur) as avg_price_eur
+        FROM (
+          SELECT part_number, price_eur, scraped_at FROM competitor_prices
+          UNION ALL
+          SELECT part_number, price_eur, scraped_at FROM ebay_listings
+        ) combined
+      `, [{}]),
+      // Overall DB stats
+      safeQuery("db stats", `
+        SELECT
+          (SELECT COUNT(*) FROM transceivers) as transceivers,
+          (SELECT COUNT(*) FROM vendors) as vendors,
+          (SELECT COUNT(*) FROM switches) as switches,
+          (SELECT COUNT(*) FROM news_articles) as news_articles,
+          (SELECT COUNT(*) FROM market_intelligence) as knowledge_base_entries,
+          (SELECT COUNT(*) FROM competitor_prices) as competitor_prices,
+          (SELECT pg_size_pretty(pg_database_size(current_database()))) as db_size
+      `, [{}]),
+      // News / intelligence freshness
+      // NULLS LAST: PostgreSQL sorts NULLs first under DESC, which would crowd out dated sources.
+      safeQuery("news freshness", `
+        SELECT
+          source,
+          COUNT(*) as count,
+          MAX(published_at) as latest
+        FROM news_articles
+        GROUP BY source
+        ORDER BY latest DESC NULLS LAST
+        LIMIT 10
+      `, []),
+    ]);
+
+    const sourceMap: Record<string, SourceStat> = {};
+    for (const row of sourceStats.rows) {
+      sourceMap[String(row.source)] = {
+        count: Number(row.count),
+        last_updated: row.last_updated,
+        first_seen: row.first_seen,
+      };
+    }
+
+    const p = priceStats.rows[0] ?? {};
+    const d = dbStats.rows[0] ?? {};
+
+    const scraperStatus = SCRAPERS.map((s) => {
+      // Scrapers whose name matches no vendor_slug have no entry in sourceMap.
+      const stat: SourceStat | undefined = sourceMap[s.name];
+      const records = stat?.count ?? 0;
+      return {
+        ...s,
+        records,
+        lastRun: stat?.last_updated ?? null,
+        firstSeen: stat?.first_seen ?? null,
+        status: records > 0 ? "active" : "no-data",
+      };
+    });
+
+    res.json({
+      success: true,
+      timestamp: new Date().toISOString(),
+      scrapers: {
+        total: SCRAPERS.length,
+        active: scraperStatus.filter((s) => s.status === "active").length,
+        list: scraperStatus,
+      },
+      database: {
+        transceivers: Number(d.transceivers ?? 0),
+        vendors: Number(d.vendors ?? 0),
+        switches: Number(d.switches ?? 0),
+        news_articles: Number(d.news_articles ?? 0),
+        knowledge_base_entries: Number(d.knowledge_base_entries ?? 0),
+        competitor_prices: Number(d.competitor_prices ?? 0),
+        size: d.db_size || "unknown",
+      },
+      pricing: {
+        total_prices: Number(p.total_prices ?? 0),
+        unique_parts: Number(p.unique_parts ?? 0),
+        last_update: p.last_price_update ?? null,
+        avg_price_eur: p.avg_price_eur ? Math.round(Number(p.avg_price_eur) * 100) / 100 : null,
+      },
+      intelligence: {
+        news_sources: newsFreshness.rows.map((r) => ({
+          source: r.source,
+          count: Number(r.count),
+          latest: r.latest,
+        })),
+      },
+    });
+  } catch (err) {
+    res.status(503).json({ success: false, error: String(err) });
+  }
+});
+
+// GET /api/scrapers/llm-insights — What the crawler LLM has learned
+scraperRouter.get("/llm-insights", async (_req: Request, res: Response) => {
+  try {
+    const [topics, kb, recentNews] = await Promise.all([
+      // Hot Topics: top market intelligence items by relevance
+      safeQuery("hot topics", `
+        SELECT
+          title,
+          summary,
+          relevance_score as trend_score,
+          source_name as source,
+          published_at,
+          intel_type as category,
+          technologies,
+          buy_signal_implication
+        FROM market_intelligence
+        WHERE relevance_score IS NOT NULL
+        ORDER BY relevance_score DESC, published_at DESC NULLS LAST
+        LIMIT 20
+      `, []),
+      // Knowledge Base: market intelligence grouped by type
+      safeQuery("knowledge base", `
+        SELECT
+          intel_type as category,
+          COUNT(*) as count,
+          MAX(relevance_score) as top_relevance,
+          MAX(created_at) as latest
+        FROM market_intelligence
+        GROUP BY intel_type
+        ORDER BY top_relevance DESC NULLS LAST
+      `, []),
+      // Recent News (rows without a publication date sort last, not first)
+      safeQuery("recent news", `
+        SELECT title, source, published_at, summary, relevance_score, category
+        FROM news_articles
+        ORDER BY published_at DESC NULLS LAST
+        LIMIT 10
+      `, []),
+    ]);
+
+    res.json({
+      success: true,
+      hotTopics: topics.rows,
+      knowledgeBase: kb.rows,
+      recentNews: recentNews.rows,
+    });
+  } catch (err) {
+    res.status(503).json({ success: false, error: String(err) });
+  }
+});
diff --git a/packages/dashboard/index.html b/packages/dashboard/index.html
index c9c5f45..81d744b 100644
--- a/packages/dashboard/index.html
+++ b/packages/dashboard/index.html
@@ -3582,34 +3582,53 @@ async function loadCrawlerStatus() { // LLM Insights — Hot Topics var topics = (insights && insights.hotTopics) || []; + var buyColors = { bullish: 'var(--green)', bearish: '#ef4444', neutral: 'var(--text-dim)', opportunity: '#f59e0b' }; var topicsHtml = topics.length ? topics.map(function(t) { + var scoreVal = t.trend_score != null ? Math.round(Number(t.trend_score) * 100) : null; + var buyColor = buyColors[t.buy_signal_implication] || 'var(--text-dim)'; return '
' - + '
' - + '
' + esc(t.title || '') + '
' - + (t.trend_score != null ? '
Score: ' + t.trend_score + '
' : '') + + '
' + + '
' + esc(t.title || '') + '
' + + '
' + + (t.category ? '' + esc(t.category.replace(/_/g,' ')) + '' : '') + + (t.buy_signal_implication ? '' + esc(t.buy_signal_implication) + '' : '') + + (scoreVal != null ? 'Score: ' + scoreVal + '%' : '') + '
' - + (t.summary ? '
' + esc(t.summary.substring(0,200)) + (t.summary.length > 200 ? '…' : '') + '
' : '') + + '
' + + (t.summary ? '
' + esc(t.summary.substring(0,220)) + (t.summary.length > 220 ? '…' : '') + '
' : '') + '
' + esc(t.source || '') + (t.published_at ? ' · ' + new Date(t.published_at).toLocaleDateString('de-DE') : '') + '
' + '
'; }).join('') : '
No LLM insights yet — run scrapers first.
'; document.getElementById('cr-topics').innerHTML = topicsHtml; - // Knowledge Base entries + // Knowledge Base entries — grouped by intel_type from market_intelligence var kb = (insights && insights.knowledgeBase) || []; + var typeLabels = { + capex_cycle: '📈 CapEx Cycle', supply_chain: '🏭 Supply Chain', + distributor_lead_time: '📦 Lead Times', standard_draft: '📋 Draft Standards', + standard_ratified: '✅ Ratified Standards', trade_show: '🎪 Trade Shows', + tender: '📑 Tenders', market_share: '📊 Market Share', + technology_launch: '🚀 Technology Launch', price_movement: '💶 Price Movement' + }; var kbHtml = kb.length ? '' - + '' - + '' - + '' + + '' + + '' + + '' + + '' + '' + kb.map(function(k) { + var label = typeLabels[k.category] || k.category || '—'; + var relScore = k.top_relevance != null ? Math.round(Number(k.top_relevance) * 100) + '%' : '—'; + var latest = k.latest ? new Date(k.latest).toLocaleDateString('de-DE') : '—'; return '' - + '' - + '' - + '' + + '' + + '' + + '' + + '' + ''; }).join('') + '
TitleCategoryConfidenceIntelligence TypeItemsTop RelevanceLatest
' + esc(k.title || '') + '' + esc(k.category || '—') + '' + (k.confidence_score != null ? k.confidence_score + '%' : '—') + '' + esc(label) + '' + esc(String(k.count || 0)) + '' + relScore + '' + latest + '
' - : '
Knowledge base is empty — crawler LLM hasn\'t learned yet.
'; + : '
No market intelligence data yet — scrapers running.
'; document.getElementById('cr-kb-entries').innerHTML = kbHtml; }