From 4bd16af9a51e8dec06dcc60d065620e8fd23aeec Mon Sep 17 00:00:00 2001 From: Rene Fichtmueller Date: Thu, 14 May 2026 16:22:25 +0200 Subject: [PATCH] feat: data quality panel in Crawler Intelligence tab MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit GET /api/scrapers/data-quality — 4 parallel queries across 200k+ transceiver_verification_evidence rows. Returns: coverage percentages (price 62%, image 68%, details 94%, competitor 2%), all 10 evidence types with counts + avg confidence, 17 robot/scraper contributions, 14-day daily activity time series. Dashboard: coverage progress bars (color-coded thresholds), evidence type table, SVG activity sparkline, robot contributions table. --- CHANGELOG_PENDING.md | 1 + packages/api/src/routes/scrapers.ts | 82 +++++++++++++++ packages/dashboard/index.html | 152 +++++++++++++++++++++++++++- 3 files changed, 233 insertions(+), 2 deletions(-) diff --git a/CHANGELOG_PENDING.md b/CHANGELOG_PENDING.md index 4f8f3f0..ef2a2ad 100644 --- a/CHANGELOG_PENDING.md +++ b/CHANGELOG_PENDING.md @@ -1,6 +1,7 @@ # TIP Changelog Format: `{"d":"YYYY-MM-DD","t":"TYPE","m":"Description"}` +{"d":"2026-05-14","t":"FEAT","m":"Crawler Intelligence: Data Quality panel. New GET /api/scrapers/data-quality endpoint — 4 parallel queries over 200,617 transceiver_verification_evidence rows: (1) coverage breakdown (price 11,366/18,146 = 62%, image 12,333/68%, details 17,085/94%, competitor_match 399/2%, quarantined 1,193); (2) all 10 evidence types with count + avg confidence + product count + last seen; (3) robot/scraper contributions table (17 robots ranked by output); (4) daily activity last 14 days. 
Dashboard Crawler Intelligence tab: new 🔬 Data Quality section with coverage progress bars (color-coded ≥80% green / ≥50% amber / red), evidence type table, SVG sparkline bar chart for 14-day activity, robot contributions table with live/stale dot indicators."} {"d":"2026-05-14","t":"FEAT","m":"Dynamic Hype Cycle + Market Signal Engine: Hype Cycle tab is now fully data-driven. New GET /api/hype-cycle/market-signals endpoint blends 6 real data sources into a composite Market Signal Score (0–100) per technology: (1) hype_score from Norton-Bass model (30% weight), (2) hyperscaler CapEx YoY avg (Microsoft +68.8%, Alphabet +107.4%, Meta +46.8%), (3) price observation activity ratio 30d vs prior 30d, (4) AI cluster estimated transceiver demand (90d window), (5) eBay secondary market sell-through velocity, (6) internal fast-mover demand trend. Score thresholds: ≥70 green, ≥50 yellow, ≥30 orange, <30 gray. Recommendation engine: buildRecommendation(phase, signalScore, capexYoyAvg, speedGbps) maps hype phase × capex boom × speed class → Buy/Hold/Watch label with color + detail tooltip. Dashboard: Hype Cycle table shows Market Signal ● LIVE column (score + progress bar) + Recommendation column (emoji label, tooltip with reasoning). Market Context cards row above table shows Top Signal, CapEx Boom %, Fast Movers signal, eBay Velocity. New Hyperscaler CapEx panel (SEC filing data) + eBay Secondary Market panel at bottom of hype tab. Procurement: new 🛒 eBay Market sub-section with per-form-factor sell-through grid. All 6 queries run in parallel via Promise.all()."} {"d":"2026-05-14","t":"FEAT","m":"Procurement tab: 2 new sections with real data. (1) 📦 Internal Demand — Flexoptix internal SKU velocity from flexoptix_internal_demand table (8,585 SKUs: 70 fast-movers 53k units/12M, 239 regular, 979 slow, 7,297 dead stock). Summary cards with trend %%. Filter by velocity class. API: GET /api/procurement/internal-demand?velocity_class=&limit=&sort=. 
(2) πŸ€– AI Clusters β€” live AI datacenter announcements from ai_cluster_announcements table (396 in last 30 days). Shows estimated transceiver demand per build, MW scale, company, location, source link. Filter for entries with transceiver estimates. Stats: total announcements, MW, distinct companies, total estimated transceivers. API: GET /api/procurement/ai-clusters?days=&limit=. Replaced misleading DEMO DATA banners on Signals + ABC sections with informational note pointing to Internal Demand data."} diff --git a/packages/api/src/routes/scrapers.ts b/packages/api/src/routes/scrapers.ts index f03a56d..7f2816a 100644 --- a/packages/api/src/routes/scrapers.ts +++ b/packages/api/src/routes/scrapers.ts @@ -238,3 +238,85 @@ scraperRouter.get("/llm-insights", async (_req: Request, res: Response) => { res.status(503).json({ success: false, error: String(err) }); } }); + +// GET /api/scrapers/data-quality β€” Verification evidence coverage + quality metrics +scraperRouter.get("/data-quality", async (_req: Request, res: Response) => { + try { + const [coverageRows, evidenceTypes, robotActivity, dailyActivity] = await Promise.all([ + // Coverage: how many transceivers have each evidence type + pool.query(` + SELECT + COUNT(DISTINCT t.id)::int AS total_transceivers, + COUNT(DISTINCT CASE WHEN e.verification_type = 'price' THEN t.id END)::int AS have_price, + COUNT(DISTINCT CASE WHEN e.verification_type = 'image' THEN t.id END)::int AS have_image, + COUNT(DISTINCT CASE WHEN e.verification_type = 'details' THEN t.id END)::int AS have_details, + COUNT(DISTINCT CASE WHEN e.verification_type = 'competitor_match' THEN t.id END)::int AS have_competitor, + COUNT(DISTINCT CASE WHEN e.verification_type = 'artifact_quarantine' THEN t.id END)::int AS quarantined + FROM transceivers t + LEFT JOIN transceiver_verification_evidence e ON e.transceiver_id = t.id + `), + // Evidence type breakdown + pool.query(` + SELECT + verification_type, + COUNT(*)::int AS cnt, + 
ROUND(AVG(confidence)::numeric, 3) AS avg_confidence, + COUNT(DISTINCT transceiver_id)::int AS distinct_tx, + COUNT(DISTINCT robot_name) AS robot_count, + MAX(created_at) AS last_seen + FROM transceiver_verification_evidence + GROUP BY verification_type + ORDER BY cnt DESC + `), + // Robot / scraper activity + pool.query(` + SELECT + robot_name, + COUNT(*)::int AS total_evidence, + COUNT(DISTINCT transceiver_id)::int AS transceivers_covered, + COUNT(DISTINCT verification_type) AS types_covered, + MIN(created_at)::date AS first_run, + MAX(created_at)::date AS last_run + FROM transceiver_verification_evidence + GROUP BY robot_name + ORDER BY total_evidence DESC + LIMIT 20 + `), + // Daily activity last 14 days + pool.query(` + SELECT + created_at::date AS day, + COUNT(*)::int AS evidence_added, + COUNT(DISTINCT transceiver_id)::int AS transceivers_processed + FROM transceiver_verification_evidence + WHERE created_at >= NOW() - INTERVAL '14 days' + GROUP BY day + ORDER BY day DESC + `), + ]); + + const cov = coverageRows.rows[0]; + const total = cov.total_transceivers || 1; + + res.json({ + success: true, + coverage: { + total: cov.total_transceivers, + price: cov.have_price, + image: cov.have_image, + details: cov.have_details, + competitor: cov.have_competitor, + quarantined: cov.quarantined, + pricePct: Math.round((cov.have_price / total) * 100), + imagePct: Math.round((cov.have_image / total) * 100), + detailsPct: Math.round((cov.have_details / total) * 100), + competitorPct: Math.round((cov.have_competitor / total) * 100), + }, + evidenceTypes: evidenceTypes.rows, + robotActivity: robotActivity.rows, + dailyActivity: dailyActivity.rows, + }); + } catch (err) { + res.status(503).json({ success: false, error: String(err) }); + } +}); diff --git a/packages/dashboard/index.html b/packages/dashboard/index.html index 74899cd..768703a 100644 --- a/packages/dashboard/index.html +++ b/packages/dashboard/index.html @@ -1795,7 +1795,7 @@ -
+

🔥 LLM Hot Topics

Loading…
@@ -1805,6 +1805,15 @@
Loading…
+ + +
+
+

🔬 Data Quality & Verification Coverage

+ +
+
Loading…
+
@@ -7540,7 +7549,8 @@ async function startSelflearningTrain(lane, provider, seedOnly) { // ── CRAWLER INTELLIGENCE ──────────────────────────────────────────── async function loadCrawlerStatus() { - loadCrawlerJobs(); // load live job queue in parallel + loadCrawlerJobs(); // load live job queue in parallel + loadDataQuality(); // load verification evidence quality panel in parallel var token = (window.loadToken ? window.loadToken() : '') || ''; var status = null; var insights = null; @@ -7731,6 +7741,144 @@ async function loadCrawlerJobs() { } } +/* ── Data Quality (Verification Evidence) ──────────────────────────────── */ +async function loadDataQuality() { + var token = (window.loadToken ? window.loadToken() : '') || ''; + var el = document.getElementById('cr-data-quality'); + if (!el) return; + try { + var r = await fetch('/api/scrapers/data-quality', { headers: { 'Authorization': 'Bearer ' + token } }); + var d = await r.json(); + if (!d.success) throw new Error(d.error || 'API error'); + el.innerHTML = renderDataQuality(d); + } catch(e) { + el.innerHTML = '
Error loading data quality: ' + esc(e.message) + '
'; + } +} + +function renderDataQuality(d) { + var cov = d.coverage || {}; + var total = cov.total || 1; + + // Coverage bars + var bars = [ + { label: 'Details / Spec', key: 'detailsPct', count: cov.details, pct: cov.detailsPct, color: '#6366f1' }, + { label: 'Image', key: 'imagePct', count: cov.image, pct: cov.imagePct, color: '#3b82f6' }, + { label: 'Price', key: 'pricePct', count: cov.price, pct: cov.pricePct, color: '#22c55e' }, + { label: 'Competitor Match', key: 'competitorPct', count: cov.competitor, pct: cov.competitorPct, color: '#f59e0b' }, + ]; + + var coverageHtml = '
' + + '
' + + 'Coverage Overview' + + '' + (total).toLocaleString() + ' total transceivers' + + (cov.quarantined ? ' Β· ' + cov.quarantined.toLocaleString() + ' quarantined' : '') + + '' + + '
' + + bars.map(function(b) { + var pct = b.pct || 0; + var bgColor = pct >= 80 ? 'rgba(34,197,94,0.08)' : pct >= 50 ? 'rgba(245,158,11,0.08)' : 'rgba(239,68,68,0.08)'; + return '
' + + '
' + + '' + esc(b.label) + '' + + '' + + (b.count || 0).toLocaleString() + ' / ' + total.toLocaleString() + + ' Β· ' + pct + '%' + + '' + + '
' + + '
' + + '
' + + '
'; + }).join('') + + '
'; + + // Evidence type table + var typeIcons = { + price: 'πŸ’Ά', price_unavailable: 'πŸ’ΆβŒ', image: 'πŸ–Ό', image_unavailable: 'πŸ–ΌβŒ', + details: 'πŸ“‹', details_unavailable: 'πŸ“‹βŒ', competitor_match: 'βœ…', + competitor_no_match: '❌', competitor_ambiguous: '⚠️', artifact_quarantine: '🚫' + }; + var types = d.evidenceTypes || []; + var evidenceHtml = '
' + + '
Evidence Type Breakdown
' + + '' + + '' + + '' + + '' + + '' + + '' + + '' + + '' + + types.map(function(t, i) { + var icon = typeIcons[t.verification_type] || '●'; + var conf = t.avg_confidence != null ? Math.round(Number(t.avg_confidence) * 100) + '%' : 'β€”'; + var confColor = t.avg_confidence >= 0.95 ? '#22c55e' : t.avg_confidence >= 0.8 ? '#f59e0b' : '#ef4444'; + var last = t.last_seen ? new Date(t.last_seen).toLocaleDateString('de-DE') : 'β€”'; + var stripe = i % 2 === 1 ? 'background:var(--surface2)' : ''; + return '' + + '' + + '' + + '' + + '' + + '' + + ''; + }).join('') + + '
TypeEvidenceProductsAvg ConfLast Seen
' + icon + ' ' + esc(t.verification_type.replace(/_/g,' ')) + '' + (t.cnt||0).toLocaleString() + '' + (t.distinct_tx||0).toLocaleString() + '' + conf + '' + last + '
'; + + // Daily activity sparkline + var days = (d.dailyActivity || []).slice().reverse(); // oldest first + var maxActivity = Math.max.apply(null, days.map(function(x) { return x.evidence_added || 0; })) || 1; + var sparkH = 50; + var sparkW = Math.max(days.length * 22, 200); + var sparkBars = days.map(function(x, i) { + var h = Math.max(2, Math.round((x.evidence_added / maxActivity) * sparkH)); + var dateStr = x.day; + var label = x.evidence_added.toLocaleString() + ' evidence\n' + (x.transceivers_processed||0).toLocaleString() + ' products\n' + dateStr; + var barColor = x.evidence_added > 10000 ? '#6366f1' : x.evidence_added > 1000 ? '#3b82f6' : x.evidence_added > 100 ? '#22c55e' : '#64748b'; + return ''; + }).join(''); + var sparkSvg = '' + sparkBars + ''; + + var activityHtml = '
' + + '
Daily Activity (last 14 days)
' + + '
' + sparkSvg + '
' + + '
Hover bars for details. Purple = >10k, Blue = >1k, Green = >100, Gray = low activity.
' + + '
'; + + // Robot table + var robots = (d.robotActivity || []); + var robotHtml = '
' + + '
Scraper / Robot Contributions
' + + '' + + '' + + '' + + '' + + '' + + '' + + '' + + '' + + robots.map(function(r, i) { + var stripe = i % 2 === 1 ? 'background:rgba(255,255,255,0.02)' : ''; + var isActive = r.last_run === new Date().toISOString().slice(0,10); + var dotColor = isActive ? '#22c55e' : '#64748b'; + return '' + + '' + + '' + + '' + + '' + + '' + + ''; + }).join('') + + '
RobotEvidenceProductsTypesLast Run
' + + '' + + esc(r.robot_name) + '' + (r.total_evidence||0).toLocaleString() + '' + (r.transceivers_covered||0).toLocaleString() + '' + (r.types_covered||0) + '' + esc(r.last_run || 'β€”') + '
'; + + return coverageHtml + '
' + + '
' + evidenceHtml + activityHtml + '
' + + '
' + robotHtml + '
' + + '
'; +} + /* ── Smart Tooltips ─────────────────────────────────────────────────────── */ function initSmartTooltips() { var tip = document.createElement('div');