Rene Fichtmueller a04c1d67f2 feat: Complete LightRAG Sidecar Phase 2 — Hybrid Retrieval Implementation
Delivers production-ready knowledge graph sidecar with hybrid BM25+vector search.

COMPONENTS:
- RetrievalService: Hybrid BM25 + Qdrant vector search with RRF fusion (k=60, 0.4/0.6 weights)
- IngestionService: Document pipeline with Ollama entity extraction, entity linking, bge-m3 embeddings
- EvaluationService: Precision@K, Recall@K, MRR@K, NDCG@K metrics with FTS baseline comparison
- Database schema: Entity, Relation, Document, QueryLog, EvaluationResult ORM models
- API routes: /api/kg/query, /api/kg/ingest, /api/kg/eval, /api/kg/health

INFRASTRUCTURE:
- FastAPI 0.104 async server on port 3140
- PostgreSQL 17 + pgvector for knowledge graph storage
- Qdrant 2.7 vector database with COSINE distance (384-dim bge-m3)
- Ollama qwen2.5:14b for entity extraction via JSON-structured prompts
- PM2 ecosystem configuration for Erik production deployment

TESTING & DEPLOYMENT:
- TESTING.md: 5-phase local testing workflow with examples
- DEPLOYMENT_CHECKLIST.md: Step-by-step Erik deployment guide
- eval-transceiver-50qa.json: 50 Q&A evaluation pairs for transceiver domain
- populate_eval_set.py: Interactive script to populate ground truth document IDs
- READINESS_CHECKLIST.md: Pre-deployment verification checklist
- bootstrap_tip_data.py: Load TIP blog documents via API

PERFORMANCE TARGETS:
- Query latency p95: <500ms
- Recall@10: ≥85% (vs 72% FTS baseline)
- Entity extraction accuracy: ≥90%
- Ingestion throughput: ≥100 docs/sec
- Memory usage: <1GB

Ready for Phase 3: E2E testing, TypeScript client, multi-domain support.
2026-04-25 05:47:18 +02:00

627 lines
16 KiB
HTML

<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>LLM Gateway Dashboard</title>
<style>
* {
margin: 0;
padding: 0;
box-sizing: border-box;
}
body {
font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', 'Roboto', 'Oxygen', 'Ubuntu', 'Cantarell', sans-serif;
background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
min-height: 100vh;
padding: 20px;
color: #333;
}
.container {
max-width: 1400px;
margin: 0 auto;
}
header {
margin-bottom: 40px;
color: white;
}
h1 {
font-size: 2.5rem;
margin-bottom: 8px;
font-weight: 700;
}
.status-bar {
display: flex;
gap: 20px;
align-items: center;
margin-top: 12px;
flex-wrap: wrap;
}
.status-item {
background: rgba(255, 255, 255, 0.2);
padding: 8px 16px;
border-radius: 6px;
font-size: 0.95rem;
backdrop-filter: blur(10px);
}
.status-indicator {
display: inline-block;
width: 8px;
height: 8px;
border-radius: 50%;
margin-right: 8px;
}
.status-indicator.healthy {
background: #10b981;
}
.status-indicator.unhealthy {
background: #ef4444;
}
.grid {
display: grid;
grid-template-columns: repeat(auto-fit, minmax(280px, 1fr));
gap: 20px;
margin-bottom: 40px;
}
.card {
background: white;
border-radius: 12px;
padding: 24px;
box-shadow: 0 4px 6px rgba(0, 0, 0, 0.1);
transition: transform 0.2s, box-shadow 0.2s;
}
.card:hover {
transform: translateY(-4px);
box-shadow: 0 8px 12px rgba(0, 0, 0, 0.15);
}
.metric-label {
font-size: 0.9rem;
color: #666;
margin-bottom: 12px;
text-transform: uppercase;
letter-spacing: 0.5px;
font-weight: 500;
}
.metric-value {
font-size: 2.2rem;
font-weight: 700;
color: #667eea;
margin-bottom: 8px;
}
.metric-unit {
font-size: 0.9rem;
color: #999;
margin-left: 4px;
}
.metric-change {
font-size: 0.85rem;
color: #666;
margin-top: 12px;
padding-top: 12px;
border-top: 1px solid #eee;
}
.section-title {
color: white;
font-size: 1.5rem;
margin: 40px 0 20px 0;
font-weight: 600;
}
.grid-models, .grid-callers {
display: grid;
grid-template-columns: repeat(auto-fill, minmax(200px, 1fr));
gap: 16px;
margin-bottom: 40px;
}
.model-card, .caller-card {
background: white;
border-radius: 10px;
padding: 16px;
box-shadow: 0 2px 4px rgba(0, 0, 0, 0.1);
border-left: 4px solid #667eea;
}
.model-name, .caller-name {
font-weight: 600;
color: #333;
margin-bottom: 12px;
font-size: 0.95rem;
word-break: break-word;
}
.request-count {
font-size: 1.8rem;
font-weight: 700;
color: #667eea;
}
.count-label {
font-size: 0.8rem;
color: #999;
margin-top: 4px;
}
.filters {
display: flex;
gap: 12px;
margin-bottom: 20px;
flex-wrap: wrap;
}
.filter-btn {
padding: 8px 16px;
border: 2px solid #e0e0e0;
background: white;
border-radius: 6px;
cursor: pointer;
font-weight: 500;
font-size: 0.9rem;
transition: all 0.2s;
}
.filter-btn.active {
border-color: #667eea;
background: #667eea;
color: white;
}
.filter-btn:hover {
border-color: #667eea;
}
.requests-table {
background: white;
border-radius: 12px;
overflow: hidden;
box-shadow: 0 4px 6px rgba(0, 0, 0, 0.1);
}
.table-header {
background: #f5f5f5;
padding: 16px;
display: grid;
grid-template-columns: 120px 150px 100px 120px 100px 100px 100px;
gap: 12px;
font-weight: 600;
color: #666;
font-size: 0.9rem;
text-transform: uppercase;
letter-spacing: 0.5px;
}
.table-row {
padding: 16px;
display: grid;
grid-template-columns: 120px 150px 100px 120px 100px 100px 100px;
gap: 12px;
border-bottom: 1px solid #eee;
align-items: center;
font-size: 0.9rem;
}
.table-row:last-child {
border-bottom: none;
}
.table-row:hover {
background: #f9f9f9;
}
.status-badge {
display: inline-block;
padding: 4px 12px;
border-radius: 12px;
font-size: 0.8rem;
font-weight: 600;
text-transform: uppercase;
letter-spacing: 0.5px;
}
.status-approved {
background: #d1fae5;
color: #065f46;
}
.status-warning {
background: #fef3c7;
color: #92400e;
}
.status-pending {
background: #dbeafe;
color: #1e40af;
}
.status-rejected {
background: #fee2e2;
color: #991b1b;
}
.status-error {
background: #fecaca;
color: #7f1d1d;
}
.empty-state {
text-align: center;
padding: 40px;
color: #999;
}
.connection-status {
position: fixed;
bottom: 20px;
right: 20px;
background: white;
padding: 12px 16px;
border-radius: 6px;
box-shadow: 0 2px 8px rgba(0, 0, 0, 0.15);
font-size: 0.9rem;
display: flex;
align-items: center;
gap: 8px;
}
.connection-dot {
width: 8px;
height: 8px;
border-radius: 50%;
background: #10b981;
animation: pulse 2s infinite;
}
.connection-dot.disconnected {
background: #ef4444;
animation: none;
}
@keyframes pulse {
0%, 100% { opacity: 1; }
50% { opacity: 0.5; }
}
.loading {
text-align: center;
padding: 40px;
color: #999;
font-style: italic;
}
@media (max-width: 768px) {
h1 {
font-size: 1.8rem;
}
.grid {
grid-template-columns: 1fr;
}
.grid-models, .grid-callers {
grid-template-columns: repeat(auto-fill, minmax(150px, 1fr));
}
.table-header, .table-row {
grid-template-columns: 80px 100px 80px 80px 60px 60px 60px;
font-size: 0.8rem;
}
.metric-value {
font-size: 1.8rem;
}
}
</style>
</head>
<body>
<div class="container">
<header>
<h1>LLM Gateway Dashboard</h1>
<div class="status-bar">
<div class="status-item">
<span class="status-indicator healthy" id="dbStatusIndicator"></span>
<span id="dbStatus">Checking database...</span>
</div>
<div class="status-item">
<span class="status-indicator" id="sseStatusIndicator"></span>
<span id="sseStatus">Connecting to stream...</span>
</div>
<div class="status-item">
<span id="listenerCount">0</span> SSE listeners
</div>
</div>
</header>
<div class="grid">
<div class="card">
<div class="metric-label">Total Requests</div>
<div class="metric-value" id="totalRequests">0</div>
<div class="metric-change" id="requestsChange"></div>
</div>
<div class="card">
<div class="metric-label">Success Rate</div>
<div class="metric-value" id="successRate">0<span class="metric-unit">%</span></div>
<div class="metric-change" id="successChange"></div>
</div>
<div class="card">
<div class="metric-label">Avg Latency</div>
<div class="metric-value" id="avgLatency">0<span class="metric-unit">ms</span></div>
<div class="metric-change" id="latencyChange"></div>
</div>
<div class="card">
<div class="metric-label">Total Cost</div>
<div class="metric-value" id="totalCost">$0.00</div>
<div class="metric-change" id="costChange"></div>
</div>
<div class="card">
<div class="metric-label">Avg Confidence</div>
<div class="metric-value" id="avgConfidence">0<span class="metric-unit">%</span></div>
<div class="metric-change" id="confidenceChange"></div>
</div>
<div class="card">
<div class="metric-label">Fallback Usage</div>
<div class="metric-value" id="fallbackPercent">0<span class="metric-unit">%</span></div>
<div class="metric-change" id="fallbackChange"></div>
</div>
</div>
<h2 class="section-title">Top Models</h2>
<div class="grid-models" id="topModels">
<div class="loading">Loading models...</div>
</div>
<h2 class="section-title">Top Callers</h2>
<div class="grid-callers" id="topCallers">
<div class="loading">Loading callers...</div>
</div>
<h2 class="section-title">Recent Requests</h2>
<div class="filters">
<button class="filter-btn active" data-hours="24">Last 24h</button>
<button class="filter-btn" data-hours="168">Last 7d</button>
<button class="filter-btn" data-hours="720">Last 30d</button>
</div>
<div class="requests-table">
<div class="table-header">
<div>Request ID</div>
<div>Caller</div>
<div>Model</div>
<div>Status</div>
<div>Tokens In</div>
<div>Cost</div>
<div>Latency</div>
</div>
<div id="requestsTable">
<div class="empty-state">No requests yet</div>
</div>
</div>
</div>
<div class="connection-status">
<div class="connection-dot" id="connectionDot"></div>
<span id="connectionText">Connected</span>
</div>
<script>
// Polling cadence for the timers started by init(), in milliseconds.
const HEALTH_CHECK_INTERVAL = 30 * 1000;
const METRICS_REFRESH_INTERVAL = 10 * 1000;

// Prefix prepended to every API path; empty keeps the URLs relative.
const API_BASE = '';

// Mutable page state shared by the functions below.
let selectedHours = 24; // value sent as the `hours` query parameter
let lastMetrics = null; // last metrics payload passed to updateMetrics
let sseConnection = null; // current EventSource, if one has been opened
/**
 * Health check: query the dashboard health endpoint and push the outcome
 * to the header via updateHealthStatus.
 *
 * Any failure (network error, non-JSON body, unexpected payload) is logged
 * and reported as unhealthy rather than propagated.
 *
 * @returns {Promise<boolean>} true only when the payload's `status` is 'ok'.
 */
async function checkHealth() {
  try {
    const reply = await fetch(`${API_BASE}/api/dashboard/health`);
    const payload = await reply.json();
    const ok = payload.status === 'ok';
    updateHealthStatus(ok, payload);
    return ok;
  } catch (err) {
    console.error('Health check failed:', err);
    updateHealthStatus(false, { error: err.message });
    return false;
  }
}
/**
 * Reflect a health-check result in the header status bar.
 *
 * Writes the database indicator class and text, and keeps the standalone
 * "SSE listeners" counter (#listenerCount) in sync with the payload.
 *
 * @param {boolean} isHealthy - Whether the health check reported status 'ok'.
 * @param {object} [data] - Health payload; only `sse_listeners` is read.
 *   A missing payload or missing count is shown as 0.
 */
function updateHealthStatus(isHealthy, data) {
  const indicator = document.getElementById('dbStatusIndicator');
  const status = document.getElementById('dbStatus');
  const listenerCount = document.getElementById('listenerCount');
  const listeners = (data && data.sse_listeners) || 0;

  if (isHealthy) {
    indicator.className = 'status-indicator healthy';
    status.textContent = `Database connected (${listeners} listeners)`;
  } else {
    indicator.className = 'status-indicator unhealthy';
    status.textContent = 'Database disconnected';
  }

  // The counter lives in its own status item, separate from the database text.
  if (listenerCount) {
    listenerCount.textContent = String(listeners);
  }
}
/**
 * Load recent requests: fetch up to 50 rows for the currently selected
 * time window and hand them to renderRequests.
 *
 * Nothing is rendered unless the payload's `success` flag is truthy.
 * Errors are logged and otherwise ignored.
 *
 * @returns {Promise<void>}
 */
async function loadRequests() {
  try {
    const reply = await fetch(`${API_BASE}/api/dashboard/requests?limit=50&hours=${selectedHours}`);
    const body = await reply.json();
    if (!body.success) {
      return;
    }
    renderRequests(body.data);
  } catch (err) {
    console.error('Failed to load requests:', err);
  }
}
/**
 * Render the body of the "Recent Requests" table.
 *
 * Every field comes from the API and is written through innerHTML, so each
 * value is HTML-escaped first and the status is reduced to class-safe
 * characters before being used in a class name.
 *
 * @param {Array<object>} requests - Rows with request_id, caller, model,
 *   status, tokens_in, cost_usd and latency_ms. Anything that is not a
 *   non-empty array shows the empty-state message.
 */
function renderRequests(requests) {
  const table = document.getElementById('requestsTable');
  const rows = Array.isArray(requests) ? requests : [];
  if (rows.length === 0) {
    table.innerHTML = '<div class="empty-state">No requests in selected timeframe</div>';
    return;
  }

  const HTML_ESCAPES = { '&': '&amp;', '<': '&lt;', '>': '&gt;', '"': '&quot;', "'": '&#39;' };
  const escapeHtml = (value) => String(value).replace(/[&<>"']/g, (ch) => HTML_ESCAPES[ch]);
  const isMissing = (value) => value === null || value === undefined;
  const cell = (value) => (isMissing(value) ? '-' : escapeHtml(value));

  table.innerHTML = rows.map((req) => {
    const id = isMissing(req.request_id) ? '' : String(req.request_id);
    // Only add the ellipsis when the ID was actually truncated.
    const shortId = id.length > 12 ? `${id.substring(0, 12)}...` : id;
    // Keep only characters that are safe inside a class attribute.
    const statusClass = String(req.status).replace(/[^\w-]/g, '');
    // cost_usd may arrive as a decimal string or null; coerce before toFixed.
    const cost = Number(req.cost_usd);
    const costText = (Number.isFinite(cost) ? cost : 0).toFixed(4);
    const latency = isMissing(req.latency_ms) ? '-' : `${escapeHtml(req.latency_ms)}ms`;

    return `
<div class="table-row">
<div title="${escapeHtml(id)}">${escapeHtml(shortId)}</div>
<div>${cell(req.caller)}</div>
<div>${cell(req.model)}</div>
<div><span class="status-badge status-${statusClass}">${cell(req.status)}</span></div>
<div>${cell(req.tokens_in)}</div>
<div>$${costText}</div>
<div>${latency}</div>
</div>
`;
  }).join('');
}
/**
 * Load metrics: fetch the request metrics (60-minute buckets), render them
 * through updateMetrics, then remember the payload in `lastMetrics`.
 *
 * Nothing happens unless the payload's `success` flag is truthy.
 * Errors are logged and otherwise ignored.
 *
 * @returns {Promise<void>}
 */
async function loadMetrics() {
  try {
    const reply = await fetch(`${API_BASE}/api/dashboard/request-metrics?bucket_minutes=60`);
    const body = await reply.json();
    if (!body.success) {
      return;
    }
    // Render first: lastMetrics is only updated after a successful render.
    updateMetrics(body.data);
    lastMetrics = body.data;
  } catch (err) {
    console.error('Failed to load metrics:', err);
  }
}
/**
 * Render the summary cards plus the "Top Models" / "Top Callers" grids.
 *
 * Numeric fields are coerced with Number() before formatting, so decimal
 * strings format correctly and missing values show as 0. Model and caller
 * names are HTML-escaped because they are written through innerHTML. An
 * empty list replaces the loading placeholder with an empty-state message;
 * a list that is absent from the payload leaves the grid untouched.
 *
 * @param {object} metrics - Metrics payload. Reads total_requests,
 *   success_rate, avg_latency, total_cost, avg_confidence,
 *   fallback_percentage, top_models, top_callers and recent_errors.
 *   A missing payload is ignored.
 */
function updateMetrics(metrics) {
  if (!metrics) {
    return;
  }

  const HTML_ESCAPES = { '&': '&amp;', '<': '&lt;', '>': '&gt;', '"': '&quot;', "'": '&#39;' };
  const escapeHtml = (value) => String(value).replace(/[&<>"']/g, (ch) => HTML_ESCAPES[ch]);
  const toNumber = (value) => {
    const parsed = Number(value);
    return Number.isFinite(parsed) ? parsed : 0;
  };
  const toPercent = (fraction) => `${(toNumber(fraction) * 100).toFixed(1)}%`;
  const setText = (id, text) => {
    document.getElementById(id).textContent = text;
  };
  // Fill one of the card grids; `kind` selects the model-/caller- CSS classes.
  const renderCards = (containerId, items, kind, labelKey, emptyText) => {
    if (!Array.isArray(items)) {
      return;
    }
    const container = document.getElementById(containerId);
    if (items.length === 0) {
      container.innerHTML = `<div class="empty-state">${emptyText}</div>`;
      return;
    }
    container.innerHTML = items.map((item) => `
<div class="${kind}-card">
<div class="${kind}-name">${escapeHtml(item[labelKey])}</div>
<div class="request-count">${escapeHtml(item.count)}</div>
<div class="count-label">requests</div>
</div>
`).join('');
  };

  // Summary cards
  setText('totalRequests', toNumber(metrics.total_requests).toLocaleString());
  setText('successRate', toPercent(metrics.success_rate));
  setText('avgLatency', `${Math.round(toNumber(metrics.avg_latency))}ms`);
  setText('totalCost', `$${toNumber(metrics.total_cost).toFixed(2)}`);
  setText('avgConfidence', toPercent(metrics.avg_confidence));
  setText('fallbackPercent', toPercent(metrics.fallback_percentage));

  // Card grids
  renderCards('topModels', metrics.top_models, 'model', 'model', 'No model data yet');
  renderCards('topCallers', metrics.top_callers, 'caller', 'caller', 'No caller data yet');

  // Recent errors are only surfaced in the console.
  if (metrics.recent_errors && metrics.recent_errors.length > 0) {
    console.warn('Recent errors:', metrics.recent_errors);
  }
}
/**
 * SSE connection: (re)open the request event stream and wire its handlers.
 *
 * - open:    mark both status widgets as connected.
 * - error:   mark them as disconnected, close the current stream and retry
 *            in 5 seconds.
 * - message: a `{ type: 'connected' }` payload is only logged; any other
 *            parsable payload triggers a metrics and requests reload.
 */
function connectSSE() {
  const STREAM_STATES = {
    up: ['status-indicator healthy', 'Stream connected', 'connection-dot', 'Connected'],
    down: ['status-indicator unhealthy', 'Stream disconnected', 'connection-dot disconnected', 'Disconnected'],
  };
  const showStreamState = (key) => {
    const [indicatorClass, streamText, dotClass, badgeText] = STREAM_STATES[key];
    document.getElementById('sseStatusIndicator').className = indicatorClass;
    document.getElementById('sseStatus').textContent = streamText;
    document.getElementById('connectionDot').className = dotClass;
    document.getElementById('connectionText').textContent = badgeText;
  };

  // Drop any previous stream before opening a new one.
  if (sseConnection) {
    sseConnection.close();
  }
  sseConnection = new EventSource(`${API_BASE}/api/stream/requests`);

  sseConnection.onopen = () => {
    showStreamState('up');
  };

  sseConnection.onerror = () => {
    showStreamState('down');
    sseConnection.close();
    setTimeout(connectSSE, 5000);
  };

  sseConnection.onmessage = (event) => {
    try {
      const message = JSON.parse(event.data);
      if (message.type === 'connected') {
        console.log('SSE connection established');
        return;
      }
      // Real-time request update
      loadMetrics();
      loadRequests();
    } catch (err) {
      console.error('Failed to parse SSE message:', err);
    }
  };
}
// Filter buttons: clicking one makes it the only active button, stores its
// data-hours value in selectedHours and reloads the requests table.
document.querySelectorAll('.filter-btn').forEach((btn) => {
  btn.addEventListener('click', () => {
    // Explicit radix so the attribute is always read as a decimal number.
    const hours = Number.parseInt(btn.dataset.hours, 10);
    if (Number.isNaN(hours)) {
      // Malformed data-hours: keep the current window instead of requesting hours=NaN.
      return;
    }
    document.querySelectorAll('.filter-btn').forEach((other) => other.classList.remove('active'));
    btn.classList.add('active');
    selectedHours = hours;
    loadRequests();
  });
});
/**
 * Initial setup: run the first health check, metrics load and requests load
 * concurrently, then open the event stream and start the polling timers.
 *
 * The three initial loads are independent of each other, so they are started
 * together instead of one after another.
 *
 * @returns {Promise<void>}
 */
async function init() {
  await Promise.all([checkHealth(), loadMetrics(), loadRequests()]);
  connectSSE();
  setInterval(checkHealth, HEALTH_CHECK_INTERVAL);
  setInterval(loadMetrics, METRICS_REFRESH_INTERVAL);
}
// Start: run init() once when this script is evaluated.
init();
</script>
</body>
</html>