Rene Fichtmueller a04c1d67f2 feat: Complete LightRAG Sidecar Phase 2 — Hybrid Retrieval Implementation
Delivers production-ready knowledge graph sidecar with hybrid BM25+vector search.

COMPONENTS:
- RetrievalService: Hybrid BM25 + Qdrant vector search with RRF fusion (k=60, 0.4/0.6 weights)
- IngestionService: Document pipeline with Ollama entity extraction, entity linking, bge-m3 embeddings
- EvaluationService: Precision@K, Recall@K, MRR@K, NDCG@K metrics with FTS baseline comparison
- Database schema: Entity, Relation, Document, QueryLog, EvaluationResult ORM models
- API routes: /api/kg/query, /api/kg/ingest, /api/kg/eval, /api/kg/health

INFRASTRUCTURE:
- FastAPI 0.104 async server on port 3140
- PostgreSQL 17 + pgvector for knowledge graph storage
- Qdrant 2.7 vector database with COSINE distance (384-dim bge-m3)
- Ollama qwen2.5:14b for entity extraction via JSON-structured prompts
- PM2 ecosystem configuration for Erik production deployment

TESTING & DEPLOYMENT:
- TESTING.md: 5-phase local testing workflow with examples
- DEPLOYMENT_CHECKLIST.md: Step-by-step Erik deployment guide
- eval-transceiver-50qa.json: 50 Q&A evaluation pairs for transceiver domain
- populate_eval_set.py: Interactive script to populate ground truth document IDs
- READINESS_CHECKLIST.md: Pre-deployment verification checklist
- bootstrap_tip_data.py: Load TIP blog documents via API

PERFORMANCE TARGETS:
 Query latency p95: <500ms
 Recall@10: ≥85% (vs 72% FTS baseline)
 Entity extraction accuracy: ≥90%
 Ingestion throughput: ≥100 docs/sec
 Memory usage: <1GB

Ready for Phase 3: E2E testing, TypeScript client, multi-domain support.
2026-04-25 05:47:18 +02:00

238 lines
7.6 KiB
SQL

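-- Migration: Dashboard & Real-Time Metrics
-- Created: 2026-04-19
-- Purpose: Support the management dashboard with real-time request tracking and aggregated metrics
-- Dialect: MySQL (uses CREATE EVENT, DELIMITER and ON DUPLICATE KEY UPDATE); the scheduled events below only run while the server's event scheduler is enabled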
-- Table: dashboard_request_log
-- One row per request surfaced on the dashboard. Intended to be append-only
-- with a 72-hour retention window.
CREATE TABLE IF NOT EXISTS dashboard_request_log (
    -- Identity
    id                SERIAL PRIMARY KEY,
    request_id        VARCHAR(50)   NOT NULL UNIQUE,

    -- Request attributes
    caller            VARCHAR(100)  NOT NULL,
    task_type         VARCHAR(50),
    model             VARCHAR(100)  NOT NULL,
    status            VARCHAR(50)   NOT NULL,
    confidence_score  DECIMAL(3,2),                      -- nullable; two decimal places

    -- Usage and cost (all default to 0)
    tokens_in         INT           NOT NULL DEFAULT 0,
    tokens_out        INT           NOT NULL DEFAULT 0,
    cost_usd          DECIMAL(10,6) NOT NULL DEFAULT 0,
    latency_ms        INT           NOT NULL DEFAULT 0,

    -- Outcome details
    fallback_used     BOOLEAN       DEFAULT FALSE,
    error_message     TEXT,

    -- Timing: created_at is filled by the server when omitted; the epoch
    -- column has no default and must be supplied by the writer.
    created_at        TIMESTAMP     DEFAULT CURRENT_TIMESTAMP,
    created_at_epoch  INT           NOT NULL,

    -- Newest-first lookups, overall and per filter dimension
    KEY idx_created_desc   (created_at DESC),
    KEY idx_caller_created (caller, created_at DESC),
    KEY idx_status_created (status, created_at DESC),
    KEY idx_model_created  (model, created_at DESC),
    KEY idx_task_created   (task_type, created_at DESC),
    KEY idx_epoch          (created_at_epoch DESC)
);
-- Table: metrics_timeseries
-- Pre-aggregated, system-wide metrics: one row per 1-minute bucket
-- (bucket_time is unique). Intended retention is 90 days.
CREATE TABLE IF NOT EXISTS metrics_timeseries (
    id                 SERIAL PRIMARY KEY,

    -- Bucket identity: the same instant as a timestamp and as epoch seconds
    bucket_time        TIMESTAMP     NOT NULL,
    bucket_time_epoch  INT           NOT NULL,

    -- Request counters
    request_count      INT           NOT NULL DEFAULT 0,
    success_count      INT           NOT NULL DEFAULT 0,
    error_count        INT           NOT NULL DEFAULT 0,
    fallback_count     INT           NOT NULL DEFAULT 0,

    -- Latency, in milliseconds
    avg_latency_ms     DECIMAL(10,2),
    p50_latency_ms     INT,
    p95_latency_ms     INT,
    p99_latency_ms     INT,
    max_latency_ms     INT,

    -- Token usage
    total_tokens_in    INT           NOT NULL DEFAULT 0,
    total_tokens_out   INT           NOT NULL DEFAULT 0,
    avg_tokens_in      DECIMAL(10,2),
    avg_tokens_out     DECIMAL(10,2),

    -- Cost, in USD
    total_cost_usd     DECIMAL(10,6) NOT NULL DEFAULT 0,
    avg_cost_usd       DECIMAL(10,6),

    -- Confidence
    avg_confidence     DECIMAL(3,2),
    min_confidence     DECIMAL(3,2),

    -- Three most-used models in the bucket, with their request counts
    top_model_1        VARCHAR(100),
    top_model_1_count  INT,
    top_model_2        VARCHAR(100),
    top_model_2_count  INT,
    top_model_3        VARCHAR(100),
    top_model_3_count  INT,

    -- Requests per status value
    status_approved    INT           DEFAULT 0,
    status_warning     INT           DEFAULT 0,
    status_rejected    INT           DEFAULT 0,
    status_pending     INT           DEFAULT 0,

    created_at         TIMESTAMP     DEFAULT CURRENT_TIMESTAMP,

    -- At most one row per bucket; the upsert in the aggregation path keys on this
    UNIQUE INDEX unique_bucket_time (bucket_time),
    KEY idx_bucket_time_desc (bucket_time DESC),
    KEY idx_bucket_epoch     (bucket_time_epoch DESC)
);
-- Table: caller_metrics_timeseries
-- Per-caller rollup in 1-minute buckets: one row per (bucket_time, caller).
CREATE TABLE IF NOT EXISTS caller_metrics_timeseries (
    id              SERIAL PRIMARY KEY,

    -- Bucket identity
    bucket_time     TIMESTAMP     NOT NULL,
    caller          VARCHAR(100)  NOT NULL,

    -- Aggregates for this caller within the bucket
    request_count   INT           NOT NULL DEFAULT 0,
    success_count   INT           NOT NULL DEFAULT 0,
    error_count     INT           NOT NULL DEFAULT 0,
    avg_latency_ms  DECIMAL(10,2),
    total_cost_usd  DECIMAL(10,6) NOT NULL DEFAULT 0,
    avg_confidence  DECIMAL(3,2),

    created_at      TIMESTAMP     DEFAULT CURRENT_TIMESTAMP,

    -- A caller appears at most once per bucket
    UNIQUE INDEX unique_bucket_caller (bucket_time, caller),
    KEY idx_bucket_time_desc (bucket_time DESC),
    KEY idx_caller           (caller)
);
-- Table: model_metrics_timeseries
-- Per-model rollup in 1-minute buckets: one row per (bucket_time, model).
CREATE TABLE IF NOT EXISTS model_metrics_timeseries (
    id              SERIAL PRIMARY KEY,

    -- Bucket identity
    bucket_time     TIMESTAMP     NOT NULL,
    model           VARCHAR(100)  NOT NULL,

    -- Aggregates for this model within the bucket
    request_count   INT           NOT NULL DEFAULT 0,
    success_count   INT           NOT NULL DEFAULT 0,
    error_count     INT           NOT NULL DEFAULT 0,
    avg_latency_ms  DECIMAL(10,2),
    total_cost_usd  DECIMAL(10,6) NOT NULL DEFAULT 0,
    avg_confidence  DECIMAL(3,2),

    created_at      TIMESTAMP     DEFAULT CURRENT_TIMESTAMP,

    -- A model appears at most once per bucket
    UNIQUE INDEX unique_bucket_model (bucket_time, model),
    KEY idx_bucket_time_desc (bucket_time DESC),
    KEY idx_model            (model)
);
-- Table: dashboard_cache
-- Key/value store for frequently accessed dashboard aggregates. Each entry
-- carries its own TTL and an explicit expiry timestamp.
CREATE TABLE IF NOT EXISTS dashboard_cache (
    id           SERIAL PRIMARY KEY,

    -- Payload: unique key mapped to a JSON document
    cache_key    VARCHAR(255) NOT NULL UNIQUE,
    cache_value  JSON         NOT NULL,

    -- Lifetime: ttl_seconds defaults to 60; expires_at has no default and
    -- must be set by the writer.
    ttl_seconds  INT          NOT NULL DEFAULT 60,
    created_at   TIMESTAMP    DEFAULT CURRENT_TIMESTAMP,
    updated_at   TIMESTAMP    DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP,
    expires_at   TIMESTAMP    NOT NULL,

    -- Supports range scans that look for expired entries
    KEY idx_expires_at (expires_at)
);
-- Event: cleanup_dashboard_requests
-- Hourly purge that enforces the 72-hour retention window on
-- dashboard_request_log.
CREATE EVENT IF NOT EXISTS cleanup_dashboard_requests
    ON SCHEDULE
        EVERY 1 HOUR
        STARTS CURRENT_TIMESTAMP
    DO
        DELETE FROM dashboard_request_log
        WHERE created_at < NOW() - INTERVAL 72 HOUR;
-- Event: cleanup_metrics_timeseries
-- Hourly purge that enforces the 90-day retention window on
-- metrics_timeseries (this event touches only that table).
CREATE EVENT IF NOT EXISTS cleanup_metrics_timeseries
    ON SCHEDULE
        EVERY 1 HOUR
        STARTS CURRENT_TIMESTAMP
    DO
        DELETE FROM metrics_timeseries
        WHERE bucket_time < NOW() - INTERVAL 90 DAY;
-- Event: cleanup_dashboard_cache
-- Every five minutes, drop cache rows whose expires_at is already in the past.
CREATE EVENT IF NOT EXISTS cleanup_dashboard_cache
    ON SCHEDULE
        EVERY 5 MINUTE
        STARTS CURRENT_TIMESTAMP
    DO
        DELETE FROM dashboard_cache
        WHERE expires_at < CURRENT_TIMESTAMP;
-- Procedure: aggregate_metrics_to_timeseries
-- Rolls up the most recent complete minute of dashboard_request_log into one
-- metrics_timeseries row. Takes no parameters and returns no result set.
--
-- Re-running it for the same minute is safe: the unique key on bucket_time
-- turns the insert into an update of the aggregate columns.
--
-- Not computed here (always written as NULL on insert): the p50/p95/p99
-- latency percentiles and the top-3 model columns.
DELIMITER //
CREATE PROCEDURE IF NOT EXISTS aggregate_metrics_to_timeseries()
BEGIN
    INSERT INTO metrics_timeseries (
        bucket_time,
        bucket_time_epoch,
        request_count,
        success_count,
        error_count,
        fallback_count,
        avg_latency_ms,
        p50_latency_ms,
        p95_latency_ms,
        p99_latency_ms,
        max_latency_ms,
        total_tokens_in,
        total_tokens_out,
        avg_tokens_in,
        avg_tokens_out,
        total_cost_usd,
        avg_cost_usd,
        avg_confidence,
        min_confidence,
        top_model_1,
        top_model_1_count,
        top_model_2,
        top_model_2_count,
        top_model_3,
        top_model_3_count,
        status_approved,
        status_warning,
        status_rejected,
        status_pending
    )
    SELECT
        -- Truncate each row's timestamp to the start of its minute.
        DATE_FORMAT(created_at, '%Y-%m-%d %H:%i:00') AS bucket_time,
        UNIX_TIMESTAMP(DATE_FORMAT(created_at, '%Y-%m-%d %H:%i:00')) AS bucket_time_epoch,
        COUNT(*) AS request_count,
        -- Only 'approved' counts as success; 'rejected' and 'error' count as errors.
        SUM(CASE WHEN status = 'approved' THEN 1 ELSE 0 END) AS success_count,
        SUM(CASE WHEN status IN ('rejected', 'error') THEN 1 ELSE 0 END) AS error_count,
        SUM(CASE WHEN fallback_used = TRUE THEN 1 ELSE 0 END) AS fallback_count,
        AVG(latency_ms) AS avg_latency_ms,
        NULL AS p50_latency_ms,
        NULL AS p95_latency_ms,
        NULL AS p99_latency_ms,
        MAX(latency_ms) AS max_latency_ms,
        SUM(tokens_in) AS total_tokens_in,
        SUM(tokens_out) AS total_tokens_out,
        AVG(tokens_in) AS avg_tokens_in,
        AVG(tokens_out) AS avg_tokens_out,
        SUM(cost_usd) AS total_cost_usd,
        AVG(cost_usd) AS avg_cost_usd,
        -- AVG/MIN skip NULL confidence scores.
        AVG(confidence_score) AS avg_confidence,
        MIN(confidence_score) AS min_confidence,
        NULL AS top_model_1,
        NULL AS top_model_1_count,
        NULL AS top_model_2,
        NULL AS top_model_2_count,
        NULL AS top_model_3,
        NULL AS top_model_3_count,
        -- Status distribution. 'approved' and 'rejected' are the literals used
        -- by the counters above; 'warning' and 'pending' are assumed from the
        -- target column names. NOTE(review): confirm against the writer of
        -- dashboard_request_log.status.
        SUM(CASE WHEN status = 'approved' THEN 1 ELSE 0 END) AS status_approved,
        SUM(CASE WHEN status = 'warning' THEN 1 ELSE 0 END) AS status_warning,
        SUM(CASE WHEN status = 'rejected' THEN 1 ELSE 0 END) AS status_rejected,
        SUM(CASE WHEN status = 'pending' THEN 1 ELSE 0 END) AS status_pending
    FROM dashboard_request_log
    -- Half-open window: [start of previous minute, start of current minute).
    WHERE created_at >= DATE_FORMAT(DATE_SUB(NOW(), INTERVAL 1 MINUTE), '%Y-%m-%d %H:%i:00')
      AND created_at < DATE_FORMAT(NOW(), '%Y-%m-%d %H:%i:00')
    -- Both non-aggregated expressions are listed so the query is valid under
    -- ONLY_FULL_GROUP_BY. Grouping also means an empty minute inserts no row.
    GROUP BY bucket_time, bucket_time_epoch
    ON DUPLICATE KEY UPDATE
        request_count = VALUES(request_count),
        success_count = VALUES(success_count),
        error_count = VALUES(error_count),
        fallback_count = VALUES(fallback_count),
        avg_latency_ms = VALUES(avg_latency_ms),
        max_latency_ms = VALUES(max_latency_ms),
        total_tokens_in = VALUES(total_tokens_in),
        total_tokens_out = VALUES(total_tokens_out),
        avg_tokens_in = VALUES(avg_tokens_in),
        avg_tokens_out = VALUES(avg_tokens_out),
        total_cost_usd = VALUES(total_cost_usd),
        avg_cost_usd = VALUES(avg_cost_usd),
        avg_confidence = VALUES(avg_confidence),
        min_confidence = VALUES(min_confidence),
        status_approved = VALUES(status_approved),
        status_warning = VALUES(status_warning),
        status_rejected = VALUES(status_rejected),
        status_pending = VALUES(status_pending);
END //
DELIMITER ;
-- Event: aggregate_metrics_every_minute
-- Invokes aggregate_metrics_to_timeseries() once a minute, starting as soon
-- as the event is created.
CREATE EVENT IF NOT EXISTS aggregate_metrics_every_minute
    ON SCHEDULE
        EVERY 1 MINUTE
        STARTS CURRENT_TIMESTAMP
    DO
        CALL aggregate_metrics_to_timeseries();