# llm-gateway/packages/lightrag-sidecar/scripts/populate_eval_set.py

#!/usr/bin/env python3
"""Populate evaluation set with ground truth document IDs by running queries."""

import os
import sys
import json
import asyncio
import httpx
from pathlib import Path
from typing import Optional

# Configuration
# Base URL of the sidecar; the LIGHTRAG_SIDECAR_URL environment variable
# overrides the local default.
LIGHTRAG_SIDECAR_URL = os.environ.get("LIGHTRAG_SIDECAR_URL", "http://localhost:3140")
# Domain value sent with every sidecar query.
DOMAIN = "transceiver"
# Evaluation set JSON, located relative to this script: <script dir>/../data/.
EVAL_SET_FILE = Path(__file__).parent.parent.joinpath("data", "eval-transceiver-50qa.json")


async def load_eval_set() -> dict:
    """Load the evaluation set from ``EVAL_SET_FILE``.

    Returns:
        The parsed JSON content of the evaluation set file.

    If the file does not exist, an error message is printed and the
    process exits with status 1.
    """
    try:
        # Decode explicitly as UTF-8 instead of the platform locale encoding,
        # so non-ASCII query text loads identically on every machine.
        with open(EVAL_SET_FILE, "r", encoding="utf-8") as f:
            return json.load(f)
    except FileNotFoundError:
        # Attempt the open and handle the failure rather than checking
        # exists() first, which leaves a window between check and open.
        print(f"Error: Evaluation set file not found: {EVAL_SET_FILE}")
        sys.exit(1)


async def query_sidecar(client: httpx.AsyncClient, query: str) -> list[str]:
    """Run a query against the sidecar and return the source document IDs.

    Args:
        client: HTTP client used to POST the query.
        query: Query text to send.

    Returns:
        The ``source_doc_id`` values from the response's ``results`` list,
        in response order with duplicates removed. Returns an empty list on
        a non-200 status or when the request or response parsing fails
        (the problem is printed, not raised).
    """
    try:
        response = await client.post(
            f"{LIGHTRAG_SIDECAR_URL}/api/kg/query",
            json={
                "query": query,
                "domain": DOMAIN,
                "top_k": 10,
                "entity_links": False,
                "min_relevance": 0.3,
            },
            timeout=10,
        )

        if response.status_code != 200:
            print(f"  Query error: {response.status_code}")
            return []

        data = response.json()
        # Tolerate a missing or null "results" field.
        results = data.get("results") or []
        # Skip entries without a usable source_doc_id instead of letting one
        # malformed result discard every other result of the query.
        found_ids = (
            result.get("source_doc_id")
            for result in results
            if isinstance(result, dict)
        )
        # dict.fromkeys removes repeated IDs while keeping first-seen order.
        return list(dict.fromkeys(doc_id for doc_id in found_ids if doc_id))
    except Exception as e:
        # Deliberate best-effort boundary: a failed query is reported and
        # treated as "no documents" so the remaining queries still run.
        print(f"  Exception: {e}")
        return []


async def verify_ground_truth(
    client: httpx.AsyncClient,
    query: str,
    suggested_docs: list[str]
) -> list[str]:
    """Interactively verify and adjust ground truth document IDs.

    Prints the query and the suggested document IDs, then prompts on stdin
    until a valid answer is given.

    Args:
        client: HTTP client; not used by this function.
        query: Query text shown to the user.
        suggested_docs: Candidate document IDs shown to the user.

    Returns:
        ``suggested_docs`` when the user answers ``y``, an empty list for
        ``n``, or the user-entered IDs for ``edit`` (whitespace-trimmed,
        blank entries dropped).
    """
    print(f"\nQuery: {query}")
    print(f"Suggested documents ({len(suggested_docs)}):")
    for i, doc_id in enumerate(suggested_docs, 1):
        print(f"  {i}. {doc_id}")

    while True:
        user_input = input("\nAccept suggested docs? (y/n/edit): ").strip().lower()

        if user_input == "y":
            return suggested_docs
        elif user_input == "n":
            return []
        elif user_input == "edit":
            doc_input = input("Enter comma-separated doc IDs: ")
            # Drop blank entries so input such as "a,,b" or a trailing comma
            # does not store an empty string as a document ID.
            trimmed = (part.strip() for part in doc_input.split(","))
            return [doc_id for doc_id in trimmed if doc_id]
        else:
            print("Invalid input. Please enter 'y', 'n', or 'edit'.")

async def main():
    """Populate the evaluation set with ground truth document IDs.

    Loads the evaluation set, checks the sidecar health endpoint, then for
    every query without ground truth asks the sidecar for candidate
    documents and has the user confirm them. The updated set is written
    back to ``EVAL_SET_FILE``; answers collected so far are also written
    when the session is interrupted or fails part-way through.
    """
    print("LightRAG Evaluation Set Population")
    print(f"Sidecar URL: {LIGHTRAG_SIDECAR_URL}")
    print(f"Evaluation set: {EVAL_SET_FILE}")

    # Load evaluation set
    eval_set = await load_eval_set()
    queries = eval_set["queries"]

    print(f"\nLoaded {len(queries)} queries")

    # Check sidecar health
    async with httpx.AsyncClient() as client:
        try:
            health = await client.get(f"{LIGHTRAG_SIDECAR_URL}/api/kg/health", timeout=5)
            if health.status_code == 200:
                print("✓ Sidecar is healthy")
            else:
                print(f"✗ Sidecar health check failed: {health.status_code}")
                print("Run local sidecar: uvicorn app.main:app --reload")
                return
        except Exception as e:
            print(f"✗ Cannot reach sidecar: {e}")
            print("Run local sidecar: uvicorn app.main:app --reload")
            return

        # Process each query
        updated_count = 0
        try:
            for i, query_obj in enumerate(queries, 1):
                query_id = query_obj["query_id"]
                query_text = query_obj["query"]

                # Skip if already populated
                if query_obj.get("ground_truth_doc_ids"):
                    print(f"\n[{i}/{len(queries)}] Query {query_id}: Already populated")
                    continue

                print(f"\n[{i}/{len(queries)}] Processing Query {query_id}...")

                # Get suggested documents
                suggested_docs = await query_sidecar(client, query_text)

                if not suggested_docs:
                    print("  No documents found")
                    query_obj["ground_truth_doc_ids"] = []
                    updated_count += 1
                    continue

                # Verify with user
                ground_truth = await verify_ground_truth(client, query_text, suggested_docs)
                query_obj["ground_truth_doc_ids"] = ground_truth
                updated_count += 1
        finally:
            # Save in a finally block so answers already verified by the user
            # survive an interrupt or an unexpected error mid-session.
            if updated_count > 0:
                # json.dump escapes non-ASCII by default, so the output is
                # plain ASCII; the encoding is stated explicitly anyway.
                with open(EVAL_SET_FILE, "w", encoding="utf-8") as f:
                    json.dump(eval_set, f, indent=2)
                print(f"\n✓ Updated {updated_count} queries in {EVAL_SET_FILE}")
            else:
                print("\nNo updates made")


# Run the async entry point only when executed as a script, not on import.
if __name__ == "__main__":
    asyncio.run(main())