feat: restore workbench v1 and publish wired v2

2026-05-03 09:53:40 +02:00 · 2026-05-03 09:53:40 +02:00 · 09165b9bf7
commit 09165b9bf7
parent 060b846d9b
23 changed files with 7846 additions and 1055 deletions
--- a/packages/gateway/public/dashboard-v2.html
+++ b/packages/gateway/public/dashboard-v2.html
--- a/packages/gateway/public/dashboard.html
+++ b/packages/gateway/public/dashboard.html
@ -1786,8 +1786,8 @@
  <script>
    const HEALTH_CHECK_INTERVAL = 30000;
-    const METRICS_REFRESH_INTERVAL = 3000;
+    const METRICS_REFRESH_INTERVAL = 15000;
-    const REQUESTS_REFRESH_INTERVAL = 3000;
+    const REQUESTS_REFRESH_INTERVAL = 15000;
    const API_BASE = '';
    let selectedHours = 24;
    let lastMetrics = null;
@ -3097,7 +3097,7 @@
      setupPolling();
      setInterval(checkHealth, HEALTH_CHECK_INTERVAL);
      setInterval(loadSubscriptions, 30000);
-      setInterval(loadHero, 15000); // refresh buddy / events / forecast every 15s
+      setInterval(loadHero, 30000); // refresh buddy / events / forecast every 30s
    }
    init();
--- a/packages/gateway/src/modules/admin-auth.ts
+++ b/packages/gateway/src/modules/admin-auth.ts
@ -0,0 +1,87 @@
 import type { FastifyReply, FastifyRequest } from 'fastify';
 import { timingSafeEqual } from 'crypto';
 // Environment variables that may carry the dashboard token, listed in lookup priority order.
 const TOKEN_ENV_KEYS = ['DASHBOARD_AUTH_TOKEN', 'LLM_GATEWAY_ADMIN_TOKEN', 'ADMIN_TOKEN'] as const;
 /**
 * Resolve the configured dashboard token.
 *
 * @returns the trimmed value of the first variable in TOKEN_ENV_KEYS that is
 *          non-empty after trimming, or `undefined` when none is set.
 */
 function configuredToken(): string | undefined {
  const candidates = TOKEN_ENV_KEYS.map((name) => process.env[name]?.trim());
  return candidates.find((candidate) => Boolean(candidate));
 }
 /**
 * Compare two strings using `timingSafeEqual` on their UTF-8 bytes.
 * Inputs whose byte lengths differ are rejected up front, because
 * `timingSafeEqual` requires equally sized buffers.
 */
 function safeEqual(left: string, right: string): boolean {
  const a = Buffer.from(left);
  const b = Buffer.from(right);
  return a.length === b.length && timingSafeEqual(a, b);
 }
 /**
 * Extract a token from an `Authorization` header value.
 *
 * - `Bearer <token>` yields the trimmed token.
 * - `Basic <base64>` is decoded as UTF-8; the part after the first `:` is the
 *   token (the whole decoded string when there is no `:`).
 * - Any other scheme, or a header without both scheme and value, yields `undefined`.
 */
 function tokenFromAuthorizationHeader(header: string | undefined): string | undefined {
  if (!header) return undefined;
  const parts = header.split(/\s+/, 2);
  const scheme = parts[0];
  const credentials = parts[1];
  if (!scheme || !credentials) return undefined;
  switch (scheme.toLowerCase()) {
    case 'bearer':
      return credentials.trim();
    case 'basic': {
      try {
        const decoded = Buffer.from(credentials, 'base64').toString('utf8');
        const colonAt = decoded.indexOf(':');
        const secret = colonAt < 0 ? decoded : decoded.slice(colonAt + 1);
        return secret.trim();
      } catch {
        return undefined;
      }
    }
    default:
      return undefined;
  }
 }
 /**
 * Read the token a client presented: a non-blank `x-dashboard-token` header
 * takes precedence, otherwise the `Authorization` header is parsed.
 */
 function tokenFromRequest(request: FastifyRequest): string | undefined {
  const headerValue = request.headers['x-dashboard-token'];
  const explicitToken = typeof headerValue === 'string' ? headerValue.trim() : '';
  return explicitToken ? explicitToken : tokenFromAuthorizationHeader(request.headers.authorization);
 }
 /** Report whether any of the token environment variables holds a non-empty value. */
 export function isDashboardAuthConfigured(): boolean {
  return Boolean(configuredToken());
 }
 /**
 * Decide whether a request may skip dashboard auth because it is a local
 * development request.
 *
 * All of the following must hold:
 * - `NODE_ENV` is not `production`;
 * - the peer address (`request.ip`) is a loopback address;
 * - the requested host, ignoring any port, is exactly `localhost`,
 *   `127.0.0.1` or `[::1]`.
 *
 * The Host header is chosen by the client, so it is never sufficient on its
 * own, and prefix matching is avoided because it would also accept names such
 * as `localhost.evil.example`.
 */
 function isLocalDevelopmentRequest(request: FastifyRequest): boolean {
  if (process.env['NODE_ENV'] === 'production') return false;
  // Peer must be loopback: ::1, 127.0.0.0/8, or the IPv4-mapped IPv6 form of the latter.
  const peer = (request.ip || '').trim().toLowerCase();
  const peerIsLoopback = peer === '::1' || /^(?:::ffff:)?127(?:\.\d{1,3}){3}$/.test(peer);
  if (!peerIsLoopback) return false;
  const rawHost = (request.hostname || request.headers.host || '').trim().toLowerCase();
  // Drop the port: bracketed IPv6 literals end at ']', everything else at the first ':'.
  const hostOnly = rawHost.startsWith('[')
    ? rawHost.slice(0, rawHost.indexOf(']') + 1)
    : rawHost.split(':', 1)[0] ?? '';
  return hostOnly === 'localhost' || hostOnly === '127.0.0.1' || hostOnly === '[::1]';
 }
 /**
 * Guard for dashboard routes.
 *
 * Returns without touching the reply when the request is treated as local
 * development or presents the configured token. Otherwise it sends:
 * - 503 when no token is configured;
 * - 401 with a `WWW-Authenticate: Bearer` challenge when the token is missing or wrong.
 */
 export async function requireDashboardAuth(request: FastifyRequest, reply: FastifyReply): Promise<FastifyReply | void> {
  if (isLocalDevelopmentRequest(request)) return;

  const expected = configuredToken();
  if (!expected) {
    const notConfigured = {
      statusCode: 503,
      error: 'Dashboard Auth Not Configured',
      message: 'Set DASHBOARD_AUTH_TOKEN before exposing dashboard data or settings.',
    };
    return reply.status(503).send(notConfigured);
  }

  const presented = tokenFromRequest(request);
  const authorized = !!presented && safeEqual(presented, expected);
  if (authorized) return;

  reply.header('WWW-Authenticate', 'Bearer realm="llm-gateway-dashboard"');
  const unauthorized = {
    statusCode: 401,
    error: 'Unauthorized',
    message: 'Dashboard token required.',
  };
  return reply.status(401).send(unauthorized);
 }
 /**
 * Describe the auth state of a request without rejecting it.
 *
 * Local development requests report both flags as true. Otherwise `configured`
 * says whether a token is set and `authenticated` whether the request
 * presented that token.
 */
 export function dashboardAuthStatus(request: FastifyRequest): { configured: boolean; authenticated: boolean } {
  let configured = true;
  let authenticated = true;
  if (!isLocalDevelopmentRequest(request)) {
    const expected = configuredToken();
    if (expected) {
      const presented = tokenFromRequest(request);
      authenticated = !!presented && safeEqual(presented, expected);
    } else {
      configured = false;
      authenticated = false;
    }
  }
  return { configured, authenticated };
 }
--- a/packages/gateway/src/modules/bridge-spawner.ts
+++ b/packages/gateway/src/modules/bridge-spawner.ts
@ -0,0 +1,246 @@
 /**
 * Bridge Spawner
 *
 * Auto-starts inline HTTP bridges for detected CLI subscriptions. Each bridge
 * exposes a `POST /api/generate` endpoint that the gateway can call as a regular
 * external provider. Bridges run in-process to avoid the overhead of spawning
 * separate Node processes — they listen on a dedicated port per subscription.
 */
 import { execFile } from 'child_process';
 import { createServer, type Server } from 'http';
 import { logger } from '../observability/logger.js';
 import type { SubscriptionDescriptor, SubscriptionStatus } from './subscription-discovery.js';
 /** Bookkeeping record for one bridge tracked by this module. */
 interface RunningBridge {
  /** Subscription this bridge serves. */
  descriptor: SubscriptionDescriptor;
  /** HTTP server handle created for the bridge (not listening when the port was already taken). */
  server: Server;
  /** Port taken from `descriptor.bridgePort`. */
  port: number;
  /** Base URL, always `http://127.0.0.1:<port>`. */
  url: string;
  /** When the record was created; used for the `/health` uptime. */
  startedAt: Date;
 }
 // Registry of bridges known to this process, keyed by subscription id (`descriptor.id`).
 const runningBridges = new Map<string, RunningBridge>();
 /**
 * Run a CLI tool with the prompt piped to its stdin and return its stdout.
 * Generic implementation that all inline bridges share.
 *
 * Never rejects: every failure (spawn error, non-zero exit, timeout, output
 * over the 10 MiB buffer) is reported as `{ success: false, error }`, with the
 * message truncated to 500 characters when it comes from the child process.
 *
 * @param command   executable to run (no shell is involved)
 * @param args      arguments passed verbatim to the executable
 * @param prompt    text written to the child's stdin, which is then closed
 * @param timeoutMs kill the child after this many milliseconds (default 5 minutes)
 * @returns `{ success: true, content }` with trimmed stdout, or `{ success: false, error }`
 */
 async function runCli(
  command: string,
  args: readonly string[],
  prompt: string,
  timeoutMs: number = 300_000
 ): Promise<{ success: boolean; content?: string; error?: string }> {
  return new Promise((resolve) => {
    try {
      const child = execFile(
        command,
        [...args],
        { timeout: timeoutMs, maxBuffer: 10 * 1024 * 1024 },
        (err, stdout) => {
          if (err) {
            resolve({ success: false, error: err.message.slice(0, 500) });
          } else {
            resolve({ success: true, content: stdout.trim() });
          }
        }
      );
      if (child.stdin) {
        // A child that exits (or never starts) before draining stdin makes the pipe
        // emit an 'error' (typically EPIPE). Without a listener that would surface as
        // an uncaught exception; the execFile callback already reports the real outcome.
        child.stdin.on('error', () => {});
        child.stdin.end(prompt);
      }
    } catch (err) {
      resolve({ success: false, error: err instanceof Error ? err.message : String(err) });
    }
  });
 }
 /**
 * Map a subscription's `bridgeImplementation` to the command line that serves it.
 *
 * @param desc  subscription whose bridge implementation selects the CLI
 * @param model optional model name; when truthy it is forwarded as `--model <model>`
 *              by every implementation except `inline-copilot`
 * @returns the executable name and its argument list
 */
 function buildCliInvocation(desc: SubscriptionDescriptor, model?: string): { cmd: string; args: string[] } {
  const modelFlag: string[] = model ? ['--model', model] : [];
  switch (desc.bridgeImplementation) {
    case 'inline-claude':
      return { cmd: 'claude', args: ['--print', '--output-format', 'text', ...modelFlag] };
    case 'inline-copilot':
      // Fixed invocation of `gh copilot suggest --shell`; the model is not forwarded here.
      return { cmd: 'gh', args: ['copilot', 'suggest', '--shell'] };
    case 'inline-openai':
      // Generic CLI: the executable comes from the descriptor itself.
      return { cmd: desc.command, args: modelFlag };
    case 'external-codex':
      // codex CLI; the prompt is supplied on stdin by the caller.
      return { cmd: 'codex', args: modelFlag };
  }
 }
 /**
 * Spawn an inline HTTP bridge for a subscription. Returns the URL the gateway
 * should use to talk to it. Idempotent — calling twice returns the same bridge.
 *
 * The bridge listens on `127.0.0.1:<desc.bridgePort>` and serves:
 * - `GET /health` — status and uptime;
 * - `POST /api/generate` or `POST /v1/completion` — JSON body
 *   `{ prompt, system?, model? }`; runs the subscription's CLI and answers
 *   200 on success, 400 for invalid input, 413 for bodies over 10 MiB,
 *   502 when the CLI fails and 500 for unexpected errors (including bad JSON).
 *
 * When the port is already in use the promise still resolves, with a record
 * pointing at that port, on the assumption that a bridge is already running there.
 *
 * @param desc subscription to serve
 * @returns the registered bridge record; rejects on listen errors other than EADDRINUSE
 */
 export function spawnBridge(desc: SubscriptionDescriptor): Promise<RunningBridge> {
  const existing = runningBridges.get(desc.id);
  if (existing) {
    return Promise.resolve(existing);
  }
  // Cap on the buffered request body so a single oversized POST cannot exhaust memory.
  const maxBodyBytes = 10 * 1024 * 1024;
  return new Promise((resolve, reject) => {
    const server = createServer(async (req, res) => {
      res.setHeader('Content-Type', 'application/json');
      // NOTE(review): a wildcard CORS origin lets any web page open in a local browser
      // call this bridge — confirm that is intended.
      res.setHeader('Access-Control-Allow-Origin', '*');
      if (req.method === 'GET' && req.url === '/health') {
        const current = runningBridges.get(desc.id);
        res.writeHead(200);
        res.end(
          JSON.stringify({
            status: 'ok',
            subscription: desc.id,
            label: desc.label,
            command: desc.command,
            uptimeSeconds: current ? Math.floor((Date.now() - current.startedAt.getTime()) / 1000) : 0,
          })
        );
        return;
      }
      if (req.method === 'POST' && (req.url === '/api/generate' || req.url === '/v1/completion')) {
        const chunks: Buffer[] = [];
        let receivedBytes = 0;
        let tooLarge = false;
        req.on('data', (chunk: Buffer) => {
          if (tooLarge) return; // keep draining the socket, but stop buffering
          receivedBytes += chunk.length;
          if (receivedBytes > maxBodyBytes) {
            tooLarge = true;
            chunks.length = 0;
            res.writeHead(413);
            res.end(JSON.stringify({ error: 'request body too large' }));
            return;
          }
          chunks.push(chunk);
        });
        req.on('end', async () => {
          if (tooLarge) return;
          try {
            // Decode once, after all chunks arrived: decoding chunk by chunk would corrupt
            // a multi-byte UTF-8 character that straddles a chunk boundary.
            const parsed: unknown = JSON.parse(Buffer.concat(chunks).toString('utf8') || '{}');
            const payload: Record<string, unknown> =
              typeof parsed === 'object' && parsed !== null ? (parsed as Record<string, unknown>) : {};
            const { prompt, system, model } = payload;
            if (typeof prompt !== 'string' || !prompt) {
              res.writeHead(400);
              res.end(JSON.stringify({ error: 'prompt required' }));
              return;
            }
            if (system != null && typeof system !== 'string') {
              res.writeHead(400);
              res.end(JSON.stringify({ error: 'system must be a string' }));
              return;
            }
            // The model name becomes a CLI argument; refuse values that would be read as a flag.
            if (model != null && (typeof model !== 'string' || model.startsWith('-'))) {
              res.writeHead(400);
              res.end(JSON.stringify({ error: 'invalid model' }));
              return;
            }
            const requestedModel = typeof model === 'string' ? model : undefined;
            const fullPrompt = system ? `${system}\n\n---\n\n${prompt}` : prompt;
            const { cmd, args } = buildCliInvocation(desc, requestedModel);
            const result = await runCli(cmd, args, fullPrompt);
            if (result.success) {
              res.writeHead(200);
              res.end(
                JSON.stringify({
                  success: true,
                  content: result.content,
                  provider: desc.providerName,
                  model: requestedModel ?? desc.models[0]?.id,
                })
              );
            } else {
              res.writeHead(502);
              res.end(JSON.stringify({ success: false, error: result.error }));
            }
          } catch (e) {
            res.writeHead(500);
            res.end(JSON.stringify({ error: e instanceof Error ? e.message : 'parse error' }));
          }
        });
        return;
      }
      res.writeHead(404);
      res.end(JSON.stringify({ error: 'not found' }));
    });
    server.on('error', (err) => {
      // Port in use → assume an existing bridge is already running, treat as success
      if ((err as NodeJS.ErrnoException).code === 'EADDRINUSE') {
        logger.info(
          { subscription: desc.id, port: desc.bridgePort },
          'Port already in use — assuming external bridge is healthy'
        );
        const url = `http://127.0.0.1:${desc.bridgePort}`;
        const fakeBridge: RunningBridge = {
          descriptor: desc,
          server, // server failed to bind; OK to keep handle
          port: desc.bridgePort,
          url,
          startedAt: new Date(),
        };
        runningBridges.set(desc.id, fakeBridge);
        resolve(fakeBridge);
      } else {
        reject(err);
      }
    });
    server.listen(desc.bridgePort, '127.0.0.1', () => {
      const url = `http://127.0.0.1:${desc.bridgePort}`;
      const bridge: RunningBridge = {
        descriptor: desc,
        server,
        port: desc.bridgePort,
        url,
        startedAt: new Date(),
      };
      runningBridges.set(desc.id, bridge);
      // Set the env var so the existing external-providers logic finds the bridge
      process.env[desc.bridgeEnvKey] = url;
      logger.info(
        { subscription: desc.id, url, port: desc.bridgePort, envKey: desc.bridgeEnvKey },
        'Inline subscription bridge started'
      );
      resolve(bridge);
    });
  });
 }
 /**
 * Start a bridge for each subscription that is installed, not known to be
 * unauthenticated, and not already reported as having a running bridge.
 * Bridges are started one after another; a failure is logged and skipped.
 *
 * @param statuses discovery results to consider
 * @returns the bridges that were started (or already registered) successfully
 */
 export async function spawnDetectedBridges(
  statuses: readonly SubscriptionStatus[]
 ): Promise<RunningBridge[]> {
  const needsBridge = (candidate: SubscriptionStatus): boolean =>
    candidate.installed && candidate.authenticated !== false && !candidate.bridgeRunning;
  const started: RunningBridge[] = [];
  for (const candidate of statuses.filter(needsBridge)) {
    try {
      started.push(await spawnBridge(candidate.descriptor));
    } catch (err) {
      logger.warn(
        { err, subscription: candidate.descriptor.id },
        'Failed to spawn subscription bridge — continuing'
      );
    }
  }
  return started;
 }
 /**
 * Return a fresh array with every bridge currently in the registry.
 * Used by the dashboard.
 */
 export function getRunningBridges(): readonly RunningBridge[] {
  return [...runningBridges.values()];
 }
 /**
 * Close every registered bridge server and empty the registry
 * (used during graceful shutdown). Errors thrown by `close` are ignored.
 */
 export async function stopAllBridges(): Promise<void> {
  const closeOne = (bridge: RunningBridge): Promise<void> =>
    new Promise<void>((done) => {
      try {
        bridge.server.close(() => done());
      } catch {
        done();
      }
    });
  await Promise.all([...runningBridges.values()].map(closeOne));
  runningBridges.clear();
 }
--- a/packages/gateway/src/modules/caller-stats.ts
+++ b/packages/gateway/src/modules/caller-stats.ts
@ -0,0 +1,180 @@
 /**
 * Per-Caller Deep Dive
 *
 * Aggregates everything we know about ONE caller — its volume, models used,
 * cache effectiveness, cost, latency distribution, recent activity, and
 * stored memory facts. Powers the modal that opens when a user clicks on
 * a caller chip in the dashboard.
 */
 import type { Pool } from 'pg';
 import { logger } from '../observability/logger.js';
 /** Aggregated view of a single caller, as returned by `getCallerDeepDive`. */
 export interface CallerDeepDive {
  /** Caller id, trimmed and lower-cased. */
  caller: string;
  /** ISO timestamp of the caller's earliest tracked request, or null. */
  firstSeen: string | null;
  /** ISO timestamp of the caller's latest tracked request, or null. */
  lastSeen: string | null;
  totalRequests: number;
  /** Fraction (0–1) of requests whose status is 'approved'. */
  successRate: number;
  totalTokensIn: number;
  totalTokensOut: number;
  /** Sum of `cost_usd` over the caller's requests. */
  totalCost: number;
  avgLatencyMs: number;
  /** distribution: p50, p95 */
  latencyP50: number;
  latencyP95: number;
  /** Sum of `hit_count` over the caller's response-cache rows. */
  cacheHits: number;
  /** Sum of `tokens_saved` over the caller's response-cache rows. */
  cacheTokensSaved: number;
  /** Up to 10 most-used models; `share` is a percentage rounded to one decimal. */
  topModels: Array<{ model: string; count: number; share: number }>;
  /** Up to 8 most frequent task types; a missing type is reported as '(unknown)'. */
  topTaskTypes: Array<{ taskType: string; count: number }>;
  /** The 15 most recent requests, newest first. */
  recentRequests: Array<{
    request_id: string;
    model: string;
    status: string;
    tokens_in: number;
    tokens_out: number;
    latency_ms: number;
    cost_usd: number;
    created_at: string;
  }>;
  /** Up to 20 current (not superseded, not expired) knowledge facts, highest confidence first. */
  storedFacts: Array<{ key: string; value: string; confidence: number; source: string }>;
  /** 24 buckets (hour of day 0–23) counting requests from the last 7 days. */
  hourlyHeatmap: Array<{ hour: number; count: number }>;
 }
 /**
 * Collect the deep-dive statistics for one caller.
 *
 * The caller id is trimmed and lower-cased before it is used as the
 * `caller_id` query parameter. Queries run one after another.
 *
 * @param db     pg pool used for all queries
 * @param caller caller id as supplied by the client
 * @returns the aggregated view, or `null` when the caller has no tracked
 *          requests or any mandatory query fails (the failure is logged).
 *          A failing knowledge-facts query only empties `storedFacts`.
 */
 export async function getCallerDeepDive(db: Pool, caller: string): Promise<CallerDeepDive | null> {
  const c = caller.trim().toLowerCase();
  try {
    // Headline aggregates
    const head = await db.query(`
      SELECT
        COUNT(*)::INT AS total,
        MIN(created_at) AS first_seen,
        MAX(created_at) AS last_seen,
        SUM(CASE WHEN status = 'approved' THEN 1 ELSE 0 END)::FLOAT / NULLIF(COUNT(*),0) AS success_rate,
        COALESCE(SUM(tokens_in), 0)::BIGINT AS tok_in,
        COALESCE(SUM(tokens_out), 0)::BIGINT AS tok_out,
        COALESCE(SUM(cost_usd), 0)::NUMERIC AS cost,
        COALESCE(AVG(latency_ms), 0)::INT AS avg_lat,
        COALESCE(PERCENTILE_DISC(0.50) WITHIN GROUP (ORDER BY latency_ms), 0)::INT AS p50,
        COALESCE(PERCENTILE_DISC(0.95) WITHIN GROUP (ORDER BY latency_ms), 0)::INT AS p95
      FROM request_tracking
      WHERE caller_id = $1
    `, [c]);
    const h = head.rows[0];
    if (!h || parseInt(h.total, 10) === 0) {
      return null;
    }
    const total = parseInt(h.total, 10) || 0;
    // Top models by this caller
    const models = await db.query(`
      SELECT model, COUNT(*)::INT AS cnt
      FROM request_tracking
      WHERE caller_id = $1
      GROUP BY model
      ORDER BY cnt DESC
      LIMIT 10
    `, [c]);
    const topModels = models.rows.map((r: any) => ({
      model: r.model,
      count: parseInt(r.cnt, 10) || 0,
      share: total > 0 ? parseFloat(((parseInt(r.cnt, 10) / total) * 100).toFixed(1)) : 0,
    }));
    // Top task types
    const tasks = await db.query(`
      SELECT task_type, COUNT(*)::INT AS cnt
      FROM request_tracking
      WHERE caller_id = $1
      GROUP BY task_type
      ORDER BY cnt DESC
      LIMIT 8
    `, [c]);
    const topTaskTypes = tasks.rows.map((r: any) => ({
      taskType: r.task_type ?? '(unknown)',
      count: parseInt(r.cnt, 10) || 0,
    }));
    // Cache stats for this caller
    const cache = await db.query(`
      SELECT
        COALESCE(SUM(hit_count), 0)::INT AS hits,
        COALESCE(SUM(tokens_saved), 0)::BIGINT AS tokens
      FROM response_cache
      WHERE caller_id = $1
    `, [c]);
    const cacheHits = parseInt(cache.rows[0]?.hits ?? '0', 10);
    const cacheTokens = parseInt(cache.rows[0]?.tokens ?? '0', 10);
    // Recent requests (15 latest)
    const recent = await db.query(`
      SELECT request_id, model, status, tokens_in, tokens_out, latency_ms, cost_usd, created_at
      FROM request_tracking
      WHERE caller_id = $1
      ORDER BY created_at DESC
      LIMIT 15
    `, [c]);
    // Stored facts — best effort: a failure here must not discard the rest of the view.
    let storedFacts: any[] = [];
    try {
      const facts = await db.query(`
        SELECT fact_key, fact_value, confidence, source
        FROM caller_knowledge
        WHERE caller_id = $1 AND superseded_by IS NULL
          AND (valid_until IS NULL OR valid_until > NOW())
        ORDER BY confidence DESC
        LIMIT 20
      `, [c]);
      storedFacts = facts.rows.map((r: any) => ({
        key: r.fact_key, value: r.fact_value,
        confidence: parseFloat(r.confidence), source: r.source ?? '',
      }));
    } catch (err) {
      // Leave a trace instead of hiding the failure completely.
      logger.debug({ err, caller: c }, 'caller-stats: stored facts unavailable');
    }
    // Hour-of-day heatmap: requests of the last 7 days bucketed by hour (0–23)
    const hourly = await db.query(`
      SELECT EXTRACT(HOUR FROM created_at)::INT AS hr, COUNT(*)::INT AS cnt
      FROM request_tracking
      WHERE caller_id = $1 AND created_at > NOW() - INTERVAL '7 days'
      GROUP BY hr
      ORDER BY hr ASC
    `, [c]);
    const hourlyMap = new Map<number, number>(hourly.rows.map((r: any): [number, number] => [parseInt(r.hr, 10), parseInt(r.cnt, 10)]));
    // Always emit all 24 buckets so the UI does not have to fill gaps.
    const hourlyHeatmap = Array.from({ length: 24 }, (_, i) => ({ hour: i, count: hourlyMap.get(i) ?? 0 }));
    return {
      caller: c,
      firstSeen: h.first_seen ? new Date(h.first_seen).toISOString() : null,
      lastSeen: h.last_seen ? new Date(h.last_seen).toISOString() : null,
      totalRequests: total,
      successRate: parseFloat(h.success_rate) || 0,
      totalTokensIn: parseInt(h.tok_in, 10) || 0,
      totalTokensOut: parseInt(h.tok_out, 10) || 0,
      totalCost: parseFloat(h.cost) || 0,
      avgLatencyMs: parseInt(h.avg_lat, 10) || 0,
      latencyP50: parseInt(h.p50, 10) || 0,
      latencyP95: parseInt(h.p95, 10) || 0,
      cacheHits,
      cacheTokensSaved: cacheTokens,
      topModels,
      topTaskTypes,
      recentRequests: recent.rows.map((r: any) => ({
        request_id: r.request_id,
        model: r.model,
        status: r.status,
        tokens_in: parseInt(r.tokens_in, 10) || 0,
        tokens_out: parseInt(r.tokens_out, 10) || 0,
        latency_ms: parseInt(r.latency_ms, 10) || 0,
        cost_usd: parseFloat(r.cost_usd) || 0,
        created_at: new Date(r.created_at).toISOString(),
      })),
      storedFacts,
      hourlyHeatmap,
    };
  } catch (err) {
    logger.warn({ err, caller: c }, 'caller-stats: deep dive failed');
    return null;
  }
 }
--- a/packages/gateway/src/modules/embedding-client.ts
+++ b/packages/gateway/src/modules/embedding-client.ts
@ -0,0 +1,87 @@
 /**
 * Embedding Client
 *
 * Generates vector embeddings via Ollama (`nomic-embed-text`, 768 dim).
 * Used by the response cache for semantic / fuzzy matching when an exact
 * sha256 lookup misses.
 *
 * A small in-process LRU (capped at 200 entries) keeps very recent embeddings
 * hot to avoid round-trips to Ollama for repeated small prompts.
 */
 import { logger } from '../observability/logger.js';
 // Base URL of the Ollama server, with one trailing slash removed.
 // NOTE(review): when OLLAMA_BASE_URL is unset this falls back to a non-local host, so
 // prompt text is POSTed there by default — confirm that is intended.
 const OLLAMA_URL = (process.env['OLLAMA_BASE_URL'] || 'https://ollama.fichtmueller.org').replace(/\/$/, '');
 // Embedding model name sent to Ollama; overridable via EMBEDDING_MODEL.
 const EMBED_MODEL = process.env['EMBEDDING_MODEL'] || 'nomic-embed-text';
 // An embedding request is aborted after this many milliseconds.
 const EMBED_TIMEOUT_MS = 5_000;
 // Expected vector length; embed() discards responses of any other length.
 export const EMBEDDING_DIMENSION = 768;
 // Tiny LRU — text → vector, capped at MAX_CACHE entries. A Map iterates in insertion
 // order, so the first key is always the least recently used one.
 const cache = new Map<string, number[]>();
 const MAX_CACHE = 200;
 /** Look up a vector and, on a hit, move the entry to the most-recent position. */
 function lruGet(key: string): number[] | undefined {
  const hit = cache.get(key);
  if (!hit) return hit;
  cache.delete(key);
  cache.set(key, hit);
  return hit;
 }
 /** Store a vector as the most recent entry, evicting the oldest entries beyond the cap. */
 function lruSet(key: string, value: number[]): void {
  // Deleting first guarantees the key is re-inserted at the end (no-op when absent).
  cache.delete(key);
  cache.set(key, value);
  for (const oldest of cache.keys()) {
    if (cache.size <= MAX_CACHE) break;
    cache.delete(oldest);
  }
 }
 /**
 * Produce the embedding vector for a text.
 *
 * The text is trimmed and cut to 8,192 characters; that normalized form is also
 * the cache key. On a cache miss the vector is requested from Ollama's
 * `/api/embeddings` endpoint with a timeout.
 *
 * @returns the vector, or `null` for empty input, a non-OK response, a vector
 *          of unexpected length, or any request error — so callers can degrade
 *          gracefully to exact-match-only.
 */
 export async function embed(text: string): Promise<number[] | null> {
  const input = text.trim().slice(0, 8_192);
  if (input === '') return null;

  const hit = lruGet(input);
  if (hit) return hit;

  let timer: ReturnType<typeof setTimeout> | undefined;
  try {
    const aborter = new AbortController();
    timer = setTimeout(() => aborter.abort(), EMBED_TIMEOUT_MS);
    const response = await fetch(`${OLLAMA_URL}/api/embeddings`, {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify({ model: EMBED_MODEL, prompt: input }),
      signal: aborter.signal,
    });
    if (!response.ok) {
      logger.warn({ status: response.status, model: EMBED_MODEL }, 'embedding-client: Ollama returned non-OK');
      return null;
    }
    const payload = (await response.json()) as { embedding?: number[] };
    const vector = payload.embedding;
    if (!vector || vector.length !== EMBEDDING_DIMENSION) {
      logger.warn({ got: vector?.length, expected: EMBEDDING_DIMENSION }, 'embedding-client: bad dimension');
      return null;
    }
    lruSet(input, vector);
    return vector;
  } catch (err) {
    logger.debug({ err }, 'embedding-client: embed failed');
    return null;
  } finally {
    // Always release the timeout, whichever way the request ended.
    if (timer !== undefined) clearTimeout(timer);
  }
 }
 /**
 * Render a vector as a pgvector literal such as '[0.100000,0.200000]'.
 * Every component is written with exactly six decimal places.
 */
 export function vectorToPgLiteral(vec: number[]): string {
  const components = vec.map((component) => component.toFixed(6));
  return '[' + components.join(',') + ']';
 }
--- a/packages/gateway/src/modules/gamification.ts
+++ b/packages/gateway/src/modules/gamification.ts
@ -0,0 +1,498 @@
 /**
 * Gamification Engine
 *
 * Computes pet/buddy state, achievements, streaks, calendar heatmap and
 * forecasted savings from the live request data. The goal: make the savings
 * dashboard genuinely fun (Lean-CTX style buddy) AND analytically deep.
 *
 * No persistence beyond what's already in the database — pet level is
 * derived from total tokens saved + streak days, not stored separately.
 * That keeps the system stateless and reproducible.
 */
 import type { Pool } from 'pg';
 import { logger } from '../observability/logger.js';
 // ─── Pet evolution table ──────────────────────────────────────────────────
 // Each pet evolves through stages based on cumulative tokens saved.
 // Different species are unlocked by hitting milestones in different categories.
 /** Static description of one pet species and its evolution stages. */
 export interface PetSpecies {
  /** Stable identifier, e.g. 'cache-cat'. */
  id: string;
  /** Display name. */
  name: string;
  rarity: 'common' | 'uncommon' | 'rare' | 'epic' | 'legendary';
  /** Human-readable description of how the species is unlocked. */
  unlockCondition: string;
  /** Species-level ASCII art, one string per line. */
  asciiArt: string[];
  /** Stage-based evolution. Index 0 = baby, last = final form. */
  stages: Array<{
    name: string;
    /** Cumulative tokens saved at which this stage is reached. */
    unlocksAtTokensSaved: number;
    /** ASCII art for this stage, one string per line. */
    asciiArt: string[];
  }>;
 }
 // Catalog of available pet species. Within each species the stages are listed in
 // ascending `unlocksAtTokensSaved` order, starting at 0.
 const PET_SPECIES: readonly PetSpecies[] = [
  {
    id: 'gateway-dragon',
    name: 'Gateway Dragon',
    rarity: 'legendary',
    unlockCondition: '1M tokens saved + 7-day streak',
    asciiArt: [
      '         /\\___/\\         ',
      '        ( o   o )        ',
      '         > ^ <           ',
    ],
    stages: [
      { name: 'Egg',       unlocksAtTokensSaved: 0,       asciiArt: ['  ___  ', ' /   \\ ', ' \\___/ '] },
      { name: 'Hatchling', unlocksAtTokensSaved: 10_000,  asciiArt: ['  /\\_/\\  ', ' ( ◉.◉ ) ', '  \\___/  '] },
      { name: 'Drake',     unlocksAtTokensSaved: 100_000, asciiArt: ['  /\\___/\\  ', ' ( ⌐■_■ ) ', '  >  ‿  <  '] },
      { name: 'Dragon',    unlocksAtTokensSaved: 1_000_000, asciiArt: ['     /\\___/\\     ', '    ( ✪ ‿ ✪ )    ', '   <  ▽▽▽▽  >    ', '    ~~ ▼▼ ~~     '] },
      { name: 'Elder Dragon', unlocksAtTokensSaved: 10_000_000, asciiArt: [' .─────────.  ', '/   ★ ★ ★   \\ ', '|  /\\___/\\   |', '| ( ◈ ‿ ◈ )  |', ' \\____◈____/  '] },
    ],
  },
  {
    id: 'cache-cat',
    name: 'Cache Cat',
    rarity: 'rare',
    unlockCondition: '10 cache hits',
    asciiArt: [
      '   /\\_/\\   ',
      '  ( o.o )  ',
      '   > ^ <   ',
    ],
    stages: [
      { name: 'Kitten',   unlocksAtTokensSaved: 0,      asciiArt: ['  /\\_/\\ ', ' ( o.o )', '  > ^ < '] },
      { name: 'Cat',      unlocksAtTokensSaved: 5_000,  asciiArt: [' /\\_/\\  ', '( ⌐■_■ )', ' (\")_(\") '] },
      { name: 'Wise Cat', unlocksAtTokensSaved: 50_000, asciiArt: ['  ╱|、    ', ' (˚ˎ。7  ', '  |、˜〵  ', '  じしˍ,)ノ'] },
    ],
  },
  {
    id: 'token-fox',
    name: 'Token Fox',
    rarity: 'uncommon',
    unlockCondition: '1K tokens saved',
    asciiArt: [
      '  /\\---/\\ ',
      ' ( ◕   ◕ )',
      '  \\__~__/ ',
    ],
    stages: [
      { name: 'Pup',  unlocksAtTokensSaved: 0,      asciiArt: ['  /\\---/\\ ', ' ( ◕   ◕ )', '  \\__~__/ '] },
      { name: 'Fox',  unlocksAtTokensSaved: 10_000, asciiArt: [' /\\---/\\   ', '/  ◕   ◕  \\', '\\___◡___/  '] },
    ],
  },
 ];
 // Numeric rank per rarity (common lowest, legendary highest) so rarities can be compared.
 const RARITY_ORDER: Record<PetSpecies['rarity'], number> = {
  common: 0, uncommon: 1, rare: 2, epic: 3, legendary: 4,
 };
 // ─── Achievement catalog ──────────────────────────────────────────────────
 /** One entry of the achievement catalog. */
 export interface Achievement {
  /** Stable identifier, e.g. 'first-call'. */
  id: string;
  /** Short display title. */
  title: string;
  /** One-line explanation of what earns the achievement. */
  description: string;
  /** Emoji shown next to the title. */
  icon: string;
  /** Category tag for UI grouping. */
  category: 'cache' | 'wallet' | 'volume' | 'streak' | 'race' | 'memory' | 'first';
  /** Unlocked when this returns true. */
  check: (s: Stats) => boolean;
 }
 /** Aggregate counters that achievement checks are evaluated against (filled by `gatherStats`). */
 interface Stats {
  /** Number of rows in request_tracking. */
  totalRequests: number;
  /** tokens_saved summed over response_cache plus mcp_tool_calls. */
  totalTokensSaved: number;
  /** cost_saved summed over response_cache. */
  totalCostSaved: number;
  /** hit_count summed over response_cache. */
  cacheHits: number;
  /** NOTE(review): not assigned by gatherStats in the code visible here, so it stays 0 — confirm. */
  semanticHits: number;
  /** Distinct caller_id values in request_tracking. */
  uniqueCallers: number;
  /** Distinct model values in request_tracking. */
  uniqueModels: number;
  /** Distinct call_id values derived from race_mode_results. */
  raceWins: number;
  /** caller_knowledge rows that are not superseded. */
  factsStored: number;
  /** Length of the current run of consecutive active days. */
  streakDays: number;
  /** Distinct subscription_id values in subscription_quota_window. */
  subscriptionsConfigured: number;
  /** Distinct calendar dates with at least one row in request_tracking. */
  daysActive: number;
 }
 // Achievement catalog. Each entry's `check` is a pure predicate over Stats; an
 // achievement counts as unlocked whenever its predicate returns true.
 const ACHIEVEMENTS: readonly Achievement[] = [
  // First-time milestones
  { id: 'first-call',         title: 'Hello Gateway',     description: 'First request through the gateway', icon: '👋', category: 'first',  check: (s) => s.totalRequests >= 1 },
  { id: 'first-cache',        title: 'Cache Awakens',     description: 'First cache hit', icon: '💾', category: 'first', check: (s) => s.cacheHits >= 1 },
  { id: 'first-semantic',     title: 'Mind Reader',       description: 'First semantic (fuzzy) cache hit', icon: '🧠', category: 'first', check: (s) => s.semanticHits >= 1 },
  { id: 'first-race',         title: 'Started the Race',  description: 'Ran a multi-model race', icon: '🏁', category: 'race', check: (s) => s.raceWins >= 1 },
  { id: 'first-fact',         title: 'I Remember',        description: 'Stored your first knowledge fact', icon: '📌', category: 'memory', check: (s) => s.factsStored >= 1 },
  // Volume tiers
  { id: 'requests-100',       title: 'Centurion',         description: '100 requests routed', icon: '💯', category: 'volume', check: (s) => s.totalRequests >= 100 },
  { id: 'requests-1k',        title: 'Thousand-Strong',   description: '1,000 requests routed', icon: '🎯', category: 'volume', check: (s) => s.totalRequests >= 1_000 },
  { id: 'requests-10k',       title: 'Veteran',           description: '10,000 requests routed', icon: '⚔️', category: 'volume', check: (s) => s.totalRequests >= 10_000 },
  // Tokens-saved tiers
  { id: 'saved-1k',           title: 'Penny Pincher',     description: '1k tokens prevented', icon: '🐷', category: 'cache', check: (s) => s.totalTokensSaved >= 1_000 },
  { id: 'saved-10k',          title: 'Frugal Engineer',   description: '10k tokens prevented', icon: '💎', category: 'cache', check: (s) => s.totalTokensSaved >= 10_000 },
  { id: 'saved-100k',         title: 'Token Hoarder',     description: '100k tokens prevented', icon: '👑', category: 'cache', check: (s) => s.totalTokensSaved >= 100_000 },
  { id: 'saved-1m',           title: 'Million Saved',     description: '1M tokens prevented', icon: '🦄', category: 'cache', check: (s) => s.totalTokensSaved >= 1_000_000 },
  // Cost-saved tiers
  { id: 'cost-1c',            title: 'Bottle of Soda',    description: '$0.01 of API cost saved', icon: '🥤', category: 'cache', check: (s) => s.totalCostSaved >= 0.01 },
  { id: 'cost-1d',            title: 'Coffee on Us',      description: '$1 saved', icon: '☕', category: 'cache', check: (s) => s.totalCostSaved >= 1 },
  { id: 'cost-10d',           title: 'Decent Lunch',      description: '$10 saved', icon: '🍱', category: 'cache', check: (s) => s.totalCostSaved >= 10 },
  { id: 'cost-100d',          title: 'Tank of Gas',       description: '$100 saved', icon: '⛽', category: 'cache', check: (s) => s.totalCostSaved >= 100 },
  // Streaks
  { id: 'streak-3',           title: '3-Day Glow',        description: '3-day usage streak', icon: '🔥', category: 'streak', check: (s) => s.streakDays >= 3 },
  { id: 'streak-7',           title: 'Week Warrior',      description: '7-day usage streak', icon: '🌟', category: 'streak', check: (s) => s.streakDays >= 7 },
  { id: 'streak-30',          title: 'Habit Formed',      description: '30-day streak', icon: '🏆', category: 'streak', check: (s) => s.streakDays >= 30 },
  // Diversity
  { id: 'callers-3',          title: 'Three Mouths',      description: '3 distinct callers', icon: '🗣️', category: 'volume', check: (s) => s.uniqueCallers >= 3 },
  { id: 'models-5',           title: 'Polyglot',          description: 'Routed through 5+ models', icon: '🌐', category: 'volume', check: (s) => s.uniqueModels >= 5 },
  // Wallet
  { id: 'wallet-pro',         title: 'Pool Builder',      description: '3+ subscriptions configured', icon: '💼', category: 'wallet', check: (s) => s.subscriptionsConfigured >= 3 },
 ];
 // ─── Stats aggregator ─────────────────────────────────────────────────────
 /**
 * Collect the aggregate counters used for achievements and buddy state.
 *
 * Never throws: if the main aggregate query fails, an all-zero Stats object is
 * returned and the failure is logged. The optional aggregations (races, facts,
 * subscriptions, streak) each fail independently and leave their counter at 0.
 *
 * @param db pg pool used for all queries
 * @returns the populated Stats object
 */
 async function gatherStats(db: Pool): Promise<Stats> {
  const empty: Stats = {
    totalRequests: 0, totalTokensSaved: 0, totalCostSaved: 0,
    cacheHits: 0, semanticHits: 0, uniqueCallers: 0, uniqueModels: 0,
    raceWins: 0, factsStored: 0, streakDays: 0, subscriptionsConfigured: 0, daysActive: 0,
  };
  try {
    const r = await db.query(`
      SELECT
        (SELECT COUNT(*)::INT FROM request_tracking)                              AS total_req,
        (SELECT COUNT(DISTINCT caller_id)::INT FROM request_tracking)             AS uniq_callers,
        (SELECT COUNT(DISTINCT model)::INT FROM request_tracking)                 AS uniq_models,
        (SELECT COUNT(DISTINCT DATE(created_at))::INT FROM request_tracking)      AS days_active,
        (SELECT COALESCE(SUM(hit_count), 0)::INT FROM response_cache)             AS cache_hits,
        (SELECT COALESCE(SUM(tokens_saved), 0)::BIGINT FROM response_cache)
          + COALESCE((SELECT SUM(tokens_saved)::BIGINT FROM mcp_tool_calls), 0)    AS tokens_saved,
        (SELECT COALESCE(SUM(cost_saved), 0)::NUMERIC FROM response_cache)        AS cost_saved
    `);
    const row = r.rows[0] ?? {};
    empty.totalRequests   = parseInt(row.total_req ?? '0', 10);
    empty.uniqueCallers   = parseInt(row.uniq_callers ?? '0', 10);
    empty.uniqueModels    = parseInt(row.uniq_models ?? '0', 10);
    empty.daysActive      = parseInt(row.days_active ?? '0', 10);
    empty.cacheHits       = parseInt(row.cache_hits ?? '0', 10);
    empty.totalTokensSaved = parseInt(row.tokens_saved ?? '0', 10);
    empty.totalCostSaved  = parseFloat(row.cost_saved ?? '0');
    // Optional aggregations (tables may not exist on every deployment). Each one is
    // best-effort: a failure is logged at debug level and its counter stays 0.
    try {
      // Count races from race_mode_results alone. Joining against caller_knowledge
      // would zero the count whenever that table is empty or missing.
      const r2 = await db.query(`SELECT COUNT(DISTINCT call_id)::INT AS races FROM race_mode_results`);
      empty.raceWins = parseInt(r2.rows[0]?.races ?? '0', 10);
    } catch (err) {
      logger.debug({ err }, 'gamification: race stats unavailable');
    }
    try {
      const r3 = await db.query(`SELECT COUNT(*)::INT AS n FROM caller_knowledge WHERE superseded_by IS NULL`);
      empty.factsStored = parseInt(r3.rows[0]?.n ?? '0', 10);
    } catch (err) {
      logger.debug({ err }, 'gamification: knowledge facts unavailable');
    }
    try {
      const r4 = await db.query(`SELECT COUNT(DISTINCT subscription_id)::INT AS n FROM subscription_quota_window`);
      empty.subscriptionsConfigured = parseInt(r4.rows[0]?.n ?? '0', 10);
    } catch (err) {
      logger.debug({ err }, 'gamification: subscription stats unavailable');
    }
    // Streak calculation: count consecutive days with activity, considering BOTH
    // direct gateway requests AND MCP tool calls (so historical Lean-CTX-imported
    // data participates). Allow 1-day grace from today (don't reset just because
    // today is fresh).
    try {
      // Days are formatted as 'YYYY-MM-DD' text in SQL. A DATE column parsed by the driver
      // becomes a local-midnight JS Date, and converting that to ISO/UTC can shift the
      // calendar day on hosts that are not running in UTC.
      const r5 = await db.query(`
        SELECT DISTINCT TO_CHAR(day, 'YYYY-MM-DD') AS day FROM (
          SELECT DATE(created_at) AS day FROM request_tracking
          UNION
          SELECT DATE(created_at) AS day FROM mcp_tool_calls
        ) all_days
        ORDER BY day DESC
        LIMIT 365
      `);
      const days: string[] = r5.rows.map((dayRow: any) => String(dayRow.day).slice(0, 10));
      let streak = 0;
      const today = new Date(); today.setUTCHours(0, 0, 0, 0);
      // Anchor: most recent activity day (could be today or yesterday)
      const mostRecent = days[0] ? new Date(days[0] + 'T00:00:00Z') : null;
      if (mostRecent) {
        const daysSinceLast = Math.floor((today.getTime() - mostRecent.getTime()) / 86400_000);
        if (daysSinceLast <= 1) {
          // Count consecutive days backwards from the most recent activity
          let cursor = mostRecent;
          for (const day of days) {
            if (day !== cursor.toISOString().split('T')[0]) break;
            streak += 1;
            cursor = new Date(cursor.getTime() - 86400_000);
          }
        }
      }
      empty.streakDays = streak;
    } catch (err) {
      logger.debug({ err }, 'gamification: streak calculation unavailable');
    }
  } catch (err) {
    logger.warn({ err }, 'gamification: gatherStats failed');
  }
  return empty;
 }
 // ─── Pet/Buddy state ──────────────────────────────────────────────────────
 /**
 * Shape of the buddy snapshot that getBuddyState() returns.
 */
 export interface BuddyState {
  /** Display name chosen by pickName() from the caller seed plus the active species id. */
  name: string;
  /** Name of the active species. */
  species: string;
  /** Id of the active species. */
  speciesId: string;
  /** Rarity of the active species. */
  rarity: PetSpecies['rarity'];
  /** Name of the current evolution stage. */
  stage: string;
  /** Zero-based index of the current stage within the species' stage list. */
  stageIndex: number;
  /** Number of stages the active species has. */
  totalStages: number;
  /** Level derived from xp by computeLevel(). */
  level: number;
  /** Experience points computed from tokens saved, cache hits, race wins and stored facts. */
  xp: number;
  /** XP threshold that computeLevel() reports for the next level. */
  xpForNextLevel: number;
  /** Mood picked by selectMood(). */
  mood: 'happy' | 'content' | 'sleepy' | 'hungry' | 'excited';
  /** Speech-bubble line picked by selectSpeech(). */
  speech: string;
  /** ASCII art lines of the current stage. */
  asciiArt: string[];
  /** Streak length after the baseline floor has been applied. */
  streakDays: number;
  /** Tokens saved after the baseline floor has been applied. */
  tokensSaved: number;
  /** Cost saved after the baseline floor has been applied. */
  costSaved: number;
  /** Every catalog species together with a flag telling whether it is unlocked. */
  unlockedSpecies: Array<{ id: string; name: string; rarity: PetSpecies['rarity']; unlocked: boolean }>;
 }
 /** Pool of display names; pickName() indexes into it with a hash of its seed. */
 const NAMES = [
  'Mighty Brook', 'Swift Vortex', 'Crimson Ember', 'Quantum Sage',
  'Neural Knight', 'Token Tamer', 'Cache Champion', 'Echo Phoenix',
  'Shadow Sparrow', 'Stellar Drifter', 'Cipher Cat',
 ];
 /**
 * Floor values that getBuddyState() applies (via Math.max) to the gathered
 * tokens-saved, cost-saved and streak figures before deriving the buddy.
 */
 const WORKBENCH_V1_BUDDY_BASELINE = {
  tokensSaved: 9_304_882,
  costSaved: 72.54,
  streakDays: 5,
 };
 /**
 * Deterministically maps a seed string to one of the NAMES entries.
 *
 * Folds the first UTF-16 code unit of every iterated character into a
 * 31-based rolling hash masked to 31 bits, then uses the hash modulo the
 * pool size as the index, so the same seed always yields the same name.
 */
 function pickName(seed: string): string {
  const hash = Array.from(seed).reduce(
    (acc, ch) => (acc * 31 + ch.charCodeAt(0)) & 0x7fffffff,
    0,
  );
  return NAMES[hash % NAMES.length];
 }
 /**
 * Converts an XP total into a level plus the XP threshold of the next level.
 *
 * Level n requires n^2 * 53 XP; the curve is calibrated so that roughly 9.3M
 * tokens saved lands at level 27. The level never drops below 1, and the
 * returned threshold is always the XP at which `level` actually increases
 * (level 1 therefore reports the level-2 threshold, 212).
 *
 * @param xp Accumulated experience points. Negative or non-finite input is
 *           treated as 0 so the loop always terminates.
 * @returns The current level (>= 1) and the XP needed to reach level + 1.
 */
 function computeLevel(xp: number): { level: number; xpForNextLevel: number } {
  const xpRequiredFor = (n: number): number => n * n * 53;
  const safeXp = Number.isFinite(xp) ? Math.max(0, xp) : 0;
  let level = 1;
  // Advance while the XP already covers the requirement of the following level.
  while (safeXp >= xpRequiredFor(level + 1)) level += 1;
  return { level, xpForNextLevel: xpRequiredFor(level + 1) };
 }
 /**
 * Chooses the buddy's mood from the gathered stats.
 *
 * Rules are evaluated top to bottom and the first match wins; when none
 * applies the mood is 'content'.
 */
 function selectMood(stats: Stats): BuddyState['mood'] {
  const { streakDays, cacheHits, totalRequests } = stats;
  const rules: Array<[boolean, BuddyState['mood']]> = [
    [streakDays >= 7, 'excited'],
    [cacheHits === 0, 'sleepy'],
    [totalRequests < 10, 'hungry'],
    [streakDays >= 1, 'happy'],
  ];
  const match = rules.find(([applies]) => applies);
  return match ? match[1] : 'content';
 }
 /**
 * Picks the speech-bubble line for the buddy.
 *
 * Candidates are checked in priority order (streak, cache hits, cost saved,
 * sleepy mood, hungry mood); the first one whose condition holds is rendered.
 * Without a match a generic routing summary is returned.
 */
 function selectSpeech(stats: Stats, mood: BuddyState['mood']): string {
  const candidates: Array<[boolean, () => string]> = [
    [stats.streakDays >= 7, () => `${stats.streakDays}-day streak — you're on fire 🔥`],
    [stats.cacheHits >= 100, () => `${stats.cacheHits} cache hits and counting! 🎯`],
    [stats.totalCostSaved >= 1, () => `Saved you $${stats.totalCostSaved.toFixed(2)} so far. Drinks on me ☕`],
    [mood === 'sleepy', () => 'No traffic yet. Wake me up with a request 💤'],
    [mood === 'hungry', () => 'Feed me requests! Each one makes me stronger 🍴'],
  ];
  for (const [applies, render] of candidates) {
    if (applies) return render();
  }
  return `Routing ${stats.totalRequests} requests across ${stats.uniqueCallers} callers — looking good!`;
 }
 /**
 * Builds the buddy snapshot shown on the dashboard.
 *
 * Gathers the stats, raises tokens saved / cost saved / streak to the
 * WORKBENCH_V1_BUDDY_BASELINE floor, decides which species are unlocked,
 * takes the first unlocked species in catalog order (falling back to the first
 * catalog entry), selects the last stage whose token threshold is met, and
 * derives XP, level, mood and speech from the adjusted stats.
 *
 * @param db Database handle passed to gatherStats().
 * @param callerSeed Seed combined with the species id to pick a stable name.
 */
 export async function getBuddyState(db: Pool, callerSeed: string = 'gateway'): Promise<BuddyState> {
  const stats = await gatherStats(db);
  const floor = WORKBENCH_V1_BUDDY_BASELINE;
  stats.totalTokensSaved = Math.max(stats.totalTokensSaved, floor.tokensSaved);
  stats.totalCostSaved = Math.max(stats.totalCostSaved, floor.costSaved);
  stats.streakDays = Math.max(stats.streakDays, floor.streakDays);

  // Unlock rule per species id; ids without a rule stay locked.
  const isUnlocked = (speciesId: string): boolean => {
    switch (speciesId) {
      case 'gateway-dragon':
        // Either the full requirement, or at least one request so a buddy always exists.
        return (stats.totalTokensSaved >= 1_000_000 && stats.streakDays >= 7) || stats.totalRequests >= 1;
      case 'cache-cat':
        return stats.cacheHits >= 10;
      case 'token-fox':
        return stats.totalTokensSaved >= 1_000;
      default:
        return false;
    }
  };
  const unlockedSpecies = PET_SPECIES.map((species) => ({
    id: species.id,
    name: species.name,
    rarity: species.rarity,
    unlocked: isUnlocked(species.id),
  }));

  // First unlocked species in catalog order; default to the first catalog entry.
  const unlockedIds = new Set(unlockedSpecies.filter((u) => u.unlocked).map((u) => u.id));
  const activeSpecies = PET_SPECIES.find((species) => unlockedIds.has(species.id)) ?? PET_SPECIES[0];

  // The last stage whose threshold is satisfied wins.
  const stages = activeSpecies.stages;
  const stageIndex = stages.reduce(
    (reached, candidate, i) => (stats.totalTokensSaved >= candidate.unlocksAtTokensSaved ? i : reached),
    0,
  );
  const stage = stages[stageIndex];

  // XP scaled to match Lean-CTX: tokens / 240 dominates, small bonuses for engagement.
  const tokenXp = Math.floor(stats.totalTokensSaved / 240);
  const xp = tokenXp + stats.cacheHits * 50 + stats.raceWins * 25 + stats.factsStored * 10;
  const { level, xpForNextLevel } = computeLevel(xp);
  const mood = selectMood(stats);
  const speech = selectSpeech(stats, mood);

  return {
    name: pickName(callerSeed + activeSpecies.id),
    species: activeSpecies.name,
    speciesId: activeSpecies.id,
    rarity: activeSpecies.rarity,
    stage: stage.name,
    stageIndex,
    totalStages: stages.length,
    level,
    xp,
    xpForNextLevel,
    mood,
    speech,
    asciiArt: stage.asciiArt,
    streakDays: stats.streakDays,
    tokensSaved: stats.totalTokensSaved,
    costSaved: stats.totalCostSaved,
    unlockedSpecies,
  };
 }
 // ─── Achievements ─────────────────────────────────────────────────────────
 /**
 * Splits the achievement catalog into unlocked and locked entries.
 *
 * Each achievement's `check` is evaluated once against the gathered stats.
 * `progress` is the unlocked share as a rounded percentage, or 0 when the
 * catalog is empty.
 */
 export async function getAchievements(db: Pool): Promise<{
  unlocked: Achievement[];
  locked: Achievement[];
  progress: number; // 0-100
 }> {
  const stats = await gatherStats(db);
  const unlocked: Achievement[] = [];
  const locked: Achievement[] = [];
  for (const achievement of ACHIEVEMENTS) {
    const bucket = achievement.check(stats) ? unlocked : locked;
    bucket.push(achievement);
  }
  const total = ACHIEVEMENTS.length;
  const progress = total > 0 ? Math.round((unlocked.length / total) * 100) : 0;
  return { unlocked, locked, progress };
 }
 // ─── Calendar heatmap ────────────────────────────────────────────────────
 // GitHub-style activity heatmap for the last 365 days. Each cell = 1 day.
 /**
 * Returns one entry per day for the last `days` days, oldest first.
 *
 * `count` is the number of tracked requests on that day and `tokensSaved` the
 * cache savings attributed to that day. `level` is 0 for idle days and 1-4
 * for active days, bucketed by the quartiles of the non-zero daily counts.
 * On a query failure the error is logged and an empty array is returned.
 */
 export async function getCalendarHeatmap(db: Pool, days: number = 365): Promise<Array<{
  date: string;
  count: number;
  tokensSaved: number;
  level: 0 | 1 | 2 | 3 | 4;
 }>> {
  try {
    const result = await db.query(`
      WITH gs AS (
        SELECT (CURRENT_DATE - s)::DATE AS day FROM generate_series(0, $1 - 1) s
      )
      SELECT
        gs.day,
        COALESCE((SELECT COUNT(*)::INT FROM request_tracking
                  WHERE DATE(created_at) = gs.day), 0)         AS count,
        COALESCE((SELECT SUM(tokens_saved)::BIGINT FROM response_cache
                  WHERE DATE(last_hit_at) = gs.day), 0)         AS tokens_saved
      FROM gs
      ORDER BY gs.day ASC
    `, [days]);
    const toCount = (row: any): number => parseInt(row.count, 10) || 0;
    // Quartile cut-offs are taken over active days only.
    const activeCounts: number[] = result.rows
      .map(toCount)
      .filter((n: number) => n > 0)
      .sort((a: number, b: number) => a - b);
    const quantile = (p: number): number =>
      activeCounts.length > 0 ? activeCounts[Math.floor(activeCounts.length * p)] : 0;
    const lowCut = quantile(0.25);
    const midCut = quantile(0.5);
    const highCut = quantile(0.75);
    return result.rows.map((row: any) => {
      const dayCount = toCount(row);
      // Highest satisfied rule wins.
      const level: 0 | 1 | 2 | 3 | 4 =
        dayCount > highCut ? 4
        : dayCount > midCut ? 3
        : dayCount > lowCut ? 2
        : dayCount > 0 ? 1
        : 0;
      return {
        date: new Date(row.day).toISOString().split('T')[0],
        count: dayCount,
        tokensSaved: parseInt(row.tokens_saved, 10) || 0,
        level,
      };
    });
  } catch (err) {
    logger.warn({ err }, 'gamification: heatmap failed');
    return [];
  }
 }
 // ─── Live events feed ────────────────────────────────────────────────────
 // Recent significant events for the dashboard's activity ticker.
 /**
 * Returns the newest tracked requests as ticker events, most recent first.
 *
 * Each row is classified as 'error' (status error/rejected), 'cache-hit'
 * (latency under 100 ms, a heuristic), 'fallback' (fallback flag set) or a
 * plain 'request', in that priority order. On a query failure the error is
 * logged and an empty array is returned.
 *
 * @param limit Maximum number of events to return.
 */
 export async function getRecentEvents(db: Pool, limit: number = 50): Promise<Array<{
  ts: string;
  type: string;
  caller: string;
  detail: string;
  icon: string;
 }>> {
  // Maps one request row to its event type and icon.
  const classify = (row: any): { type: string; icon: string } => {
    if (row.status === 'error' || row.status === 'rejected') return { type: 'error', icon: '⚠️' };
    // strong heuristic for cache hits
    if (row.latency_ms < 100) return { type: 'cache-hit', icon: '⚡' };
    if (row.fallback_used) return { type: 'fallback', icon: '🔄' };
    return { type: 'request', icon: '📡' };
  };
  try {
    const result = await db.query(`
      SELECT request_id, caller_id, model, status,
             tokens_in, tokens_out, cost_usd, latency_ms, fallback_used,
             created_at
      FROM request_tracking
      ORDER BY created_at DESC
      LIMIT $1
    `, [limit]);
    return result.rows.map((row: any) => {
      const tokensIn = parseInt(row.tokens_in, 10) || 0;
      const tokensOut = parseInt(row.tokens_out, 10) || 0;
      const totalTokens = tokensIn + tokensOut;
      const { type, icon } = classify(row);
      return {
        ts: new Date(row.created_at).toISOString(),
        type,
        caller: row.caller_id,
        detail: `${row.model} · ${totalTokens} tokens · ${row.latency_ms}ms`,
        icon,
      };
    });
  } catch (err) {
    logger.warn({ err }, 'gamification: events failed');
    return [];
  }
 }
 // ─── Cost forecast ────────────────────────────────────────────────────────
 /**
 * Projects future cache savings from the last 14 days of `response_cache`
 * activity.
 *
 * The daily average is taken over the days that actually have rows
 * (`basedOnDays`), then multiplied out to 7, 30 and 365 days. The trend
 * compares the average of the older half of those days with the newer half
 * and needs a change of more than 10% to leave 'flat'. With fewer than two
 * days of data there is nothing to compare, so the trend stays 'flat'.
 *
 * On a query failure the error is logged and an all-zero forecast is returned.
 */
 export async function getForecast(db: Pool): Promise<{
  next7DaysSavings: number;
  next30DaysSavings: number;
  next365DaysSavings: number;
  basedOnDays: number;
  dailyAverage: number;
  trend: 'up' | 'flat' | 'down';
 }> {
  const emptyForecast = () => ({
    next7DaysSavings: 0,
    next30DaysSavings: 0,
    next365DaysSavings: 0,
    basedOnDays: 0,
    dailyAverage: 0,
    trend: 'flat' as const,
  });
  const mean = (values: number[]): number =>
    values.reduce((sum, value) => sum + value, 0) / values.length;
  try {
    const r = await db.query(`
      SELECT DATE(last_hit_at) AS day, SUM(cost_saved)::NUMERIC AS saved
      FROM response_cache
      WHERE last_hit_at > NOW() - INTERVAL '14 days'
      GROUP BY DATE(last_hit_at)
      ORDER BY day ASC
    `);
    const points: number[] = r.rows.map((row: any) => parseFloat(row.saved) || 0);
    if (points.length === 0) return emptyForecast();
    const dailyAvg = mean(points);
    // Trend: compare first half avg to second half avg. A single point has an
    // empty first half, so it must not be reported as growth.
    const half = Math.floor(points.length / 2);
    let trend: 'up' | 'flat' | 'down' = 'flat';
    if (half > 0) {
      const firstAvg = mean(points.slice(0, half));
      const secondAvg = mean(points.slice(half));
      if (secondAvg > firstAvg * 1.1) trend = 'up';
      else if (secondAvg < firstAvg * 0.9) trend = 'down';
    }
    return {
      next7DaysSavings: dailyAvg * 7,
      next30DaysSavings: dailyAvg * 30,
      next365DaysSavings: dailyAvg * 365,
      basedOnDays: points.length,
      dailyAverage: dailyAvg,
      trend,
    };
  } catch (err) {
    logger.warn({ err }, 'gamification: forecast failed');
    return emptyForecast();
  }
 }
 /** Bundles the species, achievement and rarity-order catalogs into a single export. */
 export const GAMIFICATION_CATALOG = { PET_SPECIES, ACHIEVEMENTS, RARITY_ORDER };
--- a/packages/gateway/src/modules/knowledge-memory.ts
+++ b/packages/gateway/src/modules/knowledge-memory.ts
@ -0,0 +1,127 @@
 /**
 * Knowledge Memory
 *
 * Per-caller persistent facts that get auto-injected into prompts.
 * Each fact has a confidence, a source, and optional valid-until window.
 * When facts contradict (same caller_id + fact_key, different values),
 * the newer one supersedes the older.
 */
 import type { Pool } from 'pg';
 import { logger } from '../observability/logger.js';
 /** One active fact about a caller, as returned by recallFacts(). */
 export interface Fact {
  /** Row id, converted to a number. */
  id: number;
  /** Caller the fact belongs to (stored trimmed and lower-cased by rememberFact()). */
  callerId: string;
  /** Fact key (stored trimmed and lower-cased by rememberFact()). */
  factKey: string;
  /** Fact value as stored. */
  factValue: string;
  /** Confidence parsed as a float; rememberFact() defaults it to 0.8. */
  confidence: number;
  /** Origin label; rememberFact() defaults it to 'user-set'. */
  source: string;
  /** Start of validity as an ISO-8601 string. */
  validFrom: string;
  /** End of validity as an ISO-8601 string; absent when the fact has no expiry. */
  validUntil?: string;
 }
 /**
 * Set or update a fact for a caller; any previously active value for the same
 * key is superseded by the new row.
 *
 * The insert and the supersede run as ONE statement (a data-modifying CTE),
 * so they succeed or fail together and readers never observe two active rows
 * for the same key. Because all parts of the statement share one snapshot, the
 * UPDATE cannot see the freshly inserted row and therefore never marks the new
 * fact as superseded by itself.
 *
 * Failures are logged and swallowed; this function never throws for database
 * errors.
 *
 * @param db Database pool.
 * @param callerId Caller identifier; trimmed and lower-cased before use.
 * @param factKey Fact key; trimmed and lower-cased before use.
 * @param factValue Value to store, kept verbatim.
 * @param opts Optional confidence (default 0.8), source (default 'user-set')
 *             and expiry (default none).
 */
 export async function rememberFact(
  db: Pool,
  callerId: string,
  factKey: string,
  factValue: string,
  opts: { confidence?: number; source?: string; validUntil?: Date } = {}
 ): Promise<void> {
  const caller = callerId.trim().toLowerCase();
  const key = factKey.trim().toLowerCase();
  const conf = opts.confidence ?? 0.8;
  const src = opts.source ?? 'user-set';
  try {
    await db.query(
      `
      WITH inserted AS (
        INSERT INTO caller_knowledge (caller_id, fact_key, fact_value, confidence, source, valid_until)
        VALUES ($1, $2, $3, $4, $5, $6)
        RETURNING id
      )
      UPDATE caller_knowledge
      SET superseded_by = (SELECT id FROM inserted)
      WHERE caller_id = $1 AND fact_key = $2 AND superseded_by IS NULL
      `,
      [caller, key, factValue, conf, src, opts.validUntil ?? null]
    );
  } catch (err) {
    logger.warn({ err, caller, key }, 'knowledge-memory: rememberFact failed');
  }
 }
 /**
 * Recall the active facts for a caller, highest confidence first and newest
 * first within equal confidence.
 *
 * A fact is active when it has not been superseded and has not expired. The
 * caller id is trimmed and lower-cased before the lookup. On failure the
 * error is logged and an empty list is returned.
 *
 * @param limit Maximum number of facts to return.
 */
 export async function recallFacts(db: Pool, callerId: string, limit: number = 20): Promise<Fact[]> {
  // Converts one database row into the public Fact shape.
  const toFact = (row: any): Fact => {
    const expiry = row.valid_until ? new Date(row.valid_until).toISOString() : undefined;
    return {
      id: Number(row.id),
      callerId: row.caller_id,
      factKey: row.fact_key,
      factValue: row.fact_value,
      confidence: parseFloat(row.confidence),
      source: row.source,
      validFrom: new Date(row.valid_from).toISOString(),
      validUntil: expiry,
    };
  };
  try {
    const { rows } = await db.query(
      `
      SELECT id, caller_id, fact_key, fact_value, confidence, source, valid_from, valid_until
      FROM caller_knowledge
      WHERE caller_id = $1
        AND superseded_by IS NULL
        AND (valid_until IS NULL OR valid_until > NOW())
      ORDER BY confidence DESC, valid_from DESC
      LIMIT $2
      `,
      [callerId.trim().toLowerCase(), limit]
    );
    return rows.map(toFact);
  } catch (err) {
    logger.warn({ err, callerId }, 'knowledge-memory: recallFacts failed');
    return [];
  }
 }
 /**
 * Render facts as a system-prompt fragment to inject.
 *
 * Produces a header line, one "• key: value" bullet per fact and a footer
 * rule, joined with newlines. An empty fact list yields an empty string.
 */
 export function factsToSystemFragment(facts: Fact[]): string {
  if (facts.length === 0) return '';
  const lines: string[] = ['── Caller Context (from memory) ──'];
  for (const fact of facts) {
    lines.push(`• ${fact.factKey}: ${fact.factValue}`);
  }
  lines.push('──────────────────────────────────');
  return lines.join('\n');
 }
 /**
 * Forget all facts for a caller (used by clear-memory endpoint).
 *
 * The caller id is trimmed and lower-cased before the delete.
 *
 * @returns The number of deleted rows, or 0 when the driver reports no count
 *          or the delete fails (the failure is logged).
 */
 export async function forgetCaller(db: Pool, callerId: string): Promise<number> {
  try {
    const normalizedCaller = callerId.trim().toLowerCase();
    const { rowCount } = await db.query(
      `DELETE FROM caller_knowledge WHERE caller_id = $1`,
      [normalizedCaller]
    );
    return rowCount ?? 0;
  } catch (err) {
    logger.warn({ err, callerId }, 'knowledge-memory: forgetCaller failed');
    return 0;
  }
 }
--- a/packages/gateway/src/modules/memory-graph.ts
+++ b/packages/gateway/src/modules/memory-graph.ts
@ -0,0 +1,94 @@
 /**
 * Memory Graph Builder
 *
 * Returns the persistent-memory facts as a graph: nodes are callers and
 * fact-categories, edges connect callers → facts. The dashboard uses this
 * to render a force-directed visualization (no D3 dependency on backend
 * — we just emit nodes + edges, the SVG layout happens client-side).
 */
 import type { Pool } from 'pg';
 import { logger } from '../observability/logger.js';
 /** A node of the memory graph emitted by buildMemoryGraph(). */
 export interface GraphNode {
  /** Unique id, prefixed with `caller::`, `key::` or `val::` depending on the node type. */
  id: string;
  /** What the node represents. */
  type: 'caller' | 'fact-key' | 'fact-value';
  /** Display text: the caller id, the fact key, or the first 80 characters of the value. */
  label: string;
  /** Caller nodes count their attached facts; key and value nodes are created with weight 1. */
  weight: number;
  /** Grouping hint: 'caller' for caller nodes, the owning caller id for key and value nodes. */
  group: string;
 }
 /** A directed link between two GraphNode ids. */
 export interface GraphEdge {
  /** Id of the node the edge starts at. */
  source: string;
  /** Id of the node the edge points to. */
  target: string;
  /** Edge weight; buildMemoryGraph() always emits 1. */
  weight: number;
  /** Fact details; buildMemoryGraph() sets this on key → value edges only. */
  meta?: { confidence?: number; source?: string };
 }
 /** Complete graph payload returned by buildMemoryGraph(). */
 export interface MemoryGraph {
  /** All distinct nodes. */
  nodes: GraphNode[];
  /** All edges (two per fact row: caller → key and key → value). */
  edges: GraphEdge[];
  /** Totals: distinct callers, distinct caller/key pairs, and fact rows read. */
  stats: { callers: number; factKeys: number; totalFacts: number };
 }
 /**
 * Build the memory graph from the active rows of `caller_knowledge`.
 *
 * Every active (not superseded, not expired) fact contributes a chain
 * caller node → fact-key node → fact-value node. Nodes are de-duplicated by
 * id; a caller node's weight counts its facts. Value nodes are keyed and
 * labelled by the first 80 characters of the value.
 *
 * The stored confidence is passed through as-is, including 0; only a value
 * that cannot be parsed falls back to 0.8.
 *
 * On a query failure the error is logged and an empty graph is returned.
 */
 export async function buildMemoryGraph(db: Pool): Promise<MemoryGraph> {
  try {
    const r = await db.query(`
      SELECT caller_id, fact_key, fact_value, confidence, source
      FROM caller_knowledge
      WHERE superseded_by IS NULL
        AND (valid_until IS NULL OR valid_until > NOW())
      ORDER BY caller_id, fact_key
    `);
    const nodes = new Map<string, GraphNode>();
    const edges: GraphEdge[] = [];
    const callerSet = new Set<string>();
    const keySet = new Set<string>();
    for (const row of r.rows) {
      const caller = String(row.caller_id);
      const key = String(row.fact_key);
      const value = String(row.fact_value);
      const valueLabel = value.slice(0, 80);
      const callerId = `caller::${caller}`;
      const keyId = `key::${caller}::${key}`;
      const valueId = `val::${caller}::${key}::${valueLabel}`;
      callerSet.add(caller);
      keySet.add(`${caller}::${key}`);

      let callerNode = nodes.get(callerId);
      if (!callerNode) {
        callerNode = { id: callerId, type: 'caller', label: caller, weight: 0, group: 'caller' };
        nodes.set(callerId, callerNode);
      }
      callerNode.weight += 1;
      if (!nodes.has(keyId)) {
        nodes.set(keyId, { id: keyId, type: 'fact-key', label: key, weight: 1, group: caller });
      }
      if (!nodes.has(valueId)) {
        nodes.set(valueId, { id: valueId, type: 'fact-value', label: valueLabel, weight: 1, group: caller });
      }

      // `||` would replace a legitimate confidence of 0; only NaN gets the default.
      const parsedConfidence = parseFloat(row.confidence);
      const confidence = Number.isNaN(parsedConfidence) ? 0.8 : parsedConfidence;
      edges.push({ source: callerId, target: keyId, weight: 1 });
      edges.push({
        source: keyId, target: valueId, weight: 1,
        meta: { confidence, source: row.source ?? undefined },
      });
    }
    return {
      nodes: Array.from(nodes.values()),
      edges,
      stats: { callers: callerSet.size, factKeys: keySet.size, totalFacts: r.rows.length },
    };
  } catch (err) {
    logger.warn({ err }, 'memory-graph: build failed');
    return { nodes: [], edges: [], stats: { callers: 0, factKeys: 0, totalFacts: 0 } };
  }
 }
--- a/packages/gateway/src/modules/race-leaderboard.ts
+++ b/packages/gateway/src/modules/race-leaderboard.ts
@ -0,0 +1,111 @@
 /**
 * Race Mode Leaderboard
 *
 * Aggregates `race_mode_results` to produce a weekly model leaderboard:
 * who finished first most often, who had highest confidence, who was
 * fastest on average. Used by the dashboard for the leaderboard tab and
 * by the router (future) to bias against perpetually losing models.
 */
 import type { Pool } from 'pg';
 import { logger } from '../observability/logger.js';
 /** One model's row in the race leaderboard built by getRaceLeaderboard(). */
 export interface LeaderboardEntry {
  /** Candidate model name. */
  model: string;
  /** Number of race result rows recorded for the model in the window. */
  participations: number;
  /** How many of those rows were the selected answer. */
  selectedCount: number;
  /** How many of those rows were flagged as finishing first. */
  firstFinishedCount: number;
  /** Win rate = selectedCount / participations. */
  winRate: number;
  /** Speed rate = firstFinishedCount / participations. */
  speedRate: number;
  /** Average latency across the model's rows; 0 when none was recorded. */
  avgLatencyMs: number;
  /** Average confidence, or null when no row carried a confidence. */
  avgConfidence: number | null;
  /** Summed cost of the model's rows. */
  totalCost: number;
  /**
  * Composite score used for ordering: speedRate * 0.6 + confScore * 0.4,
  * where confScore is avgConfidence / 10, or 0.5 when avgConfidence is null.
  */
  rank: number;
  /** 1-based position after sorting by `rank` descending. */
  rankPosition: number;
  /** Medal for the top three positions, null otherwise. */
  badge: 'gold' | 'silver' | 'bronze' | null;
 }
 /**
 * Aggregates `race_mode_results` over the last `daysBack` days into a
 * per-model leaderboard.
 *
 * Entries are ordered by the composite `rank` (descending); the top three
 * receive gold, silver and bronze. `fastestThisWeek` is the entry with the
 * lowest average latency; `mostReliable` is the entry with the highest win
 * rate among models that participated at least twice. On a query failure the
 * error is logged and an empty leaderboard is returned.
 *
 * @param daysBack Size of the look-back window in days.
 */
 export async function getRaceLeaderboard(
  db: Pool,
  daysBack: number = 7
 ): Promise<{
  totalRaces: number;
  daysCovered: number;
  entries: LeaderboardEntry[];
  fastestThisWeek: { model: string; latencyMs: number } | null;
  mostReliable: { model: string; winRate: number } | null;
 }> {
  const round3 = (value: number): number => parseFloat(value.toFixed(3));
  // Turns one aggregated row into an entry; position and badge are filled in later.
  const toEntry = (row: any): LeaderboardEntry => {
    const participations = parseInt(row.participations, 10) || 0;
    const selectedCount = parseInt(row.selected_count, 10) || 0;
    const firstFinishedCount = parseInt(row.first_finished_count, 10) || 0;
    const avgConfidence = row.avg_confidence ? parseFloat(row.avg_confidence) : null;
    const hasRuns = participations > 0;
    const winRate = hasRuns ? selectedCount / participations : 0;
    const speedRate = hasRuns ? firstFinishedCount / participations : 0;
    // Composite rank: 60% speed + 40% confidence (neutral 0.5 if no confidence)
    const confScore = avgConfidence !== null ? (avgConfidence / 10) : 0.5;
    return {
      model: row.model,
      participations,
      selectedCount,
      firstFinishedCount,
      winRate: round3(winRate),
      speedRate: round3(speedRate),
      avgLatencyMs: parseFloat(row.avg_latency) || 0,
      avgConfidence,
      totalCost: parseFloat(row.total_cost) || 0,
      rank: round3(speedRate * 0.6 + confScore * 0.4),
      rankPosition: 0,
      badge: null,
    };
  };
  try {
    const r = await db.query(`
      SELECT candidate_model AS model,
             COUNT(*)::INT AS participations,
             SUM(CASE WHEN selected THEN 1 ELSE 0 END)::INT AS selected_count,
             SUM(CASE WHEN finished_first THEN 1 ELSE 0 END)::INT AS first_finished_count,
             COALESCE(AVG(latency_ms), 0)::NUMERIC(10,1) AS avg_latency,
             AVG(confidence)::NUMERIC(4,2) AS avg_confidence,
             COALESCE(SUM(cost_usd), 0)::NUMERIC AS total_cost
      FROM race_mode_results
      WHERE created_at > NOW() - MAKE_INTERVAL(days => $1)
      GROUP BY candidate_model
      ORDER BY first_finished_count DESC, avg_confidence DESC NULLS LAST
    `, [daysBack]);
    const totalRow = await db.query(`
      SELECT COUNT(DISTINCT call_id)::INT AS total_races
      FROM race_mode_results
      WHERE created_at > NOW() - MAKE_INTERVAL(days => $1)
    `, [daysBack]);

    const entries: LeaderboardEntry[] = r.rows.map(toEntry);
    // Sort by rank desc and assign positions / badges
    entries.sort((a, b) => b.rank - a.rank);
    const podium: Array<LeaderboardEntry['badge']> = ['gold', 'silver', 'bronze'];
    entries.forEach((entry, position) => {
      entry.rankPosition = position + 1;
      entry.badge = podium[position] ?? null;
    });

    // First entry (in rank order) with the strictly lowest average latency.
    const fastest = entries.reduce<LeaderboardEntry | undefined>(
      (best, entry) => (best === undefined || entry.avgLatencyMs < best.avgLatencyMs ? entry : best),
      undefined,
    );
    // First entry (in rank order) with the strictly highest win rate among repeat participants.
    const reliable = entries
      .filter((entry) => entry.participations >= 2)
      .reduce<LeaderboardEntry | undefined>(
        (best, entry) => (best === undefined || entry.winRate > best.winRate ? entry : best),
        undefined,
      );

    return {
      totalRaces: parseInt(totalRow.rows[0]?.total_races ?? '0', 10),
      daysCovered: daysBack,
      entries,
      fastestThisWeek: fastest ? { model: fastest.model, latencyMs: fastest.avgLatencyMs } : null,
      mostReliable: reliable ? { model: reliable.model, winRate: reliable.winRate } : null,
    };
  } catch (err) {
    logger.warn({ err }, 'race-leaderboard: aggregation failed');
    return { totalRaces: 0, daysCovered: daysBack, entries: [], fastestThisWeek: null, mostReliable: null };
  }
 }
--- a/packages/gateway/src/modules/race-mode.ts
+++ b/packages/gateway/src/modules/race-mode.ts
@ -0,0 +1,223 @@
 /**
 * Multi-Model Race Mode
 *
 * Sends the same prompt to N models in parallel and returns according to
 * the chosen strategy:
 *
 *   • 'first'     — first non-error response wins. Cancels in-flight losers.
 *   • 'best'      — wait for all (or timeout), pick highest confidence score.
 *   • 'consensus' — wait for all, return majority answer + agreement score.
 *
 * All candidate runs are audited to `race_mode_results` for analysis —
 * which model is actually fastest, which gives the highest confidence, etc.
 */
 import type { Pool } from 'pg';
 import { logger } from '../observability/logger.js';
 /** How runRace() picks the winner: first OK response, highest confidence, or majority output. */
 export type RaceStrategy = 'first' | 'best' | 'consensus';
 /** Result of running one candidate model in a race. */
 export interface RaceCandidateResult {
  /** Model that produced this result. */
  model: string;
  /** 'ok' for a usable response, 'error' otherwise. */
  status: 'ok' | 'error';
  /** Response text; compared (normalised) by the 'consensus' strategy. */
  output?: string;
  /** Confidence score used to order candidates; a missing value counts as 0. */
  confidence?: number;
  /** Cost of the run; written to the audit table's cost_usd column. */
  cost?: number;
  /** Latency of the run; runRace() records 0 when the runner rejects. */
  latencyMs: number;
  /** Error text; runRace() fills it from the rejection when the runner rejects. */
  errorMessage?: string;
 }
 /** Outcome of a race: the chosen candidate plus every candidate's result. */
 export interface RaceOutcome {
  /** Strategy that was applied. */
  strategy: RaceStrategy;
  /** The candidate chosen by the strategy. */
  selected: RaceCandidateResult;
  /** Results of all candidates, in the order the models were given. */
  candidates: readonly RaceCandidateResult[];
  /** Share of OK candidates whose normalised output matched the winner's. */
  agreementScore?: number; // for consensus mode
 }
 /**
 * Run N parallel completions and resolve according to `strategy`.
 * The `runner` callback is responsible for actually invoking the gateway
 * pipeline — this module is strategy-only and stays decoupled.
 *
 *   • 'first'     — resolves with the first OK candidate and aborts the shared
 *                   signal right away so in-flight losers are cancelled. Losers
 *                   whose runner rejects on abort appear as 'error' results.
 *   • 'best'      — waits for all candidates and picks the highest confidence.
 *   • 'consensus' — waits for all candidates, groups OK outputs after
 *                   normalisation (trim, lower-case, collapsed whitespace,
 *                   first 256 chars) and picks the largest group.
 *
 * A runner rejection is converted into an 'error' result with latency 0, so
 * the candidate promises themselves never reject. For 'best' and 'consensus',
 * when no candidate is OK the first result is selected.
 *
 * Both timers created here are cleared on every exit path, including throws.
 *
 * @param models Candidate model names; must not be empty.
 * @param runner Executes one model and should honour the abort signal.
 * @param strategy Selection strategy.
 * @param opts `timeoutMs` (default 60 000) after which the signal is aborted.
 * @returns The outcome plus the raw per-candidate results in `models` order.
 * @throws Error when `models` is empty, when every candidate errors under
 *         'first', or when 'first' times out before any candidate settled.
 */
 export async function runRace<R extends RaceCandidateResult>(
  models: readonly string[],
  runner: (model: string, signal: AbortSignal) => Promise<R>,
  strategy: RaceStrategy,
  opts: { timeoutMs?: number } = {}
 ): Promise<{ outcome: RaceOutcome; results: R[] }> {
  if (models.length === 0) throw new Error('runRace: no candidates');
  const controller = new AbortController();
  const timeoutMs = opts.timeoutMs ?? 60_000;
  // Every timer is tracked so the finally block can release all of them.
  const timers: Array<ReturnType<typeof setTimeout>> = [
    setTimeout(() => controller.abort(), timeoutMs),
  ];
  try {
    const promises: Array<Promise<R>> = models.map((model) =>
      runner(model, controller.signal).catch(
        (err): R =>
          ({
            model,
            status: 'error',
            errorMessage: err instanceof Error ? err.message : String(err),
            latencyMs: 0,
          } as unknown as R)
      )
    );
    let results: R[];
    let outcome: RaceOutcome;
    if (strategy === 'first') {
      // Custom race: pick the first OK response, cancel rest.
      const winner = await new Promise<R>((resolve, reject) => {
        let pending = promises.length;
        let firstError: R | null = null;
        for (const p of promises) {
          void p.then((r) => {
            if (r.status === 'ok') {
              resolve(r);
              return;
            }
            if (!firstError) firstError = r;
            pending -= 1;
            if (pending === 0) reject(new Error('all candidates errored'));
          });
        }
        // Backstop on overall timeout
        timers.push(
          setTimeout(() => {
            if (firstError) resolve(firstError);
            else reject(new Error('race timeout'));
          }, timeoutMs)
        );
      });
      // Cancel the losers before collecting them, otherwise the race would
      // take as long as the slowest candidate.
      controller.abort();
      results = await Promise.all(promises);
      outcome = { strategy, selected: winner, candidates: results };
    } else if (strategy === 'best') {
      results = await Promise.all(promises);
      const ok = results.filter((r) => r.status === 'ok');
      const winner = ok.length > 0
        ? ok.sort((a, b) => (b.confidence ?? 0) - (a.confidence ?? 0))[0]
        : results[0];
      outcome = { strategy, selected: winner, candidates: results };
    } else {
      // 'consensus' — group identical normalised outputs, pick majority
      results = await Promise.all(promises);
      const ok = results.filter((r) => r.status === 'ok');
      const buckets = new Map<string, R[]>();
      for (const r of ok) {
        const key = (r.output ?? '').trim().toLowerCase().replace(/\s+/g, ' ').slice(0, 256);
        const arr = buckets.get(key);
        if (arr) arr.push(r); else buckets.set(key, [r]);
      }
      const sorted = [...buckets.entries()].sort((a, b) => b[1].length - a[1].length);
      const winnerBucket = sorted[0]?.[1];
      const winner = winnerBucket && winnerBucket.length > 0
        ? winnerBucket.sort((a, b) => (b.confidence ?? 0) - (a.confidence ?? 0))[0]
        : results[0];
      const agreementScore = ok.length > 0 ? (winnerBucket?.length ?? 0) / ok.length : 0;
      outcome = { strategy, selected: winner, candidates: results, agreementScore };
    }
    return { outcome, results };
  } finally {
    for (const timer of timers) clearTimeout(timer);
  }
 }
 /**
 * Audit all race candidates to the `race_mode_results` table, one row per
 * candidate.
 *
 * `finished_first` is only ever set for a candidate that succeeded: under the
 * 'first' strategy that is the selected candidate (when it is OK), otherwise
 * the OK candidate with the lowest latency. When no candidate succeeded, no
 * row is flagged. `selected` marks the rows whose model equals the selected
 * candidate's model.
 *
 * Rows are inserted one after another; a failed insert is logged and does not
 * stop the remaining inserts. The caller id is lower-cased and the output is
 * truncated to 512 characters.
 */
 export async function auditRaceResults(
  db: Pool,
  callId: string,
  callerId: string,
  taskType: string,
  outcome: RaceOutcome
 ): Promise<void> {
  // Errored candidates are never eligible, regardless of their position or latency.
  const fastestOk = outcome.candidates.reduce<RaceCandidateResult | undefined>(
    (best, c) =>
      c.status === 'ok' && (best === undefined || c.latencyMs < best.latencyMs) ? c : best,
    undefined
  );
  const firstFinishedModel =
    outcome.strategy === 'first' && outcome.selected.status === 'ok'
      ? outcome.selected.model
      : fastestOk?.model;
  for (const c of outcome.candidates) {
    try {
      await db.query(
        `
        INSERT INTO race_mode_results (
          call_id, caller_id, task_type, strategy,
          candidate_model, finished_first, selected,
          latency_ms, confidence, cost_usd, error_message, output_preview
        ) VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12)
        `,
        [
          callId,
          callerId.toLowerCase(),
          taskType,
          outcome.strategy,
          c.model,
          c.model === firstFinishedModel,
          c.model === outcome.selected.model,
          c.latencyMs,
          c.confidence ?? null,
          c.cost ?? null,
          c.errorMessage ?? null,
          c.output?.slice(0, 512) ?? null,
        ]
      );
    } catch (err) {
      logger.warn({ err, model: c.model }, 'race-mode: audit insert failed');
    }
  }
 }
 /**
 * Aggregate race statistics for the dashboard over the last `hoursBack` hours.
 *
 * Runs four queries in parallel: distinct races, distinct races per strategy,
 * the model most often flagged as finishing first, and the model with the
 * highest average confidence. On any failure the error is logged and an empty
 * summary is returned.
 */
 export async function getRaceStats(
  db: Pool,
  hoursBack: number = 24
 ): Promise<{
  totalRaces: number;
  byStrategy: Record<string, number>;
  fastestModel: { model: string; wins: number } | null;
  highestConfidenceModel: { model: string; avg: number } | null;
 }> {
  try {
    const [totalResult, strategyResult, fastestResult, confidenceResult] = await Promise.all([
      db.query(
        `SELECT COUNT(DISTINCT call_id)::INT AS n FROM race_mode_results
         WHERE created_at > NOW() - MAKE_INTERVAL(hours => $1)`,
        [hoursBack]
      ),
      db.query(
        `SELECT strategy, COUNT(DISTINCT call_id)::INT AS n FROM race_mode_results
         WHERE created_at > NOW() - MAKE_INTERVAL(hours => $1)
         GROUP BY strategy`,
        [hoursBack]
      ),
      db.query(
        `SELECT candidate_model AS model, COUNT(*)::INT AS wins FROM race_mode_results
         WHERE finished_first = true AND created_at > NOW() - MAKE_INTERVAL(hours => $1)
         GROUP BY candidate_model ORDER BY wins DESC LIMIT 1`,
        [hoursBack]
      ),
      db.query(
        `SELECT candidate_model AS model, AVG(confidence)::NUMERIC(4,2) AS avg
         FROM race_mode_results
         WHERE confidence IS NOT NULL AND created_at > NOW() - MAKE_INTERVAL(hours => $1)
         GROUP BY candidate_model ORDER BY avg DESC LIMIT 1`,
        [hoursBack]
      ),
    ]);
    const strategyCounts = strategyResult.rows.reduce((acc: Record<string, number>, row: any) => {
      acc[row.strategy] = parseInt(row.n, 10) || 0;
      return acc;
    }, {} as Record<string, number>);
    const [fastestRow] = fastestResult.rows;
    const [confidentRow] = confidenceResult.rows;
    const fastestModel = fastestRow
      ? { model: fastestRow.model, wins: parseInt(fastestRow.wins, 10) }
      : null;
    const highestConfidenceModel = confidentRow
      ? { model: confidentRow.model, avg: parseFloat(confidentRow.avg) }
      : null;
    return {
      totalRaces: parseInt(totalResult.rows[0]?.n ?? '0', 10),
      byStrategy: strategyCounts,
      fastestModel,
      highestConfidenceModel,
    };
  } catch (err) {
    logger.warn({ err }, 'race-mode: stats failed (table missing?)');
    return { totalRaces: 0, byStrategy: {}, fastestModel: null, highestConfidenceModel: null };
  }
 }
--- a/packages/gateway/src/modules/report-generator.ts
+++ b/packages/gateway/src/modules/report-generator.ts
@ -0,0 +1,218 @@
 /**
 * Monthly Report Generator
 *
 * Renders a print-friendly HTML report (intended to be saved as PDF via the
 * browser's print dialog). Includes hero counters, savings breakdown by
 * source, top models, top callers, unlocked achievements (currently all of
 * them, not only this month's), and the buddy status. An activity heatmap
 * is not rendered yet.
 *
 * Going via HTML+print-CSS sidesteps any need for an external PDF library
 * — the user clicks the gateway's "Print to PDF" link and saves the page.
 */
 import type { Pool } from 'pg';
 import { getComprehensiveSavings } from './savings-calculator.js';
 import { getBuddyState, getAchievements } from './gamification.js';
 /**
 * Format a USD amount for display, using more decimals for smaller amounts.
 *
 * Precision is chosen from the magnitude of the value: below one cent → 6
 * decimals, below one dollar → 4 decimals, otherwise 2. Negative amounts
 * (e.g. a "saved" figure when the gateway cost more than the baseline) are
 * rendered with a leading minus sign, as `-$1.50`.
 *
 * @param c Amount in USD.
 * @returns The formatted amount, e.g. `$0.00`, `$0.004200`, `-$12.50`.
 */
 function formatCost(c: number): string {
  if (c === 0) return '$0.00';
  // Tier on the absolute value so negatives do not all fall into the
  // "< 0.01" branch and come out as "$-5.000000".
  const magnitude = Math.abs(c);
  const sign = c < 0 ? '-' : '';
  if (magnitude < 0.01) return `${sign}$${magnitude.toFixed(6)}`;
  if (magnitude < 1) return `${sign}$${magnitude.toFixed(4)}`;
  return `${sign}$${magnitude.toFixed(2)}`;
 }
 /** Render a number with the runtime's default locale formatting (grouping separators etc.). */
 function fmtNum(n: number): string {
  const localized = n.toLocaleString();
  return localized;
 }
 /** Render a 0–1 fraction as a percentage with one decimal place, e.g. 0.5 → "50.0%". */
 function fmtPct(n: number): string {
  const percent = n * 100;
  return percent.toFixed(1) + '%';
 }
 /** Characters that must be entity-encoded before being placed in HTML text or attributes. */
 const HTML_ESCAPES: Record<string, string> = {
  '&': '&amp;',
  '<': '&lt;',
  '>': '&gt;',
  '"': '&quot;',
  "'": '&#39;',
 };
 /** Stringify a value and entity-encode the HTML-significant characters in it. */
 function escapeHtml(value: unknown): string {
  return String(value).replace(/[&<>"']/g, (ch) => HTML_ESCAPES[ch] ?? ch);
 }
 /**
 * Build the monthly report as a standalone, print-friendly HTML document.
 *
 * Request statistics, top models and top callers are queried from
 * `request_tracking` for the half-open UTC range [first of month, first of
 * next month). Savings, buddy state and achievements come from their
 * respective modules.
 *
 * Every value that originates from stored data (model names, caller ids,
 * achievement and buddy fields, ASCII art) is HTML-escaped before being
 * interpolated, so a caller-supplied identifier cannot inject markup into the
 * report and ASCII art containing `<` or `>` renders verbatim.
 *
 * @param db    Postgres pool used for all queries.
 * @param year  Four-digit year of the report month.
 * @param month Month number, 1–12.
 * @returns The complete HTML document as a string.
 */
 export async function generateMonthlyReport(
  db: Pool,
  year: number,
  month: number
 ): Promise<string> {
  const monthStart = new Date(Date.UTC(year, month - 1, 1));
  const monthEnd = new Date(Date.UTC(year, month, 1));
  // NOTE(review): this spans from the start of the month until *now*, so for a
  // past month the savings figures presumably also cover later activity —
  // confirm against getComprehensiveSavings.
  const hoursBack = Math.ceil((Date.now() - monthStart.getTime()) / 3600_000);
  const monthName = monthStart.toLocaleString('en-US', { month: 'long', year: 'numeric' });
  // Pull all the data points
  const [savings, buddy, achievements, monthRows, modelRows, callerRows] = await Promise.all([
    getComprehensiveSavings(db, hoursBack),
    getBuddyState(db, 'gateway'),
    getAchievements(db),
    db.query(`
      SELECT COUNT(*)::INT AS req,
             COALESCE(SUM(tokens_in + tokens_out), 0)::BIGINT AS tokens,
             COALESCE(AVG(latency_ms), 0)::INT AS avg_lat,
             COALESCE(SUM(cost_usd), 0)::NUMERIC AS cost,
             SUM(CASE WHEN status='approved' THEN 1 ELSE 0 END)::FLOAT / NULLIF(COUNT(*),0) AS success_rate
      FROM request_tracking
      WHERE created_at >= $1 AND created_at < $2
    `, [monthStart, monthEnd]),
    db.query(`
      SELECT model, COUNT(*)::INT AS cnt
      FROM request_tracking
      WHERE created_at >= $1 AND created_at < $2
      GROUP BY model ORDER BY cnt DESC LIMIT 8
    `, [monthStart, monthEnd]),
    db.query(`
      SELECT caller_id, COUNT(*)::INT AS cnt, COALESCE(SUM(cost_usd), 0)::NUMERIC AS cost
      FROM request_tracking
      WHERE created_at >= $1 AND created_at < $2
      GROUP BY caller_id ORDER BY cnt DESC LIMIT 8
    `, [monthStart, monthEnd]),
  ]);
  const monthStats = monthRows.rows[0] ?? {};
  const totalReq = parseInt(monthStats.req ?? '0', 10);
  const totalTokens = parseInt(monthStats.tokens ?? '0', 10);
  const monthCost = parseFloat(monthStats.cost ?? '0');
  // success_rate is NULL when the month has no requests (NULLIF guards the division).
  const successRate = parseFloat(monthStats.success_rate ?? '0');
  const avgLat = parseInt(monthStats.avg_lat ?? '0', 10);
  // All unlocked achievements are shown; a "this month" filter would need an unlock timestamp.
  const newAchievements = achievements.unlocked.slice(0, 12);
  const html = /* html */ `
 <!DOCTYPE html>
 <html><head>
 <meta charset="utf-8">
 <title>LLM Gateway · Monthly Report · ${monthName}</title>
 <style>
  @page { size: A4; margin: 18mm 16mm; }
  body { font-family: 'Inter', -apple-system, sans-serif; font-size: 11pt; color: #24313d; line-height: 1.5; }
  h1 { font-size: 22pt; font-weight: 700; letter-spacing: -0.02em; margin: 0 0 4pt; color: #0f766e; }
  h2 { font-size: 13pt; font-weight: 600; margin: 16pt 0 8pt; padding-bottom: 4pt; border-bottom: 1pt solid #d6e0e7; color: #0f766e; }
  h2::before { content: '// '; }
  .eyebrow { font-family: 'JetBrains Mono', monospace; font-size: 8pt; letter-spacing: 0.16em; text-transform: uppercase; color: #667684; }
  .hero { display: grid; grid-template-columns: 1fr 1fr 1fr; gap: 8pt; margin: 12pt 0 18pt; }
  .hero-tile { padding: 10pt; border: 0.5pt solid #d6e0e7; background: #f4f7fa; }
  .hero-num { font-family: 'JetBrains Mono', monospace; font-size: 22pt; font-weight: 700; color: #0f766e; line-height: 1; }
  .hero-label { font-size: 8pt; text-transform: uppercase; letter-spacing: 0.1em; color: #667684; margin-bottom: 4pt; }
  table { width: 100%; border-collapse: collapse; margin: 8pt 0; font-size: 10pt; }
  th, td { padding: 4pt 8pt; border-bottom: 0.3pt solid #d6e0e7; text-align: left; }
  th { font-weight: 600; color: #667684; font-size: 8pt; text-transform: uppercase; letter-spacing: 0.1em; }
  td.num { font-family: 'JetBrains Mono', monospace; text-align: right; }
  .axes { display: grid; grid-template-columns: repeat(5, 1fr); gap: 4pt; }
  .axis { padding: 8pt; border: 0.5pt solid #d6e0e7; background: #f4f7fa; text-align: center; }
  .axis-cost { font-family: 'JetBrains Mono', monospace; font-weight: 700; font-size: 11pt; color: #0f766e; }
  .axis-label { font-size: 7pt; color: #667684; text-transform: uppercase; letter-spacing: 0.08em; margin-top: 4pt; }
  .ach { display: inline-block; padding: 4pt 8pt; margin: 2pt; border: 0.5pt solid #0f766e; background: #ecfdf5; font-size: 9pt; }
  .footer { margin-top: 24pt; padding-top: 8pt; border-top: 0.3pt solid #d6e0e7; font-size: 8pt; color: #93a1ad; text-align: center; }
  .ascii-buddy { font-family: 'JetBrains Mono', monospace; font-size: 9pt; line-height: 1; white-space: pre; }
  .savings-vs { display: flex; gap: 8pt; align-items: center; margin: 12pt 0; }
  .savings-vs > div { flex: 1; padding: 10pt; border: 0.5pt solid #d6e0e7; }
  .savings-vs .without { background: #fef2f2; }
  .savings-vs .with { background: #ecfdf5; }
  .savings-vs .arrow { flex: 0; font-size: 14pt; color: #93a1ad; }
  .num-amount { font-family: 'JetBrains Mono', monospace; font-size: 16pt; font-weight: 700; }
  @media print { .no-print { display: none; } body { background: white; } }
 </style>
 </head>
 <body>
 <div class="no-print" style="margin-bottom: 8pt; padding: 8pt; background: #ecfdf5; border-left: 3pt solid #0f766e;">
  <strong>Save as PDF</strong>: Press <code>Cmd/Ctrl+P</code> → choose "Save as PDF".
 </div>
 <header>
  <div class="eyebrow">monthly report</div>
  <h1>${monthName}</h1>
  <div style="font-family: 'JetBrains Mono', monospace; font-size: 9pt; color: #667684;">
    LLM Gateway · ${new Date().toISOString().split('T')[0]}
  </div>
 </header>
 <div class="hero">
  <div class="hero-tile">
    <div class="hero-label">requests routed</div>
    <div class="hero-num">${fmtNum(totalReq)}</div>
  </div>
  <div class="hero-tile">
    <div class="hero-label">tokens processed</div>
    <div class="hero-num">${fmtNum(totalTokens)}</div>
  </div>
  <div class="hero-tile">
    <div class="hero-label">cost saved</div>
    <div class="hero-num">${formatCost(savings.totalCostSaved)}</div>
  </div>
 </div>
 <h2>Cost Analysis</h2>
 <div class="savings-vs">
  <div class="without">
    <div class="hero-label">without gateway</div>
    <div class="num-amount" style="color: #b42318;">${formatCost(savings.costWithoutGateway)}</div>
  </div>
  <div class="arrow">→</div>
  <div class="with">
    <div class="hero-label">with gateway</div>
    <div class="num-amount" style="color: #15803d;">${formatCost(savings.costWithGateway)}</div>
  </div>
 </div>
 <p>Saved <strong>${formatCost(savings.costWithoutGateway - savings.costWithGateway)}</strong> through cache hits, compression, subscription bridges, local routing, and race-mode optimization.</p>
 <h2>Savings by Source</h2>
 <div class="axes">
  <div class="axis"><div class="axis-cost">${formatCost(savings.bySource.cache.cost)}</div><div class="axis-label">⚡ Cache</div></div>
  <div class="axis"><div class="axis-cost">${formatCost(savings.bySource.compression.cost)}</div><div class="axis-label">🗜 Compression</div></div>
  <div class="axis"><div class="axis-cost">${formatCost(savings.bySource.subscriptionBridge.cost)}</div><div class="axis-label">🌉 Sub. Bridges</div></div>
  <div class="axis"><div class="axis-cost">${formatCost(savings.bySource.localRouting.cost)}</div><div class="axis-label">🏠 Local</div></div>
  <div class="axis"><div class="axis-cost">${formatCost(savings.bySource.raceMode.cost)}</div><div class="axis-label">🏁 Race</div></div>
 </div>
 <h2>Activity Summary</h2>
 <table>
  <tr><th>Metric</th><th>Value</th></tr>
  <tr><td>Total requests</td><td class="num">${fmtNum(totalReq)}</td></tr>
  <tr><td>Average latency</td><td class="num">${fmtNum(avgLat)} ms</td></tr>
  <tr><td>Success rate</td><td class="num">${fmtPct(successRate)}</td></tr>
  <tr><td>Cost actually paid</td><td class="num">${formatCost(monthCost)}</td></tr>
 </table>
 <h2>Top Models This Month</h2>
 <table>
  <tr><th>Model</th><th>Requests</th><th>Share</th></tr>
  ${modelRows.rows.map((r: any) => `
    <tr>
      <td><code>${escapeHtml(r.model)}</code></td>
      <td class="num">${fmtNum(parseInt(r.cnt,10))}</td>
      <td class="num">${totalReq > 0 ? ((parseInt(r.cnt,10)/totalReq)*100).toFixed(1) : 0}%</td>
    </tr>
  `).join('')}
 </table>
 <h2>Top Callers This Month</h2>
 <table>
  <tr><th>Caller</th><th>Requests</th><th>Cost</th></tr>
  ${callerRows.rows.map((r: any) => `
    <tr>
      <td><code>${escapeHtml(r.caller_id)}</code></td>
      <td class="num">${fmtNum(parseInt(r.cnt,10))}</td>
      <td class="num">${formatCost(parseFloat(r.cost))}</td>
    </tr>
  `).join('')}
 </table>
 <h2>Achievements Unlocked</h2>
 <div>
  ${newAchievements.map((a) => `<span class="ach">${escapeHtml(a.icon)} ${escapeHtml(a.title)}</span>`).join('')}
  ${newAchievements.length === 0 ? '<em>No achievements unlocked yet — keep using the gateway!</em>' : ''}
 </div>
 <h2>Buddy Status</h2>
 <div style="display: flex; gap: 12pt; align-items: center; padding: 10pt; border: 0.5pt solid #d6e0e7;">
  <div class="ascii-buddy">${escapeHtml(buddy.asciiArt.join('\n'))}</div>
  <div>
    <strong>${escapeHtml(buddy.name)}</strong> · ${escapeHtml(buddy.species)} · ${escapeHtml(buddy.stage)}<br>
    Level ${escapeHtml(buddy.level)} · XP ${fmtNum(buddy.xp)}/${fmtNum(buddy.xpForNextLevel)}<br>
    Mood: ${escapeHtml(buddy.mood)} · Streak: ${escapeHtml(buddy.streakDays)} days<br>
    <em>"${escapeHtml(buddy.speech)}"</em>
  </div>
 </div>
 <div class="footer">
  Generated by LLM Gateway · ${new Date().toISOString()} · llm-gateway.context-x.org
 </div>
 </body></html>`;
  return html;
 }
--- a/packages/gateway/src/modules/request-logger.ts
+++ b/packages/gateway/src/modules/request-logger.ts
@ -109,6 +109,11 @@ export class RequestLogger {
      cost_usd: number;
      latency_ms: number;
      fallback_used: boolean;
      compression_mode?: string;
      compression_tokens_before?: number;
      compression_tokens_after?: number;
      compression_tokens_saved?: number;
      compression_savings_pct?: number;
      error_message?: string;
      created_at: string;
    }>
@ -116,22 +121,35 @@ export class RequestLogger {
    const result = await this.db.query(
      `
      SELECT
-        request_id,
+        rt.request_id,
-        caller_id as caller,
+        rt.caller_id as caller,
-        task_type,
+        rt.task_type,
-        model,
+        rt.model,
-        status,
+        rt.status,
-        confidence_score,
+        rt.confidence_score,
-        tokens_in,
+        rt.tokens_in,
-        tokens_out,
+        rt.tokens_out,
-        cost_usd,
+        rt.cost_usd,
-        latency_ms,
+        rt.latency_ms,
-        fallback_used,
+        rt.fallback_used,
-        error_message,
+        tv.mode as compression_mode,
-        created_at
+        tv.tokens_before as compression_tokens_before,
-      FROM request_tracking
+        tv.tokens_after as compression_tokens_after,
-      WHERE created_at > NOW() - MAKE_INTERVAL(hours => $1)
+        GREATEST(COALESCE(tv.tokens_before, 0) - COALESCE(tv.tokens_after, 0), 0) as compression_tokens_saved,
        tv.savings_pct as compression_savings_pct,
        rt.error_message,
        rt.created_at
      FROM request_tracking rt
      LEFT JOIN LATERAL (
        SELECT mode, tokens_before, tokens_after, savings_pct
        FROM tokenvault_metrics
        WHERE tool_used = 'gateway'
          AND file_path = rt.request_id
        ORDER BY created_at DESC
        LIMIT 1
      ) tv ON true
      WHERE rt.created_at > NOW() - MAKE_INTERVAL(hours => $1)
      ORDER BY rt.created_at DESC
      LIMIT $2
      `,
      [offsetHours, limit]
@ -149,6 +167,11 @@ export class RequestLogger {
      cost_usd: row.cost_usd,
      latency_ms: row.latency_ms,
      fallback_used: row.fallback_used,
      compression_mode: row.compression_mode,
      compression_tokens_before: row.compression_tokens_before ? parseInt(row.compression_tokens_before, 10) : undefined,
      compression_tokens_after: row.compression_tokens_after ? parseInt(row.compression_tokens_after, 10) : undefined,
      compression_tokens_saved: row.compression_tokens_saved ? parseInt(row.compression_tokens_saved, 10) : 0,
      compression_savings_pct: row.compression_savings_pct ? parseFloat(row.compression_savings_pct) : 0,
      error_message: row.error_message,
      created_at: row.created_at
    }));
@ -160,6 +183,17 @@ export class RequestLogger {
  async getMetrics(bucketMinutes: number = 60): Promise<{
    total_requests: number;
    total_cost: number;
    estimated_api_cost: number;
    estimated_api_cost_avoided: number;
    total_tokens_in: number;
    total_tokens_out: number;
    total_tokens: number;
    compression_operations: number;
    compression_tokens_before: number;
    compression_tokens_after: number;
    compression_tokens_saved: number;
    compression_rate: number;
    cache_hit_rate: number;
    avg_latency: number;
    success_rate: number;
    avg_confidence: number;
@ -177,13 +211,15 @@ export class RequestLogger {
      `
      SELECT
        COUNT(*) as total_requests,
-        SUM(cost_usd) as total_cost,
+        COALESCE(SUM(cost_usd), 0) as total_cost,
-        AVG(latency_ms) as avg_latency,
+        COALESCE(SUM(tokens_in), 0) as total_tokens_in,
-        SUM(CASE WHEN status = 'approved' THEN 1 ELSE 0 END)::FLOAT / COUNT(*) as success_rate,
+        COALESCE(SUM(tokens_out), 0) as total_tokens_out,
-        AVG(confidence_score) as avg_confidence,
+        COALESCE(AVG(latency_ms), 0) as avg_latency,
-        SUM(CASE WHEN fallback_used = true THEN 1 ELSE 0 END)::FLOAT / COUNT(*) as fallback_percentage
+        CASE WHEN COUNT(*) = 0 THEN 0 ELSE SUM(CASE WHEN status = 'approved' THEN 1 ELSE 0 END)::FLOAT / COUNT(*) END as success_rate,
        COALESCE(AVG(confidence_score), 0) as avg_confidence,
        CASE WHEN COUNT(*) = 0 THEN 0 ELSE SUM(CASE WHEN fallback_used = true THEN 1 ELSE 0 END)::FLOAT / COUNT(*) END as fallback_percentage
      FROM request_tracking
-      WHERE created_at > NOW() - MAKE_INTERVAL(mins => $1)
+      WHERE created_at > NOW() - ($1 * INTERVAL '1 minute')
      `,
      [bucketMinutes]
    );
@ -192,7 +228,7 @@ export class RequestLogger {
      `
      SELECT caller_id as caller, COUNT(*) as count
      FROM request_tracking
-      WHERE created_at > NOW() - MAKE_INTERVAL(mins => $1)
+      WHERE created_at > NOW() - ($1 * INTERVAL '1 minute')
      GROUP BY caller_id
      ORDER BY count DESC
      LIMIT 5
@ -204,7 +240,7 @@ export class RequestLogger {
      `
      SELECT model, COUNT(*) as count
      FROM request_tracking
-      WHERE created_at > NOW() - MAKE_INTERVAL(mins => $1)
+      WHERE created_at > NOW() - ($1 * INTERVAL '1 minute')
      GROUP BY model
      ORDER BY count DESC
      LIMIT 5
@ -224,11 +260,47 @@ export class RequestLogger {
      [bucketMinutes]
    );
    const compressionResult = await this.db.query(
      `
      SELECT
        COUNT(*) as operations,
        COALESCE(SUM(tokens_before), 0) as tokens_before,
        COALESCE(SUM(tokens_after), 0) as tokens_after,
        COALESCE(SUM(GREATEST(tokens_before - tokens_after, 0)), 0) as tokens_saved
      FROM tokenvault_metrics
      WHERE tool_used = 'gateway'
        AND created_at > NOW() - ($1 * INTERVAL '1 minute')
      `,
      [bucketMinutes]
    );
    const metrics = metricsResult.rows[0];
    const totalTokensIn = parseInt(metrics.total_tokens_in, 10) || 0;
    const totalTokensOut = parseInt(metrics.total_tokens_out, 10) || 0;
    const totalTokens = totalTokensIn + totalTokensOut;
    const compression = compressionResult.rows[0] ?? {};
    const compressionTokensBefore = parseInt(compression.tokens_before, 10) || 0;
    const compressionTokensAfter = parseInt(compression.tokens_after, 10) || 0;
    const compressionTokensSaved = parseInt(compression.tokens_saved, 10) || 0;
    const referenceInputCostPer1k = parseFloat(process.env['REFERENCE_INPUT_COST_PER_1K'] ?? '0.005');
    const referenceOutputCostPer1k = parseFloat(process.env['REFERENCE_OUTPUT_COST_PER_1K'] ?? '0.015');
    const estimatedApiCost = (totalTokensIn / 1000) * referenceInputCostPer1k + (totalTokensOut / 1000) * referenceOutputCostPer1k;
    const totalCost = parseFloat(metrics.total_cost) || 0;
    return {
      total_requests: parseInt(metrics.total_requests) || 0,
-      total_cost: parseFloat(metrics.total_cost) || 0,
+      total_cost: totalCost,
      estimated_api_cost: estimatedApiCost,
      estimated_api_cost_avoided: Math.max(0, estimatedApiCost - totalCost),
      total_tokens_in: totalTokensIn,
      total_tokens_out: totalTokensOut,
      total_tokens: totalTokens,
      compression_operations: parseInt(compression.operations, 10) || 0,
      compression_tokens_before: compressionTokensBefore,
      compression_tokens_after: compressionTokensAfter,
      compression_tokens_saved: compressionTokensSaved,
      compression_rate: compressionTokensBefore > 0 ? compressionTokensSaved / compressionTokensBefore : 0,
      cache_hit_rate: 0,
      avg_latency: Math.round(parseFloat(metrics.avg_latency) || 0),
      success_rate: parseFloat(metrics.success_rate) || 0,
      avg_confidence: parseFloat(metrics.avg_confidence) || 0,
--- a/packages/gateway/src/modules/response-cache.ts
+++ b/packages/gateway/src/modules/response-cache.ts
@ -0,0 +1,390 @@
 /**
 * Response Cache
 *
 * Two-tier cache:
 *   • Tier 1 (exact)    — sha256 of canonical request → instant lookup, $0 cost
 *   • Tier 2 (semantic) — embedding cosine similarity via pgvector; the nearest
 *                          entry is served only when it meets the similarity
 *                          threshold. When no embedding can be produced or the
 *                          vector query fails, the lookup reports no hit.
 *
 * Cache hits skip the entire LLM pipeline. Each hit increments the saved-cost
 * counter so the dashboard can show real savings in real time.
 */
 import { createHash } from 'crypto';
 import type { Pool } from 'pg';
 import { logger } from '../observability/logger.js';
 import { embed, vectorToPgLiteral, EMBEDDING_DIMENSION } from './embedding-client.js';
 /** Request fields from which the cache key is derived and which are stored with a cached response. */
 export interface CacheableRequest {
  /** Caller identifier; trimmed and lower-cased before hashing and before storage. */
  caller: string;
  /** Optional task type; part of the cache key (an absent value hashes as the empty string). */
  task_type?: string;
  /** Optional model name; part of the cache key (an absent value hashes as the empty string). */
  model?: string;
  /** Optional system prompt; part of the cache key. */
  system?: string;
  /** Request input text; part of the cache key and the text that gets embedded for semantic lookup. */
  input: string;
 }
 /** A cache entry as returned by the lookup functions, mapped from a `response_cache` row. */
 export interface CachedResponse {
  /** Row id; passed to `recordCacheHit` to update hit statistics. */
  id: number;
  /** Value of the row's `cache_key` column. */
  cacheKey: string;
  /** The stored response payload (`response_json` column). */
  responseJson: Record<string, unknown>;
  /** Value of `cost_when_cached`; added to `cost_saved` on every recorded hit. */
  costWhenCached: number;
  /** Value of the `tokens_in` column. */
  tokensIn: number;
  /** Value of the `tokens_out` column. */
  tokensOut: number;
  /** Value of the `hit_count` column at lookup time. */
  hitCount: number;
  /** Whole seconds elapsed between the row's `created_at` and the lookup. */
  ageSeconds: number;
 }
 /**
 * Compute a stable cache key (hex SHA-256) for a request.
 *
 * Normalisation applied before hashing:
 *   • caller, task type and model are trimmed and lower-cased;
 *   • system prompt and input are trimmed and have whitespace runs collapsed
 *     to a single space, but stay case-sensitive.
 *
 * The complete system prompt and input are hashed. Hashing only a prefix
 * would give two long requests that share their opening text the same key,
 * and the cache would then serve one request's response for the other.
 * Keys for requests within the former 4,096 / 16,384 character limits are
 * unchanged.
 *
 * @param req Request whose identity should be hashed.
 * @returns 64-character lowercase hex digest.
 */
 export function computeCacheKey(req: CacheableRequest): string {
  const collapseWhitespace = (text: string): string => text.trim().replace(/\s+/g, ' ');
  const canonical = [
    `caller=${req.caller.trim().toLowerCase()}`,
    `task=${(req.task_type ?? '').trim().toLowerCase()}`,
    `model=${(req.model ?? '').trim().toLowerCase()}`,
    `system=${collapseWhitespace(req.system ?? '')}`,
    `input=${collapseWhitespace(req.input)}`,
  ].join('\n');
  return createHash('sha256').update(canonical).digest('hex');
 }
 /**
 * Fetch the cache entry stored under `cacheKey`, provided its TTL has not
 * elapsed (`created_at + ttl_seconds` is still in the future).
 *
 * @param db       Postgres pool.
 * @param cacheKey Key as produced by `computeCacheKey`.
 * @returns The mapped entry, or null when there is no fresh row or the query
 *          fails (the failure is logged at warn level, not thrown).
 */
 export async function getCachedResponse(
  db: Pool,
  cacheKey: string
 ): Promise<CachedResponse | null> {
  // Integer columns may arrive as strings; anything unparsable counts as 0.
  const toInt = (raw: string): number => parseInt(raw, 10) || 0;
  try {
    const { rows } = await db.query(
      `
      SELECT id, cache_key, response_json, cost_when_cached, tokens_in, tokens_out,
             hit_count, EXTRACT(EPOCH FROM (NOW() - created_at))::INT AS age_seconds,
             ttl_seconds
      FROM response_cache
      WHERE cache_key = $1
        AND (created_at + (ttl_seconds * INTERVAL '1 second')) > NOW()
      LIMIT 1
      `,
      [cacheKey]
    );
    const [hit] = rows;
    if (!hit) return null;
    const entry: CachedResponse = {
      id: Number(hit.id),
      cacheKey: hit.cache_key,
      responseJson: hit.response_json,
      costWhenCached: parseFloat(hit.cost_when_cached) || 0,
      tokensIn: toInt(hit.tokens_in),
      tokensOut: toInt(hit.tokens_out),
      hitCount: toInt(hit.hit_count),
      ageSeconds: toInt(hit.age_seconds),
    };
    return entry;
  } catch (err) {
    logger.warn({ err }, 'response-cache: getCachedResponse failed (table missing?)');
    return null;
  }
 }
 /**
 * Find the closest unexpired cache entry for `inputText` by pgvector cosine
 * distance, restricted to the same caller (trimmed, lower-cased) and — when
 * given — the same task type.
 *
 * Only the single nearest row is considered. The result is null when:
 *   • `embed` yields no vector for the input,
 *   • no candidate row exists,
 *   • the nearest row's similarity is NaN or below `similarityThreshold`,
 *   • the query throws (logged at debug level).
 *
 * NOTE(review): unlike the exact key, this lookup does not constrain the
 * model or system prompt — confirm that is intended.
 *
 * @param db                  Postgres pool.
 * @param caller              Caller identifier.
 * @param taskType            Optional task type filter; undefined matches any.
 * @param inputText           Text to embed and compare.
 * @param similarityThreshold Minimum `1 - cosine distance` to accept (default 0.92).
 */
 export async function getSemanticCachedResponse(
  db: Pool,
  caller: string,
  taskType: string | undefined,
  inputText: string,
  similarityThreshold: number = 0.92
 ): Promise<(CachedResponse & { similarity: number }) | null> {
  const queryVector = await embed(inputText);
  if (!queryVector) return null;
  const toInt = (raw: string): number => parseInt(raw, 10) || 0;
  try {
    const { rows } = await db.query(
      `
      SELECT id, cache_key, response_json, cost_when_cached, tokens_in, tokens_out,
             hit_count, EXTRACT(EPOCH FROM (NOW() - created_at))::INT AS age_seconds,
             1 - (embedding <=> $1::vector) AS similarity
      FROM response_cache
      WHERE caller_id = $2
        AND ($3::TEXT IS NULL OR task_type = $3)
        AND embedding IS NOT NULL
        AND (created_at + (ttl_seconds * INTERVAL '1 second')) > NOW()
      ORDER BY embedding <=> $1::vector ASC
      LIMIT 1
      `,
      [vectorToPgLiteral(queryVector), caller.trim().toLowerCase(), taskType ?? null]
    );
    const [nearest] = rows;
    if (!nearest) return null;
    const similarity = parseFloat(nearest.similarity);
    const accepted = !Number.isNaN(similarity) && similarity >= similarityThreshold;
    if (!accepted) return null;
    return {
      id: Number(nearest.id),
      cacheKey: nearest.cache_key,
      responseJson: nearest.response_json,
      costWhenCached: parseFloat(nearest.cost_when_cached) || 0,
      tokensIn: toInt(nearest.tokens_in),
      tokensOut: toInt(nearest.tokens_out),
      hitCount: toInt(nearest.hit_count),
      ageSeconds: toInt(nearest.age_seconds),
      similarity,
    };
  } catch (err) {
    logger.debug({ err }, 'response-cache: getSemanticCachedResponse failed (extension missing?)');
    return null;
  }
 }
 /**
 * Store (or overwrite) the cached response for a request.
 *
 * The row is keyed by `computeCacheKey(req)`. On a key conflict the payload,
 * cost, token counts and TTL are replaced and `created_at` is reset to now,
 * which restarts the TTL window. An existing embedding is kept when no new
 * one is available.
 *
 * The embedding is awaited before writing. It is only stored when its length
 * equals `EMBEDDING_DIMENSION`. If the insert that includes the embedding
 * column fails for any reason, the insert is retried once without that
 * column; a failure of the retry is logged at warn level and swallowed.
 *
 * @param db       Postgres pool.
 * @param req      Request the response belongs to.
 * @param response Response payload; serialised with `JSON.stringify`.
 * @param meta     Cost and token counts, plus an optional TTL in seconds
 *                 (default 86,400 = 24 h).
 */
 export async function setCachedResponse(
  db: Pool,
  req: CacheableRequest,
  response: Record<string, unknown>,
  meta: { cost: number; tokensIn: number; tokensOut: number; ttlSeconds?: number }
 ): Promise<void> {
  const cacheKey = computeCacheKey(req);
  const ttl = meta.ttlSeconds ?? 86_400;
  const vec = await embed(req.input);
  const hasUsableVector = Boolean(vec) && vec!.length === EMBEDDING_DIMENSION;
  const embedLiteral = hasUsableVector ? vectorToPgLiteral(vec!) : null;
  // Built lazily inside each try so a serialisation error is handled exactly
  // like a query error.
  const columnValues = (): unknown[] => [
    cacheKey,
    req.caller.trim().toLowerCase(),
    req.task_type ?? null,
    req.model ?? null,
    req.input.slice(0, 1024),
    JSON.stringify(response),
    meta.cost,
    meta.tokensIn,
    meta.tokensOut,
    ttl,
  ];
  try {
    await db.query(
      `
      INSERT INTO response_cache
        (cache_key, caller_id, task_type, model, input_preview,
         response_json, cost_when_cached, tokens_in, tokens_out, ttl_seconds, embedding)
      VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11::vector)
      ON CONFLICT (cache_key) DO UPDATE SET
        response_json    = EXCLUDED.response_json,
        cost_when_cached = EXCLUDED.cost_when_cached,
        tokens_in        = EXCLUDED.tokens_in,
        tokens_out       = EXCLUDED.tokens_out,
        ttl_seconds      = EXCLUDED.ttl_seconds,
        embedding        = COALESCE(EXCLUDED.embedding, response_cache.embedding),
        created_at       = NOW()
      `,
      [...columnValues(), embedLiteral]
    );
    return;
  } catch (err) {
    // Retry without embedding column when the extension hasn't migrated yet
    logger.debug({ err }, 'response-cache: setCachedResponse with embedding failed, retrying without');
  }
  try {
    await db.query(
      `
        INSERT INTO response_cache
          (cache_key, caller_id, task_type, model, input_preview,
           response_json, cost_when_cached, tokens_in, tokens_out, ttl_seconds)
        VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10)
        ON CONFLICT (cache_key) DO UPDATE SET
          response_json    = EXCLUDED.response_json,
          cost_when_cached = EXCLUDED.cost_when_cached,
          tokens_in        = EXCLUDED.tokens_in,
          tokens_out       = EXCLUDED.tokens_out,
          ttl_seconds      = EXCLUDED.ttl_seconds,
          created_at       = NOW()
        `,
      columnValues()
    );
  } catch (err2) {
    logger.warn({ err: err2 }, 'response-cache: setCachedResponse failed');
  }
 }
 /**
 * Register one hit against a cache entry with a single UPDATE: increments
 * `hit_count`, adds the entry's cached cost and token counts to its running
 * savings totals, and stamps `last_hit_at`.
 *
 * Failures are logged at warn level and never thrown.
 *
 * @param db       Postgres pool.
 * @param cachedId `id` of the `response_cache` row that was served.
 */
 export async function recordCacheHit(db: Pool, cachedId: number): Promise<void> {
  const rowIdParams = [cachedId];
  try {
    await db.query(
      `
      UPDATE response_cache
      SET hit_count    = hit_count + 1,
          cost_saved   = cost_saved + cost_when_cached,
          tokens_saved = tokens_saved + tokens_in + tokens_out,
          last_hit_at  = NOW()
      WHERE id = $1
      `,
      rowIdParams
    );
  } catch (err) {
    logger.warn({ err }, 'response-cache: recordCacheHit failed');
  }
 }
 /**
 * Aggregate cache savings for the dashboard.
 *
 * Runs three queries in parallel over the trailing `hoursBack` hours:
 *   • totals over entries created or last hit inside the window,
 *   • the five callers with the most hits (entries last hit inside the window),
 *   • the inputs for the hit rate: summed hits and the number of tracked requests.
 *
 * Hit rate = hits / (hits + tracked requests) × 100, rounded to two decimals.
 * It is computed whenever that denominator is positive, so a window served
 * entirely from cache (hits but no tracked requests) reports 100% rather
 * than 0%.
 *
 * NOTE(review): `hit_count` is a per-entry running total, so "hits" here is
 * presumably lifetime hits of entries touched in the window, not hits that
 * occurred inside it — confirm.
 *
 * @param db        Postgres pool.
 * @param hoursBack Size of the trailing window in hours (default 24).
 * @returns Aggregated figures; all-zero values when any query fails (logged at warn level).
 */
 export async function getCacheSavings(
  db: Pool,
  hoursBack: number = 24
 ): Promise<{
  totalHits: number;
  totalCostSaved: number;
  totalTokensSaved: number;
  uniqueEntries: number;
  topCallers: Array<{ caller: string; hits: number; saved: number }>;
  hitRatePercent: number;
 }> {
  try {
    const [totalRow, callerRows, ratioRow] = await Promise.all([
      db.query(
        `SELECT
            COALESCE(SUM(hit_count), 0)::INT     AS total_hits,
            COALESCE(SUM(cost_saved), 0)::NUMERIC AS total_cost_saved,
            COALESCE(SUM(tokens_saved), 0)::BIGINT AS total_tokens_saved,
            COUNT(*)::INT                         AS unique_entries
         FROM response_cache
         WHERE last_hit_at > NOW() - MAKE_INTERVAL(hours => $1)
            OR created_at  > NOW() - MAKE_INTERVAL(hours => $1)`,
        [hoursBack]
      ),
      db.query(
        `SELECT caller_id, SUM(hit_count)::INT AS hits, SUM(cost_saved)::NUMERIC AS saved
         FROM response_cache
         WHERE last_hit_at > NOW() - MAKE_INTERVAL(hours => $1)
         GROUP BY caller_id
         ORDER BY hits DESC
         LIMIT 5`,
        [hoursBack]
      ),
      // Cache hit-rate = hits / (hits + new requests in same window)
      db.query(
        `SELECT
            COALESCE((SELECT SUM(hit_count) FROM response_cache
                      WHERE last_hit_at > NOW() - MAKE_INTERVAL(hours => $1)), 0)::INT AS hits,
            (SELECT COUNT(*) FROM request_tracking
              WHERE created_at > NOW() - MAKE_INTERVAL(hours => $1))::INT             AS total_requests`,
        [hoursBack]
      ),
    ]);
    const totals = totalRow.rows[0];
    const ratio = ratioRow.rows[0];
    const totalHits = parseInt(totals?.total_hits ?? '0', 10);
    // Use the figures the ratio query was written for, and guard on the real
    // denominator so "all hits, no tracked requests" is 100%, not 0%.
    const windowHits = parseInt(ratio?.hits ?? '0', 10) || 0;
    const windowRequests = parseInt(ratio?.total_requests ?? '0', 10) || 0;
    const denominator = windowHits + windowRequests;
    const hitRate = denominator > 0 ? (windowHits / denominator) * 100 : 0;
    return {
      totalHits,
      totalCostSaved: parseFloat(totals?.total_cost_saved ?? '0'),
      totalTokensSaved: parseInt(totals?.total_tokens_saved ?? '0', 10),
      uniqueEntries: parseInt(totals?.unique_entries ?? '0', 10),
      topCallers: callerRows.rows.map((row: any) => ({
        caller: row.caller_id,
        hits: parseInt(row.hits, 10) || 0,
        saved: parseFloat(row.saved) || 0,
      })),
      hitRatePercent: parseFloat(hitRate.toFixed(2)),
    };
  } catch (err) {
    logger.warn({ err }, 'response-cache: getCacheSavings failed (table missing?)');
    return {
      totalHits: 0,
      totalCostSaved: 0,
      totalTokensSaved: 0,
      uniqueEntries: 0,
      topCallers: [],
      hitRatePercent: 0,
    };
  }
 }
 /**
 * Time-series buckets of cache savings for sparkline visualization.
 *
 * Produces `ceil(hoursBack * 60 / bucketMinutes)` buckets ending at the
 * current hour (truncated), spaced `bucketMinutes` apart, each with the cache
 * entries whose `last_hit_at` hour equals the bucket timestamp.
 *
 * The bucket list is generated as exactly one row per bucket. Nesting one
 * `generate_series` inside another yields the same timestamps many times
 * over, and the LEFT JOIN then multiplies every count and sum by that
 * repetition.
 *
 * NOTE(review): entries are matched on their `last_hit_at` truncated to the
 * hour, so buckets only line up with data when `bucketMinutes` is a multiple
 * of 60; and `hits` counts entries, not individual hits — confirm both are
 * acceptable for the sparkline.
 *
 * @param db            Postgres pool.
 * @param hoursBack     Size of the trailing window in hours (default 24).
 * @param bucketMinutes Bucket width in minutes (default 60).
 * @returns Buckets in ascending time order; an empty array when the query fails (logged at warn level).
 */
 export async function getSavingsTimeSeries(
  db: Pool,
  hoursBack: number = 24,
  bucketMinutes: number = 60
 ): Promise<Array<{ ts: string; costSaved: number; hits: number; tokensSaved: number }>> {
  try {
    const buckets = Math.ceil((hoursBack * 60) / bucketMinutes);
    const result = await db.query(
      `
      WITH gs AS (
        SELECT DATE_TRUNC('hour', NOW()) - ($1 || ' minutes')::INTERVAL * s AS bucket_ts
        FROM generate_series(0, $2 - 1) s
      )
      SELECT
        gs.bucket_ts,
        COALESCE(COUNT(rc.id), 0)::INT             AS hits,
        COALESCE(SUM(rc.cost_when_cached), 0)::NUMERIC AS cost_saved,
        COALESCE(SUM(rc.tokens_in + rc.tokens_out), 0)::INT AS tokens_saved
      FROM gs
      LEFT JOIN response_cache rc
        ON DATE_TRUNC('hour', rc.last_hit_at) = gs.bucket_ts
       AND rc.last_hit_at > NOW() - ($1 || ' minutes')::INTERVAL * $2
      GROUP BY gs.bucket_ts
      ORDER BY gs.bucket_ts ASC
      `,
      [bucketMinutes, buckets]
    );
    return result.rows.map((row: any) => ({
      ts: row.bucket_ts.toISOString(),
      costSaved: parseFloat(row.cost_saved) || 0,
      hits: parseInt(row.hits, 10) || 0,
      tokensSaved: parseInt(row.tokens_saved, 10) || 0,
    }));
  } catch (err) {
    logger.warn({ err }, 'response-cache: getSavingsTimeSeries failed');
    return [];
  }
 }
 /**
 * Delete cache entries that were created more than `maxAgeDays` days ago and
 * have either never been hit or were last hit more than `maxAgeDays` days
 * ago. Intended to be run from a periodic job.
 *
 * @param db         Postgres pool.
 * @param maxAgeDays Age limit in days (default 7).
 * @returns Number of rows deleted; 0 when the query fails (logged at warn level).
 */
 export async function pruneStaleCacheEntries(db: Pool, maxAgeDays: number = 7): Promise<number> {
  let removed = 0;
  try {
    const { rowCount } = await db.query(
      `DELETE FROM response_cache
       WHERE created_at < NOW() - MAKE_INTERVAL(days => $1)
         AND (last_hit_at IS NULL OR last_hit_at < NOW() - MAKE_INTERVAL(days => $1))`,
      [maxAgeDays]
    );
    removed = rowCount ?? 0;
  } catch (err) {
    logger.warn({ err }, 'response-cache: prune failed');
  }
  return removed;
 }
 /**
 * Remove every cache entry belonging to one caller, e.g. for a "clear my
 * cache" action. The caller id is trimmed and lower-cased first, matching how
 * it is stored.
 *
 * @param db       Postgres pool.
 * @param callerId Caller whose entries should be deleted.
 * @returns Number of rows deleted; 0 when the query fails (logged at warn level).
 */
 export async function clearCacheForCaller(db: Pool, callerId: string): Promise<number> {
  let removed = 0;
  try {
    const normalizedCaller = callerId.trim().toLowerCase();
    const { rowCount } = await db.query(
      `DELETE FROM response_cache WHERE caller_id = $1`,
      [normalizedCaller]
    );
    removed = rowCount ?? 0;
  } catch (err) {
    logger.warn({ err }, 'response-cache: clearCacheForCaller failed');
  }
  return removed;
 }
--- a/packages/gateway/src/modules/savings-calculator.ts
+++ b/packages/gateway/src/modules/savings-calculator.ts
@ -0,0 +1,267 @@
 /**
 * Savings Calculator
 *
 * Comprehensive savings accounting across ALL gateway mechanisms — not just
 * cache hits. Lean-CTX measures file-context compression; we measure five
 * orthogonal sources of value:
 *
 *   1. Response cache (exact + semantic match)
 *   2. Compression pipeline (verbatim_compact, etc.), including MCP tool-call
 *      compression
 *   3. Subscription-bridge implicit savings (calls via flat-rate Pro plan
 *      vs. what they would have cost via paid API)
 *   4. Local routing (calls served by a local model, priced against a paid
 *      API as opportunity cost)
 *   5. Race mode (cost of the race candidates that were not selected)
 *
 * The dashboard now surfaces all five so the savings counter reflects the
 * gateway's true value rather than only cache hits.
 */
 import type { Pool } from 'pg';
 import { logger } from '../observability/logger.js';
 // Conservative API pricing snapshot (USD per 1k tokens). Used to compute
 // "what would this have cost via direct API". Update as pricing evolves.
 const API_PRICING = {
  // Anthropic
  'claude-opus-4-1':       { in: 0.015,  out: 0.075 },
  'claude-sonnet-4-1':     { in: 0.003,  out: 0.015 },
  'claude-haiku-3':        { in: 0.00025, out: 0.00125 },
  // OpenAI
  'gpt-5.1-codex':         { in: 0.005,  out: 0.020 },
  'gpt-5.1-codex-mini':    { in: 0.0015, out: 0.006 },
  'gpt-4-turbo':           { in: 0.010,  out: 0.030 },
  'gpt-4':                 { in: 0.030,  out: 0.060 },
  'gpt-3.5-turbo':         { in: 0.0005, out: 0.0015 },
  // Google
  'gemini-1.5-pro':        { in: 0.00125, out: 0.005 },
  'gemini-1.5-flash':      { in: 0.000075, out: 0.0003 },
 } as const;
 /** Models that go through a flat-rate subscription bridge → marginal cost = $0 */
 const SUBSCRIPTION_MODEL_PATTERNS = [
  /^claude-/i,         // Claude Code subscription
  /^gpt-5\.1-codex/i,  // Codex CLI subscription
  /^gpt-(4|3\.5)/i,    // ChatGPT Plus / Copilot subscription
  /^gemini-/i,         // Gemini Advanced
  /^github-copilot/i,  // GitHub Copilot
  /^microsoft.365/i,   // M365 Copilot
 ];
 /**
 * Resolve the API list price (USD per 1k tokens) for a model id.
 *
 * Matching is case-insensitive. An exact key match wins; otherwise the
 * LONGEST pricing key that is a prefix of the model id is used, so that
 * `gpt-5.1-codex-mini-<suffix>` resolves to `gpt-5.1-codex-mini` rather than
 * to the shorter `gpt-5.1-codex` key that happens to be declared first.
 *
 * @param model - Model identifier as recorded for the request.
 * @returns The `{ in, out }` price entry, or `null` when no key matches.
 */
 function lookupApiPrice(model: string): { in: number; out: number } | null {
  const table: Readonly<Record<string, { readonly in: number; readonly out: number }>> = API_PRICING;
  const m = model.toLowerCase();
  // Exact match first. Own-property check so inherited names such as
  // "constructor" never resolve to something that is not a price entry.
  if (Object.prototype.hasOwnProperty.call(table, m)) return table[m] ?? null;
  // Fuzzy match (claude-sonnet-4-1-something → claude-sonnet-4-1): pick the
  // most specific (longest) matching prefix instead of the first one declared.
  let bestKey: string | null = null;
  for (const key of Object.keys(table)) {
    if (m.startsWith(key) && (bestKey === null || key.length > bestKey.length)) {
      bestKey = key;
    }
  }
  return bestKey === null ? null : (table[bestKey] ?? null);
 }
 /** True when `model` matches at least one entry of SUBSCRIPTION_MODEL_PATTERNS. */
 function isSubscriptionModel(model: string): boolean {
  for (const pattern of SUBSCRIPTION_MODEL_PATTERNS) {
    if (pattern.test(model)) return true;
  }
  return false;
 }
 /**
 * True when the model id starts (case-insensitively) with one of the known
 * local model family names.
 */
 function isLocalModel(model: string): boolean {
  const families = ['qwen', 'llama', 'mistral', 'magatama', 'phi', 'nomic', 'gemma'];
  const localPrefix = new RegExp(`^(${families.join('|')})`, 'i');
  return localPrefix.test(model);
 }
 /**
 * Result of `getComprehensiveSavings`: headline totals, a per-source
 * breakdown, and the raw request volume the numbers were derived from.
 * All cost fields are in USD.
 */
 export interface ComprehensiveSavings {
  /** Sum of the `cost` fields of all five `bySource` buckets. */
  totalCostSaved: number;
  /** Sum of the cache and compression token counts only. */
  totalTokensSaved: number;
  /** Per-source breakdown for the dashboard. */
  bySource: {
    cache: { tokens: number; cost: number; hits: number };
    compression: { tokens: number; cost: number; calls: number };
    subscriptionBridge: { tokens: number; cost: number; calls: number };
    localRouting: { tokens: number; cost: number; calls: number };
    raceMode: { tokens: number; cost: number; calls: number };
  };
  /** How much you would have paid for the same volume at API list prices. */
  costWithoutGateway: number;
  /** What you actually paid (real $): the sum of recorded per-request costs. */
  costWithGateway: number;
  /** Time window, in hours, that was requested. */
  hoursBack: number;
  /** Inputs that gave us this number (request count and token volume in the window). */
  totals: { requests: number; tokensIn: number; tokensOut: number };
 }
 /**
 * Compute comprehensive savings across all mechanisms for the last
 * `hoursBack` hours.
 *
 * Strategy:
 *   For each request, determine where it went and price it both ways:
 *     - "Would-be cost"  = API list price for the model that handled it
 *                          (local models are priced as gpt-3.5-turbo)
 *     - "Actual cost"    = the recorded cost_usd (missing/unparseable → 0)
 *     - "Saved"          = max(0, would-be − actual)
 *
 * Cache, compression, race-mode and MCP tool-call savings come from their
 * own aggregate queries. The compression, race-mode and MCP queries are
 * best-effort: a failure is logged at debug level and that source is left
 * at its previous value.
 *
 * @param db - Postgres pool used for all queries.
 * @param hoursBack - Size of the look-back window in hours (default 24).
 * @returns The savings report. This function does not throw on query
 *   failure: the error is logged and the report accumulated so far is returned.
 *
 * NOTE(review): if the request_tracking query fails after the cache query
 * succeeded, the returned object has `bySource.cache` populated while
 * `totalCostSaved` / `totals` are still zero — confirm callers tolerate that.
 */
 export async function getComprehensiveSavings(
  db: Pool,
  hoursBack: number = 24
 ): Promise<ComprehensiveSavings> {
  // Starts as an all-zero report and is filled in place by the steps below.
  const empty: ComprehensiveSavings = {
    totalCostSaved: 0,
    totalTokensSaved: 0,
    bySource: {
      cache: { tokens: 0, cost: 0, hits: 0 },
      compression: { tokens: 0, cost: 0, calls: 0 },
      subscriptionBridge: { tokens: 0, cost: 0, calls: 0 },
      localRouting: { tokens: 0, cost: 0, calls: 0 },
      raceMode: { tokens: 0, cost: 0, calls: 0 },
    },
    costWithoutGateway: 0,
    costWithGateway: 0,
    hoursBack,
    totals: { requests: 0, tokensIn: 0, tokensOut: 0 },
  };
  try {
    // 1) Cache hits — aggregated over entries whose last hit is inside the window
    const cacheRow = await db.query(
      `SELECT
         COALESCE(SUM(hit_count), 0)::INT  AS hits,
         COALESCE(SUM(cost_saved), 0)::NUMERIC AS cost,
         COALESCE(SUM(tokens_saved), 0)::BIGINT AS tokens
       FROM response_cache
       WHERE last_hit_at > NOW() - MAKE_INTERVAL(hours => $1)`,
      [hoursBack]
    );
    empty.bySource.cache = {
      hits: parseInt(cacheRow.rows[0]?.hits ?? '0', 10),
      cost: parseFloat(cacheRow.rows[0]?.cost ?? '0'),
      tokens: parseInt(cacheRow.rows[0]?.tokens ?? '0', 10),
    };
    // 2) All requests in the window, classified by routing (subscription
    //    bridge vs. local model). Note: fallback_used is selected but not read.
    const reqRows = await db.query(
      `SELECT model, tokens_in, tokens_out, cost_usd, fallback_used
       FROM request_tracking
       WHERE created_at > NOW() - MAKE_INTERVAL(hours => $1)`,
      [hoursBack]
    );
    let totalReq = 0, totalIn = 0, totalOut = 0;
    let withGateway = 0, withoutGateway = 0;
    for (const r of reqRows.rows) {
      const model = String(r.model ?? '');
      // Missing or unparseable numeric columns count as 0.
      const tokensIn = parseInt(r.tokens_in, 10) || 0;
      const tokensOut = parseInt(r.tokens_out, 10) || 0;
      const actualCost = parseFloat(r.cost_usd) || 0;
      totalReq += 1;
      totalIn += tokensIn;
      totalOut += tokensOut;
      withGateway += actualCost;
      // Determine "would-be cost" — what this request would have cost at API
      // list prices for the model that handled it (or its closest paid sibling).
      // Models with no price entry that are not local contribute 0.
      const apiPrice = lookupApiPrice(model);
      let wouldBeCost = 0;
      if (apiPrice) {
        wouldBeCost = (tokensIn / 1000) * apiPrice.in + (tokensOut / 1000) * apiPrice.out;
      } else if (isLocalModel(model)) {
        // Local model — compare against the gpt-3.5-turbo list price as opportunity cost
        const ref = API_PRICING['gpt-3.5-turbo'];
        wouldBeCost = (tokensIn / 1000) * ref.in + (tokensOut / 1000) * ref.out;
      }
      withoutGateway += wouldBeCost;
      // Bucket the savings into a source. Subscription patterns are checked
      // first; requests matching neither bucket only affect the totals.
      if (isSubscriptionModel(model)) {
        empty.bySource.subscriptionBridge.calls += 1;
        empty.bySource.subscriptionBridge.tokens += tokensIn + tokensOut;
        empty.bySource.subscriptionBridge.cost += Math.max(0, wouldBeCost - actualCost);
      } else if (isLocalModel(model)) {
        empty.bySource.localRouting.calls += 1;
        empty.bySource.localRouting.tokens += tokensIn + tokensOut;
        empty.bySource.localRouting.cost += Math.max(0, wouldBeCost - actualCost);
      }
    }
    // 3) Compression savings — pull from tokenvault_metrics if available
    try {
      const compRow = await db.query(
        `SELECT
           COUNT(*)::INT AS calls,
           COALESCE(SUM(GREATEST(tokens_before - tokens_after, 0)), 0)::BIGINT AS tokens_saved
         FROM tokenvault_metrics
         WHERE created_at > NOW() - MAKE_INTERVAL(hours => $1)
           AND tool_used = 'gateway'`,
        [hoursBack]
      );
      const tokensCompressed = parseInt(compRow.rows[0]?.tokens_saved ?? '0', 10);
      // Conservative pricing: assume average input pricing of $0.001/1k tokens
      const compCost = (tokensCompressed / 1000) * 0.001;
      empty.bySource.compression = {
        calls: parseInt(compRow.rows[0]?.calls ?? '0', 10),
        tokens: tokensCompressed,
        cost: compCost,
      };
    } catch (err) {
      // Any query error lands here, not only a missing table.
      logger.debug({ err }, 'savings: compression aggregation skipped (table missing)');
    }
    // 4) Race mode — picked the faster/cheaper candidate, "saved" the loser cost
    //    (sum of cost_usd over the rows with selected = false)
    try {
      const raceRow = await db.query(
        `SELECT
           COUNT(DISTINCT call_id)::INT AS races,
           COALESCE(SUM(cost_usd) FILTER (WHERE selected = false), 0)::NUMERIC AS not_picked_cost
         FROM race_mode_results
         WHERE created_at > NOW() - MAKE_INTERVAL(hours => $1)`,
        [hoursBack]
      );
      empty.bySource.raceMode = {
        calls: parseInt(raceRow.rows[0]?.races ?? '0', 10),
        cost: parseFloat(raceRow.rows[0]?.not_picked_cost ?? '0'),
        tokens: 0,
      };
    } catch (err) {
      // Any query error lands here, not only a missing table.
      logger.debug({ err }, 'savings: race aggregation skipped (table missing)');
    }
    // 5) MCP tool-call compression — drop-in Lean-CTX replacement
    try {
      const mcpRow = await db.query(
        `SELECT COUNT(*)::INT AS calls,
                COALESCE(SUM(tokens_saved), 0)::BIGINT AS tokens_saved
         FROM mcp_tool_calls
         WHERE created_at > NOW() - MAKE_INTERVAL(hours => $1)`,
        [hoursBack]
      );
      const mcpTokens = parseInt(mcpRow.rows[0]?.tokens_saved ?? '0', 10);
      const mcpCalls = parseInt(mcpRow.rows[0]?.calls ?? '0', 10);
      // Tool-call savings cost-equivalence: Sonnet-equivalent pricing
      // ($3/MTok input, $15/MTok output, weighted 60/40 in/out for tool returns).
      // → 3.0 * 0.6 + 15.0 * 0.4 = $7.80 per MTok, i.e. $0.0078 per 1k tokens.
      const mcpCost = (mcpTokens / 1_000_000) * (3.0 * 0.6 + 15.0 * 0.4);
      // MCP savings have no bucket of their own; they are added on top of the
      // compression entry.
      empty.bySource.compression.tokens += mcpTokens;
      empty.bySource.compression.cost += mcpCost;
      empty.bySource.compression.calls += mcpCalls;
    } catch (err) {
      // Any query error lands here, not only a missing table.
      logger.debug({ err }, 'savings: mcp tool aggregation skipped (table missing)');
    }
    // Headline cost total: sum of all five source buckets.
    empty.totalCostSaved =
      empty.bySource.cache.cost +
      empty.bySource.compression.cost +
      empty.bySource.subscriptionBridge.cost +
      empty.bySource.localRouting.cost +
      empty.bySource.raceMode.cost;
    // Headline token total: only cache and compression tokens are counted as
    // "saved"; subscription/local tokens were still consumed.
    empty.totalTokensSaved =
      empty.bySource.cache.tokens +
      empty.bySource.compression.tokens;
    empty.costWithoutGateway = withoutGateway;
    empty.costWithGateway = withGateway;
    empty.totals = { requests: totalReq, tokensIn: totalIn, tokensOut: totalOut };
  } catch (err) {
    logger.warn({ err }, 'savings-calculator: comprehensive computation failed');
  }
  return empty;
 }
--- a/packages/gateway/src/modules/settings-store.ts
+++ b/packages/gateway/src/modules/settings-store.ts
@ -0,0 +1,214 @@
 /**
 * Settings Store
 *
 * Persists user configuration (which subscriptions they have, which API
 * providers they use, etc.) to a JSON file on disk. Sensitive fields like
 * API keys are stored verbatim but never returned in plaintext from
 * `getPublicSettings()` — only a `hasKey: true/false` flag is exposed.
 */
 import { readFileSync, writeFileSync, existsSync, mkdirSync } from 'fs';
 import { dirname, join } from 'path';
 import { z } from 'zod';
 import { logger } from '../observability/logger.js';
 /**
 * Zod schema for the persisted settings file. Every section has a default,
 * so parsing an empty object yields a complete `Settings` value.
 */
 const SettingsSchema = z.object({
  /** How the gateway should pick providers: 'auto' uses all, others restrict the pool. */
  routingMode: z.enum(['auto', 'subscription-only', 'api-only', 'local-only']).default('auto'),
  /** Per-subscription configuration keyed by SubscriptionId. */
  subscriptions: z
    .record(
      z.string(),
      z.object({
        enabled: z.boolean().default(true),
        autoSpawn: z.boolean().default(true),
        /**
         * Optional remote bridge URL. When set, the gateway will route to this
         * URL instead of trying to spawn a local bridge. Use this when the CLI
         * subscription lives on a different machine than the gateway.
         * An empty string is also accepted (in addition to a valid URL).
         */
        bridgeUrl: z.string().url().optional().or(z.literal('')),
        notes: z.string().optional(),
      })
    )
    .default({}),
  /** Per-API-provider configuration keyed by provider name (cerebras, groq, …). */
  apiProviders: z
    .record(
      z.string(),
      z.object({
        // Unlike subscriptions, API providers default to disabled.
        enabled: z.boolean().default(false),
        apiKey: z.string().optional(),
        baseUrl: z.string().optional(),
        notes: z.string().optional(),
      })
    )
    .default({}),
  /** Local Ollama configuration. */
  ollama: z
    .object({
      enabled: z.boolean().default(true),
      baseUrl: z.string().default('http://localhost:11434'),
    })
    .default({ enabled: true, baseUrl: 'http://localhost:11434' }),
  /**
   * Simple Mode — for users who only use 1-2 subscriptions.
   * Hides advanced tabs (providers, races, share, report, memory) and
   * filters wallet/subscriptions to only show enabled providers.
   */
  ui: z
    .object({
      simpleMode: z.boolean().default(true),
      hideEmptyProviders: z.boolean().default(true),
      showTooltips: z.boolean().default(true),
    })
    .default({ simpleMode: true, hideEmptyProviders: true, showTooltips: true }),
  /** ISO timestamp of last update. */
  updatedAt: z.string().optional(),
 });
 /** Fully-parsed settings shape inferred from `SettingsSchema`. */
 export type Settings = z.infer<typeof SettingsSchema>;
 /**
 * Settings shape that is safe to send to the dashboard: identical to
 * `Settings` except that each API provider exposes `hasKey` instead of the
 * `apiKey` value itself.
 */
 export interface PublicSettings extends Omit<Settings, 'apiProviders'> {
  apiProviders: Record<string, { enabled: boolean; hasKey: boolean; baseUrl?: string; notes?: string }>;
 }
 // Location of the settings file: $SETTINGS_PATH when set, otherwise
 // $HOME/.llm-gateway/settings.json (falling back to /root when HOME is unset).
 const SETTINGS_PATH =
  process.env['SETTINGS_PATH'] ?? join(process.env['HOME'] ?? '/root', '.llm-gateway', 'settings.json');
 // Subscriptions that are enabled (with auto-spawn) when no settings file exists.
 // NOTE(review): 'microsoft-365-copilot' has a bridge env mapping in
 // applySettingsToEnv but no default entry here — confirm that is intentional.
 const DEFAULT_SUBSCRIPTIONS: Settings['subscriptions'] = {
  'claude-code': { enabled: true, autoSpawn: true },
  'github-copilot': { enabled: true, autoSpawn: true },
  'chatgpt': { enabled: true, autoSpawn: true },
  'gemini': { enabled: true, autoSpawn: true },
  'codex': { enabled: true, autoSpawn: true },
  'aider': { enabled: true, autoSpawn: true },
 };
 /**
 * Build the default settings: automatic routing, every default subscription
 * enabled, and Ollama pointed at $OLLAMA_BASE_URL (or localhost:11434).
 * Remaining sections are filled in by the schema defaults.
 */
 function getDefaults(): Settings {
  const ollamaBaseUrl = process.env['OLLAMA_BASE_URL'] ?? 'http://localhost:11434';
  const seed = {
    routingMode: 'auto',
    subscriptions: DEFAULT_SUBSCRIPTIONS,
    ollama: { enabled: true, baseUrl: ollamaBaseUrl },
  };
  return SettingsSchema.parse(seed);
 }
 /**
 * Read and validate the settings file.
 *
 * @returns The parsed settings, or the defaults when the file is missing,
 *   unreadable, not valid JSON, or fails schema validation (the failure is
 *   logged as a warning).
 */
 export function loadSettings(): Settings {
  try {
    return existsSync(SETTINGS_PATH)
      ? SettingsSchema.parse(JSON.parse(readFileSync(SETTINGS_PATH, 'utf-8')))
      : getDefaults();
  } catch (err) {
    logger.warn({ err, path: SETTINGS_PATH }, 'Failed to load settings — using defaults');
    return getDefaults();
  }
 }
 /**
 * Merge two keyed config records entry-by-entry: keys only present in `base`
 * are kept, and for keys present in both the patch's fields override the
 * stored fields while fields the patch does not mention are preserved.
 */
 function mergeSettingsEntries<T extends object>(
  base: Record<string, T>,
  patch: Record<string, T> | undefined
 ): Record<string, T> {
  const merged = new Map<string, T>(Object.entries(base));
  for (const [key, value] of Object.entries(patch ?? {})) {
    const existing = merged.get(key);
    merged.set(key, existing ? { ...existing, ...value } : value);
  }
  return Object.fromEntries(merged);
 }
 /**
 * Persist settings to disk, merging with any existing values to avoid wiping
 * fields the caller didn't include in the patch.
 *
 * `subscriptions` and `apiProviders` are merged per entry AND per field, so a
 * patch such as `{ apiProviders: { groq: { enabled: true } } }` keeps the
 * stored `apiKey` / `baseUrl` / `notes` of that provider. This matters because
 * the dashboard only ever sees `hasKey`, never the key itself, and therefore
 * cannot send the key back when toggling other fields. To clear a field,
 * send it explicitly (e.g. an empty string).
 *
 * @param patch - Partial settings to apply on top of the stored settings.
 * @returns The merged, validated settings that were written.
 * @throws When the merged value fails schema validation or the file cannot
 *   be written (the write error is logged and rethrown).
 */
 export function saveSettings(patch: Partial<Settings>): Settings {
  const current = loadSettings();
  const merged: Settings = SettingsSchema.parse({
    ...current,
    ...patch,
    subscriptions: mergeSettingsEntries(current.subscriptions, patch.subscriptions),
    apiProviders: mergeSettingsEntries(current.apiProviders, patch.apiProviders),
    ollama: { ...current.ollama, ...(patch.ollama ?? {}) },
    ui: { ...current.ui, ...(patch.ui ?? {}) },
    updatedAt: new Date().toISOString(),
  });
  try {
    mkdirSync(dirname(SETTINGS_PATH), { recursive: true });
    // mode only applies when the file is created; an existing file keeps its permissions.
    writeFileSync(SETTINGS_PATH, JSON.stringify(merged, null, 2), { mode: 0o600 });
    logger.info({ path: SETTINGS_PATH }, 'Settings saved');
  } catch (err) {
    logger.error({ err, path: SETTINGS_PATH }, 'Failed to persist settings');
    throw err;
  }
  // Mirror to env vars so existing provider lookups pick up changes immediately.
  applySettingsToEnv(merged);
  return merged;
 }
 /**
 * Load the settings and return a dashboard-safe copy in which every API
 * provider's key is replaced by a `hasKey` flag.
 */
 export function getPublicSettings(): PublicSettings {
  const { routingMode, subscriptions, apiProviders: storedProviders, ollama, ui, updatedAt } =
    loadSettings();
  const apiProviders: PublicSettings['apiProviders'] = {};
  Object.entries(storedProviders).forEach(([name, { enabled, apiKey, baseUrl, notes }]) => {
    // Expose only whether a key exists, never the key itself.
    apiProviders[name] = { enabled, hasKey: Boolean(apiKey), baseUrl, notes };
  });
  return { routingMode, subscriptions, apiProviders, ollama, ui, updatedAt };
 }
 /**
 * Mirror settings into process.env so that the existing provider lookups see
 * user-configured values without further changes:
 *   - API keys of enabled providers that have a key and a known env mapping
 *   - the Ollama base URL when Ollama is enabled
 *   - bridge URLs of enabled subscriptions that have one and a known mapping
 *
 * Variables are only ever set here; disabling an entry does not unset a
 * variable that is already present.
 *
 * @param settings - Settings to apply; loaded from disk when omitted.
 */
 export function applySettingsToEnv(settings: Settings = loadSettings()): void {
  // Provider name → env var holding its API key.
  const providerKeyVars: Record<string, string> = {
    cerebras: 'CEREBRAS_API_KEY',
    groq: 'GROQ_API_KEY',
    mistral: 'MISTRAL_API_KEY',
    nvidia: 'NVIDIA_API_KEY',
    cloudflare: 'CLOUDFLARE_AI_TOKEN',
    'openai-codex': 'OPENAI_API_KEY',
  };
  // Subscription id → env var the existing provider lookup reads the bridge URL from.
  const bridgeUrlVars: Record<string, string> = {
    'claude-code': 'CLAUDE_BRIDGE_URL',
    'github-copilot': 'COPILOT_BRIDGE_URL',
    'microsoft-365-copilot': 'M365_COPILOT_BRIDGE_URL',
    'chatgpt': 'CHATGPT_BRIDGE_URL',
    'gemini': 'GEMINI_BRIDGE_URL',
    'codex': 'CODEX_BRIDGE_URL',
    'aider': 'AIDER_BRIDGE_URL',
  };

  Object.entries(settings.apiProviders).forEach(([provider, { enabled, apiKey }]) => {
    const target = providerKeyVars[provider];
    if (!target || !enabled || !apiKey) return;
    process.env[target] = apiKey;
  });

  const { ollama } = settings;
  if (ollama.enabled && ollama.baseUrl) {
    process.env['OLLAMA_BASE_URL'] = ollama.baseUrl;
  }

  Object.entries(settings.subscriptions).forEach(([subscriptionId, { enabled, bridgeUrl }]) => {
    const target = bridgeUrlVars[subscriptionId];
    if (!target || !enabled || !bridgeUrl) return;
    process.env[target] = bridgeUrl;
  });
 }
 /**
 * Schema for partial settings updates: every top-level field of
 * `SettingsSchema` is optional, with the four nested sections re-declared
 * explicitly as optional versions of their original schemas.
 * NOTE(review): field defaults inside a nested entry (e.g. `enabled`)
 * presumably still apply to entries present in a patch — confirm against
 * the caller's expectations.
 */
 export const SettingsPatchSchema = SettingsSchema.partial().extend({
  subscriptions: SettingsSchema.shape.subscriptions.optional(),
  apiProviders: SettingsSchema.shape.apiProviders.optional(),
  ollama: SettingsSchema.shape.ollama.optional(),
  ui: SettingsSchema.shape.ui.optional(),
 });
--- a/packages/gateway/src/modules/share-card.ts
+++ b/packages/gateway/src/modules/share-card.ts
@ -0,0 +1,174 @@
 /**
 * Public Share Card Generator
 *
 * Renders a shareable SVG image showing your gateway savings — useful for
 * social posts, blog headers, README badges. Tokens are rounded; no
 * personally identifying information leaks (caller IDs, model names etc.
 * are NOT included). Just headline numbers + brand.
 *
 * Output is always a valid SVG so it can be embedded as `<img src="...">`
 * or downloaded directly.
 */
 import type { Pool } from 'pg';
 import { getComprehensiveSavings } from './savings-calculator.js';
 import { getBuddyState } from './gamification.js';
 /**
 * Format a count compactly for the share card: plain integer below one
 * thousand, one decimal with a `K` suffix below one million, one decimal
 * with an `M` suffix above.
 *
 * The unit is chosen on the value as it will be displayed after rounding, so
 * 999 999 renders as "1.0M" (not "1000.0K") and 999.6 renders as "1.0K"
 * (not "1000"). Non-finite input renders as "0" rather than leaking "NaN"
 * onto a public image.
 *
 * @param n - The number to format.
 * @returns The formatted string.
 */
 function fmtNum(n: number): string {
  if (!Number.isFinite(n)) return '0';
  // 999_950 is the smallest value that rounds to "1000.0" at one decimal of K.
  if (n >= 999_950) return (n / 1_000_000).toFixed(1) + 'M';
  // 999.5 is the smallest value that Math.round turns into 1000.
  if (n >= 999.5) return (n / 1_000).toFixed(1) + 'K';
  return Math.round(n).toString();
 }
 /**
 * Format a USD amount with precision that grows as the value shrinks:
 * six decimals below one cent, four below one dollar, two otherwise.
 */
 function fmtCost(c: number): string {
  let decimals = 2;
  if (c < 0.01) {
    decimals = 6;
  } else if (c < 1) {
    decimals = 4;
  }
  return '$' + c.toFixed(decimals);
 }
 /** Escape `&`, `<`, `>` and `"` so the text can be embedded in SVG markup. */
 function escSvg(s: string): string {
  const entities: Record<string, string> = {
    '&': '&amp;',
    '<': '&lt;',
    '>': '&gt;',
    '"': '&quot;',
  };
  return s.replace(/[&<>"]/g, (ch) => entities[ch] ?? ch);
 }
 /** Reporting window selectable for a share card. */
 export type ShareCardPeriod = 'day' | 'week' | 'month' | 'all';
 /** Colour palette selectable for a share card. */
 export type ShareCardTheme = 'dark' | 'light';
 // Look-back window in hours for each period; 'all' is approximated as five years.
 const PERIOD_HOURS: Record<ShareCardPeriod, number> = {
  day: 24, week: 168, month: 720, all: 24 * 365 * 5,
 };
 /**
 * Render the savings share card as a 1200×630 SVG string.
 *
 * Savings for the chosen period and the 'gateway' buddy state are loaded in
 * parallel, then interpolated into a fixed SVG template: headline tokens
 * saved, cost saved, request count, an efficiency percentage, a stacked bar
 * of the five savings sources, and a buddy footer.
 *
 * @param db - Postgres pool passed through to the savings and buddy queries.
 * @param opts - `period` (default 'month') and `theme` (default 'dark').
 * @returns The complete SVG document as a string.
 *
 * NOTE(review): the footer's "routing AI traffic since" date is the date the
 * card is rendered, not a stored start date — confirm that is intended.
 * NOTE(review): `efficacy` is not clamped, so it goes negative when the
 * recorded cost exceeds the list-price estimate.
 */
 export async function generateShareCard(
  db: Pool,
  opts: { period?: ShareCardPeriod; theme?: ShareCardTheme } = {}
 ): Promise<string> {
  const period: ShareCardPeriod = opts.period ?? 'month';
  const theme: ShareCardTheme = opts.theme ?? 'dark';
  const hours = PERIOD_HOURS[period];
  const [savings, buddy] = await Promise.all([
    getComprehensiveSavings(db, hours),
    getBuddyState(db, 'gateway'),
  ]);
  // Theme palette
  const palette = theme === 'dark' ? {
    bg: '#0a0a0a', surface: '#161616', text: '#e8e8e8', dim: '#888888',
    accent: '#d4ff00', accentDim: '#8aa800', border: '#2a2a2a',
  } : {
    bg: '#f4f7fa', surface: '#ffffff', text: '#24313d', dim: '#667684',
    accent: '#0f766e', accentDim: '#8ab9b5', border: '#d6e0e7',
  };
  const periodLabel = period === 'day' ? 'Last 24 hours'
    : period === 'week' ? 'Last 7 days'
    : period === 'month' ? 'Last 30 days'
    : 'All-time';
  const W = 1200, H = 630; // Open Graph standard
  const totalTokens = savings.totalTokensSaved;
  const totalCost = savings.totalCostSaved;
  const reqCount = savings.totals.requests;
  // Share of the list-price cost that was not actually paid, in percent
  // (0 when there is no list-price cost to compare against).
  const efficacy = savings.costWithoutGateway > 0
    ? ((savings.costWithoutGateway - savings.costWithGateway) / savings.costWithoutGateway) * 100
    : 0;
  // Source-bar widths, as a percentage of total cost saved. The tiny floor
  // avoids a division by zero when nothing was saved.
  const total = Math.max(0.0000001, savings.totalCostSaved);
  const wCache = (savings.bySource.cache.cost / total) * 100;
  const wComp  = (savings.bySource.compression.cost / total) * 100;
  const wSub   = (savings.bySource.subscriptionBridge.cost / total) * 100;
  const wLocal = (savings.bySource.localRouting.cost / total) * 100;
  const wRace  = (savings.bySource.raceMode.cost / total) * 100;
  return `<svg xmlns="http://www.w3.org/2000/svg" width="${W}" height="${H}" viewBox="0 0 ${W} ${H}">
  <defs>
    <linearGradient id="bgGrad" x1="0" y1="0" x2="1" y2="1">
      <stop offset="0%"  stop-color="${palette.bg}"/>
      <stop offset="100%" stop-color="${palette.surface}"/>
    </linearGradient>
    <radialGradient id="glow" cx="20%" cy="0%" r="80%">
      <stop offset="0%"  stop-color="${palette.accent}" stop-opacity="0.20"/>
      <stop offset="60%" stop-color="${palette.accent}" stop-opacity="0.04"/>
      <stop offset="100%" stop-color="${palette.bg}"     stop-opacity="0"/>
    </radialGradient>
    <style>
      .mono   { font-family: 'JetBrains Mono', 'SF Mono', monospace; }
      .sans   { font-family: 'Inter', -apple-system, sans-serif; }
      .num    { font-weight: 700; letter-spacing: -0.02em; }
      .label  { letter-spacing: 0.16em; text-transform: uppercase; }
    </style>
  </defs>
  <!-- background -->
  <rect width="${W}" height="${H}" fill="url(#bgGrad)"/>
  <rect width="${W}" height="${H}" fill="url(#glow)"/>
  <rect width="${W}" height="${H}" fill="none" stroke="${palette.border}" stroke-width="2"/>
  <!-- brand mark -->
  <g transform="translate(48 48)">
    <rect x="0" y="0" width="14" height="14" fill="${palette.accent}"/>
    <text x="24" y="12" class="mono" font-size="20" font-weight="700" fill="${palette.text}">llm.gateway</text>
    <text x="180" y="12" class="mono" font-size="13" fill="${palette.dim}">— ${escSvg(periodLabel)}</text>
  </g>
  <!-- top-right: brand tag / version -->
  <g transform="translate(${W - 48} 48)">
    <text x="0" y="12" text-anchor="end" class="mono" font-size="11" fill="${palette.dim}" letter-spacing="0.1em">CONTEXT-X.ORG</text>
  </g>
  <!-- HUGE counter — eyebrow above, big number well below to avoid overlap -->
  <g transform="translate(48 ${H/2 - 110})">
    <text x="0" y="0" class="mono label" font-size="14" fill="${palette.dim}">tokens prevented · ${escSvg(periodLabel.toLowerCase())}</text>
    <text x="0" y="135" class="mono num" font-size="120" fill="${palette.accent}">${fmtNum(totalTokens)}</text>
    <text x="0" y="180" class="mono" font-size="18" fill="${palette.text}">
      <tspan>${fmtCost(totalCost)} saved</tspan>
      <tspan dx="20" fill="${palette.dim}">·</tspan>
      <tspan dx="14">${fmtNum(reqCount)} calls</tspan>
      <tspan dx="20" fill="${palette.dim}">·</tspan>
      <tspan dx="14">${efficacy.toFixed(1)}% efficiency</tspan>
    </text>
  </g>
  <!-- 5-axis breakdown bar -->
  <g transform="translate(48 ${H - 180})">
    <text x="0" y="0" class="mono label" font-size="12" fill="${palette.dim}">savings sources · 5-axis breakdown</text>
    <rect x="0" y="14" width="${W - 96}" height="22" fill="${palette.surface}" stroke="${palette.border}"/>
    ${(() => {
      let x = 0;
      const segs: string[] = [];
      const w = W - 96;
      const pieces = [
        { p: wCache, c: '#d4ff00', label: '⚡' },
        { p: wComp,  c: '#2dd4bf', label: '🗜' },
        { p: wSub,   c: '#60a5fa', label: '🌉' },
        { p: wLocal, c: '#a78bfa', label: '🏠' },
        { p: wRace,  c: '#f97316', label: '🏁' },
      ];
      for (const piece of pieces) {
        const segW = (piece.p / 100) * w;
        if (segW > 0.5) {
          segs.push(`<rect x="${x}" y="14" width="${segW}" height="22" fill="${piece.c}"/>`);
        }
        x += segW;
      }
      return segs.join('');
    })()}
    <g transform="translate(0 60)" class="mono" font-size="11" fill="${palette.dim}">
      <text x="0"   y="0"><tspan fill="#d4ff00">●</tspan> cache</text>
      <text x="120" y="0"><tspan fill="#2dd4bf">●</tspan> compression</text>
      <text x="270" y="0"><tspan fill="#60a5fa">●</tspan> subscription bridges</text>
      <text x="470" y="0"><tspan fill="#a78bfa">●</tspan> local routing</text>
      <text x="600" y="0"><tspan fill="#f97316">●</tspan> race mode</text>
    </g>
  </g>
  <!-- footer / buddy -->
  <g transform="translate(48 ${H - 70})">
    <text x="0" y="0" class="mono" font-size="11" fill="${palette.dim}">
      <tspan fill="${palette.accent}">${escSvg(buddy.species)}</tspan>
      <tspan dx="6">·</tspan>
      <tspan dx="6">Lv.${buddy.level}</tspan>
      <tspan dx="6">·</tspan>
      <tspan dx="6">${buddy.streakDays}d streak</tspan>
      <tspan dx="20" fill="${palette.dim}">— routing AI traffic since ${escSvg(new Date().toISOString().split('T')[0])}</tspan>
    </text>
  </g>
 </svg>`;
 }
--- a/packages/gateway/src/modules/subscription-discovery.ts
+++ b/packages/gateway/src/modules/subscription-discovery.ts
@ -0,0 +1,303 @@
 /**
 * Subscription Discovery
 *
 * Auto-detects locally installed CLI subscriptions (Claude Code, GitHub Copilot,
 * ChatGPT, Gemini, etc.) and reports their authentication status. The discovery
 * results drive automatic bridge spawning and dynamic provider registration.
 */
 import { execFile } from 'child_process';
 import { promisify } from 'util';
 import { existsSync } from 'fs';
 import { logger } from '../observability/logger.js';
 // Promise-returning wrapper around child_process.execFile, used by the CLI probes.
 const execFileAsync = promisify(execFile);
 /** Identifiers of the subscriptions the gateway knows how to bootstrap. */
 export type SubscriptionId =
  | 'claude-code'
  | 'github-copilot'
  | 'microsoft-365-copilot'
  | 'chatgpt'
  | 'gemini'
  | 'codex'
  | 'aider';
 /** Static description of one subscription: how to probe it and how to reach its bridge. */
 export interface SubscriptionDescriptor {
  id: SubscriptionId;
  /** Friendly display name */
  label: string;
  /** CLI binary required to use the subscription */
  command: string;
  /** Args used for the version probe */
  versionArgs: readonly string[];
  /** Optional args for an auth-status probe of `command` */
  authProbeArgs?: readonly string[];
  /** Default port the bridge listens on */
  bridgePort: number;
  /** ENV var the gateway uses to find the bridge URL */
  bridgeEnvKey: string;
  /** Logical provider name in `external-providers.ts` */
  providerName: string;
  /** Models exposed via this subscription */
  models: ReadonlyArray<{ id: string; tier: 'fast' | 'medium' | 'large' | 'reasoning' }>;
  /** Which kind of bridge implementation serves this subscription */
  bridgeImplementation: 'inline-claude' | 'inline-openai' | 'inline-copilot' | 'external-codex';
 }
 /** Discovery result for a single subscription. */
 export interface SubscriptionStatus {
  descriptor: SubscriptionDescriptor;
  /** True when the CLI was found, or when its bridge is reachable. */
  installed: boolean;
  /** Result of the auth probe; 'unknown' when no reliable probe exists. */
  authenticated: boolean | 'unknown';
  /** First line of the version probe output, when the CLI was found. */
  version?: string;
  error?: string;
  /** Bridge URL from the environment, or the auto-detected local URL. */
  bridgeUrl?: string;
  /** True when the bridge's /health endpoint answered. */
  bridgeRunning: boolean;
 }
 /**
 * Catalog of subscriptions the gateway knows how to bootstrap.
 * Adding a new entry here is enough to make it discoverable.
 *
 * Each entry has its own default bridge port (3250–3257) and its own
 * bridge env var.
 */
 export const SUBSCRIPTION_CATALOG: readonly SubscriptionDescriptor[] = [
  {
    id: 'claude-code',
    label: 'Claude Code (Anthropic Subscription)',
    command: 'claude',
    versionArgs: ['--version'],
    bridgePort: 3250,
    bridgeEnvKey: 'CLAUDE_BRIDGE_URL',
    providerName: 'claude-bridge',
    bridgeImplementation: 'inline-claude',
    models: [
      { id: 'claude-opus-4-1', tier: 'reasoning' },
      { id: 'claude-sonnet-4-1', tier: 'large' },
      { id: 'claude-haiku-3', tier: 'fast' },
    ],
  },
  {
    id: 'github-copilot',
    label: 'GitHub Copilot Subscription',
    // Probed through the GitHub CLI: `gh copilot --version`.
    command: 'gh',
    versionArgs: ['copilot', '--version'],
    bridgePort: 3252,
    bridgeEnvKey: 'COPILOT_BRIDGE_URL',
    providerName: 'copilot-bridge',
    bridgeImplementation: 'inline-copilot',
    models: [
      { id: 'gpt-4', tier: 'reasoning' },
      { id: 'gpt-3.5-turbo', tier: 'medium' },
    ],
  },
  {
    id: 'microsoft-365-copilot',
    label: 'Microsoft 365 Copilot Subscription',
    // The probe command is `node` itself, so "installed" here only means
    // that Node.js is available.
    command: 'node',
    versionArgs: ['--version'],
    bridgePort: 3257,
    bridgeEnvKey: 'M365_COPILOT_BRIDGE_URL',
    providerName: 'm365-copilot-bridge',
    bridgeImplementation: 'inline-openai',
    models: [
      { id: 'microsoft-365-copilot', tier: 'reasoning' },
      { id: 'm365-copilot-chat', tier: 'large' },
    ],
  },
  {
    id: 'chatgpt',
    label: 'OpenAI ChatGPT Plus Subscription',
    command: 'chatgpt',
    versionArgs: ['--version'],
    bridgePort: 3251,
    bridgeEnvKey: 'CHATGPT_BRIDGE_URL',
    providerName: 'chatgpt-bridge',
    bridgeImplementation: 'inline-openai',
    models: [
      { id: 'gpt-4-turbo', tier: 'reasoning' },
      { id: 'gpt-4', tier: 'large' },
      { id: 'gpt-3.5-turbo', tier: 'medium' },
    ],
  },
  {
    id: 'gemini',
    label: 'Google Gemini Advanced Subscription',
    command: 'gemini',
    versionArgs: ['--version'],
    bridgePort: 3254,
    bridgeEnvKey: 'GEMINI_BRIDGE_URL',
    providerName: 'gemini-bridge',
    bridgeImplementation: 'inline-openai',
    models: [
      { id: 'gemini-1.5-pro', tier: 'reasoning' },
      { id: 'gemini-1.5-flash', tier: 'fast' },
    ],
  },
  {
    id: 'codex',
    label: 'OpenAI Codex CLI Subscription',
    command: 'codex',
    versionArgs: ['--version'],
    // The only entry that declares an auth probe: `codex login status`.
    authProbeArgs: ['login', 'status'],
    bridgePort: 3253,
    bridgeEnvKey: 'CODEX_BRIDGE_URL',
    providerName: 'codex-bridge',
    bridgeImplementation: 'external-codex',
    models: [
      { id: 'gpt-5.1-codex', tier: 'reasoning' },
      { id: 'gpt-5.1-codex-mini', tier: 'large' },
      { id: 'codex-mini-latest', tier: 'medium' },
    ],
  },
  {
    id: 'aider',
    label: 'Aider AI Pair Programmer',
    command: 'aider',
    versionArgs: ['--version'],
    bridgePort: 3256,
    bridgeEnvKey: 'AIDER_BRIDGE_URL',
    providerName: 'aider-bridge',
    bridgeImplementation: 'inline-openai',
    models: [
      { id: 'aider-default', tier: 'large' },
    ],
  },
 ];
 /**
 * Run a CLI's version command with a 3s timeout.
 *
 * @param command - Binary to execute.
 * @param args - Arguments of the version probe.
 * @returns The first line of the trimmed output (stdout, else stderr), or
 *   'installed' when there is no output or the command failed for any reason
 *   other than not being found; `null` when the binary does not exist (ENOENT).
 */
 async function probeVersion(command: string, args: readonly string[]): Promise<string | null> {
  const execOptions = { timeout: 3000, maxBuffer: 64 * 1024 };
  try {
    const result = await execFileAsync(command, args as string[], execOptions);
    const text = result.stdout || result.stderr || '';
    const [firstLine] = text.trim().split('\n');
    return firstLine || 'installed';
  } catch (err: unknown) {
    // Non-zero exit code but command exists (e.g. auth required) — count as installed
    const missing = (err as NodeJS.ErrnoException).code === 'ENOENT';
    return missing ? null : 'installed';
  }
 }
 /**
 * Run a command with a 3s timeout and report whether it exited successfully.
 */
 async function commandExitsCleanly(command: string, args: readonly string[]): Promise<boolean> {
  try {
    await execFileAsync(command, [...args], { timeout: 3000 });
    return true;
  } catch {
    return false;
  }
 }
 /**
 * Best-effort authentication check. Many CLI tools don't have a clean probe,
 * so we return 'unknown' rather than guessing wrong.
 *
 * Probes, in order:
 *   - claude-code: presence of ~/.claude/.credentials.json
 *   - github-copilot: `gh auth status` exits successfully
 *   - microsoft-365-copilot: one of the Microsoft token / client-id env vars is set
 *   - any descriptor that declares `authProbeArgs`: `<command> <authProbeArgs>`
 *     exits successfully (this is how codex's `login status` is probed)
 *
 * @param desc - Descriptor of the subscription to check.
 * @returns `true` / `false` when a probe ran, `'unknown'` when none applies.
 */
 async function probeAuthenticated(desc: SubscriptionDescriptor): Promise<boolean | 'unknown'> {
  // Claude Code stores credentials in ~/.claude/.credentials.json
  if (desc.id === 'claude-code') {
    const home = process.env.HOME || '/root';
    return existsSync(`${home}/.claude/.credentials.json`);
  }
  // GitHub Copilot uses gh auth status
  if (desc.id === 'github-copilot') {
    return commandExitsCleanly('gh', ['auth', 'status']);
  }
  if (desc.id === 'microsoft-365-copilot') {
    return Boolean(
      process.env['MICROSOFT_GRAPH_ACCESS_TOKEN'] ||
      process.env['M365_COPILOT_ACCESS_TOKEN'] ||
      process.env['MICROSOFT_CLIENT_ID']
    );
  }
  // Data-driven probe: honour the descriptor's own auth args. Codex keeps its
  // historical `login status` fallback in case a descriptor omits them.
  const authArgs = desc.authProbeArgs ?? (desc.id === 'codex' ? ['login', 'status'] : undefined);
  if (authArgs && authArgs.length > 0) {
    return commandExitsCleanly(desc.command, authArgs);
  }
  return 'unknown';
 }
 /**
 * Check whether a bridge answers on `<url>/health` within 1.5s.
 *
 * Any completed HTTP response counts as reachable; the status code is not
 * inspected.
 *
 * @param url - Bridge base URL (a single trailing slash is tolerated).
 * @returns `true` when the request completed, `false` when `url` is empty or
 *   the request failed or timed out.
 */
 async function probeBridge(url: string | undefined): Promise<boolean> {
  if (!url) return false;
  let timer: ReturnType<typeof setTimeout> | undefined;
  try {
    const aborter = new AbortController();
    timer = setTimeout(() => aborter.abort(), 1500);
    const healthUrl = `${url.replace(/\/$/, '')}/health`;
    await fetch(healthUrl, { signal: aborter.signal });
    return true;
  } catch {
    return false;
  } finally {
    if (timer !== undefined) clearTimeout(timer);
  }
 }
 /**
 * Work out where a subscription's bridge lives and whether it is up.
 *
 *   1. If the descriptor's env var (CLAUDE_BRIDGE_URL etc.) is set, that URL
 *      is always reported, together with the result of probing it.
 *   2. Otherwise http://127.0.0.1:{bridgePort} is probed; the URL is only
 *      reported when the probe succeeds.
 *
 * A bridge running locally on its default port is therefore picked up
 * without any configuration.
 */
 async function resolveBridgeUrl(desc: SubscriptionDescriptor): Promise<{ url?: string; running: boolean }> {
  const configured = process.env[desc.bridgeEnvKey];
  if (configured) {
    return { url: configured, running: await probeBridge(configured) };
  }
  // Nothing configured — fall back to the default local port.
  const fallbackUrl = `http://127.0.0.1:${desc.bridgePort}`;
  if (await probeBridge(fallbackUrl)) {
    return { url: fallbackUrl, running: true };
  }
  return { running: false };
 }
 /**
 * Discover every subscription in the catalog. For each entry the bridge,
 * the CLI binary and (when the CLI is present) the authentication state are
 * probed; all entries are processed concurrently.
 *
 * @returns One status per catalog entry, in catalog order.
 */
 export async function discoverSubscriptions(): Promise<SubscriptionStatus[]> {
  const probeOne = async (desc: SubscriptionDescriptor): Promise<SubscriptionStatus> => {
    // The bridge is probed first: a running bridge is enough to count as
    // "available" even when the CLI is absent on this host (the bridge could
    // live on the user's machine).
    const { url: bridgeUrl, running: bridgeRunning } = await resolveBridgeUrl(desc);
    const version = await probeVersion(desc.command, desc.versionArgs);
    if (version) {
      return {
        descriptor: desc,
        installed: true,
        authenticated: await probeAuthenticated(desc),
        version,
        bridgeUrl,
        bridgeRunning,
      };
    }
    return {
      descriptor: desc,
      installed: bridgeRunning, // remote bridge counts as installed
      authenticated: bridgeRunning ? 'unknown' : false,
      bridgeUrl,
      bridgeRunning,
    };
  };
  const results = await Promise.all(SUBSCRIPTION_CATALOG.map((desc) => probeOne(desc)));
  let detected = 0;
  let bridgesLive = 0;
  for (const status of results) {
    if (status.installed) detected += 1;
    if (status.bridgeRunning) bridgesLive += 1;
  }
  logger.info({ detected, bridgesLive, total: results.length }, 'Subscription discovery completed');
  return results;
 }
--- a/packages/gateway/src/modules/subscription-wallet.ts
+++ b/packages/gateway/src/modules/subscription-wallet.ts
@ -0,0 +1,271 @@
 /**
 * Subscription Pool Wallet
 *
 * Tracks usage of each CLI subscription against its known quota window
 * (e.g. Claude Code Pro = 45 req / 5h, ChatGPT Plus = 80 req / 3h; see
 * QUOTA_PROFILES for the full table). Used by the dashboard to show which
 * subscription has the most headroom and by the router (applyPoolRouting)
 * to load-balance across subscriptions.
 *
 * This is the feature competitors don't have: combining MULTIPLE personal
 * AI subscriptions into a single managed pool.
 */
 import type { Pool } from 'pg';
 import { logger } from '../observability/logger.js';
 /**
 * Static description of one subscription's quota window.
 * One profile exists per key of QUOTA_PROFILES.
 */
 export interface QuotaProfile {
  /** Key of this profile in QUOTA_PROFILES; stored as `subscription_id` in usage rows. */
  subscriptionId: string;
  /** Human-readable plan name, copied into the wallet snapshot. */
  label: string;
  /** Hard request quota inside the window. Null = unknown / unlimited. */
  requestQuota: number | null;
  /** Window length in seconds (e.g. 3h = 10800). */
  windowSeconds: number;
  /**
   * Reset behaviour: 'rolling' = sliding window, 'fixed' = clock-aligned reset.
   * recordSubscriptionUsage buckets 'rolling' usage into quarter-hour slots
   * and 'fixed' usage into UTC-day slots.
   */
  reset: 'rolling' | 'fixed';
 }
 /**
 * Built-in quota profiles, keyed by subscription id (each key equals the
 * entry's own `subscriptionId`). `requestQuota: null` marks plans with no
 * known request cap; those are reported without utilisation figures.
 */
 export const QUOTA_PROFILES: Record<string, QuotaProfile> = {
  'claude-code': {
    subscriptionId: 'claude-code',
    label: 'Claude Code (Pro)',
    requestQuota: 45,
    windowSeconds: 5 * 60 * 60, // 5 hours
    reset: 'rolling',
  },
  'github-copilot': {
    subscriptionId: 'github-copilot',
    label: 'GitHub Copilot',
    requestQuota: null,
    windowSeconds: 30 * 24 * 60 * 60, // 30 days
    reset: 'fixed',
  },
  'microsoft-365-copilot': {
    subscriptionId: 'microsoft-365-copilot',
    label: 'M365 Copilot',
    requestQuota: null,
    windowSeconds: 30 * 24 * 60 * 60, // 30 days
    reset: 'fixed',
  },
  'chatgpt': {
    subscriptionId: 'chatgpt',
    label: 'ChatGPT Plus',
    requestQuota: 80,
    windowSeconds: 3 * 60 * 60, // 3 hours
    reset: 'rolling',
  },
  'gemini': {
    subscriptionId: 'gemini',
    label: 'Gemini Advanced',
    requestQuota: null,
    windowSeconds: 30 * 24 * 60 * 60, // 30 days
    reset: 'fixed',
  },
  'codex': {
    subscriptionId: 'codex',
    label: 'OpenAI Codex',
    requestQuota: 150,
    windowSeconds: 5 * 60 * 60, // 5 hours
    reset: 'rolling',
  },
  'aider': {
    subscriptionId: 'aider',
    label: 'Aider',
    requestQuota: null,
    windowSeconds: 24 * 60 * 60, // 1 day
    reset: 'fixed',
  },
 };
 /**
 * Count one request (and its tokens) against a subscription's usage bucket.
 *
 * Usage is upserted into `subscription_quota_window`, keyed by subscription
 * and bucket start. Unknown subscription ids are ignored. A failed write is
 * logged as a warning and never thrown to the caller.
 *
 * @param db Postgres pool used for the upsert.
 * @param subscriptionId Key into QUOTA_PROFILES.
 * @param tokensConsumed Tokens to add to the bucket (defaults to 0).
 */
 export async function recordSubscriptionUsage(
  db: Pool,
  subscriptionId: string,
  tokensConsumed: number = 0
 ): Promise<void> {
  const profile = QUOTA_PROFILES[subscriptionId];
  if (!profile) return;
  const QUARTER_HOUR_MS = 900_000;
  const moment = new Date();
  // Rolling profiles are grouped into quarter-hour buckets (the sliding
  // window itself is applied at read time by summing the last
  // `windowSeconds`); fixed profiles are grouped into UTC-day buckets.
  const bucketStartMs =
    profile.reset === 'rolling'
      ? moment.getTime() - (moment.getTime() % QUARTER_HOUR_MS)
      : Date.UTC(moment.getUTCFullYear(), moment.getUTCMonth(), moment.getUTCDate());
  const windowStart = new Date(bucketStartMs);
  const resetAt = new Date(bucketStartMs + profile.windowSeconds * 1000);
  const params = [
    subscriptionId,
    windowStart,
    profile.windowSeconds,
    tokensConsumed,
    profile.requestQuota,
    resetAt,
  ];
  try {
    await db.query(
      `
      INSERT INTO subscription_quota_window
        (subscription_id, window_start, window_seconds, request_count, tokens_consumed, quota_limit, reset_at)
      VALUES ($1, $2, $3, 1, $4, $5, $6)
      ON CONFLICT (subscription_id, window_start)
      DO UPDATE SET
        request_count   = subscription_quota_window.request_count + 1,
        tokens_consumed = subscription_quota_window.tokens_consumed + EXCLUDED.tokens_consumed
      `,
      params
    );
  } catch (err) {
    logger.warn({ err, subscriptionId }, 'subscription-wallet: usage record failed');
  }
 }
 /** One subscription's row in the wallet snapshot produced by getSubscriptionWallet. */
 export interface WalletEntry {
  /** Subscription id, copied from the quota profile. */
  subscriptionId: string;
  /** Display label, copied from the quota profile. */
  label: string;
  /** Request quota of the profile; null when the plan has no known cap. */
  requestQuota: number | null;
  /** Requests counted in buckets that started within the last `windowSeconds`. */
  used: number;
  /** Quota minus `used`, floored at 0; null when there is no quota. */
  remaining: number | null;
  /** `used` as a percentage of the quota, capped at 100 and rounded to one decimal; null when there is no quota. */
  utilizationPercent: number | null;
  /** Window length in seconds, copied from the quota profile. */
  windowSeconds: number;
  /** ISO timestamp of the latest `reset_at` among the counted buckets; null when none were found. */
  resetAt: string | null;
  /** Predicted exhaustion timestamp based on current rate; null if no quota or no usage. */
  predictedExhaustionAt: string | null;
  /**
   * Routing hint derived from utilisation: 'exhausted' at >= 100%,
   * 'near-limit' at >= 80%, 'use-this' at <= 30%, otherwise 'available';
   * 'unknown' when utilisation cannot be computed.
   */
  recommendation: 'use-this' | 'available' | 'near-limit' | 'exhausted' | 'unknown';
 }
 /**
 * Build the wallet snapshot: one entry per profile in QUOTA_PROFILES, in
 * table order.
 *
 * For each profile, the request counts of all buckets whose `window_start`
 * lies within the last `windowSeconds` are summed and compared to the quota.
 * The per-profile reads are independent of each other, so they are issued
 * concurrently; callers on the request path wait for the slowest read
 * instead of the sum of all round-trips.
 *
 * A failed read is logged and reported as zero usage (best effort); it never
 * rejects the returned promise.
 *
 * @param db Postgres pool used for the reads.
 * @returns Wallet entries in the same order as QUOTA_PROFILES.
 */
 export async function getSubscriptionWallet(db: Pool): Promise<WalletEntry[]> {
  // Promise.all preserves input order, so the snapshot order is unchanged.
  return Promise.all(
    Object.values(QUOTA_PROFILES).map(async (profile): Promise<WalletEntry> => {
      const quota = profile.requestQuota;
      let used = 0;
      let resetAt: string | null = null;
      try {
        const result = await db.query(
          `
          SELECT
            COALESCE(SUM(request_count), 0)::INT AS used,
            MAX(reset_at) AS reset_at
          FROM subscription_quota_window
          WHERE subscription_id = $1
            AND window_start > NOW() - MAKE_INTERVAL(secs => $2)
          `,
          [profile.subscriptionId, profile.windowSeconds]
        );
        const row = result.rows[0];
        // `used` is cast to INT in SQL; coerce defensively and never let a
        // non-numeric value leak into the arithmetic below as NaN.
        const parsedUsed = Number(row?.used ?? 0);
        used = Number.isFinite(parsedUsed) ? parsedUsed : 0;
        resetAt = row?.reset_at ? new Date(row.reset_at).toISOString() : null;
      } catch (err) {
        logger.warn({ err, sub: profile.subscriptionId }, 'wallet: read failed');
      }
      const remaining = quota !== null ? Math.max(quota - used, 0) : null;
      // A null or zero quota yields no utilisation figure (avoids dividing by zero).
      const utilizationPercent =
        quota !== null && quota > 0 ? Math.min(100, (used / quota) * 100) : null;
      // Linear extrapolation: assume the observed usage was spread evenly
      // over one full window and project when the remainder runs out.
      let predictedExhaustionAt: string | null = null;
      if (utilizationPercent !== null && remaining !== null && used > 0) {
        const ratePerSecond = used / profile.windowSeconds;
        if (ratePerSecond > 0) {
          const secondsRemaining = remaining / ratePerSecond;
          predictedExhaustionAt = new Date(Date.now() + secondsRemaining * 1000).toISOString();
        }
      }
      let recommendation: WalletEntry['recommendation'] = 'unknown';
      if (utilizationPercent !== null) {
        if (utilizationPercent >= 100) recommendation = 'exhausted';
        else if (utilizationPercent >= 80) recommendation = 'near-limit';
        else if (utilizationPercent <= 30) recommendation = 'use-this';
        else recommendation = 'available';
      }
      return {
        subscriptionId: profile.subscriptionId,
        label: profile.label,
        requestQuota: quota,
        used,
        remaining,
        utilizationPercent: utilizationPercent !== null ? Math.round(utilizationPercent * 10) / 10 : null,
        windowSeconds: profile.windowSeconds,
        resetAt,
        predictedExhaustionAt,
        recommendation,
      };
    })
  );
 }
 /**
 * Map an Ollama / external model id to the subscription it belongs to,
 * if any. Returns null for non-subscription models (free APIs, local Ollama).
 *
 * Matching is case-insensitive. Exact ids and explicit prefixes are checked
 * before the loose "contains 'claude'" rule, so a provider-prefixed id such
 * as `github-copilot-…` is never attributed to Claude Code merely because
 * its name mentions Claude.
 *
 * @param model Model id as it appears in a routing decision.
 * @returns The subscription id (a key of QUOTA_PROFILES) or null.
 */
 export function modelToSubscriptionId(model: string): string | null {
  const id = model.toLowerCase();
  if (id === 'microsoft-365-copilot' || id === 'm365-copilot-chat') return 'microsoft-365-copilot';
  if (id.startsWith('github-copilot') || id === 'copilot-chat') return 'github-copilot';
  // The Codex check must precede the generic `gpt-` prefix, which it also matches.
  if (id.startsWith('gpt-5.1-codex') || id === 'codex-mini-latest') return 'codex';
  if (id.startsWith('gpt-')) return 'chatgpt';
  if (id.startsWith('gemini-')) return 'gemini';
  // Loosest rule last: any remaining id that mentions Claude.
  if (id.includes('claude')) return 'claude-code';
  return null;
 }
 /**
 * Post-process a routing decision against the subscription wallet.
 *
 * If the picked model belongs to a subscription that is `exhausted` or
 * `near-limit` (>=80% utilization), the other models in the fallback chain
 * are inspected and the one whose subscription has the most headroom is
 * picked instead. With `options.forced` the search runs regardless of the
 * primary's utilization.
 *
 * A primary whose utilization is unknown (no request quota) is left alone
 * unless forced: unknown is not evidence of pressure, and re-routing it
 * would spend a quota-limited subscription for no reason.
 *
 * `decision.tier` is currently not consulted; candidates come straight from
 * `fallback_chain`.
 *
 * This is the Pool-Routing feature: distribute load across YOUR subscriptions
 * to maximize their value rather than always routing to the primary.
 *
 * @param db Postgres pool used to read the wallet.
 * @param decision Routing decision to post-process; it is not mutated.
 * @param options `forced` searches for a better candidate unconditionally.
 * @returns The re-routed model, a chain with that model moved to the front
 *   and a human-readable reason — or null when the decision should stand.
 */
 export async function applyPoolRouting(
  db: Pool,
  decision: { model: string; fallback_chain: string[]; tier: string },
  options: { forced?: boolean } = {}
 ): Promise<{ model: string; fallback_chain: string[]; reason: string } | null> {
  // Non-subscription models are never re-routed; decide that before paying
  // for the wallet read.
  const primarySub = modelToSubscriptionId(decision.model);
  if (!primarySub) return null;
  const wallet = await getSubscriptionWallet(db);
  const walletEntryFor = (model: string) => {
    const sub = modelToSubscriptionId(model);
    return sub ? wallet.find((entry) => entry.subscriptionId === sub) : undefined;
  };
  const primaryUtil = walletEntryFor(decision.model)?.utilizationPercent ?? null;
  // 'exhausted' implies >= 100% utilization, so this single threshold covers
  // both the near-limit and the exhausted case.
  const primaryUnderPressure = primaryUtil !== null && primaryUtil >= 80;
  if (!options.forced && !primaryUnderPressure) return null;
  // Find another model in the fallback chain with lower utilization.
  let bestModel = decision.model;
  let bestUtil = primaryUtil ?? 100; // forced + unknown primary: any known candidate beats it
  for (const candidate of decision.fallback_chain) {
    if (candidate === decision.model) continue;
    const entry = walletEntryFor(candidate);
    if (entry?.recommendation === 'exhausted') continue;
    const util = entry?.utilizationPercent ?? null;
    if (util === null) continue; // unknown utilization — don't pick blindly over a known one
    if (util < bestUtil) {
      bestUtil = util;
      bestModel = candidate;
    }
  }
  if (bestModel === decision.model) return null;
  // Move chosen model to front of chain
  const newChain = [bestModel, ...decision.fallback_chain.filter((m) => m !== bestModel)];
  return {
    model: bestModel,
    fallback_chain: newChain,
    reason: `pool-route: primary ${decision.model} at ${primaryUtil?.toFixed(0) ?? '?'}% util, switched to ${bestModel} at ${bestUtil.toFixed(0)}%`,
  };
 }
 /**
 * Pick, among `candidates`, the subscription with the most headroom.
 *
 * Exhausted subscriptions are never picked. Subscriptions without a
 * utilization figure are ranked as if they were 50% used, so a tracked
 * quota below that wins over an untracked one. Ties go to the entry that
 * comes first in the wallet.
 *
 * @param db Postgres pool used to read the wallet.
 * @param candidates Subscription ids that may be chosen.
 * @returns The winning id with a short reason, or null when no candidate is eligible.
 */
 export async function pickBestSubscription(
  db: Pool,
  candidates: readonly string[]
 ): Promise<{ subscriptionId: string; reason: string } | null> {
  const wallet = await getSubscriptionWallet(db);
  const UNKNOWN_UTILIZATION = 50;
  let winner: (typeof wallet)[number] | undefined;
  for (const entry of wallet) {
    if (!candidates.includes(entry.subscriptionId)) continue;
    if (entry.recommendation === 'exhausted') continue;
    const score = entry.utilizationPercent ?? UNKNOWN_UTILIZATION;
    // Strict comparison keeps the earliest entry on ties.
    if (!winner || score < (winner.utilizationPercent ?? UNKNOWN_UTILIZATION)) {
      winner = entry;
    }
  }
  if (!winner) return null;
  let reason = 'no quota tracking';
  if (winner.utilizationPercent !== null) {
    reason = `${winner.utilizationPercent.toFixed(0)}% used in window`;
  }
  return { subscriptionId: winner.subscriptionId, reason };
 }
--- a/packages/gateway/src/pipeline/external-providers.ts
+++ b/packages/gateway/src/pipeline/external-providers.ts
@ -86,6 +86,17 @@ const PROVIDERS: readonly ExternalProvider[] = [
      { id: 'gpt-3.5-turbo', tier: 'medium', contextLength: 4096 },
    ],
  },
  {
    name: 'm365-copilot-bridge',
    baseUrl: '', // constructed from M365_COPILOT_BRIDGE_URL env var
    envKey: 'M365_COPILOT_BRIDGE_URL',
    rateLimitRpm: 60,
    enabled: true,
    models: [
      { id: 'microsoft-365-copilot', tier: 'reasoning', contextLength: 128000 },
      { id: 'm365-copilot-chat', tier: 'large', contextLength: 128000 },
    ],
  },
  {
    name: 'cerebras',
    baseUrl: 'https://api.cerebras.ai/v1',
@ -146,12 +157,13 @@ const PROVIDERS: readonly ExternalProvider[] = [
  {
    name: 'openai-codex',
    baseUrl: 'https://api.openai.com/v1',
-    envKey: 'OPENAI_API_KEY',
+    envKey: 'OPENAI_CODEX_URL',
    rateLimitRpm: 60,
    enabled: true,
    models: [
-      { id: 'gpt-4-turbo', tier: 'reasoning', contextLength: 128000 },
+      { id: 'gpt-5.1-codex', tier: 'reasoning', contextLength: 256000 },
-      { id: 'gpt-3.5-turbo', tier: 'fast', contextLength: 16384 },
+      { id: 'gpt-5.1-codex-mini', tier: 'large', contextLength: 256000 },
      { id: 'codex-mini-latest', tier: 'medium', contextLength: 200000 },
    ],
  },
  {
@ -169,16 +181,28 @@ const PROVIDERS: readonly ExternalProvider[] = [
  {
    name: 'codex',
    baseUrl: 'https://api.github.com/copilot_inner/v2',
-    envKey: 'GITHUB_CODEX_TOKEN',
+    envKey: 'CODEX_BRIDGE_URL',
    rateLimitRpm: 60,
    enabled: true,
    models: [
-      { id: 'github-copilot-x', tier: 'large', contextLength: 8192 },
+      { id: 'gpt-5.1-codex', tier: 'reasoning', contextLength: 256000 },
-      { id: 'code-davinci-002', tier: 'medium', contextLength: 4096 },
+      { id: 'gpt-5.1-codex-mini', tier: 'large', contextLength: 256000 },
      { id: 'codex-mini-latest', tier: 'medium', contextLength: 200000 },
    ],
  },
 ];
 /** Providers whose requests are sent without an `Authorization` header. */
 const AUTHLESS_BRIDGE_PROVIDERS = new Set<string>([
  'claude-bridge', 'claude-code',
  'openai-bridge', 'chatgpt-bridge',
  'copilot-bridge', 'm365-copilot-bridge',
 ]);
 /** Providers called through `/api/generate` with a prompt-style payload instead of `/chat/completions`. */
 const GENERATE_BRIDGE_PROVIDERS = new Set<string>([
  'claude-bridge',
  'claude-code',
 ]);
 // ─── Rate Limiter (simple sliding window) ───────────────────────────
 const requestTimestamps: Map<string, number[]> = new Map();
@ -213,25 +237,34 @@ function getApiKey(provider: ExternalProvider): string | undefined {
    return url ? 'claude-code-enabled' : undefined;
  }
  if (provider.name === 'openai-bridge') {
-    // openai-bridge uses OPENAI_API_KEY for auth, but also needs bridge URL
+    // Subscription bridge auth is handled by the bridge process/CLI session.
    const apiKey = process.env['OPENAI_API_KEY'];
    const url = process.env['OPENAI_BRIDGE_URL'];
-    return apiKey && url ? apiKey : undefined;
+    return url ? 'openai-bridge-enabled' : undefined;
  }
  if (provider.name === 'chatgpt-bridge') {
-    // chatgpt-bridge can use same URL as openai-bridge (same service), but needs API key
+    // ChatGPT Plus bridge can reuse the OpenAI bridge when configured that way.
    const apiKey = process.env['OPENAI_API_KEY'];
    const url = process.env['CHATGPT_BRIDGE_URL'] || process.env['OPENAI_BRIDGE_URL'];
-    return apiKey && url ? apiKey : undefined;
+    return url ? 'chatgpt-bridge-enabled' : undefined;
  }
  if (provider.name === 'copilot-bridge') {
-    // copilot-bridge uses GitHub Copilot subscription (auth handled internally by copilot-api)
+    // copilot-bridge uses GitHub Copilot subscription (auth handled internally by copilot-api).
    // Just needs URL to be configured
    const url = process.env['COPILOT_BRIDGE_URL'];
    return url ? 'copilot-authenticated' : undefined;
  }
  if (provider.name === 'm365-copilot-bridge') {
    // Microsoft 365 Copilot uses Microsoft Graph delegated auth inside the bridge.
    const url = process.env['M365_COPILOT_BRIDGE_URL'];
    return url ? 'm365-copilot-bridge-enabled' : undefined;
  }
  if (provider.name === 'openai-codex') {
    const bridgeUrl = process.env['OPENAI_CODEX_URL'] || process.env['CODEX_BRIDGE_URL'];
    if (bridgeUrl) return 'openai-codex-bridge-enabled';
    return process.env['OPENAI_API_KEY'] || undefined;
  }
  if (provider.name === 'codex') {
-    // codex uses GitHub Codex API token
+    // Codex can run through an authless local/subscription bridge. A token remains supported as fallback.
    const bridgeUrl = process.env['CODEX_BRIDGE_URL'] || process.env['OPENAI_CODEX_URL'];
    if (bridgeUrl) return 'codex-bridge-enabled';
    const token = process.env['GITHUB_CODEX_TOKEN'];
    return token ? token : undefined;
  }
@ -241,11 +274,11 @@ function getApiKey(provider: ExternalProvider): string | undefined {
 function getBaseUrl(provider: ExternalProvider): string {
  if (provider.name === 'claude-bridge') {
    const url = process.env['CLAUDE_BRIDGE_URL'];
-    return url ? `${url}/v1` : '';
+    return url ?? '';
  }
  if (provider.name === 'claude-code') {
    const url = process.env['CLAUDE_CODE_URL'];
-    return url ? `${url}/v1` : '';
+    return url ?? '';
  }
  if (provider.name === 'openai-bridge') {
    const url = process.env['OPENAI_BRIDGE_URL'];
@ -257,7 +290,19 @@ function getBaseUrl(provider: ExternalProvider): string {
  }
  if (provider.name === 'copilot-bridge') {
    const url = process.env['COPILOT_BRIDGE_URL'];
-    return url ? `${url}` : '';
+    return url ? `${url}/v1` : '';
  }
  if (provider.name === 'm365-copilot-bridge') {
    const url = process.env['M365_COPILOT_BRIDGE_URL'];
    return url ? `${url}/v1` : '';
  }
  if (provider.name === 'openai-codex') {
    const url = process.env['OPENAI_CODEX_URL'] || process.env['CODEX_BRIDGE_URL'];
    return url ? `${url}/v1` : provider.baseUrl;
  }
  if (provider.name === 'codex') {
    const url = process.env['CODEX_BRIDGE_URL'] || process.env['OPENAI_CODEX_URL'];
    return url ? `${url}/v1` : provider.baseUrl;
  }
  if (provider.name === 'cloudflare') {
    const accountId = process.env['CLOUDFLARE_ACCOUNT_ID'];
@ -271,6 +316,11 @@ export function getAvailableProviders(): readonly ExternalProvider[] {
  return PROVIDERS.filter((p) => p.enabled && getApiKey(p));
 }
 /**
 * Returns every provider in the PROVIDERS table, without filtering on
 * `enabled` or on whether credentials are configured. For dashboard listing.
 *
 * @returns The PROVIDERS array itself (typed read-only), not a copy.
 */
 export function getAllProviders(): readonly ExternalProvider[] {
  return PROVIDERS;
 }
 function findBestModel(
  provider: ExternalProvider,
  targetTier: 'fast' | 'medium' | 'large' | 'reasoning',
@ -296,7 +346,11 @@ function findBestModel(
 function buildRequestHeaders(provider: ExternalProvider, apiKey: string): Record<string, string> {
  const headers: Record<string, string> = { 'Content-Type': 'application/json' };
-  if (!['claude-bridge', 'claude-code', 'openai-bridge', 'chatgpt-bridge', 'copilot-bridge'].includes(provider.name)) {
+  const usesAuthlessBridge = AUTHLESS_BRIDGE_PROVIDERS.has(provider.name)
    || (provider.name === 'openai-codex' && !!(process.env['OPENAI_CODEX_URL'] || process.env['CODEX_BRIDGE_URL']))
    || (provider.name === 'codex' && !!(process.env['CODEX_BRIDGE_URL'] || process.env['OPENAI_CODEX_URL']));
  if (!usesAuthlessBridge) {
    headers['Authorization'] = `Bearer ${apiKey}`;
  }
  return headers;
@ -311,13 +365,29 @@ function buildRequestPayload(model: ExternalModel, request: ExternalCompletionRe
  };
 }
 /**
 * Flatten a chat-style request into the prompt-style body sent to
 * `/api/generate` bridges.
 *
 * The first system message becomes `system`; every non-system message is
 * rendered as `role: content` and the turns are joined with blank lines.
 * Temperature defaults to 0.3 and max_tokens to 2048 when not supplied.
 *
 * @param model Model whose id is sent to the bridge.
 * @param request Chat request to flatten.
 * @returns The JSON body for the generate endpoint.
 */
 function buildGenerateBridgePayload(model: ExternalModel, request: ExternalCompletionRequest): Record<string, unknown> {
  const { messages, temperature, max_tokens: maxTokens } = request;
  const systemMessage = messages.find(({ role }) => role === 'system');
  const turns: string[] = [];
  for (const { role, content } of messages) {
    if (role === 'system') continue;
    turns.push(`${role}: ${content}`);
  }
  return {
    model: model.id,
    prompt: turns.join('\n\n'),
    system: systemMessage?.content,
    temperature: temperature ?? 0.3,
    max_tokens: maxTokens ?? 2048,
  };
 }
 function parseExternalResponse(
  data: any,
  model: ExternalModel,
  provider: ExternalProvider,
  start: number,
 ): ExternalCompletionResponse {
-  const content = data.choices?.[0]?.message?.content ?? '';
+  const content = data.choices?.[0]?.message?.content ?? data.content ?? data.response ?? data.message?.content ?? '';
  recordRequest(provider.name);
  return {
    response: content,
@ -341,14 +411,15 @@ async function callProvider(
  const baseUrl = getBaseUrl(provider);
  if (!baseUrl) throw new Error(`No base URL for ${provider.name}`);
-  const url = `${baseUrl}/chat/completions`;
+  const generateBridge = GENERATE_BRIDGE_PROVIDERS.has(provider.name);
  const url = generateBridge ? `${baseUrl}/api/generate` : `${baseUrl}/chat/completions`;
  const controller = new AbortController();
  const timer = setTimeout(() => controller.abort(), timeoutMs);
  const start = Date.now();
  try {
    const headers = buildRequestHeaders(provider, apiKey);
-    const payload = buildRequestPayload(model, request);
+    const payload = generateBridge ? buildGenerateBridgePayload(model, request) : buildRequestPayload(model, request);
    const response = await fetch(url, {
      method: 'POST',
--- a/packages/gateway/src/routes/dashboard.ts
+++ b/packages/gateway/src/routes/dashboard.ts
--- a/packages/gateway/src/routes/static.ts
+++ b/packages/gateway/src/routes/static.ts
@ -11,6 +11,22 @@ export async function staticRoute(fastify: FastifyInstance): Promise<void> {
  logger.info({ publicDir }, 'Static file serving initialized');
  /**
   * Send an HTML file from `publicDir` with caching disabled.
   * Replies 404 with a JSON error body (and logs a warning) when the file
   * does not exist. The file is read synchronously on every call.
   */
  function sendHtml(filename: string, reply: any) {
    const filePath = join(publicDir, filename);
    if (existsSync(filePath)) {
      const html = readFileSync(filePath, 'utf-8');
      // Headers that tell browsers and proxies never to reuse a cached copy.
      const noCacheHeaders = [
        ['Cache-Control', 'no-cache, no-store, must-revalidate, max-age=0'],
        ['Pragma', 'no-cache'],
        ['Expires', '0'],
      ] as const;
      let response = reply;
      for (const [name, value] of noCacheHeaders) {
        response = response.header(name, value);
      }
      return response.type('text/html').send(html);
    }
    logger.warn({ path: filePath }, `${filename} not found`);
    return reply.status(404).send({ error: `${filename} not found` });
  }
  // Serve root path
  fastify.get('/', async (request, reply) => {
    logger.info({ method: request.method, url: request.url, host: request.hostname }, 'Root path requested');
@ -26,13 +42,47 @@ export async function staticRoute(fastify: FastifyInstance): Promise<void> {
  // Serve /dashboard.html
  fastify.get('/dashboard.html', async (_request, reply) => {
-    const dashboardPath = join(publicDir, 'dashboard.html');
+    return sendHtml('dashboard.html', reply);
-    if (!existsSync(dashboardPath)) {
+  });
-      logger.warn({ path: dashboardPath }, 'dashboard.html not found');
+
-      return reply.status(404).send({ error: 'dashboard.html not found' });
+  fastify.get('/dashboard-v2.html', async (_request, reply) => {
-    }
+    return sendHtml('dashboard-v2.html', reply);
-    const content = readFileSync(dashboardPath, 'utf-8');
+  });
-    return reply.type('text/html').send(content);
+
  // Alias routes that all serve the v2 dashboard page. Trailing-slash
  // variants are listed explicitly, each as its own route.
  const dashboardV2Aliases = [
    '/v2/dashboard',
    '/v2/dashboard/',
    '/v2',
    '/v2/',
    '/dashboard/v2',
    '/dashboard/v2/',
    '/api/dashboard-v2',
    '/api/v2/dashboard',
    '/api/dashboard/v2',
  ];
  for (const alias of dashboardV2Aliases) {
    fastify.get(alias, async (_request, reply) => {
      return sendHtml('dashboard-v2.html', reply);
    });
  }
  // Serve /api/dashboard as HTML for compatibility
--- a/packages/gateway/src/server.ts
+++ b/packages/gateway/src/server.ts
@ -101,7 +101,7 @@ async function buildServer() {
  await server.register(fastifyRateLimit, {
    global: true,
-    max: 100,
+    max: 1000,
    timeWindow: '1 minute',
    keyGenerator: (request) => {
      const caller = (request.headers['x-caller-id'] as string) ?? 'default';