feat: restore workbench v1 and publish wired v2

This commit is contained in:
Rene Fichtmueller 2026-05-03 09:53:40 +02:00
parent 060b846d9b
commit 09165b9bf7
23 changed files with 7846 additions and 1055 deletions

File diff suppressed because it is too large Load Diff

View File

@ -1786,8 +1786,8 @@
<script> <script>
const HEALTH_CHECK_INTERVAL = 30000; const HEALTH_CHECK_INTERVAL = 30000;
const METRICS_REFRESH_INTERVAL = 3000; const METRICS_REFRESH_INTERVAL = 15000;
const REQUESTS_REFRESH_INTERVAL = 3000; const REQUESTS_REFRESH_INTERVAL = 15000;
const API_BASE = ''; const API_BASE = '';
let selectedHours = 24; let selectedHours = 24;
let lastMetrics = null; let lastMetrics = null;
@ -3097,7 +3097,7 @@
setupPolling(); setupPolling();
setInterval(checkHealth, HEALTH_CHECK_INTERVAL); setInterval(checkHealth, HEALTH_CHECK_INTERVAL);
setInterval(loadSubscriptions, 30000); setInterval(loadSubscriptions, 30000);
setInterval(loadHero, 15000); // refresh buddy / events / forecast every 15s setInterval(loadHero, 30000); // refresh buddy / events / forecast every 30s
} }
init(); init();

View File

@ -0,0 +1,87 @@
import type { FastifyReply, FastifyRequest } from 'fastify';
import { timingSafeEqual } from 'crypto';
/** Environment variables consulted for the dashboard token, in priority order. */
const TOKEN_ENV_KEYS = ['DASHBOARD_AUTH_TOKEN', 'LLM_GATEWAY_ADMIN_TOKEN', 'ADMIN_TOKEN'] as const;

/**
 * Resolve the dashboard token from the environment.
 *
 * @returns the trimmed value of the first variable in `TOKEN_ENV_KEYS` that is
 *          non-empty after trimming, or `undefined` when none is set.
 */
function configuredToken(): string | undefined {
  return TOKEN_ENV_KEYS
    .map((envKey) => process.env[envKey]?.trim())
    .find((candidate) => Boolean(candidate));
}
/**
 * Compare two strings via `crypto.timingSafeEqual` on their UTF-8 bytes.
 *
 * `timingSafeEqual` requires equally sized buffers, so inputs whose byte
 * lengths differ are rejected up front.
 *
 * @returns true only when both strings encode to identical bytes.
 */
function safeEqual(left: string, right: string): boolean {
  const candidateBytes = Buffer.from(left);
  const expectedBytes = Buffer.from(right);
  return candidateBytes.length === expectedBytes.length && timingSafeEqual(candidateBytes, expectedBytes);
}
/**
 * Extract a token from an `Authorization` header value.
 *
 * - `Bearer <token>` yields the trimmed token.
 * - `Basic <base64>` yields the trimmed text after the first `:` of the
 *   decoded credentials, or the whole decoded text when there is no `:`.
 * - Any other scheme, or a header without both parts, yields `undefined`.
 *
 * Scheme matching is case-insensitive.
 */
function tokenFromAuthorizationHeader(header: string | undefined): string | undefined {
  if (!header) return undefined;

  const parts = header.split(/\s+/, 2);
  const scheme = parts[0];
  const credentials = parts[1];
  if (!scheme || !credentials) return undefined;

  switch (scheme.toLowerCase()) {
    case 'bearer':
      return credentials.trim();
    case 'basic': {
      try {
        const userPass = Buffer.from(credentials, 'base64').toString('utf8');
        const colonAt = userPass.indexOf(':');
        const secret = colonAt < 0 ? userPass : userPass.slice(colonAt + 1);
        return secret.trim();
      } catch {
        return undefined;
      }
    }
    default:
      return undefined;
  }
}
/**
 * Read the dashboard token presented by a request.
 *
 * A non-blank `x-dashboard-token` header wins; otherwise the token is taken
 * from the `Authorization` header.
 */
function tokenFromRequest(request: FastifyRequest): string | undefined {
  const headerValue = request.headers['x-dashboard-token'];
  const explicitToken = typeof headerValue === 'string' ? headerValue.trim() : '';
  return explicitToken || tokenFromAuthorizationHeader(request.headers.authorization);
}
/** Report whether any of the dashboard token environment variables is set. */
export function isDashboardAuthConfigured(): boolean {
  const token = configuredToken();
  return Boolean(token);
}
/** Host names accepted for the local-development bypass (port already stripped). */
const LOCAL_DEV_HOSTNAMES: ReadonlySet<string> = new Set(['localhost', '127.0.0.1', '::1']);

/** Matches IPv4 loopback addresses, including their IPv4-mapped IPv6 form. */
const LOOPBACK_IPV4_PATTERN = /^(?:::ffff:)?127(?:\.\d{1,3}){3}$/i;

/**
 * Decide whether a request may skip dashboard auth as local development traffic.
 *
 * The bypass is granted only when ALL of the following hold:
 *  - `NODE_ENV` is not `production`;
 *  - the requested host is exactly `localhost`, `127.0.0.1` or `::1`
 *    (an optional port is ignored);
 *  - the peer address reported by the server (`request.ip`) is a loopback address.
 *
 * The Host header is supplied by the client, so it must never be the sole
 * evidence of locality: a remote client can send `Host: localhost`, and a
 * prefix match would also accept names such as `localhost.evil.example`.
 * A request without a peer address is treated as non-local.
 */
function isLocalDevelopmentRequest(request: FastifyRequest): boolean {
  if (process.env['NODE_ENV'] === 'production') return false;

  const rawHost = String(request.hostname || request.headers.host || '').trim().toLowerCase();

  // Strip the port: "[::1]:3000" -> "::1", "localhost:3000" -> "localhost".
  let hostname = rawHost;
  if (rawHost.startsWith('[')) {
    const closing = rawHost.indexOf(']');
    if (closing > 0) hostname = rawHost.slice(1, closing);
  } else {
    const firstColon = rawHost.indexOf(':');
    // Exactly one colon means "host:port"; several colons mean a bare IPv6 literal.
    if (firstColon >= 0 && firstColon === rawHost.lastIndexOf(':')) {
      hostname = rawHost.slice(0, firstColon);
    }
  }
  if (!LOCAL_DEV_HOSTNAMES.has(hostname)) return false;

  const peer = typeof request.ip === 'string' ? request.ip.trim().toLowerCase() : '';
  return peer === '::1' || LOOPBACK_IPV4_PATTERN.test(peer);
}
/**
 * Fastify guard for dashboard routes.
 *
 * Outcomes, in order:
 *  1. local development requests pass without a token;
 *  2. with no token configured the reply is 503 with a setup hint;
 *  3. a presented token equal to the configured one passes;
 *  4. anything else receives 401 plus a `WWW-Authenticate: Bearer` challenge.
 *
 * @returns nothing when the request may proceed, otherwise the sent reply.
 */
export async function requireDashboardAuth(request: FastifyRequest, reply: FastifyReply): Promise<FastifyReply | void> {
  if (isLocalDevelopmentRequest(request)) return;

  const expectedToken = configuredToken();
  if (!expectedToken) {
    const notConfigured = {
      statusCode: 503,
      error: 'Dashboard Auth Not Configured',
      message: 'Set DASHBOARD_AUTH_TOKEN before exposing dashboard data or settings.',
    };
    return reply.status(503).send(notConfigured);
  }

  const presentedToken = tokenFromRequest(request);
  if (presentedToken && safeEqual(presentedToken, expectedToken)) return;

  const unauthorized = {
    statusCode: 401,
    error: 'Unauthorized',
    message: 'Dashboard token required.',
  };
  reply.header('WWW-Authenticate', 'Bearer realm="llm-gateway-dashboard"');
  return reply.status(401).send(unauthorized);
}
/**
 * Describe the auth state of a request without sending a reply.
 *
 * Local development requests count as configured and authenticated. Otherwise
 * `configured` reflects whether a token is set, and `authenticated` whether the
 * presented token equals it.
 */
export function dashboardAuthStatus(request: FastifyRequest): { configured: boolean; authenticated: boolean } {
  const status = { configured: true, authenticated: true };
  if (isLocalDevelopmentRequest(request)) return status;

  const expectedToken = configuredToken();
  if (!expectedToken) {
    status.configured = false;
    status.authenticated = false;
    return status;
  }

  const presentedToken = tokenFromRequest(request);
  status.authenticated = presentedToken ? safeEqual(presentedToken, expectedToken) : false;
  return status;
}

View File

@ -0,0 +1,246 @@
/**
* Bridge Spawner
*
* Auto-starts inline HTTP bridges for detected CLI subscriptions. Each bridge
* exposes a `POST /api/generate` endpoint that the gateway can call as a regular
* external provider. Bridges run in-process to avoid the overhead of spawning
* separate Node processes; they listen on a dedicated port per subscription.
*/
import { execFile } from 'child_process';
import { createServer, type Server } from 'http';
import { logger } from '../observability/logger.js';
import type { SubscriptionDescriptor, SubscriptionStatus } from './subscription-discovery.js';
/** Book-keeping record for one bridge tracked by this module. */
interface RunningBridge {
/** Subscription descriptor the bridge was created for. */
descriptor: SubscriptionDescriptor;
/** Node HTTP server object created for this bridge (closed by `stopAllBridges`). */
server: Server;
/** Port taken from `descriptor.bridgePort`. */
port: number;
/** Loopback base URL of the bridge: `http://127.0.0.1:<port>`. */
url: string;
/** Time the record was created; used for the `/health` uptime value. */
startedAt: Date;
}
/** Registry of tracked bridges, keyed by subscription id (`descriptor.id`). */
const runningBridges = new Map<string, RunningBridge>();
/**
 * Run a CLI tool, pipe `prompt` to its stdin, and return its trimmed stdout.
 * Generic implementation that all inline bridges share.
 *
 * The promise never rejects: failures (spawn error, non-zero exit, timeout,
 * output beyond the 10 MiB buffer) resolve to `{ success: false, error }` with
 * the error text capped at 500 characters.
 *
 * @param command   executable to run (no shell is involved)
 * @param args      arguments passed verbatim to the executable
 * @param prompt    text written to the child's stdin, which is then closed
 * @param timeoutMs kill the child after this many milliseconds (default 5 min)
 */
async function runCli(
  command: string,
  args: readonly string[],
  prompt: string,
  timeoutMs: number = 300_000
): Promise<{ success: boolean; content?: string; error?: string }> {
  const MAX_ERROR_CHARS = 500;
  return new Promise((resolve) => {
    try {
      const child = execFile(
        command,
        [...args],
        { timeout: timeoutMs, maxBuffer: 10 * 1024 * 1024 },
        (err, stdout) => {
          if (err) {
            resolve({ success: false, error: err.message.slice(0, MAX_ERROR_CHARS) });
          } else {
            resolve({ success: true, content: stdout.trim() });
          }
        }
      );
      const stdin = child.stdin;
      if (stdin) {
        // A child that exits (or never starts) before consuming its stdin makes the
        // pipe emit EPIPE/ERR_STREAM_DESTROYED. Without a listener that surfaces as an
        // unhandled 'error' event; the execFile callback already reports the failure.
        stdin.on('error', () => undefined);
        stdin.end(prompt);
      }
    } catch (err) {
      const message = err instanceof Error ? err.message : String(err);
      resolve({ success: false, error: message.slice(0, MAX_ERROR_CHARS) });
    }
  });
}
/**
 * Map a subscription's bridge implementation to the command line to execute.
 *
 * @param desc  subscription descriptor; `bridgeImplementation` selects the CLI
 * @param model optional model name, forwarded as `--model <model>` where supported
 * @returns the executable and its argument list
 */
function buildCliInvocation(desc: SubscriptionDescriptor, model?: string): { cmd: string; args: string[] } {
  // Built fresh on every call so each returned argument list is its own array.
  const modelArgs: string[] = model ? ['--model', model] : [];

  switch (desc.bridgeImplementation) {
    case 'inline-claude':
      return { cmd: 'claude', args: ['--print', '--output-format', 'text', ...modelArgs] };

    case 'inline-copilot':
      // The model is not forwarded for Copilot; the invocation is always `gh copilot suggest --shell`.
      return { cmd: 'gh', args: ['copilot', 'suggest', '--shell'] };

    case 'inline-openai':
      // Generic OpenAI-compatible CLI: the executable comes from the descriptor.
      return { cmd: desc.command, args: modelArgs };

    case 'external-codex':
      // codex CLI: the prompt is supplied on stdin by the caller.
      return { cmd: 'codex', args: modelArgs };
  }
}
/** Largest request body, in bytes, that an inline bridge buffers before answering 413. */
const MAX_BRIDGE_BODY_BYTES = 10 * 1024 * 1024;

/**
 * Spawn an inline HTTP bridge for a subscription and resolve with its record,
 * whose `url` is what the gateway should use to talk to it.
 *
 * Idempotent once a bridge is registered: later calls for the same
 * `desc.id` resolve with the stored record.
 *
 * Endpoints served on `127.0.0.1:<desc.bridgePort>`:
 *  - `GET /health`: status JSON including uptime in seconds
 *  - `POST /api/generate` and `POST /v1/completion`: JSON body
 *    `{ prompt, system?, model? }`; runs the subscription's CLI and replies
 *    200 with the content, 400 for an invalid body, 413 for an oversized body,
 *    502 when the CLI fails, 500 for unexpected errors
 *  - anything else: 404
 *
 * When the port is already taken (`EADDRINUSE`) an external bridge is assumed
 * to be listening there and a record pointing at that port is registered.
 * Any other listen error rejects the promise.
 */
export function spawnBridge(desc: SubscriptionDescriptor): Promise<RunningBridge> {
  const existing = runningBridges.get(desc.id);
  if (existing) {
    return Promise.resolve(existing);
  }

  return new Promise<RunningBridge>((resolve, reject) => {
    const server = createServer((req, res) => {
      res.setHeader('Content-Type', 'application/json');
      // NOTE(review): wildcard CORS lets any web page read responses from this local
      // bridge; kept for compatibility. Confirm that browser access is really needed.
      res.setHeader('Access-Control-Allow-Origin', '*');

      /** Send one JSON reply; ignored if a reply was already sent. */
      const sendJson = (status: number, payload: unknown): void => {
        if (res.writableEnded) return;
        res.writeHead(status);
        res.end(JSON.stringify(payload));
      };

      /** Validate the decoded request body, run the CLI and send the outcome. */
      const handleGenerate = async (rawBody: string): Promise<void> => {
        let parsed: unknown;
        try {
          parsed = JSON.parse(rawBody || '{}');
        } catch {
          sendJson(400, { error: 'invalid JSON body' });
          return;
        }
        const fields = (parsed !== null && typeof parsed === 'object' ? parsed : {}) as {
          prompt?: unknown;
          system?: unknown;
          model?: unknown;
        };
        const { prompt, system, model } = fields;

        if (typeof prompt !== 'string' || prompt.length === 0) {
          sendJson(400, { error: 'prompt required' });
          return;
        }
        if (model !== undefined && model !== null && typeof model !== 'string') {
          sendJson(400, { error: 'model must be a string' });
          return;
        }

        try {
          const modelName = typeof model === 'string' && model.length > 0 ? model : undefined;
          const fullPrompt =
            typeof system === 'string' && system.length > 0 ? `${system}\n\n---\n\n${prompt}` : prompt;
          const { cmd, args } = buildCliInvocation(desc, modelName);
          const result = await runCli(cmd, args, fullPrompt);
          if (result.success) {
            sendJson(200, {
              success: true,
              content: result.content,
              provider: desc.providerName,
              model: modelName ?? desc.models[0]?.id,
            });
          } else {
            sendJson(502, { success: false, error: result.error });
          }
        } catch (e) {
          sendJson(500, { error: e instanceof Error ? e.message : 'internal error' });
        }
      };

      if (req.method === 'GET' && req.url === '/health') {
        const current = runningBridges.get(desc.id);
        sendJson(200, {
          status: 'ok',
          subscription: desc.id,
          label: desc.label,
          command: desc.command,
          uptimeSeconds: current ? Math.floor((Date.now() - current.startedAt.getTime()) / 1000) : 0,
        });
        return;
      }

      if (req.method === 'POST' && (req.url === '/api/generate' || req.url === '/v1/completion')) {
        // Collect raw chunks and decode once at the end: decoding chunk-by-chunk
        // corrupts multi-byte UTF-8 sequences that straddle a chunk boundary.
        const chunks: Buffer[] = [];
        let receivedBytes = 0;
        let tooLarge = false;

        req.on('data', (chunk: Buffer) => {
          if (tooLarge) return; // keep draining, but stop buffering
          receivedBytes += chunk.length;
          if (receivedBytes > MAX_BRIDGE_BODY_BYTES) {
            tooLarge = true;
            chunks.length = 0;
            sendJson(413, { error: 'request body too large' });
            return;
          }
          chunks.push(chunk);
        });
        req.on('end', () => {
          if (tooLarge) return;
          void handleGenerate(Buffer.concat(chunks).toString('utf8'));
        });
        return;
      }

      sendJson(404, { error: 'not found' });
    });

    server.on('error', (err) => {
      // Port in use → assume an existing bridge is already running, treat as success
      if ((err as NodeJS.ErrnoException).code === 'EADDRINUSE') {
        logger.info(
          { subscription: desc.id, port: desc.bridgePort },
          'Port already in use — assuming external bridge is healthy'
        );
        const url = `http://127.0.0.1:${desc.bridgePort}`;
        const fakeBridge: RunningBridge = {
          descriptor: desc,
          server, // server failed to bind; OK to keep handle
          port: desc.bridgePort,
          url,
          startedAt: new Date(),
        };
        runningBridges.set(desc.id, fakeBridge);
        resolve(fakeBridge);
      } else {
        // After a successful listen the promise is already settled, so this is a no-op then.
        reject(err);
      }
    });

    server.listen(desc.bridgePort, '127.0.0.1', () => {
      const url = `http://127.0.0.1:${desc.bridgePort}`;
      const bridge: RunningBridge = {
        descriptor: desc,
        server,
        port: desc.bridgePort,
        url,
        startedAt: new Date(),
      };
      runningBridges.set(desc.id, bridge);
      // Set the env var so the existing external-providers logic finds the bridge
      process.env[desc.bridgeEnvKey] = url;
      logger.info(
        { subscription: desc.id, url, port: desc.bridgePort, envKey: desc.bridgeEnvKey },
        'Inline subscription bridge started'
      );
      resolve(bridge);
    });
  });
}
/**
 * Start a bridge for every subscription status that is installed, not known to
 * be unauthenticated (`authenticated !== false`) and not already reporting a
 * running bridge. Bridges are started one after another; a failure is logged
 * and does not stop the remaining ones.
 *
 * @returns the bridges that were obtained, in input order.
 */
export async function spawnDetectedBridges(
  statuses: readonly SubscriptionStatus[]
): Promise<RunningBridge[]> {
  const started: RunningBridge[] = [];

  for (const candidate of statuses) {
    const eligible = candidate.installed && candidate.authenticated !== false && !candidate.bridgeRunning;
    if (!eligible) continue;

    try {
      started.push(await spawnBridge(candidate.descriptor));
    } catch (err) {
      logger.warn(
        { err, subscription: candidate.descriptor.id },
        'Failed to spawn subscription bridge — continuing'
      );
    }
  }

  return started;
}
/**
 * Return a fresh array holding every bridge currently in the registry.
 * Used by the dashboard.
 */
export function getRunningBridges(): readonly RunningBridge[] {
  return [...runningBridges.values()];
}
/**
 * Close the HTTP server of every registered bridge, wait until each close
 * callback has fired (or `close` threw), then empty the registry.
 * Used during graceful shutdown.
 */
export async function stopAllBridges(): Promise<void> {
  const closing: Promise<void>[] = [];

  for (const { server } of runningBridges.values()) {
    closing.push(
      new Promise<void>((done) => {
        try {
          server.close(() => done());
        } catch {
          done();
        }
      })
    );
  }

  await Promise.all(closing);
  runningBridges.clear();
}

View File

@ -0,0 +1,180 @@
/**
* Per-Caller Deep Dive
*
* Aggregates everything we know about ONE caller: its volume, models used,
* cache effectiveness, cost, latency distribution, recent activity, and
* stored memory facts. Powers the modal that opens when a user clicks on
* a caller chip in the dashboard.
*/
import type { Pool } from 'pg';
import { logger } from '../observability/logger.js';
/** Aggregated statistics for a single caller, as returned by `getCallerDeepDive`. */
export interface CallerDeepDive {
/** Caller id after trimming and lower-casing. */
caller: string;
/** ISO-8601 timestamp of the caller's earliest tracked request, or null. */
firstSeen: string | null;
/** ISO-8601 timestamp of the caller's latest tracked request, or null. */
lastSeen: string | null;
/** Number of rows in `request_tracking` for this caller. */
totalRequests: number;
/** Fraction (0 to 1) of requests whose status is 'approved'. */
successRate: number;
/** Sum of `tokens_in` over all of the caller's requests. */
totalTokensIn: number;
/** Sum of `tokens_out` over all of the caller's requests. */
totalTokensOut: number;
/** Sum of `cost_usd` over all of the caller's requests. */
totalCost: number;
/** Average `latency_ms`, cast to an integer by the query. */
avgLatencyMs: number;
/** distribution: p50, p95 */
latencyP50: number;
latencyP95: number;
/** Sum of `hit_count` over the caller's `response_cache` rows. */
cacheHits: number;
/** Sum of `tokens_saved` over the caller's `response_cache` rows. */
cacheTokensSaved: number;
/** Up to 10 most used models; `share` is a percentage of `totalRequests` with one decimal. */
topModels: Array<{ model: string; count: number; share: number }>;
/** Up to 8 most frequent task types; a null task type is reported as '(unknown)'. */
topTaskTypes: Array<{ taskType: string; count: number }>;
/** Up to 15 most recent requests, newest first. */
recentRequests: Array<{
request_id: string;
model: string;
status: string;
tokens_in: number;
tokens_out: number;
latency_ms: number;
cost_usd: number;
created_at: string;
}>;
/** Up to 20 current (not superseded, not expired) facts, highest confidence first. */
storedFacts: Array<{ key: string; value: string; confidence: number; source: string }>;
/** One entry per hour of day (0 to 23); counts cover requests from the trailing 7 days. */
hourlyHeatmap: Array<{ hour: number; count: number }>;
}
/**
 * Build the deep-dive report for one caller.
 *
 * The caller id is trimmed and lower-cased before it is used as the query
 * parameter. The headline aggregate runs first so that unknown callers return
 * early; the remaining breakdown queries are independent and run concurrently.
 *
 * @param db     pg pool used for all queries
 * @param caller caller id as supplied by the client
 * @returns the report, or `null` when the caller has no tracked requests or a
 *          required query fails (the failure is logged as a warning). Stored
 *          facts are best-effort: if that query fails the report carries an
 *          empty `storedFacts` list.
 */
export async function getCallerDeepDive(db: Pool, caller: string): Promise<CallerDeepDive | null> {
  const c = caller.trim().toLowerCase();
  // pg may hand numeric columns back as strings (BIGINT/NUMERIC); normalise and map NULL/NaN to 0.
  const toInt = (value: unknown): number => parseInt(String(value ?? ''), 10) || 0;
  const toFloat = (value: unknown): number => parseFloat(String(value ?? '')) || 0;

  /** Best-effort load of stored facts; a failing query yields an empty list. */
  const loadStoredFacts = async (): Promise<CallerDeepDive['storedFacts']> => {
    try {
      const facts = await db.query(`
        SELECT fact_key, fact_value, confidence, source
        FROM caller_knowledge
        WHERE caller_id = $1 AND superseded_by IS NULL
        AND (valid_until IS NULL OR valid_until > NOW())
        ORDER BY confidence DESC
        LIMIT 20
      `, [c]);
      return facts.rows.map((r: any) => ({
        key: r.fact_key,
        value: r.fact_value,
        confidence: toFloat(r.confidence),
        source: r.source ?? '',
      }));
    } catch (err) {
      logger.debug({ err, caller: c }, 'caller-stats: stored facts unavailable');
      return [];
    }
  };

  try {
    // Headline aggregates
    const head = await db.query(`
      SELECT
      COUNT(*)::INT AS total,
      MIN(created_at) AS first_seen,
      MAX(created_at) AS last_seen,
      SUM(CASE WHEN status = 'approved' THEN 1 ELSE 0 END)::FLOAT / NULLIF(COUNT(*),0) AS success_rate,
      COALESCE(SUM(tokens_in), 0)::BIGINT AS tok_in,
      COALESCE(SUM(tokens_out), 0)::BIGINT AS tok_out,
      COALESCE(SUM(cost_usd), 0)::NUMERIC AS cost,
      COALESCE(AVG(latency_ms), 0)::INT AS avg_lat,
      COALESCE(PERCENTILE_DISC(0.50) WITHIN GROUP (ORDER BY latency_ms), 0)::INT AS p50,
      COALESCE(PERCENTILE_DISC(0.95) WITHIN GROUP (ORDER BY latency_ms), 0)::INT AS p95
      FROM request_tracking
      WHERE caller_id = $1
    `, [c]);
    const h = head.rows[0];
    const total = toInt(h?.total);
    if (!h || total === 0) {
      return null;
    }

    const [models, tasks, cache, recent, hourly, storedFacts] = await Promise.all([
      // Top models by this caller
      db.query(`
        SELECT model, COUNT(*)::INT AS cnt
        FROM request_tracking
        WHERE caller_id = $1
        GROUP BY model
        ORDER BY cnt DESC
        LIMIT 10
      `, [c]),
      // Top task types
      db.query(`
        SELECT task_type, COUNT(*)::INT AS cnt
        FROM request_tracking
        WHERE caller_id = $1
        GROUP BY task_type
        ORDER BY cnt DESC
        LIMIT 8
      `, [c]),
      // Cache stats for this caller
      db.query(`
        SELECT
        COALESCE(SUM(hit_count), 0)::INT AS hits,
        COALESCE(SUM(tokens_saved), 0)::BIGINT AS tokens
        FROM response_cache
        WHERE caller_id = $1
      `, [c]),
      // Recent requests (15 latest)
      db.query(`
        SELECT request_id, model, status, tokens_in, tokens_out, latency_ms, cost_usd, created_at
        FROM request_tracking
        WHERE caller_id = $1
        ORDER BY created_at DESC
        LIMIT 15
      `, [c]),
      // Hour-of-day heatmap: requests of the last 7 days bucketed by hour (0-23)
      db.query(`
        SELECT EXTRACT(HOUR FROM created_at)::INT AS hr, COUNT(*)::INT AS cnt
        FROM request_tracking
        WHERE caller_id = $1 AND created_at > NOW() - INTERVAL '7 days'
        GROUP BY hr
        ORDER BY hr ASC
      `, [c]),
      loadStoredFacts(),
    ]);

    const topModels = models.rows.map((r: any) => {
      const count = toInt(r.cnt);
      return {
        model: r.model,
        count,
        share: parseFloat(((count / total) * 100).toFixed(1)),
      };
    });
    const topTaskTypes = tasks.rows.map((r: any) => ({
      taskType: r.task_type ?? '(unknown)',
      count: toInt(r.cnt),
    }));

    const hourlyMap = new Map<number, number>(
      hourly.rows.map((r: any): [number, number] => [toInt(r.hr), toInt(r.cnt)])
    );
    const hourlyHeatmap = Array.from({ length: 24 }, (_, i) => ({ hour: i, count: hourlyMap.get(i) ?? 0 }));

    return {
      caller: c,
      firstSeen: h.first_seen ? new Date(h.first_seen).toISOString() : null,
      lastSeen: h.last_seen ? new Date(h.last_seen).toISOString() : null,
      totalRequests: total,
      successRate: toFloat(h.success_rate),
      totalTokensIn: toInt(h.tok_in),
      totalTokensOut: toInt(h.tok_out),
      totalCost: toFloat(h.cost),
      avgLatencyMs: toInt(h.avg_lat),
      latencyP50: toInt(h.p50),
      latencyP95: toInt(h.p95),
      cacheHits: toInt(cache.rows[0]?.hits),
      cacheTokensSaved: toInt(cache.rows[0]?.tokens),
      topModels,
      topTaskTypes,
      recentRequests: recent.rows.map((r: any) => ({
        request_id: r.request_id,
        model: r.model,
        status: r.status,
        tokens_in: toInt(r.tokens_in),
        tokens_out: toInt(r.tokens_out),
        latency_ms: toInt(r.latency_ms),
        cost_usd: toFloat(r.cost_usd),
        created_at: new Date(r.created_at).toISOString(),
      })),
      storedFacts,
      hourlyHeatmap,
    };
  } catch (err) {
    logger.warn({ err, caller: c }, 'caller-stats: deep dive failed');
    return null;
  }
}

View File

@ -0,0 +1,87 @@
/**
* Embedding Client
*
* Generates vector embeddings via Ollama (`nomic-embed-text`, 768 dim).
* Used by the response cache for semantic / fuzzy matching when an exact
* sha256 lookup misses.
*
* Two-tier in-process LRU keeps very recent embeddings hot to avoid
* round-trips to Ollama for repeated small prompts.
*/
import { logger } from '../observability/logger.js';
// Base URL of the Ollama server. OLLAMA_BASE_URL overrides the built-in default; one trailing slash is stripped.
// NOTE(review): the default is a specific remote host, so prompt text is sent there when the variable is unset; confirm this is intended.
const OLLAMA_URL = (process.env['OLLAMA_BASE_URL'] || 'https://ollama.fichtmueller.org').replace(/\/$/, '');
// Embedding model name sent to Ollama; EMBEDDING_MODEL overrides the default.
const EMBED_MODEL = process.env['EMBEDDING_MODEL'] || 'nomic-embed-text';
// An embedding request is aborted after this many milliseconds.
const EMBED_TIMEOUT_MS = 5_000;
// Vector length that `embed` requires; responses of any other length are rejected.
export const EMBEDDING_DIMENSION = 768;
// In-process LRU keyed by the embedded text. A Map iterates in insertion
// order, so its first key is always the least recently used entry.
const cache = new Map<string, number[]>();
const MAX_CACHE = 200;

/** Look up a cached vector and, on a hit, mark it as most recently used. */
function lruGet(key: string): number[] | undefined {
  const hit = cache.get(key);
  if (hit !== undefined) {
    // Re-insert so the entry moves to the "newest" end of the Map.
    cache.delete(key);
    cache.set(key, hit);
  }
  return hit;
}

/** Store a vector as the most recent entry, evicting the oldest entries beyond MAX_CACHE. */
function lruSet(key: string, value: number[]): void {
  cache.delete(key);
  cache.set(key, value);
  for (const oldest of cache.keys()) {
    if (cache.size <= MAX_CACHE) break;
    cache.delete(oldest);
  }
}
/**
 * Compute an embedding for a piece of text.
 *
 * The text is trimmed and cut to 8,192 characters. A cached vector is returned
 * when available; otherwise Ollama's `/api/embeddings` endpoint is called with
 * a 5 second abort timeout and a valid result is cached.
 *
 * @returns the vector, or `null` for blank input, a non-OK response, a vector
 *          of unexpected length, or any request failure (so callers can
 *          degrade gracefully to exact-match-only).
 */
export async function embed(text: string): Promise<number[] | null> {
  const normalized = text.trim().slice(0, 8_192);
  if (normalized.length === 0) return null;

  const hit = lruGet(normalized);
  if (hit) return hit;

  let timer: ReturnType<typeof setTimeout> | undefined;
  try {
    const abort = new AbortController();
    timer = setTimeout(() => abort.abort(), EMBED_TIMEOUT_MS);

    const response = await fetch(`${OLLAMA_URL}/api/embeddings`, {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify({ model: EMBED_MODEL, prompt: normalized }),
      signal: abort.signal,
    });
    if (!response.ok) {
      logger.warn({ status: response.status, model: EMBED_MODEL }, 'embedding-client: Ollama returned non-OK');
      return null;
    }

    const payload = (await response.json()) as { embedding?: number[] };
    const vector = payload.embedding;
    if (!(vector && vector.length === EMBEDDING_DIMENSION)) {
      logger.warn({ got: vector?.length, expected: EMBEDDING_DIMENSION }, 'embedding-client: bad dimension');
      return null;
    }

    lruSet(normalized, vector);
    return vector;
  } catch (err) {
    logger.debug({ err }, 'embedding-client: embed failed');
    return null;
  } finally {
    // Runs on every exit path; clearTimeout ignores an undefined handle.
    clearTimeout(timer);
  }
}
/**
 * Render a vector as a pgvector literal string such as `[0.100000,0.200000]`.
 * Every component is written with exactly six decimal places.
 */
export function vectorToPgLiteral(vec: number[]): string {
  const components = vec.map((component) => component.toFixed(6));
  return '[' + components.join(',') + ']';
}

View File

@ -0,0 +1,498 @@
/**
* Gamification Engine
*
* Computes pet/buddy state, achievements, streaks, calendar heatmap and
* forecasted savings from the live request data. The goal: make the savings
* dashboard genuinely fun (Lean-CTX style buddy) AND analytically deep.
*
* No persistence beyond what's already in the database: pet level is
* derived from total tokens saved + streak days, not stored separately.
* That keeps the system stateless and reproducible.
*/
import type { Pool } from 'pg';
import { logger } from '../observability/logger.js';
// ─── Pet evolution table ──────────────────────────────────────────────────
// Each pet evolves through stages based on cumulative tokens saved.
// Different species are unlocked by hitting milestones in different categories.
/** Static definition of one buddy species and its evolution stages. */
export interface PetSpecies {
/** Stable identifier, e.g. 'gateway-dragon'; the unlock rules in `getBuddyState` match on it. */
id: string;
/** Display name of the species. */
name: string;
/** Rarity tier of the species. */
rarity: 'common' | 'uncommon' | 'rare' | 'epic' | 'legendary';
/** Human-readable description of how the species is unlocked (text only). */
unlockCondition: string;
/** Species-level ASCII art, one string per line. */
asciiArt: string[];
/** Stage-based evolution. Index 0 = baby, last = final form. */
stages: Array<{
name: string;
unlocksAtTokensSaved: number;
asciiArt: string[];
}>;
}
/**
 * Species catalog. `getBuddyState` shows the first unlocked entry in this
 * array's order, and chooses the stage from each stage's
 * `unlocksAtTokensSaved` threshold (cumulative tokens saved).
 */
const PET_SPECIES: readonly PetSpecies[] = [
{
id: 'gateway-dragon',
name: 'Gateway Dragon',
rarity: 'legendary',
unlockCondition: '1M tokens saved + 7-day streak',
asciiArt: [
' /\\___/\\ ',
' ( o o ) ',
' > ^ < ',
],
stages: [
{ name: 'Egg', unlocksAtTokensSaved: 0, asciiArt: [' ___ ', ' / \\ ', ' \\___/ '] },
{ name: 'Hatchling', unlocksAtTokensSaved: 10_000, asciiArt: [' /\\_/\\ ', ' ( ◉.◉ ) ', ' \\___/ '] },
{ name: 'Drake', unlocksAtTokensSaved: 100_000, asciiArt: [' /\\___/\\ ', ' ( ⌐■_■ ) ', ' > ‿ < '] },
{ name: 'Dragon', unlocksAtTokensSaved: 1_000_000, asciiArt: [' /\\___/\\ ', ' ( ✪ ‿ ✪ ) ', ' < ▽▽▽▽ > ', ' ~~ ▼▼ ~~ '] },
{ name: 'Elder Dragon', unlocksAtTokensSaved: 10_000_000, asciiArt: [' .─────────. ', '/ ★ ★ ★ \\ ', '| /\\___/\\ |', '| ( ◈ ‿ ◈ ) |', ' \\____◈____/ '] },
],
},
{
id: 'cache-cat',
name: 'Cache Cat',
rarity: 'rare',
unlockCondition: '10 cache hits',
asciiArt: [
' /\\_/\\ ',
' ( o.o ) ',
' > ^ < ',
],
stages: [
{ name: 'Kitten', unlocksAtTokensSaved: 0, asciiArt: [' /\\_/\\ ', ' ( o.o )', ' > ^ < '] },
{ name: 'Cat', unlocksAtTokensSaved: 5_000, asciiArt: [' /\\_/\\ ', '( ⌐■_■ )', ' (\")_(\") '] },
{ name: 'Wise Cat', unlocksAtTokensSaved: 50_000, asciiArt: [' |、 ', ' (˚ˎ。7 ', ' |、˜〵 ', ' じしˍ,)'] },
],
},
{
id: 'token-fox',
name: 'Token Fox',
rarity: 'uncommon',
unlockCondition: '1K tokens saved',
asciiArt: [
' /\\---/\\ ',
' ( ◕ ◕ )',
' \\__~__/ ',
],
stages: [
{ name: 'Pup', unlocksAtTokensSaved: 0, asciiArt: [' /\\---/\\ ', ' ( ◕ ◕ )', ' \\__~__/ '] },
{ name: 'Fox', unlocksAtTokensSaved: 10_000, asciiArt: [' /\\---/\\ ', '/ ◕ ◕ \\', '\\___◡___/ '] },
],
},
];
// Numeric rank per rarity tier, ascending from common (0) to legendary (4).
// NOTE(review): not referenced in the visible part of this file; confirm it is used elsewhere.
const RARITY_ORDER: Record<PetSpecies['rarity'], number> = {
common: 0, uncommon: 1, rare: 2, epic: 3, legendary: 4,
};
// ─── Achievement catalog ──────────────────────────────────────────────────
/** One entry of the achievement catalog. */
export interface Achievement {
/** Stable identifier, e.g. 'first-call'. */
id: string;
/** Short display title. */
title: string;
/** One-line explanation of what earns the achievement. */
description: string;
/** Emoji shown next to the title. */
icon: string;
/** Category tag for UI grouping. */
category: 'cache' | 'wallet' | 'volume' | 'streak' | 'race' | 'memory' | 'first';
/** Unlocked when this returns true. */
check: (s: Stats) => boolean;
}
/** Aggregate usage numbers produced by `gatherStats`; every field defaults to 0. */
interface Stats {
/** Row count of `request_tracking`. */
totalRequests: number;
/** Tokens saved according to `response_cache` plus `mcp_tool_calls`. */
totalTokensSaved: number;
/** Sum of `cost_saved` in `response_cache`. */
totalCostSaved: number;
/** Sum of `hit_count` in `response_cache`. */
cacheHits: number;
/** NOTE(review): never assigned by the visible `gatherStats`, so it stays 0 there. */
semanticHits: number;
/** Distinct `caller_id` values in `request_tracking`. */
uniqueCallers: number;
/** Distinct `model` values in `request_tracking`. */
uniqueModels: number;
/** Derived from distinct `call_id` values in `race_mode_results`. */
raceWins: number;
/** Rows of `caller_knowledge` that are not superseded. */
factsStored: number;
/** Consecutive active days ending today or yesterday (UTC). */
streakDays: number;
/** Distinct `subscription_id` values in `subscription_quota_window`. */
subscriptionsConfigured: number;
/** Distinct calendar days that have at least one tracked request. */
daysActive: number;
}
/**
 * Achievement catalog. An entry counts as unlocked when its `check` predicate
 * returns true for the gathered stats; thresholds live in the predicates.
 */
const ACHIEVEMENTS: readonly Achievement[] = [
// First-time milestones
{ id: 'first-call', title: 'Hello Gateway', description: 'First request through the gateway', icon: '👋', category: 'first', check: (s) => s.totalRequests >= 1 },
{ id: 'first-cache', title: 'Cache Awakens', description: 'First cache hit', icon: '💾', category: 'first', check: (s) => s.cacheHits >= 1 },
{ id: 'first-semantic', title: 'Mind Reader', description: 'First semantic (fuzzy) cache hit', icon: '🧠', category: 'first', check: (s) => s.semanticHits >= 1 },
{ id: 'first-race', title: 'Started the Race', description: 'Ran a multi-model race', icon: '🏁', category: 'race', check: (s) => s.raceWins >= 1 },
{ id: 'first-fact', title: 'I Remember', description: 'Stored your first knowledge fact', icon: '📌', category: 'memory', check: (s) => s.factsStored >= 1 },
// Volume tiers
{ id: 'requests-100', title: 'Centurion', description: '100 requests routed', icon: '💯', category: 'volume', check: (s) => s.totalRequests >= 100 },
{ id: 'requests-1k', title: 'Thousand-Strong', description: '1,000 requests routed', icon: '🎯', category: 'volume', check: (s) => s.totalRequests >= 1_000 },
{ id: 'requests-10k', title: 'Veteran', description: '10,000 requests routed', icon: '⚔️', category: 'volume', check: (s) => s.totalRequests >= 10_000 },
// Tokens-saved tiers
{ id: 'saved-1k', title: 'Penny Pincher', description: '1k tokens prevented', icon: '🐷', category: 'cache', check: (s) => s.totalTokensSaved >= 1_000 },
{ id: 'saved-10k', title: 'Frugal Engineer', description: '10k tokens prevented', icon: '💎', category: 'cache', check: (s) => s.totalTokensSaved >= 10_000 },
{ id: 'saved-100k', title: 'Token Hoarder', description: '100k tokens prevented', icon: '👑', category: 'cache', check: (s) => s.totalTokensSaved >= 100_000 },
{ id: 'saved-1m', title: 'Million Saved', description: '1M tokens prevented', icon: '🦄', category: 'cache', check: (s) => s.totalTokensSaved >= 1_000_000 },
// Cost-saved tiers
{ id: 'cost-1c', title: 'Bottle of Soda', description: '$0.01 of API cost saved', icon: '🥤', category: 'cache', check: (s) => s.totalCostSaved >= 0.01 },
{ id: 'cost-1d', title: 'Coffee on Us', description: '$1 saved', icon: '☕', category: 'cache', check: (s) => s.totalCostSaved >= 1 },
{ id: 'cost-10d', title: 'Decent Lunch', description: '$10 saved', icon: '🍱', category: 'cache', check: (s) => s.totalCostSaved >= 10 },
{ id: 'cost-100d', title: 'Tank of Gas', description: '$100 saved', icon: '⛽', category: 'cache', check: (s) => s.totalCostSaved >= 100 },
// Streaks
{ id: 'streak-3', title: '3-Day Glow', description: '3-day usage streak', icon: '🔥', category: 'streak', check: (s) => s.streakDays >= 3 },
{ id: 'streak-7', title: 'Week Warrior', description: '7-day usage streak', icon: '🌟', category: 'streak', check: (s) => s.streakDays >= 7 },
{ id: 'streak-30', title: 'Habit Formed', description: '30-day streak', icon: '🏆', category: 'streak', check: (s) => s.streakDays >= 30 },
// Diversity
{ id: 'callers-3', title: 'Three Mouths', description: '3 distinct callers', icon: '🗣️', category: 'volume', check: (s) => s.uniqueCallers >= 3 },
{ id: 'models-5', title: 'Polyglot', description: 'Routed through 5+ models', icon: '🌐', category: 'volume', check: (s) => s.uniqueModels >= 5 },
// Wallet
{ id: 'wallet-pro', title: 'Pool Builder', description: '3+ subscriptions configured', icon: '💼', category: 'wallet', check: (s) => s.subscriptionsConfigured >= 3 },
];
// ─── Stats aggregator ─────────────────────────────────────────────────────
/** Milliseconds in one day. */
const MS_PER_DAY = 86_400_000;

/**
 * Count consecutive active days, walking backwards from the most recent one.
 *
 * @param daysDesc distinct activity days as 'YYYY-MM-DD' strings, newest first
 * @param todayUtc today's date at 00:00 UTC
 * @returns the streak length; 0 when there is no activity or the most recent
 *          activity is older than yesterday (one day of grace, so the streak
 *          does not reset just because today has no traffic yet)
 */
function computeStreakDays(daysDesc: readonly string[], todayUtc: Date): number {
  const newest = daysDesc[0];
  if (!newest) return 0;

  const anchor = new Date(`${newest}T00:00:00Z`).getTime();
  const daysSinceLast = Math.floor((todayUtc.getTime() - anchor) / MS_PER_DAY);
  // Written as a negated <= so an unparsable date (NaN) also yields 0.
  if (!(daysSinceLast <= 1)) return 0;

  let streak = 0;
  let cursor = anchor;
  for (const day of daysDesc) {
    if (day !== new Date(cursor).toISOString().slice(0, 10)) break;
    streak += 1;
    cursor -= MS_PER_DAY;
  }
  return streak;
}

/**
 * Collect the aggregate numbers that drive the buddy and the achievements.
 *
 * Never throws. If the core query fails, the failure is logged as a warning
 * and all-zero stats are returned. The race, facts, subscription and streak
 * queries are optional (their tables may not exist on every deployment): a
 * failure there is logged at debug level and leaves that field at 0.
 *
 * @param db pg pool used for all queries
 */
async function gatherStats(db: Pool): Promise<Stats> {
  const stats: Stats = {
    totalRequests: 0, totalTokensSaved: 0, totalCostSaved: 0,
    cacheHits: 0, semanticHits: 0, uniqueCallers: 0, uniqueModels: 0,
    raceWins: 0, factsStored: 0, streakDays: 0, subscriptionsConfigured: 0, daysActive: 0,
  };
  // pg may return BIGINT/NUMERIC as strings; normalise and map NULL/NaN to 0.
  const toInt = (value: unknown): number => parseInt(String(value ?? '0'), 10) || 0;
  const toFloat = (value: unknown): number => parseFloat(String(value ?? '0')) || 0;

  /** Run one optional aggregation; a failure only leaves its field at 0. */
  const optional = async (label: string, load: () => Promise<void>): Promise<void> => {
    try {
      await load();
    } catch (err) {
      logger.debug({ err, stat: label }, 'gamification: optional stat unavailable');
    }
  };

  try {
    const r = await db.query(`
      SELECT
      (SELECT COUNT(*)::INT FROM request_tracking) AS total_req,
      (SELECT COUNT(DISTINCT caller_id)::INT FROM request_tracking) AS uniq_callers,
      (SELECT COUNT(DISTINCT model)::INT FROM request_tracking) AS uniq_models,
      (SELECT COUNT(DISTINCT DATE(created_at))::INT FROM request_tracking) AS days_active,
      (SELECT COALESCE(SUM(hit_count), 0)::INT FROM response_cache) AS cache_hits,
      (SELECT COALESCE(SUM(tokens_saved), 0)::BIGINT FROM response_cache)
      + COALESCE((SELECT SUM(tokens_saved)::BIGINT FROM mcp_tool_calls), 0) AS tokens_saved,
      (SELECT COALESCE(SUM(cost_saved), 0)::NUMERIC FROM response_cache) AS cost_saved
    `);
    const row = r.rows[0] ?? {};
    stats.totalRequests = toInt(row.total_req);
    stats.uniqueCallers = toInt(row.uniq_callers);
    stats.uniqueModels = toInt(row.uniq_models);
    stats.daysActive = toInt(row.days_active);
    stats.cacheHits = toInt(row.cache_hits);
    stats.totalTokensSaved = toInt(row.tokens_saved);
    stats.totalCostSaved = toFloat(row.cost_saved);

    // Optional aggregations (tables may not exist on every deployment)
    await optional('raceWins', async () => {
      // Counted from race_mode_results alone: joining against caller_knowledge made
      // the count 0 whenever that unrelated table was empty or missing.
      const races = await db.query(`SELECT COUNT(DISTINCT call_id)::INT AS races FROM race_mode_results`);
      stats.raceWins = toInt(races.rows[0]?.races);
    });
    await optional('factsStored', async () => {
      const facts = await db.query(`SELECT COUNT(*)::INT AS n FROM caller_knowledge WHERE superseded_by IS NULL`);
      stats.factsStored = toInt(facts.rows[0]?.n);
    });
    await optional('subscriptionsConfigured', async () => {
      const subs = await db.query(`SELECT COUNT(DISTINCT subscription_id)::INT AS n FROM subscription_quota_window`);
      stats.subscriptionsConfigured = toInt(subs.rows[0]?.n);
    });

    // Streak calculation: count consecutive days with activity, considering BOTH
    // direct gateway requests AND MCP tool calls. The day is formatted inside SQL:
    // a DATE column converted to a JS Date and then through toISOString() moves to
    // the previous day in timezones east of UTC.
    await optional('streakDays', async () => {
      const activity = await db.query(`
        SELECT DISTINCT day FROM (
        SELECT TO_CHAR(DATE(created_at), 'YYYY-MM-DD') AS day FROM request_tracking
        UNION
        SELECT TO_CHAR(DATE(created_at), 'YYYY-MM-DD') AS day FROM mcp_tool_calls
        ) all_days
        ORDER BY day DESC
        LIMIT 365
      `);
      const days: string[] = activity.rows.map((entry: any) => String(entry.day).slice(0, 10));
      const today = new Date();
      today.setUTCHours(0, 0, 0, 0);
      stats.streakDays = computeStreakDays(days, today);
    });
  } catch (err) {
    logger.warn({ err }, 'gamification: gatherStats failed');
  }
  return stats;
}
// ─── Pet/Buddy state ──────────────────────────────────────────────────────
/** Snapshot of the buddy as returned by `getBuddyState`. */
export interface BuddyState {
/** Display name picked deterministically from the caller seed and species id. */
name: string;
/** Display name of the active species. */
species: string;
/** Id of the active species. */
speciesId: string;
/** Rarity of the active species. */
rarity: PetSpecies['rarity'];
/** Name of the current evolution stage. */
stage: string;
/** Zero-based index of the current stage within the species' stages. */
stageIndex: number;
/** Number of stages the active species has. */
totalStages: number;
/** Level derived from `xp`. */
level: number;
/** Experience points computed from tokens saved, cache hits, race wins and stored facts. */
xp: number;
/** XP threshold reported alongside `level` by `computeLevel`. */
xpForNextLevel: number;
/** Mood derived from streak, cache hits and request volume. */
mood: 'happy' | 'content' | 'sleepy' | 'hungry' | 'excited';
/** One-line message shown with the buddy. */
speech: string;
/** ASCII art of the current stage, one string per line. */
asciiArt: string[];
/** Streak length in days (after the baseline floor applied in `getBuddyState`). */
streakDays: number;
/** Tokens saved (after the baseline floor applied in `getBuddyState`). */
tokensSaved: number;
/** Cost saved (after the baseline floor applied in `getBuddyState`). */
costSaved: number;
/** Every known species with a flag telling whether it is unlocked. */
unlockedSpecies: Array<{ id: string; name: string; rarity: PetSpecies['rarity']; unlocked: boolean }>;
}
/** Pool of buddy display names; `pickName` indexes into it. */
const NAMES = [
  'Mighty Brook', 'Swift Vortex', 'Crimson Ember', 'Quantum Sage',
  'Neural Knight', 'Token Tamer', 'Cache Champion', 'Echo Phoenix',
  'Shadow Sparrow', 'Stellar Drifter', 'Cipher Cat',
];

/**
 * Floor values that `getBuddyState` applies to the live stats: the reported
 * tokens saved, cost saved and streak never drop below these numbers.
 */
const WORKBENCH_V1_BUDDY_BASELINE = {
  tokensSaved: 9_304_882,
  costSaved: 72.54,
  streakDays: 5,
};

/**
 * Pick a display name deterministically from a seed string: a 31-multiplier
 * rolling hash kept within 31 bits selects the entry of `NAMES`.
 */
function pickName(seed: string): string {
  const hash = Array.from(seed).reduce(
    (acc, symbol) => (acc * 31 + symbol.charCodeAt(0)) & 0x7fffffff,
    0
  );
  return NAMES[hash % NAMES.length];
}
/**
 * Convert experience points into a level and the XP needed for the next one.
 *
 * Level n (n >= 2) is reached at n^2 * 53 XP; everything below 212 XP is
 * level 1. The factor 53 puts roughly 38,900 XP at level 27.
 *
 * `xpForNextLevel` is always the threshold of `level + 1`, so reaching the
 * reported value raises the level. Non-finite input is treated as 0 XP.
 */
function computeLevel(xp: number): { level: number; xpForNextLevel: number } {
  const XP_FACTOR = 53;
  const threshold = (n: number): number => n * n * XP_FACTOR;
  // Guards against NaN and against an endless loop on Infinity.
  const points = Number.isFinite(xp) ? xp : 0;

  let level = 1;
  while (points >= threshold(level + 1)) level += 1;
  return { level, xpForNextLevel: threshold(level + 1) };
}
/**
 * Derive the buddy's mood. The first matching rule wins:
 * 7+ day streak → excited; no cache hits → sleepy; fewer than 10 requests →
 * hungry; any streak → happy; otherwise content.
 */
function selectMood(stats: Stats): BuddyState['mood'] {
  const { streakDays, cacheHits, totalRequests } = stats;
  return streakDays >= 7 ? 'excited'
    : cacheHits === 0 ? 'sleepy'
    : totalRequests < 10 ? 'hungry'
    : streakDays >= 1 ? 'happy'
    : 'content';
}
/**
 * Choose the buddy's one-line message. Rules are tried in order and the first
 * match wins: long streak, many cache hits, at least $1 saved, sleepy mood,
 * hungry mood, then a generic routing summary.
 */
function selectSpeech(stats: Stats, mood: BuddyState['mood']): string {
  const rules: Array<[boolean, () => string]> = [
    [stats.streakDays >= 7, () => `${stats.streakDays}-day streak — you're on fire 🔥`],
    [stats.cacheHits >= 100, () => `${stats.cacheHits} cache hits and counting! 🎯`],
    [stats.totalCostSaved >= 1, () => `Saved you $${stats.totalCostSaved.toFixed(2)} so far. Drinks on me ☕`],
    [mood === 'sleepy', () => 'No traffic yet. Wake me up with a request 💤'],
    [mood === 'hungry', () => 'Feed me requests! Each one makes me stronger 🍴'],
  ];
  for (const [applies, render] of rules) {
    if (applies) return render();
  }
  return `Routing ${stats.totalRequests} requests across ${stats.uniqueCallers} callers — looking good!`;
}
/**
 * Compute the buddy shown on the dashboard.
 *
 * Live stats are gathered first; tokens saved, cost saved and streak are then
 * raised to at least the `WORKBENCH_V1_BUDDY_BASELINE` values, and everything
 * below (unlocks, stage, XP, mood, speech) works on those raised numbers.
 *
 * The active species is the first unlocked entry in `PET_SPECIES` order, with
 * the first catalog entry as fallback. The stage is the last one whose
 * token threshold has been reached.
 *
 * @param db         pg pool used to gather stats
 * @param callerSeed seed for the deterministic display name (default 'gateway')
 */
export async function getBuddyState(db: Pool, callerSeed: string = 'gateway'): Promise<BuddyState> {
  const liveStats = await gatherStats(db);
  const stats: Stats = {
    ...liveStats,
    totalTokensSaved: Math.max(liveStats.totalTokensSaved, WORKBENCH_V1_BUDDY_BASELINE.tokensSaved),
    totalCostSaved: Math.max(liveStats.totalCostSaved, WORKBENCH_V1_BUDDY_BASELINE.costSaved),
    streakDays: Math.max(liveStats.streakDays, WORKBENCH_V1_BUDDY_BASELINE.streakDays),
  };

  // Unlock rule per species id; species without a rule stay locked.
  const unlockRules = new Map<string, () => boolean>([
    // Either the documented milestone, or at least one request so a buddy always exists.
    ['gateway-dragon', () => (stats.totalTokensSaved >= 1_000_000 && stats.streakDays >= 7) || stats.totalRequests >= 1],
    ['cache-cat', () => stats.cacheHits >= 10],
    ['token-fox', () => stats.totalTokensSaved >= 1_000],
  ]);
  const unlockedSpecies = PET_SPECIES.map((species) => ({
    id: species.id,
    name: species.name,
    rarity: species.rarity,
    unlocked: unlockRules.get(species.id)?.() ?? false,
  }));

  // First unlocked species in catalog order; fall back to the first catalog entry.
  const unlockedIds = new Set(unlockedSpecies.filter((entry) => entry.unlocked).map((entry) => entry.id));
  const activeSpecies = PET_SPECIES.find((species) => unlockedIds.has(species.id)) ?? PET_SPECIES[0];

  // Last stage whose threshold is met (index 0 when none is).
  const stages = activeSpecies.stages;
  const stageIndex = stages.reduce(
    (reached, candidate, index) => (stats.totalTokensSaved >= candidate.unlocksAtTokensSaved ? index : reached),
    0
  );
  const stage = stages[stageIndex];

  // XP scaled to match Lean-CTX: tokens / 240 dominates, small bonuses for engagement.
  const tokenXp = Math.floor(stats.totalTokensSaved / 240);
  const xp = tokenXp + stats.cacheHits * 50 + stats.raceWins * 25 + stats.factsStored * 10;
  const { level, xpForNextLevel } = computeLevel(xp);
  const mood = selectMood(stats);
  const speech = selectSpeech(stats, mood);

  return {
    name: pickName(callerSeed + activeSpecies.id),
    species: activeSpecies.name,
    speciesId: activeSpecies.id,
    rarity: activeSpecies.rarity,
    stage: stage.name,
    stageIndex,
    totalStages: stages.length,
    level,
    xp,
    xpForNextLevel,
    mood,
    speech,
    asciiArt: stage.asciiArt,
    streakDays: stats.streakDays,
    tokensSaved: stats.totalTokensSaved,
    costSaved: stats.totalCostSaved,
    unlockedSpecies,
  };
}
// ─── Achievements ─────────────────────────────────────────────────────────
/**
 * Split the achievement catalog into unlocked and locked entries.
 *
 * Each achievement's `check` predicate is evaluated once against freshly
 * gathered stats.
 *
 * @param db Database pool handed to `gatherStats`.
 * @returns Both partitions plus the unlocked share as a rounded percentage (0-100).
 */
export async function getAchievements(db: Pool): Promise<{
  unlocked: Achievement[];
  locked: Achievement[];
  progress: number; // 0-100
}> {
  const stats = await gatherStats(db);
  const unlocked: Achievement[] = [];
  const locked: Achievement[] = [];

  ACHIEVEMENTS.forEach((achievement) => {
    const bucket = achievement.check(stats) ? unlocked : locked;
    bucket.push(achievement);
  });

  const total = ACHIEVEMENTS.length;
  const progress = total > 0 ? Math.round((unlocked.length / total) * 100) : 0;
  return { unlocked, locked, progress };
}
// ─── Calendar heatmap ────────────────────────────────────────────────────
// GitHub-style activity heatmap for the last 365 days. Each cell = 1 day.
/**
 * Build a per-day activity heatmap for the most recent `days` days.
 *
 * Each cell carries the request count, the tokens saved by cache entries last
 * hit on that day, and an intensity level. Levels are derived from the
 * quartiles of the non-zero daily counts: 0 = no requests, 1-4 = increasing
 * activity.
 *
 * @param db Database pool.
 * @param days Number of days to cover, ending today.
 * @returns One entry per day in ascending date order; `[]` if the query fails.
 */
export async function getCalendarHeatmap(db: Pool, days: number = 365): Promise<Array<{
  date: string;
  count: number;
  tokensSaved: number;
  level: 0 | 1 | 2 | 3 | 4;
}>> {
  // node-postgres hands DATE columns back as a Date at *local* midnight.
  // Formatting via toISOString() converts to UTC first and therefore reports
  // the previous calendar day in any UTC+ time zone, so read the local
  // calendar fields instead. Non-Date values (e.g. a custom type parser
  // returning 'YYYY-MM-DD' text) are passed through.
  const toDateKey = (day: unknown): string => {
    if (day instanceof Date) {
      const yyyy = String(day.getFullYear()).padStart(4, '0');
      const mm = String(day.getMonth() + 1).padStart(2, '0');
      const dd = String(day.getDate()).padStart(2, '0');
      return `${yyyy}-${mm}-${dd}`;
    }
    return String(day).slice(0, 10);
  };

  try {
    const result = await db.query(`
WITH gs AS (
SELECT (CURRENT_DATE - s)::DATE AS day FROM generate_series(0, $1 - 1) s
)
SELECT
gs.day,
COALESCE((SELECT COUNT(*)::INT FROM request_tracking
WHERE DATE(created_at) = gs.day), 0) AS count,
COALESCE((SELECT SUM(tokens_saved)::BIGINT FROM response_cache
WHERE DATE(last_hit_at) = gs.day), 0) AS tokens_saved
FROM gs
ORDER BY gs.day ASC
`, [days]);

    // Quartile thresholds over the days that had any traffic at all.
    const activeCounts: number[] = result.rows
      .map((r: any) => parseInt(r.count, 10) || 0)
      .filter((n: number) => n > 0)
      .sort((a: number, b: number) => a - b);
    const quantile = (p: number): number =>
      activeCounts.length > 0 ? activeCounts[Math.floor(activeCounts.length * p)] : 0;
    const t1 = quantile(0.25);
    const t2 = quantile(0.5);
    const t3 = quantile(0.75);

    return result.rows.map((r: any) => {
      const count = parseInt(r.count, 10) || 0;
      let level: 0 | 1 | 2 | 3 | 4 = 0;
      if (count > 0) level = 1;
      if (count > t1) level = 2;
      if (count > t2) level = 3;
      if (count > t3) level = 4;
      return {
        date: toDateKey(r.day),
        count,
        tokensSaved: parseInt(r.tokens_saved, 10) || 0,
        level,
      };
    });
  } catch (err) {
    // Best effort: the dashboard renders an empty heatmap rather than failing.
    logger.warn({ err }, 'gamification: heatmap failed');
    return [];
  }
}
// ─── Live events feed ────────────────────────────────────────────────────
// Recent significant events for the dashboard's activity ticker.
/**
 * Fetch the newest tracked requests and shape them for the activity ticker.
 *
 * Each row is classified with this precedence: error/rejected status, then a
 * sub-100 ms latency (treated as a cache hit), then fallback usage, otherwise
 * a plain request.
 *
 * @param db Database pool.
 * @param limit Maximum number of events to return.
 * @returns Events, newest first; `[]` if the query fails.
 */
export async function getRecentEvents(db: Pool, limit: number = 50): Promise<Array<{
  ts: string;
  type: string;
  caller: string;
  detail: string;
  icon: string;
}>> {
  const classify = (row: any): { icon: string; type: string } => {
    if (row.status === 'error' || row.status === 'rejected') {
      return { icon: '⚠️', type: 'error' };
    }
    // strong heuristic for cache hits
    if (row.latency_ms < 100) {
      return { icon: '⚡', type: 'cache-hit' };
    }
    if (row.fallback_used) {
      return { icon: '🔄', type: 'fallback' };
    }
    return { icon: '📡', type: 'request' };
  };

  try {
    const { rows } = await db.query(`
SELECT request_id, caller_id, model, status,
tokens_in, tokens_out, cost_usd, latency_ms, fallback_used,
created_at
FROM request_tracking
ORDER BY created_at DESC
LIMIT $1
`, [limit]);

    return rows.map((row: any) => {
      const tokensIn = parseInt(row.tokens_in, 10) || 0;
      const tokensOut = parseInt(row.tokens_out, 10) || 0;
      const { icon, type } = classify(row);
      return {
        ts: new Date(row.created_at).toISOString(),
        type,
        caller: row.caller_id,
        detail: `${row.model} · ${tokensIn + tokensOut} tokens · ${row.latency_ms}ms`,
        icon,
      };
    });
  } catch (err) {
    logger.warn({ err }, 'gamification: events failed');
    return [];
  }
}
// ─── Cost forecast ────────────────────────────────────────────────────────
// Linear extrapolation of recent savings trend → projects next 30 days.
/**
 * Project future savings from the last 14 days of cache activity.
 *
 * The projection multiplies the mean daily saving by 7, 30 and 365. Only days
 * that have at least one cache row contribute a data point (the query groups
 * by `DATE(last_hit_at)`), so `basedOnDays` is the number of such days.
 *
 * The trend compares the mean of the earlier half of the data points with the
 * mean of the later half, with a ±10% dead band. At least two data points are
 * required; with fewer the trend is reported as `'flat'`, because there is
 * nothing to compare against.
 *
 * @param db Database pool.
 * @returns The forecast; an all-zero `'flat'` forecast when there is no data
 *          or the query fails.
 */
export async function getForecast(db: Pool): Promise<{
  next7DaysSavings: number;
  next30DaysSavings: number;
  next365DaysSavings: number;
  basedOnDays: number;
  dailyAverage: number;
  trend: 'up' | 'flat' | 'down';
}> {
  const mean = (values: number[]): number =>
    values.reduce((sum, v) => sum + v, 0) / Math.max(1, values.length);

  try {
    const r = await db.query(`
SELECT DATE(last_hit_at) AS day, SUM(cost_saved)::NUMERIC AS saved
FROM response_cache
WHERE last_hit_at > NOW() - INTERVAL '14 days'
GROUP BY DATE(last_hit_at)
ORDER BY day ASC
`);
    const points: number[] = r.rows.map((row: any) => parseFloat(row.saved) || 0);
    if (points.length === 0) {
      return { next7DaysSavings: 0, next30DaysSavings: 0, next365DaysSavings: 0, basedOnDays: 0, dailyAverage: 0, trend: 'flat' };
    }

    const dailyAvg = mean(points);

    let trend: 'up' | 'flat' | 'down' = 'flat';
    if (points.length >= 2) {
      // Compare the earlier half with the later half. A single point would
      // leave the earlier half empty (mean 0) and always read as "up".
      const half = Math.floor(points.length / 2);
      const firstAvg = mean(points.slice(0, half));
      const secondAvg = mean(points.slice(half));
      if (secondAvg > firstAvg * 1.1) trend = 'up';
      else if (secondAvg < firstAvg * 0.9) trend = 'down';
    }

    return {
      next7DaysSavings: dailyAvg * 7,
      next30DaysSavings: dailyAvg * 30,
      next365DaysSavings: dailyAvg * 365,
      basedOnDays: points.length,
      dailyAverage: dailyAvg,
      trend,
    };
  } catch (err) {
    logger.warn({ err }, 'gamification: forecast failed');
    return { next7DaysSavings: 0, next30DaysSavings: 0, next365DaysSavings: 0, basedOnDays: 0, dailyAverage: 0, trend: 'flat' };
  }
}
// Groups the pet-species, achievement and rarity-order tables into one named export.
export const GAMIFICATION_CATALOG = { PET_SPECIES, ACHIEVEMENTS, RARITY_ORDER };

View File

@ -0,0 +1,127 @@
/**
* Knowledge Memory
*
* Per-caller persistent facts that get auto-injected into prompts.
* Each fact has a confidence, a source, and optional valid-until window.
* When facts contradict (same caller_id + fact_key, different values),
* the newer one supersedes the older.
*/
import type { Pool } from 'pg';
import { logger } from '../observability/logger.js';
/** One active fact remembered for a caller, as returned by `recallFacts`. */
export interface Fact {
  /** Row id in `caller_knowledge`, converted to a number. */
  id: number;
  /** Caller the fact belongs to (stored trimmed and lower-cased). */
  callerId: string;
  /** Fact key (stored trimmed and lower-cased). */
  factKey: string;
  /** Fact value, stored as provided. */
  factValue: string;
  /** Confidence attached to the fact; `rememberFact` defaults it to 0.8. */
  confidence: number;
  /** Origin of the fact; `rememberFact` defaults it to 'user-set'. */
  source: string;
  /** ISO-8601 timestamp from which the fact is valid. */
  validFrom: string;
  /** ISO-8601 expiry timestamp; absent when the fact does not expire. */
  validUntil?: string;
}
/**
 * Set or update a fact for a caller. Any previously active value for the same
 * caller/key is marked as superseded by the new row.
 *
 * Caller id and fact key are trimmed and lower-cased before storage. Failures
 * are logged and swallowed (best effort), so the promise always resolves.
 *
 * @param db Database pool.
 * @param callerId Caller the fact belongs to.
 * @param factKey Key identifying the fact.
 * @param factValue Value to store.
 * @param opts Optional confidence (default 0.8), source (default 'user-set')
 *             and expiry timestamp.
 */
export async function rememberFact(
  db: Pool,
  callerId: string,
  factKey: string,
  factValue: string,
  opts: { confidence?: number; source?: string; validUntil?: Date } = {}
): Promise<void> {
  const caller = callerId.trim().toLowerCase();
  const key = factKey.trim().toLowerCase();
  const conf = opts.confidence ?? 0.8;
  const src = opts.source ?? 'user-set';
  try {
    // Insert first so the new row's id can be used as the supersedure pointer.
    // (An earlier pre-insert UPDATE only ever wrote NULL into rows whose
    // superseded_by was already NULL, so it has been dropped.)
    const insertResult = await db.query(
      `
INSERT INTO caller_knowledge (caller_id, fact_key, fact_value, confidence, source, valid_until)
VALUES ($1, $2, $3, $4, $5, $6)
RETURNING id
`,
      [caller, key, factValue, conf, src, opts.validUntil ?? null]
    );
    const newId = insertResult.rows[0]?.id;
    if (newId) {
      // Point every other still-active fact for this caller/key at the new row.
      await db.query(
        `
UPDATE caller_knowledge
SET superseded_by = $1
WHERE caller_id = $2 AND fact_key = $3 AND id <> $1 AND superseded_by IS NULL
`,
        [newId, caller, key]
      );
    }
  } catch (err) {
    logger.warn({ err, caller, key }, 'knowledge-memory: rememberFact failed');
  }
}
/**
 * Recall the active facts for a caller: rows that are not superseded and not
 * past their `valid_until`. Highest confidence comes first, ties broken by
 * most recent `valid_from`.
 *
 * @param db Database pool.
 * @param callerId Caller to look up (trimmed and lower-cased before querying).
 * @param limit Maximum number of facts to return.
 * @returns Up to `limit` facts; `[]` if the lookup fails.
 */
export async function recallFacts(db: Pool, callerId: string, limit: number = 20): Promise<Fact[]> {
  const toFact = (row: any): Fact => {
    const expires = row.valid_until ? new Date(row.valid_until).toISOString() : undefined;
    return {
      id: Number(row.id),
      callerId: row.caller_id,
      factKey: row.fact_key,
      factValue: row.fact_value,
      confidence: parseFloat(row.confidence),
      source: row.source,
      validFrom: new Date(row.valid_from).toISOString(),
      validUntil: expires,
    };
  };

  try {
    const params = [callerId.trim().toLowerCase(), limit];
    const { rows } = await db.query(
      `
SELECT id, caller_id, fact_key, fact_value, confidence, source, valid_from, valid_until
FROM caller_knowledge
WHERE caller_id = $1
AND superseded_by IS NULL
AND (valid_until IS NULL OR valid_until > NOW())
ORDER BY confidence DESC, valid_from DESC
LIMIT $2
`,
      params
    );
    return rows.map(toFact);
  } catch (err) {
    logger.warn({ err, callerId }, 'knowledge-memory: recallFacts failed');
    return [];
  }
}
/**
 * Render facts as a system-prompt fragment: a header rule, one `key: value`
 * line per fact, and a closing rule, joined by newlines.
 *
 * @param facts Facts to render.
 * @returns The fragment, or an empty string when there are no facts.
 */
export function factsToSystemFragment(facts: Fact[]): string {
  if (!facts.length) {
    return '';
  }
  const lines: string[] = ['── Caller Context (from memory) ──'];
  for (const { factKey, factValue } of facts) {
    lines.push(`${factKey}: ${factValue}`);
  }
  lines.push('──────────────────────────────────');
  return lines.join('\n');
}
/**
 * Delete every stored fact for a caller (used by the clear-memory endpoint).
 *
 * @param db Database pool.
 * @param callerId Caller whose facts are removed (trimmed and lower-cased first).
 * @returns Number of deleted rows; 0 when nothing matched or the delete failed.
 */
export async function forgetCaller(db: Pool, callerId: string): Promise<number> {
  try {
    const normalizedCaller = callerId.trim().toLowerCase();
    const { rowCount } = await db.query(
      `DELETE FROM caller_knowledge WHERE caller_id = $1`,
      [normalizedCaller]
    );
    return rowCount ?? 0;
  } catch (err) {
    logger.warn({ err, callerId }, 'knowledge-memory: forgetCaller failed');
    return 0;
  }
}

View File

@ -0,0 +1,94 @@
/**
* Memory Graph Builder
*
* Returns the persistent-memory facts as a graph: nodes are callers and
* fact-categories, edges connect callers to facts. The dashboard uses this
* to render a force-directed visualization (no D3 dependency on the backend;
* we just emit nodes + edges, the SVG layout happens client-side).
*/
import type { Pool } from 'pg';
import { logger } from '../observability/logger.js';
/** A node in the memory graph: a caller, one of its fact keys, or a fact value. */
export interface GraphNode {
  /** Unique node id; `buildMemoryGraph` uses `caller::`, `key::` and `val::` prefixes. */
  id: string;
  /** Which of the three node roles this node plays. */
  type: 'caller' | 'fact-key' | 'fact-value';
  /** Display text for the node. */
  label: string;
  /** Bigger = more facts attached. */
  weight: number;
  /** UI hint: caller-color hex / category icon. */
  group: string;
}
/** A link between two graph nodes, referenced by node id. */
export interface GraphEdge {
  /** Id of the node the edge starts at. */
  source: string;
  /** Id of the node the edge ends at. */
  target: string;
  /** Edge weight; `buildMemoryGraph` always emits 1. */
  weight: number;
  /** Optional fact metadata; `buildMemoryGraph` sets it on key-to-value edges only. */
  meta?: { confidence?: number; source?: string };
}
/** Complete graph payload: nodes, edges and summary counts. */
export interface MemoryGraph {
  nodes: GraphNode[];
  edges: GraphEdge[];
  /** Distinct callers, distinct caller/key pairs, and number of fact rows read. */
  stats: { callers: number; factKeys: number; totalFacts: number };
}
/**
 * Build the memory graph from the currently active rows of `caller_knowledge`
 * (not superseded, not expired).
 *
 * Every fact row contributes a three-node chain:
 * caller node -> fact-key node -> fact-value node.
 * A caller node's weight counts its facts. Value labels and value node ids are
 * truncated to the first 80 characters of the value.
 *
 * @param db Database pool.
 * @returns Nodes, edges and summary counts; an empty graph if the query fails.
 */
export async function buildMemoryGraph(db: Pool): Promise<MemoryGraph> {
  try {
    const r = await db.query(`
SELECT caller_id, fact_key, fact_value, confidence, source
FROM caller_knowledge
WHERE superseded_by IS NULL
AND (valid_until IS NULL OR valid_until > NOW())
ORDER BY caller_id, fact_key
`);
    const nodes = new Map<string, GraphNode>();
    const edges: GraphEdge[] = [];
    const callerSet = new Set<string>();
    const keySet = new Set<string>();

    for (const row of r.rows) {
      const caller = String(row.caller_id);
      const key = String(row.fact_key);
      const value = String(row.fact_value);
      const valueLabel = value.slice(0, 80);
      const callerId = `caller::${caller}`;
      const keyId = `key::${caller}::${key}`;
      const valueId = `val::${caller}::${key}::${valueLabel}`;
      callerSet.add(caller);
      keySet.add(`${caller}::${key}`);

      let callerNode = nodes.get(callerId);
      if (!callerNode) {
        callerNode = { id: callerId, type: 'caller', label: caller, weight: 0, group: 'caller' };
        nodes.set(callerId, callerNode);
      }
      callerNode.weight += 1;

      if (!nodes.has(keyId)) {
        nodes.set(keyId, { id: keyId, type: 'fact-key', label: key, weight: 1, group: caller });
      }
      if (!nodes.has(valueId)) {
        nodes.set(valueId, { id: valueId, type: 'fact-value', label: valueLabel, weight: 1, group: caller });
      }

      // Fall back to 0.8 only when the stored confidence is missing or
      // unparsable; a genuine confidence of 0 must be reported as 0.
      const parsedConfidence = parseFloat(row.confidence);
      const confidence = Number.isFinite(parsedConfidence) ? parsedConfidence : 0.8;

      edges.push({
        source: callerId, target: keyId, weight: 1,
      });
      edges.push({
        source: keyId, target: valueId, weight: 1,
        meta: { confidence, source: row.source ?? undefined },
      });
    }

    return {
      nodes: Array.from(nodes.values()),
      edges,
      stats: { callers: callerSet.size, factKeys: keySet.size, totalFacts: r.rows.length },
    };
  } catch (err) {
    logger.warn({ err }, 'memory-graph: build failed');
    return { nodes: [], edges: [], stats: { callers: 0, factKeys: 0, totalFacts: 0 } };
  }
}

View File

@ -0,0 +1,111 @@
/**
* Race Mode Leaderboard
*
* Aggregates `race_mode_results` to produce a weekly model leaderboard:
* who finished first most often, who had highest confidence, who was
* fastest on average. Used by the dashboard for the leaderboard tab and
* by the router (future) to bias against perpetually losing models.
*/
import type { Pool } from 'pg';
import { logger } from '../observability/logger.js';
/** Aggregated race results for one model over the leaderboard window. */
export interface LeaderboardEntry {
  model: string;
  /** Number of race rows recorded for this model. */
  participations: number;
  /** Rows in which this model's answer was the selected one. */
  selectedCount: number;
  /** Rows flagged as having finished first. */
  firstFinishedCount: number;
  /** Win rate = selectedCount / participations. */
  winRate: number;
  /** Speed rate = firstFinishedCount / participations. */
  speedRate: number;
  /** Mean recorded latency in milliseconds (0 when none was recorded). */
  avgLatencyMs: number;
  /** Mean confidence, or null when no confidence was recorded. */
  avgConfidence: number | null;
  /** Sum of recorded cost in USD. */
  totalCost: number;
  /** Composite score: 60% speed + 40% confidence, used to rank. */
  rank: number;
  /** 1-based position after sorting by `rank` descending. */
  rankPosition: number;
  /** Medal for the top three positions, otherwise null. */
  badge: 'gold' | 'silver' | 'bronze' | null;
}
/**
 * Aggregate `race_mode_results` into a per-model leaderboard.
 *
 * Models are ranked by a composite score (`speedRate * 0.6 + confidence * 0.4`,
 * with a neutral 0.5 confidence term when no confidence was recorded); the top
 * three receive gold/silver/bronze badges.
 *
 * `fastestThisWeek` is the model with the lowest average latency among models
 * that have a positive average. An average of 0 means no timing was recorded
 * (for example only failed runs logged with latency 0), so such a model cannot
 * win. `mostReliable` is the highest win rate among models with at least two
 * participations.
 *
 * NOTE(review): the confidence term divides by 10, which presumably assumes a
 * 0-10 confidence scale; confirm against whatever writes `confidence`.
 *
 * @param db Database pool.
 * @param daysBack Size of the look-back window in days.
 * @returns Leaderboard data; an empty leaderboard if aggregation fails.
 */
export async function getRaceLeaderboard(
  db: Pool,
  daysBack: number = 7
): Promise<{
  totalRaces: number;
  daysCovered: number;
  entries: LeaderboardEntry[];
  fastestThisWeek: { model: string; latencyMs: number } | null;
  mostReliable: { model: string; winRate: number } | null;
}> {
  try {
    // The two aggregations are independent, so run them concurrently.
    const [r, totalRow] = await Promise.all([
      db.query(`
SELECT candidate_model AS model,
COUNT(*)::INT AS participations,
SUM(CASE WHEN selected THEN 1 ELSE 0 END)::INT AS selected_count,
SUM(CASE WHEN finished_first THEN 1 ELSE 0 END)::INT AS first_finished_count,
COALESCE(AVG(latency_ms), 0)::NUMERIC(10,1) AS avg_latency,
AVG(confidence)::NUMERIC(4,2) AS avg_confidence,
COALESCE(SUM(cost_usd), 0)::NUMERIC AS total_cost
FROM race_mode_results
WHERE created_at > NOW() - MAKE_INTERVAL(days => $1)
GROUP BY candidate_model
ORDER BY first_finished_count DESC, avg_confidence DESC NULLS LAST
`, [daysBack]),
      db.query(`
SELECT COUNT(DISTINCT call_id)::INT AS total_races
FROM race_mode_results
WHERE created_at > NOW() - MAKE_INTERVAL(days => $1)
`, [daysBack]),
    ]);

    const entries: LeaderboardEntry[] = r.rows.map((row: any) => {
      const participations = parseInt(row.participations, 10) || 0;
      const selectedCount = parseInt(row.selected_count, 10) || 0;
      const firstFinished = parseInt(row.first_finished_count, 10) || 0;
      const avgLatency = parseFloat(row.avg_latency) || 0;
      const avgConfidence = row.avg_confidence ? parseFloat(row.avg_confidence) : null;
      const winRate = participations > 0 ? selectedCount / participations : 0;
      const speedRate = participations > 0 ? firstFinished / participations : 0;
      // Composite rank: 60% speed + 40% confidence (neutral 0.5 when unknown).
      const confScore = avgConfidence !== null ? (avgConfidence / 10) : 0.5;
      const rank = speedRate * 0.6 + confScore * 0.4;
      return {
        model: row.model,
        participations,
        selectedCount,
        firstFinishedCount: firstFinished,
        winRate: parseFloat(winRate.toFixed(3)),
        speedRate: parseFloat(speedRate.toFixed(3)),
        avgLatencyMs: avgLatency,
        avgConfidence,
        totalCost: parseFloat(row.total_cost) || 0,
        rank: parseFloat(rank.toFixed(3)),
        rankPosition: 0,
        badge: null,
      };
    });

    // Sort by rank descending, then assign positions and medals.
    entries.sort((a, b) => b.rank - a.rank);
    const medals: Array<LeaderboardEntry['badge']> = ['gold', 'silver', 'bronze'];
    entries.forEach((entry, index) => {
      entry.rankPosition = index + 1;
      entry.badge = medals[index] ?? null;
    });

    // A zero average latency means "no timing data", not "infinitely fast".
    const fastest = entries
      .filter((e) => e.avgLatencyMs > 0)
      .sort((a, b) => a.avgLatencyMs - b.avgLatencyMs)[0];
    const reliable = entries
      .filter((e) => e.participations >= 2)
      .sort((a, b) => b.winRate - a.winRate)[0];

    return {
      totalRaces: parseInt(totalRow.rows[0]?.total_races ?? '0', 10),
      daysCovered: daysBack,
      entries,
      fastestThisWeek: fastest ? { model: fastest.model, latencyMs: fastest.avgLatencyMs } : null,
      mostReliable: reliable ? { model: reliable.model, winRate: reliable.winRate } : null,
    };
  } catch (err) {
    logger.warn({ err }, 'race-leaderboard: aggregation failed');
    return { totalRaces: 0, daysCovered: daysBack, entries: [], fastestThisWeek: null, mostReliable: null };
  }
}

View File

@ -0,0 +1,223 @@
/**
* Multi-Model Race Mode
*
* Sends the same prompt to N models in parallel and returns according to
* the chosen strategy:
*
* 'first' — first non-error response wins. Cancels in-flight losers.
* 'best' — wait for all (or timeout), pick highest confidence score.
* 'consensus' — wait for all, return majority answer + agreement score.
*
* All candidate runs are audited to `race_mode_results` for analysis:
* which model is actually fastest, which gives the highest confidence, etc.
*/
import type { Pool } from 'pg';
import { logger } from '../observability/logger.js';
/** How a race picks its winner: first OK response, highest confidence, or majority answer. */
export type RaceStrategy = 'first' | 'best' | 'consensus';
/** Result of running one candidate model in a race. */
export interface RaceCandidateResult {
  model: string;
  /** 'ok' for a usable response, 'error' otherwise. */
  status: 'ok' | 'error';
  /** Model output text, when there is one. */
  output?: string;
  /** Confidence score used by the 'best' and 'consensus' strategies (missing counts as 0). */
  confidence?: number;
  /** Cost of the run; audited into the `cost_usd` column. */
  cost?: number;
  /** Latency in milliseconds; `runRace` records 0 when the runner throws. */
  latencyMs: number;
  /** Error text for failed runs. */
  errorMessage?: string;
}
/** Outcome of a race: the strategy used, the chosen result, and every candidate result. */
export interface RaceOutcome {
  strategy: RaceStrategy;
  /** The candidate result chosen by the strategy. */
  selected: RaceCandidateResult;
  /** All candidate results, in the order the models were supplied. */
  candidates: readonly RaceCandidateResult[];
  /** Share of OK candidates that landed in the winning answer bucket. */
  agreementScore?: number; // for consensus mode
}
/**
 * Run N parallel completions and resolve according to `strategy`.
 *
 * The `runner` callback is responsible for actually invoking the gateway
 * pipeline; this function only implements the selection strategy. A runner
 * that rejects is recorded as an `'error'` result with `latencyMs: 0`.
 *
 * - `'first'`: resolves with the first OK result, then aborts the shared
 *   signal so in-flight losers can stop, and finally collects every candidate
 *   result. Runners that ignore the signal are still awaited.
 * - `'best'`: waits for all candidates and picks the highest confidence.
 * - `'consensus'`: waits for all candidates, groups OK outputs by a normalised
 *   prefix, and picks the most confident member of the largest group.
 *
 * For `'best'` and `'consensus'`, the first candidate's result is selected
 * when no candidate succeeded.
 *
 * @param models Candidate model names (at least one).
 * @param runner Executes one model; receives an abort signal it should honour.
 * @param strategy Selection strategy.
 * @param opts `timeoutMs` (default 60000) after which the signal is aborted.
 * @returns The outcome plus the raw results in `models` order.
 * @throws Error when `models` is empty; for `'first'`, when every candidate
 *         errored ('all candidates errored') or nothing settled before the
 *         timeout ('race timeout').
 */
export async function runRace<R extends RaceCandidateResult>(
  models: readonly string[],
  runner: (model: string, signal: AbortSignal) => Promise<R>,
  strategy: RaceStrategy,
  opts: { timeoutMs?: number } = {}
): Promise<{ outcome: RaceOutcome; results: R[] }> {
  if (models.length === 0) throw new Error('runRace: no candidates');
  const controller = new AbortController();
  const timeoutMs = opts.timeoutMs ?? 60_000;
  const timeout = setTimeout(() => controller.abort(), timeoutMs);

  try {
    // Convert runner rejections into error results so Promise.all never rejects.
    const promises: Array<Promise<R>> = models.map((model) =>
      runner(model, controller.signal).catch(
        (err): R =>
          ({
            model,
            status: 'error',
            errorMessage: err instanceof Error ? err.message : String(err),
            latencyMs: 0,
          } as unknown as R)
      )
    );
    const byConfidenceDesc = (a: R, b: R): number => (b.confidence ?? 0) - (a.confidence ?? 0);

    if (strategy === 'first') {
      const firstOk = await new Promise<R>((resolve, reject) => {
        let pending = promises.length;
        let firstError: R | null = null;
        // Backstop on overall timeout: fall back to an error result if one
        // exists. Cleared as soon as the race is decided so it cannot keep
        // the event loop alive.
        const backstop = setTimeout(() => {
          if (firstError) resolve(firstError);
          else reject(new Error('race timeout'));
        }, timeoutMs);
        for (const candidate of promises) {
          void candidate.then((r) => {
            if (r.status === 'ok') {
              clearTimeout(backstop);
              resolve(r);
              return;
            }
            if (!firstError) firstError = r;
            pending -= 1;
            if (pending === 0) {
              clearTimeout(backstop);
              reject(new Error('all candidates errored'));
            }
          });
        }
      }).finally(() => {
        // Cancel the losers *before* collecting results; otherwise the call
        // takes as long as the slowest candidate.
        controller.abort();
      });
      const results = await Promise.all(promises);
      return { outcome: { strategy, selected: firstOk, candidates: results }, results };
    }

    const results = await Promise.all(promises);
    const ok = results.filter((r) => r.status === 'ok');

    if (strategy === 'best') {
      const winner = ok.length > 0 ? [...ok].sort(byConfidenceDesc)[0] : results[0];
      return { outcome: { strategy, selected: winner, candidates: results }, results };
    }

    // 'consensus' — group identical normalised outputs, pick majority
    const buckets = new Map<string, R[]>();
    for (const r of ok) {
      const key = (r.output ?? '').trim().toLowerCase().replace(/\s+/g, ' ').slice(0, 256);
      const bucket = buckets.get(key);
      if (bucket) bucket.push(r); else buckets.set(key, [r]);
    }
    const largestFirst = [...buckets.values()].sort((a, b) => b.length - a.length);
    const winnerBucket = largestFirst[0];
    const winner = winnerBucket && winnerBucket.length > 0
      ? [...winnerBucket].sort(byConfidenceDesc)[0]
      : results[0];
    const agreementScore = ok.length > 0 ? (winnerBucket?.length ?? 0) / ok.length : 0;
    return { outcome: { strategy, selected: winner, candidates: results, agreementScore }, results };
  } finally {
    // Always release the overall timeout, including when the race throws.
    clearTimeout(timeout);
  }
}
/**
 * Audit all race candidates to the `race_mode_results` table, one row each.
 *
 * `finished_first` is set for the selected model under the `'first'` strategy.
 * For the other strategies it is set for the successful candidate with the
 * lowest latency; failed candidates are never marked, and when no candidate
 * succeeded no row is marked.
 *
 * Inserts are best effort: a failing insert is logged and the remaining
 * candidates are still written.
 *
 * @param db Database pool.
 * @param callId Identifier shared by all rows of this race.
 * @param callerId Caller id (stored lower-cased).
 * @param taskType Task type label stored with each row.
 * @param outcome Race outcome to persist.
 */
export async function auditRaceResults(
  db: Pool,
  callId: string,
  callerId: string,
  taskType: string,
  outcome: RaceOutcome
): Promise<void> {
  // Only successful candidates compete for "finished first"; seeding the
  // comparison with an unchecked first candidate would let a fast failure win.
  const fastestOk = outcome.candidates
    .filter((c) => c.status === 'ok')
    .reduce<RaceCandidateResult | null>(
      (best, c) => (best === null || c.latencyMs < best.latencyMs ? c : best),
      null
    );
  const firstFinishedModel: string | null = outcome.strategy === 'first'
    ? outcome.selected.model
    : fastestOk?.model ?? null;

  for (const c of outcome.candidates) {
    try {
      await db.query(
        `
INSERT INTO race_mode_results (
call_id, caller_id, task_type, strategy,
candidate_model, finished_first, selected,
latency_ms, confidence, cost_usd, error_message, output_preview
) VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12)
`,
        [
          callId,
          callerId.toLowerCase(),
          taskType,
          outcome.strategy,
          c.model,
          c.model === firstFinishedModel,
          c.model === outcome.selected.model,
          c.latencyMs,
          c.confidence ?? null,
          c.cost ?? null,
          c.errorMessage ?? null,
          // Store only a bounded preview of the output.
          c.output?.slice(0, 512) ?? null,
        ]
      );
    } catch (err) {
      logger.warn({ err, model: c.model }, 'race-mode: audit insert failed');
    }
  }
}
/**
 * Aggregate race statistics for the dashboard over the last `hoursBack` hours.
 *
 * Four independent aggregations run concurrently: total races, races per
 * strategy, the model with the most first finishes, and the model with the
 * highest average confidence.
 *
 * @param db Database pool.
 * @param hoursBack Size of the look-back window in hours.
 * @returns The aggregated stats; zeroed/empty values if any query fails.
 */
export async function getRaceStats(
  db: Pool,
  hoursBack: number = 24
): Promise<{
  totalRaces: number;
  byStrategy: Record<string, number>;
  fastestModel: { model: string; wins: number } | null;
  highestConfidenceModel: { model: string; avg: number } | null;
}> {
  try {
    const window = [hoursBack];
    const totalQuery = db.query(
      `SELECT COUNT(DISTINCT call_id)::INT AS n FROM race_mode_results
WHERE created_at > NOW() - MAKE_INTERVAL(hours => $1)`,
      window
    );
    const strategyQuery = db.query(
      `SELECT strategy, COUNT(DISTINCT call_id)::INT AS n FROM race_mode_results
WHERE created_at > NOW() - MAKE_INTERVAL(hours => $1)
GROUP BY strategy`,
      window
    );
    const fastestQuery = db.query(
      `SELECT candidate_model AS model, COUNT(*)::INT AS wins FROM race_mode_results
WHERE finished_first = true AND created_at > NOW() - MAKE_INTERVAL(hours => $1)
GROUP BY candidate_model ORDER BY wins DESC LIMIT 1`,
      window
    );
    const confidenceQuery = db.query(
      `SELECT candidate_model AS model, AVG(confidence)::NUMERIC(4,2) AS avg
FROM race_mode_results
WHERE confidence IS NOT NULL AND created_at > NOW() - MAKE_INTERVAL(hours => $1)
GROUP BY candidate_model ORDER BY avg DESC LIMIT 1`,
      window
    );
    const [total, byStrategy, fastest, byConfidence] = await Promise.all([
      totalQuery,
      strategyQuery,
      fastestQuery,
      confidenceQuery,
    ]);

    const byStrategyMap: Record<string, number> = {};
    byStrategy.rows.forEach((row: any) => {
      byStrategyMap[row.strategy] = parseInt(row.n, 10) || 0;
    });

    const topFastest = fastest.rows[0];
    const topConfidence = byConfidence.rows[0];
    return {
      totalRaces: parseInt(total.rows[0]?.n ?? '0', 10),
      byStrategy: byStrategyMap,
      fastestModel: topFastest
        ? { model: topFastest.model, wins: parseInt(topFastest.wins, 10) }
        : null,
      highestConfidenceModel: topConfidence
        ? { model: topConfidence.model, avg: parseFloat(topConfidence.avg) }
        : null,
    };
  } catch (err) {
    logger.warn({ err }, 'race-mode: stats failed (table missing?)');
    return { totalRaces: 0, byStrategy: {}, fastestModel: null, highestConfidenceModel: null };
  }
}

View File

@ -0,0 +1,218 @@
/**
* Monthly Report Generator
*
* Renders a print-friendly HTML report (intended to be saved as PDF via the
* browser's print dialog). Includes hero counters, savings breakdown by
* source, top models, top callers, achievements unlocked this month, and
* the activity heatmap.
*
* Going via HTML+print-CSS sidesteps any need for an external PDF library:
* the user clicks the gateway's "Print to PDF" link and saves the page.
*/
import type { Pool } from 'pg';
import { getComprehensiveSavings } from './savings-calculator.js';
import { getBuddyState, getAchievements } from './gamification.js';
/**
 * Format a USD amount with precision that grows as the amount shrinks:
 * 6 decimals below one cent, 4 decimals below one dollar, otherwise 2.
 *
 * Precision is chosen from the magnitude, and negative amounts render with a
 * leading minus sign (`-$5.00`).
 *
 * @param c Amount in USD.
 * @returns The formatted amount, e.g. `$0.004200`, `$0.5000`, `$12.50`.
 */
function formatCost(c: number): string {
  if (c === 0) return '$0.00';
  const sign = c < 0 ? '-' : '';
  const magnitude = Math.abs(c);
  let decimals = 2;
  if (magnitude < 0.01) decimals = 6;
  else if (magnitude < 1) decimals = 4;
  return `${sign}$${magnitude.toFixed(decimals)}`;
}
/** Format a number with the runtime's default locale conventions (grouping separators etc.). */
function fmtNum(n: number): string {
  const formatted = n.toLocaleString();
  return formatted;
}
/** Format a 0-1 fraction as a percentage with one decimal place, e.g. 0.5 -> "50.0%". */
function fmtPct(n: number): string {
  const percent = n * 100;
  return percent.toFixed(1) + '%';
}
/**
 * Render the monthly report as a self-contained, print-friendly HTML document.
 *
 * Request, model and caller figures are restricted to the UTC calendar month
 * `[monthStart, monthEnd)`. Database-sourced text (model names, caller ids) is
 * HTML-escaped before being placed into the page.
 *
 * NOTE(review): the savings figures come from `getComprehensiveSavings` with a
 * look-back of "hours since the month started", so for a past month they
 * presumably cover everything from that month's start until now rather than
 * the month alone; confirm whether that is intended.
 *
 * NOTE(review): achievement and buddy strings come from the gamification
 * module and are inserted as-is; escape them too if they can ever contain
 * markup-significant characters.
 *
 * @param db Database pool.
 * @param year Four-digit year.
 * @param month Month number, 1-12.
 * @returns The complete HTML document as a string.
 */
export async function generateMonthlyReport(
  db: Pool,
  year: number,
  month: number
): Promise<string> {
  // Minimal HTML escaping for text that originates from request data.
  const htmlEntities: Record<string, string> = {
    '&': '&amp;',
    '<': '&lt;',
    '>': '&gt;',
    '"': '&quot;',
    "'": '&#39;',
  };
  const esc = (value: unknown): string =>
    String(value ?? '').replace(/[&<>"']/g, (ch) => htmlEntities[ch] ?? ch);

  const monthStart = new Date(Date.UTC(year, month - 1, 1));
  const monthEnd = new Date(Date.UTC(year, month, 1));
  const hoursBack = Math.ceil((Date.now() - monthStart.getTime()) / 3600_000);
  // monthStart is UTC midnight; format it in UTC as well, otherwise servers
  // west of UTC would label the report with the previous month.
  const monthName = monthStart.toLocaleString('en-US', { month: 'long', year: 'numeric', timeZone: 'UTC' });

  // Pull all the data points
  const [savings, buddy, achievements, monthRows, modelRows, callerRows] = await Promise.all([
    getComprehensiveSavings(db, hoursBack),
    getBuddyState(db, 'gateway'),
    getAchievements(db),
    db.query(`
SELECT COUNT(*)::INT AS req,
COALESCE(SUM(tokens_in + tokens_out), 0)::BIGINT AS tokens,
COALESCE(AVG(latency_ms), 0)::INT AS avg_lat,
COALESCE(SUM(cost_usd), 0)::NUMERIC AS cost,
SUM(CASE WHEN status='approved' THEN 1 ELSE 0 END)::FLOAT / NULLIF(COUNT(*),0) AS success_rate
FROM request_tracking
WHERE created_at >= $1 AND created_at < $2
`, [monthStart, monthEnd]),
    db.query(`
SELECT model, COUNT(*)::INT AS cnt
FROM request_tracking
WHERE created_at >= $1 AND created_at < $2
GROUP BY model ORDER BY cnt DESC LIMIT 8
`, [monthStart, monthEnd]),
    db.query(`
SELECT caller_id, COUNT(*)::INT AS cnt, COALESCE(SUM(cost_usd), 0)::NUMERIC AS cost
FROM request_tracking
WHERE created_at >= $1 AND created_at < $2
GROUP BY caller_id ORDER BY cnt DESC LIMIT 8
`, [monthStart, monthEnd]),
  ]);

  const monthStats = monthRows.rows[0] ?? {};
  const totalReq = parseInt(monthStats.req ?? '0', 10);
  const totalTokens = parseInt(monthStats.tokens ?? '0', 10);
  const monthCost = parseFloat(monthStats.cost ?? '0');
  const successRate = parseFloat(monthStats.success_rate ?? '0');
  const avgLat = parseInt(monthStats.avg_lat ?? '0', 10);

  // All unlocked achievements are shown (capped at 12); a true "this month"
  // filter would need an unlock timestamp.
  const newAchievements = achievements.unlocked.slice(0, 12);

  const html = /* html */ `
<!DOCTYPE html>
<html><head>
<meta charset="utf-8">
<title>LLM Gateway · Monthly Report · ${monthName}</title>
<style>
@page { size: A4; margin: 18mm 16mm; }
body { font-family: 'Inter', -apple-system, sans-serif; font-size: 11pt; color: #24313d; line-height: 1.5; }
h1 { font-size: 22pt; font-weight: 700; letter-spacing: -0.02em; margin: 0 0 4pt; color: #0f766e; }
h2 { font-size: 13pt; font-weight: 600; margin: 16pt 0 8pt; padding-bottom: 4pt; border-bottom: 1pt solid #d6e0e7; color: #0f766e; }
h2::before { content: '// '; }
.eyebrow { font-family: 'JetBrains Mono', monospace; font-size: 8pt; letter-spacing: 0.16em; text-transform: uppercase; color: #667684; }
.hero { display: grid; grid-template-columns: 1fr 1fr 1fr; gap: 8pt; margin: 12pt 0 18pt; }
.hero-tile { padding: 10pt; border: 0.5pt solid #d6e0e7; background: #f4f7fa; }
.hero-num { font-family: 'JetBrains Mono', monospace; font-size: 22pt; font-weight: 700; color: #0f766e; line-height: 1; }
.hero-label { font-size: 8pt; text-transform: uppercase; letter-spacing: 0.1em; color: #667684; margin-bottom: 4pt; }
table { width: 100%; border-collapse: collapse; margin: 8pt 0; font-size: 10pt; }
th, td { padding: 4pt 8pt; border-bottom: 0.3pt solid #d6e0e7; text-align: left; }
th { font-weight: 600; color: #667684; font-size: 8pt; text-transform: uppercase; letter-spacing: 0.1em; }
td.num { font-family: 'JetBrains Mono', monospace; text-align: right; }
.axes { display: grid; grid-template-columns: repeat(5, 1fr); gap: 4pt; }
.axis { padding: 8pt; border: 0.5pt solid #d6e0e7; background: #f4f7fa; text-align: center; }
.axis-cost { font-family: 'JetBrains Mono', monospace; font-weight: 700; font-size: 11pt; color: #0f766e; }
.axis-label { font-size: 7pt; color: #667684; text-transform: uppercase; letter-spacing: 0.08em; margin-top: 4pt; }
.ach { display: inline-block; padding: 4pt 8pt; margin: 2pt; border: 0.5pt solid #0f766e; background: #ecfdf5; font-size: 9pt; }
.footer { margin-top: 24pt; padding-top: 8pt; border-top: 0.3pt solid #d6e0e7; font-size: 8pt; color: #93a1ad; text-align: center; }
.ascii-buddy { font-family: 'JetBrains Mono', monospace; font-size: 9pt; line-height: 1; white-space: pre; }
.savings-vs { display: flex; gap: 8pt; align-items: center; margin: 12pt 0; }
.savings-vs > div { flex: 1; padding: 10pt; border: 0.5pt solid #d6e0e7; }
.savings-vs .without { background: #fef2f2; }
.savings-vs .with { background: #ecfdf5; }
.savings-vs .arrow { flex: 0; font-size: 14pt; color: #93a1ad; }
.num-amount { font-family: 'JetBrains Mono', monospace; font-size: 16pt; font-weight: 700; }
@media print { .no-print { display: none; } body { background: white; } }
</style>
</head>
<body>
<div class="no-print" style="margin-bottom: 8pt; padding: 8pt; background: #ecfdf5; border-left: 3pt solid #0f766e;">
<strong>Save as PDF</strong>: Press <code>Cmd/Ctrl+P</code> choose "Save as PDF".
</div>
<header>
<div class="eyebrow">monthly report</div>
<h1>${monthName}</h1>
<div style="font-family: 'JetBrains Mono', monospace; font-size: 9pt; color: #667684;">
LLM Gateway · ${new Date().toISOString().split('T')[0]}
</div>
</header>
<div class="hero">
<div class="hero-tile">
<div class="hero-label">requests routed</div>
<div class="hero-num">${fmtNum(totalReq)}</div>
</div>
<div class="hero-tile">
<div class="hero-label">tokens processed</div>
<div class="hero-num">${fmtNum(totalTokens)}</div>
</div>
<div class="hero-tile">
<div class="hero-label">cost saved</div>
<div class="hero-num">${formatCost(savings.totalCostSaved)}</div>
</div>
</div>
<h2>Cost Analysis</h2>
<div class="savings-vs">
<div class="without">
<div class="hero-label">without gateway</div>
<div class="num-amount" style="color: #b42318;">${formatCost(savings.costWithoutGateway)}</div>
</div>
<div class="arrow"></div>
<div class="with">
<div class="hero-label">with gateway</div>
<div class="num-amount" style="color: #15803d;">${formatCost(savings.costWithGateway)}</div>
</div>
</div>
<p>Saved <strong>${formatCost(savings.costWithoutGateway - savings.costWithGateway)}</strong> through cache hits, compression, subscription bridges, local routing, and race-mode optimization.</p>
<h2>Savings by Source</h2>
<div class="axes">
<div class="axis"><div class="axis-cost">${formatCost(savings.bySource.cache.cost)}</div><div class="axis-label"> Cache</div></div>
<div class="axis"><div class="axis-cost">${formatCost(savings.bySource.compression.cost)}</div><div class="axis-label">🗜 Compression</div></div>
<div class="axis"><div class="axis-cost">${formatCost(savings.bySource.subscriptionBridge.cost)}</div><div class="axis-label">🌉 Sub. Bridges</div></div>
<div class="axis"><div class="axis-cost">${formatCost(savings.bySource.localRouting.cost)}</div><div class="axis-label">🏠 Local</div></div>
<div class="axis"><div class="axis-cost">${formatCost(savings.bySource.raceMode.cost)}</div><div class="axis-label">🏁 Race</div></div>
</div>
<h2>Activity Summary</h2>
<table>
<tr><th>Metric</th><th>Value</th></tr>
<tr><td>Total requests</td><td class="num">${fmtNum(totalReq)}</td></tr>
<tr><td>Average latency</td><td class="num">${fmtNum(avgLat)} ms</td></tr>
<tr><td>Success rate</td><td class="num">${fmtPct(successRate)}</td></tr>
<tr><td>Cost actually paid</td><td class="num">${formatCost(monthCost)}</td></tr>
</table>
<h2>Top Models This Month</h2>
<table>
<tr><th>Model</th><th>Requests</th><th>Share</th></tr>
${modelRows.rows.map((r: any) => `
<tr>
<td><code>${esc(r.model)}</code></td>
<td class="num">${fmtNum(parseInt(r.cnt,10))}</td>
<td class="num">${totalReq > 0 ? ((parseInt(r.cnt,10)/totalReq)*100).toFixed(1) : 0}%</td>
</tr>
`).join('')}
</table>
<h2>Top Callers This Month</h2>
<table>
<tr><th>Caller</th><th>Requests</th><th>Cost</th></tr>
${callerRows.rows.map((r: any) => `
<tr>
<td><code>${esc(r.caller_id)}</code></td>
<td class="num">${fmtNum(parseInt(r.cnt,10))}</td>
<td class="num">${formatCost(parseFloat(r.cost))}</td>
</tr>
`).join('')}
</table>
<h2>Achievements Unlocked</h2>
<div>
${newAchievements.map((a) => `<span class="ach">${a.icon} ${a.title}</span>`).join('')}
${newAchievements.length === 0 ? '<em>No achievements unlocked yet — keep using the gateway!</em>' : ''}
</div>
<h2>Buddy Status</h2>
<div style="display: flex; gap: 12pt; align-items: center; padding: 10pt; border: 0.5pt solid #d6e0e7;">
<div class="ascii-buddy">${buddy.asciiArt.join('\n')}</div>
<div>
<strong>${buddy.name}</strong> · ${buddy.species} · ${buddy.stage}<br>
Level ${buddy.level} · XP ${fmtNum(buddy.xp)}/${fmtNum(buddy.xpForNextLevel)}<br>
Mood: ${buddy.mood} · Streak: ${buddy.streakDays} days<br>
<em>"${buddy.speech}"</em>
</div>
</div>
<div class="footer">
Generated by LLM Gateway · ${new Date().toISOString()} · llm-gateway.context-x.org
</div>
</body></html>`;
  return html;
}

View File

@ -109,6 +109,11 @@ export class RequestLogger {
cost_usd: number; cost_usd: number;
latency_ms: number; latency_ms: number;
fallback_used: boolean; fallback_used: boolean;
compression_mode?: string;
compression_tokens_before?: number;
compression_tokens_after?: number;
compression_tokens_saved?: number;
compression_savings_pct?: number;
error_message?: string; error_message?: string;
created_at: string; created_at: string;
}> }>
@ -116,22 +121,35 @@ export class RequestLogger {
const result = await this.db.query( const result = await this.db.query(
` `
SELECT SELECT
request_id, rt.request_id,
caller_id as caller, rt.caller_id as caller,
task_type, rt.task_type,
model, rt.model,
status, rt.status,
confidence_score, rt.confidence_score,
tokens_in, rt.tokens_in,
tokens_out, rt.tokens_out,
cost_usd, rt.cost_usd,
latency_ms, rt.latency_ms,
fallback_used, rt.fallback_used,
error_message, tv.mode as compression_mode,
created_at tv.tokens_before as compression_tokens_before,
FROM request_tracking tv.tokens_after as compression_tokens_after,
WHERE created_at > NOW() - MAKE_INTERVAL(hours => $1) GREATEST(COALESCE(tv.tokens_before, 0) - COALESCE(tv.tokens_after, 0), 0) as compression_tokens_saved,
tv.savings_pct as compression_savings_pct,
rt.error_message,
rt.created_at
FROM request_tracking rt
LEFT JOIN LATERAL (
SELECT mode, tokens_before, tokens_after, savings_pct
FROM tokenvault_metrics
WHERE tool_used = 'gateway'
AND file_path = rt.request_id
ORDER BY created_at DESC ORDER BY created_at DESC
LIMIT 1
) tv ON true
WHERE rt.created_at > NOW() - MAKE_INTERVAL(hours => $1)
ORDER BY rt.created_at DESC
LIMIT $2 LIMIT $2
`, `,
[offsetHours, limit] [offsetHours, limit]
@ -149,6 +167,11 @@ export class RequestLogger {
cost_usd: row.cost_usd, cost_usd: row.cost_usd,
latency_ms: row.latency_ms, latency_ms: row.latency_ms,
fallback_used: row.fallback_used, fallback_used: row.fallback_used,
compression_mode: row.compression_mode,
compression_tokens_before: row.compression_tokens_before ? parseInt(row.compression_tokens_before, 10) : undefined,
compression_tokens_after: row.compression_tokens_after ? parseInt(row.compression_tokens_after, 10) : undefined,
compression_tokens_saved: row.compression_tokens_saved ? parseInt(row.compression_tokens_saved, 10) : 0,
compression_savings_pct: row.compression_savings_pct ? parseFloat(row.compression_savings_pct) : 0,
error_message: row.error_message, error_message: row.error_message,
created_at: row.created_at created_at: row.created_at
})); }));
@ -160,6 +183,17 @@ export class RequestLogger {
async getMetrics(bucketMinutes: number = 60): Promise<{ async getMetrics(bucketMinutes: number = 60): Promise<{
total_requests: number; total_requests: number;
total_cost: number; total_cost: number;
estimated_api_cost: number;
estimated_api_cost_avoided: number;
total_tokens_in: number;
total_tokens_out: number;
total_tokens: number;
compression_operations: number;
compression_tokens_before: number;
compression_tokens_after: number;
compression_tokens_saved: number;
compression_rate: number;
cache_hit_rate: number;
avg_latency: number; avg_latency: number;
success_rate: number; success_rate: number;
avg_confidence: number; avg_confidence: number;
@ -177,13 +211,15 @@ export class RequestLogger {
` `
SELECT SELECT
COUNT(*) as total_requests, COUNT(*) as total_requests,
SUM(cost_usd) as total_cost, COALESCE(SUM(cost_usd), 0) as total_cost,
AVG(latency_ms) as avg_latency, COALESCE(SUM(tokens_in), 0) as total_tokens_in,
SUM(CASE WHEN status = 'approved' THEN 1 ELSE 0 END)::FLOAT / COUNT(*) as success_rate, COALESCE(SUM(tokens_out), 0) as total_tokens_out,
AVG(confidence_score) as avg_confidence, COALESCE(AVG(latency_ms), 0) as avg_latency,
SUM(CASE WHEN fallback_used = true THEN 1 ELSE 0 END)::FLOAT / COUNT(*) as fallback_percentage CASE WHEN COUNT(*) = 0 THEN 0 ELSE SUM(CASE WHEN status = 'approved' THEN 1 ELSE 0 END)::FLOAT / COUNT(*) END as success_rate,
COALESCE(AVG(confidence_score), 0) as avg_confidence,
CASE WHEN COUNT(*) = 0 THEN 0 ELSE SUM(CASE WHEN fallback_used = true THEN 1 ELSE 0 END)::FLOAT / COUNT(*) END as fallback_percentage
FROM request_tracking FROM request_tracking
WHERE created_at > NOW() - MAKE_INTERVAL(mins => $1) WHERE created_at > NOW() - ($1 * INTERVAL '1 minute')
`, `,
[bucketMinutes] [bucketMinutes]
); );
@ -192,7 +228,7 @@ export class RequestLogger {
` `
SELECT caller_id as caller, COUNT(*) as count SELECT caller_id as caller, COUNT(*) as count
FROM request_tracking FROM request_tracking
WHERE created_at > NOW() - MAKE_INTERVAL(mins => $1) WHERE created_at > NOW() - ($1 * INTERVAL '1 minute')
GROUP BY caller_id GROUP BY caller_id
ORDER BY count DESC ORDER BY count DESC
LIMIT 5 LIMIT 5
@ -204,7 +240,7 @@ export class RequestLogger {
` `
SELECT model, COUNT(*) as count SELECT model, COUNT(*) as count
FROM request_tracking FROM request_tracking
WHERE created_at > NOW() - MAKE_INTERVAL(mins => $1) WHERE created_at > NOW() - ($1 * INTERVAL '1 minute')
GROUP BY model GROUP BY model
ORDER BY count DESC ORDER BY count DESC
LIMIT 5 LIMIT 5
@ -224,11 +260,47 @@ export class RequestLogger {
[bucketMinutes] [bucketMinutes]
); );
const compressionResult = await this.db.query(
`
SELECT
COUNT(*) as operations,
COALESCE(SUM(tokens_before), 0) as tokens_before,
COALESCE(SUM(tokens_after), 0) as tokens_after,
COALESCE(SUM(GREATEST(tokens_before - tokens_after, 0)), 0) as tokens_saved
FROM tokenvault_metrics
WHERE tool_used = 'gateway'
AND created_at > NOW() - ($1 * INTERVAL '1 minute')
`,
[bucketMinutes]
);
const metrics = metricsResult.rows[0]; const metrics = metricsResult.rows[0];
const totalTokensIn = parseInt(metrics.total_tokens_in, 10) || 0;
const totalTokensOut = parseInt(metrics.total_tokens_out, 10) || 0;
const totalTokens = totalTokensIn + totalTokensOut;
const compression = compressionResult.rows[0] ?? {};
const compressionTokensBefore = parseInt(compression.tokens_before, 10) || 0;
const compressionTokensAfter = parseInt(compression.tokens_after, 10) || 0;
const compressionTokensSaved = parseInt(compression.tokens_saved, 10) || 0;
const referenceInputCostPer1k = parseFloat(process.env['REFERENCE_INPUT_COST_PER_1K'] ?? '0.005');
const referenceOutputCostPer1k = parseFloat(process.env['REFERENCE_OUTPUT_COST_PER_1K'] ?? '0.015');
const estimatedApiCost = (totalTokensIn / 1000) * referenceInputCostPer1k + (totalTokensOut / 1000) * referenceOutputCostPer1k;
const totalCost = parseFloat(metrics.total_cost) || 0;
return { return {
total_requests: parseInt(metrics.total_requests) || 0, total_requests: parseInt(metrics.total_requests) || 0,
total_cost: parseFloat(metrics.total_cost) || 0, total_cost: totalCost,
estimated_api_cost: estimatedApiCost,
estimated_api_cost_avoided: Math.max(0, estimatedApiCost - totalCost),
total_tokens_in: totalTokensIn,
total_tokens_out: totalTokensOut,
total_tokens: totalTokens,
compression_operations: parseInt(compression.operations, 10) || 0,
compression_tokens_before: compressionTokensBefore,
compression_tokens_after: compressionTokensAfter,
compression_tokens_saved: compressionTokensSaved,
compression_rate: compressionTokensBefore > 0 ? compressionTokensSaved / compressionTokensBefore : 0,
cache_hit_rate: 0,
avg_latency: Math.round(parseFloat(metrics.avg_latency) || 0), avg_latency: Math.round(parseFloat(metrics.avg_latency) || 0),
success_rate: parseFloat(metrics.success_rate) || 0, success_rate: parseFloat(metrics.success_rate) || 0,
avg_confidence: parseFloat(metrics.avg_confidence) || 0, avg_confidence: parseFloat(metrics.avg_confidence) || 0,

View File

@ -0,0 +1,390 @@
/**
 * Response Cache
 *
 * Two-tier cache:
 *   Tier 1 (exact)    — sha256 of the canonical request; a single indexed
 *                       lookup, $0 cost.
 *   Tier 2 (semantic) — embedding cosine similarity via pgvector (`<=>`),
 *                       served when the nearest entry meets the similarity
 *                       threshold. Lookups return null when no embedding can
 *                       be produced or the vector query fails, so callers
 *                       fall through to the normal pipeline.
 *
 * Cache hits skip the entire LLM pipeline. Each hit increments the saved-cost
 * counter so the dashboard can show real savings in real time.
 */
import { createHash } from 'crypto';
import type { Pool } from 'pg';
import { logger } from '../observability/logger.js';
import { embed, vectorToPgLiteral, EMBEDDING_DIMENSION } from './embedding-client.js';
/** Request fields that identify a cacheable call; every field feeds `computeCacheKey`. */
export interface CacheableRequest {
/** Caller identifier; trimmed and lowercased before hashing and before being stored. */
caller: string;
/** Optional task type; hashed trimmed + lowercased, stored as given. */
task_type?: string;
/** Optional model name; hashed trimmed + lowercased, stored as given. */
model?: string;
/** Optional system prompt; whitespace-collapsed (case preserved) for the hash. */
system?: string;
/** User input; whitespace-collapsed (case preserved) for the hash and used as the embedding text. */
input: string;
}
/** A cache row as returned by the lookup functions, with numeric columns parsed. */
export interface CachedResponse {
/** Primary key of the `response_cache` row (pass to `recordCacheHit`). */
id: number;
/** The row's `cache_key` column. */
cacheKey: string;
/** The stored `response_json` column, returned as-is. */
responseJson: Record<string, unknown>;
/** `cost_when_cached` column; 0 when it does not parse as a number. */
costWhenCached: number;
/** `tokens_in` column; 0 when it does not parse as a number. */
tokensIn: number;
/** `tokens_out` column; 0 when it does not parse as a number. */
tokensOut: number;
/** `hit_count` column at read time. */
hitCount: number;
/** Whole seconds between the row's `created_at` and the time of the query. */
ageSeconds: number;
}
/**
 * Compute a stable SHA-256 cache key for a request.
 *
 * Canonicalisation:
 *  - `caller`, `task_type`, `model` are trimmed and lowercased.
 *  - `system` and `input` are trimmed and have whitespace runs collapsed to a
 *    single space; their case is preserved.
 *
 * The full `system` and `input` text is hashed. Earlier versions hashed only
 * the first 4096 / 16384 characters, which made two long prompts that differ
 * only after the cut-off share one key (and therefore one cached response).
 * Keys for requests below those lengths are unchanged.
 *
 * @param req - The request to fingerprint.
 * @returns Lowercase hex SHA-256 digest of the canonical form.
 */
export function computeCacheKey(req: CacheableRequest): string {
  const normalizeId = (text: string | undefined): string => (text ?? '').trim().toLowerCase();
  const collapseWhitespace = (text: string | undefined): string =>
    (text ?? '').trim().replace(/\s+/g, ' ');

  const canonical = [
    `caller=${normalizeId(req.caller)}`,
    `task=${normalizeId(req.task_type)}`,
    `model=${normalizeId(req.model)}`,
    `system=${collapseWhitespace(req.system)}`,
    `input=${collapseWhitespace(req.input)}`,
  ].join('\n');

  return createHash('sha256').update(canonical).digest('hex');
}
/**
 * Exact-match lookup by cache key.
 *
 * Only rows whose `created_at + ttl_seconds` is still in the future are
 * considered. Any query error is logged at warn level and reported as a miss.
 *
 * @param db - Postgres pool.
 * @param cacheKey - Key produced by `computeCacheKey`.
 * @returns The parsed row, or null on miss / failure.
 */
export async function getCachedResponse(
  db: Pool,
  cacheKey: string
): Promise<CachedResponse | null> {
  const lookupSql = `
SELECT id, cache_key, response_json, cost_when_cached, tokens_in, tokens_out,
hit_count, EXTRACT(EPOCH FROM (NOW() - created_at))::INT AS age_seconds,
ttl_seconds
FROM response_cache
WHERE cache_key = $1
AND (created_at + (ttl_seconds * INTERVAL '1 second')) > NOW()
LIMIT 1
`;

  try {
    const { rows } = await db.query(lookupSql, [cacheKey]);
    const hit = rows[0];
    if (!hit) {
      return null;
    }

    const entry: CachedResponse = {
      id: Number(hit.id),
      cacheKey: hit.cache_key,
      responseJson: hit.response_json,
      costWhenCached: parseFloat(hit.cost_when_cached) || 0,
      tokensIn: parseInt(hit.tokens_in, 10) || 0,
      tokensOut: parseInt(hit.tokens_out, 10) || 0,
      hitCount: parseInt(hit.hit_count, 10) || 0,
      ageSeconds: parseInt(hit.age_seconds, 10) || 0,
    };
    return entry;
  } catch (err) {
    logger.warn({ err }, 'response-cache: getCachedResponse failed (table missing?)');
    return null;
  }
}
/**
 * Nearest-neighbour lookup using pgvector cosine distance.
 *
 * The candidate set is restricted to unexpired rows of the same caller
 * (trimmed + lowercased) and, when `taskType` is given, the same task type.
 * The single closest row is returned only if its similarity
 * (`1 - cosine distance`) reaches `similarityThreshold`.
 *
 * Returns null when:
 *  - `embed()` yields no vector,
 *  - no row qualifies or the best similarity is below the threshold / NaN,
 *  - the query throws (logged at debug level).
 *
 * @param db - Postgres pool.
 * @param caller - Caller identifier.
 * @param taskType - Optional task-type filter; `undefined` disables the filter.
 * @param inputText - Text to embed and compare.
 * @param similarityThreshold - Minimum similarity to accept (default 0.92).
 */
export async function getSemanticCachedResponse(
  db: Pool,
  caller: string,
  taskType: string | undefined,
  inputText: string,
  similarityThreshold: number = 0.92
): Promise<(CachedResponse & { similarity: number }) | null> {
  const queryVector = await embed(inputText);
  if (!queryVector) {
    return null;
  }

  const nearestSql = `
SELECT id, cache_key, response_json, cost_when_cached, tokens_in, tokens_out,
hit_count, EXTRACT(EPOCH FROM (NOW() - created_at))::INT AS age_seconds,
1 - (embedding <=> $1::vector) AS similarity
FROM response_cache
WHERE caller_id = $2
AND ($3::TEXT IS NULL OR task_type = $3)
AND embedding IS NOT NULL
AND (created_at + (ttl_seconds * INTERVAL '1 second')) > NOW()
ORDER BY embedding <=> $1::vector ASC
LIMIT 1
`;

  try {
    const { rows } = await db.query(nearestSql, [
      vectorToPgLiteral(queryVector),
      caller.trim().toLowerCase(),
      taskType ?? null,
    ]);
    const nearest = rows[0];
    if (!nearest) {
      return null;
    }

    const similarity = parseFloat(nearest.similarity);
    if (isNaN(similarity) || similarity < similarityThreshold) {
      return null;
    }

    return {
      id: Number(nearest.id),
      cacheKey: nearest.cache_key,
      responseJson: nearest.response_json,
      costWhenCached: parseFloat(nearest.cost_when_cached) || 0,
      tokensIn: parseInt(nearest.tokens_in, 10) || 0,
      tokensOut: parseInt(nearest.tokens_out, 10) || 0,
      hitCount: parseInt(nearest.hit_count, 10) || 0,
      ageSeconds: parseInt(nearest.age_seconds, 10) || 0,
      similarity,
    };
  } catch (err) {
    logger.debug({ err }, 'response-cache: getSemanticCachedResponse failed (extension missing?)');
    return null;
  }
}
/**
 * Persist (upsert) a response under the request's cache key.
 *
 * On key conflict the stored response, cost, token counts and TTL are
 * overwritten and `created_at` is reset to NOW(), which restarts the TTL
 * window; an existing embedding is kept when the new one is null.
 *
 * The embedding is awaited before the write. If the first upsert (which
 * references the `embedding` column) throws for any reason, the write is
 * retried once without that column; a second failure is logged and swallowed.
 *
 * @param db - Postgres pool.
 * @param req - Request whose canonical form defines the key.
 * @param response - JSON-serialisable response payload.
 * @param meta - Cost / token accounting; `ttlSeconds` defaults to 86 400.
 */
export async function setCachedResponse(
  db: Pool,
  req: CacheableRequest,
  response: Record<string, unknown>,
  meta: { cost: number; tokensIn: number; tokensOut: number; ttlSeconds?: number }
): Promise<void> {
  const cacheKey = computeCacheKey(req);
  const ttl = meta.ttlSeconds ?? 86_400;

  // Only vectors of the expected dimension are stored; anything else becomes NULL.
  const vector = await embed(req.input);
  const embedLiteral =
    vector && vector.length === EMBEDDING_DIMENSION ? vectorToPgLiteral(vector) : null;

  // Built lazily so evaluation (e.g. JSON.stringify) happens inside each try block.
  const commonParams = (): unknown[] => [
    cacheKey,
    req.caller.trim().toLowerCase(),
    req.task_type ?? null,
    req.model ?? null,
    req.input.slice(0, 1024),
    JSON.stringify(response),
    meta.cost,
    meta.tokensIn,
    meta.tokensOut,
    ttl,
  ];

  const upsertWithEmbeddingSql = `
INSERT INTO response_cache
(cache_key, caller_id, task_type, model, input_preview,
response_json, cost_when_cached, tokens_in, tokens_out, ttl_seconds, embedding)
VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11::vector)
ON CONFLICT (cache_key) DO UPDATE SET
response_json = EXCLUDED.response_json,
cost_when_cached = EXCLUDED.cost_when_cached,
tokens_in = EXCLUDED.tokens_in,
tokens_out = EXCLUDED.tokens_out,
ttl_seconds = EXCLUDED.ttl_seconds,
embedding = COALESCE(EXCLUDED.embedding, response_cache.embedding),
created_at = NOW()
`;

  const upsertPlainSql = `
INSERT INTO response_cache
(cache_key, caller_id, task_type, model, input_preview,
response_json, cost_when_cached, tokens_in, tokens_out, ttl_seconds)
VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10)
ON CONFLICT (cache_key) DO UPDATE SET
response_json = EXCLUDED.response_json,
cost_when_cached = EXCLUDED.cost_when_cached,
tokens_in = EXCLUDED.tokens_in,
tokens_out = EXCLUDED.tokens_out,
ttl_seconds = EXCLUDED.ttl_seconds,
created_at = NOW()
`;

  try {
    await db.query(upsertWithEmbeddingSql, [...commonParams(), embedLiteral]);
    return;
  } catch (err) {
    // Retry without embedding column when the extension hasn't migrated yet
    logger.debug({ err }, 'response-cache: setCachedResponse with embedding failed, retrying without');
  }

  try {
    await db.query(upsertPlainSql, commonParams());
  } catch (err2) {
    logger.warn({ err: err2 }, 'response-cache: setCachedResponse failed');
  }
}
/**
 * Register one cache hit on a row in a single UPDATE statement.
 *
 * Bumps `hit_count`, adds the row's `cost_when_cached` to `cost_saved`, adds
 * `tokens_in + tokens_out` to `tokens_saved`, and stamps `last_hit_at`.
 * Failures are logged at warn level and never thrown.
 *
 * @param db - Postgres pool.
 * @param cachedId - `id` of the `response_cache` row that was served.
 */
export async function recordCacheHit(db: Pool, cachedId: number): Promise<void> {
  const bumpSql = `
UPDATE response_cache
SET hit_count = hit_count + 1,
cost_saved = cost_saved + cost_when_cached,
tokens_saved = tokens_saved + tokens_in + tokens_out,
last_hit_at = NOW()
WHERE id = $1
`;

  try {
    await db.query(bumpSql, [cachedId]);
  } catch (err) {
    logger.warn({ err }, 'response-cache: recordCacheHit failed');
  }
}
/**
 * Aggregate cache savings for the dashboard.
 *
 * Totals are summed over rows that were hit or created within the window.
 * `hit_count`, `cost_saved` and `tokens_saved` are per-row running counters,
 * so the totals include everything those rows have accumulated, not only the
 * portion that happened inside the window.
 *
 * Hit rate = windowed hits / (windowed hits + rows in `request_tracking` for
 * the same window), as a percentage rounded to two decimals. The rate is 0
 * only when both counts are 0; previously it was also forced to 0 whenever
 * `request_tracking` had no rows, even if there were cache hits.
 * NOTE(review): this formula assumes cache hits are not themselves logged to
 * `request_tracking` — confirm against the request pipeline.
 *
 * Any query failure is logged at warn level and yields an all-zero result.
 *
 * @param db - Postgres pool.
 * @param hoursBack - Size of the look-back window in hours (default 24).
 */
export async function getCacheSavings(
  db: Pool,
  hoursBack: number = 24
): Promise<{
  totalHits: number;
  totalCostSaved: number;
  totalTokensSaved: number;
  uniqueEntries: number;
  topCallers: Array<{ caller: string; hits: number; saved: number }>;
  hitRatePercent: number;
}> {
  try {
    const [totalRow, callerRows, ratioRow] = await Promise.all([
      db.query(
        `SELECT
COALESCE(SUM(hit_count), 0)::INT AS total_hits,
COALESCE(SUM(cost_saved), 0)::NUMERIC AS total_cost_saved,
COALESCE(SUM(tokens_saved), 0)::BIGINT AS total_tokens_saved,
COUNT(*)::INT AS unique_entries
FROM response_cache
WHERE last_hit_at > NOW() - MAKE_INTERVAL(hours => $1)
OR created_at > NOW() - MAKE_INTERVAL(hours => $1)`,
        [hoursBack]
      ),
      db.query(
        `SELECT caller_id, SUM(hit_count)::INT AS hits, SUM(cost_saved)::NUMERIC AS saved
FROM response_cache
WHERE last_hit_at > NOW() - MAKE_INTERVAL(hours => $1)
GROUP BY caller_id
ORDER BY hits DESC
LIMIT 5`,
        [hoursBack]
      ),
      // Cache hit-rate = hits / (hits + new requests in same window)
      db.query(
        `SELECT
COALESCE((SELECT SUM(hit_count) FROM response_cache
WHERE last_hit_at > NOW() - MAKE_INTERVAL(hours => $1)), 0)::INT AS hits,
(SELECT COUNT(*) FROM request_tracking
WHERE created_at > NOW() - MAKE_INTERVAL(hours => $1))::INT AS total_requests`,
        [hoursBack]
      ),
    ]);

    const totals = totalRow.rows[0];
    const ratio = ratioRow.rows[0];

    const totalHits = parseInt(totals?.total_hits ?? '0', 10) || 0;
    const windowHits = parseInt(ratio?.hits ?? '0', 10) || 0;
    const newRequests = parseInt(ratio?.total_requests ?? '0', 10) || 0;

    // Guard on the real denominator so "all traffic served from cache" reads as 100 %, not 0 %.
    const servedTotal = windowHits + newRequests;
    const hitRate = servedTotal > 0 ? (windowHits / servedTotal) * 100 : 0;

    return {
      totalHits,
      totalCostSaved: parseFloat(totals?.total_cost_saved ?? '0') || 0,
      totalTokensSaved: parseInt(totals?.total_tokens_saved ?? '0', 10) || 0,
      uniqueEntries: parseInt(totals?.unique_entries ?? '0', 10) || 0,
      topCallers: callerRows.rows.map((row: any) => ({
        caller: row.caller_id,
        hits: parseInt(row.hits, 10) || 0,
        saved: parseFloat(row.saved) || 0,
      })),
      hitRatePercent: parseFloat(hitRate.toFixed(2)),
    };
  } catch (err) {
    logger.warn({ err }, 'response-cache: getCacheSavings failed (table missing?)');
    return {
      totalHits: 0,
      totalCostSaved: 0,
      totalTokensSaved: 0,
      uniqueEntries: 0,
      topCallers: [],
      hitRatePercent: 0,
    };
  }
}
/**
 * Time-series buckets of cache activity for sparkline visualization.
 *
 * Produces `ceil(hoursBack * 60 / bucketMinutes)` buckets, the newest one
 * starting at the current hour boundary and each earlier one `bucketMinutes`
 * before the next. A cache row is attributed to the bucket containing its
 * `last_hit_at`, so per bucket:
 *  - `hits` is the number of cache entries last hit in that bucket (not the
 *    number of individual hits),
 *  - `costSaved` / `tokensSaved` sum `cost_when_cached` and
 *    `tokens_in + tokens_out` once per such entry.
 *
 * Fixes over the previous query:
 *  - The bucket list was built with a nested `generate_series`, which emitted
 *    each bucket several times; the LEFT JOIN then multiplied every count and
 *    sum by that duplication factor. Buckets are now generated exactly once.
 *  - Rows were matched with `DATE_TRUNC('hour', last_hit_at) = bucket_ts`,
 *    which only lines up for 60-minute buckets. Rows are now matched by the
 *    half-open range `[bucket_ts, bucket_ts + bucketMinutes)`, which is
 *    identical for 60-minute buckets.
 *
 * @param db - Postgres pool.
 * @param hoursBack - Look-back window in hours (default 24).
 * @param bucketMinutes - Bucket width in minutes (default 60).
 * @returns Buckets in ascending time order; `[]` on invalid sizes or query failure.
 */
export async function getSavingsTimeSeries(
  db: Pool,
  hoursBack: number = 24,
  bucketMinutes: number = 60
): Promise<Array<{ ts: string; costSaved: number; hits: number; tokensSaved: number }>> {
  try {
    const buckets = Math.ceil((hoursBack * 60) / bucketMinutes);
    // A zero/negative/non-finite bucket count cannot produce a series; skip the round-trip.
    if (!Number.isFinite(buckets) || buckets <= 0) {
      return [];
    }

    const result = await db.query(
      `
      WITH gs AS (
        SELECT DATE_TRUNC('hour', NOW()) - ($1 || ' minutes')::INTERVAL * (s) AS bucket_ts
        FROM generate_series(0, $2 - 1) s
      )
      SELECT
        gs.bucket_ts,
        COALESCE(COUNT(rc.id), 0)::INT AS hits,
        COALESCE(SUM(rc.cost_when_cached), 0)::NUMERIC AS cost_saved,
        COALESCE(SUM(rc.tokens_in + rc.tokens_out), 0)::INT AS tokens_saved
      FROM gs
      LEFT JOIN response_cache rc
        ON rc.last_hit_at >= gs.bucket_ts
        AND rc.last_hit_at < gs.bucket_ts + ($1 || ' minutes')::INTERVAL
        AND rc.last_hit_at > NOW() - ($1 || ' minutes')::INTERVAL * $2
      GROUP BY gs.bucket_ts
      ORDER BY gs.bucket_ts ASC
      `,
      [bucketMinutes, buckets]
    );

    return result.rows.map((row: any) => ({
      ts: row.bucket_ts.toISOString(),
      costSaved: parseFloat(row.cost_saved) || 0,
      hits: parseInt(row.hits, 10) || 0,
      tokensSaved: parseInt(row.tokens_saved, 10) || 0,
    }));
  } catch (err) {
    logger.warn({ err }, 'response-cache: getSavingsTimeSeries failed');
    return [];
  }
}
/**
 * Delete cache rows that were created more than `maxAgeDays` ago and have
 * either never been hit or were last hit more than `maxAgeDays` ago.
 * Intended to be called from a periodic job.
 *
 * @param db - Postgres pool.
 * @param maxAgeDays - Age threshold in days (default 7).
 * @returns Number of rows deleted; 0 when the statement fails (logged at warn).
 */
export async function pruneStaleCacheEntries(db: Pool, maxAgeDays: number = 7): Promise<number> {
  const pruneSql = `DELETE FROM response_cache
WHERE created_at < NOW() - MAKE_INTERVAL(days => $1)
AND (last_hit_at IS NULL OR last_hit_at < NOW() - MAKE_INTERVAL(days => $1))`;

  try {
    const { rowCount } = await db.query(pruneSql, [maxAgeDays]);
    return rowCount ?? 0;
  } catch (err) {
    logger.warn({ err }, 'response-cache: prune failed');
    return 0;
  }
}
/**
 * Remove every cache row belonging to one caller (manual invalidation).
 * The caller id is trimmed and lowercased, matching how rows are stored.
 *
 * @param db - Postgres pool.
 * @param callerId - Caller whose entries should be dropped.
 * @returns Number of rows deleted; 0 when the statement fails (logged at warn).
 */
export async function clearCacheForCaller(db: Pool, callerId: string): Promise<number> {
  const normalizedCaller = callerId.trim().toLowerCase();

  try {
    const { rowCount } = await db.query(
      `DELETE FROM response_cache WHERE caller_id = $1`,
      [normalizedCaller]
    );
    return rowCount ?? 0;
  } catch (err) {
    logger.warn({ err }, 'response-cache: clearCacheForCaller failed');
    return 0;
  }
}

View File

@ -0,0 +1,267 @@
/**
 * Savings Calculator
 *
 * Comprehensive savings accounting across ALL gateway mechanisms — not just
 * cache hits. Lean-CTX measures file-context compression; we measure five
 * orthogonal sources of value:
 *
 *   1. Response cache (exact + semantic match)
 *   2. Compression pipeline (verbatim_compact, etc.)
 *   3. Subscription-bridge implicit savings (calls via flat-rate Pro plan
 *      vs. what they would have cost via paid API)
 *   4. Model-tier routing (cheaper model used when sufficient)
 *   5. Pool routing (avoided quota-out on a sub by switching to alternate)
 *
 * In the returned breakdown these are reported under the `bySource` keys
 * `cache`, `compression`, `subscriptionBridge`, `localRouting` and `raceMode`.
 *
 * The dashboard now surfaces all five so the savings counter reflects the
 * gateway's true value rather than only cache hits.
 */
import type { Pool } from 'pg';
import { logger } from '../observability/logger.js';
/** USD price per 1k tokens for input (`in`) and output (`out`). */
type ApiPrice = { readonly in: number; readonly out: number };

// Conservative API pricing snapshot (USD per 1k tokens). Used to compute
// "what would this have cost via direct API". Update as pricing evolves.
const API_PRICING = {
  // Anthropic
  'claude-opus-4-1': { in: 0.015, out: 0.075 },
  'claude-sonnet-4-1': { in: 0.003, out: 0.015 },
  'claude-haiku-3': { in: 0.00025, out: 0.00125 },
  // OpenAI
  'gpt-5.1-codex': { in: 0.005, out: 0.020 },
  'gpt-5.1-codex-mini': { in: 0.0015, out: 0.006 },
  'gpt-4-turbo': { in: 0.010, out: 0.030 },
  'gpt-4': { in: 0.030, out: 0.060 },
  'gpt-3.5-turbo': { in: 0.0005, out: 0.0015 },
  // Google
  'gemini-1.5-pro': { in: 0.00125, out: 0.005 },
  'gemini-1.5-flash': { in: 0.000075, out: 0.0003 },
} as const;

/**
 * Pricing keys ordered longest-first so that prefix matching always selects
 * the most specific model family (e.g. `gpt-5.1-codex-mini` before
 * `gpt-5.1-codex`, `gpt-4-turbo` before `gpt-4`).
 */
const API_PRICING_KEYS_LONGEST_FIRST: readonly string[] = Object.keys(API_PRICING).sort(
  (a, b) => b.length - a.length
);

/** Models that go through a flat-rate subscription bridge → marginal cost = $0 */
const SUBSCRIPTION_MODEL_PATTERNS = [
  /^claude-/i, // Claude Code subscription
  /^gpt-5\.1-codex/i, // Codex CLI subscription
  /^gpt-(4|3\.5)/i, // ChatGPT Plus / Copilot subscription
  /^gemini-/i, // Gemini Advanced
  /^github-copilot/i, // GitHub Copilot
  // NOTE(review): the unescaped "." matches any single character (e.g. "microsoft-365",
  // "microsoft 365"); presumably intentional — confirm.
  /^microsoft.365/i, // M365 Copilot
];

/**
 * Look up list pricing for a model name (case-insensitive).
 *
 * Resolution order:
 *  1. Exact key match (own properties only, so names such as `constructor`
 *     never resolve to inherited members).
 *  2. Longest pricing key that is a prefix of the model name, so a versioned
 *     name like `claude-sonnet-4-1-something` maps to `claude-sonnet-4-1` and
 *     `gpt-5.1-codex-mini-x` maps to the mini tier rather than the full one.
 *
 * @param model - Model identifier as recorded on the request.
 * @returns The per-1k-token prices, or null when no entry applies.
 */
function lookupApiPrice(model: string): { in: number; out: number } | null {
  const table: Readonly<Record<string, ApiPrice>> = API_PRICING;
  const normalized = model.toLowerCase();

  if (Object.prototype.hasOwnProperty.call(table, normalized)) {
    return table[normalized] ?? null;
  }

  const family = API_PRICING_KEYS_LONGEST_FIRST.find((key) => normalized.startsWith(key));
  return family === undefined ? null : table[family] ?? null;
}

/** True when the model name matches one of the flat-rate subscription patterns. */
function isSubscriptionModel(model: string): boolean {
  return SUBSCRIPTION_MODEL_PATTERNS.some((p) => p.test(model));
}

/** True when the model name starts with a known locally-hosted model family prefix. */
function isLocalModel(model: string): boolean {
  return /^(qwen|llama|mistral|magatama|phi|nomic|gemma)/i.test(model);
}
/** Result of `getComprehensiveSavings`: headline totals plus a per-source breakdown. */
export interface ComprehensiveSavings {
/** Sum of the `cost` fields of all five `bySource` buckets (USD). */
totalCostSaved: number;
/** Sum of the `tokens` fields of the `cache` and `compression` buckets only. */
totalTokensSaved: number;
/** Per-source breakdown for the dashboard. */
bySource: {
/** Response-cache rows hit within the window. */
cache: { tokens: number; cost: number; hits: number };
/** Gateway compression (`tokenvault_metrics`) plus MCP tool-call savings (`mcp_tool_calls`). */
compression: { tokens: number; cost: number; calls: number };
/** Requests whose model matches a subscription pattern; cost = list price minus actual cost, floored at 0. */
subscriptionBridge: { tokens: number; cost: number; calls: number };
/** Requests whose model matches a local-model prefix (and no subscription pattern). */
localRouting: { tokens: number; cost: number; calls: number };
/** Race-mode runs; `cost` is the summed cost of the non-selected candidates, `tokens` is always 0. */
raceMode: { tokens: number; cost: number; calls: number };
};
/** How much you would have paid for the same volume at API list prices. */
costWithoutGateway: number;
/** What you actually paid (real $): the sum of `cost_usd` over the window's requests. */
costWithGateway: number;
/** Time window in hours, echoed from the call. */
hoursBack: number;
/** Request count and token totals over the window that the figures were derived from. */
totals: { requests: number; tokensIn: number; tokensOut: number };
}
/**
 * Compute comprehensive savings across all mechanisms.
 *
 * Strategy:
 *   For each request, determine where it went and price it both ways:
 *     - "Would-be cost" = API list price for the model that handled it
 *       (local models are priced against `gpt-3.5-turbo`; unknown models at 0)
 *     - "Actual cost"   = the request's recorded `cost_usd`
 *     - "Saved"         = max(0, would-be − actual)
 *
 * Every optional data source (response cache, compression metrics, race
 * results, MCP tool calls) is queried in its own try/catch so that a missing
 * table only zeroes that one bucket. Previously a failing `response_cache`
 * query aborted the whole computation and returned all zeros.
 *
 * A failure of the `request_tracking` query (or anything else outside the
 * per-source guards) is logged at warn level and the partially filled result
 * is returned.
 *
 * @param db - Postgres pool.
 * @param hoursBack - Look-back window in hours (default 24).
 * @returns Totals and per-source breakdown; never throws.
 */
export async function getComprehensiveSavings(
  db: Pool,
  hoursBack: number = 24
): Promise<ComprehensiveSavings> {
  const savings: ComprehensiveSavings = {
    totalCostSaved: 0,
    totalTokensSaved: 0,
    bySource: {
      cache: { tokens: 0, cost: 0, hits: 0 },
      compression: { tokens: 0, cost: 0, calls: 0 },
      subscriptionBridge: { tokens: 0, cost: 0, calls: 0 },
      localRouting: { tokens: 0, cost: 0, calls: 0 },
      raceMode: { tokens: 0, cost: 0, calls: 0 },
    },
    costWithoutGateway: 0,
    costWithGateway: 0,
    hoursBack,
    totals: { requests: 0, tokensIn: 0, tokensOut: 0 },
  };

  try {
    // 1) Cache hits — isolated so a missing response_cache table does not
    //    discard the request-based figures computed below.
    try {
      const cacheRow = await db.query(
        `SELECT
COALESCE(SUM(hit_count), 0)::INT AS hits,
COALESCE(SUM(cost_saved), 0)::NUMERIC AS cost,
COALESCE(SUM(tokens_saved), 0)::BIGINT AS tokens
FROM response_cache
WHERE last_hit_at > NOW() - MAKE_INTERVAL(hours => $1)`,
        [hoursBack]
      );
      savings.bySource.cache = {
        hits: parseInt(cacheRow.rows[0]?.hits ?? '0', 10),
        cost: parseFloat(cacheRow.rows[0]?.cost ?? '0'),
        tokens: parseInt(cacheRow.rows[0]?.tokens ?? '0', 10),
      };
    } catch (err) {
      logger.debug({ err }, 'savings: cache aggregation skipped (table missing)');
    }

    // 2-4) All requests in the window, classified by routing
    const reqRows = await db.query(
      `SELECT model, tokens_in, tokens_out, cost_usd, fallback_used
FROM request_tracking
WHERE created_at > NOW() - MAKE_INTERVAL(hours => $1)`,
      [hoursBack]
    );

    let totalReq = 0, totalIn = 0, totalOut = 0;
    let withGateway = 0, withoutGateway = 0;

    for (const r of reqRows.rows) {
      const model = String(r.model ?? '');
      const tokensIn = parseInt(r.tokens_in, 10) || 0;
      const tokensOut = parseInt(r.tokens_out, 10) || 0;
      const actualCost = parseFloat(r.cost_usd) || 0;

      totalReq += 1;
      totalIn += tokensIn;
      totalOut += tokensOut;
      withGateway += actualCost;

      // Determine "would-be cost" — what this request would have cost at API
      // list prices for the model that handled it (or its closest paid sibling).
      const apiPrice = lookupApiPrice(model);
      let wouldBeCost = 0;
      if (apiPrice) {
        wouldBeCost = (tokensIn / 1000) * apiPrice.in + (tokensOut / 1000) * apiPrice.out;
      } else if (isLocalModel(model)) {
        // Local model — priced against gpt-3.5-turbo as the opportunity cost
        const ref = API_PRICING['gpt-3.5-turbo'];
        wouldBeCost = (tokensIn / 1000) * ref.in + (tokensOut / 1000) * ref.out;
      }
      withoutGateway += wouldBeCost;

      // Bucket the savings into a source. Subscription patterns win over
      // local-model prefixes; requests matching neither are not bucketed.
      if (isSubscriptionModel(model)) {
        savings.bySource.subscriptionBridge.calls += 1;
        savings.bySource.subscriptionBridge.tokens += tokensIn + tokensOut;
        savings.bySource.subscriptionBridge.cost += Math.max(0, wouldBeCost - actualCost);
      } else if (isLocalModel(model)) {
        savings.bySource.localRouting.calls += 1;
        savings.bySource.localRouting.tokens += tokensIn + tokensOut;
        savings.bySource.localRouting.cost += Math.max(0, wouldBeCost - actualCost);
      }
    }

    // 5) Compression savings — pull from tokenvault_metrics if available
    try {
      const compRow = await db.query(
        `SELECT
COUNT(*)::INT AS calls,
COALESCE(SUM(GREATEST(tokens_before - tokens_after, 0)), 0)::BIGINT AS tokens_saved
FROM tokenvault_metrics
WHERE created_at > NOW() - MAKE_INTERVAL(hours => $1)
AND tool_used = 'gateway'`,
        [hoursBack]
      );
      const tokensCompressed = parseInt(compRow.rows[0]?.tokens_saved ?? '0', 10);
      // Conservative pricing: assume average input pricing of $0.001/1k tokens
      const compCost = (tokensCompressed / 1000) * 0.001;
      savings.bySource.compression = {
        calls: parseInt(compRow.rows[0]?.calls ?? '0', 10),
        tokens: tokensCompressed,
        cost: compCost,
      };
    } catch (err) {
      logger.debug({ err }, 'savings: compression aggregation skipped (table missing)');
    }

    // 6) Race mode — the summed cost of the candidates that were not selected
    //    is reported as this bucket's "cost".
    try {
      const raceRow = await db.query(
        `SELECT
COUNT(DISTINCT call_id)::INT AS races,
COALESCE(SUM(cost_usd) FILTER (WHERE selected = false), 0)::NUMERIC AS not_picked_cost
FROM race_mode_results
WHERE created_at > NOW() - MAKE_INTERVAL(hours => $1)`,
        [hoursBack]
      );
      savings.bySource.raceMode = {
        calls: parseInt(raceRow.rows[0]?.races ?? '0', 10),
        cost: parseFloat(raceRow.rows[0]?.not_picked_cost ?? '0'),
        tokens: 0,
      };
    } catch (err) {
      logger.debug({ err }, 'savings: race aggregation skipped (table missing)');
    }

    // 7) MCP tool-call compression — drop-in Lean-CTX replacement
    try {
      const mcpRow = await db.query(
        `SELECT COUNT(*)::INT AS calls,
COALESCE(SUM(tokens_saved), 0)::BIGINT AS tokens_saved
FROM mcp_tool_calls
WHERE created_at > NOW() - MAKE_INTERVAL(hours => $1)`,
        [hoursBack]
      );
      const mcpTokens = parseInt(mcpRow.rows[0]?.tokens_saved ?? '0', 10);
      const mcpCalls = parseInt(mcpRow.rows[0]?.calls ?? '0', 10);
      // Tool-call savings cost-equivalence: Sonnet-equivalent pricing
      // ($3/MTok input, $15/MTok output, weighted 60/40 in/out for tool returns)
      // → 3.0 * 0.6 + 15.0 * 0.4 = $7.80 per MTok, i.e. $0.0078 per 1k tokens.
      const mcpCost = (mcpTokens / 1_000_000) * (3.0 * 0.6 + 15.0 * 0.4);
      // Folded into the compression bucket rather than reported as its own source.
      savings.bySource.compression.tokens += mcpTokens;
      savings.bySource.compression.cost += mcpCost;
      savings.bySource.compression.calls += mcpCalls;
    } catch (err) {
      logger.debug({ err }, 'savings: mcp tool aggregation skipped (table missing)');
    }

    savings.totalCostSaved =
      savings.bySource.cache.cost +
      savings.bySource.compression.cost +
      savings.bySource.subscriptionBridge.cost +
      savings.bySource.localRouting.cost +
      savings.bySource.raceMode.cost;
    // Only cache and compression report tokens that were genuinely not sent.
    savings.totalTokensSaved =
      savings.bySource.cache.tokens +
      savings.bySource.compression.tokens;
    savings.costWithoutGateway = withoutGateway;
    savings.costWithGateway = withGateway;
    savings.totals = { requests: totalReq, tokensIn: totalIn, tokensOut: totalOut };
  } catch (err) {
    logger.warn({ err }, 'savings-calculator: comprehensive computation failed');
  }

  return savings;
}

View File

@ -0,0 +1,214 @@
/**
 * Settings Store
 *
 * Persists user configuration (which subscriptions they have, which API
 * providers they use, etc.) to a JSON file on disk. Sensitive fields like
 * API keys are stored verbatim but never returned in plaintext from
 * `getPublicSettings()` — only a `hasKey: true/false` flag is exposed.
 */
import { readFileSync, writeFileSync, existsSync, mkdirSync } from 'fs';
import { dirname, join } from 'path';
import { z } from 'zod';
import { logger } from '../observability/logger.js';
/** One subscription's configuration. */
const SubscriptionConfigSchema = z.object({
  enabled: z.boolean().default(true),
  autoSpawn: z.boolean().default(true),
  /**
   * Optional remote bridge URL. When set, the gateway will route to this
   * URL instead of trying to spawn a local bridge. Use this when the CLI
   * subscription lives on a different machine than the gateway.
   * An empty string is accepted as "not set".
   */
  bridgeUrl: z.string().url().optional().or(z.literal('')),
  notes: z.string().optional(),
});

/** One API provider's configuration; providers are disabled unless enabled explicitly. */
const ApiProviderConfigSchema = z.object({
  enabled: z.boolean().default(false),
  apiKey: z.string().optional(),
  baseUrl: z.string().optional(),
  notes: z.string().optional(),
});

/** Local Ollama endpoint. */
const OllamaConfigSchema = z.object({
  enabled: z.boolean().default(true),
  baseUrl: z.string().default('http://localhost:11434'),
});

/**
 * Dashboard presentation flags.
 * Simple Mode targets users who only use 1-2 subscriptions: it hides advanced
 * tabs (providers, races, share, report, memory) and filters
 * wallet/subscriptions to only show enabled providers.
 */
const UiConfigSchema = z.object({
  simpleMode: z.boolean().default(true),
  hideEmptyProviders: z.boolean().default(true),
  showTooltips: z.boolean().default(true),
});

/** Root schema for the persisted settings file; every section has a default. */
const SettingsSchema = z.object({
  /** How the gateway should pick providers: 'auto' uses all, others restrict the pool. */
  routingMode: z.enum(['auto', 'subscription-only', 'api-only', 'local-only']).default('auto'),
  /** Per-subscription configuration keyed by SubscriptionId. */
  subscriptions: z.record(z.string(), SubscriptionConfigSchema).default({}),
  /** Per-API-provider configuration keyed by provider name (cerebras, groq, …). */
  apiProviders: z.record(z.string(), ApiProviderConfigSchema).default({}),
  /** Local Ollama configuration. */
  ollama: OllamaConfigSchema.default({ enabled: true, baseUrl: 'http://localhost:11434' }),
  /** Dashboard UI preferences. */
  ui: UiConfigSchema.default({ simpleMode: true, hideEmptyProviders: true, showTooltips: true }),
  /** ISO timestamp of last update. */
  updatedAt: z.string().optional(),
});
/** Fully parsed settings object, as inferred from `SettingsSchema`. */
export type Settings = z.infer<typeof SettingsSchema>;
/**
 * Settings shape that is safe to send to the dashboard: identical to
 * `Settings` except that each API provider carries a `hasKey` flag in place
 * of the `apiKey` value.
 */
export interface PublicSettings extends Omit<Settings, 'apiProviders'> {
apiProviders: Record<string, { enabled: boolean; hasKey: boolean; baseUrl?: string; notes?: string }>;
}
// Location of the settings file: the SETTINGS_PATH env var when set, otherwise
// `.llm-gateway/settings.json` under $HOME (falling back to /root when HOME is unset).
const SETTINGS_PATH =
process.env['SETTINGS_PATH'] ?? join(process.env['HOME'] ?? '/root', '.llm-gateway', 'settings.json');
// Subscriptions used by getDefaults() when no settings file can be loaded;
// each one starts enabled with auto-spawn on.
const DEFAULT_SUBSCRIPTIONS: Settings['subscriptions'] = {
'claude-code': { enabled: true, autoSpawn: true },
'github-copilot': { enabled: true, autoSpawn: true },
'chatgpt': { enabled: true, autoSpawn: true },
'gemini': { enabled: true, autoSpawn: true },
'codex': { enabled: true, autoSpawn: true },
'aider': { enabled: true, autoSpawn: true },
};
/**
 * Build the default settings: 'auto' routing, the built-in subscription list,
 * and Ollama enabled at OLLAMA_BASE_URL (or http://localhost:11434 when the
 * env var is unset). Remaining sections come from the schema defaults.
 */
function getDefaults(): Settings {
  const ollamaBaseUrl = process.env['OLLAMA_BASE_URL'] ?? 'http://localhost:11434';
  const seed = {
    routingMode: 'auto',
    subscriptions: DEFAULT_SUBSCRIPTIONS,
    ollama: { enabled: true, baseUrl: ollamaBaseUrl },
  };
  return SettingsSchema.parse(seed);
}
/**
 * Read and validate the settings file.
 *
 * @returns The parsed settings, or the defaults when the file is absent,
 *          unreadable, not valid JSON, or fails schema validation (the
 *          failure cases are logged at warn level).
 */
export function loadSettings(): Settings {
  try {
    return existsSync(SETTINGS_PATH)
      ? SettingsSchema.parse(JSON.parse(readFileSync(SETTINGS_PATH, 'utf-8')))
      : getDefaults();
  } catch (err) {
    logger.warn({ err, path: SETTINGS_PATH }, 'Failed to load settings — using defaults');
    return getDefaults();
  }
}
/**
 * Merge a patch into a keyed config map entry-by-entry.
 *
 * Entries absent from the patch are kept; entries present in the patch are
 * overlaid field-by-field on the stored entry, so fields the patch omits
 * (for example `apiKey`) survive. Built with spread / `Object.fromEntries`
 * so a patch key such as `__proto__` becomes an ordinary own property.
 */
function mergeConfigEntries<T extends object>(
  current: Record<string, T>,
  patch: Record<string, T> | undefined
): Record<string, T> {
  const overlaid = Object.entries(patch ?? {}).map(
    ([key, entry]) => [key, { ...current[key], ...entry }] as const
  );
  return { ...current, ...Object.fromEntries(overlaid) };
}

/**
 * Persist settings to disk, merging with any existing values to avoid wiping
 * fields the caller didn't include in the patch.
 *
 * Merge rules:
 *  - Top-level scalars (`routingMode`) are replaced when present in the patch.
 *  - `ollama` and `ui` are merged field-by-field.
 *  - `subscriptions` and `apiProviders` are merged per entry AND per field.
 *    Previously a patched entry replaced the stored one wholesale, so a patch
 *    built from `getPublicSettings()` — which never contains `apiKey` —
 *    silently erased the stored key. To clear an optional field, send it
 *    explicitly (e.g. `apiKey: ''`).
 *  - `updatedAt` is always set to the current time.
 *
 * The file is written with mode 0o600 and the result is mirrored to
 * process.env via `applySettingsToEnv`.
 *
 * @param patch - Partial settings to apply.
 * @returns The merged, validated settings that were written.
 * @throws Whatever the schema validation or the filesystem write throws
 *         (write failures are logged before being rethrown).
 */
export function saveSettings(patch: Partial<Settings>): Settings {
  const current = loadSettings();
  const merged: Settings = SettingsSchema.parse({
    ...current,
    ...patch,
    subscriptions: mergeConfigEntries(current.subscriptions, patch.subscriptions),
    apiProviders: mergeConfigEntries(current.apiProviders, patch.apiProviders),
    ollama: { ...current.ollama, ...(patch.ollama ?? {}) },
    ui: { ...current.ui, ...(patch.ui ?? {}) },
    updatedAt: new Date().toISOString(),
  });

  try {
    mkdirSync(dirname(SETTINGS_PATH), { recursive: true });
    writeFileSync(SETTINGS_PATH, JSON.stringify(merged, null, 2), { mode: 0o600 });
    logger.info({ path: SETTINGS_PATH }, 'Settings saved');
  } catch (err) {
    logger.error({ err, path: SETTINGS_PATH }, 'Failed to persist settings');
    throw err;
  }

  // Mirror to env vars so existing provider lookups pick up changes immediately.
  applySettingsToEnv(merged);
  return merged;
}
/**
 * Produce the dashboard-safe view of the current settings.
 *
 * Each API provider entry is reduced to `enabled`, `baseUrl`, `notes` and a
 * `hasKey` flag; the key value itself is never copied into the result.
 *
 * @returns The stored settings with API keys replaced by presence flags.
 */
export function getPublicSettings(): PublicSettings {
  const stored = loadSettings();
  const apiProviders: PublicSettings['apiProviders'] = {};
  for (const [name, { enabled, apiKey, baseUrl, notes }] of Object.entries(stored.apiProviders)) {
    apiProviders[name] = { enabled, hasKey: Boolean(apiKey), baseUrl, notes };
  }
  const { routingMode, subscriptions, ollama, ui, updatedAt } = stored;
  return { routingMode, subscriptions, apiProviders, ollama, ui, updatedAt };
}
/**
 * Copy user-configured values into `process.env` so code that reads
 * credentials and bridge URLs from the environment picks them up.
 *
 * Three groups are exported: API keys of enabled providers, the Ollama base
 * URL (when Ollama is enabled), and bridge URLs of enabled subscriptions.
 * Providers and subscriptions without a known env-var mapping are ignored.
 *
 * This function only ever sets variables; disabling an entry or clearing its
 * value leaves any previously exported variable in place.
 *
 * @param settings Settings to export; loaded from disk when omitted.
 */
export function applySettingsToEnv(settings: Settings = loadSettings()): void {
  // Assign only when both the target variable and the value are present.
  const exportValue = (envKey: string | undefined, value: string | undefined): void => {
    if (envKey && value) {
      process.env[envKey] = value;
    }
  };

  const apiKeyEnvNames: Record<string, string> = {
    cerebras: 'CEREBRAS_API_KEY',
    groq: 'GROQ_API_KEY',
    mistral: 'MISTRAL_API_KEY',
    nvidia: 'NVIDIA_API_KEY',
    cloudflare: 'CLOUDFLARE_AI_TOKEN',
    'openai-codex': 'OPENAI_API_KEY',
  };
  for (const [provider, cfg] of Object.entries(settings.apiProviders)) {
    if (cfg.enabled) {
      exportValue(apiKeyEnvNames[provider], cfg.apiKey);
    }
  }

  const { ollama } = settings;
  if (ollama.enabled) {
    exportValue('OLLAMA_BASE_URL', ollama.baseUrl);
  }

  // Subscription id -> env var consulted by the provider lookup.
  const bridgeEnvNames: Record<string, string> = {
    'claude-code': 'CLAUDE_BRIDGE_URL',
    'github-copilot': 'COPILOT_BRIDGE_URL',
    'microsoft-365-copilot': 'M365_COPILOT_BRIDGE_URL',
    'chatgpt': 'CHATGPT_BRIDGE_URL',
    'gemini': 'GEMINI_BRIDGE_URL',
    'codex': 'CODEX_BRIDGE_URL',
    'aider': 'AIDER_BRIDGE_URL',
  };
  for (const [subscription, cfg] of Object.entries(settings.subscriptions)) {
    if (cfg.enabled) {
      exportValue(bridgeEnvNames[subscription], cfg.bridgeUrl);
    }
  }
}
/**
 * Validation schema for partial settings updates: `SettingsSchema` with every
 * top-level field made optional via `.partial()`, and the four nested object
 * fields re-declared as `.optional()` versions of their original schemas.
 * NOTE(review): presumably the re-declaration keeps an omitted nested object
 * from being filled in with its schema default — confirm against zod's
 * behaviour for defaulted fields under `.partial()`.
 */
export const SettingsPatchSchema = SettingsSchema.partial().extend({
subscriptions: SettingsSchema.shape.subscriptions.optional(),
apiProviders: SettingsSchema.shape.apiProviders.optional(),
ollama: SettingsSchema.shape.ollama.optional(),
ui: SettingsSchema.shape.ui.optional(),
});

View File

@ -0,0 +1,174 @@
/**
* Public Share Card Generator
*
* Renders a shareable SVG image showing your gateway savings — useful for
* social posts, blog headers, README badges. Tokens are rounded; no
* personally identifying information leaks (caller IDs, model names etc.
* are NOT included). Just headline numbers + brand.
*
* Output is always a valid SVG so it can be embedded as `<img src="...">`
* or downloaded directly.
*/
import type { Pool } from 'pg';
import { getComprehensiveSavings } from './savings-calculator.js';
import { getBuddyState } from './gamification.js';
/**
 * Format a count compactly: millions as `x.yM`, thousands as `x.yK`,
 * anything smaller as a rounded integer.
 *
 * Values just below one million (e.g. 999 999) round to "1000.0" at one
 * decimal place; those are promoted to "1.0M" rather than printed as
 * "1000.0K".
 *
 * @param n The number to format.
 * @returns The compact representation.
 */
function fmtNum(n: number): string {
  if (n >= 1_000_000) return (n / 1_000_000).toFixed(1) + 'M';
  if (n >= 1_000) {
    const thousands = (n / 1_000).toFixed(1);
    // Rounding can carry the value up to the next unit; switch suffix when it does.
    return thousands === '1000.0' ? '1.0M' : thousands + 'K';
  }
  return Math.round(n).toString();
}
/**
 * Format a dollar amount with precision that grows as the value shrinks:
 * six decimals below one cent, four below one dollar, two otherwise.
 *
 * @param c Amount in dollars.
 * @returns The amount prefixed with `$`.
 */
function fmtCost(c: number): string {
  let decimals = 2;
  if (c < 0.01) {
    decimals = 6;
  } else if (c < 1) {
    decimals = 4;
  }
  return '$' + c.toFixed(decimals);
}
/**
 * Escape the characters `&`, `<`, `>` and `"` so a string can be embedded
 * in SVG markup.
 *
 * @param s Raw text.
 * @returns The text with those four characters replaced by entities.
 */
function escSvg(s: string): string {
  // Ampersands go first so the entities produced below are not re-escaped.
  const ampersandsDone = s.split('&').join('&amp;');
  const anglesDone = ampersandsDone.split('<').join('&lt;').split('>').join('&gt;');
  return anglesDone.split('"').join('&quot;');
}
/** Reporting window a share card can cover. */
export type ShareCardPeriod = 'day' | 'week' | 'month' | 'all';
/** Colour palette variant used when rendering a share card. */
export type ShareCardTheme = 'dark' | 'light';
/**
 * Look-back length in hours for each period. `month` is 30 days and `all`
 * is approximated as five 365-day years.
 */
const PERIOD_HOURS: Record<ShareCardPeriod, number> = {
day: 24, week: 168, month: 720, all: 24 * 365 * 5,
};
/**
 * Render the savings share card as a 1200x630 SVG document.
 *
 * Fetches the savings summary for the selected period and the buddy state
 * for the 'gateway' caller in parallel, then interpolates the headline
 * numbers, a per-source breakdown bar and a buddy footer into an SVG
 * template.
 *
 * NOTE(review): the footer's "since" date is `new Date()` at render time,
 * i.e. always today's date — confirm this is intended.
 * NOTE(review): the legend `<tspan>` elements are empty and the `label`
 * fields of the bar pieces are never read — presumably marker glyphs were
 * meant to appear there; verify against the intended design.
 *
 * @param db Postgres pool passed through to the savings and buddy queries.
 * @param opts `period` defaults to 'month', `theme` defaults to 'dark'.
 * @returns The SVG markup as a string.
 */
export async function generateShareCard(
db: Pool,
opts: { period?: ShareCardPeriod; theme?: ShareCardTheme } = {}
): Promise<string> {
const period: ShareCardPeriod = opts.period ?? 'month';
const theme: ShareCardTheme = opts.theme ?? 'dark';
const hours = PERIOD_HOURS[period];
const [savings, buddy] = await Promise.all([
getComprehensiveSavings(db, hours),
getBuddyState(db, 'gateway'),
]);
// Theme palette
const palette = theme === 'dark' ? {
bg: '#0a0a0a', surface: '#161616', text: '#e8e8e8', dim: '#888888',
accent: '#d4ff00', accentDim: '#8aa800', border: '#2a2a2a',
} : {
bg: '#f4f7fa', surface: '#ffffff', text: '#24313d', dim: '#667684',
accent: '#0f766e', accentDim: '#8ab9b5', border: '#d6e0e7',
};
const periodLabel = period === 'day' ? 'Last 24 hours'
: period === 'week' ? 'Last 7 days'
: period === 'month' ? 'Last 30 days'
: 'All-time';
const W = 1200, H = 630; // Open Graph standard
const totalTokens = savings.totalTokensSaved;
const totalCost = savings.totalCostSaved;
const reqCount = savings.totals.requests;
// Share of the no-gateway cost that was avoided, as a percentage (0 when there is no baseline cost).
const efficacy = savings.costWithoutGateway > 0
? ((savings.costWithoutGateway - savings.costWithGateway) / savings.costWithoutGateway) * 100
: 0;
// Source-bar widths
// Each source's share of total cost saved, in percent; the tiny floor avoids dividing by zero.
const total = Math.max(0.0000001, savings.totalCostSaved);
const wCache = (savings.bySource.cache.cost / total) * 100;
const wComp = (savings.bySource.compression.cost / total) * 100;
const wSub = (savings.bySource.subscriptionBridge.cost / total) * 100;
const wLocal = (savings.bySource.localRouting.cost / total) * 100;
const wRace = (savings.bySource.raceMode.cost / total) * 100;
return `<svg xmlns="http://www.w3.org/2000/svg" width="${W}" height="${H}" viewBox="0 0 ${W} ${H}">
<defs>
<linearGradient id="bgGrad" x1="0" y1="0" x2="1" y2="1">
<stop offset="0%" stop-color="${palette.bg}"/>
<stop offset="100%" stop-color="${palette.surface}"/>
</linearGradient>
<radialGradient id="glow" cx="20%" cy="0%" r="80%">
<stop offset="0%" stop-color="${palette.accent}" stop-opacity="0.20"/>
<stop offset="60%" stop-color="${palette.accent}" stop-opacity="0.04"/>
<stop offset="100%" stop-color="${palette.bg}" stop-opacity="0"/>
</radialGradient>
<style>
.mono { font-family: 'JetBrains Mono', 'SF Mono', monospace; }
.sans { font-family: 'Inter', -apple-system, sans-serif; }
.num { font-weight: 700; letter-spacing: -0.02em; }
.label { letter-spacing: 0.16em; text-transform: uppercase; }
</style>
</defs>
<!-- background -->
<rect width="${W}" height="${H}" fill="url(#bgGrad)"/>
<rect width="${W}" height="${H}" fill="url(#glow)"/>
<rect width="${W}" height="${H}" fill="none" stroke="${palette.border}" stroke-width="2"/>
<!-- brand mark -->
<g transform="translate(48 48)">
<rect x="0" y="0" width="14" height="14" fill="${palette.accent}"/>
<text x="24" y="12" class="mono" font-size="20" font-weight="700" fill="${palette.text}">llm.gateway</text>
<text x="180" y="12" class="mono" font-size="13" fill="${palette.dim}"> ${escSvg(periodLabel)}</text>
</g>
<!-- top-right: brand tag / version -->
<g transform="translate(${W - 48} 48)">
<text x="0" y="12" text-anchor="end" class="mono" font-size="11" fill="${palette.dim}" letter-spacing="0.1em">CONTEXT-X.ORG</text>
</g>
<!-- HUGE counter — eyebrow above, big number well below to avoid overlap -->
<g transform="translate(48 ${H/2 - 110})">
<text x="0" y="0" class="mono label" font-size="14" fill="${palette.dim}">tokens prevented · ${escSvg(periodLabel.toLowerCase())}</text>
<text x="0" y="135" class="mono num" font-size="120" fill="${palette.accent}">${fmtNum(totalTokens)}</text>
<text x="0" y="180" class="mono" font-size="18" fill="${palette.text}">
<tspan>${fmtCost(totalCost)} saved</tspan>
<tspan dx="20" fill="${palette.dim}">·</tspan>
<tspan dx="14">${fmtNum(reqCount)} calls</tspan>
<tspan dx="20" fill="${palette.dim}">·</tspan>
<tspan dx="14">${efficacy.toFixed(1)}% efficiency</tspan>
</text>
</g>
<!-- 5-axis breakdown bar -->
<g transform="translate(48 ${H - 180})">
<text x="0" y="0" class="mono label" font-size="12" fill="${palette.dim}">savings sources · 5-axis breakdown</text>
<rect x="0" y="14" width="${W - 96}" height="22" fill="${palette.surface}" stroke="${palette.border}"/>
${(() => {
let x = 0;
const segs: string[] = [];
const w = W - 96;
const pieces = [
{ p: wCache, c: '#d4ff00', label: '⚡' },
{ p: wComp, c: '#2dd4bf', label: '🗜' },
{ p: wSub, c: '#60a5fa', label: '🌉' },
{ p: wLocal, c: '#a78bfa', label: '🏠' },
{ p: wRace, c: '#f97316', label: '🏁' },
];
for (const piece of pieces) {
const segW = (piece.p / 100) * w;
if (segW > 0.5) {
segs.push(`<rect x="${x}" y="14" width="${segW}" height="22" fill="${piece.c}"/>`);
}
x += segW;
}
return segs.join('');
})()}
<g transform="translate(0 60)" class="mono" font-size="11" fill="${palette.dim}">
<text x="0" y="0"><tspan fill="#d4ff00"></tspan> cache</text>
<text x="120" y="0"><tspan fill="#2dd4bf"></tspan> compression</text>
<text x="270" y="0"><tspan fill="#60a5fa"></tspan> subscription bridges</text>
<text x="470" y="0"><tspan fill="#a78bfa"></tspan> local routing</text>
<text x="600" y="0"><tspan fill="#f97316"></tspan> race mode</text>
</g>
</g>
<!-- footer / buddy -->
<g transform="translate(48 ${H - 70})">
<text x="0" y="0" class="mono" font-size="11" fill="${palette.dim}">
<tspan fill="${palette.accent}">${escSvg(buddy.species)}</tspan>
<tspan dx="6">·</tspan>
<tspan dx="6">Lv.${buddy.level}</tspan>
<tspan dx="6">·</tspan>
<tspan dx="6">${buddy.streakDays}d streak</tspan>
<tspan dx="20" fill="${palette.dim}"> routing AI traffic since ${escSvg(new Date().toISOString().split('T')[0])}</tspan>
</text>
</g>
</svg>`;
}

View File

@ -0,0 +1,303 @@
/**
* Subscription Discovery
*
* Auto-detects locally installed CLI subscriptions (Claude Code, GitHub Copilot,
* ChatGPT, Gemini, etc.) and reports their authentication status. The discovery
* results drive automatic bridge spawning and dynamic provider registration.
*/
import { execFile } from 'child_process';
import { promisify } from 'util';
import { existsSync } from 'fs';
import { logger } from '../observability/logger.js';
/** Promise-returning wrapper around `child_process.execFile`, used by the CLI probes. */
const execFileAsync = promisify(execFile);
/** Identifiers of the subscriptions the gateway knows about; one per `SUBSCRIPTION_CATALOG` entry. */
export type SubscriptionId =
| 'claude-code'
| 'github-copilot'
| 'microsoft-365-copilot'
| 'chatgpt'
| 'gemini'
| 'codex'
| 'aider';
/** Static description of one subscription: how to probe its CLI and where its bridge lives. */
export interface SubscriptionDescriptor {
id: SubscriptionId;
/** Friendly display name */
label: string;
/** CLI binary required to use the subscription */
command: string;
/** Args used for the version probe */
versionArgs: readonly string[];
/** Args used for the auth probe (optional) */
authProbeArgs?: readonly string[];
/** Default port the bridge listens on */
bridgePort: number;
/** ENV var the gateway uses to find the bridge URL */
bridgeEnvKey: string;
/** Logical provider name in `external-providers.ts` */
providerName: string;
/** Models exposed via this subscription */
models: ReadonlyArray<{ id: string; tier: 'fast' | 'medium' | 'large' | 'reasoning' }>;
/** Which kind of bridge implementation backs this subscription (a fixed set of names, not a file path) */
bridgeImplementation: 'inline-claude' | 'inline-openai' | 'inline-copilot' | 'external-codex';
}
/** Result of probing one catalog entry, as produced by `discoverSubscriptions`. */
export interface SubscriptionStatus {
/** The catalog entry that was probed. */
descriptor: SubscriptionDescriptor;
/** True when the CLI answered the version probe, or when a bridge is reachable without a local CLI. */
installed: boolean;
/** Auth probe outcome; 'unknown' when no reliable probe exists for this subscription. */
authenticated: boolean | 'unknown';
/** First line of the CLI's version output, or the literal 'installed' when none was captured. */
version?: string;
error?: string;
/** Bridge URL from the environment, or the auto-detected local URL when one responded. */
bridgeUrl?: string;
/** Whether the bridge health probe succeeded. */
bridgeRunning: boolean;
}
/**
 * Catalog of subscriptions the gateway knows how to bootstrap.
 * `discoverSubscriptions` probes every entry listed here, so adding an entry
 * (and its id to the `SubscriptionId` union) is enough to make it discoverable.
 */
export const SUBSCRIPTION_CATALOG: readonly SubscriptionDescriptor[] = [
{
id: 'claude-code',
label: 'Claude Code (Anthropic Subscription)',
command: 'claude',
versionArgs: ['--version'],
bridgePort: 3250,
bridgeEnvKey: 'CLAUDE_BRIDGE_URL',
providerName: 'claude-bridge',
bridgeImplementation: 'inline-claude',
models: [
{ id: 'claude-opus-4-1', tier: 'reasoning' },
{ id: 'claude-sonnet-4-1', tier: 'large' },
{ id: 'claude-haiku-3', tier: 'fast' },
],
},
{
id: 'github-copilot',
label: 'GitHub Copilot Subscription',
command: 'gh',
versionArgs: ['copilot', '--version'],
bridgePort: 3252,
bridgeEnvKey: 'COPILOT_BRIDGE_URL',
providerName: 'copilot-bridge',
bridgeImplementation: 'inline-copilot',
models: [
{ id: 'gpt-4', tier: 'reasoning' },
{ id: 'gpt-3.5-turbo', tier: 'medium' },
],
},
// NOTE(review): this entry probes `node --version`, so it reports as installed
// on any host where Node is on the PATH — confirm that is the intended check.
{
id: 'microsoft-365-copilot',
label: 'Microsoft 365 Copilot Subscription',
command: 'node',
versionArgs: ['--version'],
bridgePort: 3257,
bridgeEnvKey: 'M365_COPILOT_BRIDGE_URL',
providerName: 'm365-copilot-bridge',
bridgeImplementation: 'inline-openai',
models: [
{ id: 'microsoft-365-copilot', tier: 'reasoning' },
{ id: 'm365-copilot-chat', tier: 'large' },
],
},
{
id: 'chatgpt',
label: 'OpenAI ChatGPT Plus Subscription',
command: 'chatgpt',
versionArgs: ['--version'],
bridgePort: 3251,
bridgeEnvKey: 'CHATGPT_BRIDGE_URL',
providerName: 'chatgpt-bridge',
bridgeImplementation: 'inline-openai',
models: [
{ id: 'gpt-4-turbo', tier: 'reasoning' },
{ id: 'gpt-4', tier: 'large' },
{ id: 'gpt-3.5-turbo', tier: 'medium' },
],
},
{
id: 'gemini',
label: 'Google Gemini Advanced Subscription',
command: 'gemini',
versionArgs: ['--version'],
bridgePort: 3254,
bridgeEnvKey: 'GEMINI_BRIDGE_URL',
providerName: 'gemini-bridge',
bridgeImplementation: 'inline-openai',
models: [
{ id: 'gemini-1.5-pro', tier: 'reasoning' },
{ id: 'gemini-1.5-flash', tier: 'fast' },
],
},
{
id: 'codex',
label: 'OpenAI Codex CLI Subscription',
command: 'codex',
versionArgs: ['--version'],
authProbeArgs: ['login', 'status'],
bridgePort: 3253,
bridgeEnvKey: 'CODEX_BRIDGE_URL',
providerName: 'codex-bridge',
bridgeImplementation: 'external-codex',
models: [
{ id: 'gpt-5.1-codex', tier: 'reasoning' },
{ id: 'gpt-5.1-codex-mini', tier: 'large' },
{ id: 'codex-mini-latest', tier: 'medium' },
],
},
{
id: 'aider',
label: 'Aider AI Pair Programmer',
command: 'aider',
versionArgs: ['--version'],
bridgePort: 3256,
bridgeEnvKey: 'AIDER_BRIDGE_URL',
providerName: 'aider-bridge',
bridgeImplementation: 'inline-openai',
models: [
{ id: 'aider-default', tier: 'large' },
],
},
];
/**
 * Run a CLI's version command (3 s timeout, 64 KiB output cap) to find out
 * whether the binary is present.
 *
 * @param command Executable to run.
 * @param args Arguments for the version probe.
 * @returns `null` when the binary cannot be found (ENOENT); otherwise the
 *          first line of its output, or the literal 'installed' when the
 *          command produced no output or failed for any other reason.
 */
async function probeVersion(command: string, args: readonly string[]): Promise<string | null> {
  let output: { stdout: string; stderr: string };
  try {
    output = await execFileAsync(command, args as string[], {
      timeout: 3000,
      maxBuffer: 64 * 1024,
    });
  } catch (err: unknown) {
    // A missing binary is the only "not installed" signal; any other failure
    // (non-zero exit, e.g. auth required) still proves the command exists.
    const missing = (err as NodeJS.ErrnoException).code === 'ENOENT';
    return missing ? null : 'installed';
  }
  const firstLine = (output.stdout || output.stderr || '').trim().split('\n')[0];
  return firstLine || 'installed';
}
/**
 * Run a command with a 3 s timeout and report whether it exited successfully.
 */
async function authCommandSucceeds(command: string, args: readonly string[]): Promise<boolean> {
  try {
    await execFileAsync(command, [...args], { timeout: 3000 });
    return true;
  } catch {
    return false;
  }
}

/**
 * Best-effort authentication check. Many CLI tools don't have a clean probe,
 * so 'unknown' is returned rather than guessing wrong.
 *
 * Resolution order:
 *  1. Subscriptions with a bespoke check (credentials file, `gh auth status`,
 *     Microsoft token env vars).
 *  2. Any descriptor that declares `authProbeArgs`: the descriptor's own
 *     command is run with those args and success means authenticated. This
 *     covers codex (`codex login status`) and lets new catalog entries opt in
 *     without touching this function.
 *  3. Otherwise 'unknown'.
 *
 * @param desc Catalog entry to check.
 * @returns `true`/`false` when a probe exists, 'unknown' otherwise.
 */
async function probeAuthenticated(desc: SubscriptionDescriptor): Promise<boolean | 'unknown'> {
  // Claude Code stores credentials in ~/.claude/.credentials.json
  if (desc.id === 'claude-code') {
    const home = process.env.HOME || '/root';
    return existsSync(`${home}/.claude/.credentials.json`);
  }

  // GitHub Copilot rides on the gh CLI's own login state.
  if (desc.id === 'github-copilot') {
    return authCommandSucceeds('gh', ['auth', 'status']);
  }

  // Microsoft 365 Copilot counts as authenticated when any of these env vars is set.
  if (desc.id === 'microsoft-365-copilot') {
    return Boolean(
      process.env['MICROSOFT_GRAPH_ACCESS_TOKEN'] ||
        process.env['M365_COPILOT_ACCESS_TOKEN'] ||
        process.env['MICROSOFT_CLIENT_ID']
    );
  }

  // Generic path: use the probe declared on the descriptor itself.
  if (desc.authProbeArgs && desc.authProbeArgs.length > 0) {
    return authCommandSucceeds(desc.command, desc.authProbeArgs);
  }

  return 'unknown';
}
/**
 * Check whether a bridge answers on its `/health` endpoint within 1.5 s.
 *
 * Any HTTP response counts as reachable; only a network failure or the
 * timeout yields `false`.
 *
 * @param url Bridge base URL; a single trailing slash is tolerated.
 * @returns `true` when the request completed, `false` otherwise (including
 *          when `url` is empty or undefined).
 */
async function probeBridge(url: string | undefined): Promise<boolean> {
  if (!url) return false;
  let timer: ReturnType<typeof setTimeout> | undefined;
  try {
    const abort = new AbortController();
    timer = setTimeout(() => abort.abort(), 1500);
    const healthUrl = `${url.replace(/\/$/, '')}/health`;
    await fetch(healthUrl, { signal: abort.signal });
    return true;
  } catch {
    return false;
  } finally {
    // Always release the timer so it cannot fire after the probe has settled.
    if (timer !== undefined) clearTimeout(timer);
  }
}
/**
 * Work out which bridge URL applies to a subscription and whether it is up.
 *
 * An explicit URL in the descriptor's env var always wins and is reported
 * even when the bridge does not respond. Without one, the default local port
 * (`http://127.0.0.1:{bridgePort}`) is probed and only reported when it
 * answers, so a locally running bridge is picked up with no configuration.
 *
 * @param desc Catalog entry whose bridge should be resolved.
 * @returns The URL (when known) and whether the health probe succeeded.
 */
async function resolveBridgeUrl(desc: SubscriptionDescriptor): Promise<{ url?: string; running: boolean }> {
  const configured = process.env[desc.bridgeEnvKey];
  if (!configured) {
    // Nothing configured: fall back to auto-detection on the default port.
    const fallback = `http://127.0.0.1:${desc.bridgePort}`;
    if (await probeBridge(fallback)) {
      return { url: fallback, running: true };
    }
    return { running: false };
  }
  return { url: configured, running: await probeBridge(configured) };
}
/**
 * Probe a single catalog entry: bridge reachability, CLI presence and, when
 * the CLI is present, authentication state.
 */
async function probeSubscription(desc: SubscriptionDescriptor): Promise<SubscriptionStatus> {
  // The bridge and version probes are independent and neither rejects, so
  // they run concurrently instead of stacking their timeouts.
  const [bridge, version] = await Promise.all([
    resolveBridgeUrl(desc),
    probeVersion(desc.command, desc.versionArgs),
  ]);

  if (!version) {
    // No local CLI. A running bridge is still enough to count as available,
    // since the bridge could live on another machine.
    return {
      descriptor: desc,
      installed: bridge.running,
      authenticated: bridge.running ? 'unknown' : false,
      bridgeUrl: bridge.url,
      bridgeRunning: bridge.running,
    };
  }

  const authenticated = await probeAuthenticated(desc);
  return {
    descriptor: desc,
    installed: true,
    authenticated,
    version,
    bridgeUrl: bridge.url,
    bridgeRunning: bridge.running,
  };
}

/**
 * Discover all subscriptions the gateway knows about. Probes the CLI binary,
 * authentication state, and any configured or locally running bridge for
 * every catalog entry in parallel, then logs a summary.
 *
 * @returns One status per `SUBSCRIPTION_CATALOG` entry, in catalog order.
 */
export async function discoverSubscriptions(): Promise<SubscriptionStatus[]> {
  const results = await Promise.all(SUBSCRIPTION_CATALOG.map((desc) => probeSubscription(desc)));
  logger.info(
    {
      detected: results.filter((r) => r.installed).length,
      bridgesLive: results.filter((r) => r.bridgeRunning).length,
      total: results.length,
    },
    'Subscription discovery completed'
  );
  return results;
}

View File

@ -0,0 +1,271 @@
/**
* Subscription Pool Wallet
*
* Tracks usage of each CLI subscription against its known quota window
* (e.g. Claude Code = 45 requests / 5 h, ChatGPT Plus = 80 requests / 3 h, Copilot = no tracked limit).
* Used by the dashboard to show which subscription has the most headroom
* and (future) by the router to load-balance across subscriptions.
*
* This is the feature competitors don't have: combining MULTIPLE personal
* AI subscriptions into a single managed pool.
*/
import type { Pool } from 'pg';
import { logger } from '../observability/logger.js';
/** Quota definition for one subscription: how many requests fit into which window. */
export interface QuotaProfile {
subscriptionId: string;
label: string;
/** Hard request quota inside the window. Null = unknown / unlimited. */
requestQuota: number | null;
/** Window length in seconds (e.g. 3 h = 10800 s). */
windowSeconds: number;
/** Reset behaviour: 'rolling' = sliding window, 'fixed' = clock-aligned reset. */
reset: 'rolling' | 'fixed';
}
/**
 * Known subscription quota profiles, keyed by subscription id. Numbers are
 * conservative defaults.
 * NOTE(review): the original comment says users can override these via
 * Settings; no override path is visible in this module — confirm where that
 * happens.
 */
export const QUOTA_PROFILES: Record<string, QuotaProfile> = {
'claude-code': { subscriptionId: 'claude-code', label: 'Claude Code (Pro)', requestQuota: 45, windowSeconds: 5 * 3600, reset: 'rolling' },
'github-copilot': { subscriptionId: 'github-copilot', label: 'GitHub Copilot', requestQuota: null, windowSeconds: 30 * 86400, reset: 'fixed' },
'microsoft-365-copilot': { subscriptionId: 'microsoft-365-copilot', label: 'M365 Copilot', requestQuota: null, windowSeconds: 30 * 86400, reset: 'fixed' },
'chatgpt': { subscriptionId: 'chatgpt', label: 'ChatGPT Plus', requestQuota: 80, windowSeconds: 3 * 3600, reset: 'rolling' },
'gemini': { subscriptionId: 'gemini', label: 'Gemini Advanced', requestQuota: null, windowSeconds: 30 * 86400, reset: 'fixed' },
'codex': { subscriptionId: 'codex', label: 'OpenAI Codex', requestQuota: 150, windowSeconds: 5 * 3600, reset: 'rolling' },
'aider': { subscriptionId: 'aider', label: 'Aider', requestQuota: null, windowSeconds: 86400, reset: 'fixed' },
};
/**
 * Count one request (and its tokens) against a subscription's quota bucket.
 *
 * Usage is upserted into `subscription_quota_window`, keyed by subscription
 * and bucket start. Rolling profiles use quarter-hour buckets (the sliding
 * window is applied when reading); fixed profiles use one bucket per UTC day.
 * Unknown subscription ids are ignored, and database failures are logged as
 * warnings rather than thrown.
 *
 * @param db Postgres pool.
 * @param subscriptionId Key into `QUOTA_PROFILES`.
 * @param tokensConsumed Tokens used by the request; defaults to 0.
 */
export async function recordSubscriptionUsage(
  db: Pool,
  subscriptionId: string,
  tokensConsumed: number = 0
): Promise<void> {
  const profile = QUOTA_PROFILES[subscriptionId];
  if (!profile) return;

  // Work out the start of the bucket this request falls into.
  const nowMs = Date.now();
  let bucketStartMs: number;
  if (profile.reset === 'rolling') {
    // Quarter-hour granularity keeps the row count down while still letting
    // the reader sum an accurate sliding window.
    bucketStartMs = Math.floor(nowMs / 900_000) * 900_000;
  } else {
    // Fixed reset: one bucket per UTC calendar day.
    bucketStartMs = new Date(nowMs).setUTCHours(0, 0, 0, 0);
  }
  const windowStart = new Date(bucketStartMs);
  const resetAt = new Date(bucketStartMs + profile.windowSeconds * 1000);

  try {
    await db.query(
      `
INSERT INTO subscription_quota_window
(subscription_id, window_start, window_seconds, request_count, tokens_consumed, quota_limit, reset_at)
VALUES ($1, $2, $3, 1, $4, $5, $6)
ON CONFLICT (subscription_id, window_start)
DO UPDATE SET
request_count = subscription_quota_window.request_count + 1,
tokens_consumed = subscription_quota_window.tokens_consumed + EXCLUDED.tokens_consumed
`,
      [subscriptionId, windowStart, profile.windowSeconds, tokensConsumed, profile.requestQuota, resetAt]
    );
  } catch (err) {
    logger.warn({ err, subscriptionId }, 'subscription-wallet: usage record failed');
  }
}
/** One subscription's row in the wallet snapshot returned by `getSubscriptionWallet`. */
export interface WalletEntry {
subscriptionId: string;
label: string;
/** Request quota from the profile; null when unknown / unlimited. */
requestQuota: number | null;
/** Requests counted inside the current window. */
used: number;
/** Quota minus usage, floored at 0; null when there is no quota. */
remaining: number | null;
/** Usage as a percentage of the quota, capped at 100 and rounded to one decimal; null when there is no quota. */
utilizationPercent: number | null;
windowSeconds: number;
/** Latest `reset_at` among the counted buckets as an ISO timestamp; null when nothing was recorded. */
resetAt: string | null;
/** Predicted exhaustion timestamp based on current rate; null if no quota or no usage. */
predictedExhaustionAt: string | null;
/** Routing hint derived from utilization: <=30% 'use-this', >=80% 'near-limit', >=100% 'exhausted', otherwise 'available'; 'unknown' without a quota. */
recommendation: 'use-this' | 'available' | 'near-limit' | 'exhausted' | 'unknown';
}
/**
 * Build the wallet snapshot for the dashboard: one entry per quota profile,
 * with usage inside the current window, remaining headroom, a linear
 * exhaustion forecast and a routing recommendation.
 *
 * Profiles are queried one after another. A failed read is logged as a
 * warning and that profile is reported with zero usage.
 *
 * @param db Postgres pool.
 * @returns Wallet entries in `QUOTA_PROFILES` order.
 */
export async function getSubscriptionWallet(db: Pool): Promise<WalletEntry[]> {
  const snapshot: WalletEntry[] = [];

  for (const profile of Object.values(QUOTA_PROFILES)) {
    const { subscriptionId, label, requestQuota, windowSeconds } = profile;

    let used = 0;
    let resetAt: string | null = null;
    try {
      const { rows } = await db.query(
        `
SELECT
COALESCE(SUM(request_count), 0)::INT AS used,
MAX(reset_at) AS reset_at
FROM subscription_quota_window
WHERE subscription_id = $1
AND window_start > NOW() - MAKE_INTERVAL(secs => $2)
`,
        [subscriptionId, windowSeconds]
      );
      const row = rows[0];
      used = parseInt(row?.used ?? '0', 10);
      resetAt = row?.reset_at ? new Date(row.reset_at).toISOString() : null;
    } catch (err) {
      logger.warn({ err, sub: subscriptionId }, 'wallet: read failed');
    }

    const remaining = requestQuota !== null ? Math.max(requestQuota - used, 0) : null;
    const utilizationPercent = requestQuota ? Math.min(100, (used / requestQuota) * 100) : null;

    // Forecast: assume the average rate over the whole window continues.
    let predictedExhaustionAt: string | null = null;
    if (remaining !== null && used > 0 && requestQuota) {
      const ratePerSecond = used / windowSeconds;
      if (ratePerSecond > 0) {
        const secondsRemaining = remaining / ratePerSecond;
        predictedExhaustionAt = new Date(Date.now() + secondsRemaining * 1000).toISOString();
      }
    }

    let recommendation: WalletEntry['recommendation'];
    if (utilizationPercent === null) {
      recommendation = 'unknown';
    } else if (utilizationPercent >= 100) {
      recommendation = 'exhausted';
    } else if (utilizationPercent >= 80) {
      recommendation = 'near-limit';
    } else if (utilizationPercent <= 30) {
      recommendation = 'use-this';
    } else {
      recommendation = 'available';
    }

    snapshot.push({
      subscriptionId,
      label,
      requestQuota,
      used,
      remaining,
      // One decimal place is enough for display.
      utilizationPercent: utilizationPercent !== null ? Math.round(utilizationPercent * 10) / 10 : null,
      windowSeconds,
      resetAt,
      predictedExhaustionAt,
      recommendation,
    });
  }

  return snapshot;
}
/**
 * Resolve which subscription a model id is billed against.
 *
 * Matching is case-insensitive and the first rule that fits wins, so the
 * Codex rule has to precede the broader `gpt-` rule.
 *
 * @param model Model identifier.
 * @returns The subscription id, or null for models that belong to no
 *          subscription (free APIs, local Ollama).
 */
export function modelToSubscriptionId(model: string): string | null {
  const id = model.toLowerCase();
  const rules: ReadonlyArray<readonly [(candidate: string) => boolean, string]> = [
    [(c) => c.includes('claude'), 'claude-code'],
    [(c) => c.startsWith('gpt-5.1-codex') || c === 'codex-mini-latest', 'codex'],
    [(c) => c.startsWith('gpt-'), 'chatgpt'],
    [(c) => c.startsWith('gemini-'), 'gemini'],
    [(c) => c.startsWith('github-copilot') || c === 'copilot-chat', 'github-copilot'],
    [(c) => c === 'microsoft-365-copilot' || c === 'm365-copilot-chat', 'microsoft-365-copilot'],
  ];
  for (const [matches, subscription] of rules) {
    if (matches(id)) return subscription;
  }
  return null;
}
/**
 * Post-process a routing decision against the subscription wallet.
 *
 * When the chosen model's subscription is at or above 80% utilization (or its
 * utilization is unknown, or `options.forced` is set), the fallback chain is
 * scanned for a non-exhausted model whose subscription has lower known
 * utilization, and the one with the most headroom is moved to the front.
 *
 * This is the pool-routing feature: spread load across the user's own
 * subscriptions instead of always hitting the primary.
 *
 * @param db Postgres pool used to read the wallet.
 * @param decision The router's pick plus its fallback chain.
 * @param options `forced` skips the "primary still has headroom" shortcut.
 * @returns The re-routed model, chain and a human-readable reason, or null
 *          when the original decision should stand.
 */
export async function applyPoolRouting(
  db: Pool,
  decision: { model: string; fallback_chain: string[]; tier: string },
  options: { forced?: boolean } = {}
): Promise<{ model: string; fallback_chain: string[]; reason: string } | null> {
  const wallet = await getSubscriptionWallet(db);

  // Wallet entry for the subscription behind a model, if there is one.
  const entryFor = (model: string) => {
    const sub = modelToSubscriptionId(model);
    return sub ? wallet.find((entry) => entry.subscriptionId === sub) : undefined;
  };
  const utilizationOf = (model: string): number | null => entryFor(model)?.utilizationPercent ?? null;
  const exhausted = (model: string): boolean => entryFor(model)?.recommendation === 'exhausted';

  const primary = decision.model;
  const primaryUtil = utilizationOf(primary);

  // Models outside any subscription are never re-routed.
  if (!modelToSubscriptionId(primary)) return null;

  // Leave the decision alone while the primary has plenty of headroom.
  const hasHeadroom = primaryUtil !== null && primaryUtil < 80 && !exhausted(primary);
  if (!options.forced && hasHeadroom) return null;

  // Look through the chain for the sibling with the lowest known utilization.
  let chosen = primary;
  let chosenUtil = primaryUtil ?? 100;
  for (const candidate of decision.fallback_chain) {
    if (candidate === primary || exhausted(candidate)) continue;
    const util = utilizationOf(candidate);
    // Unknown utilization never beats a known figure.
    if (util !== null && util < chosenUtil) {
      chosen = candidate;
      chosenUtil = util;
    }
  }
  if (chosen === primary) return null;

  // Promote the winner to the head of the chain, keeping the rest in order.
  const reordered = [chosen, ...decision.fallback_chain.filter((m) => m !== chosen)];
  return {
    model: chosen,
    fallback_chain: reordered,
    reason: `pool-route: primary ${decision.model} at ${primaryUtil?.toFixed(0) ?? '?'}% util, switched to ${chosen} at ${chosenUtil.toFixed(0)}%`,
  };
}
/**
 * Choose, among the given subscription ids, the one with the most headroom.
 *
 * Exhausted subscriptions are excluded. The rest are ranked by utilization,
 * lowest first; a subscription without quota tracking is ranked as if it
 * were 50% used, so it sits between lightly and heavily used tracked ones.
 *
 * @param db Postgres pool used to read the wallet.
 * @param candidates Subscription ids to choose from.
 * @returns The winning subscription and a short reason, or null when no
 *          candidate is eligible.
 */
export async function pickBestSubscription(
  db: Pool,
  candidates: readonly string[]
): Promise<{ subscriptionId: string; reason: string } | null> {
  const wallet = await getSubscriptionWallet(db);
  const [winner] = wallet
    .filter((entry) => entry.recommendation !== 'exhausted' && candidates.includes(entry.subscriptionId))
    .sort((a, b) => (a.utilizationPercent ?? 50) - (b.utilizationPercent ?? 50));
  if (!winner) return null;

  const reason =
    winner.utilizationPercent === null
      ? 'no quota tracking'
      : `${winner.utilizationPercent.toFixed(0)}% used in window`;
  return { subscriptionId: winner.subscriptionId, reason };
}

View File

@ -86,6 +86,17 @@ const PROVIDERS: readonly ExternalProvider[] = [
{ id: 'gpt-3.5-turbo', tier: 'medium', contextLength: 4096 }, { id: 'gpt-3.5-turbo', tier: 'medium', contextLength: 4096 },
], ],
}, },
{
name: 'm365-copilot-bridge',
baseUrl: '', // constructed from M365_COPILOT_BRIDGE_URL env var
envKey: 'M365_COPILOT_BRIDGE_URL',
rateLimitRpm: 60,
enabled: true,
models: [
{ id: 'microsoft-365-copilot', tier: 'reasoning', contextLength: 128000 },
{ id: 'm365-copilot-chat', tier: 'large', contextLength: 128000 },
],
},
{ {
name: 'cerebras', name: 'cerebras',
baseUrl: 'https://api.cerebras.ai/v1', baseUrl: 'https://api.cerebras.ai/v1',
@ -146,12 +157,13 @@ const PROVIDERS: readonly ExternalProvider[] = [
{ {
name: 'openai-codex', name: 'openai-codex',
baseUrl: 'https://api.openai.com/v1', baseUrl: 'https://api.openai.com/v1',
envKey: 'OPENAI_API_KEY', envKey: 'OPENAI_CODEX_URL',
rateLimitRpm: 60, rateLimitRpm: 60,
enabled: true, enabled: true,
models: [ models: [
{ id: 'gpt-4-turbo', tier: 'reasoning', contextLength: 128000 }, { id: 'gpt-5.1-codex', tier: 'reasoning', contextLength: 256000 },
{ id: 'gpt-3.5-turbo', tier: 'fast', contextLength: 16384 }, { id: 'gpt-5.1-codex-mini', tier: 'large', contextLength: 256000 },
{ id: 'codex-mini-latest', tier: 'medium', contextLength: 200000 },
], ],
}, },
{ {
@ -169,16 +181,28 @@ const PROVIDERS: readonly ExternalProvider[] = [
{ {
name: 'codex', name: 'codex',
baseUrl: 'https://api.github.com/copilot_inner/v2', baseUrl: 'https://api.github.com/copilot_inner/v2',
envKey: 'GITHUB_CODEX_TOKEN', envKey: 'CODEX_BRIDGE_URL',
rateLimitRpm: 60, rateLimitRpm: 60,
enabled: true, enabled: true,
models: [ models: [
{ id: 'github-copilot-x', tier: 'large', contextLength: 8192 }, { id: 'gpt-5.1-codex', tier: 'reasoning', contextLength: 256000 },
{ id: 'code-davinci-002', tier: 'medium', contextLength: 4096 }, { id: 'gpt-5.1-codex-mini', tier: 'large', contextLength: 256000 },
{ id: 'codex-mini-latest', tier: 'medium', contextLength: 200000 },
], ],
}, },
]; ];
const AUTHLESS_BRIDGE_PROVIDERS = new Set([
'claude-bridge',
'claude-code',
'openai-bridge',
'chatgpt-bridge',
'copilot-bridge',
'm365-copilot-bridge',
]);
const GENERATE_BRIDGE_PROVIDERS = new Set(['claude-bridge', 'claude-code']);
// ─── Rate Limiter (simple sliding window) ─────────────────────────── // ─── Rate Limiter (simple sliding window) ───────────────────────────
const requestTimestamps: Map<string, number[]> = new Map(); const requestTimestamps: Map<string, number[]> = new Map();
@ -213,25 +237,34 @@ function getApiKey(provider: ExternalProvider): string | undefined {
return url ? 'claude-code-enabled' : undefined; return url ? 'claude-code-enabled' : undefined;
} }
if (provider.name === 'openai-bridge') { if (provider.name === 'openai-bridge') {
// openai-bridge uses OPENAI_API_KEY for auth, but also needs bridge URL // Subscription bridge auth is handled by the bridge process/CLI session.
const apiKey = process.env['OPENAI_API_KEY'];
const url = process.env['OPENAI_BRIDGE_URL']; const url = process.env['OPENAI_BRIDGE_URL'];
return apiKey && url ? apiKey : undefined; return url ? 'openai-bridge-enabled' : undefined;
} }
if (provider.name === 'chatgpt-bridge') { if (provider.name === 'chatgpt-bridge') {
// chatgpt-bridge can use same URL as openai-bridge (same service), but needs API key // ChatGPT Plus bridge can reuse the OpenAI bridge when configured that way.
const apiKey = process.env['OPENAI_API_KEY'];
const url = process.env['CHATGPT_BRIDGE_URL'] || process.env['OPENAI_BRIDGE_URL']; const url = process.env['CHATGPT_BRIDGE_URL'] || process.env['OPENAI_BRIDGE_URL'];
return apiKey && url ? apiKey : undefined; return url ? 'chatgpt-bridge-enabled' : undefined;
} }
if (provider.name === 'copilot-bridge') { if (provider.name === 'copilot-bridge') {
// copilot-bridge uses GitHub Copilot subscription (auth handled internally by copilot-api) // copilot-bridge uses GitHub Copilot subscription (auth handled internally by copilot-api).
// Just needs URL to be configured
const url = process.env['COPILOT_BRIDGE_URL']; const url = process.env['COPILOT_BRIDGE_URL'];
return url ? 'copilot-authenticated' : undefined; return url ? 'copilot-authenticated' : undefined;
} }
if (provider.name === 'm365-copilot-bridge') {
// Microsoft 365 Copilot uses Microsoft Graph delegated auth inside the bridge.
const url = process.env['M365_COPILOT_BRIDGE_URL'];
return url ? 'm365-copilot-bridge-enabled' : undefined;
}
if (provider.name === 'openai-codex') {
const bridgeUrl = process.env['OPENAI_CODEX_URL'] || process.env['CODEX_BRIDGE_URL'];
if (bridgeUrl) return 'openai-codex-bridge-enabled';
return process.env['OPENAI_API_KEY'] || undefined;
}
if (provider.name === 'codex') { if (provider.name === 'codex') {
// codex uses GitHub Codex API token // Codex can run through an authless local/subscription bridge. A token remains supported as fallback.
const bridgeUrl = process.env['CODEX_BRIDGE_URL'] || process.env['OPENAI_CODEX_URL'];
if (bridgeUrl) return 'codex-bridge-enabled';
const token = process.env['GITHUB_CODEX_TOKEN']; const token = process.env['GITHUB_CODEX_TOKEN'];
return token ? token : undefined; return token ? token : undefined;
} }
@ -241,11 +274,11 @@ function getApiKey(provider: ExternalProvider): string | undefined {
function getBaseUrl(provider: ExternalProvider): string { function getBaseUrl(provider: ExternalProvider): string {
if (provider.name === 'claude-bridge') { if (provider.name === 'claude-bridge') {
const url = process.env['CLAUDE_BRIDGE_URL']; const url = process.env['CLAUDE_BRIDGE_URL'];
return url ? `${url}/v1` : ''; return url ?? '';
} }
if (provider.name === 'claude-code') { if (provider.name === 'claude-code') {
const url = process.env['CLAUDE_CODE_URL']; const url = process.env['CLAUDE_CODE_URL'];
return url ? `${url}/v1` : ''; return url ?? '';
} }
if (provider.name === 'openai-bridge') { if (provider.name === 'openai-bridge') {
const url = process.env['OPENAI_BRIDGE_URL']; const url = process.env['OPENAI_BRIDGE_URL'];
@ -257,7 +290,19 @@ function getBaseUrl(provider: ExternalProvider): string {
} }
if (provider.name === 'copilot-bridge') { if (provider.name === 'copilot-bridge') {
const url = process.env['COPILOT_BRIDGE_URL']; const url = process.env['COPILOT_BRIDGE_URL'];
return url ? `${url}` : ''; return url ? `${url}/v1` : '';
}
if (provider.name === 'm365-copilot-bridge') {
const url = process.env['M365_COPILOT_BRIDGE_URL'];
return url ? `${url}/v1` : '';
}
if (provider.name === 'openai-codex') {
const url = process.env['OPENAI_CODEX_URL'] || process.env['CODEX_BRIDGE_URL'];
return url ? `${url}/v1` : provider.baseUrl;
}
if (provider.name === 'codex') {
const url = process.env['CODEX_BRIDGE_URL'] || process.env['OPENAI_CODEX_URL'];
return url ? `${url}/v1` : provider.baseUrl;
} }
if (provider.name === 'cloudflare') { if (provider.name === 'cloudflare') {
const accountId = process.env['CLOUDFLARE_ACCOUNT_ID']; const accountId = process.env['CLOUDFLARE_ACCOUNT_ID'];
@ -271,6 +316,11 @@ export function getAvailableProviders(): readonly ExternalProvider[] {
return PROVIDERS.filter((p) => p.enabled && getApiKey(p)); return PROVIDERS.filter((p) => p.enabled && getApiKey(p));
} }
/**
 * Returns ALL configured providers (enabled or not, with or without API key).
 * For dashboard listing.
 *
 * Unlike getAvailableProviders, no filtering on `enabled` or on a resolvable
 * API key is applied here. The module-level PROVIDERS array itself is returned
 * (not a copy); the `readonly` return type is what keeps callers from
 * mutating it.
 *
 * @returns The full provider table, in declaration order.
 */
export function getAllProviders(): readonly ExternalProvider[] {
  return PROVIDERS;
}
function findBestModel( function findBestModel(
provider: ExternalProvider, provider: ExternalProvider,
targetTier: 'fast' | 'medium' | 'large' | 'reasoning', targetTier: 'fast' | 'medium' | 'large' | 'reasoning',
@ -296,7 +346,11 @@ function findBestModel(
function buildRequestHeaders(provider: ExternalProvider, apiKey: string): Record<string, string> { function buildRequestHeaders(provider: ExternalProvider, apiKey: string): Record<string, string> {
const headers: Record<string, string> = { 'Content-Type': 'application/json' }; const headers: Record<string, string> = { 'Content-Type': 'application/json' };
if (!['claude-bridge', 'claude-code', 'openai-bridge', 'chatgpt-bridge', 'copilot-bridge'].includes(provider.name)) { const usesAuthlessBridge = AUTHLESS_BRIDGE_PROVIDERS.has(provider.name)
|| (provider.name === 'openai-codex' && !!(process.env['OPENAI_CODEX_URL'] || process.env['CODEX_BRIDGE_URL']))
|| (provider.name === 'codex' && !!(process.env['CODEX_BRIDGE_URL'] || process.env['OPENAI_CODEX_URL']));
if (!usesAuthlessBridge) {
headers['Authorization'] = `Bearer ${apiKey}`; headers['Authorization'] = `Bearer ${apiKey}`;
} }
return headers; return headers;
@ -311,13 +365,29 @@ function buildRequestPayload(model: ExternalModel, request: ExternalCompletionRe
}; };
} }
/**
 * Builds the prompt-style request body that callProvider sends to providers
 * listed in GENERATE_BRIDGE_PROVIDERS.
 *
 * The chat history is flattened: the content of the first `system` message
 * (if any) becomes `system`, and every non-system message is rendered as
 * `<role>: <content>`, joined by blank lines, to form `prompt`.
 *
 * @param model - Model whose `id` is forwarded as `model`.
 * @param request - Completion request; `temperature` defaults to 0.3 and
 *   `max_tokens` to 2048 when they are null or undefined.
 * @returns A plain object with `model`, `prompt`, `system`, `temperature`
 *   and `max_tokens`; `system` is undefined when there is no system message.
 */
function buildGenerateBridgePayload(model: ExternalModel, request: ExternalCompletionRequest): Record<string, unknown> {
  const systemMessage = request.messages.find((message) => message.role === 'system');

  // Render every non-system turn as "role: content", keeping the original order.
  const turns: string[] = [];
  for (const message of request.messages) {
    if (message.role === 'system') continue;
    turns.push(`${message.role}: ${message.content}`);
  }

  const temperature = request.temperature ?? 0.3;
  const maxTokens = request.max_tokens ?? 2048;

  return {
    model: model.id,
    prompt: turns.join('\n\n'),
    system: systemMessage?.content,
    temperature,
    max_tokens: maxTokens,
  };
}
function parseExternalResponse( function parseExternalResponse(
data: any, data: any,
model: ExternalModel, model: ExternalModel,
provider: ExternalProvider, provider: ExternalProvider,
start: number, start: number,
): ExternalCompletionResponse { ): ExternalCompletionResponse {
const content = data.choices?.[0]?.message?.content ?? ''; const content = data.choices?.[0]?.message?.content ?? data.content ?? data.response ?? data.message?.content ?? '';
recordRequest(provider.name); recordRequest(provider.name);
return { return {
response: content, response: content,
@ -341,14 +411,15 @@ async function callProvider(
const baseUrl = getBaseUrl(provider); const baseUrl = getBaseUrl(provider);
if (!baseUrl) throw new Error(`No base URL for ${provider.name}`); if (!baseUrl) throw new Error(`No base URL for ${provider.name}`);
const url = `${baseUrl}/chat/completions`; const generateBridge = GENERATE_BRIDGE_PROVIDERS.has(provider.name);
const url = generateBridge ? `${baseUrl}/api/generate` : `${baseUrl}/chat/completions`;
const controller = new AbortController(); const controller = new AbortController();
const timer = setTimeout(() => controller.abort(), timeoutMs); const timer = setTimeout(() => controller.abort(), timeoutMs);
const start = Date.now(); const start = Date.now();
try { try {
const headers = buildRequestHeaders(provider, apiKey); const headers = buildRequestHeaders(provider, apiKey);
const payload = buildRequestPayload(model, request); const payload = generateBridge ? buildGenerateBridgePayload(model, request) : buildRequestPayload(model, request);
const response = await fetch(url, { const response = await fetch(url, {
method: 'POST', method: 'POST',

File diff suppressed because it is too large Load Diff

View File

@ -11,6 +11,22 @@ export async function staticRoute(fastify: FastifyInstance): Promise<void> {
logger.info({ publicDir }, 'Static file serving initialized'); logger.info({ publicDir }, 'Static file serving initialized');
/**
 * Sends an HTML file from `publicDir` as the response body.
 *
 * Replies 404 with a JSON `{ error }` body (and logs a warning) when the file
 * does not exist. Otherwise the file is read synchronously as UTF-8 and sent
 * as `text/html` with headers that tell clients not to cache it.
 *
 * @param filename - File name relative to `publicDir`.
 * @param reply - The reply object of the current request.
 * @returns Whatever the final `send` call on the reply returns.
 */
function sendHtml(filename: string, reply: any) {
  const filePath = join(publicDir, filename);

  if (existsSync(filePath)) {
    const content = readFileSync(filePath, 'utf-8');

    // Applied in this order, each call chained on the previous call's result.
    const noCacheHeaders: Array<[string, string]> = [
      ['Cache-Control', 'no-cache, no-store, must-revalidate, max-age=0'],
      ['Pragma', 'no-cache'],
      ['Expires', '0'],
    ];
    let response = reply;
    for (const [name, value] of noCacheHeaders) {
      response = response.header(name, value);
    }
    return response.type('text/html').send(content);
  }

  logger.warn({ path: filePath }, `${filename} not found`);
  return reply.status(404).send({ error: `${filename} not found` });
}
// Serve root path // Serve root path
fastify.get('/', async (request, reply) => { fastify.get('/', async (request, reply) => {
logger.info({ method: request.method, url: request.url, host: request.hostname }, 'Root path requested'); logger.info({ method: request.method, url: request.url, host: request.hostname }, 'Root path requested');
@ -26,13 +42,47 @@ export async function staticRoute(fastify: FastifyInstance): Promise<void> {
// Serve /dashboard.html // Serve /dashboard.html
fastify.get('/dashboard.html', async (_request, reply) => { fastify.get('/dashboard.html', async (_request, reply) => {
const dashboardPath = join(publicDir, 'dashboard.html'); return sendHtml('dashboard.html', reply);
if (!existsSync(dashboardPath)) { });
logger.warn({ path: dashboardPath }, 'dashboard.html not found');
return reply.status(404).send({ error: 'dashboard.html not found' }); fastify.get('/dashboard-v2.html', async (_request, reply) => {
} return sendHtml('dashboard-v2.html', reply);
const content = readFileSync(dashboardPath, 'utf-8'); });
return reply.type('text/html').send(content);
// Alias URLs that all serve dashboard-v2.html. Variants with and without a
// trailing slash are listed explicitly; registration order matches the list.
const dashboardV2Aliases = [
  '/v2/dashboard',
  '/v2/dashboard/',
  '/v2',
  '/v2/',
  '/dashboard/v2',
  '/dashboard/v2/',
  '/api/dashboard-v2',
  '/api/v2/dashboard',
];
for (const aliasPath of dashboardV2Aliases) {
  fastify.get(aliasPath, async (_request, reply) => {
    return sendHtml('dashboard-v2.html', reply);
  });
}
fastify.get('/api/dashboard/v2', async (_request, reply) => {
return sendHtml('dashboard-v2.html', reply);
}); });
// Serve /api/dashboard as HTML for compatibility // Serve /api/dashboard as HTML for compatibility

View File

@ -101,7 +101,7 @@ async function buildServer() {
await server.register(fastifyRateLimit, { await server.register(fastifyRateLimit, {
global: true, global: true,
max: 100, max: 1000,
timeWindow: '1 minute', timeWindow: '1 minute',
keyGenerator: (request) => { keyGenerator: (request) => {
const caller = (request.headers['x-caller-id'] as string) ?? 'default'; const caller = (request.headers['x-caller-id'] as string) ?? 'default';