feat: auto-track all Claude Code sessions via ANTHROPIC_BASE_URL proxy

- Add POST /v1/messages (Anthropic-format passthrough) to core with full
  ticket tracking — every Claude Code request creates a TokenVault ticket
- Add GET /v1/models passthrough so Anthropic SDK model listing works
- Proxy /v1/messages + /v1/models in dashboard so public URL forwards to core
- Add /health endpoint to dashboard for infra monitoring
- Overview tab now shows RTK savings banner (3.5M tokens, 753 commands)
  alongside LLM ticket stats — Overview no longer looks empty
- Provider split bar chart rendered when request data is available

ANTHROPIC_BASE_URL=https://tokenvault.fichtmueller.org now set globally
in ~/.zshrc — every new Claude Code session is tracked automatically.
This commit is contained in:
Rene Fichtmueller 2026-04-14 21:32:20 +02:00
parent a290216183
commit bdde8cf115
4 changed files with 262 additions and 2 deletions

View File

@ -0,0 +1,159 @@
import type { FastifyInstance } from 'fastify';
import { createTicket, hashContent } from '../tickets/ticket-service.js';
import { logger } from '../observability/logger.js';
import { config } from '../config.js';
/**
 * Anthropic list prices in USD per 1M tokens, keyed by model ID.
 * `cached` is the rate applied to tokens served from the prompt cache.
 *
 * NOTE(review): model IDs and rates are carried over unchanged — confirm them
 * against Anthropic's current model list and pricing page.
 */
const ANTHROPIC_PRICING: Record<string, { input: number; output: number; cached?: number }> = {
  'claude-opus-4-20250514': { input: 15.0, output: 75.0, cached: 1.50 },
  'claude-opus-4-5': { input: 15.0, output: 75.0, cached: 1.50 },
  'claude-sonnet-4-20250514': { input: 3.0, output: 15.0, cached: 0.30 },
  'claude-sonnet-4-5-20251001': { input: 3.0, output: 15.0, cached: 0.30 },
  'claude-haiku-3-5-20251022': { input: 0.80, output: 4.0, cached: 0.08 },
  'claude-haiku-3-20250307': { input: 0.25, output: 1.25, cached: 0.025 },
};

/** Model whose rates are used when the requested model is not in the table. */
const FALLBACK_PRICING_MODEL = 'claude-sonnet-4-20250514';

/** Prices in the table are quoted per this many tokens. */
const TOKENS_PER_MILLION = 1_000_000;

/**
 * Estimates the USD cost of one Anthropic request.
 *
 * @param model - Model ID; unknown models are priced like {@link FALLBACK_PRICING_MODEL}.
 * @param inputTokens - Anthropic `usage.input_tokens`. This count already
 *   EXCLUDES cache-read tokens, so it is billed in full at the input rate.
 * @param outputTokens - Anthropic `usage.output_tokens`.
 * @param cachedTokens - Anthropic `usage.cache_read_input_tokens`, billed at the
 *   model's `cached` rate (free when the model has no such rate).
 * @returns The estimated cost in USD, never negative.
 */
function calcCost(model: string, inputTokens: number, outputTokens: number, cachedTokens: number): number {
  const pricing = ANTHROPIC_PRICING[model] ?? ANTHROPIC_PRICING[FALLBACK_PRICING_MODEL]!;
  // Cached tokens must not be subtracted from inputTokens: the API reports the
  // two counters separately. Subtracting made the input cost negative on
  // cache-heavy requests and collapsed the total to 0.
  const inputCost = (inputTokens / TOKENS_PER_MILLION) * pricing.input;
  const outputCost = (outputTokens / TOKENS_PER_MILLION) * pricing.output;
  const cacheCost = (cachedTokens / TOKENS_PER_MILLION) * (pricing.cached ?? 0);
  return Math.max(0, inputCost + outputCost + cacheCost);
}
/** Origin of the upstream Anthropic API. */
const ANTHROPIC_API_ORIGIN = 'https://api.anthropic.com';

/** `anthropic-version` sent upstream when the client does not supply one. */
const DEFAULT_ANTHROPIC_VERSION = '2023-06-01';

/** Model recorded on tickets when the request body does not name one. */
const DEFAULT_TRACKED_MODEL = 'claude-sonnet-4-20250514';

/** Token counters as reported by Anthropic; every field is optional on the wire. */
type AnthropicUsage = {
  input_tokens?: number;
  output_tokens?: number;
  cache_read_input_tokens?: number;
  cache_creation_input_tokens?: number;
};

/** The subset of Anthropic streaming events this proxy inspects. */
type SseEvent = {
  type?: string;
  message?: { usage?: AnthropicUsage };
  usage?: AnthropicUsage;
  delta?: { type?: string; text?: string };
};

/** Tracking metadata shared by every ticket written for one request. */
type TicketContext = {
  model: string;
  caller: string;
  project: string | undefined;
  team: string | undefined;
};

/** Minimal view of the raw Node response used when streaming. */
type RawResponse = {
  destroyed: boolean;
  writeHead(status: number, headers: Record<string, string>): unknown;
  write(chunk: Uint8Array): boolean;
  end(): unknown;
};

type HeaderBag = Record<string, string | string[] | undefined>;

/** Returns the first non-empty value of a possibly repeated header. */
function firstHeader(value: string | string[] | undefined): string | undefined {
  const first = Array.isArray(value) ? value[0] : value;
  return first === undefined || first === '' ? undefined : first;
}

/**
 * Picks the API key to forward: the client's `x-api-key`, else the client's
 * bearer token, else the configured server key. Empty values fall through.
 */
function resolveApiKey(headers: HeaderBag): string | undefined {
  const bearer = firstHeader(headers['authorization'])?.replace(/^Bearer /, '');
  return firstHeader(headers['x-api-key']) || bearer || config.providers.anthropic.apiKey || undefined;
}

/** Builds an error body in the shape Anthropic clients expect. */
function anthropicError(type: string, message: string): { type: 'error'; error: { type: string; message: string } } {
  return { type: 'error', error: { type, message } };
}

/** Concatenates the `text` of every content part in a content array. */
function joinTextParts(parts: unknown): string {
  if (!Array.isArray(parts)) return '';
  return parts
    .map((part: unknown) => {
      const text = (part as { text?: unknown } | null)?.text;
      return typeof text === 'string' ? text : '';
    })
    .join('');
}

/** Flattens the request's `messages` into one string (used only for hashing). */
function extractInputText(messages: unknown): string {
  if (!Array.isArray(messages)) return '';
  return messages
    .map((message: unknown) => {
      const content = (message as { content?: unknown } | null)?.content;
      return typeof content === 'string' ? content : joinTextParts(content);
    })
    .join('\n');
}

/** Writes a `completed` ticket; failures are logged, never thrown. */
function recordCompleted(
  ctx: TicketContext,
  usage: AnthropicUsage,
  latencyMs: number,
  inputText: string,
  outputText: string,
): void {
  const inputTokens = usage.input_tokens ?? 0;
  const outputTokens = usage.output_tokens ?? 0;
  const cachedTokens = usage.cache_read_input_tokens ?? 0;
  void createTicket({
    provider: 'anthropic',
    model: ctx.model,
    status: 'completed',
    tokens_in: inputTokens,
    tokens_out: outputTokens,
    tokens_cached: cachedTokens,
    tokens_saved: 0,
    cost_usd: calcCost(ctx.model, inputTokens, outputTokens, cachedTokens),
    latency_ms: latencyMs,
    cache_hit: cachedTokens > 0,
    caller: ctx.caller,
    project: ctx.project,
    team: ctx.team,
    input_hash: hashContent(inputText),
    output_hash: hashContent(outputText),
  }).catch((err: unknown) => logger.warn({ err }, 'Failed to create ticket for Anthropic passthrough'));
}

/** Writes a `failed` ticket; failures are logged, never thrown. */
function recordFailed(ctx: TicketContext, latencyMs: number): void {
  void createTicket({
    provider: 'anthropic',
    model: ctx.model,
    status: 'failed',
    tokens_in: 0,
    tokens_out: 0,
    cost_usd: 0,
    latency_ms: latencyMs,
    caller: ctx.caller,
    project: ctx.project,
    team: ctx.team,
    input_hash: hashContent(ctx.model),
    output_hash: '',
  }).catch((err: unknown) => logger.warn({ err }, 'Failed to create failure ticket for Anthropic passthrough'));
}

/**
 * Incrementally parses Anthropic server-sent events and collects usage
 * counters (`message_start` / `message_delta`) plus streamed text deltas.
 */
function createSseUsageTracker(): {
  push(text: string): void;
  usage(): AnthropicUsage | undefined;
  outputText(): string;
} {
  const merged: AnthropicUsage = {};
  let sawUsage = false;
  let carry = '';
  let collected = '';

  const consumeLine = (line: string): void => {
    if (!line.startsWith('data:')) return;
    let event: SseEvent | null;
    try {
      event = JSON.parse(line.slice('data:'.length)) as SseEvent | null;
    } catch {
      return; // Not a JSON payload — nothing to track.
    }
    if (event === null || typeof event !== 'object') return;

    const reported =
      event.type === 'message_start' ? event.message?.usage
      : event.type === 'message_delta' ? event.usage
      : undefined;
    if (reported) {
      // Later events carry cumulative counters; only numeric fields overwrite.
      for (const [field, value] of Object.entries(reported)) {
        if (typeof value === 'number') merged[field as keyof AnthropicUsage] = value;
      }
      sawUsage = true;
    }
    if (event.type === 'content_block_delta' && typeof event.delta?.text === 'string') {
      collected += event.delta.text;
    }
  };

  return {
    push(text: string): void {
      carry += text;
      const lines = carry.split('\n');
      carry = lines.pop() ?? ''; // Keep the trailing partial line for the next chunk.
      lines.forEach(consumeLine);
    },
    usage: () => (sawUsage ? merged : undefined),
    outputText: () => collected,
  };
}

/**
 * Copies the upstream body to the client chunk by chunk, handing the decoded
 * text to `onText`. Stops and cancels the upstream read if the client is gone.
 * Write backpressure is not awaited.
 */
async function pumpStream(
  source: NonNullable<Response['body']>,
  sink: RawResponse,
  onText: (text: string) => void,
): Promise<void> {
  const reader = source.getReader();
  const decoder = new TextDecoder();
  for (;;) {
    const { done, value } = await reader.read();
    if (done) return;
    if (sink.destroyed) {
      await reader.cancel();
      return;
    }
    sink.write(value);
    onText(decoder.decode(value, { stream: true }));
  }
}

/**
 * Anthropic API passthrough that enables ANTHROPIC_BASE_URL routing.
 *
 * Claude Code (and any Anthropic SDK client) sends requests here when
 * ANTHROPIC_BASE_URL=https://tokenvault.fichtmueller.org is set.
 * Each request is forwarded to Anthropic with the client's own API key (or the
 * configured key as a fallback) and a TokenVault ticket is written to track
 * usage and cost. Both JSON and streaming (`text/event-stream`) responses are
 * relayed; ticket writes are best-effort and never fail the request.
 *
 * Registers:
 * - `POST /v1/messages` — Messages API passthrough with ticket tracking.
 * - `GET /v1/models` — model list passthrough (query string preserved).
 */
export async function anthropicProxyRoutes(app: FastifyInstance): Promise<void> {
  // ─── POST /v1/messages — Anthropic Messages API passthrough ──────────────
  app.post('/v1/messages', async (req, reply) => {
    const rawBody: unknown = req.body;
    if (typeof rawBody !== 'object' || rawBody === null || Array.isArray(rawBody)) {
      return reply.code(400).send(anthropicError('invalid_request_error', 'Request body must be a JSON object'));
    }
    const body = rawBody as Record<string, unknown>;
    const requestedModel = body['model'];
    const model = typeof requestedModel === 'string' ? requestedModel : DEFAULT_TRACKED_MODEL;

    const apiKey = resolveApiKey(req.headers);
    if (!apiKey) {
      return reply.code(401).send(anthropicError('authentication_error', 'No Anthropic API key provided'));
    }

    const ctx: TicketContext = {
      model,
      caller: firstHeader(req.headers['x-tokenvault-caller']) ?? 'claude-code',
      project: firstHeader(req.headers['x-tokenvault-project']),
      team: firstHeader(req.headers['x-tokenvault-team']),
    };

    // ── Forward to Anthropic ────────────────────────────────────────────────
    const forwardHeaders: Record<string, string> = {
      'Content-Type': 'application/json',
      'x-api-key': apiKey,
      'anthropic-version': firstHeader(req.headers['anthropic-version']) ?? DEFAULT_ANTHROPIC_VERSION,
    };
    const anthropicBeta = firstHeader(req.headers['anthropic-beta']);
    if (anthropicBeta) forwardHeaders['anthropic-beta'] = anthropicBeta;

    const start = Date.now();
    let upstream: Response;
    try {
      upstream = await fetch(`${ANTHROPIC_API_ORIGIN}/v1/messages`, {
        method: 'POST',
        headers: forwardHeaders,
        body: JSON.stringify(body),
      });
    } catch (err: unknown) {
      logger.warn({ err, model }, 'Anthropic upstream unreachable');
      recordFailed(ctx, Date.now() - start);
      return reply.code(502).send(anthropicError('api_error', 'Failed to reach Anthropic upstream'));
    }

    const contentType = upstream.headers.get('content-type') ?? 'application/json';
    const anthropicReqId = upstream.headers.get('request-id');

    // ── Streaming response: relay SSE bytes while collecting usage ─────────
    if (upstream.ok && upstream.body && contentType.includes('text/event-stream')) {
      // Take over the socket so chunks reach the client as they arrive.
      reply.hijack();
      const streamHeaders: Record<string, string> = {
        'Content-Type': contentType,
        'Cache-Control': 'no-cache',
      };
      if (anthropicReqId) streamHeaders['request-id'] = anthropicReqId;
      reply.raw.writeHead(upstream.status, streamHeaders);

      const tracker = createSseUsageTracker();
      try {
        await pumpStream(upstream.body, reply.raw, tracker.push);
      } catch (err: unknown) {
        logger.warn({ err, model }, 'Anthropic stream interrupted');
      } finally {
        reply.raw.end();
      }

      // For streams, latency covers the whole stream, not just time-to-headers.
      const streamedUsage = tracker.usage();
      if (streamedUsage) {
        recordCompleted(ctx, streamedUsage, Date.now() - start, extractInputText(body['messages']), tracker.outputText());
      }
      return reply;
    }

    // ── Buffered response ──────────────────────────────────────────────────
    const latency = Date.now() - start;
    const rawText = await upstream.text();
    let responseBody: Record<string, unknown> | undefined;
    try {
      const parsed: unknown = JSON.parse(rawText);
      if (typeof parsed === 'object' && parsed !== null && !Array.isArray(parsed)) {
        responseBody = parsed as Record<string, unknown>;
      }
    } catch {
      // Upstream (or an intermediary) answered with a non-JSON body; relay it verbatim.
    }

    // ── Track as TokenVault ticket (best-effort, non-blocking) ─────────────
    if (upstream.ok) {
      const usage = responseBody?.['usage'] as AnthropicUsage | undefined;
      if (usage) {
        recordCompleted(ctx, usage, latency, extractInputText(body['messages']), joinTextParts(responseBody?.['content']));
      }
    } else {
      // Track failed requests too
      recordFailed(ctx, latency);
      logger.warn({ model, status: upstream.status }, 'Anthropic upstream error');
    }

    // ── Return Anthropic response as-is ────────────────────────────────────
    reply.code(upstream.status);
    reply.header('Content-Type', responseBody ? 'application/json' : contentType);
    if (anthropicReqId) reply.header('request-id', anthropicReqId);
    return responseBody ?? rawText;
  });

  // ─── GET /v1/models — Anthropic models list passthrough ─────────────────
  app.get('/v1/models', async (req, reply) => {
    const apiKey = resolveApiKey(req.headers);
    if (!apiKey) {
      return reply.code(401).send(anthropicError('authentication_error', 'No Anthropic API key provided'));
    }

    // Preserve pagination parameters (limit / after_id / before_id).
    const queryStart = req.url.indexOf('?');
    const query = queryStart === -1 ? '' : req.url.slice(queryStart);

    const forwardHeaders: Record<string, string> = {
      'x-api-key': apiKey,
      'anthropic-version': firstHeader(req.headers['anthropic-version']) ?? DEFAULT_ANTHROPIC_VERSION,
    };
    const anthropicBeta = firstHeader(req.headers['anthropic-beta']);
    if (anthropicBeta) forwardHeaders['anthropic-beta'] = anthropicBeta;

    let upstream: Response;
    try {
      upstream = await fetch(`${ANTHROPIC_API_ORIGIN}/v1/models${query}`, { headers: forwardHeaders });
    } catch (err: unknown) {
      logger.warn({ err }, 'Anthropic upstream unreachable');
      return reply.code(502).send(anthropicError('api_error', 'Failed to reach Anthropic upstream'));
    }

    reply.code(upstream.status);
    reply.header('Content-Type', upstream.headers.get('content-type') ?? 'application/json');
    // Relay the body as text so a non-JSON error page cannot crash the handler.
    return upstream.text();
  });
}

View File

@ -8,6 +8,7 @@ import { closePool } from './db/client.js';
import { initProviders } from './providers/index.js'; import { initProviders } from './providers/index.js';
import { healthRoutes } from './routes/health.js'; import { healthRoutes } from './routes/health.js';
import { proxyRoutes } from './routes/proxy.js'; import { proxyRoutes } from './routes/proxy.js';
import { anthropicProxyRoutes } from './routes/anthropic-proxy.js';
import { ticketRoutes } from './routes/tickets.js'; import { ticketRoutes } from './routes/tickets.js';
import { rtkRoutes } from './routes/rtk.js'; import { rtkRoutes } from './routes/rtk.js';
@ -20,6 +21,7 @@ await app.register(cors, { origin: true });
// ─── Routes ────────────────────────────────────────────────────────────────── // ─── Routes ──────────────────────────────────────────────────────────────────
await app.register(healthRoutes); await app.register(healthRoutes);
await app.register(proxyRoutes); await app.register(proxyRoutes);
await app.register(anthropicProxyRoutes);
await app.register(ticketRoutes); await app.register(ticketRoutes);
await app.register(rtkRoutes); await app.register(rtkRoutes);

View File

@ -161,9 +161,21 @@ tr:hover { background:#f8fafc; }
<div class="cards" id="stats-cards"> <div class="cards" id="stats-cards">
<div class="loading"><div class="spin"></div> Loading...</div> <div class="loading"><div class="spin"></div> Loading...</div>
</div> </div>
<!-- RTK savings banner — hidden by default; loadStats() reveals it once RTK reports at least one command (RTK data exists independently of LLM routing) -->
<div id="rtk-overview-banner" style="display:none;margin:16px 0;background:linear-gradient(135deg,#f0fdf4,#dcfce7);border:1px solid #86efac;border-radius:12px;padding:20px 24px">
<div style="display:flex;align-items:center;gap:12px;margin-bottom:12px">
<span style="font-size:20px"></span>
<strong style="color:#16a34a;font-size:15px">RTK Token Savings (all-time)</strong>
<span style="margin-left:auto;font-size:12px;color:#15803d" id="rtk-banner-host"></span>
</div>
<div style="display:grid;grid-template-columns:repeat(auto-fit,minmax(150px,1fr));gap:12px" id="rtk-banner-cards"></div>
</div>
<div class="chart-row"> <div class="chart-row">
<div class="chart-placeholder" id="cost-timeline">Cost timeline will appear here after requests are tracked</div> <div class="chart-placeholder" id="cost-timeline">Cost timeline will appear here after requests are tracked</div>
<div class="chart-placeholder" id="provider-split">Provider split will appear here</div> <div id="provider-split" style="background:var(--surface);border:1px solid var(--border);border-radius:12px;padding:24px;flex:1;min-height:180px">
<div style="font-weight:600;margin-bottom:12px;color:var(--text-muted)">Provider Split</div>
<div id="provider-split-inner" style="color:var(--text-muted);font-size:13px;text-align:center;margin-top:40px">Provider split will appear here after requests are tracked</div>
</div>
</div> </div>
</div> </div>
@ -311,7 +323,10 @@ function fmtTime(d) { return new Date(d).toLocaleString(lang === 'de' ? 'de-DE'
async function loadStats() { async function loadStats() {
try { try {
const data = await (await fetch(API + '/tickets/stats?period=month')).json(); const [data, rtkStats] = await Promise.all([
(await fetch(API + '/tickets/stats?period=month')).json(),
fetch(API + '/rtk/stats?period=all').then(r => r.ok ? r.json() : null).catch(() => null),
]);
const labels = t[lang]; const labels = t[lang];
document.getElementById('stats-cards').innerHTML = ` document.getElementById('stats-cards').innerHTML = `
<div class="card"><div class="label">${labels.totalCost}</div><div class="value primary">$${fmt(data.total_cost_usd)}</div><div class="sub">Last 30 days</div></div> <div class="card"><div class="label">${labels.totalCost}</div><div class="value primary">$${fmt(data.total_cost_usd)}</div><div class="sub">Last 30 days</div></div>
@ -321,6 +336,53 @@ async function loadStats() {
<div class="card"><div class="label">${labels.tokensIn}</div><div class="value">${fmtK(data.total_tokens_in)}</div></div> <div class="card"><div class="label">${labels.tokensIn}</div><div class="value">${fmtK(data.total_tokens_in)}</div></div>
<div class="card"><div class="label">${labels.tokensOut}</div><div class="value">${fmtK(data.total_tokens_out)}</div></div> <div class="card"><div class="label">${labels.tokensOut}</div><div class="value">${fmtK(data.total_tokens_out)}</div></div>
`; `;
// ── RTK savings banner ──────────────────────────────────────────────────
// Revealed only when RTK stats loaded and report at least one command.
if (rtkStats && rtkStats.total_commands > 0) {
  const banner = document.getElementById('rtk-overview-banner');
  banner.style.display = 'block';
  const plural = (count) => (count !== 1 ? 's' : '');
  document.getElementById('rtk-banner-host').textContent =
    `${rtkStats.unique_hosts} host${plural(rtkStats.unique_hosts)} · ${rtkStats.unique_projects} project${plural(rtkStats.unique_projects)}`;
  // avg_savings_pct may be missing, null, or a numeric string. Calling
  // .toFixed() on it directly would throw, and the enclosing catch would then
  // replace the whole stats area with the "Offline" card.
  const avgPct = Number(rtkStats.avg_savings_pct);
  const avgLabel = Number.isFinite(avgPct) ? avgPct.toFixed(1) : '0.0';
  document.getElementById('rtk-banner-cards').innerHTML = `
    <div style="background:#fff;border-radius:8px;padding:12px;text-align:center">
      <div style="font-size:11px;color:#6b7280;margin-bottom:4px">TOKENS SAVED</div>
      <div style="font-size:22px;font-weight:700;color:#16a34a">${fmtK(rtkStats.total_saved_tokens)}</div>
    </div>
    <div style="background:#fff;border-radius:8px;padding:12px;text-align:center">
      <div style="font-size:11px;color:#6b7280;margin-bottom:4px">COMMANDS</div>
      <div style="font-size:22px;font-weight:700;color:#6366f1">${fmtK(rtkStats.total_commands)}</div>
    </div>
    <div style="background:#fff;border-radius:8px;padding:12px;text-align:center">
      <div style="font-size:11px;color:#6b7280;margin-bottom:4px">AVG SAVINGS</div>
      <div style="font-size:22px;font-weight:700;color:#f59e0b">${avgLabel}%</div>
    </div>
    <div style="background:#fff;border-radius:8px;padding:12px;text-align:center">
      <div style="font-size:11px;color:#6b7280;margin-bottom:4px">INPUT TOKENS</div>
      <div style="font-size:22px;font-weight:700">${fmtK(rtkStats.total_input_tokens)}</div>
    </div>
  `;
}
// ── Provider split (simple bar chart) ───────────────────────────────────
// Best-effort: a failure here leaves the placeholder text in place and must
// not affect the stats cards rendered above.
try {
  const payload = await (await fetch(API + '/cost/breakdown?group_by=provider')).json();
  // Coerce counts to numbers so string-typed counts are summed, not concatenated.
  const rows = (Array.isArray(payload) ? payload : []).map(row => ({
    name: String(row.group_value ?? 'unknown'),
    count: Number(row.request_count) || 0,
  }));
  const total = rows.reduce((sum, row) => sum + row.count, 0);
  if (total > 0) {
    const colors = new Map([
      ['anthropic', '#6366f1'],
      ['openai', '#10b981'],
      ['ollama', '#f59e0b'],
      ['ai-bridge', '#3b82f6'],
    ]);
    // Provider names come from the API; escape them before writing innerHTML.
    const htmlEntities = { '&': '&amp;', '<': '&lt;', '>': '&gt;', '"': '&quot;', "'": '&#39;' };
    const escapeHtml = (text) => text.replace(/[&<>"']/g, ch => htmlEntities[ch]);
    document.getElementById('provider-split-inner').innerHTML = rows.map(row => {
      const pct = ((row.count / total) * 100).toFixed(0);
      const col = colors.get(row.name) ?? '#94a3b8';
      return `<div style="margin-bottom:10px">
        <div style="display:flex;justify-content:space-between;font-size:12px;margin-bottom:3px">
          <span style="font-weight:500">${escapeHtml(row.name)}</span><span style="color:var(--text-muted)">${row.count} req · ${pct}%</span>
        </div>
        <div style="background:var(--border);border-radius:4px;height:6px">
          <div style="background:${col};width:${pct}%;height:6px;border-radius:4px"></div>
        </div>
      </div>`;
    }).join('');
  }
} catch (err) {
  console.warn('Provider split unavailable', err);
}
} catch { document.getElementById('stats-cards').innerHTML = '<div class="card"><div class="label">Status</div><div class="value">Offline</div><div class="sub">TokenVault core not reachable</div></div>'; } } catch { document.getElementById('stats-cards').innerHTML = '<div class="card"><div class="label">Status</div><div class="value">Offline</div><div class="sub">TokenVault core not reachable</div></div>'; }
} }

View File

@ -63,5 +63,42 @@ app.get('/api/rtk/hosts', async () => {
return res.json(); return res.json();
}); });
// ─── Anthropic API passthrough (ANTHROPIC_BASE_URL routing) ──────────────────
// Claude Code routes through here when ANTHROPIC_BASE_URL=https://tokenvault.fichtmueller.org
// Forward to core which handles tracking + proxying to Anthropic.
// Streaming (text/event-stream) responses are relayed chunk by chunk; anything
// else is relayed as a buffered body with core's status and content type.
app.post('/v1/messages', async (req, reply) => {
  const headers: Record<string, string> = { 'Content-Type': 'application/json' };
  // `authorization` is forwarded too: core accepts a bearer token as the API key.
  for (const key of ['x-api-key', 'authorization', 'anthropic-version', 'anthropic-beta', 'x-tokenvault-caller', 'x-tokenvault-project', 'x-tokenvault-team']) {
    const val = req.headers[key];
    const first = Array.isArray(val) ? val[0] : val;
    if (first) headers[key] = first;
  }

  let res: Response;
  try {
    res = await fetch(`${CORE_URL}/v1/messages`, {
      method: 'POST',
      headers,
      body: JSON.stringify(req.body),
    });
  } catch (err: unknown) {
    console.error('TokenVault core not reachable for /v1/messages', err);
    reply.code(502);
    return { type: 'error', error: { type: 'api_error', message: 'TokenVault core not reachable' } };
  }

  const contentType = res.headers.get('content-type') ?? 'application/json';
  const reqId = res.headers.get('request-id');

  if (res.body && contentType.includes('text/event-stream')) {
    // Take over the socket so SSE chunks reach the client as they arrive.
    reply.hijack();
    const streamHeaders: Record<string, string> = {
      'Content-Type': contentType,
      'Cache-Control': 'no-cache',
    };
    if (reqId) streamHeaders['request-id'] = reqId;
    reply.raw.writeHead(res.status, streamHeaders);

    const reader = res.body.getReader();
    try {
      for (;;) {
        const { done, value } = await reader.read();
        if (done) break;
        if (reply.raw.destroyed) {
          // Client went away — stop pulling from core.
          await reader.cancel();
          break;
        }
        reply.raw.write(value);
      }
    } catch (err: unknown) {
      console.error('Stream from TokenVault core interrupted', err);
    } finally {
      reply.raw.end();
    }
    return reply;
  }

  reply.code(res.status);
  reply.header('Content-Type', contentType);
  if (reqId) reply.header('request-id', reqId);
  // Relay as text so a non-JSON error body cannot crash the handler.
  return res.text();
});
// GET /v1/models — forwards the model-list request to core, preserving the
// query string (pagination) and the client's auth/version headers.
app.get('/v1/models', async (req, reply) => {
  const headers: Record<string, string> = {};
  for (const key of ['x-api-key', 'authorization', 'anthropic-version', 'anthropic-beta']) {
    const val = req.headers[key];
    const first = Array.isArray(val) ? val[0] : val;
    if (first) headers[key] = first;
  }

  const queryStart = req.url.indexOf('?');
  const query = queryStart === -1 ? '' : req.url.slice(queryStart);

  let res: Response;
  try {
    res = await fetch(`${CORE_URL}/v1/models${query}`, { headers });
  } catch (err: unknown) {
    console.error('TokenVault core not reachable for /v1/models', err);
    reply.code(502);
    return { type: 'error', error: { type: 'api_error', message: 'TokenVault core not reachable' } };
  }

  reply.code(res.status);
  reply.header('Content-Type', res.headers.get('content-type') ?? 'application/json');
  // Relay as text so a non-JSON error body cannot crash the handler.
  return res.text();
});
// ─── Health endpoint (needed for infra monitoring) ────────────────────────────
// Mirrors core's /health status code and body so monitors see core failures;
// answers 503 when core cannot be reached at all.
app.get('/health', async (_req, reply) => {
  try {
    const res = await fetch(`${CORE_URL}/health`);
    reply.code(res.status);
    reply.header('Content-Type', res.headers.get('content-type') ?? 'application/json');
    return await res.text();
  } catch (err: unknown) {
    console.error('TokenVault core health check failed', err);
    reply.code(503);
    return { status: 'unhealthy', error: 'TokenVault core not reachable' };
  }
});
await app.listen({ port: PORT, host: '0.0.0.0' }); await app.listen({ port: PORT, host: '0.0.0.0' });
console.log(`TokenVault Dashboard running on http://localhost:${PORT}`); console.log(`TokenVault Dashboard running on http://localhost:${PORT}`);