feat: auto-track all Claude Code sessions via ANTHROPIC_BASE_URL proxy
- Add POST /v1/messages (Anthropic-format passthrough) to core with full ticket tracking — every Claude Code request creates a TokenVault ticket
- Add GET /v1/models passthrough so Anthropic SDK model listing works
- Proxy /v1/messages + /v1/models in dashboard so public URL forwards to core
- Add /health endpoint to dashboard for infra monitoring
- Overview tab now shows RTK savings banner (3.5M tokens, 753 commands) alongside LLM ticket stats — Overview no longer looks empty
- Provider split bar chart rendered when request data is available

ANTHROPIC_BASE_URL=https://tokenvault.fichtmueller.org now set globally in ~/.zshrc — every new Claude Code session is tracked automatically.
This commit is contained in:
parent
a290216183
commit
bdde8cf115
159
packages/core/src/routes/anthropic-proxy.ts
Normal file
159
packages/core/src/routes/anthropic-proxy.ts
Normal file
@ -0,0 +1,159 @@
|
||||
import type { FastifyInstance } from 'fastify';
|
||||
import { createTicket, hashContent } from '../tickets/ticket-service.js';
|
||||
import { logger } from '../observability/logger.js';
|
||||
import { config } from '../config.js';
|
||||
|
||||
/**
 * Anthropic pricing per 1M tokens (USD), keyed by model ID.
 *
 * `input` applies to uncached input tokens, `output` to generated tokens and
 * `cached` to tokens read from the prompt cache.
 *
 * NOTE(review): model IDs and rates are hard-coded — confirm them against
 * Anthropic's current model list and price sheet before relying on the numbers.
 */
const ANTHROPIC_PRICING: Record<string, { input: number; output: number; cached?: number }> = {
  'claude-opus-4-20250514': { input: 15.0, output: 75.0, cached: 1.50 },
  'claude-opus-4-5': { input: 15.0, output: 75.0, cached: 1.50 },
  'claude-sonnet-4-20250514': { input: 3.0, output: 15.0, cached: 0.30 },
  'claude-sonnet-4-5-20251001': { input: 3.0, output: 15.0, cached: 0.30 },
  'claude-haiku-3-5-20251022': { input: 0.80, output: 4.0, cached: 0.08 },
  'claude-haiku-3-20250307': { input: 0.25, output: 1.25, cached: 0.025 },
};

/**
 * Estimates the USD cost of one Anthropic request.
 *
 * @param model - Model ID; unknown models are priced like `claude-sonnet-4-20250514`.
 * @param inputTokens - `usage.input_tokens` from the response. Anthropic reports
 *   this value *excluding* cache reads, so it is billed in full at the input rate.
 * @param outputTokens - `usage.output_tokens` from the response.
 * @param cachedTokens - `usage.cache_read_input_tokens`, billed at the cached
 *   rate (or not at all when the model has no cached rate configured).
 * @returns The non-negative estimated cost in USD.
 */
function calcCost(model: string, inputTokens: number, outputTokens: number, cachedTokens: number): number {
  const pricing = ANTHROPIC_PRICING[model] ?? ANTHROPIC_PRICING['claude-sonnet-4-20250514']!;
  const perMillion = (tokens: number, rate: number): number => (tokens / 1_000_000) * rate;

  // Cache reads are reported separately from input_tokens, so they must not be
  // subtracted here: doing so under-bills uncached input and can go negative,
  // silently cancelling the output cost.
  const inputCost = perMillion(inputTokens, pricing.input);
  const outputCost = perMillion(outputTokens, pricing.output);
  const cacheCost = pricing.cached ? perMillion(cachedTokens, pricing.cached) : 0;

  return Math.max(0, inputCost + outputCost + cacheCost);
}
|
||||
|
||||
/** Base URL of the upstream Anthropic API. */
const ANTHROPIC_API_BASE = 'https://api.anthropic.com';

/** Model assumed for tickets when the request body names none. */
const DEFAULT_MODEL = 'claude-sonnet-4-20250514';

/** `anthropic-version` sent upstream when the client does not provide one. */
const DEFAULT_ANTHROPIC_VERSION = '2023-06-01';

/** Usage counters this proxy reads from Anthropic responses. */
const USAGE_KEYS = [
  'input_tokens',
  'output_tokens',
  'cache_read_input_tokens',
  'cache_creation_input_tokens',
] as const;

type AnthropicUsage = Partial<Record<(typeof USAGE_KEYS)[number], number>>;

type UpstreamResponse = Awaited<ReturnType<typeof fetch>>;

/** Request-scoped attribution written onto every ticket. */
interface TicketContext {
  model: string;
  caller: string;
  project: string | undefined;
  team: string | undefined;
}

/** Narrows an unknown value to a plain (non-array) object. */
function isRecord(value: unknown): value is Record<string, unknown> {
  return typeof value === 'object' && value !== null && !Array.isArray(value);
}

/** Returns the first non-empty value of a possibly repeated HTTP header. */
function headerValue(value: string | string[] | undefined): string | undefined {
  const first = Array.isArray(value) ? value[0] : value;
  return first ? first : undefined;
}

/** Parses JSON, returning `undefined` instead of throwing on malformed input. */
function tryParseJson(text: string): unknown {
  try {
    return JSON.parse(text);
  } catch {
    return undefined;
  }
}

/** Copies the numeric usage counters out of an Anthropic `usage` object. */
function readUsage(value: unknown): AnthropicUsage | undefined {
  if (!isRecord(value)) return undefined;
  const usage: AnthropicUsage = {};
  for (const key of USAGE_KEYS) {
    const count = value[key];
    if (typeof count === 'number') usage[key] = count;
  }
  return usage;
}

/** Concatenates the `text` parts of a list of Anthropic content blocks. */
function joinTextParts(parts: unknown): string {
  if (!Array.isArray(parts)) return '';
  return parts
    .map((part: unknown) => (isRecord(part) && typeof part['text'] === 'string' ? part['text'] : ''))
    .join('');
}

/** Flattens the request's `messages` into one string (one line per message) for hashing. */
function extractInputText(body: Record<string, unknown>): string {
  const messages: unknown[] = Array.isArray(body['messages']) ? body['messages'] : [];
  return messages
    .map((message) => {
      if (!isRecord(message)) return '';
      const content = message['content'];
      return typeof content === 'string' ? content : joinTextParts(content);
    })
    .join('\n');
}

/** Writes a `completed` ticket; failures are logged, never thrown (best effort). */
function trackCompleted(
  ctx: TicketContext,
  usage: AnthropicUsage,
  latencyMs: number,
  inputText: string,
  outputText: string,
): void {
  const inputTokens = usage.input_tokens ?? 0;
  const outputTokens = usage.output_tokens ?? 0;
  const cachedTokens = usage.cache_read_input_tokens ?? 0;

  void createTicket({
    provider: 'anthropic',
    model: ctx.model,
    status: 'completed',
    tokens_in: inputTokens,
    tokens_out: outputTokens,
    tokens_cached: cachedTokens,
    tokens_saved: 0,
    cost_usd: calcCost(ctx.model, inputTokens, outputTokens, cachedTokens),
    latency_ms: latencyMs,
    cache_hit: cachedTokens > 0,
    caller: ctx.caller,
    project: ctx.project,
    team: ctx.team,
    input_hash: hashContent(inputText),
    output_hash: hashContent(outputText),
  }).catch((err: unknown) => logger.warn({ err }, 'Failed to create ticket for Anthropic passthrough'));
}

/** Writes a `failed` ticket; failures are logged, never thrown (best effort). */
function trackFailed(ctx: TicketContext, latencyMs: number): void {
  void createTicket({
    provider: 'anthropic',
    model: ctx.model,
    status: 'failed',
    tokens_in: 0,
    tokens_out: 0,
    cost_usd: 0,
    latency_ms: latencyMs,
    caller: ctx.caller,
    project: ctx.project,
    team: ctx.team,
    input_hash: hashContent(ctx.model),
    output_hash: '',
  }).catch((err: unknown) => logger.warn({ err }, 'Failed to create failure ticket for Anthropic passthrough'));
}

/**
 * Collects usage counters and generated text from an Anthropic SSE stream.
 *
 * Feed decoded chunks to `push`; complete `data:` lines are parsed as they
 * arrive, partial lines are buffered until their newline shows up.
 */
function createSseCollector(): {
  push(chunk: string): void;
  result(): { usage: AnthropicUsage | undefined; outputText: string };
} {
  let pending = '';
  let outputText = '';
  let usage: AnthropicUsage | undefined;

  const handleData = (payload: string): void => {
    const event = tryParseJson(payload);
    if (!isRecord(event)) return;

    // message_start nests usage under `message`; message_delta carries it at top level.
    const message = event['message'];
    const found = readUsage(isRecord(message) ? message['usage'] : event['usage']);
    if (found) usage = { ...usage, ...found };

    const delta = event['delta'];
    if (event['type'] === 'content_block_delta' && isRecord(delta) && typeof delta['text'] === 'string') {
      outputText += delta['text'];
    }
  };

  return {
    push(chunk: string): void {
      pending += chunk;
      let newline = pending.indexOf('\n');
      while (newline !== -1) {
        const line = pending.slice(0, newline).trimEnd();
        pending = pending.slice(newline + 1);
        if (line.startsWith('data:')) handleData(line.slice('data:'.length).trim());
        newline = pending.indexOf('\n');
      }
    },
    result: () => ({ usage, outputText }),
  };
}

/**
 * Copies an upstream byte stream to the client while handing the decoded text
 * to `onText`.
 *
 * @returns `true` when the upstream stream ended normally, `false` when the
 *   client went away or reading failed.
 */
async function relayStream(
  source: NonNullable<UpstreamResponse['body']>,
  sink: { readonly destroyed: boolean; write(chunk: Uint8Array): boolean; end(): unknown },
  onText: (text: string) => void,
): Promise<boolean> {
  const reader = source.getReader();
  const decoder = new TextDecoder();
  try {
    for (;;) {
      const { done, value } = await reader.read();
      if (done) {
        onText(decoder.decode());
        return true;
      }
      if (sink.destroyed) {
        // Client disconnected: stop pulling from Anthropic.
        await reader.cancel();
        return false;
      }
      sink.write(value);
      onText(decoder.decode(value, { stream: true }));
    }
  } catch (err: unknown) {
    logger.warn({ err }, 'Anthropic stream interrupted');
    return false;
  } finally {
    sink.end();
  }
}

/** Anthropic-shaped error body returned when the upstream API cannot be reached. */
function upstreamUnavailableBody(): { type: 'error'; error: { type: 'api_error'; message: string } } {
  return {
    type: 'error',
    error: { type: 'api_error', message: 'TokenVault could not reach the Anthropic API' },
  };
}

/**
 * Anthropic API passthrough — enables ANTHROPIC_BASE_URL routing.
 *
 * Claude Code (and any Anthropic SDK client) sends requests here when
 * ANTHROPIC_BASE_URL=https://tokenvault.fichtmueller.org is set.
 * Requests are forwarded to Anthropic with the client's own API key (falling
 * back to the configured key) and a TokenVault ticket is created to track
 * usage and cost. Both JSON and streaming (`text/event-stream`) responses are
 * relayed; upstream bodies are passed through unmodified.
 *
 * Registers:
 * - `POST /v1/messages` — Messages API passthrough with ticket tracking.
 * - `GET /v1/models` — model list passthrough (no ticket).
 */
export async function anthropicProxyRoutes(app: FastifyInstance): Promise<void> {

  // ─── POST /v1/messages — Anthropic Messages API passthrough ──────────────
  app.post('/v1/messages', async (req, reply) => {
    // A missing or non-object body is forwarded as `{}` so Anthropic produces the validation error.
    const body: Record<string, unknown> = isRecord(req.body) ? req.body : {};
    const requestedModel = body['model'];

    const ctx: TicketContext = {
      model: typeof requestedModel === 'string' ? requestedModel : DEFAULT_MODEL,
      caller: headerValue(req.headers['x-tokenvault-caller']) ?? 'claude-code',
      project: headerValue(req.headers['x-tokenvault-project']),
      team: headerValue(req.headers['x-tokenvault-team']),
    };

    // Use the client's API key if provided, fall back to configured key
    const apiKey = headerValue(req.headers['x-api-key'])
      || headerValue(req.headers['authorization'])?.replace(/^Bearer /, '')
      || config.providers.anthropic.apiKey;

    const forwardHeaders: Record<string, string> = {
      'Content-Type': 'application/json',
      'x-api-key': apiKey,
      'anthropic-version': headerValue(req.headers['anthropic-version']) ?? DEFAULT_ANTHROPIC_VERSION,
    };
    const anthropicBeta = headerValue(req.headers['anthropic-beta']);
    if (anthropicBeta) forwardHeaders['anthropic-beta'] = anthropicBeta;

    const start = Date.now();

    // ── Forward to Anthropic ────────────────────────────────────────────────
    let upstream: UpstreamResponse;
    try {
      upstream = await fetch(`${ANTHROPIC_API_BASE}/v1/messages`, {
        method: 'POST',
        headers: forwardHeaders,
        body: JSON.stringify(body),
      });
    } catch (err: unknown) {
      logger.warn({ err, model: ctx.model }, 'Anthropic upstream unreachable');
      trackFailed(ctx, Date.now() - start);
      reply.code(502);
      return upstreamUnavailableBody();
    }

    const contentType = upstream.headers.get('content-type') ?? 'application/json';
    const anthropicReqId = upstream.headers.get('request-id');

    // ── Streaming response: relay SSE bytes as they arrive ──────────────────
    if (upstream.ok && upstream.body && contentType.includes('text/event-stream')) {
      // Keep headers already set on the reply (e.g. by CORS hooks): a hijacked reply sends none itself.
      const rawHeaders: Record<string, string | number | string[]> = {};
      for (const [name, value] of Object.entries(reply.getHeaders())) {
        if (value !== undefined) rawHeaders[name] = value;
      }
      rawHeaders['content-type'] = contentType;
      rawHeaders['cache-control'] = 'no-cache';
      if (anthropicReqId) rawHeaders['request-id'] = anthropicReqId;

      reply.hijack();
      reply.raw.writeHead(upstream.status, rawHeaders);

      const collector = createSseCollector();
      const finished = await relayStream(upstream.body, reply.raw, (text) => collector.push(text));

      // For streams, latency covers the whole stream, not just time-to-headers.
      const streamLatency = Date.now() - start;
      const { usage, outputText } = collector.result();
      if (usage) {
        trackCompleted(ctx, usage, streamLatency, extractInputText(body), outputText);
      } else if (!finished) {
        trackFailed(ctx, streamLatency);
      }
      return reply;
    }

    // ── Buffered response ───────────────────────────────────────────────────
    const latency = Date.now() - start;
    let payload: string;
    try {
      payload = await upstream.text();
    } catch (err: unknown) {
      logger.warn({ err, model: ctx.model }, 'Failed to read Anthropic response body');
      trackFailed(ctx, latency);
      reply.code(502);
      return upstreamUnavailableBody();
    }

    // ── Track as TokenVault ticket (best-effort, non-blocking) ─────────────
    if (upstream.ok) {
      const parsed = tryParseJson(payload);
      const usage = isRecord(parsed) ? readUsage(parsed['usage']) : undefined;
      if (usage && isRecord(parsed)) {
        trackCompleted(ctx, usage, latency, extractInputText(body), joinTextParts(parsed['content']));
      }
    } else {
      // Track failed requests too
      trackFailed(ctx, latency);
      logger.warn({ model: ctx.model, status: upstream.status }, 'Anthropic upstream error');
    }

    // ── Return Anthropic response as-is ────────────────────────────────────
    reply.code(upstream.status);
    reply.header('Content-Type', contentType);
    if (anthropicReqId) reply.header('request-id', anthropicReqId);

    // The raw text is returned (not re-serialized) so non-JSON error bodies survive too.
    return payload;
  });

  // ─── GET /v1/models — Anthropic models list passthrough ─────────────────
  app.get('/v1/models', async (req, reply) => {
    const apiKey = headerValue(req.headers['x-api-key'])
      || config.providers.anthropic.apiKey;

    // Preserve pagination parameters from the incoming query string.
    const queryStart = req.url.indexOf('?');
    const query = queryStart === -1 ? '' : req.url.slice(queryStart);

    try {
      const upstream = await fetch(`${ANTHROPIC_API_BASE}/v1/models${query}`, {
        headers: {
          'x-api-key': apiKey,
          'anthropic-version': headerValue(req.headers['anthropic-version']) ?? DEFAULT_ANTHROPIC_VERSION,
        },
      });
      const payload = await upstream.text();

      reply.code(upstream.status);
      reply.header('Content-Type', upstream.headers.get('content-type') ?? 'application/json');
      return payload;
    } catch (err: unknown) {
      logger.warn({ err }, 'Anthropic models upstream unreachable');
      reply.code(502);
      return upstreamUnavailableBody();
    }
  });
}
|
||||
@ -8,6 +8,7 @@ import { closePool } from './db/client.js';
|
||||
import { initProviders } from './providers/index.js';
|
||||
import { healthRoutes } from './routes/health.js';
|
||||
import { proxyRoutes } from './routes/proxy.js';
|
||||
import { anthropicProxyRoutes } from './routes/anthropic-proxy.js';
|
||||
import { ticketRoutes } from './routes/tickets.js';
|
||||
import { rtkRoutes } from './routes/rtk.js';
|
||||
|
||||
@ -20,6 +21,7 @@ await app.register(cors, { origin: true });
|
||||
// ─── Routes ──────────────────────────────────────────────────────────────────
|
||||
await app.register(healthRoutes);
|
||||
await app.register(proxyRoutes);
|
||||
await app.register(anthropicProxyRoutes);
|
||||
await app.register(ticketRoutes);
|
||||
await app.register(rtkRoutes);
|
||||
|
||||
|
||||
@ -161,9 +161,21 @@ tr:hover { background:#f8fafc; }
|
||||
<div class="cards" id="stats-cards">
|
||||
<div class="loading"><div class="spin"></div> Loading...</div>
|
||||
</div>
|
||||
<!-- RTK savings banner — always visible since RTK data exists independently of LLM routing -->
|
||||
<div id="rtk-overview-banner" style="display:none;margin:16px 0;background:linear-gradient(135deg,#f0fdf4,#dcfce7);border:1px solid #86efac;border-radius:12px;padding:20px 24px">
|
||||
<div style="display:flex;align-items:center;gap:12px;margin-bottom:12px">
|
||||
<span style="font-size:20px">⚡</span>
|
||||
<strong style="color:#16a34a;font-size:15px">RTK Token Savings (all-time)</strong>
|
||||
<span style="margin-left:auto;font-size:12px;color:#15803d" id="rtk-banner-host"></span>
|
||||
</div>
|
||||
<div style="display:grid;grid-template-columns:repeat(auto-fit,minmax(150px,1fr));gap:12px" id="rtk-banner-cards"></div>
|
||||
</div>
|
||||
<div class="chart-row">
|
||||
<div class="chart-placeholder" id="cost-timeline">Cost timeline will appear here after requests are tracked</div>
|
||||
<div class="chart-placeholder" id="provider-split">Provider split will appear here</div>
|
||||
<div id="provider-split" style="background:var(--surface);border:1px solid var(--border);border-radius:12px;padding:24px;flex:1;min-height:180px">
|
||||
<div style="font-weight:600;margin-bottom:12px;color:var(--text-muted)">Provider Split</div>
|
||||
<div id="provider-split-inner" style="color:var(--text-muted);font-size:13px;text-align:center;margin-top:40px">Provider split will appear here after requests are tracked</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
@ -311,7 +323,10 @@ function fmtTime(d) { return new Date(d).toLocaleString(lang === 'de' ? 'de-DE'
|
||||
|
||||
async function loadStats() {
|
||||
try {
|
||||
const data = await (await fetch(API + '/tickets/stats?period=month')).json();
|
||||
const [data, rtkStats] = await Promise.all([
|
||||
(await fetch(API + '/tickets/stats?period=month')).json(),
|
||||
fetch(API + '/rtk/stats?period=all').then(r => r.ok ? r.json() : null).catch(() => null),
|
||||
]);
|
||||
const labels = t[lang];
|
||||
document.getElementById('stats-cards').innerHTML = `
|
||||
<div class="card"><div class="label">${labels.totalCost}</div><div class="value primary">$${fmt(data.total_cost_usd)}</div><div class="sub">Last 30 days</div></div>
|
||||
@ -321,6 +336,53 @@ async function loadStats() {
|
||||
<div class="card"><div class="label">${labels.tokensIn}</div><div class="value">${fmtK(data.total_tokens_in)}</div></div>
|
||||
<div class="card"><div class="label">${labels.tokensOut}</div><div class="value">${fmtK(data.total_tokens_out)}</div></div>
|
||||
`;
|
||||
|
||||
// ── RTK savings banner ──────────────────────────────────────────────────
|
||||
if (rtkStats && rtkStats.total_commands > 0) {
|
||||
const banner = document.getElementById('rtk-overview-banner');
|
||||
banner.style.display = 'block';
|
||||
document.getElementById('rtk-banner-host').textContent =
|
||||
`${rtkStats.unique_hosts} host${rtkStats.unique_hosts !== 1 ? 's' : ''} · ${rtkStats.unique_projects} project${rtkStats.unique_projects !== 1 ? 's' : ''}`;
|
||||
document.getElementById('rtk-banner-cards').innerHTML = `
|
||||
<div style="background:#fff;border-radius:8px;padding:12px;text-align:center">
|
||||
<div style="font-size:11px;color:#6b7280;margin-bottom:4px">TOKENS SAVED</div>
|
||||
<div style="font-size:22px;font-weight:700;color:#16a34a">${fmtK(rtkStats.total_saved_tokens)}</div>
|
||||
</div>
|
||||
<div style="background:#fff;border-radius:8px;padding:12px;text-align:center">
|
||||
<div style="font-size:11px;color:#6b7280;margin-bottom:4px">COMMANDS</div>
|
||||
<div style="font-size:22px;font-weight:700;color:#6366f1">${fmtK(rtkStats.total_commands)}</div>
|
||||
</div>
|
||||
<div style="background:#fff;border-radius:8px;padding:12px;text-align:center">
|
||||
<div style="font-size:11px;color:#6b7280;margin-bottom:4px">AVG SAVINGS</div>
|
||||
<div style="font-size:22px;font-weight:700;color:#f59e0b">${rtkStats.avg_savings_pct.toFixed(1)}%</div>
|
||||
</div>
|
||||
<div style="background:#fff;border-radius:8px;padding:12px;text-align:center">
|
||||
<div style="font-size:11px;color:#6b7280;margin-bottom:4px">INPUT TOKENS</div>
|
||||
<div style="font-size:22px;font-weight:700">${fmtK(rtkStats.total_input_tokens)}</div>
|
||||
</div>
|
||||
`;
|
||||
}
|
||||
|
||||
// ── Provider split (simple bar chart) ───────────────────────────────────
|
||||
try {
|
||||
const breakdown = await (await fetch(API + '/cost/breakdown?group_by=provider')).json();
|
||||
const total = breakdown.reduce((s, b) => s + b.request_count, 0);
|
||||
if (total > 0) {
|
||||
const colors = { anthropic:'#6366f1', openai:'#10b981', ollama:'#f59e0b', 'ai-bridge':'#3b82f6' };
|
||||
document.getElementById('provider-split-inner').innerHTML = breakdown.map(b => {
|
||||
const pct = ((b.request_count / total) * 100).toFixed(0);
|
||||
const col = colors[b.group_value] || '#94a3b8';
|
||||
return `<div style="margin-bottom:10px">
|
||||
<div style="display:flex;justify-content:space-between;font-size:12px;margin-bottom:3px">
|
||||
<span style="font-weight:500">${b.group_value}</span><span style="color:var(--text-muted)">${b.request_count} req · ${pct}%</span>
|
||||
</div>
|
||||
<div style="background:var(--border);border-radius:4px;height:6px">
|
||||
<div style="background:${col};width:${pct}%;height:6px;border-radius:4px"></div>
|
||||
</div>
|
||||
</div>`;
|
||||
}).join('');
|
||||
}
|
||||
} catch {}
|
||||
} catch { document.getElementById('stats-cards').innerHTML = '<div class="card"><div class="label">Status</div><div class="value">Offline</div><div class="sub">TokenVault core not reachable</div></div>'; }
|
||||
}
|
||||
|
||||
|
||||
@ -63,5 +63,42 @@ app.get('/api/rtk/hosts', async () => {
|
||||
return res.json();
|
||||
});
|
||||
|
||||
// ─── Anthropic API passthrough (ANTHROPIC_BASE_URL routing) ──────────────────
// Claude Code routes through here when ANTHROPIC_BASE_URL=https://tokenvault.fichtmueller.org
// Forward to core which handles tracking + proxying to Anthropic.
// Core's response body is relayed unmodified: JSON responses are passed through
// as text, `text/event-stream` responses are streamed chunk by chunk.
app.post('/v1/messages', async (req, reply) => {
  const headers: Record<string, string> = { 'Content-Type': 'application/json' };
  // `authorization` is forwarded as well because core accepts it as an API-key fallback.
  for (const key of ['x-api-key', 'authorization', 'anthropic-version', 'anthropic-beta', 'x-tokenvault-caller', 'x-tokenvault-project', 'x-tokenvault-team']) {
    const val = req.headers[key];
    const first = Array.isArray(val) ? val[0] : val;
    if (first) headers[key] = first;
  }

  let res: Awaited<ReturnType<typeof fetch>>;
  try {
    res = await fetch(`${CORE_URL}/v1/messages`, {
      method: 'POST',
      headers,
      body: JSON.stringify(req.body ?? {}),
    });
  } catch (err: unknown) {
    req.log.warn({ err }, 'TokenVault core unreachable for /v1/messages');
    reply.code(502);
    return { type: 'error', error: { type: 'api_error', message: 'TokenVault core not reachable' } };
  }

  const contentType = res.headers.get('content-type') ?? 'application/json';
  const reqId = res.headers.get('request-id');

  // Streaming response: write bytes to the socket as they arrive instead of buffering/parsing.
  if (res.body && contentType.includes('text/event-stream')) {
    // Keep headers already set on the reply: a hijacked reply sends none itself.
    const rawHeaders: Record<string, string | number | string[]> = {};
    for (const [name, value] of Object.entries(reply.getHeaders())) {
      if (value !== undefined) rawHeaders[name] = value;
    }
    rawHeaders['content-type'] = contentType;
    rawHeaders['cache-control'] = 'no-cache';
    if (reqId) rawHeaders['request-id'] = reqId;

    reply.hijack();
    reply.raw.writeHead(res.status, rawHeaders);

    const reader = res.body.getReader();
    try {
      for (;;) {
        const { done, value } = await reader.read();
        if (done) break;
        if (reply.raw.destroyed) {
          // Client disconnected: stop pulling from core.
          await reader.cancel();
          break;
        }
        reply.raw.write(value);
      }
    } catch (err: unknown) {
      req.log.warn({ err }, 'Stream from TokenVault core interrupted');
    } finally {
      reply.raw.end();
    }
    return reply;
  }

  reply.code(res.status);
  reply.header('Content-Type', contentType);
  if (reqId) reply.header('request-id', reqId);
  // Text (not res.json()) so a non-JSON error body from core is relayed instead of throwing.
  return res.text();
});
|
||||
|
||||
// Model list passthrough: forwards to core's /v1/models and relays status and body unchanged.
app.get('/v1/models', async (req, reply) => {
  const headers: Record<string, string> = {};
  for (const key of ['x-api-key', 'anthropic-version']) {
    const val = req.headers[key];
    const first = Array.isArray(val) ? val[0] : val;
    if (first) headers[key] = first;
  }

  // Preserve pagination parameters from the incoming query string.
  const queryStart = req.url.indexOf('?');
  const query = queryStart === -1 ? '' : req.url.slice(queryStart);

  try {
    const res = await fetch(`${CORE_URL}/v1/models${query}`, { headers });
    const payload = await res.text();
    reply.code(res.status);
    reply.header('Content-Type', res.headers.get('content-type') ?? 'application/json');
    return payload;
  } catch (err: unknown) {
    req.log.warn({ err }, 'TokenVault core unreachable for /v1/models');
    reply.code(502);
    return { type: 'error', error: { type: 'api_error', message: 'TokenVault core not reachable' } };
  }
});
|
||||
|
||||
// ─── Health endpoint (needed for infra monitoring) ────────────────────────────
// Mirrors core's /health: the status code is propagated so monitors see core
// failures, and an unreachable core is reported as 503 rather than an opaque 500.
app.get('/health', async (req, reply) => {
  try {
    const res = await fetch(`${CORE_URL}/health`);
    const payload = await res.text();
    reply.code(res.status);
    reply.header('Content-Type', res.headers.get('content-type') ?? 'application/json');
    return payload;
  } catch (err: unknown) {
    req.log.warn({ err }, 'TokenVault core unreachable for /health');
    reply.code(503);
    return { status: 'unhealthy', error: 'TokenVault core not reachable' };
  }
});
|
||||
|
||||
await app.listen({ port: PORT, host: '0.0.0.0' });
|
||||
console.log(`TokenVault Dashboard running on http://localhost:${PORT}`);
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user