import type { FastifyInstance } from 'fastify';
import { createTicket, hashContent } from '../tickets/ticket-service.js';
import { logger } from '../observability/logger.js';
import { config } from '../config.js';

/** Price card for one model, in USD per 1M tokens. */
interface ModelPricing {
  input: number;
  output: number;
  /** Price for tokens read from the prompt cache; absent means "not billed here". */
  cached?: number;
}

/** Subset of the Anthropic `usage` object that this proxy reads. */
interface AnthropicUsage {
  input_tokens?: number;
  output_tokens?: number;
  cache_read_input_tokens?: number;
  cache_creation_input_tokens?: number;
}

/** What we need from an upstream response in order to write a ticket. */
interface ResponseSummary {
  usage: AnthropicUsage;
  outputText: string;
}

const ANTHROPIC_MESSAGES_URL = 'https://api.anthropic.com/v1/messages';
const DEFAULT_MODEL = 'claude-sonnet-4-20250514';
const DEFAULT_ANTHROPIC_VERSION = '2023-06-01';
const DEFAULT_CALLER = 'claude-code';

const USAGE_KEYS = [
  'input_tokens',
  'output_tokens',
  'cache_read_input_tokens',
  'cache_creation_input_tokens',
] as const;

/** Pricing used for models that are missing from ANTHROPIC_PRICING (Sonnet 4 rates). */
const DEFAULT_PRICING: ModelPricing = { input: 3.0, output: 15.0, cached: 0.30 };

// Anthropic pricing per 1M tokens (USD)
const ANTHROPIC_PRICING: Record<string, ModelPricing> = {
  'claude-opus-4-20250514': { input: 15.0, output: 75.0, cached: 1.50 },
  'claude-opus-4-5': { input: 15.0, output: 75.0, cached: 1.50 },
  'claude-sonnet-4-20250514': DEFAULT_PRICING,
  'claude-sonnet-4-5-20251001': { input: 3.0, output: 15.0, cached: 0.30 },
  'claude-haiku-3-5-20251022': { input: 0.80, output: 4.0, cached: 0.08 },
  'claude-haiku-3-20250307': { input: 0.25, output: 1.25, cached: 0.025 },
};

/**
 * Estimates the USD cost of one Messages API call.
 *
 * Anthropic reports `input_tokens` and `cache_read_input_tokens` as disjoint
 * counts (`input_tokens` covers only the tokens that were NOT served from the
 * cache), so the two are billed independently and must not be subtracted from
 * one another.
 *
 * NOTE(review): cache-write tokens (`cache_creation_input_tokens`) are not
 * priced here because the pricing table carries no cache-write rate.
 *
 * @param model        Model id; unknown ids fall back to Sonnet 4 pricing.
 * @param inputTokens  Uncached input tokens (`usage.input_tokens`).
 * @param outputTokens Generated tokens (`usage.output_tokens`).
 * @param cachedTokens Tokens read from the prompt cache (`usage.cache_read_input_tokens`).
 * @returns Estimated cost in USD, never negative.
 */
function calcCost(model: string, inputTokens: number, outputTokens: number, cachedTokens: number): number {
  const pricing = ANTHROPIC_PRICING[model] ?? DEFAULT_PRICING;
  const inputCost = (inputTokens / 1_000_000) * pricing.input;
  const outputCost = (outputTokens / 1_000_000) * pricing.output;
  const cacheCost = (cachedTokens / 1_000_000) * (pricing.cached ?? 0);
  return Math.max(0, inputCost + outputCost + cacheCost);
}

/** True for plain (non-null, non-array) objects. */
function isRecord(value: unknown): value is Record<string, unknown> {
  return typeof value === 'object' && value !== null && !Array.isArray(value);
}

/** Node may deliver a repeated header as an array; use the first occurrence. */
function firstHeader(value: string | string[] | undefined): string | undefined {
  return Array.isArray(value) ? value[0] : value;
}

/** Parses `text` as JSON and returns it only if it is a plain object. */
function parseJsonRecord(text: string): Record<string, unknown> | undefined {
  try {
    const value: unknown = JSON.parse(text);
    return isRecord(value) ? value : undefined;
  } catch {
    return undefined;
  }
}

/** Copies the numeric usage counters out of an untyped `usage` value. */
function readUsage(value: unknown): AnthropicUsage | undefined {
  if (!isRecord(value)) return undefined;
  const usage: AnthropicUsage = {};
  for (const key of USAGE_KEYS) {
    const count = value[key];
    if (typeof count === 'number' && Number.isFinite(count)) usage[key] = count;
  }
  return usage;
}

/** Returns the `text` field of a content block, or '' when it has none. */
function blockText(block: unknown): string {
  return isRecord(block) && typeof block['text'] === 'string' ? block['text'] : '';
}

/** Flattens the request's `messages` into one string (used only for hashing). */
function extractInputText(body: Record<string, unknown>): string {
  const messages: unknown[] = Array.isArray(body['messages']) ? body['messages'] : [];
  return messages
    .map((message) => {
      const content = isRecord(message) ? message['content'] : undefined;
      if (typeof content === 'string') return content;
      if (Array.isArray(content)) return content.map(blockText).join('');
      return '';
    })
    .join('\n');
}

/** Extracts usage and output text from a regular (non-streaming) JSON response. */
function summarizeJsonResponse(response: Record<string, unknown>): ResponseSummary | undefined {
  const usage = readUsage(response['usage']);
  if (!usage) return undefined;
  const content: unknown[] = Array.isArray(response['content']) ? response['content'] : [];
  return { usage, outputText: content.map(blockText).join('') };
}

/**
 * Extracts usage and output text from a buffered server-sent-event stream.
 *
 * Reads `message_start` (initial usage), `message_delta` (cumulative output
 * token count) and `content_block_delta` (text fragments) events.
 */
function summarizeEventStream(raw: string): ResponseSummary | undefined {
  let usage: AnthropicUsage | undefined;
  let outputText = '';

  for (const line of raw.split(/\r?\n/)) {
    if (!line.startsWith('data:')) continue;
    const event = parseJsonRecord(line.slice('data:'.length));
    if (!event) continue;

    if (event['type'] === 'message_start') {
      const message = event['message'];
      const initial = readUsage(isRecord(message) ? message['usage'] : undefined);
      if (initial) usage = { ...usage, ...initial };
    } else if (event['type'] === 'message_delta') {
      const delta = readUsage(event['usage']);
      if (delta) usage = { ...usage, ...delta };
    } else if (event['type'] === 'content_block_delta') {
      outputText += blockText(event['delta']);
    }
  }

  return usage ? { usage, outputText } : undefined;
}

/** Builds an error payload in the shape Anthropic clients expect. */
function anthropicError(type: string, message: string): Record<string, unknown> {
  return { type: 'error', error: { type, message } };
}

/**
 * Anthropic API passthrough — enables ANTHROPIC_BASE_URL routing.
 *
 * Claude Code (and any Anthropic SDK client) sends requests here when
 * ANTHROPIC_BASE_URL=https://tokenvault.fichtmueller.org is set.
 * We forward the request to Anthropic using the client's own API key,
 * then create a TokenVault ticket to track usage and cost.
 *
 * Limitation: the upstream body is buffered before it is relayed, so
 * `stream: true` requests receive their events in one piece rather than
 * incrementally.
 */
export async function anthropicProxyRoutes(app: FastifyInstance): Promise<void> {
  // ─── POST /v1/messages — Anthropic Messages API passthrough ──────────────
  app.post('/v1/messages', async (req, reply) => {
    if (!isRecord(req.body)) {
      return reply
        .code(400)
        .send(anthropicError('invalid_request_error', 'Request body must be a JSON object'));
    }
    const body = req.body;
    const model = typeof body['model'] === 'string' ? body['model'] : DEFAULT_MODEL;

    // Use the client's API key if provided, fall back to configured key.
    // `||` is deliberate: an empty header value should fall through as well.
    const bearerToken = firstHeader(req.headers['authorization'])?.replace(/^Bearer\s+/i, '');
    const apiKey =
      firstHeader(req.headers['x-api-key']) || bearerToken || config.providers.anthropic.apiKey;
    if (!apiKey) {
      return reply
        .code(401)
        .send(anthropicError('authentication_error', 'No Anthropic API key provided or configured'));
    }

    const anthropicVersion = firstHeader(req.headers['anthropic-version']) ?? DEFAULT_ANTHROPIC_VERSION;
    const anthropicBeta = firstHeader(req.headers['anthropic-beta']);
    const caller = firstHeader(req.headers['x-tokenvault-caller']) ?? DEFAULT_CALLER;
    const project = firstHeader(req.headers['x-tokenvault-project']);
    const team = firstHeader(req.headers['x-tokenvault-team']);

    /** Records a failed call; best-effort, never blocks or fails the response. */
    const recordFailure = (latencyMs: number): void => {
      void createTicket({
        provider: 'anthropic',
        model,
        status: 'failed',
        tokens_in: 0,
        tokens_out: 0,
        cost_usd: 0,
        latency_ms: latencyMs,
        caller,
        project,
        team,
        input_hash: hashContent(model),
        output_hash: '',
      }).catch((err: unknown) =>
        logger.warn({ err }, 'Failed to create ticket for failed Anthropic passthrough'),
      );
    };

    // ── Forward to Anthropic ────────────────────────────────────────────────
    const forwardHeaders: Record<string, string> = {
      'Content-Type': 'application/json',
      'x-api-key': apiKey,
      'anthropic-version': anthropicVersion,
    };
    if (anthropicBeta) forwardHeaders['anthropic-beta'] = anthropicBeta;

    const start = Date.now();
    let upstream: Response;
    let latency: number;
    let rawBody: string;
    try {
      upstream = await fetch(ANTHROPIC_MESSAGES_URL, {
        method: 'POST',
        headers: forwardHeaders,
        body: JSON.stringify(body),
      });
      latency = Date.now() - start;
      // Read as text so a non-JSON payload (SSE stream, HTML error page)
      // can still be relayed instead of crashing the handler.
      rawBody = await upstream.text();
    } catch (err: unknown) {
      logger.warn({ err, model }, 'Anthropic upstream unreachable');
      recordFailure(Date.now() - start);
      return reply.code(502).send(anthropicError('api_error', 'Failed to reach Anthropic upstream'));
    }

    const jsonBody = parseJsonRecord(rawBody);

    // ── Track as TokenVault ticket (best-effort, non-blocking) ─────────────
    if (upstream.ok) {
      const summary = jsonBody ? summarizeJsonResponse(jsonBody) : summarizeEventStream(rawBody);
      if (summary) {
        const inputTokens = summary.usage.input_tokens ?? 0;
        const outputTokens = summary.usage.output_tokens ?? 0;
        const cachedTokens = summary.usage.cache_read_input_tokens ?? 0;

        void createTicket({
          provider: 'anthropic',
          model,
          status: 'completed',
          tokens_in: inputTokens,
          tokens_out: outputTokens,
          tokens_cached: cachedTokens,
          tokens_saved: 0,
          cost_usd: calcCost(model, inputTokens, outputTokens, cachedTokens),
          latency_ms: latency,
          cache_hit: cachedTokens > 0,
          caller,
          project,
          team,
          input_hash: hashContent(extractInputText(body)),
          output_hash: hashContent(summary.outputText),
        }).catch((err: unknown) =>
          logger.warn({ err }, 'Failed to create ticket for Anthropic passthrough'),
        );
      }
    } else {
      // Track failed requests too
      recordFailure(latency);
      logger.warn({ model, status: upstream.status }, 'Anthropic upstream error');
    }

    // ── Return Anthropic response as-is ────────────────────────────────────
    reply.code(upstream.status);
    reply.header(
      'Content-Type',
      jsonBody ? 'application/json' : upstream.headers.get('content-type') ?? 'application/json',
    );

    // Pass through useful Anthropic response headers
    const anthropicReqId = upstream.headers.get('request-id');
    if (anthropicReqId) reply.header('request-id', anthropicReqId);

    return reply.send(jsonBody ?? rawBody);
  });
}