tokenvault/packages/core/src/routes/anthropic-proxy.ts
Rene Fichtmueller 998e8d8aee feat: add OpenAI/ChatGPT passthrough + per-AI savings in Overview
- New openai-proxy.ts: POST /v1/chat/completions forwards client's API key
  to OpenAI, creates tracking ticket with full cost calculation for all
  gpt-4o, gpt-4o-mini, o1, o3-mini, gpt-4-turbo, gpt-3.5-turbo models
- GET /v1/models unified in openai-proxy (removed duplicate from anthropic-proxy)
- Dashboard: POST /v1/chat/completions forwarded to core (OPENAI_BASE_URL works)
- Overview 'Savings per AI' panel: shows emoji label, request count, cost,
  tokens in/out, and savings per provider (🟣 Claude 🟢 ChatGPT 🟡 Ollama)
- Old pipeline-based proxy.ts replaced by direct API passthroughs
2026-04-14 22:55:13 +02:00

144 lines
6.1 KiB
TypeScript

import type { FastifyInstance } from 'fastify';
import { createTicket, hashContent } from '../tickets/ticket-service.js';
import { logger } from '../observability/logger.js';
import { config } from '../config.js';
/** Price card for one model, in USD per one million tokens. */
interface ModelPricing {
  input: number;
  output: number;
  /** Rate for tokens served from the prompt cache; when absent, cached tokens are priced at 0. */
  cached?: number;
}

/** Price card applied to any model that has no entry of its own in the table. */
const FALLBACK_PRICING: ModelPricing = { input: 3.0, output: 15.0, cached: 0.30 };

/** Divisor that converts a raw token count into "millions of tokens". */
const TOKENS_PER_MILLION = 1_000_000;

// Anthropic pricing per 1M tokens (USD)
// NOTE(review): model identifiers and rates are carried over unchanged — confirm
// them against the current Anthropic price list before relying on the numbers.
const ANTHROPIC_PRICING: Record<string, ModelPricing> = {
  'claude-opus-4-20250514': { input: 15.0, output: 75.0, cached: 1.50 },
  'claude-opus-4-5': { input: 15.0, output: 75.0, cached: 1.50 },
  'claude-sonnet-4-20250514': FALLBACK_PRICING,
  'claude-sonnet-4-5-20251001': { input: 3.0, output: 15.0, cached: 0.30 },
  'claude-haiku-3-5-20251022': { input: 0.80, output: 4.0, cached: 0.08 },
  'claude-haiku-3-20250307': { input: 0.25, output: 1.25, cached: 0.025 },
};

/**
 * Estimates the USD cost of a single request.
 *
 * @param model - Model identifier; unknown identifiers are billed with the fallback price card.
 * @param inputTokens - Total prompt tokens, INCLUDING the `cachedTokens` portion.
 * @param outputTokens - Tokens generated by the model.
 * @param cachedTokens - Portion of `inputTokens` that was served from the prompt cache.
 * @returns The estimated cost; never negative.
 */
function calcCost(model: string, inputTokens: number, outputTokens: number, cachedTokens: number): number {
  // `model` originates from the client request. An own-property check keeps names such as
  // "toString" or "__proto__" from resolving to Object.prototype members, which would
  // bypass the fallback and turn the result into NaN.
  const known = Object.prototype.hasOwnProperty.call(ANTHROPIC_PRICING, model)
    ? ANTHROPIC_PRICING[model]
    : undefined;
  const pricing = known ?? FALLBACK_PRICING;

  // Clamp so that reporting more cached tokens than input tokens cannot produce a
  // negative input cost that silently discounts the output cost.
  const uncachedTokens = Math.max(0, inputTokens - cachedTokens);

  const inputCost = (uncachedTokens / TOKENS_PER_MILLION) * pricing.input;
  const outputCost = (outputTokens / TOKENS_PER_MILLION) * pricing.output;
  const cacheCost = (cachedTokens / TOKENS_PER_MILLION) * (pricing.cached ?? 0);
  return Math.max(0, inputCost + outputCost + cacheCost);
}
/** Model label used for tracking when the client request does not name a model. */
const DEFAULT_ANTHROPIC_MODEL = 'claude-sonnet-4-20250514';

/** Upstream endpoint that every request is forwarded to. */
const ANTHROPIC_MESSAGES_URL = 'https://api.anthropic.com/v1/messages';

/** Token accounting fields read from Anthropic responses and stream events. */
interface AnthropicUsage {
  input_tokens?: number | null;
  output_tokens?: number | null;
  cache_read_input_tokens?: number | null;
  cache_creation_input_tokens?: number | null;
}

/** Request-scoped values that are copied onto every ticket. */
interface TicketContext {
  model: string;
  latencyMs: number;
  caller: string | undefined;
  project: string | undefined;
  team: string | undefined;
}

/** Returns the first value of a header that Node may deliver as a string or a string array. */
function firstHeader(value: string | string[] | undefined): string | undefined {
  return Array.isArray(value) ? value[0] : value;
}

/** Builds an error payload shaped like `{ type: 'error', error: { type, message } }`. */
function anthropicError(type: string, message: string): Record<string, unknown> {
  return { type: 'error', error: { type, message } };
}

/** Concatenates the string `text` fields of an array of content blocks; anything else yields ''. */
function joinBlockText(blocks: unknown): string {
  if (!Array.isArray(blocks)) return '';
  return blocks
    .map((block: unknown) => {
      if (typeof block !== 'object' || block === null) return '';
      const text = (block as { text?: unknown }).text;
      return typeof text === 'string' ? text : '';
    })
    .join('');
}

/** Flattens the request `messages` array into one newline-separated string for hashing. */
function joinMessageText(messages: unknown): string {
  if (!Array.isArray(messages)) return '';
  return messages
    .map((message: unknown) => {
      if (typeof message !== 'object' || message === null) return '';
      const content = (message as { content?: unknown }).content;
      return typeof content === 'string' ? content : joinBlockText(content);
    })
    .join('\n');
}

/**
 * Records a successful request as a ticket. Best effort: any failure is logged and
 * never propagates to the client response.
 */
function trackCompletion(ctx: TicketContext, usage: AnthropicUsage, messages: unknown, outputText: string): void {
  try {
    const inputTokens = usage.input_tokens ?? 0;
    const outputTokens = usage.output_tokens ?? 0;
    const cachedTokens = usage.cache_read_input_tokens ?? 0;
    // Anthropic's `input_tokens` already EXCLUDES cache reads, while calcCost subtracts
    // `cachedTokens` from its input argument. Passing the sum keeps the uncached input
    // from being discounted a second time.
    // NOTE(review): `cache_creation_input_tokens` (cache writes) are not priced here.
    const cost = calcCost(ctx.model, inputTokens + cachedTokens, outputTokens, cachedTokens);
    void createTicket({
      provider: 'anthropic',
      model: ctx.model,
      status: 'completed',
      tokens_in: inputTokens,
      tokens_out: outputTokens,
      tokens_cached: cachedTokens,
      tokens_saved: 0,
      cost_usd: cost,
      latency_ms: ctx.latencyMs,
      cache_hit: cachedTokens > 0,
      caller: ctx.caller ?? 'claude-code',
      project: ctx.project,
      team: ctx.team,
      input_hash: hashContent(joinMessageText(messages)),
      output_hash: hashContent(outputText),
    }).catch((err: unknown) => logger.warn({ err }, 'Failed to create ticket for Anthropic passthrough'));
  } catch (err: unknown) {
    logger.warn({ err }, 'Failed to create ticket for Anthropic passthrough');
  }
}

/** Records a failed request as a ticket. Best effort, same guarantees as `trackCompletion`. */
function trackFailure(ctx: TicketContext): void {
  try {
    void createTicket({
      provider: 'anthropic',
      model: ctx.model,
      status: 'failed',
      tokens_in: 0,
      tokens_out: 0,
      cost_usd: 0,
      latency_ms: ctx.latencyMs,
      caller: ctx.caller ?? 'claude-code',
      project: ctx.project,
      team: ctx.team,
      input_hash: hashContent(ctx.model),
      output_hash: '',
    }).catch((err: unknown) => logger.warn({ err }, 'Failed to create failure ticket for Anthropic passthrough'));
  } catch (err: unknown) {
    logger.warn({ err }, 'Failed to create failure ticket for Anthropic passthrough');
  }
}

/**
 * Incrementally scans a server-sent-event byte stream and collects the usage
 * counters and generated text carried by its `data:` lines.
 */
class SseUsageCollector {
  private readonly decoder = new TextDecoder();
  private readonly textParts: string[] = [];
  private pending = '';
  private seenUsage: AnthropicUsage | undefined;

  /** Feeds the next chunk; only complete lines are parsed, the remainder is buffered. */
  push(chunk: Uint8Array): void {
    this.pending += this.decoder.decode(chunk, { stream: true });
    const lines = this.pending.split('\n');
    this.pending = lines.pop() ?? '';
    for (const line of lines) this.consumeLine(line);
  }

  /** Flushes the decoder and parses whatever is left in the buffer. */
  finish(): void {
    this.pending += this.decoder.decode();
    if (this.pending !== '') this.consumeLine(this.pending);
    this.pending = '';
  }

  /** Usage merged from all events seen so far, or `undefined` when none carried usage. */
  get usage(): AnthropicUsage | undefined {
    return this.seenUsage;
  }

  /** Concatenated `text_delta` payloads seen so far. */
  get outputText(): string {
    return this.textParts.join('');
  }

  private consumeLine(line: string): void {
    if (!line.startsWith('data:')) return;
    const data = line.slice('data:'.length).trim();
    if (data === '') return;

    let event: unknown;
    try {
      event = JSON.parse(data);
    } catch {
      // Not JSON: irrelevant for tracking, and the bytes are forwarded to the client regardless.
      return;
    }
    if (typeof event !== 'object' || event === null) return;

    const evt = event as {
      type?: unknown;
      message?: { usage?: AnthropicUsage } | null;
      usage?: AnthropicUsage | null;
      delta?: { type?: unknown; text?: unknown } | null;
    };
    if (evt.type === 'message_start' && evt.message?.usage) {
      this.seenUsage = { ...this.seenUsage, ...evt.message.usage };
    } else if (evt.type === 'message_delta' && evt.usage) {
      // Later counters override earlier ones field by field.
      this.seenUsage = { ...this.seenUsage, ...evt.usage };
    } else if (evt.type === 'content_block_delta' && evt.delta?.type === 'text_delta') {
      if (typeof evt.delta.text === 'string') this.textParts.push(evt.delta.text);
    }
  }
}

/** Resolves once the stream emits 'drain' or 'close', whichever comes first. */
function waitForDrainOrClose(stream: {
  once(event: string, listener: () => void): unknown;
  off(event: string, listener: () => void): unknown;
}): Promise<void> {
  return new Promise<void>((resolve) => {
    const settle = (): void => {
      // Remove both listeners so repeated waits do not accumulate handlers.
      stream.off('drain', settle);
      stream.off('close', settle);
      resolve();
    };
    stream.once('drain', settle);
    stream.once('close', settle);
  });
}

/**
 * Anthropic API passthrough — enables ANTHROPIC_BASE_URL routing.
 *
 * Claude Code (and any Anthropic SDK client) sends requests here when
 * ANTHROPIC_BASE_URL=https://tokenvault.fichtmueller.org is set.
 * The request is forwarded to Anthropic using the client's own API key (falling
 * back to the configured key), and a TokenVault ticket is created to track usage
 * and cost.
 *
 * Registers `POST /v1/messages`:
 * - JSON responses are returned with the upstream status, content type and body text.
 * - `text/event-stream` responses are piped to the client chunk by chunk while
 *   usage is collected from the events.
 * - A missing API key yields 401, a non-object body 400, an unreachable or
 *   unreadable upstream 502.
 *
 * Ticket latency covers the time until the upstream response was fully received.
 *
 * @param app - Fastify instance the route is registered on.
 */
export async function anthropicProxyRoutes(app: FastifyInstance): Promise<void> {
  // ─── POST /v1/messages — Anthropic Messages API passthrough ──────────────
  app.post('/v1/messages', async (req, reply) => {
    const rawPayload: unknown = req.body;
    if (typeof rawPayload !== 'object' || rawPayload === null || Array.isArray(rawPayload)) {
      return reply
        .code(400)
        .send(anthropicError('invalid_request_error', 'Request body must be a JSON object'));
    }
    const payload = rawPayload as Record<string, unknown>;
    const requestedModel = payload['model'];
    const model = typeof requestedModel === 'string' ? requestedModel : DEFAULT_ANTHROPIC_MODEL;

    // Use the client's API key if provided, fall back to configured key
    const bearerToken = firstHeader(req.headers['authorization'])?.replace(/^Bearer\s+/i, '');
    const apiKey = firstHeader(req.headers['x-api-key']) || bearerToken || config.providers.anthropic.apiKey;
    if (!apiKey) {
      // Without this guard the literal string "undefined" would be sent upstream as the key.
      return reply
        .code(401)
        .send(anthropicError('authentication_error', 'No Anthropic API key provided'));
    }

    const anthropicVersion = firstHeader(req.headers['anthropic-version']) ?? '2023-06-01';
    const anthropicBeta = firstHeader(req.headers['anthropic-beta']);
    const caller = firstHeader(req.headers['x-tokenvault-caller']);
    const project = firstHeader(req.headers['x-tokenvault-project']);
    const team = firstHeader(req.headers['x-tokenvault-team']);

    const startedAt = Date.now();
    const ticketContext = (): TicketContext => ({
      model,
      latencyMs: Date.now() - startedAt,
      caller,
      project,
      team,
    });

    // ── Forward to Anthropic ────────────────────────────────────────────────
    const forwardHeaders: Record<string, string> = {
      'Content-Type': 'application/json',
      'x-api-key': apiKey,
      'anthropic-version': anthropicVersion,
    };
    if (anthropicBeta) forwardHeaders['anthropic-beta'] = anthropicBeta;

    let upstream: Response;
    try {
      upstream = await fetch(ANTHROPIC_MESSAGES_URL, {
        method: 'POST',
        headers: forwardHeaders,
        body: JSON.stringify(payload),
      });
    } catch (err: unknown) {
      logger.warn({ err, model }, 'Anthropic upstream unreachable');
      trackFailure(ticketContext());
      return reply.code(502).send(anthropicError('api_error', 'Failed to reach Anthropic upstream'));
    }

    const contentType = upstream.headers.get('content-type') ?? 'application/json';
    // Pass through useful Anthropic response headers
    const anthropicReqId = upstream.headers.get('request-id');

    // ── Streaming response: pipe events through while collecting usage ─────
    if (upstream.ok && upstream.body !== null && contentType.includes('text/event-stream')) {
      // Take over the raw socket so chunks reach the client as they arrive.
      reply.hijack();
      const raw = reply.raw;
      let clientClosed = false;
      raw.once('close', () => {
        clientClosed = true;
      });

      // Keep headers that hooks/plugins already put on the reply.
      const streamHeaders = {
        ...reply.getHeaders(),
        'content-type': contentType,
        'cache-control': 'no-cache',
        ...(anthropicReqId ? { 'request-id': anthropicReqId } : {}),
      };
      raw.writeHead(upstream.status, streamHeaders);

      const reader = upstream.body.getReader();
      const collector = new SseUsageCollector();
      try {
        for (;;) {
          const chunk = await reader.read();
          if (chunk.done) break;
          collector.push(chunk.value);
          if (clientClosed) break;
          // Respect backpressure; also wakes up if the client goes away meanwhile.
          if (!raw.write(chunk.value)) await waitForDrainOrClose(raw);
        }
        collector.finish();
      } catch (err: unknown) {
        logger.warn({ err, model }, 'Anthropic stream interrupted');
      } finally {
        if (clientClosed) {
          // Nobody is listening any more: stop pulling from upstream.
          void reader.cancel().catch((err: unknown) => logger.warn({ err }, 'Failed to cancel Anthropic stream'));
        } else {
          raw.end();
        }
      }

      const streamedUsage = collector.usage;
      if (streamedUsage) {
        trackCompletion(ticketContext(), streamedUsage, payload['messages'], collector.outputText);
      }
      return reply;
    }

    // ── Buffered response ──────────────────────────────────────────────────
    let responseText: string;
    try {
      responseText = await upstream.text();
    } catch (err: unknown) {
      logger.warn({ err, model }, 'Failed to read Anthropic upstream response');
      trackFailure(ticketContext());
      return reply.code(502).send(anthropicError('api_error', 'Failed to read Anthropic upstream response'));
    }

    // ── Track as TokenVault ticket (best-effort, non-blocking) ─────────────
    if (upstream.ok) {
      let parsed: unknown;
      try {
        parsed = JSON.parse(responseText);
      } catch {
        // A non-JSON success body carries no usage to track; it is still forwarded below.
        parsed = undefined;
      }
      if (typeof parsed === 'object' && parsed !== null) {
        const responseBody = parsed as { usage?: AnthropicUsage | null; content?: unknown };
        if (responseBody.usage) {
          trackCompletion(
            ticketContext(),
            responseBody.usage,
            payload['messages'],
            joinBlockText(responseBody.content),
          );
        }
      }
    } else {
      // Track failed requests too
      trackFailure(ticketContext());
      logger.warn({ model, status: upstream.status }, 'Anthropic upstream error');
    }

    // ── Return Anthropic response as-is ────────────────────────────────────
    reply.code(upstream.status);
    reply.header('Content-Type', contentType);
    if (anthropicReqId) reply.header('request-id', anthropicReqId);
    // Sending the original text avoids a parse/re-serialize round trip and also
    // works when an intermediary answered with something that is not JSON.
    return reply.send(responseText);
  });
}