- New openai-proxy.ts: POST /v1/chat/completions forwards the client's API key to OpenAI and creates a tracking ticket with full cost calculation for all gpt-4o, gpt-4o-mini, o1, o3-mini, gpt-4-turbo, and gpt-3.5-turbo models
- GET /v1/models unified in openai-proxy (removed the duplicate from anthropic-proxy)
- Dashboard: POST /v1/chat/completions is forwarded to core (OPENAI_BASE_URL works)
- Overview 'Savings per AI' panel: shows emoji label, request count, cost, tokens in/out, and savings per provider (🟣 Claude 🟢 ChatGPT 🟡 Ollama)
- Old pipeline-based proxy.ts replaced by direct API passthroughs
144 lines
6.1 KiB
TypeScript
144 lines
6.1 KiB
TypeScript
import type { FastifyInstance } from 'fastify';
|
|
import { createTicket, hashContent } from '../tickets/ticket-service.js';
|
|
import { logger } from '../observability/logger.js';
|
|
import { config } from '../config.js';
|
|
|
|
// Anthropic pricing per 1M tokens (USD).
// `cached` is the rate applied to tokens read from the prompt cache.
const ANTHROPIC_PRICING: Record<string, { input: number; output: number; cached?: number }> = {
  'claude-opus-4-20250514': { input: 15.0, output: 75.0, cached: 1.50 },
  'claude-opus-4-5': { input: 15.0, output: 75.0, cached: 1.50 },
  'claude-sonnet-4-20250514': { input: 3.0, output: 15.0, cached: 0.30 },
  'claude-sonnet-4-5-20251001': { input: 3.0, output: 15.0, cached: 0.30 },
  // NOTE(review): the next two keys do not follow the ID format Anthropic
  // publishes for these models; they are kept so existing lookups keep working.
  'claude-haiku-3-5-20251022': { input: 0.80, output: 4.0, cached: 0.08 },
  'claude-haiku-3-20250307': { input: 0.25, output: 1.25, cached: 0.025 },
  // Published haiku model IDs, priced like their counterparts above so that
  // real haiku traffic is not billed at the Sonnet fallback rate.
  'claude-3-5-haiku-20241022': { input: 0.80, output: 4.0, cached: 0.08 },
  'claude-3-haiku-20240307': { input: 0.25, output: 1.25, cached: 0.025 },
};

/** Pricing entry used when a model is not listed in ANTHROPIC_PRICING. */
const DEFAULT_PRICING_MODEL = 'claude-sonnet-4-20250514';

/** Divisor that converts a raw token count into "millions of tokens". */
const TOKENS_PER_MILLION = 1_000_000;

/**
 * Estimates the USD cost of a single Anthropic request.
 *
 * The three token counts are billed independently. `inputTokens` is expected
 * to be the `usage.input_tokens` value, which Anthropic reports *excluding*
 * cache reads, so cached tokens are NOT subtracted from it; they are charged
 * separately at the (cheaper) `cached` rate.
 *
 * @param model        Model ID from the request; unknown IDs use Sonnet pricing.
 * @param inputTokens  Uncached input tokens (`usage.input_tokens`).
 * @param outputTokens Generated tokens (`usage.output_tokens`).
 * @param cachedTokens Tokens served from the prompt cache (`usage.cache_read_input_tokens`).
 * @returns Cost in USD, never negative.
 */
function calcCost(model: string, inputTokens: number, outputTokens: number, cachedTokens: number): number {
  // `model` comes from the request body, so only accept the table's own keys;
  // a plain index would resolve names like "constructor" to prototype members.
  const isListed = Object.prototype.hasOwnProperty.call(ANTHROPIC_PRICING, model);
  const pricing = (isListed ? ANTHROPIC_PRICING[model] : undefined) ?? ANTHROPIC_PRICING[DEFAULT_PRICING_MODEL]!;

  const inputCost = (inputTokens / TOKENS_PER_MILLION) * pricing.input;
  const outputCost = (outputTokens / TOKENS_PER_MILLION) * pricing.output;
  // Models without a cache rate contribute nothing for cache reads.
  const cacheCost = (cachedTokens / TOKENS_PER_MILLION) * (pricing.cached ?? 0);

  return Math.max(0, inputCost + outputCost + cacheCost);
}
|
|
|
|
/** Token counters as they appear in an Anthropic `usage` object. */
interface AnthropicUsage {
  input_tokens?: number;
  output_tokens?: number;
  cache_read_input_tokens?: number;
  cache_creation_input_tokens?: number;
}

/** Model assumed when the request body does not name one. */
const FALLBACK_MODEL = 'claude-sonnet-4-20250514';

/** Narrows an unknown value to a plain (non-array, non-null) object. */
function isRecord(value: unknown): value is Record<string, unknown> {
  return typeof value === 'object' && value !== null && !Array.isArray(value);
}

/** Returns a header as a single string; repeated headers yield their first value. */
function headerValue(raw: string | string[] | undefined): string | undefined {
  return Array.isArray(raw) ? raw[0] : raw;
}

/** Flattens a message `content` (plain string or array of blocks) into its text parts. */
function contentToText(content: unknown): string {
  if (typeof content === 'string') return content;
  if (!Array.isArray(content)) return '';
  return content
    .map(block => (isRecord(block) && typeof block['text'] === 'string' ? block['text'] : ''))
    .join('');
}

/**
 * Extracts usage counters and generated text from a buffered server-sent-event
 * body, as returned by the Messages API for `stream: true` requests.
 * Lines that are not parseable `data:` payloads are ignored.
 */
function summarizeEventStream(raw: string): { usage: AnthropicUsage | undefined; text: string } {
  let usage: AnthropicUsage | undefined;
  let text = '';

  for (const line of raw.split(/\r?\n/)) {
    if (!line.startsWith('data:')) continue;

    let event: unknown;
    try {
      event = JSON.parse(line.slice('data:'.length));
    } catch {
      continue;
    }
    if (!isRecord(event)) continue;

    const message = event['message'];
    const delta = event['delta'];
    const eventUsage = event['usage'];

    if (event['type'] === 'message_start' && isRecord(message) && isRecord(message['usage'])) {
      // Initial counters (input and cache tokens) arrive with the first event.
      usage = { ...usage, ...(message['usage'] as AnthropicUsage) };
    } else if (event['type'] === 'message_delta' && isRecord(eventUsage)) {
      // Later events carry updated counters; they override the earlier ones.
      usage = { ...usage, ...(eventUsage as AnthropicUsage) };
    } else if (event['type'] === 'content_block_delta' && isRecord(delta) && typeof delta['text'] === 'string') {
      text += delta['text'];
    }
  }

  return { usage, text };
}

/**
 * Anthropic API passthrough — enables ANTHROPIC_BASE_URL routing.
 *
 * Claude Code (and any Anthropic SDK client) sends requests here when
 * ANTHROPIC_BASE_URL=https://tokenvault.fichtmueller.org is set.
 * We forward the request to Anthropic using the client's own API key,
 * then create a TokenVault ticket to track usage and cost.
 *
 * Ticket creation is fire-and-forget: a tracking failure is logged but never
 * affects the response returned to the client.
 *
 * @param app Fastify instance the `/v1/messages` route is registered on.
 */
export async function anthropicProxyRoutes(app: FastifyInstance): Promise<void> {
  // ─── POST /v1/messages — Anthropic Messages API passthrough ──────────────
  app.post('/v1/messages', async (req, reply) => {
    // A missing or non-object body is forwarded as `{}` so Anthropic produces
    // the validation error instead of this handler throwing.
    const body: Record<string, unknown> = isRecord(req.body) ? req.body : {};
    const model = typeof body['model'] === 'string' ? body['model'] : FALLBACK_MODEL;

    // Use the client's API key if provided, fall back to configured key.
    // NOTE(review): the fallback lets callers without a key spend the
    // configured one — confirm this route is not reachable unauthenticated.
    const apiKey =
      headerValue(req.headers['x-api-key']) ||
      headerValue(req.headers['authorization'])?.replace(/^Bearer /, '') ||
      config.providers.anthropic.apiKey;

    if (!apiKey) {
      reply.code(401);
      reply.header('Content-Type', 'application/json');
      return {
        type: 'error',
        error: { type: 'authentication_error', message: 'Missing Anthropic API key' },
      };
    }

    const anthropicVersion = headerValue(req.headers['anthropic-version']) ?? '2023-06-01';
    const anthropicBeta = headerValue(req.headers['anthropic-beta']);

    const caller = headerValue(req.headers['x-tokenvault-caller']);
    const project = headerValue(req.headers['x-tokenvault-project']);
    const team = headerValue(req.headers['x-tokenvault-team']);

    // Records a failed request; errors while recording are only logged.
    const recordFailure = (latencyMs: number): void => {
      void createTicket({
        provider: 'anthropic',
        model,
        status: 'failed',
        tokens_in: 0,
        tokens_out: 0,
        cost_usd: 0,
        latency_ms: latencyMs,
        caller: caller ?? 'claude-code',
        project,
        team,
        input_hash: hashContent(model),
        output_hash: '',
      }).catch((err: unknown) => logger.warn({ err }, 'Failed to create ticket for failed Anthropic request'));
    };

    // ── Forward to Anthropic ────────────────────────────────────────────────
    const forwardHeaders: Record<string, string> = {
      'Content-Type': 'application/json',
      'x-api-key': apiKey,
      'anthropic-version': anthropicVersion,
    };
    if (anthropicBeta) forwardHeaders['anthropic-beta'] = anthropicBeta;

    const start = Date.now();
    let upstream: Awaited<ReturnType<typeof fetch>>;
    let latency: number;
    let rawBody: string;
    try {
      upstream = await fetch('https://api.anthropic.com/v1/messages', {
        method: 'POST',
        headers: forwardHeaders,
        body: JSON.stringify(body),
      });
      latency = Date.now() - start;
      // Read as text: the body is not JSON for streaming requests or for
      // error pages produced by intermediaries.
      rawBody = await upstream.text();
    } catch (err: unknown) {
      logger.warn({ err, model }, 'Anthropic upstream unreachable');
      recordFailure(Date.now() - start);
      reply.code(502);
      reply.header('Content-Type', 'application/json');
      return {
        type: 'error',
        error: { type: 'api_error', message: 'Failed to reach Anthropic API' },
      };
    }

    let parsed: unknown;
    try {
      parsed = JSON.parse(rawBody);
    } catch {
      parsed = undefined;
    }
    const responseBody = isRecord(parsed) ? parsed : undefined;

    // ── Track as TokenVault ticket (best-effort, non-blocking) ─────────────
    if (upstream.ok) {
      let usage: AnthropicUsage | undefined;
      let outputText: string;
      if (responseBody) {
        usage = isRecord(responseBody['usage']) ? (responseBody['usage'] as AnthropicUsage) : undefined;
        outputText = contentToText(responseBody['content']);
      } else {
        const streamed = summarizeEventStream(rawBody);
        usage = streamed.usage;
        outputText = streamed.text;
      }

      if (usage) {
        const inputTokens = usage.input_tokens ?? 0;
        const outputTokens = usage.output_tokens ?? 0;
        const cachedTokens = usage.cache_read_input_tokens ?? 0;
        // NOTE(review): cache_creation_input_tokens is not priced here.
        const cost = calcCost(model, inputTokens, outputTokens, cachedTokens);

        const messages: unknown[] = Array.isArray(body['messages']) ? body['messages'] : [];
        const inputText = messages
          .map(message => contentToText(isRecord(message) ? message['content'] : undefined))
          .join('\n');

        void createTicket({
          provider: 'anthropic',
          model,
          status: 'completed',
          tokens_in: inputTokens,
          tokens_out: outputTokens,
          tokens_cached: cachedTokens,
          tokens_saved: 0,
          cost_usd: cost,
          latency_ms: latency,
          cache_hit: cachedTokens > 0,
          caller: caller ?? 'claude-code',
          project,
          team,
          input_hash: hashContent(inputText),
          output_hash: hashContent(outputText),
        }).catch((err: unknown) => logger.warn({ err }, 'Failed to create ticket for Anthropic passthrough'));
      }
    } else {
      // Track failed requests too
      recordFailure(latency);
      logger.warn({ model, status: upstream.status }, 'Anthropic upstream error');
    }

    // ── Return Anthropic response as-is ────────────────────────────────────
    reply.code(upstream.status);

    // Pass through useful Anthropic response headers
    const anthropicReqId = upstream.headers.get('request-id');
    if (anthropicReqId) reply.header('request-id', anthropicReqId);

    if (responseBody) {
      reply.header('Content-Type', 'application/json');
      return responseBody;
    }

    // Non-JSON body (e.g. an event stream): hand it back unmodified with the
    // upstream content type. The body was buffered in full above, so streaming
    // clients receive all events at once rather than incrementally.
    reply.header('Content-Type', upstream.headers.get('content-type') ?? 'application/json');
    return rawBody;
  });
}
|