diff --git a/packages/core/src/routes/anthropic-proxy.ts b/packages/core/src/routes/anthropic-proxy.ts
index 169577f..8343702 100644
--- a/packages/core/src/routes/anthropic-proxy.ts
+++ b/packages/core/src/routes/anthropic-proxy.ts
@@ -140,20 +140,4 @@ export async function anthropicProxyRoutes(app: FastifyInstance): Promise<void>
     return responseBody;
   });
 
-  // ─── GET /v1/models — Anthropic models list passthrough ─────────────────
-  app.get('/v1/models', async (req, reply) => {
-    const apiKey = (req.headers['x-api-key'] as string)
-      || config.providers.anthropic.apiKey;
-
-    const upstream = await fetch('https://api.anthropic.com/v1/models', {
-      headers: {
-        'x-api-key': apiKey,
-        'anthropic-version': '2023-06-01',
-      },
-    });
-
-    reply.code(upstream.status);
-    reply.header('Content-Type', 'application/json');
-    return upstream.json();
-  });
 }
diff --git a/packages/core/src/routes/openai-proxy.ts b/packages/core/src/routes/openai-proxy.ts
new file mode 100644
index 0000000..bad1883
--- /dev/null
+++ b/packages/core/src/routes/openai-proxy.ts
@@ -0,0 +1,209 @@
+import type { FastifyInstance } from 'fastify';
+import { createTicket, hashContent } from '../tickets/ticket-service.js';
+import { logger } from '../observability/logger.js';
+import { config } from '../config.js';
+
+/** Price of one model in USD per 1M tokens. */
+interface ModelPricing {
+  input: number;
+  output: number;
+  /** Discounted rate for cached prompt tokens; absent when the model has none. */
+  cached?: number;
+}
+
+// OpenAI pricing per 1M tokens (USD)
+const OPENAI_PRICING: Record<string, ModelPricing> = {
+  'gpt-4o': { input: 2.50, output: 10.0, cached: 1.25 },
+  'gpt-4o-mini': { input: 0.15, output: 0.60, cached: 0.075 },
+  'gpt-4-turbo': { input: 10.0, output: 30.0 },
+  'gpt-4': { input: 30.0, output: 60.0 },
+  'gpt-3.5-turbo': { input: 0.50, output: 1.50 },
+  'o1': { input: 15.0, output: 60.0 },
+  'o1-mini': { input: 3.0, output: 12.0 },
+  'o3-mini': { input: 1.10, output: 4.40 },
+};
+
+// Pricing keys, longest first: "gpt-4o-mini" and "o1-mini" must be tried before
+// their shorter prefixes "gpt-4o" and "o1", which would otherwise shadow them.
+const PRICING_KEYS = Object.keys(OPENAI_PRICING).sort((a, b) => b.length - a.length);
+
+/**
+ * Estimate the USD cost of one completion from its token usage.
+ *
+ * @param model - Model id from the request; a dated snapshot such as
+ *   "gpt-4o-2024-11-20" is priced like its base model "gpt-4o".
+ * @param inputTokens - Prompt tokens, cached ones included.
+ * @param outputTokens - Completion tokens.
+ * @param cachedTokens - Prompt tokens reported as cached.
+ * @returns Cost in USD; 0 when the model is not in the pricing table.
+ */
+function calcCost(model: string, inputTokens: number, outputTokens: number, cachedTokens: number): number {
+  const key = PRICING_KEYS.find(k => model === k || model.startsWith(k + '-'));
+  if (!key) return 0;
+  const pricing = OPENAI_PRICING[key]!;
+  // Cached tokens are part of the prompt tokens; clamp so odd usage data cannot go negative.
+  const cached = Math.min(Math.max(cachedTokens, 0), Math.max(inputTokens, 0));
+  // A model without a cached rate bills cached tokens at the regular input rate.
+  const cachedRate = pricing.cached ?? pricing.input;
+  const inputCost = ((inputTokens - cached) / 1_000_000) * pricing.input;
+  const outputCost = (outputTokens / 1_000_000) * pricing.output;
+  const cacheCost = (cached / 1_000_000) * cachedRate;
+  return Math.max(0, inputCost + outputCost + cacheCost);
+}
+
+/** Pick the OpenAI key: the client's bearer token first, then the configured key, else ''. */
+function resolveApiKey(authHeader: string | undefined): string {
+  // The auth scheme is case-insensitive, so accept "bearer" as well as "Bearer".
+  return authHeader?.replace(/^Bearer\s+/i, '') || config.providers.openai?.apiKey || '';
+}
+
+/** Flatten a message `content` (plain string, or an array of content parts) to text for hashing. */
+function contentToText(content: unknown): string {
+  if (typeof content === 'string') return content;
+  if (content == null) return '';
+  return JSON.stringify(content);
+}
+
+/**
+ * OpenAI API passthrough — enables OPENAI_BASE_URL routing.
+ *
+ * Any OpenAI SDK client routes through here when
+ * OPENAI_BASE_URL=https://tokenvault.fichtmueller.org is set.
+ * Forwards to OpenAI with the client's API key and creates a TokenVault ticket.
+ */
+export async function openaiProxyRoutes(app: FastifyInstance): Promise<void> {
+
+  // ─── POST /v1/chat/completions — OpenAI Chat API passthrough ─────────────
+  app.post('/v1/chat/completions', async (req, reply) => {
+    const body = (req.body ?? {}) as Record<string, unknown>;
+    const model = (body['model'] as string) ?? 'gpt-4o';
+
+    // Use client's API key, fall back to configured key
+    const apiKey = resolveApiKey(req.headers['authorization'] as string | undefined);
+
+    const caller = req.headers['x-tokenvault-caller'] as string | undefined;
+    const project = req.headers['x-tokenvault-project'] as string | undefined;
+    const team = req.headers['x-tokenvault-team'] as string | undefined;
+    const orgId = req.headers['openai-organization'] as string | undefined;
+
+    const start = Date.now();
+
+    // ── Forward to OpenAI ───────────────────────────────────────────────────
+    const forwardHeaders: Record<string, string> = {
+      'Content-Type': 'application/json',
+      'Authorization': `Bearer ${apiKey}`,
+    };
+    if (orgId) forwardHeaders['OpenAI-Organization'] = orgId;
+
+    const upstream = await fetch('https://api.openai.com/v1/chat/completions', {
+      method: 'POST',
+      headers: forwardHeaders,
+      body: JSON.stringify(body),
+    });
+
+    const latency = Date.now() - start;
+
+    // The body is not always a JSON object (SSE when `stream: true`, gateway error
+    // pages), so read it as text and parse defensively instead of `upstream.json()`.
+    const rawBody = await upstream.text();
+    let responseBody: Record<string, unknown> | undefined;
+    try {
+      const parsed: unknown = JSON.parse(rawBody);
+      if (parsed !== null && typeof parsed === 'object') {
+        responseBody = parsed as Record<string, unknown>;
+      }
+    } catch {
+      // Not JSON — relayed verbatim below, without usage tracking.
+    }
+
+    // ── Track as TokenVault ticket ──────────────────────────────────────────
+    if (upstream.ok) {
+      const usage = responseBody?.['usage'] as {
+        prompt_tokens?: number;
+        completion_tokens?: number;
+        total_tokens?: number;
+        prompt_tokens_details?: { cached_tokens?: number };
+      } | undefined;
+
+      if (usage) {
+        const inputTokens = usage.prompt_tokens ?? 0;
+        const outputTokens = usage.completion_tokens ?? 0;
+        const cachedTokens = usage.prompt_tokens_details?.cached_tokens ?? 0;
+        const cost = calcCost(model, inputTokens, outputTokens, cachedTokens);
+
+        const messages = (body['messages'] as Array<{ role: string; content: unknown }>) ?? [];
+        const inputText = messages.map(m => contentToText(m.content)).join('\n');
+        const choices = (responseBody?.['choices'] as Array<{ message?: { content?: string } }>) ?? [];
+        const outputText = choices.map(c => c.message?.content ?? '').join('');
+
+        createTicket({
+          provider: 'openai',
+          model,
+          status: 'completed',
+          tokens_in: inputTokens,
+          tokens_out: outputTokens,
+          tokens_cached: cachedTokens,
+          tokens_saved: 0,
+          cost_usd: cost,
+          latency_ms: latency,
+          cache_hit: cachedTokens > 0,
+          caller: caller ?? 'openai-sdk',
+          project,
+          team,
+          input_hash: hashContent(inputText),
+          output_hash: hashContent(outputText),
+        }).catch(err => logger.warn({ err }, 'Failed to create ticket for OpenAI passthrough'));
+      }
+    } else {
+      createTicket({
+        provider: 'openai',
+        model,
+        status: 'failed',
+        tokens_in: 0,
+        tokens_out: 0,
+        cost_usd: 0,
+        latency_ms: latency,
+        caller: caller ?? 'openai-sdk',
+        project,
+        team,
+        input_hash: hashContent(model),
+        output_hash: '',
+      }).catch(err => logger.warn({ err }, 'Failed to create ticket for failed OpenAI request'));
+
+      logger.warn({ model, status: upstream.status }, 'OpenAI upstream error');
+    }
+
+    // ── Return OpenAI response as-is + TokenVault headers ──────────────────
+    reply.code(upstream.status);
+
+    if (responseBody === undefined) {
+      reply.header('Content-Type', upstream.headers.get('content-type') ?? 'text/plain');
+      return rawBody;
+    }
+
+    reply.header('Content-Type', 'application/json');
+
+    if (upstream.ok) {
+      // NOTE(review): the ticket above is created without awaiting it, so its number is
+      // not known here; this header is only set if the upstream body carries `tokenvault`.
+      const ticket_num = (responseBody['tokenvault'] as Record<string, unknown> | undefined)?.['ticket_number'];
+      if (ticket_num) reply.header('X-TokenVault-Ticket', String(ticket_num));
+    }
+
+    return responseBody;
+  });
+
+  // ─── GET /v1/models — OpenAI models list passthrough ────────────────────
+  app.get('/v1/models', async (req, reply) => {
+    const apiKey = resolveApiKey(req.headers['authorization'] as string | undefined);
+
+    const upstream = await fetch('https://api.openai.com/v1/models', {
+      headers: { 'Authorization': `Bearer ${apiKey}` },
+    });
+
+    reply.code(upstream.status);
+    // Relay the body as text so a non-JSON error page cannot make the handler throw.
+    reply.header('Content-Type', upstream.headers.get('content-type') ?? 'application/json');
+    return upstream.text();
+  });
+}
diff --git a/packages/core/src/routes/proxy.ts b/packages/core/src/routes/proxy.ts
index 210952d..42c9b6e 100644
--- a/packages/core/src/routes/proxy.ts
+++ b/packages/core/src/routes/proxy.ts
@@ -1,69 +1,4 @@
-import type { FastifyInstance } from 'fastify';
-import { executePipeline } from '../pipeline/index.js';
-import type { ChatRequest } from '../types.js';
-
-interface ProxyBody {
-  model: string;
-  messages: Array<{ role: string; content: string }>;
-  temperature?: number;
-  max_tokens?: number;
-  stream?: boolean;
-  // TokenVault extensions
-  caller?: string;
-  project?: string;
-  team?: string;
-}
-
-export async function proxyRoutes(app: FastifyInstance): Promise<void> {
-  app.post<{ Body: ProxyBody }>('/v1/chat/completions', async (req, reply) => {
-    const { model, messages, temperature, max_tokens, caller, project, team } = req.body;
-
-    const chatRequest: ChatRequest = {
-      model,
-      messages: messages.map(m => ({
-        role: m.role as 'system' | 'user' | 'assistant' | 'tool',
-        content: m.content,
-      })),
-      temperature,
-      max_tokens,
-      caller: caller ?? req.headers['x-tokenvault-caller'] as string,
-      project: project ?? req.headers['x-tokenvault-project'] as string,
-      team: team ?? req.headers['x-tokenvault-team'] as string,
-    };
-
-    const { response, ticket } = await executePipeline(chatRequest);
-
-    reply.header('X-TokenVault-Ticket-ID', ticket.id);
-    reply.header('X-TokenVault-Ticket-Number', `TV-${String(ticket.ticket_number).padStart(5, '0')}`);
-    reply.header('X-TokenVault-Cost-USD', ticket.cost_usd.toFixed(6));
-    reply.header('X-TokenVault-Tokens-Saved', ticket.tokens_saved);
-
-    // Return OpenAI-compatible format
-    return {
-      id: response.id,
-      object: 'chat.completion',
-      created: Math.floor(Date.now() / 1000),
-      model: response.model,
-      choices: response.choices.map(c => ({
-        index: c.index,
-        message: { role: c.message.role, content: c.message.content },
-        finish_reason: c.finish_reason,
-      })),
-      usage: {
-        prompt_tokens: response.usage.prompt_tokens,
-        completion_tokens: response.usage.completion_tokens,
-        total_tokens: response.usage.total_tokens,
-        cached_tokens: response.usage.cached_tokens,
-      },
-      // TokenVault extensions
-      tokenvault: {
-        ticket_id: ticket.id,
-        ticket_number: `TV-${String(ticket.ticket_number).padStart(5, '0')}`,
-        provider: response.provider,
-        cost_usd: ticket.cost_usd,
-        tokens_saved: ticket.tokens_saved,
-        latency_ms: response.latency_ms,
-      },
-    };
-  });
-}
+// OpenAI-compatible /v1/chat/completions is now handled by openai-proxy.ts
+// which forwards the client's API key to OpenAI and creates tracking tickets.
+// This file is kept for any future internal pipeline routing (AI-Bridge, Ollama).
+export {};
diff --git a/packages/core/src/server.ts b/packages/core/src/server.ts
index 5c78268..9b8fcc7 100644
--- a/packages/core/src/server.ts
+++ b/packages/core/src/server.ts
@@ -7,8 +7,8 @@ import { runRtkMigrations } from './db/rtk-migrate.js';
 import { closePool } from './db/client.js';
 import { initProviders } from './providers/index.js';
 import { healthRoutes } from './routes/health.js';
-import { proxyRoutes } from './routes/proxy.js';
 import { anthropicProxyRoutes } from './routes/anthropic-proxy.js';
+import { openaiProxyRoutes } from './routes/openai-proxy.js';
 import { ticketRoutes } from './routes/tickets.js';
 import { rtkRoutes } from './routes/rtk.js';
 
@@ -20,8 +20,8 @@ await app.register(cors, { origin: true });
 
 // ─── Routes ──────────────────────────────────────────────────────────────────
 await app.register(healthRoutes);
-await app.register(proxyRoutes);
-await app.register(anthropicProxyRoutes);
+await app.register(anthropicProxyRoutes);  // POST /v1/messages (Claude / Anthropic SDK)
+await app.register(openaiProxyRoutes);  // POST /v1/chat/completions (OpenAI SDK / ChatGPT API)
 await app.register(ticketRoutes);
 await app.register(rtkRoutes);
 
diff --git a/packages/dashboard/public/index.html b/packages/dashboard/public/index.html
index fffa953..7895c6f 100644
--- a/packages/dashboard/public/index.html
+++ b/packages/dashboard/public/index.html
@@ -173,7 +173,7 @@ tr:hover { background:#f8fafc; }
Cost timeline will appear here after requests are tracked
-
Provider Split
+
Savings per AI / Pro KI gespart
Provider split will appear here after requests are tracked
@@ -363,21 +363,30 @@ async function loadStats() { `; } - // ── Provider split (simple bar chart) ─────────────────────────────────── + // ── Savings per AI (provider breakdown) ──────────────────────────────── try { const breakdown = await (await fetch(API + '/cost/breakdown?group_by=provider')).json(); - const total = breakdown.reduce((s, b) => s + b.request_count, 0); - if (total > 0) { - const colors = { anthropic:'#6366f1', openai:'#10b981', ollama:'#f59e0b', 'ai-bridge':'#3b82f6' }; + const totalTokens = breakdown.reduce((s, b) => s + b.tokens_in + b.tokens_out, 0); + const AI_LABELS = { anthropic:'Claude (Anthropic)', openai:'ChatGPT (OpenAI)', ollama:'Ollama (Lokal)', 'ai-bridge':'AI-Bridge' }; + const AI_EMOJI = { anthropic:'🟣', openai:'🟢', ollama:'🟡', 'ai-bridge':'🔵' }; + const colors = { anthropic:'#6366f1', openai:'#10b981', ollama:'#f59e0b', 'ai-bridge':'#3b82f6' }; + if (breakdown.length > 0) { document.getElementById('provider-split-inner').innerHTML = breakdown.map(b => { - const pct = ((b.request_count / total) * 100).toFixed(0); + const tok = b.tokens_in + b.tokens_out; + const pct = totalTokens > 0 ? ((tok / totalTokens) * 100).toFixed(0) : 0; const col = colors[b.group_value] || '#94a3b8'; - return `
-
- ${b.group_value}${b.request_count} req · ${pct}% + const label = AI_LABELS[b.group_value] || b.group_value; + const emoji = AI_EMOJI[b.group_value] || '⚫'; + return `
+
+ ${emoji} ${label} + ${b.request_count} req · $${fmt(b.cost_usd)}
-
-
+
+ ↑ ${fmtK(b.tokens_in)} in↓ ${fmtK(b.tokens_out)} out💰 $${fmt(b.saved_usd)} saved +
+
+
`; }).join('');
diff --git a/packages/dashboard/src/server.ts b/packages/dashboard/src/server.ts
index a7c1c73..deea41c 100644
--- a/packages/dashboard/src/server.ts
+++ b/packages/dashboard/src/server.ts
@@ -63,6 +63,33 @@ app.get('/api/rtk/hosts', async () => {
   return res.json();
 });
 
+// ─── OpenAI-compatible proxy (OPENAI_BASE_URL routing) ──────────────────────
+// OpenAI SDK / ChatGPT API clients route through here when
+// OPENAI_BASE_URL=https://tokenvault.fichtmueller.org is set.
+app.post('/v1/chat/completions', async (req, reply) => {
+  const headers: Record<string, string> = { 'Content-Type': 'application/json' };
+  for (const key of ['authorization', 'openai-organization', 'x-tokenvault-caller', 'x-tokenvault-project', 'x-tokenvault-team']) {
+    const val = req.headers[key];
+    // A repeated request header arrives as string[]; forward only its first value.
+    const first = Array.isArray(val) ? val[0] : val;
+    if (first) headers[key] = first;
+  }
+  const res = await fetch(`${CORE_URL}/v1/chat/completions`, {
+    method: 'POST',
+    headers,
+    body: JSON.stringify(req.body),
+  });
+  reply.code(res.status);
+  // Relay core's content type and raw body so a non-JSON reply cannot make this handler throw.
+  reply.header('Content-Type', res.headers.get('content-type') ?? 'application/json');
+  // Forward TokenVault tracking headers ('x-tokenvault-ticket' is the one core's OpenAI route sets)
+  for (const h of ['x-tokenvault-ticket', 'x-tokenvault-ticket-id', 'x-tokenvault-ticket-number', 'x-tokenvault-cost-usd', 'x-tokenvault-tokens-saved']) {
+    const v = res.headers.get(h);
+    if (v) reply.header(h, v);
+  }
+  return res.text();
+});
+
 // ─── Anthropic API passthrough (ANTHROPIC_BASE_URL routing) ──────────────────
 // Claude Code routes through here when ANTHROPIC_BASE_URL=https://tokenvault.fichtmueller.org
 // Forward to core which handles tracking + proxying to Anthropic.