feat: add OpenAI/ChatGPT passthrough + per-AI savings in Overview

- New openai-proxy.ts: POST /v1/chat/completions forwards client's API key
  to OpenAI, creates tracking ticket with full cost calculation for all
  gpt-4o, gpt-4o-mini, o1, o3-mini, gpt-4-turbo, gpt-3.5-turbo models
- GET /v1/models unified in openai-proxy (removed duplicate from anthropic-proxy)
- Dashboard: POST /v1/chat/completions forwarded to core (OPENAI_BASE_URL works)
- Overview 'Savings per AI' panel: shows emoji label, request count, cost,
  tokens in/out, and savings per provider (🟣 Claude 🟢 ChatGPT 🟡 Ollama)
- Old pipeline-based proxy.ts replaced by direct API passthroughs
This commit is contained in:
Rene Fichtmueller 2026-04-14 22:55:13 +02:00
parent bdde8cf115
commit 998e8d8aee
6 changed files with 205 additions and 99 deletions

View File

@ -140,20 +140,4 @@ export async function anthropicProxyRoutes(app: FastifyInstance): Promise<void>
return responseBody;
});
// ─── GET /v1/models — Anthropic models list passthrough ─────────────────
app.get('/v1/models', async (req, reply) => {
const apiKey = (req.headers['x-api-key'] as string)
|| config.providers.anthropic.apiKey;
const upstream = await fetch('https://api.anthropic.com/v1/models', {
headers: {
'x-api-key': apiKey,
'anthropic-version': '2023-06-01',
},
});
reply.code(upstream.status);
reply.header('Content-Type', 'application/json');
return upstream.json();
});
}

View File

@ -0,0 +1,154 @@
import type { FastifyInstance } from 'fastify';
import { createTicket, hashContent } from '../tickets/ticket-service.js';
import { logger } from '../observability/logger.js';
import { config } from '../config.js';
// OpenAI pricing per 1M tokens (USD).
// `cached` is the discounted rate for prompt tokens served from OpenAI's
// prompt cache; when absent, cached tokens are billed at the normal input rate.
const OPENAI_PRICING: Record<string, { input: number; output: number; cached?: number }> = {
  'gpt-4o': { input: 2.50, output: 10.0, cached: 1.25 },
  'gpt-4o-mini': { input: 0.15, output: 0.60, cached: 0.075 },
  'gpt-4-turbo': { input: 10.0, output: 30.0 },
  'gpt-4': { input: 30.0, output: 60.0 },
  'gpt-3.5-turbo': { input: 0.50, output: 1.50 },
  'o1': { input: 15.0, output: 60.0 },
  'o1-mini': { input: 3.0, output: 12.0 },
  'o3-mini': { input: 1.10, output: 4.40 },
};

/**
 * Computes the USD cost of one OpenAI request from its token usage.
 *
 * @param model        Model name as sent by the client; may carry a dated
 *                     suffix (e.g. "gpt-4o-2024-11-20").
 * @param inputTokens  Total prompt tokens, including cached ones.
 * @param outputTokens Completion tokens.
 * @param cachedTokens Portion of `inputTokens` served from the prompt cache.
 * @returns Cost in USD, or 0 when the model is not in OPENAI_PRICING.
 */
function calcCost(model: string, inputTokens: number, outputTokens: number, cachedTokens: number): number {
  // Resolve the pricing key: an exact match wins, otherwise the LONGEST key
  // that is a "<key>-" prefix of the model. Taking the first prefix match
  // would price "gpt-4o-mini" as "gpt-4o" and "o1-mini" as "o1".
  let key: string | undefined;
  for (const candidate of Object.keys(OPENAI_PRICING)) {
    if (model === candidate) {
      key = candidate;
      break;
    }
    if (model.startsWith(`${candidate}-`) && (key === undefined || candidate.length > key.length)) {
      key = candidate;
    }
  }
  const pricing = key === undefined ? undefined : OPENAI_PRICING[key];
  if (!pricing) return 0;

  // Sanitize counts so malformed usage data can never yield a negative or NaN cost.
  const clean = (n: number): number => (Number.isFinite(n) && n > 0 ? n : 0);
  const totalIn = clean(inputTokens);
  const totalOut = clean(outputTokens);
  // Cached tokens are a subset of the prompt tokens; never let them exceed it.
  const cachedIn = Math.min(clean(cachedTokens), totalIn);

  const inputCost = ((totalIn - cachedIn) / 1_000_000) * pricing.input;
  const outputCost = (totalOut / 1_000_000) * pricing.output;
  // Without a dedicated cached rate, cached tokens cost the regular input rate
  // (they must not become free).
  const cacheCost = (cachedIn / 1_000_000) * (pricing.cached ?? pricing.input);
  return inputCost + outputCost + cacheCost;
}
/** Narrows an unknown value to a plain (non-null, non-array) object. */
function isRecord(value: unknown): value is Record<string, unknown> {
  return typeof value === 'object' && value !== null && !Array.isArray(value);
}

/** Returns `value` when it is a finite positive number, otherwise 0. */
function toCount(value: unknown): number {
  return typeof value === 'number' && Number.isFinite(value) && value > 0 ? value : 0;
}

/**
 * Flattens a chat message `content` field to text for hashing.
 * Strings pass through, null/undefined become '', and structured content
 * (e.g. multimodal part arrays) is JSON-encoded instead of collapsing to
 * "[object Object]".
 */
function contentToText(content: unknown): string {
  if (typeof content === 'string') return content;
  if (content === null || content === undefined) return '';
  return JSON.stringify(content) ?? '';
}

/**
 * Picks the API key to forward: the client's Bearer token if present,
 * otherwise the configured OpenAI key, otherwise ''.
 */
function resolveOpenAiKey(authHeader: string | string[] | undefined): string {
  const raw = Array.isArray(authHeader) ? authHeader[0] : authHeader;
  // Auth scheme names are case-insensitive, so accept "bearer" as well.
  const token = raw?.replace(/^Bearer\s+/i, '').trim();
  // `||` on purpose: an empty token must fall through to the configured key.
  return token || config.providers.openai?.apiKey || '';
}

/**
 * Reads an upstream body once as text and tries to decode it as a JSON object.
 * `json` is undefined when the body is not a JSON object (SSE stream, HTML
 * error page, empty body); `text` always holds the raw body.
 */
async function readUpstreamBody(
  upstream: { text(): Promise<string> },
): Promise<{ json: Record<string, unknown> | undefined; text: string }> {
  const text = await upstream.text();
  try {
    const parsed: unknown = JSON.parse(text);
    return { json: isRecord(parsed) ? parsed : undefined, text };
  } catch {
    return { json: undefined, text };
  }
}

/**
 * OpenAI API passthrough — enables OPENAI_BASE_URL routing.
 *
 * Any OpenAI SDK client routes through here when
 * OPENAI_BASE_URL=https://tokenvault.fichtmueller.org is set.
 * Forwards to OpenAI with the client's API key and creates a TokenVault ticket.
 *
 * Registers:
 *  - POST /v1/chat/completions — forwards the request body unchanged and
 *    records a ticket ('completed' with usage/cost, or 'failed').
 *  - GET /v1/models — forwards the models list.
 *
 * Upstream bodies that are not JSON objects are returned verbatim with the
 * upstream Content-Type instead of failing the request.
 */
export async function openaiProxyRoutes(app: FastifyInstance): Promise<void> {
  // ─── POST /v1/chat/completions — OpenAI Chat API passthrough ─────────────
  app.post('/v1/chat/completions', async (req, reply) => {
    // A missing or non-object body must not crash the handler; OpenAI will
    // answer with its own validation error.
    const body = isRecord(req.body) ? req.body : {};
    const model = typeof body['model'] === 'string' ? body['model'] : 'gpt-4o';

    // Use client's API key, fall back to configured key
    const apiKey = resolveOpenAiKey(req.headers['authorization']);
    const caller = req.headers['x-tokenvault-caller'] as string | undefined;
    const project = req.headers['x-tokenvault-project'] as string | undefined;
    const team = req.headers['x-tokenvault-team'] as string | undefined;
    const orgId = req.headers['openai-organization'] as string | undefined;

    // ── Forward to OpenAI ───────────────────────────────────────────────────
    const forwardHeaders: Record<string, string> = {
      'Content-Type': 'application/json',
      'Authorization': `Bearer ${apiKey}`,
    };
    if (orgId) forwardHeaders['OpenAI-Organization'] = orgId;

    const start = Date.now();
    const upstream = await fetch('https://api.openai.com/v1/chat/completions', {
      method: 'POST',
      headers: forwardHeaders,
      body: JSON.stringify(req.body ?? {}),
    });
    // Latency covers time to response headers, not body download.
    const latency = Date.now() - start;
    const { json, text } = await readUpstreamBody(upstream);

    // ── Track as TokenVault ticket ──────────────────────────────────────────
    if (upstream.ok) {
      const usage = json && isRecord(json['usage']) ? json['usage'] : undefined;
      if (json && usage) {
        const inputTokens = toCount(usage['prompt_tokens']);
        const outputTokens = toCount(usage['completion_tokens']);
        const details = usage['prompt_tokens_details'];
        const cachedTokens = isRecord(details) ? toCount(details['cached_tokens']) : 0;
        const cost = calcCost(model, inputTokens, outputTokens, cachedTokens);

        const messages: unknown[] = Array.isArray(body['messages']) ? body['messages'] : [];
        const inputText = messages
          .map(m => contentToText(isRecord(m) ? m['content'] : undefined))
          .join('\n');
        const choices: unknown[] = Array.isArray(json['choices']) ? json['choices'] : [];
        const outputText = choices
          .map(c => (isRecord(c) && isRecord(c['message']) ? contentToText(c['message']['content']) : ''))
          .join('');

        // Fire-and-forget: ticket persistence must never delay or fail the response.
        createTicket({
          provider: 'openai',
          model,
          status: 'completed',
          tokens_in: inputTokens,
          tokens_out: outputTokens,
          tokens_cached: cachedTokens,
          tokens_saved: 0,
          cost_usd: cost,
          latency_ms: latency,
          cache_hit: cachedTokens > 0,
          caller: caller ?? 'openai-sdk',
          project,
          team,
          input_hash: hashContent(inputText),
          output_hash: hashContent(outputText),
        }).catch(err => logger.warn({ err }, 'Failed to create ticket for OpenAI passthrough'));
      } else if (!json) {
        // e.g. `stream: true` (server-sent events): passed through but not tracked.
        logger.warn({ model }, 'OpenAI response was not a JSON object; passing through untracked');
      }
    } else {
      createTicket({
        provider: 'openai',
        model,
        status: 'failed',
        tokens_in: 0,
        tokens_out: 0,
        cost_usd: 0,
        latency_ms: latency,
        caller: caller ?? 'openai-sdk',
        project,
        team,
        input_hash: hashContent(model),
        output_hash: '',
      }).catch(err => logger.warn({ err }, 'Failed to create failure ticket for OpenAI passthrough'));
      logger.warn({ model, status: upstream.status }, 'OpenAI upstream error');
    }

    // ── Return OpenAI response as-is + TokenVault headers ──────────────────
    reply.code(upstream.status);
    if (!json) {
      // Non-JSON body: hand it back verbatim with the upstream content type.
      // Note the body was fully buffered, so SSE events arrive in one piece.
      reply.header('Content-Type', upstream.headers.get('content-type') ?? 'text/plain; charset=utf-8');
      return text;
    }
    reply.header('Content-Type', 'application/json');
    if (upstream.ok) {
      // NOTE(review): nothing in this handler adds a `tokenvault` field to the
      // upstream body, so this header is presumably never set — confirm whether
      // the ticket number should come from createTicket()'s result instead.
      const tokenvault = json['tokenvault'];
      const ticketNum = isRecord(tokenvault) ? tokenvault['ticket_number'] : undefined;
      if (ticketNum) reply.header('X-TokenVault-Ticket', String(ticketNum));
    }
    return json;
  });

  // ─── GET /v1/models — OpenAI models list passthrough ────────────────────
  app.get('/v1/models', async (req, reply) => {
    const apiKey = resolveOpenAiKey(req.headers['authorization']);
    const upstream = await fetch('https://api.openai.com/v1/models', {
      headers: { 'Authorization': `Bearer ${apiKey}` },
    });
    const { json, text } = await readUpstreamBody(upstream);
    reply.code(upstream.status);
    if (!json) {
      reply.header('Content-Type', upstream.headers.get('content-type') ?? 'text/plain; charset=utf-8');
      return text;
    }
    reply.header('Content-Type', 'application/json');
    return json;
  });
}

View File

@ -1,69 +1,4 @@
import type { FastifyInstance } from 'fastify';
import { executePipeline } from '../pipeline/index.js';
import type { ChatRequest } from '../types.js';
// Request body accepted by the legacy pipeline-based POST /v1/chat/completions
// route: the OpenAI-style chat fields plus optional TokenVault attribution fields.
interface ProxyBody {
model: string;
messages: Array<{ role: string; content: string }>;
temperature?: number;
max_tokens?: number;
// Declared here, but the handler in this file does not read it.
stream?: boolean;
// TokenVault extensions
caller?: string;
project?: string;
team?: string;
}
// Legacy pipeline-based proxy: registers POST /v1/chat/completions, runs the
// request through executePipeline() and answers in OpenAI chat.completion
// shape with an extra `tokenvault` object and X-TokenVault-* response headers.
export async function proxyRoutes(app: FastifyInstance): Promise<void> {
app.post<{ Body: ProxyBody }>('/v1/chat/completions', async (req, reply) => {
const { model, messages, temperature, max_tokens, caller, project, team } = req.body;
// Attribution fields come from the body first, then from x-tokenvault-* headers.
const chatRequest: ChatRequest = {
model,
messages: messages.map(m => ({
role: m.role as 'system' | 'user' | 'assistant' | 'tool',
content: m.content,
})),
temperature,
max_tokens,
caller: caller ?? req.headers['x-tokenvault-caller'] as string,
project: project ?? req.headers['x-tokenvault-project'] as string,
team: team ?? req.headers['x-tokenvault-team'] as string,
};
const { response, ticket } = await executePipeline(chatRequest);
// Expose ticket metadata as headers; the ticket number is formatted as TV-00042.
reply.header('X-TokenVault-Ticket-ID', ticket.id);
reply.header('X-TokenVault-Ticket-Number', `TV-${String(ticket.ticket_number).padStart(5, '0')}`);
reply.header('X-TokenVault-Cost-USD', ticket.cost_usd.toFixed(6));
reply.header('X-TokenVault-Tokens-Saved', ticket.tokens_saved);
// Return OpenAI-compatible format
return {
id: response.id,
object: 'chat.completion',
// `created` is the time this response is built, in Unix seconds.
created: Math.floor(Date.now() / 1000),
model: response.model,
choices: response.choices.map(c => ({
index: c.index,
message: { role: c.message.role, content: c.message.content },
finish_reason: c.finish_reason,
})),
usage: {
prompt_tokens: response.usage.prompt_tokens,
completion_tokens: response.usage.completion_tokens,
total_tokens: response.usage.total_tokens,
cached_tokens: response.usage.cached_tokens,
},
// TokenVault extensions
tokenvault: {
ticket_id: ticket.id,
ticket_number: `TV-${String(ticket.ticket_number).padStart(5, '0')}`,
provider: response.provider,
cost_usd: ticket.cost_usd,
tokens_saved: ticket.tokens_saved,
latency_ms: response.latency_ms,
},
};
});
}
// OpenAI-compatible /v1/chat/completions is now handled by openai-proxy.ts
// which forwards the client's API key to OpenAI and creates tracking tickets.
// This file is kept for any future internal pipeline routing (AI-Bridge, Ollama).
export {};

View File

@ -7,8 +7,8 @@ import { runRtkMigrations } from './db/rtk-migrate.js';
import { closePool } from './db/client.js';
import { initProviders } from './providers/index.js';
import { healthRoutes } from './routes/health.js';
import { proxyRoutes } from './routes/proxy.js';
import { anthropicProxyRoutes } from './routes/anthropic-proxy.js';
import { openaiProxyRoutes } from './routes/openai-proxy.js';
import { ticketRoutes } from './routes/tickets.js';
import { rtkRoutes } from './routes/rtk.js';
@ -20,8 +20,8 @@ await app.register(cors, { origin: true });
// ─── Routes ──────────────────────────────────────────────────────────────────
await app.register(healthRoutes);
await app.register(proxyRoutes);
await app.register(anthropicProxyRoutes);
await app.register(anthropicProxyRoutes); // POST /v1/messages (Claude / Anthropic SDK)
await app.register(openaiProxyRoutes); // POST /v1/chat/completions (OpenAI SDK / ChatGPT API)
await app.register(ticketRoutes);
await app.register(rtkRoutes);

View File

@ -173,7 +173,7 @@ tr:hover { background:#f8fafc; }
<div class="chart-row">
<div class="chart-placeholder" id="cost-timeline">Cost timeline will appear here after requests are tracked</div>
<div id="provider-split" style="background:var(--surface);border:1px solid var(--border);border-radius:12px;padding:24px;flex:1;min-height:180px">
<div style="font-weight:600;margin-bottom:12px;color:var(--text-muted)">Provider Split</div>
<div style="font-weight:600;margin-bottom:12px;color:var(--text-muted)">Savings per AI / Pro KI gespart</div>
<div id="provider-split-inner" style="color:var(--text-muted);font-size:13px;text-align:center;margin-top:40px">Provider split will appear here after requests are tracked</div>
</div>
</div>
@ -363,21 +363,30 @@ async function loadStats() {
`;
}
// ── Provider split (simple bar chart) ───────────────────────────────────
// ── Savings per AI (provider breakdown) ────────────────────────────────
try {
const breakdown = await (await fetch(API + '/cost/breakdown?group_by=provider')).json();
const total = breakdown.reduce((s, b) => s + b.request_count, 0);
if (total > 0) {
const colors = { anthropic:'#6366f1', openai:'#10b981', ollama:'#f59e0b', 'ai-bridge':'#3b82f6' };
const totalTokens = breakdown.reduce((s, b) => s + b.tokens_in + b.tokens_out, 0);
const AI_LABELS = { anthropic:'Claude (Anthropic)', openai:'ChatGPT (OpenAI)', ollama:'Ollama (Lokal)', 'ai-bridge':'AI-Bridge' };
const AI_EMOJI = { anthropic:'🟣', openai:'🟢', ollama:'🟡', 'ai-bridge':'🔵' };
const colors = { anthropic:'#6366f1', openai:'#10b981', ollama:'#f59e0b', 'ai-bridge':'#3b82f6' };
if (breakdown.length > 0) {
document.getElementById('provider-split-inner').innerHTML = breakdown.map(b => {
const pct = ((b.request_count / total) * 100).toFixed(0);
const tok = b.tokens_in + b.tokens_out;
const pct = totalTokens > 0 ? ((tok / totalTokens) * 100).toFixed(0) : 0;
const col = colors[b.group_value] || '#94a3b8';
return `<div style="margin-bottom:10px">
<div style="display:flex;justify-content:space-between;font-size:12px;margin-bottom:3px">
<span style="font-weight:500">${b.group_value}</span><span style="color:var(--text-muted)">${b.request_count} req · ${pct}%</span>
const label = AI_LABELS[b.group_value] || b.group_value;
const emoji = AI_EMOJI[b.group_value] || '⚫';
return `<div style="margin-bottom:14px">
<div style="display:flex;justify-content:space-between;align-items:center;font-size:12px;margin-bottom:4px">
<span style="font-weight:600">${emoji} ${label}</span>
<span style="color:var(--text-muted)">${b.request_count} req · $${fmt(b.cost_usd)}</span>
</div>
<div style="background:var(--border);border-radius:4px;height:6px">
<div style="background:${col};width:${pct}%;height:6px;border-radius:4px"></div>
<div style="display:flex;gap:8px;font-size:11px;color:var(--text-muted);margin-bottom:4px">
<span>↑ ${fmtK(b.tokens_in)} in</span><span>↓ ${fmtK(b.tokens_out)} out</span><span style="color:#16a34a">💰 $${fmt(b.saved_usd)} saved</span>
</div>
<div style="background:var(--border);border-radius:4px;height:5px">
<div style="background:${col};width:${pct}%;height:5px;border-radius:4px"></div>
</div>
</div>`;
}).join('');

View File

@ -63,6 +63,30 @@ app.get('/api/rtk/hosts', async () => {
return res.json();
});
// ─── OpenAI-compatible proxy (OPENAI_BASE_URL routing) ──────────────────────
// OpenAI SDK / ChatGPT API clients route through here when
// OPENAI_BASE_URL=https://tokenvault.fichtmueller.org is set.
// The request is forwarded to core at CORE_URL; core's status, Content-Type,
// body and TokenVault tracking headers are relayed back to the client.
app.post('/v1/chat/completions', async (req, reply) => {
  const headers: Record<string, string> = { 'Content-Type': 'application/json' };
  // Only auth and attribution headers are forwarded to core.
  for (const key of ['authorization', 'openai-organization', 'x-tokenvault-caller', 'x-tokenvault-project', 'x-tokenvault-team']) {
    const val = req.headers[key];
    // Node may expose repeated headers as an array; forward the first value.
    const first = Array.isArray(val) ? val[0] : val;
    if (first) headers[key] = first;
  }

  const res = await fetch(`${CORE_URL}/v1/chat/completions`, {
    method: 'POST',
    headers,
    body: JSON.stringify(req.body),
  });

  reply.code(res.status);
  // Relay core's content type so non-JSON bodies are labelled correctly.
  reply.header('Content-Type', res.headers.get('content-type') ?? 'application/json');

  // Forward TokenVault tracking headers. 'x-tokenvault-ticket' is the header
  // set by core's OpenAI passthrough; the others are kept for compatibility.
  for (const h of ['x-tokenvault-ticket', 'x-tokenvault-ticket-id', 'x-tokenvault-ticket-number', 'x-tokenvault-cost-usd', 'x-tokenvault-tokens-saved']) {
    const v = res.headers.get(h);
    if (v) reply.header(h, v);
  }

  // Relay the raw body: parsing it with res.json() would throw (and turn the
  // reply into a 500) whenever core answers with something that is not JSON.
  return res.text();
});
// ─── Anthropic API passthrough (ANTHROPIC_BASE_URL routing) ──────────────────
// Claude Code routes through here when ANTHROPIC_BASE_URL=https://tokenvault.fichtmueller.org
// Forward to core which handles tracking + proxying to Anthropic.