feat: add OpenAI/ChatGPT passthrough + per-AI savings in Overview
- New openai-proxy.ts: POST /v1/chat/completions forwards the client's API key to OpenAI and creates a tracking ticket with a full cost calculation for the gpt-4o, gpt-4o-mini, gpt-4-turbo, gpt-4, gpt-3.5-turbo, o1, o1-mini and o3-mini model families
- GET /v1/models is unified in openai-proxy (the duplicate route was removed from anthropic-proxy)
- Dashboard: POST /v1/chat/completions is forwarded to core (so OPENAI_BASE_URL works)
- Overview 'Savings per AI' panel: shows emoji label, request count, cost, tokens in/out, and savings per provider (🟣 Claude 🟢 ChatGPT 🟡 Ollama)
- The old pipeline-based proxy.ts is replaced by direct API passthroughs
This commit is contained in:
parent
bdde8cf115
commit
998e8d8aee
@ -140,20 +140,4 @@ export async function anthropicProxyRoutes(app: FastifyInstance): Promise<void>
|
|||||||
return responseBody;
|
return responseBody;
|
||||||
});
|
});
|
||||||
|
|
||||||
// ─── GET /v1/models — Anthropic models list passthrough ─────────────────
|
|
||||||
app.get('/v1/models', async (req, reply) => {
|
|
||||||
const apiKey = (req.headers['x-api-key'] as string)
|
|
||||||
|| config.providers.anthropic.apiKey;
|
|
||||||
|
|
||||||
const upstream = await fetch('https://api.anthropic.com/v1/models', {
|
|
||||||
headers: {
|
|
||||||
'x-api-key': apiKey,
|
|
||||||
'anthropic-version': '2023-06-01',
|
|
||||||
},
|
|
||||||
});
|
|
||||||
|
|
||||||
reply.code(upstream.status);
|
|
||||||
reply.header('Content-Type', 'application/json');
|
|
||||||
return upstream.json();
|
|
||||||
});
|
|
||||||
}
|
}
|
||||||
|
|||||||
154
packages/core/src/routes/openai-proxy.ts
Normal file
154
packages/core/src/routes/openai-proxy.ts
Normal file
@ -0,0 +1,154 @@
|
|||||||
|
import type { FastifyInstance } from 'fastify';
|
||||||
|
import { createTicket, hashContent } from '../tickets/ticket-service.js';
|
||||||
|
import { logger } from '../observability/logger.js';
|
||||||
|
import { config } from '../config.js';
|
||||||
|
|
||||||
|
// OpenAI pricing per 1M tokens (USD).
// `cached` is the discounted rate for prompt tokens served from OpenAI's prompt
// cache; models without it are billed at the regular `input` rate for those tokens.
const OPENAI_PRICING: Record<string, { input: number; output: number; cached?: number }> = {
  'gpt-4o':        { input: 2.50,  output: 10.0,  cached: 1.25 },
  'gpt-4o-mini':   { input: 0.15,  output: 0.60,  cached: 0.075 },
  'gpt-4-turbo':   { input: 10.0,  output: 30.0 },
  'gpt-4':         { input: 30.0,  output: 60.0 },
  'gpt-3.5-turbo': { input: 0.50,  output: 1.50 },
  'o1':            { input: 15.0,  output: 60.0 },
  'o1-mini':       { input: 3.0,   output: 12.0 },
  'o3-mini':       { input: 1.10,  output: 4.40 },
};

/**
 * Find the pricing-table key for a model name.
 *
 * A key matches when the model equals it or extends it with a `-suffix`
 * (e.g. "gpt-4o-2024-11-20" → "gpt-4o"). When several keys match, the LONGEST
 * one wins, so "gpt-4o-mini" resolves to "gpt-4o-mini" rather than "gpt-4o",
 * and "o1-mini-2024-09-12" to "o1-mini" rather than "o1".
 *
 * @param model Model identifier as sent by the client.
 * @returns The matching key, or `undefined` when the model is not priced.
 */
function resolvePricingKey(model: string): string | undefined {
  let best: string | undefined;
  for (const key of Object.keys(OPENAI_PRICING)) {
    const matches = model === key || model.startsWith(key + '-');
    if (matches && (best === undefined || key.length > best.length)) {
      best = key;
    }
  }
  return best;
}

/**
 * Compute the USD cost of one chat completion.
 *
 * @param model        Model identifier (exact or dated variant of a priced model).
 * @param inputTokens  Total prompt tokens, INCLUDING any cached tokens.
 * @param outputTokens Completion tokens.
 * @param cachedTokens Portion of `inputTokens` that was served from the prompt cache.
 * @returns Cost in USD (never negative); 0 when the model has no pricing entry.
 */
function calcCost(model: string, inputTokens: number, outputTokens: number, cachedTokens: number): number {
  const key = resolvePricingKey(model);
  if (key === undefined) return 0;
  const pricing = OPENAI_PRICING[key];
  if (!pricing) return 0;

  // Cached tokens are a subset of the prompt tokens; clamp so inconsistent
  // usage data can never produce a negative uncached count.
  const promptTokens = Math.max(0, inputTokens);
  const cached = Math.min(Math.max(0, cachedTokens), promptTokens);
  const uncached = promptTokens - cached;

  // Without a discounted rate, cached tokens still cost the normal input price
  // (previously they were subtracted from the input and then billed at $0).
  const cachedRate = pricing.cached ?? pricing.input;

  const inputCost = (uncached / 1_000_000) * pricing.input;
  const cacheCost = (cached / 1_000_000) * cachedRate;
  const outputCost = (Math.max(0, outputTokens) / 1_000_000) * pricing.output;
  return Math.max(0, inputCost + outputCost + cacheCost);
}
|
||||||
|
|
||||||
|
/**
 * OpenAI API passthrough — enables OPENAI_BASE_URL routing.
 *
 * Any OpenAI SDK client routes through here when
 * OPENAI_BASE_URL=https://tokenvault.fichtmueller.org is set.
 * Requests are forwarded to OpenAI with the client's API key (falling back to
 * the configured key) and a TokenVault ticket is created for each call.
 *
 * Registers:
 *  - POST /v1/chat/completions — forwards the body unchanged, records a ticket.
 *  - GET  /v1/models           — forwards the models list.
 *
 * Upstream payloads that are not a JSON object (e.g. the SSE body of a
 * `stream: true` request, or an HTML error page) are passed through verbatim
 * with the upstream Content-Type instead of failing in JSON parsing.
 *
 * @param app Fastify instance the routes are registered on.
 */
export async function openaiProxyRoutes(app: FastifyInstance): Promise<void> {

  // ─── POST /v1/chat/completions — OpenAI Chat API passthrough ─────────────
  app.post('/v1/chat/completions', async (req, reply) => {
    // req.body is undefined when the client sends no JSON body; fall back to an
    // empty object so OpenAI produces the validation error, not this handler.
    const body = (req.body ?? {}) as Record<string, unknown>;
    const requestedModel = body['model'];
    // The default is only used for tracking; the forwarded body is not modified.
    const model = typeof requestedModel === 'string' ? requestedModel : 'gpt-4o';

    // Use client's API key, fall back to configured key
    const apiKey = resolveOpenAiKey(req.headers['authorization'] as string | undefined);

    const caller  = req.headers['x-tokenvault-caller']  as string | undefined;
    const project = req.headers['x-tokenvault-project'] as string | undefined;
    const team    = req.headers['x-tokenvault-team']    as string | undefined;
    const orgId   = req.headers['openai-organization']  as string | undefined;

    const start = Date.now();

    // ── Forward to OpenAI ───────────────────────────────────────────────────
    const forwardHeaders: Record<string, string> = {
      'Content-Type': 'application/json',
      'Authorization': `Bearer ${apiKey}`,
    };
    if (orgId) forwardHeaders['OpenAI-Organization'] = orgId;

    const upstream = await fetch('https://api.openai.com/v1/chat/completions', {
      method: 'POST',
      headers: forwardHeaders,
      body: JSON.stringify(body),
    });

    const latency = Date.now() - start;
    // Read as text first: the payload is not guaranteed to be JSON.
    const rawBody = await upstream.text();
    const responseBody = parseJsonObject(rawBody);

    // ── Track as TokenVault ticket ──────────────────────────────────────────
    if (upstream.ok) {
      const usage = responseBody?.['usage'] as {
        prompt_tokens?: number;
        completion_tokens?: number;
        total_tokens?: number;
        prompt_tokens_details?: { cached_tokens?: number };
      } | undefined;

      if (usage) {
        const inputTokens  = usage.prompt_tokens ?? 0;
        const outputTokens = usage.completion_tokens ?? 0;
        const cachedTokens = usage.prompt_tokens_details?.cached_tokens ?? 0;
        const cost = calcCost(model, inputTokens, outputTokens, cachedTokens);

        const rawMessages = body['messages'];
        const messages = Array.isArray(rawMessages)
          ? (rawMessages as Array<{ content?: unknown } | null | undefined>)
          : [];
        const inputText = messages.map(m => contentToText(m?.content)).join('\n');

        const rawChoices = responseBody?.['choices'];
        const choices = Array.isArray(rawChoices)
          ? (rawChoices as Array<{ message?: { content?: unknown } } | null | undefined>)
          : [];
        const outputText = choices.map(c => contentToText(c?.message?.content)).join('');

        // Fire-and-forget: ticket creation must not delay or fail the response.
        void createTicket({
          provider: 'openai',
          model,
          status: 'completed',
          tokens_in: inputTokens,
          tokens_out: outputTokens,
          tokens_cached: cachedTokens,
          tokens_saved: 0,
          cost_usd: cost,
          latency_ms: latency,
          cache_hit: cachedTokens > 0,
          caller: caller ?? 'openai-sdk',
          project,
          team,
          input_hash: hashContent(inputText),
          output_hash: hashContent(outputText),
        }).catch(err => logger.warn({ err }, 'Failed to create ticket for OpenAI passthrough'));
      } else if (responseBody === undefined) {
        // e.g. a streamed (SSE) response: there is no usage object to record.
        logger.warn({ model }, 'OpenAI response was not a JSON object; request not tracked');
      }
    } else {
      void createTicket({
        provider: 'openai',
        model,
        status: 'failed',
        tokens_in: 0,
        tokens_out: 0,
        cost_usd: 0,
        latency_ms: latency,
        caller: caller ?? 'openai-sdk',
        project,
        team,
        input_hash: hashContent(model),
        output_hash: '',
      }).catch(err => logger.warn({ err }, 'Failed to create failure ticket for OpenAI passthrough'));

      logger.warn({ model, status: upstream.status }, 'OpenAI upstream error');
    }

    // ── Return OpenAI response as-is + TokenVault headers ──────────────────
    reply.code(upstream.status);

    if (responseBody === undefined) {
      // Not a JSON object: hand the payload back untouched with its own type.
      reply.header('Content-Type', upstream.headers.get('content-type') ?? 'text/plain; charset=utf-8');
      return rawBody;
    }

    reply.header('Content-Type', 'application/json');

    if (upstream.ok) {
      // NOTE(review): this header is only set when the upstream body contains a
      // `tokenvault.ticket_number` field; nothing in this file adds one — confirm
      // whether the ticket number should come from createTicket() instead.
      const ticket_num = (responseBody['tokenvault'] as Record<string, unknown> | undefined)?.['ticket_number'];
      if (ticket_num) reply.header('X-TokenVault-Ticket', String(ticket_num));
    }

    return responseBody;
  });

  // ─── GET /v1/models — OpenAI models list passthrough ────────────────────
  app.get('/v1/models', async (req, reply) => {
    const apiKey = resolveOpenAiKey(req.headers['authorization'] as string | undefined);

    const upstream = await fetch('https://api.openai.com/v1/models', {
      headers: { 'Authorization': `Bearer ${apiKey}` },
    });

    reply.code(upstream.status);

    const rawBody = await upstream.text();
    const parsed = parseJsonObject(rawBody);
    if (parsed === undefined) {
      reply.header('Content-Type', upstream.headers.get('content-type') ?? 'text/plain; charset=utf-8');
      return rawBody;
    }

    reply.header('Content-Type', 'application/json');
    return parsed;
  });
}

/**
 * Pick the API key for the upstream call: the client's bearer token if present,
 * otherwise the configured OpenAI key, otherwise an empty string.
 * `||` is intentional so an empty token falls through to the next source.
 */
function resolveOpenAiKey(authHeader: string | undefined): string {
  // The auth scheme is case-insensitive, so accept "bearer" as well as "Bearer".
  const fromClient = authHeader?.replace(/^Bearer\s+/i, '').trim();
  return fromClient || config.providers.openai?.apiKey || '';
}

/**
 * Parse `text` as JSON and return it only when it is a plain JSON object;
 * returns `undefined` for invalid JSON, arrays, primitives and `null`.
 */
function parseJsonObject(text: string): Record<string, unknown> | undefined {
  try {
    const parsed: unknown = JSON.parse(text);
    if (typeof parsed === 'object' && parsed !== null && !Array.isArray(parsed)) {
      return parsed as Record<string, unknown>;
    }
    return undefined;
  } catch {
    return undefined;
  }
}

/**
 * Turn a chat message `content` value into text for hashing: strings are used
 * as-is, null/undefined become '', and structured content (e.g. an array of
 * multimodal parts) is JSON-encoded so it does not collapse to "[object Object]".
 */
function contentToText(content: unknown): string {
  if (typeof content === 'string') return content;
  if (content === null || content === undefined) return '';
  return JSON.stringify(content) ?? '';
}
|
||||||
@ -1,69 +1,4 @@
|
|||||||
import type { FastifyInstance } from 'fastify';
|
// OpenAI-compatible /v1/chat/completions is now handled by openai-proxy.ts
|
||||||
import { executePipeline } from '../pipeline/index.js';
|
// which forwards the client's API key to OpenAI and creates tracking tickets.
|
||||||
import type { ChatRequest } from '../types.js';
|
// This file is kept for any future internal pipeline routing (AI-Bridge, Ollama).
|
||||||
|
export {};
|
||||||
interface ProxyBody {
|
|
||||||
model: string;
|
|
||||||
messages: Array<{ role: string; content: string }>;
|
|
||||||
temperature?: number;
|
|
||||||
max_tokens?: number;
|
|
||||||
stream?: boolean;
|
|
||||||
// TokenVault extensions
|
|
||||||
caller?: string;
|
|
||||||
project?: string;
|
|
||||||
team?: string;
|
|
||||||
}
|
|
||||||
|
|
||||||
export async function proxyRoutes(app: FastifyInstance): Promise<void> {
|
|
||||||
app.post<{ Body: ProxyBody }>('/v1/chat/completions', async (req, reply) => {
|
|
||||||
const { model, messages, temperature, max_tokens, caller, project, team } = req.body;
|
|
||||||
|
|
||||||
const chatRequest: ChatRequest = {
|
|
||||||
model,
|
|
||||||
messages: messages.map(m => ({
|
|
||||||
role: m.role as 'system' | 'user' | 'assistant' | 'tool',
|
|
||||||
content: m.content,
|
|
||||||
})),
|
|
||||||
temperature,
|
|
||||||
max_tokens,
|
|
||||||
caller: caller ?? req.headers['x-tokenvault-caller'] as string,
|
|
||||||
project: project ?? req.headers['x-tokenvault-project'] as string,
|
|
||||||
team: team ?? req.headers['x-tokenvault-team'] as string,
|
|
||||||
};
|
|
||||||
|
|
||||||
const { response, ticket } = await executePipeline(chatRequest);
|
|
||||||
|
|
||||||
reply.header('X-TokenVault-Ticket-ID', ticket.id);
|
|
||||||
reply.header('X-TokenVault-Ticket-Number', `TV-${String(ticket.ticket_number).padStart(5, '0')}`);
|
|
||||||
reply.header('X-TokenVault-Cost-USD', ticket.cost_usd.toFixed(6));
|
|
||||||
reply.header('X-TokenVault-Tokens-Saved', ticket.tokens_saved);
|
|
||||||
|
|
||||||
// Return OpenAI-compatible format
|
|
||||||
return {
|
|
||||||
id: response.id,
|
|
||||||
object: 'chat.completion',
|
|
||||||
created: Math.floor(Date.now() / 1000),
|
|
||||||
model: response.model,
|
|
||||||
choices: response.choices.map(c => ({
|
|
||||||
index: c.index,
|
|
||||||
message: { role: c.message.role, content: c.message.content },
|
|
||||||
finish_reason: c.finish_reason,
|
|
||||||
})),
|
|
||||||
usage: {
|
|
||||||
prompt_tokens: response.usage.prompt_tokens,
|
|
||||||
completion_tokens: response.usage.completion_tokens,
|
|
||||||
total_tokens: response.usage.total_tokens,
|
|
||||||
cached_tokens: response.usage.cached_tokens,
|
|
||||||
},
|
|
||||||
// TokenVault extensions
|
|
||||||
tokenvault: {
|
|
||||||
ticket_id: ticket.id,
|
|
||||||
ticket_number: `TV-${String(ticket.ticket_number).padStart(5, '0')}`,
|
|
||||||
provider: response.provider,
|
|
||||||
cost_usd: ticket.cost_usd,
|
|
||||||
tokens_saved: ticket.tokens_saved,
|
|
||||||
latency_ms: response.latency_ms,
|
|
||||||
},
|
|
||||||
};
|
|
||||||
});
|
|
||||||
}
|
|
||||||
|
|||||||
@ -7,8 +7,8 @@ import { runRtkMigrations } from './db/rtk-migrate.js';
|
|||||||
import { closePool } from './db/client.js';
|
import { closePool } from './db/client.js';
|
||||||
import { initProviders } from './providers/index.js';
|
import { initProviders } from './providers/index.js';
|
||||||
import { healthRoutes } from './routes/health.js';
|
import { healthRoutes } from './routes/health.js';
|
||||||
import { proxyRoutes } from './routes/proxy.js';
|
|
||||||
import { anthropicProxyRoutes } from './routes/anthropic-proxy.js';
|
import { anthropicProxyRoutes } from './routes/anthropic-proxy.js';
|
||||||
|
import { openaiProxyRoutes } from './routes/openai-proxy.js';
|
||||||
import { ticketRoutes } from './routes/tickets.js';
|
import { ticketRoutes } from './routes/tickets.js';
|
||||||
import { rtkRoutes } from './routes/rtk.js';
|
import { rtkRoutes } from './routes/rtk.js';
|
||||||
|
|
||||||
@ -20,8 +20,8 @@ await app.register(cors, { origin: true });
|
|||||||
|
|
||||||
// ─── Routes ──────────────────────────────────────────────────────────────────
|
// ─── Routes ──────────────────────────────────────────────────────────────────
|
||||||
await app.register(healthRoutes);
|
await app.register(healthRoutes);
|
||||||
await app.register(proxyRoutes);
|
await app.register(anthropicProxyRoutes); // POST /v1/messages (Claude / Anthropic SDK)
|
||||||
await app.register(anthropicProxyRoutes);
|
await app.register(openaiProxyRoutes); // POST /v1/chat/completions (OpenAI SDK / ChatGPT API)
|
||||||
await app.register(ticketRoutes);
|
await app.register(ticketRoutes);
|
||||||
await app.register(rtkRoutes);
|
await app.register(rtkRoutes);
|
||||||
|
|
||||||
|
|||||||
@ -173,7 +173,7 @@ tr:hover { background:#f8fafc; }
|
|||||||
<div class="chart-row">
|
<div class="chart-row">
|
||||||
<div class="chart-placeholder" id="cost-timeline">Cost timeline will appear here after requests are tracked</div>
|
<div class="chart-placeholder" id="cost-timeline">Cost timeline will appear here after requests are tracked</div>
|
||||||
<div id="provider-split" style="background:var(--surface);border:1px solid var(--border);border-radius:12px;padding:24px;flex:1;min-height:180px">
|
<div id="provider-split" style="background:var(--surface);border:1px solid var(--border);border-radius:12px;padding:24px;flex:1;min-height:180px">
|
||||||
<div style="font-weight:600;margin-bottom:12px;color:var(--text-muted)">Provider Split</div>
|
<div style="font-weight:600;margin-bottom:12px;color:var(--text-muted)">Savings per AI / Pro KI gespart</div>
|
||||||
<div id="provider-split-inner" style="color:var(--text-muted);font-size:13px;text-align:center;margin-top:40px">Provider split will appear here after requests are tracked</div>
|
<div id="provider-split-inner" style="color:var(--text-muted);font-size:13px;text-align:center;margin-top:40px">Provider split will appear here after requests are tracked</div>
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
@ -363,21 +363,30 @@ async function loadStats() {
|
|||||||
`;
|
`;
|
||||||
}
|
}
|
||||||
|
|
||||||
// ── Provider split (simple bar chart) ───────────────────────────────────
|
// ── Savings per AI (provider breakdown) ────────────────────────────────
|
||||||
try {
|
try {
|
||||||
const breakdown = await (await fetch(API + '/cost/breakdown?group_by=provider')).json();
|
const breakdown = await (await fetch(API + '/cost/breakdown?group_by=provider')).json();
|
||||||
const total = breakdown.reduce((s, b) => s + b.request_count, 0);
|
const totalTokens = breakdown.reduce((s, b) => s + b.tokens_in + b.tokens_out, 0);
|
||||||
if (total > 0) {
|
const AI_LABELS = { anthropic:'Claude (Anthropic)', openai:'ChatGPT (OpenAI)', ollama:'Ollama (Lokal)', 'ai-bridge':'AI-Bridge' };
|
||||||
|
const AI_EMOJI = { anthropic:'🟣', openai:'🟢', ollama:'🟡', 'ai-bridge':'🔵' };
|
||||||
const colors = { anthropic:'#6366f1', openai:'#10b981', ollama:'#f59e0b', 'ai-bridge':'#3b82f6' };
|
const colors = { anthropic:'#6366f1', openai:'#10b981', ollama:'#f59e0b', 'ai-bridge':'#3b82f6' };
|
||||||
|
if (breakdown.length > 0) {
|
||||||
document.getElementById('provider-split-inner').innerHTML = breakdown.map(b => {
|
document.getElementById('provider-split-inner').innerHTML = breakdown.map(b => {
|
||||||
const pct = ((b.request_count / total) * 100).toFixed(0);
|
const tok = b.tokens_in + b.tokens_out;
|
||||||
|
const pct = totalTokens > 0 ? ((tok / totalTokens) * 100).toFixed(0) : 0;
|
||||||
const col = colors[b.group_value] || '#94a3b8';
|
const col = colors[b.group_value] || '#94a3b8';
|
||||||
return `<div style="margin-bottom:10px">
|
const label = AI_LABELS[b.group_value] || b.group_value;
|
||||||
<div style="display:flex;justify-content:space-between;font-size:12px;margin-bottom:3px">
|
const emoji = AI_EMOJI[b.group_value] || '⚫';
|
||||||
<span style="font-weight:500">${b.group_value}</span><span style="color:var(--text-muted)">${b.request_count} req · ${pct}%</span>
|
return `<div style="margin-bottom:14px">
|
||||||
|
<div style="display:flex;justify-content:space-between;align-items:center;font-size:12px;margin-bottom:4px">
|
||||||
|
<span style="font-weight:600">${emoji} ${label}</span>
|
||||||
|
<span style="color:var(--text-muted)">${b.request_count} req · $${fmt(b.cost_usd)}</span>
|
||||||
</div>
|
</div>
|
||||||
<div style="background:var(--border);border-radius:4px;height:6px">
|
<div style="display:flex;gap:8px;font-size:11px;color:var(--text-muted);margin-bottom:4px">
|
||||||
<div style="background:${col};width:${pct}%;height:6px;border-radius:4px"></div>
|
<span>↑ ${fmtK(b.tokens_in)} in</span><span>↓ ${fmtK(b.tokens_out)} out</span><span style="color:#16a34a">💰 $${fmt(b.saved_usd)} saved</span>
|
||||||
|
</div>
|
||||||
|
<div style="background:var(--border);border-radius:4px;height:5px">
|
||||||
|
<div style="background:${col};width:${pct}%;height:5px;border-radius:4px"></div>
|
||||||
</div>
|
</div>
|
||||||
</div>`;
|
</div>`;
|
||||||
}).join('');
|
}).join('');
|
||||||
|
|||||||
@ -63,6 +63,30 @@ app.get('/api/rtk/hosts', async () => {
|
|||||||
return res.json();
|
return res.json();
|
||||||
});
|
});
|
||||||
|
|
||||||
|
// ─── OpenAI-compatible proxy (OPENAI_BASE_URL routing) ──────────────────────
// OpenAI SDK / ChatGPT API clients route through here when
// OPENAI_BASE_URL=https://tokenvault.fichtmueller.org is set.
// The request is relayed to core (CORE_URL), which talks to OpenAI and records
// the tracking ticket; status, body and TokenVault headers are relayed back.
app.post('/v1/chat/completions', async (req, reply) => {
  // Only the auth/organisation headers and the TokenVault attribution headers
  // are passed on to core.
  const headers: Record<string, string> = { 'Content-Type': 'application/json' };
  for (const key of ['authorization', 'openai-organization', 'x-tokenvault-caller', 'x-tokenvault-project', 'x-tokenvault-team']) {
    const val = req.headers[key];
    if (typeof val === 'string' && val) headers[key] = val;
  }

  const res = await fetch(`${CORE_URL}/v1/chat/completions`, {
    method: 'POST',
    headers,
    body: JSON.stringify(req.body),
  });

  reply.code(res.status);

  // Forward TokenVault tracking headers. 'x-tokenvault-ticket' is the header the
  // core OpenAI passthrough sets; the remaining names are kept for core
  // responses that use the more detailed header set.
  for (const h of ['x-tokenvault-ticket', 'x-tokenvault-ticket-id', 'x-tokenvault-ticket-number', 'x-tokenvault-cost-usd', 'x-tokenvault-tokens-saved']) {
    const v = res.headers.get(h);
    if (v) reply.header(h, v);
  }

  // Read as text first so a non-JSON payload (streamed/SSE body, proxy error
  // page) is relayed verbatim instead of throwing inside res.json().
  const raw = await res.text();
  let payload: unknown;
  try {
    payload = JSON.parse(raw);
  } catch {
    payload = undefined;
  }

  if (typeof payload !== 'object' || payload === null) {
    reply.header('Content-Type', res.headers.get('content-type') ?? 'text/plain; charset=utf-8');
    return raw;
  }

  reply.header('Content-Type', 'application/json');
  return payload;
});
|
||||||
|
|
||||||
// ─── Anthropic API passthrough (ANTHROPIC_BASE_URL routing) ──────────────────
|
// ─── Anthropic API passthrough (ANTHROPIC_BASE_URL routing) ──────────────────
|
||||||
// Claude Code routes through here when ANTHROPIC_BASE_URL=https://tokenvault.fichtmueller.org
|
// Claude Code routes through here when ANTHROPIC_BASE_URL=https://tokenvault.fichtmueller.org
|
||||||
// Forward to core which handles tracking + proxying to Anthropic.
|
// Forward to core which handles tracking + proxying to Anthropic.
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user