llm-gateway/packages/gateway/src/pipeline/external-providers.ts
Rene Fichtmueller 0191c60b64 chore: commit deployed gateway state (dashboard, streaming, routing, bridges, cost-tracking)
Live production state on Erik that had drifted from Gitea — deployed across several
sessions but never committed. Excludes deploy/ecosystem.config.cjs (holds live tokens).

- dashboard: passive usage-report endpoint, per-device entries, CEST timezone, cost-panel rounding
- completion: SSE + HTTP/2 streaming
- pipeline: routing-rules, request-scorer, external-providers (subscription bridges)
- cost-tracking: tokenvault migration, cost-calculator, request-logger
- infra: docker-compose bridge env, server/health/tls, deps
2026-06-05 20:23:33 +00:00

495 lines
17 KiB
TypeScript

import { logger } from '../observability/logger.js';
// ─── External Provider Configuration ────────────────────────────────
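// Fallback LLM providers used when Ollama is unavailable: subscription bridges
// (Claude, OpenAI/ChatGPT, Copilot, Codex) plus free hosted APIs.
// Most use the OpenAI-compatible /v1/chat/completions format; the providers listed in
// GENERATE_BRIDGE_PROVIDERS (claude-bridge, claude-code) are called via /api/generate.
// Source for the free APIs: github.com/mnfst/awesome-free-llm-apis (2026-04)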
/**
 * Static description of one external LLM backend in the fallback registry.
 */
export interface ExternalProvider {
  /** Unique identifier; the resolution helpers in this file branch on it. */
  readonly name: string;
  /** Default API root. Empty for providers whose URL is built from environment variables at call time. */
  readonly baseUrl: string;
  /** Environment variable read as the API key for providers without special-case handling. */
  readonly envKey: string;
  /** Models offered by this provider. */
  readonly models: ReadonlyArray<ExternalModel>;
  /** Maximum number of recorded requests allowed per 60-second sliding window. */
  readonly rateLimitRpm: number;
  /** Providers with `enabled: false` are excluded from the available-provider list. */
  readonly enabled: boolean;
}
/**
 * One model exposed by an external provider.
 */
export interface ExternalModel {
  /** Model identifier sent as the `model` field of the outgoing request payload. */
  readonly id: string;
  /** Capability tier used to match a model against the requested target tier. */
  readonly tier: 'fast' | 'medium' | 'large' | 'reasoning';
  /** Context window size declared for the model (presumably in tokens — not enforced in this file). */
  readonly contextLength: number;
}
/**
 * Provider-independent completion request accepted by the external fallback chain.
 */
export interface ExternalCompletionRequest {
  /** Model name supplied by the caller. The payload builders send the selected provider model's id instead. */
  readonly model: string;
  /** Conversation turns in chat format (`role` + `content`). */
  readonly messages: ReadonlyArray<{ readonly role: string; readonly content: string }>;
  /** Sampling temperature; the payload builders fall back to 0.3 when omitted. */
  readonly temperature?: number;
  /** Completion token cap; the payload builders fall back to 2048 when omitted. */
  readonly max_tokens?: number;
}
/**
 * Normalised result of a successful external provider call.
 */
export interface ExternalCompletionResponse {
  /** Completion text extracted from the provider's response body. */
  readonly response: string;
  /** Model reported by the provider, or the requested model id when the provider reports none. */
  readonly model: string;
  /** Name of the provider that produced the response. */
  readonly provider: string;
  /** Prompt token count from the provider's `usage` block; 0 when absent. */
  readonly inputTokens: number;
  /** Completion token count from the provider's `usage` block; 0 when absent. */
  readonly outputTokens: number;
  /** Milliseconds elapsed between the start of the call and the response being parsed. */
  readonly latencyMs: number;
}
// ─── Provider Registry ──────────────────────────────────────────────
/**
 * Registry of external providers, in the order the fallback chain tries them.
 *
 * Entries with an empty `baseUrl` have their URL resolved from environment
 * variables at call time (see `getBaseUrl`).
 */
const PROVIDERS: ReadonlyArray<ExternalProvider> = [
  // ── Subscription bridges (URL comes from the environment) ──
  {
    name: 'claude-bridge',
    baseUrl: '', // constructed from CLAUDE_BRIDGE_URL env var
    envKey: 'CLAUDE_BRIDGE_URL',
    rateLimitRpm: 100,
    enabled: true,
    models: [
      { id: 'claude-opus-4-1', tier: 'reasoning', contextLength: 200_000 },
      { id: 'claude-sonnet-4-6', tier: 'large', contextLength: 200_000 },
      { id: 'claude-haiku-3', tier: 'fast', contextLength: 200_000 },
    ],
  },
  {
    name: 'openai-bridge',
    baseUrl: '', // constructed from OPENAI_BRIDGE_URL env var
    envKey: 'OPENAI_BRIDGE_URL',
    rateLimitRpm: 90,
    enabled: true,
    models: [
      { id: 'gpt-4-turbo', tier: 'reasoning', contextLength: 128_000 },
      { id: 'gpt-4', tier: 'reasoning', contextLength: 8_192 },
      { id: 'gpt-3.5-turbo', tier: 'fast', contextLength: 16_384 },
    ],
  },
  {
    name: 'chatgpt-bridge',
    baseUrl: '', // constructed from CHATGPT_BRIDGE_URL env var (same as openai-bridge)
    envKey: 'CHATGPT_BRIDGE_URL',
    rateLimitRpm: 90,
    enabled: true,
    models: [
      { id: 'gpt-4-turbo', tier: 'reasoning', contextLength: 128_000 },
      { id: 'gpt-4', tier: 'large', contextLength: 8_192 },
      { id: 'gpt-3.5-turbo', tier: 'medium', contextLength: 16_384 },
    ],
  },
  {
    name: 'copilot-bridge',
    baseUrl: '', // constructed from COPILOT_BRIDGE_URL env var
    envKey: 'COPILOT_BRIDGE_URL',
    rateLimitRpm: 60,
    enabled: true,
    models: [
      { id: 'gpt-4', tier: 'reasoning', contextLength: 8_192 },
      { id: 'gpt-3.5-turbo', tier: 'medium', contextLength: 4_096 },
    ],
  },
  {
    name: 'm365-copilot-bridge',
    baseUrl: '', // constructed from M365_COPILOT_BRIDGE_URL env var
    envKey: 'M365_COPILOT_BRIDGE_URL',
    rateLimitRpm: 60,
    enabled: true,
    models: [
      { id: 'microsoft-365-copilot', tier: 'reasoning', contextLength: 128_000 },
      { id: 'm365-copilot-chat', tier: 'large', contextLength: 128_000 },
    ],
  },

  // ── Hosted APIs authenticated with an API key ──
  {
    name: 'cerebras',
    baseUrl: 'https://api.cerebras.ai/v1',
    envKey: 'CEREBRAS_API_KEY',
    rateLimitRpm: 30,
    enabled: true,
    models: [
      { id: 'llama-3.3-70b', tier: 'large', contextLength: 8_192 },
      { id: 'qwen3-235b', tier: 'reasoning', contextLength: 8_192 },
    ],
  },
  {
    name: 'groq',
    baseUrl: 'https://api.groq.com/openai/v1',
    envKey: 'GROQ_API_KEY',
    rateLimitRpm: 30,
    enabled: true,
    models: [
      { id: 'llama-3.3-70b-versatile', tier: 'large', contextLength: 131_072 },
      { id: 'llama-3.1-8b-instant', tier: 'fast', contextLength: 131_072 },
      { id: 'gemma2-9b-it', tier: 'medium', contextLength: 8_192 },
    ],
  },
  {
    name: 'mistral',
    baseUrl: 'https://api.mistral.ai/v1',
    envKey: 'MISTRAL_API_KEY',
    rateLimitRpm: 60,
    enabled: true,
    models: [
      { id: 'mistral-large-latest', tier: 'reasoning', contextLength: 131_072 },
      { id: 'mistral-small-latest', tier: 'medium', contextLength: 131_072 },
      { id: 'ministral-8b-latest', tier: 'fast', contextLength: 131_072 },
    ],
  },
  {
    name: 'nvidia',
    baseUrl: 'https://integrate.api.nvidia.com/v1',
    envKey: 'NVIDIA_API_KEY',
    rateLimitRpm: 40,
    enabled: true,
    models: [
      { id: 'meta/llama-3.3-70b-instruct', tier: 'large', contextLength: 131_072 },
      { id: 'mistralai/mistral-large-2-instruct', tier: 'reasoning', contextLength: 131_072 },
    ],
  },
  {
    name: 'cloudflare',
    baseUrl: '', // constructed dynamically from CLOUDFLARE_ACCOUNT_ID
    envKey: 'CLOUDFLARE_AI_TOKEN',
    rateLimitRpm: 100,
    enabled: true,
    models: [
      { id: '@cf/meta/llama-3.3-70b-instruct-fp8-fast', tier: 'large', contextLength: 8_192 },
      { id: '@cf/qwen/qwen1.5-14b-chat-awq', tier: 'medium', contextLength: 32_768 },
    ],
  },

  // ── Codex / Claude Code providers (bridge URL from the environment when set) ──
  {
    name: 'openai-codex',
    baseUrl: 'https://api.openai.com/v1',
    envKey: 'OPENAI_CODEX_URL',
    rateLimitRpm: 60,
    enabled: true,
    models: [
      { id: 'gpt-5.1-codex', tier: 'reasoning', contextLength: 256_000 },
      { id: 'gpt-5.1-codex-mini', tier: 'large', contextLength: 256_000 },
      { id: 'codex-mini-latest', tier: 'medium', contextLength: 200_000 },
    ],
  },
  {
    name: 'claude-code',
    baseUrl: '', // constructed from CLAUDE_CODE_URL env var
    envKey: 'CLAUDE_CODE_URL',
    rateLimitRpm: 100,
    enabled: true,
    models: [
      { id: 'claude-opus-4-1', tier: 'reasoning', contextLength: 200_000 },
      { id: 'claude-sonnet-4-6', tier: 'large', contextLength: 200_000 },
      { id: 'claude-haiku-3', tier: 'fast', contextLength: 200_000 },
    ],
  },
  {
    name: 'codex',
    baseUrl: 'https://api.github.com/copilot_inner/v2',
    envKey: 'CODEX_BRIDGE_URL',
    rateLimitRpm: 60,
    enabled: true,
    models: [
      { id: 'gpt-5.1-codex', tier: 'reasoning', contextLength: 256_000 },
      { id: 'gpt-5.1-codex-mini', tier: 'large', contextLength: 256_000 },
      { id: 'codex-mini-latest', tier: 'medium', contextLength: 200_000 },
    ],
  },
];
/**
 * Providers that are always called without an `Authorization` header.
 * (`openai-codex` and `codex` are handled separately in `buildRequestHeaders`,
 * because they are authless only when a bridge URL is configured.)
 */
const AUTHLESS_BRIDGE_PROVIDERS = new Set<string>([
  'claude-bridge',
  'claude-code',
  'openai-bridge',
  'chatgpt-bridge',
  'copilot-bridge',
  'm365-copilot-bridge',
]);

/** Providers called via `<baseUrl>/api/generate` with a prompt/system payload instead of chat completions. */
const GENERATE_BRIDGE_PROVIDERS = new Set<string>(['claude-bridge', 'claude-code']);
// ─── Rate Limiter (simple sliding window) ───────────────────────────
/** Per-provider request timestamps (ms since epoch), keyed by provider name; backs the sliding-window limiter. */
const requestTimestamps = new Map<string, number[]>();
/**
 * Reports whether `provider` has used up its per-minute request budget.
 *
 * Side effect: timestamps older than 60 seconds are dropped from the
 * provider's entry in `requestTimestamps`, so the stored list is pruned on
 * every check.
 *
 * @param provider Provider whose `rateLimitRpm` budget is checked.
 * @returns `true` when the number of requests recorded in the last 60 seconds
 *   is at least `provider.rateLimitRpm`.
 */
function isRateLimited(provider: ExternalProvider): boolean {
  const cutoff = Date.now() - 60_000;
  const stillInWindow: number[] = [];
  for (const stamp of requestTimestamps.get(provider.name) ?? []) {
    // Same condition as `now - stamp < 60_000`.
    if (stamp > cutoff) {
      stillInWindow.push(stamp);
    }
  }
  requestTimestamps.set(provider.name, stillInWindow);
  return stillInWindow.length >= provider.rateLimitRpm;
}
/**
 * Appends the current time to the provider's request history.
 *
 * @param providerName Key into `requestTimestamps`.
 */
function recordRequest(providerName: string): void {
  const history = requestTimestamps.get(providerName);
  if (history === undefined) {
    requestTimestamps.set(providerName, [Date.now()]);
    return;
  }
  // The stored array is mutated in place, so no re-insert is needed.
  history.push(Date.now());
}
// ─── Provider Resolution ────────────────────────────────────────────
/**
 * Resolves the credential for `provider`, or a sentinel string for bridges
 * that need no credential.
 *
 * For bridge providers the returned value only signals that the bridge is
 * configured (its URL environment variable is set). For every other provider
 * it is the value of the environment variable named by `provider.envKey`.
 *
 * @param provider Provider to resolve.
 * @returns A non-empty string when the provider is usable, otherwise `undefined`.
 */
function getApiKey(provider: ExternalProvider): string | undefined {
  const env = process.env;
  // Returns `sentinel` when `value` is set and non-empty, otherwise undefined.
  const whenSet = (value: string | undefined, sentinel: string): string | undefined =>
    value ? sentinel : undefined;

  switch (provider.name) {
    case 'claude-bridge':
      // claude-bridge doesn't use an API key; it needs the explicit enable flag and a URL.
      return env['CLAUDE_BRIDGE_ENABLED'] === 'true'
        ? whenSet(env['CLAUDE_BRIDGE_URL'], 'claude-bridge-enabled')
        : undefined;

    case 'claude-code':
      // claude-code uses the Claude Code subscription bridge.
      return whenSet(env['CLAUDE_CODE_URL'], 'claude-code-enabled');

    case 'openai-bridge':
      // Subscription bridge auth is handled by the bridge process/CLI session.
      return whenSet(env['OPENAI_BRIDGE_URL'], 'openai-bridge-enabled');

    case 'chatgpt-bridge':
      // The ChatGPT Plus bridge can reuse the OpenAI bridge when configured that way.
      return whenSet(env['CHATGPT_BRIDGE_URL'] || env['OPENAI_BRIDGE_URL'], 'chatgpt-bridge-enabled');

    case 'copilot-bridge':
      // GitHub Copilot subscription; auth is handled internally by copilot-api.
      return whenSet(env['COPILOT_BRIDGE_URL'], 'copilot-authenticated');

    case 'm365-copilot-bridge':
      // Microsoft 365 Copilot uses Microsoft Graph delegated auth inside the bridge.
      return whenSet(env['M365_COPILOT_BRIDGE_URL'], 'm365-copilot-bridge-enabled');

    case 'openai-codex':
      // A configured bridge wins; otherwise fall back to a real OpenAI API key.
      return (
        whenSet(env['OPENAI_CODEX_URL'] || env['CODEX_BRIDGE_URL'], 'openai-codex-bridge-enabled') ??
        (env['OPENAI_API_KEY'] || undefined)
      );

    case 'codex':
      // Codex can run through an authless local/subscription bridge; a token remains supported as fallback.
      return (
        whenSet(env['CODEX_BRIDGE_URL'] || env['OPENAI_CODEX_URL'], 'codex-bridge-enabled') ??
        (env['GITHUB_CODEX_TOKEN'] || undefined)
      );

    default:
      return env[provider.envKey] || undefined;
  }
}
/**
 * Resolves the API root for `provider`.
 *
 * Bridge URLs are read from environment variables. Values are trimmed and
 * stripped of trailing slashes before a path segment is appended, so a setting
 * such as `http://host:9000/` yields `http://host:9000/v1` rather than
 * `http://host:9000//v1`.
 *
 * - `claude-bridge` / `claude-code`: the configured URL as-is (no `/v1` suffix).
 * - Other bridges: configured URL + `/v1`.
 * - `openai-codex` / `codex`: bridge URL + `/v1` when one is configured,
 *   otherwise the provider's static `baseUrl`.
 * - `cloudflare`: built from `CLOUDFLARE_ACCOUNT_ID`.
 * - Everything else: the provider's static `baseUrl`.
 *
 * @param provider Provider to resolve.
 * @returns The base URL, or `''` when the required environment variable is missing.
 */
function getBaseUrl(provider: ExternalProvider): string {
  // First non-empty value among `keys`, trimmed and without trailing slashes; '' when none is set.
  const envUrl = (...keys: string[]): string => {
    for (const key of keys) {
      const value = (process.env[key] ?? '').trim().replace(/\/+$/, '');
      if (value) return value;
    }
    return '';
  };
  // Appends the OpenAI-style `/v1` segment; falls back when no URL is configured.
  const withV1 = (url: string, fallback = ''): string => (url ? `${url}/v1` : fallback);

  switch (provider.name) {
    case 'claude-bridge':
      return envUrl('CLAUDE_BRIDGE_URL');
    case 'claude-code':
      return envUrl('CLAUDE_CODE_URL');
    case 'openai-bridge':
      return withV1(envUrl('OPENAI_BRIDGE_URL'));
    case 'chatgpt-bridge':
      // The ChatGPT bridge may reuse the OpenAI bridge URL.
      return withV1(envUrl('CHATGPT_BRIDGE_URL', 'OPENAI_BRIDGE_URL'));
    case 'copilot-bridge':
      return withV1(envUrl('COPILOT_BRIDGE_URL'));
    case 'm365-copilot-bridge':
      return withV1(envUrl('M365_COPILOT_BRIDGE_URL'));
    case 'openai-codex':
      return withV1(envUrl('OPENAI_CODEX_URL', 'CODEX_BRIDGE_URL'), provider.baseUrl);
    case 'codex':
      return withV1(envUrl('CODEX_BRIDGE_URL', 'OPENAI_CODEX_URL'), provider.baseUrl);
    case 'cloudflare': {
      const accountId = (process.env['CLOUDFLARE_ACCOUNT_ID'] ?? '').trim();
      if (!accountId) return '';
      return `https://api.cloudflare.com/client/v4/accounts/${accountId}/ai/v1`;
    }
    default:
      return provider.baseUrl;
  }
}
/**
 * Lists the providers that can currently be called: enabled in the registry
 * and with a resolvable API key (or bridge sentinel).
 *
 * @returns Usable providers in registry order.
 */
export function getAvailableProviders(): readonly ExternalProvider[] {
  const usable: ExternalProvider[] = [];
  for (const candidate of PROVIDERS) {
    // Disabled providers are skipped before any environment lookup happens.
    if (candidate.enabled && getApiKey(candidate)) {
      usable.push(candidate);
    }
  }
  return usable;
}
/**
 * Returns the full provider registry, regardless of the `enabled` flag or
 * whether an API key is present. Intended for dashboard listing.
 *
 * @returns The registry array itself (not a copy).
 */
export function getAllProviders(): readonly ExternalProvider[] {
  return PROVIDERS;
}
/**
 * Picks the provider model that best matches `targetTier`.
 *
 * Preference order:
 *  1. a model of exactly the requested tier;
 *  2. the nearest higher tier;
 *  3. the nearest lower tier (so a request for `reasoning` gets `large`
 *     rather than whatever happens to be listed first);
 *  4. the first listed model, as a last resort for unrecognised tiers.
 *
 * @param provider Provider whose `models` are searched.
 * @param targetTier Requested capability tier.
 * @returns The chosen model, or `undefined` when the provider lists no models.
 */
function findBestModel(
  provider: ExternalProvider,
  targetTier: 'fast' | 'medium' | 'large' | 'reasoning',
): ExternalModel | undefined {
  const tierOrder: readonly string[] = ['fast', 'medium', 'large', 'reasoning'];
  const targetIdx = tierOrder.indexOf(targetTier);
  const firstOfTier = (tier: string | undefined): ExternalModel | undefined =>
    provider.models.find((m) => m.tier === tier);

  // Exact tier match first.
  const exact = firstOfTier(targetTier);
  if (exact) return exact;

  // Then step up: a stronger model can always stand in for a weaker one.
  for (let i = targetIdx + 1; i < tierOrder.length; i++) {
    const higher = firstOfTier(tierOrder[i]);
    if (higher) return higher;
  }

  // Then step down, closest tier first, instead of depending on list order.
  for (let i = targetIdx - 1; i >= 0; i--) {
    const lower = firstOfTier(tierOrder[i]);
    if (lower) return lower;
  }

  // Last resort: any model.
  return provider.models[0];
}
// ─── OpenAI-Compatible Client ───────────────────────────────────────
/**
 * Builds the HTTP headers for a provider call.
 *
 * `Authorization: Bearer <apiKey>` is added unless the provider goes through
 * an authless bridge: either it is listed in `AUTHLESS_BRIDGE_PROVIDERS`, or it
 * is `openai-codex` / `codex` and a Codex bridge URL is configured in the
 * environment.
 *
 * @param provider Provider being called.
 * @param apiKey Credential (or bridge sentinel) resolved for the provider.
 * @returns Header map that always contains `Content-Type: application/json`.
 */
function buildRequestHeaders(provider: ExternalProvider, apiKey: string): Record<string, string> {
  const codexBridgeConfigured = Boolean(process.env['OPENAI_CODEX_URL'] || process.env['CODEX_BRIDGE_URL']);
  const isCodexFamily = provider.name === 'openai-codex' || provider.name === 'codex';
  const skipAuth = AUTHLESS_BRIDGE_PROVIDERS.has(provider.name) || (isCodexFamily && codexBridgeConfigured);

  if (skipAuth) {
    return { 'Content-Type': 'application/json' };
  }
  return {
    'Content-Type': 'application/json',
    Authorization: `Bearer ${apiKey}`,
  };
}
/**
 * Builds the chat-completions request body.
 *
 * The selected provider model's id is always sent; `request.model` is not used.
 *
 * @param model Provider model chosen for this call.
 * @param request Caller's completion request.
 * @returns Body with `model`, `messages`, `temperature` (default 0.3) and
 *   `max_tokens` (default 2048).
 */
function buildRequestPayload(model: ExternalModel, request: ExternalCompletionRequest): Record<string, unknown> {
  const temperature = request.temperature ?? 0.3;
  const maxTokens = request.max_tokens ?? 2048;

  const payload: Record<string, unknown> = {};
  payload['model'] = model.id;
  payload['messages'] = request.messages;
  payload['temperature'] = temperature;
  payload['max_tokens'] = maxTokens;
  return payload;
}
/**
 * Builds the request body for bridges called via `/api/generate`, which take a
 * flat `prompt` plus an optional `system` string instead of a message list.
 *
 * All system messages are joined (blank-line separated) into `system`. Taking
 * only the first one would silently lose the rest, because system messages
 * are also excluded from the prompt. Every other message is rendered as
 * `"<role>: <content>"` and joined with blank lines to form `prompt`.
 *
 * @param model Provider model chosen for this call.
 * @param request Caller's completion request.
 * @returns Body with `model`, `prompt`, `system` (`undefined` when the request
 *   has no system message), `temperature` (default 0.3) and `max_tokens`
 *   (default 2048).
 */
function buildGenerateBridgePayload(model: ExternalModel, request: ExternalCompletionRequest): Record<string, unknown> {
  const systemParts: string[] = [];
  const promptParts: string[] = [];
  for (const message of request.messages) {
    if (message.role === 'system') {
      systemParts.push(message.content);
    } else {
      promptParts.push(`${message.role}: ${message.content}`);
    }
  }

  return {
    model: model.id,
    prompt: promptParts.join('\n\n'),
    // Stay `undefined` (dropped by JSON.stringify) when there is no system message.
    system: systemParts.length > 0 ? systemParts.join('\n\n') : undefined,
    temperature: request.temperature ?? 0.3,
    max_tokens: request.max_tokens ?? 2048,
  };
}
/**
 * Normalises a provider's JSON response into an `ExternalCompletionResponse`
 * and records the request for rate limiting.
 *
 * The completion text is taken from the first of these that is present:
 * `choices[0].message.content`, `content`, `response`, `message.content`.
 * Because the body is untyped, the result is coerced so the declared types
 * hold: arrays of text parts (plain strings, or objects with a string `text`
 * field) are concatenated, and token counts that are not finite numbers
 * become 0.
 *
 * @param data Parsed JSON body returned by the provider.
 * @param model Model that was requested; its id is used when the body reports none.
 * @param provider Provider that was called.
 * @param start Timestamp (ms since epoch) taken just before the request was sent.
 * @returns The normalised response.
 * @throws Error when the body is `null` or `undefined`.
 */
function parseExternalResponse(
  data: unknown,
  model: ExternalModel,
  provider: ExternalProvider,
  start: number,
): ExternalCompletionResponse {
  if (data === null || data === undefined) {
    throw new Error(`${provider.name} returned an empty response body`);
  }

  // Treats anything that is not a non-null object as an empty record.
  const asRecord = (value: unknown): Record<string, unknown> =>
    typeof value === 'object' && value !== null ? (value as Record<string, unknown>) : {};

  // Coerces a content value of unknown shape into plain text.
  const toText = (raw: unknown): string => {
    if (typeof raw === 'string') return raw;
    if (Array.isArray(raw)) {
      return raw
        .map((part: unknown) => {
          if (typeof part === 'string') return part;
          const text = asRecord(part)['text'];
          return typeof text === 'string' ? text : '';
        })
        .join('');
    }
    if (typeof raw === 'number' || typeof raw === 'boolean') return String(raw);
    return '';
  };

  const toCount = (raw: unknown): number => (typeof raw === 'number' && Number.isFinite(raw) ? raw : 0);

  const body = asRecord(data);
  const choices = body['choices'];
  const firstChoice = asRecord(Array.isArray(choices) ? choices[0] : undefined);
  const rawContent =
    asRecord(firstChoice['message'])['content'] ??
    body['content'] ??
    body['response'] ??
    asRecord(body['message'])['content'] ??
    '';
  const usage = asRecord(body['usage']);
  const reportedModel = body['model'];

  const result: ExternalCompletionResponse = {
    response: toText(rawContent),
    model: typeof reportedModel === 'string' ? reportedModel : model.id,
    provider: provider.name,
    inputTokens: toCount(usage['prompt_tokens']),
    outputTokens: toCount(usage['completion_tokens']),
    latencyMs: Date.now() - start,
  };

  // Record last: a malformed body never counts as a completed request here.
  recordRequest(provider.name);
  return result;
}
/**
 * Sends one completion request to `provider` and returns the normalised result.
 *
 * Providers in `GENERATE_BRIDGE_PROVIDERS` are called at `<baseUrl>/api/generate`
 * with a prompt/system payload; all others at `<baseUrl>/chat/completions`
 * with a chat payload. The request is aborted after `timeoutMs`.
 *
 * Failed attempts (network error, non-2xx status, unparseable body, timeout)
 * are recorded for rate limiting here. Successful ones are recorded by
 * `parseExternalResponse`, so every dispatched request is counted exactly once
 * and a provider answering with errors such as HTTP 429 still uses up its
 * local per-minute budget.
 *
 * @param provider Provider to call.
 * @param model Model to request from the provider.
 * @param request Caller's completion request.
 * @param timeoutMs Maximum time to wait for the complete response.
 * @returns The normalised completion.
 * @throws Error when no API key or base URL is configured, when the provider
 *   responds with a non-2xx status (message includes up to 200 characters of
 *   the body), when the call exceeds `timeoutMs`, or when the request or body
 *   parsing fails.
 */
async function callProvider(
  provider: ExternalProvider,
  model: ExternalModel,
  request: ExternalCompletionRequest,
  timeoutMs: number,
): Promise<ExternalCompletionResponse> {
  const apiKey = getApiKey(provider);
  if (!apiKey) throw new Error(`No API key for ${provider.name}`);
  const baseUrl = getBaseUrl(provider);
  if (!baseUrl) throw new Error(`No base URL for ${provider.name}`);

  const generateBridge = GENERATE_BRIDGE_PROVIDERS.has(provider.name);
  const url = generateBridge ? `${baseUrl}/api/generate` : `${baseUrl}/chat/completions`;
  const headers = buildRequestHeaders(provider, apiKey);
  const payload = generateBridge ? buildGenerateBridgePayload(model, request) : buildRequestPayload(model, request);

  const controller = new AbortController();
  const timer = setTimeout(() => controller.abort(), timeoutMs);
  const start = Date.now();
  try {
    const response = await fetch(url, {
      method: 'POST',
      headers,
      body: JSON.stringify(payload),
      signal: controller.signal,
    });
    if (!response.ok) {
      const body = await response.text();
      throw new Error(`${provider.name} HTTP ${response.status}: ${body.slice(0, 200)}`);
    }
    const data: unknown = await response.json();
    return parseExternalResponse(data, model, provider, start);
  } catch (err) {
    // The success path records inside parseExternalResponse; count failures here.
    recordRequest(provider.name);
    // Only the timer aborts this controller, so an aborted signal means a timeout.
    if (controller.signal.aborted) {
      throw new Error(`${provider.name} timed out after ${timeoutMs}ms`);
    }
    throw err;
  } finally {
    clearTimeout(timer);
  }
}
// ─── External Fallback Chain ────────────────────────────────────────
/**
 * Tries each available external provider in registry order and returns the
 * first successful completion.
 *
 * Providers that are rate-limited, or that list no model at all, are skipped.
 * Each skip is included in the final error alongside real failures, so the
 * message explains why nothing answered even when no request was sent.
 *
 * @param request Completion request to send.
 * @param targetTier Desired model tier; each provider's best match is used.
 * @param timeoutMs Per-provider timeout in milliseconds (default 30 000).
 * @returns The first successful provider response.
 * @throws Error when no provider is configured, or when every provider was
 *   skipped or failed (message lists one reason per provider).
 */
export async function callExternalFallback(
  request: ExternalCompletionRequest,
  targetTier: 'fast' | 'medium' | 'large' | 'reasoning',
  timeoutMs: number = 30_000,
): Promise<ExternalCompletionResponse> {
  const available = getAvailableProviders();
  if (available.length === 0) {
    throw new Error('No external providers configured (missing API keys)');
  }

  const errors: string[] = [];
  for (const provider of available) {
    if (isRateLimited(provider)) {
      logger.debug({ provider: provider.name }, 'External provider rate-limited, skipping');
      errors.push(`${provider.name}: rate-limited`);
      continue;
    }

    const model = findBestModel(provider, targetTier);
    if (!model) {
      errors.push(`${provider.name}: no models configured`);
      continue;
    }

    try {
      logger.info(
        { provider: provider.name, model: model.id, tier: targetTier },
        'Calling external provider fallback',
      );
      const result = await callProvider(provider, model, request, timeoutMs);
      logger.info(
        {
          provider: result.provider,
          model: result.model,
          latencyMs: result.latencyMs,
          tokens: result.inputTokens + result.outputTokens,
        },
        'External provider fallback succeeded',
      );
      return result;
    } catch (err) {
      const msg = err instanceof Error ? err.message : String(err);
      errors.push(`${provider.name}: ${msg}`);
      logger.warn({ provider: provider.name, err: msg }, 'External provider failed, trying next');
    }
  }

  throw new Error(`All external providers failed: ${errors.join('; ')}`);
}