feat: TokenVault MVP — hybrid MCP + proxy for LLM token savings
4-package monorepo:
- @tokenvault/core: Fastify 5.x proxy server, 7-stage pipeline, 3 provider adapters (Anthropic, OpenAI, Ollama), PostgreSQL ticket system, cost calculator with real provider pricing
- @tokenvault/mcp: MCP server (stdio) with tv_ticket, tv_cost, tv_health tools for IDE integration
- @tokenvault/client: TypeScript SDK with createTokenVaultClient()
- @tokenvault/dashboard: Single-file HTML dashboard with MAGATAMA CI style (indigo #6366f1), bilingual DE+EN, 4 tabs

OpenAI-compatible proxy at /v1/chat/completions — drop-in replacement. Every LLM request becomes a trackable ticket (TV-00001).
This commit is contained in:
commit
d43b9f5298
11
.gitignore
vendored
Normal file
11
.gitignore
vendored
Normal file
@ -0,0 +1,11 @@
|
|||||||
|
node_modules/
|
||||||
|
dist/
|
||||||
|
.env
|
||||||
|
.env.*
|
||||||
|
.dev.vars
|
||||||
|
wrangler.toml
|
||||||
|
*.local
|
||||||
|
*.log
|
||||||
|
.DS_Store
|
||||||
|
coverage/
|
||||||
|
.turbo/
|
||||||
1
.pnpmconfig.json
Normal file
1
.pnpmconfig.json
Normal file
@ -0,0 +1 @@
|
|||||||
|
{"onlyBuiltDependenciesFile": "","ignoredBuiltDependencies": [],"onlyBuiltDependencies": ["esbuild"]}
|
||||||
19
LICENSE
Normal file
19
LICENSE
Normal file
@ -0,0 +1,19 @@
|
|||||||
|
Apache License
|
||||||
|
Version 2.0, January 2004
|
||||||
|
http://www.apache.org/licenses/
|
||||||
|
|
||||||
|
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
|
||||||
|
|
||||||
|
Copyright 2026 Context X (context-x.org)
|
||||||
|
|
||||||
|
Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
you may not use this file except in compliance with the License.
|
||||||
|
You may obtain a copy of the License at
|
||||||
|
|
||||||
|
http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
|
||||||
|
Unless required by applicable law or agreed to in writing, software
|
||||||
|
distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
See the License for the specific language governing permissions and
|
||||||
|
limitations under the License.
|
||||||
41
deploy.sh
Executable file
41
deploy.sh
Executable file
@ -0,0 +1,41 @@
|
|||||||
|
#!/bin/bash
#
# TokenVault deploy script.
#
# Builds the monorepo locally, rsyncs the working tree to the remote host,
# installs production dependencies there, makes sure the PostgreSQL role and
# database exist, and (re)starts the PM2 processes from ecosystem.config.cjs.
#
# Environment:
#   TOKENVAULT_REMOTE  ssh target (user@host); defaults to the original host.
set -euo pipefail

REMOTE="${TOKENVAULT_REMOTE:-root@82.165.222.127}"
REMOTE_DIR="/opt/tokenvault"
LOCAL_DIR="$(cd "$(dirname "$0")" && pwd)"
REMOTE_HOST="${REMOTE#*@}"

echo "=== TokenVault Deploy ==="

# 1. Build locally
echo "[1/5] Building..."
cd "$LOCAL_DIR"
pnpm build

# 2. Sync to Erik
# 'logs' is excluded so --delete does not wipe the PM2 log files that live
# under $REMOTE_DIR/logs on the server. '.env.*' and '.dev.vars' are excluded
# for the same reason '.env' is: local secrets must not be copied to the host.
echo "[2/5] Syncing to Erik..."
rsync -avz --delete \
  --exclude='node_modules' \
  --exclude='.env' \
  --exclude='.env.*' \
  --exclude='.dev.vars' \
  --exclude='.git' \
  --exclude='coverage' \
  --exclude='logs' \
  "$LOCAL_DIR/" "$REMOTE:$REMOTE_DIR/"

# 3. Install dependencies on Erik
# Fall back to npm only when pnpm is not installed on the remote. A failing
# pnpm install must abort the deploy with its error output visible instead of
# being hidden and silently replaced by an npm install.
echo "[3/5] Installing dependencies..."
ssh "$REMOTE" "cd $REMOTE_DIR && if command -v pnpm >/dev/null 2>&1; then pnpm install --prod --frozen-lockfile; else npm install --omit=dev; fi"

# 4. Create DB if needed
echo "[4/5] Setting up database..."
ssh "$REMOTE" "sudo -u postgres psql -tc \"SELECT 1 FROM pg_roles WHERE rolname='tokenvault'\" | grep -q 1 || sudo -u postgres createuser tokenvault"
ssh "$REMOTE" "sudo -u postgres psql -tc \"SELECT 1 FROM pg_database WHERE datname='tokenvault'\" | grep -q 1 || sudo -u postgres createdb -O tokenvault tokenvault"
ssh "$REMOTE" "mkdir -p $REMOTE_DIR/logs"

# 5. Start/restart PM2
echo "[5/5] Starting PM2 processes..."
ssh "$REMOTE" "cd $REMOTE_DIR && pm2 startOrRestart ecosystem.config.cjs && pm2 save"

echo ""
echo "=== Deploy complete ==="
# The services run on the remote host, so say so next to the localhost URLs.
echo "Core (on $REMOTE_HOST): http://localhost:3300/health"
echo "Dashboard (on $REMOTE_HOST): http://localhost:3301"
|
||||||
50
ecosystem.config.cjs
Normal file
50
ecosystem.config.cjs
Normal file
@ -0,0 +1,50 @@
|
|||||||
|
module.exports = {
|
||||||
|
apps: [
|
||||||
|
{
|
||||||
|
name: 'tokenvault',
|
||||||
|
script: 'packages/core/dist/server.js',
|
||||||
|
cwd: '/opt/tokenvault',
|
||||||
|
env: {
|
||||||
|
NODE_ENV: 'production',
|
||||||
|
TOKENVAULT_PORT: '3300',
|
||||||
|
TOKENVAULT_HOST: '0.0.0.0',
|
||||||
|
DB_HOST: '127.0.0.1',
|
||||||
|
DB_PORT: '5432',
|
||||||
|
DB_NAME: 'tokenvault',
|
||||||
|
DB_USER: 'tokenvault',
|
||||||
|
QDRANT_URL: 'http://localhost:6333',
|
||||||
|
OLLAMA_URL: 'https://ollama.fichtmueller.org',
|
||||||
|
},
|
||||||
|
instances: 1,
|
||||||
|
exec_mode: 'fork',
|
||||||
|
max_memory_restart: '512M',
|
||||||
|
error_file: '/opt/tokenvault/logs/error.log',
|
||||||
|
out_file: '/opt/tokenvault/logs/out.log',
|
||||||
|
merge_logs: true,
|
||||||
|
time: true,
|
||||||
|
watch: false,
|
||||||
|
autorestart: true,
|
||||||
|
max_restarts: 10,
|
||||||
|
restart_delay: 5000,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: 'tokenvault-dashboard',
|
||||||
|
script: 'packages/dashboard/dist/server.js',
|
||||||
|
cwd: '/opt/tokenvault',
|
||||||
|
env: {
|
||||||
|
NODE_ENV: 'production',
|
||||||
|
PORT: '3301',
|
||||||
|
TOKENVAULT_CORE_URL: 'http://localhost:3300',
|
||||||
|
},
|
||||||
|
instances: 1,
|
||||||
|
exec_mode: 'fork',
|
||||||
|
max_memory_restart: '256M',
|
||||||
|
error_file: '/opt/tokenvault/logs/dashboard-error.log',
|
||||||
|
out_file: '/opt/tokenvault/logs/dashboard-out.log',
|
||||||
|
merge_logs: true,
|
||||||
|
time: true,
|
||||||
|
watch: false,
|
||||||
|
autorestart: true,
|
||||||
|
},
|
||||||
|
],
|
||||||
|
};
|
||||||
19
package.json
Normal file
19
package.json
Normal file
@ -0,0 +1,19 @@
|
|||||||
|
{
|
||||||
|
"name": "tokenvault",
|
||||||
|
"version": "0.1.0",
|
||||||
|
"private": true,
|
||||||
|
"description": "Hybrid MCP + Proxy platform for LLM token savings, multi-provider routing, and ticket-based cost tracking",
|
||||||
|
"license": "Apache-2.0",
|
||||||
|
"author": "Context X <rf@context-x.org>",
|
||||||
|
"scripts": {
|
||||||
|
"build": "pnpm -r build",
|
||||||
|
"dev": "pnpm -r --parallel dev",
|
||||||
|
"test": "pnpm -r test",
|
||||||
|
"lint": "pnpm -r lint",
|
||||||
|
"clean": "pnpm -r clean"
|
||||||
|
},
|
||||||
|
"engines": {
|
||||||
|
"node": ">=20.0.0",
|
||||||
|
"pnpm": ">=9.0.0"
|
||||||
|
}
|
||||||
|
}
|
||||||
31
packages/client/package.json
Normal file
31
packages/client/package.json
Normal file
@ -0,0 +1,31 @@
|
|||||||
|
{
|
||||||
|
"name": "@tokenvault/client",
|
||||||
|
"version": "0.1.0",
|
||||||
|
"description": "TokenVault TypeScript SDK — wrap LLM provider SDKs with auto-tracking",
|
||||||
|
"type": "module",
|
||||||
|
"main": "dist/index.js",
|
||||||
|
"types": "dist/index.d.ts",
|
||||||
|
"license": "Apache-2.0",
|
||||||
|
"exports": {
|
||||||
|
".": {
|
||||||
|
"import": "./dist/index.js",
|
||||||
|
"types": "./dist/index.d.ts"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"scripts": {
|
||||||
|
"build": "tsup",
|
||||||
|
"dev": "tsx watch src/index.ts",
|
||||||
|
"test": "vitest run",
|
||||||
|
"clean": "rm -rf dist"
|
||||||
|
},
|
||||||
|
"dependencies": {
|
||||||
|
"zod": "^3.24.0"
|
||||||
|
},
|
||||||
|
"devDependencies": {
|
||||||
|
"tsup": "^8.4.0",
|
||||||
|
"tsx": "^4.19.0",
|
||||||
|
"typescript": "^5.7.0",
|
||||||
|
"vitest": "^3.1.0",
|
||||||
|
"@types/node": "^22.0.0"
|
||||||
|
}
|
||||||
|
}
|
||||||
119
packages/client/src/index.ts
Normal file
119
packages/client/src/index.ts
Normal file
@ -0,0 +1,119 @@
|
|||||||
|
/** Options accepted by the TokenVaultClient constructor. */
export interface TokenVaultConfig {
  /** Base URL of the TokenVault server; request paths are appended to it. */
  readonly baseUrl: string;
  /** Default `caller` sent with a completion when the request sets none. */
  readonly caller?: string;
  /** Default `project` sent with a completion when the request sets none. */
  readonly project?: string;
  /** Default `team` sent with a completion when the request sets none. */
  readonly team?: string;
  /** Timeout in milliseconds; the client falls back to 30_000 when omitted. */
  readonly timeout?: number;
}
|
||||||
|
|
||||||
|
/** Body the client posts to /v1/chat/completions. */
export interface CompletionRequest {
  /** Model identifier to run the completion with. */
  readonly model: string;
  /** Conversation messages, each with a role and text content. */
  readonly messages: Array<{ role: string; content: string }>;
  readonly temperature?: number;
  readonly max_tokens?: number;
  /** Per-request attribution; overrides the client-level default when set. */
  readonly caller?: string;
  /** Per-request attribution; overrides the client-level default when set. */
  readonly project?: string;
  /** Per-request attribution; overrides the client-level default when set. */
  readonly team?: string;
}
|
||||||
|
|
||||||
|
/** Shape the client expects back from /v1/chat/completions. */
export interface CompletionResponse {
  readonly id: string;
  readonly model: string;
  /** Generated choices, each carrying one message and its finish reason. */
  readonly choices: Array<{
    readonly index: number;
    readonly message: { readonly role: string; readonly content: string };
    readonly finish_reason: string;
  }>;
  /** Token counts reported for the request. */
  readonly usage: {
    readonly prompt_tokens: number;
    readonly completion_tokens: number;
    readonly total_tokens: number;
    readonly cached_tokens?: number;
  };
  /**
   * Optional TokenVault-specific metadata.
   * NOTE(review): field names suggest ticket and cost details for the
   * request; confirm exact semantics and units against the server.
   */
  readonly tokenvault?: {
    readonly ticket_id: string;
    readonly ticket_number: string;
    readonly provider: string;
    readonly cost_usd: number;
    readonly tokens_saved: number;
    readonly latency_ms: number;
  };
}
|
||||||
|
|
||||||
|
/** Shape the client expects back from GET /v1/cost. */
export interface CostSummary {
  /** Period the summary covers (the client requests 'month' by default). */
  readonly period: string;
  readonly total_cost_usd: number;
  readonly total_saved_usd: number;
  readonly total_tokens_in: number;
  readonly total_tokens_out: number;
  readonly total_requests: number;
  /** NOTE(review): fraction vs. percentage is not visible here; confirm with the server. */
  readonly cache_hit_rate: number;
}
|
||||||
|
|
||||||
|
/**
 * Minimal HTTP client for a TokenVault server.
 *
 * Every request goes through one helper that applies the configured timeout
 * to the whole exchange (headers and body), so no call can hang forever.
 */
export class TokenVaultClient {
  private readonly baseUrl: string;
  private readonly defaults: { caller?: string; project?: string; team?: string };
  private readonly timeout: number;

  /**
   * @param config Base URL, optional attribution defaults and timeout (ms,
   *   defaults to 30_000). Trailing slashes on the base URL are removed.
   */
  constructor(config: TokenVaultConfig) {
    this.baseUrl = config.baseUrl.replace(/\/+$/, '');
    this.defaults = { caller: config.caller, project: config.project, team: config.team };
    this.timeout = config.timeout ?? 30_000;
  }

  /**
   * Fetch `path` relative to the base URL and parse the body as JSON.
   *
   * @param path Path (and query string) appended to the base URL.
   * @param init Extra fetch options; `signal` is always supplied by this helper.
   * @param requireOk When true, a non-2xx status throws instead of being parsed.
   * @throws Error on a non-2xx status (when `requireOk`), on timeout/abort,
   *   on network failure, or when the body is not valid JSON.
   */
  private async fetchJson<T>(path: string, init: RequestInit = {}, requireOk = true): Promise<T> {
    const controller = new AbortController();
    const timer = setTimeout(() => controller.abort(), this.timeout);

    try {
      const res = await fetch(`${this.baseUrl}${path}`, { ...init, signal: controller.signal });

      if (requireOk && !res.ok) {
        const errText = await res.text();
        throw new Error(`TokenVault error ${res.status}: ${errText.slice(0, 200)}`);
      }

      // Await here, inside the try, so the timer is still armed while the
      // body is being read; returning the bare promise would run `finally`
      // (and clear the timer) before the body arrives.
      return (await res.json()) as T;
    } finally {
      clearTimeout(timer);
    }
  }

  /**
   * Send a chat completion through TokenVault.
   *
   * Request-level `caller` / `project` / `team` win over the client defaults.
   *
   * @throws Error when the server answers with a non-2xx status or the
   *   request times out.
   */
  async completion(request: CompletionRequest): Promise<CompletionResponse> {
    return this.fetchJson<CompletionResponse>('/v1/chat/completions', {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify({
        ...request,
        caller: request.caller ?? this.defaults.caller,
        project: request.project ?? this.defaults.project,
        team: request.team ?? this.defaults.team,
      }),
    });
  }

  /**
   * Like {@link completion}, but resolves to `null` instead of throwing.
   * The error is intentionally discarded; use `completion` when the cause matters.
   */
  async safeCompletion(request: CompletionRequest): Promise<CompletionResponse | null> {
    try {
      return await this.completion(request);
    } catch {
      return null;
    }
  }

  /**
   * Fetch the server's /health document.
   *
   * The status code is deliberately not checked, so a JSON body sent with a
   * non-2xx status is still returned to the caller.
   */
  async health(): Promise<Record<string, unknown>> {
    return this.fetchJson<Record<string, unknown>>('/health', {}, false);
  }

  /**
   * Fetch the cost summary for a period.
   *
   * @param period Period name passed as the `period` query parameter.
   * @throws Error when the server answers with a non-2xx status.
   */
  async cost(period: string = 'month'): Promise<CostSummary> {
    return this.fetchJson<CostSummary>(`/v1/cost?period=${encodeURIComponent(period)}`);
  }

  /**
   * List tickets.
   *
   * @param params Optional filters, sent as URL-encoded query parameters.
   * @throws Error when the server answers with a non-2xx status.
   */
  async tickets(params?: Record<string, string>): Promise<unknown> {
    const qs = params ? `?${new URLSearchParams(params)}` : '';
    return this.fetchJson<unknown>(`/v1/tickets${qs}`);
  }
}
|
||||||
|
|
||||||
|
/**
 * Factory wrapper around the TokenVaultClient constructor.
 *
 * @param config Client options, forwarded unchanged.
 * @returns A new TokenVaultClient built from `config`.
 */
export function createTokenVaultClient(config: TokenVaultConfig): TokenVaultClient {
  const client = new TokenVaultClient(config);
  return client;
}
|
||||||
8
packages/client/tsconfig.json
Normal file
8
packages/client/tsconfig.json
Normal file
@ -0,0 +1,8 @@
|
|||||||
|
{
|
||||||
|
"extends": "../../tsconfig.json",
|
||||||
|
"compilerOptions": {
|
||||||
|
"outDir": "dist",
|
||||||
|
"rootDir": "src"
|
||||||
|
},
|
||||||
|
"include": ["src"]
|
||||||
|
}
|
||||||
11
packages/client/tsup.config.ts
Normal file
11
packages/client/tsup.config.ts
Normal file
@ -0,0 +1,11 @@
|
|||||||
|
import { defineConfig } from 'tsup';
|
||||||
|
|
||||||
|
// tsup build settings for the client SDK: a single ESM bundle built from
// src/index.ts, with source maps and .d.ts type declarations, written to dist/.
export default defineConfig({
  entry: ['src/index.ts'],
  format: ['esm'],
  target: 'node20',
  outDir: 'dist',
  // Empty the output directory before each build.
  clean: true,
  sourcemap: true,
  // Emit dist/index.d.ts, which package.json "types" points at.
  dts: true,
});
|
||||||
32
packages/core/package.json
Normal file
32
packages/core/package.json
Normal file
@ -0,0 +1,32 @@
|
|||||||
|
{
|
||||||
|
"name": "@tokenvault/core",
|
||||||
|
"version": "0.1.0",
|
||||||
|
"description": "TokenVault central proxy server — multi-provider routing, ticket tracking, semantic caching",
|
||||||
|
"type": "module",
|
||||||
|
"main": "dist/server.js",
|
||||||
|
"types": "dist/server.d.ts",
|
||||||
|
"license": "Apache-2.0",
|
||||||
|
"scripts": {
|
||||||
|
"build": "tsup",
|
||||||
|
"dev": "tsx watch src/server.ts",
|
||||||
|
"test": "vitest run",
|
||||||
|
"test:watch": "vitest",
|
||||||
|
"clean": "rm -rf dist"
|
||||||
|
},
|
||||||
|
"dependencies": {
|
||||||
|
"fastify": "^5.3.0",
|
||||||
|
"@fastify/cors": "^11.0.0",
|
||||||
|
"pg": "^8.13.0",
|
||||||
|
"pino": "^9.6.0",
|
||||||
|
"zod": "^3.24.0",
|
||||||
|
"nanoid": "^5.1.0"
|
||||||
|
},
|
||||||
|
"devDependencies": {
|
||||||
|
"tsup": "^8.4.0",
|
||||||
|
"tsx": "^4.19.0",
|
||||||
|
"typescript": "^5.7.0",
|
||||||
|
"vitest": "^3.1.0",
|
||||||
|
"@types/node": "^22.0.0",
|
||||||
|
"@types/pg": "^8.11.0"
|
||||||
|
}
|
||||||
|
}
|
||||||
28
packages/core/src/config.ts
Normal file
28
packages/core/src/config.ts
Normal file
@ -0,0 +1,28 @@
|
|||||||
|
/**
 * Read a TCP port from an environment variable.
 *
 * @param name Environment variable to read.
 * @param fallback Value used when the variable is unset or blank.
 * @returns The parsed port.
 * @throws Error when the variable is set but is not an integer in 0-65535.
 *   Failing at startup beats silently running with `NaN`, which is what
 *   `parseInt` yields for garbage input.
 */
function readPort(name: string, fallback: number): number {
  const raw = process.env[name]?.trim();
  if (raw === undefined || raw === '') {
    return fallback;
  }
  const value = /^\d+$/.test(raw) ? Number(raw) : Number.NaN;
  if (!Number.isInteger(value) || value > 65_535) {
    throw new Error(`Invalid ${name}: expected an integer port (0-65535), got "${raw}"`);
  }
  return value;
}

/**
 * Runtime configuration for the core server, read once from environment
 * variables at module load. Every entry has a default, so the server can
 * start with no environment set; provider API keys default to ''.
 */
export const config = {
  port: readPort('TOKENVAULT_PORT', 3300),
  host: process.env['TOKENVAULT_HOST'] ?? '0.0.0.0',
  nodeEnv: process.env['NODE_ENV'] ?? 'development',

  // PostgreSQL connection settings.
  db: {
    host: process.env['DB_HOST'] ?? '127.0.0.1',
    port: readPort('DB_PORT', 5432),
    name: process.env['DB_NAME'] ?? 'tokenvault',
    user: process.env['DB_USER'] ?? 'tokenvault',
    password: process.env['DB_PASSWORD'] ?? '',
  },

  // Qdrant endpoint and collection name.
  qdrant: {
    url: process.env['QDRANT_URL'] ?? 'http://localhost:6333',
    collection: process.env['QDRANT_COLLECTION'] ?? 'tokenvault_cache',
  },

  // Provider credentials / endpoints.
  providers: {
    anthropic: { apiKey: process.env['ANTHROPIC_API_KEY'] ?? '' },
    openai: { apiKey: process.env['OPENAI_API_KEY'] ?? '' },
    google: { apiKey: process.env['GOOGLE_AI_API_KEY'] ?? '' },
    mistral: { apiKey: process.env['MISTRAL_API_KEY'] ?? '' },
    groq: { apiKey: process.env['GROQ_API_KEY'] ?? '' },
    cerebras: { apiKey: process.env['CEREBRAS_API_KEY'] ?? '' },
    ollama: { url: process.env['OLLAMA_URL'] ?? 'http://localhost:11434' },
  },
} as const;
|
||||||
37
packages/core/src/db/client.ts
Normal file
37
packages/core/src/db/client.ts
Normal file
@ -0,0 +1,37 @@
|
|||||||
|
import pg from 'pg';
|
||||||
|
import { config } from '../config.js';
|
||||||
|
import { logger } from '../observability/logger.js';
|
||||||
|
|
||||||
|
const { Pool } = pg;

// Connection settings for the module-wide pool; host, port, database and
// credentials come from config.db, the sizing and timeouts are fixed here.
const poolOptions = {
  host: config.db.host,
  port: config.db.port,
  database: config.db.name,
  user: config.db.user,
  password: config.db.password,
  max: 20,
  idleTimeoutMillis: 30_000,
  connectionTimeoutMillis: 5_000,
};

const pool = new Pool(poolOptions);

// Log whatever the pool reports through its 'error' event.
pool.on('error', (poolError) => {
  logger.error({ err: poolError }, 'Unexpected PostgreSQL pool error');
});
|
||||||
|
|
||||||
|
/**
 * Run a SQL statement on the shared pool.
 *
 * Logs a warning when the statement takes longer than 500 ms. The check sits
 * in `finally`, so statements that end in an error are reported too.
 *
 * @param text SQL text; only its first 80 characters are logged.
 * @param params Optional positional parameters for the statement.
 * @returns The pg query result.
 * @throws Whatever `pool.query` rejects with; errors are not wrapped.
 */
export async function query<T extends pg.QueryResultRow>(
  text: string,
  params?: readonly unknown[],
): Promise<pg.QueryResult<T>> {
  const start = Date.now();
  try {
    return await pool.query<T>(text, params as unknown[]);
  } finally {
    const latency = Date.now() - start;
    if (latency > 500) {
      logger.warn({ latency, query: text.slice(0, 80) }, 'Slow query detected');
    }
  }
}
|
||||||
|
|
||||||
|
/**
 * Shut down the shared connection pool.
 *
 * @returns A promise that settles when `pool.end()` settles.
 */
export async function closePool(): Promise<void> {
  return pool.end();
}
|
||||||
125
packages/core/src/db/migrate.ts
Normal file
125
packages/core/src/db/migrate.ts
Normal file
@ -0,0 +1,125 @@
|
|||||||
|
import { query } from './client.js';
|
||||||
|
import { logger } from '../observability/logger.js';
|
||||||
|
|
||||||
|
// Ordered list of SQL statements executed one by one by runMigrations().
// Tables and indexes use IF NOT EXISTS and the seed insert uses
// ON CONFLICT ... DO NOTHING, so the list can be re-run against an existing database.
const MIGRATIONS = [
  // ─── tickets (core table) ─────────────────────────────────────────────
  // NOTE(review): this was labelled "TimescaleDB hypertable", but no
  // create_hypertable statement is visible in this list — confirm whether
  // that conversion happens elsewhere.
  `CREATE TABLE IF NOT EXISTS tickets (
    id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
    ticket_number BIGSERIAL,
    session_id UUID,
    provider TEXT NOT NULL,
    model TEXT NOT NULL,
    status TEXT NOT NULL DEFAULT 'completed',
    tokens_in INTEGER NOT NULL DEFAULT 0,
    tokens_out INTEGER NOT NULL DEFAULT 0,
    tokens_cached INTEGER NOT NULL DEFAULT 0,
    tokens_saved INTEGER NOT NULL DEFAULT 0,
    cost_usd NUMERIC(12,8) NOT NULL DEFAULT 0,
    cost_saved_usd NUMERIC(12,8) NOT NULL DEFAULT 0,
    latency_ms INTEGER NOT NULL DEFAULT 0,
    cache_hit BOOLEAN NOT NULL DEFAULT false,
    compression_mode TEXT,
    compression_ratio NUMERIC(5,2),
    caller TEXT,
    project TEXT,
    team TEXT,
    input_hash TEXT,
    output_hash TEXT,
    metadata JSONB DEFAULT '{}',
    created_at TIMESTAMPTZ DEFAULT NOW()
  )`,

  // ─── sessions ─────────────────────────────────────────────────────────
  `CREATE TABLE IF NOT EXISTS sessions (
    id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
    caller TEXT NOT NULL,
    project TEXT,
    team TEXT,
    started_at TIMESTAMPTZ DEFAULT NOW(),
    ended_at TIMESTAMPTZ,
    ticket_count INTEGER DEFAULT 0,
    total_cost NUMERIC(12,8) DEFAULT 0,
    total_tokens_saved INTEGER DEFAULT 0,
    metadata JSONB DEFAULT '{}'
  )`,

  // ─── audit_log ────────────────────────────────────────────────────────
  // Intended to be immutable / append-only; NOTE(review): nothing in this
  // statement enforces that at the database level.
  `CREATE TABLE IF NOT EXISTS audit_log (
    id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
    actor TEXT NOT NULL,
    action TEXT NOT NULL,
    resource TEXT NOT NULL,
    details JSONB DEFAULT '{}',
    ip_address TEXT,
    timestamp TIMESTAMPTZ DEFAULT NOW()
  )`,

  // ─── provider_pricing ─────────────────────────────────────────────────
  // One row per (provider, model, effective_from); the unique constraint is
  // what the seed insert's ON CONFLICT clause relies on.
  `CREATE TABLE IF NOT EXISTS provider_pricing (
    id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
    provider TEXT NOT NULL,
    model TEXT NOT NULL,
    input_per_mtok NUMERIC(10,6) NOT NULL,
    output_per_mtok NUMERIC(10,6) NOT NULL,
    cached_per_mtok NUMERIC(10,6),
    effective_from DATE NOT NULL DEFAULT CURRENT_DATE,
    UNIQUE(provider, model, effective_from)
  )`,

  // ─── budgets ──────────────────────────────────────────────────────────
  `CREATE TABLE IF NOT EXISTS budgets (
    id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
    scope_type TEXT NOT NULL,
    scope_value TEXT NOT NULL,
    limit_usd NUMERIC(12,4) NOT NULL,
    period TEXT NOT NULL DEFAULT 'monthly',
    current_spend NUMERIC(12,8) DEFAULT 0,
    alert_threshold NUMERIC(3,2) DEFAULT 0.80,
    hard_limit BOOLEAN DEFAULT false,
    created_at TIMESTAMPTZ DEFAULT NOW(),
    updated_at TIMESTAMPTZ DEFAULT NOW()
  )`,

  // ─── Indexes ──────────────────────────────────────────────────────────
  `CREATE INDEX IF NOT EXISTS idx_tickets_session ON tickets(session_id)`,
  `CREATE INDEX IF NOT EXISTS idx_tickets_provider ON tickets(provider)`,
  `CREATE INDEX IF NOT EXISTS idx_tickets_project ON tickets(project)`,
  `CREATE INDEX IF NOT EXISTS idx_tickets_status ON tickets(status)`,
  `CREATE INDEX IF NOT EXISTS idx_tickets_created ON tickets(created_at DESC)`,
  `CREATE INDEX IF NOT EXISTS idx_tickets_caller ON tickets(caller)`,
  `CREATE INDEX IF NOT EXISTS idx_audit_timestamp ON audit_log(timestamp DESC)`,

  // ─── Seed: Provider Pricing (April 2026) ──────────────────────────────
  // Every seeded row carries effective_from '2026-01-01'; rows that already
  // exist for the same (provider, model, effective_from) are left untouched.
  `INSERT INTO provider_pricing (provider, model, input_per_mtok, output_per_mtok, cached_per_mtok, effective_from)
    VALUES
    ('anthropic', 'claude-opus-4-20250514', 15.0, 75.0, 1.50, '2026-01-01'),
    ('anthropic', 'claude-sonnet-4-20250514', 3.0, 15.0, 0.30, '2026-01-01'),
    ('anthropic', 'claude-haiku-3-20250630', 0.25, 1.25, 0.025, '2026-01-01'),
    ('openai', 'gpt-4o', 2.50, 10.0, 1.25, '2026-01-01'),
    ('openai', 'gpt-4o-mini', 0.15, 0.60, 0.075, '2026-01-01'),
    ('openai', 'o1', 15.0, 60.0, NULL, '2026-01-01'),
    ('google', 'gemini-2.0-flash', 0.10, 0.40, 0.025, '2026-01-01'),
    ('google', 'gemini-2.5-pro', 1.25, 10.0, 0.315, '2026-01-01'),
    ('mistral', 'mistral-large', 2.0, 6.0, NULL, '2026-01-01'),
    ('mistral', 'mistral-small', 0.10, 0.30, NULL, '2026-01-01'),
    ('groq', 'llama-3.3-70b', 0.59, 0.79, NULL, '2026-01-01'),
    ('cerebras', 'llama-3.3-70b', 0.85, 1.20, NULL, '2026-01-01'),
    ('ollama', 'qwen2.5:14b', 0.0, 0.0, NULL, '2026-01-01'),
    ('ollama', 'llama3.3:70b', 0.0, 0.0, NULL, '2026-01-01')
    ON CONFLICT (provider, model, effective_from) DO NOTHING`,
];
|
||||||
|
|
||||||
|
/**
 * Execute every statement in MIGRATIONS, in order.
 *
 * A failing statement is logged and the remaining statements still run, so
 * this function does not throw because of a failed statement. Errors whose
 * message contains "already exists" are treated as benign and ignored.
 *
 * The closing log line reflects the outcome: it reports success only when no
 * statement failed, and otherwise warns with the number of failures.
 */
export async function runMigrations(): Promise<void> {
  logger.info('Running TokenVault DB migrations...');

  let failed = 0;
  for (const sql of MIGRATIONS) {
    try {
      await query(sql);
    } catch (err) {
      const msg = err instanceof Error ? err.message : String(err);
      if (!msg.includes('already exists')) {
        failed += 1;
        logger.error({ err, sql: sql.slice(0, 60) }, 'Migration failed');
      }
    }
  }

  if (failed > 0) {
    // Don't claim success when part of the schema may be missing.
    logger.warn({ failed, total: MIGRATIONS.length }, 'TokenVault migrations finished with failures');
    return;
  }
  logger.info('TokenVault migrations complete');
}
|
||||||
10
packages/core/src/observability/logger.ts
Normal file
10
packages/core/src/observability/logger.ts
Normal file
@ -0,0 +1,10 @@
|
|||||||
|
import { createRequire } from 'node:module';

import pino from 'pino';

import { config } from '../config.js';
|
||||||
|
|
||||||
|
/**
 * Report whether the optional `pino-pretty` package can be resolved from
 * this module.
 *
 * pino fails at construction when a transport target cannot be found, so the
 * pretty transport must only be requested when the package is installed.
 */
function isPinoPrettyAvailable(): boolean {
  try {
    createRequire(import.meta.url).resolve('pino-pretty');
    return true;
  } catch {
    return false;
  }
}

/**
 * Shared application logger.
 *
 * Level is 'info' in production and 'debug' otherwise. In development the
 * output is pretty-printed through pino-pretty when that package is
 * installed; in every other case pino's default JSON output is used.
 */
export const logger = pino({
  name: 'tokenvault',
  level: config.nodeEnv === 'production' ? 'info' : 'debug',
  transport: config.nodeEnv === 'development' && isPinoPrettyAvailable()
    ? { target: 'pino-pretty', options: { colorize: true } }
    : undefined,
});
|
||||||
119
packages/core/src/pipeline/index.ts
Normal file
119
packages/core/src/pipeline/index.ts
Normal file
@ -0,0 +1,119 @@
|
|||||||
|
import { randomUUID } from 'node:crypto';
|
||||||
|
import { logger } from '../observability/logger.js';
|
||||||
|
import { resolveProvider } from '../providers/index.js';
|
||||||
|
import { createTicket, hashContent } from '../tickets/ticket-service.js';
|
||||||
|
import type { ChatRequest, ChatResponse, PipelineContext, ProviderName } from '../types.js';
|
||||||
|
|
||||||
|
/**
 * TokenVault 7-Stage Pipeline
 *
 * 1. Validate  — Schema check, RBAC, budget
 * 2. Compress  — AST-based input compression (Phase 2)
 * 3. Cache     — Semantic cache lookup (Phase 2)
 * 4. Route     — Select provider + model
 * 5. Execute   — Call provider API
 * 6. Track     — Create ticket, update budget
 * 7. Respond   — Return response with cost headers
 *
 * Only the model/messages presence check (1), routing (4), execution (5),
 * ticket creation (6) and the return value (7) are implemented here; stages
 * 2 and 3 are placeholders.
 *
 * @param request Chat request to route to a provider.
 * @returns The provider response plus a summary of the ticket created for it.
 * @throws The original error when validation, routing, the provider call or
 *   ticket creation fails. Before rethrowing, a 'failed' ticket is recorded
 *   on a best-effort basis.
 */
export async function executePipeline(request: ChatRequest): Promise<{
  response: ChatResponse;
  ticket: { id: string; ticket_number: number; cost_usd: number; tokens_saved: number };
}> {
  const ctx: PipelineContext = {
    requestId: randomUUID(),
    startTime: Date.now(),
    request,
  };

  try {
    // ─── Stage 1: Validate ────────────────────────────────────────────
    if (!request.model || !request.messages?.length) {
      throw new Error('Invalid request: model and messages required');
    }

    // ─── Stage 2: Compress (Phase 2 — placeholder) ────────────────────
    // Future: AST-based compression of code content in messages

    // ─── Stage 3: Cache Check (Phase 2 — placeholder) ─────────────────
    // Future: Qdrant semantic similarity lookup

    // ─── Stage 4: Route ───────────────────────────────────────────────
    const resolved = resolveProvider(request.model);
    if (!resolved) {
      throw new Error(`No configured provider found for model: ${request.model}`);
    }
    ctx.resolvedProvider = resolved.provider;
    ctx.resolvedModel = request.model;

    logger.debug({ requestId: ctx.requestId, provider: resolved.provider, model: request.model }, 'Routed request');

    // ─── Stage 5: Execute ─────────────────────────────────────────────
    const response = await resolved.adapter.chat(request);
    ctx.response = response;

    // ─── Stage 6: Track ───────────────────────────────────────────────
    const inputText = request.messages.map(m => m.content).join('\n');
    const outputText = response.choices.map(c => c.message.content).join('\n');
    const cost = resolved.adapter.calculateCost(
      response.usage.prompt_tokens,
      response.usage.completion_tokens,
      response.usage.cached_tokens ?? 0,
      response.model,
    );

    const ticket = await createTicket({
      provider: resolved.provider,
      model: response.model,
      status: 'completed',
      tokens_in: response.usage.prompt_tokens,
      tokens_out: response.usage.completion_tokens,
      tokens_cached: response.usage.cached_tokens ?? 0,
      tokens_saved: ctx.tokensSaved ?? 0,
      cost_usd: cost,
      cost_saved_usd: 0,
      latency_ms: response.latency_ms,
      cache_hit: false,
      compression_mode: ctx.compressionMode,
      compression_ratio: ctx.compressionRatio,
      caller: request.caller,
      project: request.project,
      team: request.team,
      input_hash: hashContent(inputText),
      output_hash: hashContent(outputText),
    });

    // ─── Stage 7: Respond ─────────────────────────────────────────────
    return {
      response,
      ticket: {
        id: ticket.id,
        ticket_number: ticket.ticket_number,
        cost_usd: cost,
        tokens_saved: ctx.tokensSaved ?? 0,
      },
    };
  } catch (err) {
    const errorMsg = err instanceof Error ? err.message : String(err);
    logger.error({ requestId: ctx.requestId, error: errorMsg }, 'Pipeline failed');

    // Track failed request as ticket (best effort).
    try {
      // Validation may have failed precisely because `messages` or `model`
      // is missing, so neither can be dereferenced blindly here.
      const messages = Array.isArray(request.messages) ? request.messages : [];
      await createTicket({
        // NOTE(review): failures that happen before routing are attributed
        // to 'ollama' because no provider has been resolved yet.
        provider: (ctx.resolvedProvider ?? 'ollama') as ProviderName,
        // `||` on purpose: an empty model string is as unusable as a missing one.
        model: request.model || 'unknown',
        status: 'failed',
        tokens_in: 0,
        tokens_out: 0,
        cost_usd: 0,
        latency_ms: Date.now() - ctx.startTime,
        caller: request.caller,
        project: request.project,
        team: request.team,
        input_hash: hashContent(messages.map(m => m.content).join('\n')),
        output_hash: '',
      });
    } catch (trackErr) {
      // Still best effort, but leave a trace instead of failing silently.
      logger.warn({ requestId: ctx.requestId, err: trackErr }, 'Could not record failed-request ticket');
    }

    throw err;
  }
}
|
||||||
275
packages/core/src/providers/index.ts
Normal file
275
packages/core/src/providers/index.ts
Normal file
@ -0,0 +1,275 @@
|
|||||||
|
import { config } from '../config.js';
|
||||||
|
import { logger } from '../observability/logger.js';
|
||||||
|
import type { ProviderAdapter, ProviderName, ChatRequest, ChatResponse, ProviderModel } from '../types.js';
|
||||||
|
import { createHash, randomUUID } from 'node:crypto';
|
||||||
|
|
||||||
|
// ─── Provider Pricing (hardcoded MVP, later from DB) ────────────────────────
|
||||||
|
|
||||||
|
/**
 * Price table used by calcCost: provider → model → USD per million tokens
 * for input, output and (where the provider offers one) cached input.
 * Providers or models without an entry are costed at 0.
 */
const PRICING: Record<string, Record<string, { input: number; output: number; cached?: number }>> = {
  anthropic: {
    'claude-opus-4-20250514': { input: 15.0, output: 75.0, cached: 1.50 },
    'claude-sonnet-4-20250514': { input: 3.0, output: 15.0, cached: 0.30 },
    'claude-haiku-3-20250630': { input: 0.25, output: 1.25, cached: 0.025 },
  },
  openai: {
    'gpt-4o': { input: 2.50, output: 10.0, cached: 1.25 },
    'gpt-4o-mini': { input: 0.15, output: 0.60, cached: 0.075 },
    'o1': { input: 15.0, output: 60.0 },
  },
  ollama: {},
};

/**
 * Compute the USD cost of one request from the PRICING table.
 *
 * @param provider Provider key in PRICING.
 * @param model Model key under that provider.
 * @param tokensIn Prompt tokens. NOTE(review): treated as including the
 *   cached tokens, which are subtracted below — confirm each adapter reports
 *   usage that way.
 * @param tokensOut Completion tokens.
 * @param tokensCached Prompt tokens served from the provider's cache.
 * @returns Cost in USD, never negative; 0 when the model has no price entry.
 */
function calcCost(provider: string, model: string, tokensIn: number, tokensOut: number, tokensCached: number): number {
  const pricing = PRICING[provider]?.[model];
  if (!pricing) {
    return 0;
  }

  // Clamp so a cached count larger than the prompt cannot turn the
  // uncached-input share negative and offset the other cost terms.
  const promptTokens = Math.max(tokensIn, 0);
  const cachedTokens = Math.min(Math.max(tokensCached, 0), promptTokens);

  // Models without a cached rate bill cached tokens at the normal input
  // rate; costing them at 0 would understate the cost.
  const cachedRate = pricing.cached ?? pricing.input;

  const inputCost = ((promptTokens - cachedTokens) / 1_000_000) * pricing.input;
  const outputCost = (tokensOut / 1_000_000) * pricing.output;
  const cacheCost = (cachedTokens / 1_000_000) * cachedRate;
  return Math.max(0, inputCost + outputCost + cacheCost);
}
|
||||||
|
|
||||||
|
// ─── Anthropic Adapter ──────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
/**
 * Build the provider adapter that talks to Anthropic's Messages API
 * (`POST https://api.anthropic.com/v1/messages`) and converts the reply into
 * the OpenAI-style ChatResponse used by the rest of the proxy.
 *
 * The API key is read once from `config.providers.anthropic.apiKey`; the
 * adapter reports itself as configured only when that key is non-empty.
 */
function createAnthropicAdapter(): ProviderAdapter {
  const apiKey = config.providers.anthropic.apiKey;
  const models: ProviderModel[] = [
    { id: 'claude-opus-4-20250514', displayName: 'Claude Opus 4', contextLength: 200_000, inputPricePerMTok: 15.0, outputPricePerMTok: 75.0, cachedPricePerMTok: 1.50, tier: 'premium' },
    { id: 'claude-sonnet-4-20250514', displayName: 'Claude Sonnet 4', contextLength: 200_000, inputPricePerMTok: 3.0, outputPricePerMTok: 15.0, cachedPricePerMTok: 0.30, tier: 'standard' },
    { id: 'claude-haiku-3-20250630', displayName: 'Claude Haiku 3', contextLength: 200_000, inputPricePerMTok: 0.25, outputPricePerMTok: 1.25, cachedPricePerMTok: 0.025, tier: 'fast' },
  ];

  return {
    name: 'anthropic',
    displayName: 'Anthropic Claude',
    models,
    isConfigured: () => apiKey.length > 0,
    supportsPromptCaching: () => true,
    calculateCost: (tIn, tOut, tCached, model) => calcCost('anthropic', model, tIn, tOut, tCached),

    /**
     * Send one non-streaming chat request.
     *
     * @throws Error when the API answers with a non-2xx status; the message
     *   carries the status code and the first 200 characters of the body.
     */
    async chat(request: ChatRequest): Promise<ChatResponse> {
      const start = Date.now();

      // Anthropic takes the system prompt as a top-level field rather than as
      // a message. Merge *all* system messages so none is silently dropped.
      const systemPrompt = request.messages
        .filter(m => m.role === 'system')
        .map(m => m.content)
        .join('\n\n');
      const conversation = request.messages.filter(m => m.role !== 'system');

      const body = {
        model: request.model,
        max_tokens: request.max_tokens ?? 4096,
        temperature: request.temperature ?? 0.7,
        ...(systemPrompt.length > 0 ? { system: systemPrompt } : {}),
        messages: conversation.map(m => ({ role: m.role, content: m.content })),
      };

      const res = await fetch('https://api.anthropic.com/v1/messages', {
        method: 'POST',
        headers: {
          'Content-Type': 'application/json',
          'x-api-key': apiKey,
          'anthropic-version': '2023-06-01',
        },
        body: JSON.stringify(body),
      });

      if (!res.ok) {
        const errText = await res.text();
        throw new Error(`Anthropic API error ${res.status}: ${errText.slice(0, 200)}`);
      }

      const data = await res.json() as {
        id: string;
        model: string;
        content: Array<{ type: string; text?: string }>;
        stop_reason: string;
        usage: { input_tokens: number; output_tokens: number; cache_creation_input_tokens?: number; cache_read_input_tokens?: number };
      };

      // Only `text` blocks carry a `text` field; joining other block types
      // would splice the literal string "undefined" into the reply.
      const text = data.content
        .filter(block => block.type === 'text' && typeof block.text === 'string')
        .map(block => block.text)
        .join('');

      // Anthropic reports cache reads/writes separately from `input_tokens`.
      // Fold them into prompt_tokens so that `cached_tokens` is a subset of
      // prompt_tokens, which is what calculateCost (and the OpenAI adapter's
      // usage shape) assumes when it subtracts the cached share.
      const cachedTokens = data.usage.cache_read_input_tokens ?? 0;
      const cacheWriteTokens = data.usage.cache_creation_input_tokens ?? 0;
      const promptTokens = data.usage.input_tokens + cachedTokens + cacheWriteTokens;

      return {
        id: data.id,
        model: data.model,
        provider: 'anthropic',
        choices: [{
          index: 0,
          message: { role: 'assistant', content: text },
          finish_reason: data.stop_reason,
        }],
        usage: {
          prompt_tokens: promptTokens,
          completion_tokens: data.usage.output_tokens,
          total_tokens: promptTokens + data.usage.output_tokens,
          cached_tokens: cachedTokens,
        },
        latency_ms: Date.now() - start,
      };
    },
  };
}
|
||||||
|
|
||||||
|
// ─── OpenAI Adapter ─────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
/**
 * Build the provider adapter for OpenAI's Chat Completions API
 * (`POST https://api.openai.com/v1/chat/completions`).
 *
 * The API key is read once from `config.providers.openai.apiKey`; the adapter
 * reports itself as configured only when that key is non-empty.
 */
function createOpenAIAdapter(): ProviderAdapter {
  const apiKey = config.providers.openai.apiKey;
  const models: ProviderModel[] = [
    { id: 'gpt-4o', displayName: 'GPT-4o', contextLength: 128_000, inputPricePerMTok: 2.50, outputPricePerMTok: 10.0, cachedPricePerMTok: 1.25, tier: 'standard' },
    { id: 'gpt-4o-mini', displayName: 'GPT-4o Mini', contextLength: 128_000, inputPricePerMTok: 0.15, outputPricePerMTok: 0.60, cachedPricePerMTok: 0.075, tier: 'fast' },
    { id: 'o1', displayName: 'o1', contextLength: 200_000, inputPricePerMTok: 15.0, outputPricePerMTok: 60.0, tier: 'reasoning' },
  ];

  return {
    name: 'openai',
    displayName: 'OpenAI',
    models,
    isConfigured: () => apiKey.length > 0,
    supportsPromptCaching: () => true,
    calculateCost: (tIn, tOut, tCached, model) => calcCost('openai', model, tIn, tOut, tCached),

    /**
     * Send one non-streaming chat request.
     *
     * @throws Error when the API answers with a non-2xx status; the message
     *   carries the status code and the first 200 characters of the body.
     */
    async chat(request: ChatRequest): Promise<ChatResponse> {
      const start = Date.now();

      // Reasoning-tier (o-series) models reject `max_tokens` and a custom
      // `temperature`; they take `max_completion_tokens` instead.
      const isReasoningModel = models.some(m => m.id === request.model && m.tier === 'reasoning');
      const samplingParams = isReasoningModel
        ? { max_completion_tokens: request.max_tokens ?? 4096 }
        : { temperature: request.temperature ?? 0.7, max_tokens: request.max_tokens ?? 4096 };

      const res = await fetch('https://api.openai.com/v1/chat/completions', {
        method: 'POST',
        headers: {
          'Content-Type': 'application/json',
          'Authorization': `Bearer ${apiKey}`,
        },
        body: JSON.stringify({
          model: request.model,
          messages: request.messages.map(m => ({ role: m.role, content: m.content })),
          ...samplingParams,
        }),
      });

      if (!res.ok) {
        const errText = await res.text();
        throw new Error(`OpenAI API error ${res.status}: ${errText.slice(0, 200)}`);
      }

      const data = await res.json() as {
        id: string;
        model: string;
        choices: Array<{ index: number; message: { role: string; content: string | null }; finish_reason: string }>;
        usage: { prompt_tokens: number; completion_tokens: number; total_tokens: number; prompt_tokens_details?: { cached_tokens?: number } };
      };

      return {
        id: data.id,
        model: data.model,
        provider: 'openai',
        choices: data.choices.map(c => ({
          index: c.index,
          // `content` is null for refusals / tool calls; ChatMessage.content is a string.
          message: { role: c.message.role as 'assistant', content: c.message.content ?? '' },
          finish_reason: c.finish_reason,
        })),
        usage: {
          prompt_tokens: data.usage.prompt_tokens,
          completion_tokens: data.usage.completion_tokens,
          total_tokens: data.usage.total_tokens,
          cached_tokens: data.usage.prompt_tokens_details?.cached_tokens ?? 0,
        },
        latency_ms: Date.now() - start,
      };
    },
  };
}
|
||||||
|
|
||||||
|
// ─── Ollama Adapter ─────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
/**
 * Build the provider adapter for an Ollama server reachable at
 * `config.providers.ollama.url`. The adapter always reports itself as
 * configured, never supports prompt caching and always prices requests at 0.
 */
function createOllamaAdapter(): ProviderAdapter {
  const baseUrl = config.providers.ollama.url;

  const models: ProviderModel[] = [
    { id: 'qwen2.5:14b', displayName: 'Qwen 2.5 14B', contextLength: 32_768, inputPricePerMTok: 0, outputPricePerMTok: 0, tier: 'standard' },
    { id: 'qwen2.5:3b', displayName: 'Qwen 2.5 3B', contextLength: 32_768, inputPricePerMTok: 0, outputPricePerMTok: 0, tier: 'fast' },
    { id: 'llama3.3:70b', displayName: 'Llama 3.3 70B', contextLength: 131_072, inputPricePerMTok: 0, outputPricePerMTok: 0, tier: 'premium' },
  ];

  /**
   * POST one non-streaming turn to `/api/chat` and reshape the answer.
   * Throws an Error (status + first 200 chars of the body) on a non-2xx reply.
   */
  const chat = async (request: ChatRequest): Promise<ChatResponse> => {
    const startedAt = Date.now();

    const payload = JSON.stringify({
      model: request.model,
      messages: request.messages.map(({ role, content }) => ({ role, content })),
      stream: false,
      options: {
        temperature: request.temperature ?? 0.7,
        num_predict: request.max_tokens ?? 4096,
      },
    });

    const res = await fetch(`${baseUrl}/api/chat`, {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: payload,
    });

    if (!res.ok) {
      const errText = await res.text();
      throw new Error(`Ollama error ${res.status}: ${errText.slice(0, 200)}`);
    }

    const data = await res.json() as {
      model: string;
      message: { role: string; content: string };
      done_reason: string;
      prompt_eval_count?: number;
      eval_count?: number;
    };

    // Token counters are optional in the reply; missing ones count as zero.
    const promptTokens = data.prompt_eval_count ?? 0;
    const completionTokens = data.eval_count ?? 0;

    return {
      // The reply carries no request id, so a fresh UUID is generated locally.
      id: randomUUID(),
      model: data.model,
      provider: 'ollama',
      choices: [
        {
          index: 0,
          message: { role: 'assistant', content: data.message.content },
          finish_reason: data.done_reason ?? 'stop',
        },
      ],
      usage: {
        prompt_tokens: promptTokens,
        completion_tokens: completionTokens,
        total_tokens: promptTokens + completionTokens,
      },
      latency_ms: Date.now() - startedAt,
    };
  };

  return {
    name: 'ollama',
    displayName: 'Ollama (Local)',
    models,
    isConfigured: () => true,
    supportsPromptCaching: () => false,
    calculateCost: () => 0,
    chat,
  };
}
|
||||||
|
|
||||||
|
// ─── Provider Registry ──────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
const adapters: Map<ProviderName, ProviderAdapter> = new Map();
|
||||||
|
|
||||||
|
/**
 * Create the Anthropic, OpenAI and Ollama adapters, store them in the
 * module-level `adapters` registry, and log which of them are configured.
 */
export function initProviders(): void {
  // All three adapters are built before any of them is registered.
  const created: ProviderAdapter[] = [
    createAnthropicAdapter(),
    createOpenAIAdapter(),
    createOllamaAdapter(),
  ];

  for (const adapter of created) {
    adapters.set(adapter.name, adapter);
  }

  const configured: ProviderName[] = [];
  for (const adapter of adapters.values()) {
    if (adapter.isConfigured()) {
      configured.push(adapter.name);
    }
  }

  logger.info({ configured }, 'Provider registry initialized');
}
|
||||||
|
|
||||||
|
/**
 * Look up a registered adapter by provider name.
 *
 * @returns The adapter, or `undefined` when nothing is registered under `name`.
 */
export function getProvider(name: ProviderName): ProviderAdapter | undefined {
  const adapter = adapters.get(name);
  return adapter;
}
|
||||||
|
|
||||||
|
/**
 * Snapshot of every registered adapter, in registration order.
 * A new array is returned on each call.
 */
export function getAllProviders(): ProviderAdapter[] {
  return Array.from(adapters.values());
}
|
||||||
|
|
||||||
|
/**
 * Find the first configured adapter whose model list contains `model`
 * (exact id match), searching in registration order.
 *
 * @returns The provider name with its adapter, or `undefined` when no
 *   configured adapter lists the model.
 */
export function resolveProvider(model: string): { provider: ProviderName; adapter: ProviderAdapter } | undefined {
  const match = [...adapters.entries()].find(
    ([, candidate]) => candidate.isConfigured() && candidate.models.some(m => m.id === model),
  );

  if (match === undefined) {
    return undefined;
  }

  const [provider, adapter] = match;
  return { provider, adapter };
}
|
||||||
19
packages/core/src/routes/health.ts
Normal file
19
packages/core/src/routes/health.ts
Normal file
@ -0,0 +1,19 @@
|
|||||||
|
import type { FastifyInstance } from 'fastify';
|
||||||
|
import { getAllProviders } from '../providers/index.js';
|
||||||
|
|
||||||
|
/**
 * Register `GET /health`, which reports a static service descriptor, the
 * process uptime in whole seconds, and a per-provider summary.
 */
export async function healthRoutes(app: FastifyInstance): Promise<void> {
  app.get('/health', async () => {
    const providerSummaries: Array<{ name: string; configured: boolean; models: number }> = [];

    for (const provider of getAllProviders()) {
      providerSummaries.push({
        name: provider.name,
        configured: provider.isConfigured(),
        models: provider.models.length,
      });
    }

    const uptimeSeconds = Math.floor(process.uptime());

    return {
      status: 'ok',
      service: 'tokenvault',
      version: '0.1.0',
      uptime_s: uptimeSeconds,
      providers: providerSummaries,
    };
  });
}
|
||||||
69
packages/core/src/routes/proxy.ts
Normal file
69
packages/core/src/routes/proxy.ts
Normal file
@ -0,0 +1,69 @@
|
|||||||
|
import type { FastifyInstance } from 'fastify';
|
||||||
|
import { executePipeline } from '../pipeline/index.js';
|
||||||
|
import type { ChatRequest } from '../types.js';
|
||||||
|
|
||||||
|
/** JSON body accepted by `POST /v1/chat/completions`. */
interface ProxyBody {
  /** Model id requested by the client. */
  model: string;
  /** Conversation so far; `role` is narrowed to the ChatMessage roles by the route handler. */
  messages: Array<{ role: string; content: string }>;
  temperature?: number;
  max_tokens?: number;
  /** Declared for client compatibility; the route handler does not read it. */
  stream?: boolean;

  // TokenVault extensions — attribution fields; the route handler falls back
  // to the matching `x-tokenvault-*` request header when one is absent.
  caller?: string;
  project?: string;
  team?: string;
}
|
||||||
|
|
||||||
|
/** Return a header as a single string (first value when it was sent more than once). */
function firstHeaderValue(value: string | string[] | undefined): string | undefined {
  return Array.isArray(value) ? value[0] : value;
}

/**
 * Register `POST /v1/chat/completions`.
 *
 * The handler validates the body, runs the request through `executePipeline`,
 * and answers in the OpenAI chat-completion shape, extended with a
 * `tokenvault` object and `X-TokenVault-*` response headers describing the
 * ticket that was created for the request.
 *
 * Responds with 400 when `model` is not a non-empty string or `messages` is
 * not a non-empty array of objects that each carry a string `role`.
 */
export async function proxyRoutes(app: FastifyInstance): Promise<void> {
  app.post<{ Body: ProxyBody }>('/v1/chat/completions', async (req, reply) => {
    // The body is untrusted JSON (or absent); check the shape before touching it.
    const body = (req.body ?? {}) as Partial<ProxyBody>;

    if (typeof body.model !== 'string' || body.model.length === 0) {
      reply.code(400);
      return { error: { message: '`model` must be a non-empty string', type: 'invalid_request_error' } };
    }

    const rawMessages: unknown = body.messages;
    const messagesValid =
      Array.isArray(rawMessages) &&
      rawMessages.length > 0 &&
      rawMessages.every(m => typeof m === 'object' && m !== null && typeof (m as { role?: unknown }).role === 'string');

    if (!messagesValid) {
      reply.code(400);
      return { error: { message: '`messages` must be a non-empty array of { role, content } objects', type: 'invalid_request_error' } };
    }

    const { model, temperature, max_tokens, caller, project, team } = body;
    const messages = rawMessages as ProxyBody['messages'];

    const chatRequest: ChatRequest = {
      model,
      messages: messages.map(m => ({
        role: m.role as 'system' | 'user' | 'assistant' | 'tool',
        content: m.content,
      })),
      temperature,
      max_tokens,
      // Body fields win; the headers are the fallback for OpenAI SDK clients
      // that cannot add custom body fields.
      caller: caller ?? firstHeaderValue(req.headers['x-tokenvault-caller']),
      project: project ?? firstHeaderValue(req.headers['x-tokenvault-project']),
      team: team ?? firstHeaderValue(req.headers['x-tokenvault-team']),
    };

    const { response, ticket } = await executePipeline(chatRequest);

    const ticketNumber = `TV-${String(ticket.ticket_number).padStart(5, '0')}`;

    // NOTE(review): the ticket row comes from the database; drivers such as
    // node-postgres hand NUMERIC columns back as strings unless a type parser
    // is installed. The schema is not visible here, so coerce defensively
    // instead of calling `.toFixed` on a possible string.
    const parsedCost = Number(ticket.cost_usd);
    const costUsd = Number.isFinite(parsedCost) ? parsedCost : 0;

    reply.header('X-TokenVault-Ticket-ID', ticket.id);
    reply.header('X-TokenVault-Ticket-Number', ticketNumber);
    reply.header('X-TokenVault-Cost-USD', costUsd.toFixed(6));
    reply.header('X-TokenVault-Tokens-Saved', String(ticket.tokens_saved));

    // Return OpenAI-compatible format
    return {
      id: response.id,
      object: 'chat.completion',
      created: Math.floor(Date.now() / 1000),
      model: response.model,
      choices: response.choices.map(c => ({
        index: c.index,
        message: { role: c.message.role, content: c.message.content },
        finish_reason: c.finish_reason,
      })),
      usage: {
        prompt_tokens: response.usage.prompt_tokens,
        completion_tokens: response.usage.completion_tokens,
        total_tokens: response.usage.total_tokens,
        cached_tokens: response.usage.cached_tokens,
      },
      // TokenVault extensions
      tokenvault: {
        ticket_id: ticket.id,
        ticket_number: ticketNumber,
        provider: response.provider,
        cost_usd: costUsd,
        tokens_saved: ticket.tokens_saved,
        latency_ms: response.latency_ms,
      },
    };
  });
}
|
||||||
57
packages/core/src/routes/tickets.ts
Normal file
57
packages/core/src/routes/tickets.ts
Normal file
@ -0,0 +1,57 @@
|
|||||||
|
import type { FastifyInstance } from 'fastify';
|
||||||
|
import { listTickets, getTicket, getTicketStats, getCostBreakdown } from '../tickets/ticket-service.js';
|
||||||
|
|
||||||
|
/** Upper bound for `limit` on `GET /v1/tickets`, so one request cannot dump the whole table. */
const MAX_TICKET_PAGE_SIZE = 500;

/**
 * Parse a non-negative integer query parameter.
 * Returns `fallback` when the parameter is absent or empty, `null` when it is malformed.
 */
function parseCountParam(raw: unknown, fallback: number): number | null {
  if (raw === undefined || raw === '') return fallback;
  if (typeof raw !== 'string' || !/^\d+$/.test(raw)) return null;
  const value = Number.parseInt(raw, 10);
  return Number.isSafeInteger(value) ? value : null;
}

/**
 * Parse a date query parameter with the `Date` constructor.
 * Returns `undefined` when the parameter is absent or empty, `null` when it does not parse.
 */
function parseDateParam(raw: unknown): Date | undefined | null {
  if (raw === undefined || raw === '') return undefined;
  if (typeof raw !== 'string') return null;
  const parsed = new Date(raw);
  return Number.isNaN(parsed.getTime()) ? null : parsed;
}

/**
 * Register the ticket and cost reporting routes:
 *
 * - `GET /v1/tickets`        — filtered, paginated ticket list
 * - `GET /v1/tickets/:id`    — single ticket, 404 when missing
 * - `GET /v1/tickets/stats`  — aggregate stats (default period `today`)
 * - `GET /v1/cost`           — aggregate stats (default period `month`)
 * - `GET /v1/cost/breakdown` — cost grouped by a column (default `provider`)
 */
export async function ticketRoutes(app: FastifyInstance): Promise<void> {
  app.get<{
    Querystring: {
      provider?: string; model?: string; status?: string;
      caller?: string; project?: string; team?: string;
      from?: string; to?: string;
      limit?: string; offset?: string;
    };
  }>('/v1/tickets', async (req, reply) => {
    // Validate pagination and date filters up front: malformed values would
    // otherwise reach the SQL layer as NaN / Invalid Date and surface as a 500.
    const requestedLimit = parseCountParam(req.query.limit, 50);
    const offset = parseCountParam(req.query.offset, 0);
    if (requestedLimit === null || offset === null) {
      reply.code(400);
      return { error: '`limit` and `offset` must be non-negative integers' };
    }

    const from = parseDateParam(req.query.from);
    const to = parseDateParam(req.query.to);
    if (from === null || to === null) {
      reply.code(400);
      return { error: '`from` and `to` must be valid dates' };
    }

    const limit = Math.min(requestedLimit, MAX_TICKET_PAGE_SIZE);

    const { tickets, total } = await listTickets({
      provider: req.query.provider as never,
      model: req.query.model,
      status: req.query.status as never,
      caller: req.query.caller,
      project: req.query.project,
      team: req.query.team,
      from,
      to,
      limit,
      offset,
    });

    return {
      tickets: tickets.map(t => ({
        ...t,
        ticket_display: `TV-${String(t.ticket_number).padStart(5, '0')}`,
      })),
      total,
      // Echo the values that were actually applied to the query.
      limit,
      offset,
    };
  });

  app.get<{ Params: { id: string } }>('/v1/tickets/:id', async (req, reply) => {
    const ticket = await getTicket(req.params.id);
    if (!ticket) {
      reply.code(404);
      return { error: 'Ticket not found' };
    }
    return { ...ticket, ticket_display: `TV-${String(ticket.ticket_number).padStart(5, '0')}` };
  });

  app.get<{ Querystring: { period?: string } }>('/v1/tickets/stats', async (req) => {
    return getTicketStats(req.query.period ?? 'today');
  });

  app.get<{ Querystring: { period?: string } }>('/v1/cost', async (req) => {
    return getTicketStats(req.query.period ?? 'month');
  });

  app.get<{ Querystring: { group_by?: string } }>('/v1/cost/breakdown', async (req) => {
    return getCostBreakdown(req.query.group_by ?? 'provider');
  });
}
|
||||||
53
packages/core/src/server.ts
Normal file
53
packages/core/src/server.ts
Normal file
@ -0,0 +1,53 @@
|
|||||||
|
import Fastify from 'fastify';
|
||||||
|
import cors from '@fastify/cors';
|
||||||
|
import { config } from './config.js';
|
||||||
|
import { logger } from './observability/logger.js';
|
||||||
|
import { runMigrations } from './db/migrate.js';
|
||||||
|
import { closePool } from './db/client.js';
|
||||||
|
import { initProviders } from './providers/index.js';
|
||||||
|
import { healthRoutes } from './routes/health.js';
|
||||||
|
import { proxyRoutes } from './routes/proxy.js';
|
||||||
|
import { ticketRoutes } from './routes/tickets.js';
|
||||||
|
|
||||||
|
const app = Fastify({
|
||||||
|
logger: false,
|
||||||
|
});
|
||||||
|
|
||||||
|
await app.register(cors, { origin: true });
|
||||||
|
|
||||||
|
// ─── Routes ──────────────────────────────────────────────────────────────────
|
||||||
|
await app.register(healthRoutes);
|
||||||
|
await app.register(proxyRoutes);
|
||||||
|
await app.register(ticketRoutes);
|
||||||
|
|
||||||
|
// ─── Startup ─────────────────────────────────────────────────────────────────
|
||||||
|
/**
 * Boot sequence: run DB migrations (a failure is logged and startup
 * continues), initialise the provider registry, then start listening on the
 * configured host and port. Errors from `app.listen` propagate to the caller.
 */
async function startup(): Promise<void> {
  logger.info('TokenVault starting...');

  try {
    await runMigrations();
  } catch (err) {
    // Deliberately non-fatal: the server still comes up without a migrated DB.
    logger.error({ err }, 'DB migration failed — proceeding in degraded mode');
  }

  initProviders();

  const listenOptions = { port: config.port, host: config.host };
  await app.listen(listenOptions);

  logger.info({ port: config.port, host: config.host }, 'TokenVault proxy server running');
}
|
||||||
|
|
||||||
|
// ─── Graceful Shutdown ───────────────────────────────────────────────────────
|
||||||
|
/** Set once the first shutdown request arrives, so a repeated signal is ignored. */
let shuttingDown = false;

/**
 * Close the HTTP server and the DB pool, then exit the process.
 *
 * Both close steps are attempted even if the first one fails, and failures
 * are logged instead of escaping as an unhandled rejection (the signal
 * handlers call this with `void`). The process exits with 0 when both steps
 * succeed and 1 otherwise.
 *
 * @param signal - Name of the signal that triggered the shutdown, for logging.
 */
async function shutdown(signal: string): Promise<void> {
  if (shuttingDown) return;
  shuttingDown = true;

  logger.info({ signal }, 'TokenVault shutting down');

  let exitCode = 0;

  try {
    await app.close();
  } catch (err) {
    exitCode = 1;
    logger.error({ err }, 'Failed to close HTTP server during shutdown');
  }

  try {
    await closePool();
  } catch (err) {
    exitCode = 1;
    logger.error({ err }, 'Failed to close DB pool during shutdown');
  }

  process.exit(exitCode);
}
|
||||||
|
|
||||||
|
process.on('SIGINT', () => void shutdown('SIGINT'));
|
||||||
|
process.on('SIGTERM', () => void shutdown('SIGTERM'));
|
||||||
|
|
||||||
|
startup().catch((err) => {
|
||||||
|
logger.fatal({ err }, 'TokenVault startup failed');
|
||||||
|
process.exit(1);
|
||||||
|
});
|
||||||
157
packages/core/src/tickets/ticket-service.ts
Normal file
157
packages/core/src/tickets/ticket-service.ts
Normal file
@ -0,0 +1,157 @@
|
|||||||
|
import { createHash } from 'node:crypto';
|
||||||
|
import { query } from '../db/client.js';
|
||||||
|
import { logger } from '../observability/logger.js';
|
||||||
|
import type { Ticket, TicketFilter, TicketStatus, ProviderName, CostSummary, CostBreakdown } from '../types.js';
|
||||||
|
|
||||||
|
/**
 * Insert one row into `tickets` and return the stored row (`RETURNING *`).
 *
 * Optional numeric fields default to 0, `cache_hit` defaults to false, and
 * every other optional field is stored as NULL when not supplied. An info log
 * line is written after the insert.
 */
export async function createTicket(data: {
  session_id?: string;
  provider: ProviderName;
  model: string;
  status: TicketStatus;
  tokens_in: number;
  tokens_out: number;
  tokens_cached?: number;
  tokens_saved?: number;
  cost_usd: number;
  cost_saved_usd?: number;
  latency_ms: number;
  cache_hit?: boolean;
  compression_mode?: string;
  compression_ratio?: number;
  caller?: string;
  project?: string;
  team?: string;
  input_hash?: string;
  output_hash?: string;
}): Promise<Ticket> {
  const insertSql = `INSERT INTO tickets (
      session_id, provider, model, status,
      tokens_in, tokens_out, tokens_cached, tokens_saved,
      cost_usd, cost_saved_usd, latency_ms, cache_hit,
      compression_mode, compression_ratio,
      caller, project, team, input_hash, output_hash
    ) VALUES ($1,$2,$3,$4,$5,$6,$7,$8,$9,$10,$11,$12,$13,$14,$15,$16,$17,$18,$19)
    RETURNING *`;

  // Positional values — order must match the column list above.
  const values = [
    data.session_id ?? null,
    data.provider,
    data.model,
    data.status,
    data.tokens_in,
    data.tokens_out,
    data.tokens_cached ?? 0,
    data.tokens_saved ?? 0,
    data.cost_usd,
    data.cost_saved_usd ?? 0,
    data.latency_ms,
    data.cache_hit ?? false,
    data.compression_mode ?? null,
    data.compression_ratio ?? null,
    data.caller ?? null,
    data.project ?? null,
    data.team ?? null,
    data.input_hash ?? null,
    data.output_hash ?? null,
  ];

  const { rows } = await query<Ticket>(insertSql, values);
  const [created] = rows;

  logger.info(
    { ticket_number: created?.ticket_number, provider: data.provider, model: data.model, cost: data.cost_usd },
    'Ticket created',
  );

  return created!;
}
|
||||||
|
|
||||||
|
/**
 * Fetch a single ticket by its `id` column.
 *
 * @returns The matching row, or `null` when no row has that id.
 */
export async function getTicket(id: string): Promise<Ticket | null> {
  const { rows } = await query<Ticket>('SELECT * FROM tickets WHERE id = $1', [id]);
  const [ticket] = rows;
  return ticket ?? null;
}
|
||||||
|
|
||||||
|
/**
 * List tickets matching `filter`, newest first, together with the total
 * number of matching rows (ignoring pagination).
 *
 * Every filter value is bound as a query parameter; only the fixed column
 * names below are interpolated into the SQL text. Falsy filter values are
 * ignored. `limit` defaults to 50 and `offset` to 0; a value that is not a
 * non-negative integer (NaN, negative, fractional) falls back to the default
 * instead of being sent to the database.
 */
export async function listTickets(filter: TicketFilter = {}): Promise<{ tickets: Ticket[]; total: number }> {
  const conditions: string[] = [];
  const params: unknown[] = [];
  let idx = 1;

  if (filter.provider) { conditions.push(`provider = $${idx++}`); params.push(filter.provider); }
  if (filter.model) { conditions.push(`model = $${idx++}`); params.push(filter.model); }
  if (filter.status) { conditions.push(`status = $${idx++}`); params.push(filter.status); }
  if (filter.caller) { conditions.push(`caller = $${idx++}`); params.push(filter.caller); }
  if (filter.project) { conditions.push(`project = $${idx++}`); params.push(filter.project); }
  if (filter.team) { conditions.push(`team = $${idx++}`); params.push(filter.team); }
  if (filter.from) { conditions.push(`created_at >= $${idx++}`); params.push(filter.from); }
  if (filter.to) { conditions.push(`created_at <= $${idx++}`); params.push(filter.to); }

  const where = conditions.length > 0 ? `WHERE ${conditions.join(' AND ')}` : '';

  // Guard the pagination values: NaN or a negative number would make the
  // database reject the whole query.
  const asCount = (value: number | undefined, fallback: number): number =>
    typeof value === 'number' && Number.isInteger(value) && value >= 0 ? value : fallback;
  const limit = asCount(filter.limit, 50);
  const offset = asCount(filter.offset, 0);

  // The page query takes two extra placeholders after the filter parameters;
  // the count query reuses the same WHERE clause with the filter parameters only.
  const [tickets, countResult] = await Promise.all([
    query<Ticket>(`SELECT * FROM tickets ${where} ORDER BY created_at DESC LIMIT $${idx++} OFFSET $${idx++}`, [...params, limit, offset]),
    query<{ count: string }>(`SELECT COUNT(*) as count FROM tickets ${where}`, params),
  ]);

  return {
    tickets: tickets.rows,
    total: parseInt(countResult.rows[0]?.count ?? '0', 10),
  };
}
|
||||||
|
|
||||||
|
/**
 * Aggregate cost and token statistics over the tickets of a period.
 *
 * Supported periods: `today`, `week` (last 7 days), `month` (last 30 days)
 * and `all`. Any other value falls back to `today`, and the returned
 * `period` names the window that was actually queried.
 *
 * @param period - Requested period name.
 * @returns Totals parsed from the aggregate row; `cache_hit_rate` is
 *   cache hits divided by total requests (0 when there are no requests).
 */
export async function getTicketStats(period: string = 'today'): Promise<CostSummary> {
  // A Map (not a plain object) so that inherited keys such as "constructor"
  // or "toString" can never resolve to something that ends up in the SQL.
  const intervals = new Map<string, string>([
    ['today', "created_at >= CURRENT_DATE"],
    ['week', "created_at >= CURRENT_DATE - INTERVAL '7 days'"],
    ['month', "created_at >= CURRENT_DATE - INTERVAL '30 days'"],
    ['all', '1=1'],
  ]);

  const effectivePeriod = intervals.has(period) ? period : 'today';
  // `where` is always one of the fixed literals above, never caller input.
  const where = intervals.get(effectivePeriod)!;

  const result = await query<{
    total_cost: string; total_saved: string;
    total_in: string; total_out: string; total_cached: string; total_saved_tokens: string;
    total_requests: string; cache_hits: string; avg_ratio: string;
  }>(`SELECT
      COALESCE(SUM(cost_usd), 0) as total_cost,
      COALESCE(SUM(cost_saved_usd), 0) as total_saved,
      COALESCE(SUM(tokens_in), 0) as total_in,
      COALESCE(SUM(tokens_out), 0) as total_out,
      COALESCE(SUM(tokens_cached), 0) as total_cached,
      COALESCE(SUM(tokens_saved), 0) as total_saved_tokens,
      COUNT(*) as total_requests,
      COUNT(*) FILTER (WHERE cache_hit = true) as cache_hits,
      COALESCE(AVG(compression_ratio) FILTER (WHERE compression_ratio IS NOT NULL), 0) as avg_ratio
    FROM tickets WHERE ${where}`);

  // An aggregate without GROUP BY always yields exactly one row.
  const row = result.rows[0]!;
  const totalReqs = parseInt(row.total_requests, 10);
  const cacheHits = parseInt(row.cache_hits, 10);

  return {
    period: effectivePeriod,
    total_cost_usd: parseFloat(row.total_cost),
    total_saved_usd: parseFloat(row.total_saved),
    total_tokens_in: parseInt(row.total_in, 10),
    total_tokens_out: parseInt(row.total_out, 10),
    total_tokens_cached: parseInt(row.total_cached, 10),
    total_tokens_saved: parseInt(row.total_saved_tokens, 10),
    total_requests: totalReqs,
    cache_hit_rate: totalReqs > 0 ? cacheHits / totalReqs : 0,
    avg_compression_ratio: parseFloat(row.avg_ratio),
  };
}
|
||||||
|
|
||||||
|
/**
 * Break down cost, savings, request count and token totals of the last
 * 30 days by one ticket column, highest cost first.
 *
 * Allowed columns: `provider`, `model`, `project`, `team`, `caller`. Any
 * other value falls back to `provider`, and the returned `group_by` names the
 * column that was actually used. Rows whose column is NULL are reported under
 * the group value `unknown`.
 *
 * @param groupBy - Requested grouping column.
 */
export async function getCostBreakdown(groupBy: string = 'provider'): Promise<CostBreakdown[]> {
  // Whitelist: `col` is interpolated into the SQL text, so it must only ever
  // be one of these fixed identifiers.
  const validGroups = ['provider', 'model', 'project', 'team', 'caller'];
  const col = validGroups.includes(groupBy) ? groupBy : 'provider';

  const result = await query<{
    group_value: string; cost: string; saved: string; count: string; t_in: string; t_out: string;
  }>(`SELECT
      COALESCE(${col}, 'unknown') as group_value,
      COALESCE(SUM(cost_usd), 0) as cost,
      COALESCE(SUM(cost_saved_usd), 0) as saved,
      COUNT(*) as count,
      COALESCE(SUM(tokens_in), 0) as t_in,
      COALESCE(SUM(tokens_out), 0) as t_out
    FROM tickets
    WHERE created_at >= CURRENT_DATE - INTERVAL '30 days'
    GROUP BY ${col}
    ORDER BY cost DESC`);

  return result.rows.map(r => ({
    // Report the effective column, not the raw (possibly rejected) input.
    group_by: col,
    group_value: r.group_value,
    cost_usd: parseFloat(r.cost),
    saved_usd: parseFloat(r.saved),
    request_count: parseInt(r.count, 10),
    tokens_in: parseInt(r.t_in, 10),
    tokens_out: parseInt(r.t_out, 10),
  }));
}
|
||||||
|
|
||||||
|
/**
 * Short fingerprint of a string: the first 16 hex characters of its
 * SHA-256 digest.
 */
export function hashContent(content: string): string {
  const hasher = createHash('sha256');
  hasher.update(content);
  const hexDigest = hasher.digest('hex');
  return hexDigest.substring(0, 16);
}
|
||||||
151
packages/core/src/types.ts
Normal file
151
packages/core/src/types.ts
Normal file
@ -0,0 +1,151 @@
|
|||||||
|
// ─── Provider Types ────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
export type ProviderName = 'anthropic' | 'openai' | 'google' | 'mistral' | 'groq' | 'cerebras' | 'ollama';
|
||||||
|
|
||||||
|
export type TicketStatus = 'completed' | 'cached' | 'failed' | 'pending_review';
|
||||||
|
|
||||||
|
export type ModelTier = 'fast' | 'standard' | 'premium' | 'reasoning';
|
||||||
|
|
||||||
|
/** Static description of one model offered by a provider adapter. */
export interface ProviderModel {
  /** Model id as sent to the provider; matched exactly when resolving a provider. */
  readonly id: string;
  readonly displayName: string;
  readonly contextLength: number;
  /** Input price per million tokens. */
  readonly inputPricePerMTok: number;
  /** Output price per million tokens. */
  readonly outputPricePerMTok: number;
  /** Cached-input price per million tokens; omitted when the model has none. */
  readonly cachedPricePerMTok?: number;
  readonly tier: ModelTier;
}
|
||||||
|
|
||||||
|
/** Contract every LLM provider integration implements. */
export interface ProviderAdapter {
  readonly name: ProviderName;
  readonly displayName: string;
  readonly models: readonly ProviderModel[];

  /** Whether the adapter has what it needs to make requests. */
  isConfigured(): boolean;

  /** Execute one chat request against the provider. */
  chat(request: ChatRequest): Promise<ChatResponse>;

  supportsPromptCaching(): boolean;

  /** Cost of a request given its token counts; note that `model` is the LAST argument. */
  calculateCost(tokensIn: number, tokensOut: number, tokensCached: number, model: string): number;
}
|
||||||
|
|
||||||
|
// ─── Chat Types (OpenAI-compatible) ────────────────────────────────────────
|
||||||
|
|
||||||
|
/** One message of a chat conversation. */
export interface ChatMessage {
  readonly role: 'system' | 'user' | 'assistant' | 'tool';
  readonly content: string;
  readonly name?: string;
}
|
||||||
|
|
||||||
|
/** Chat request handed to a provider adapter, plus attribution fields. */
export interface ChatRequest {
  readonly model: string;
  readonly messages: readonly ChatMessage[];
  readonly temperature?: number;
  readonly max_tokens?: number;
  readonly stream?: boolean;

  // Attribution fields, all optional.
  readonly caller?: string;
  readonly project?: string;
  readonly team?: string;
}
|
||||||
|
|
||||||
|
/** Provider-independent chat result returned by every adapter. */
export interface ChatResponse {
  readonly id: string;
  readonly model: string;
  /** Adapter that produced this response. */
  readonly provider: ProviderName;
  readonly choices: readonly ChatChoice[];
  readonly usage: TokenUsage;
  /** Elapsed time in milliseconds, as measured by the adapter. */
  readonly latency_ms: number;
}
|
||||||
|
|
||||||
|
/** One completion alternative inside a ChatResponse. */
export interface ChatChoice {
  readonly index: number;
  readonly message: ChatMessage;
  /** Provider-reported reason the generation stopped. */
  readonly finish_reason: string;
}
|
||||||
|
|
||||||
|
/** Token accounting for one chat response. */
export interface TokenUsage {
  readonly prompt_tokens: number;
  readonly completion_tokens: number;
  readonly total_tokens: number;
  /** Prompt tokens served from the provider's cache, when reported. */
  readonly cached_tokens?: number;
}
|
||||||
|
|
||||||
|
// ─── Ticket Types ──────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
/** One row of the `tickets` table: the record kept for a single LLM request. */
export interface Ticket {
  readonly id: string;
  /** Sequence number; rendered to users as `TV-` plus the number zero-padded to 5 digits. */
  readonly ticket_number: number;
  readonly session_id: string | null;
  readonly provider: ProviderName;
  readonly model: string;
  readonly status: TicketStatus;

  readonly tokens_in: number;
  readonly tokens_out: number;
  readonly tokens_cached: number;
  readonly tokens_saved: number;

  readonly cost_usd: number;
  readonly cost_saved_usd: number;
  readonly latency_ms: number;
  readonly cache_hit: boolean;

  readonly compression_mode: string | null;
  readonly compression_ratio: number | null;

  readonly caller: string | null;
  readonly project: string | null;
  readonly team: string | null;

  // NOTE(review): the ticket insert stores NULL for both hashes when none is
  // supplied, so these may be null at runtime unless the column defines a
  // default — confirm against the schema.
  readonly input_hash: string;
  readonly output_hash: string;

  readonly created_at: Date;
}
|
||||||
|
|
||||||
|
/** Optional criteria for listing tickets; every field that is set narrows the result. */
export interface TicketFilter {
  readonly provider?: ProviderName;
  readonly model?: string;
  readonly status?: TicketStatus;
  readonly caller?: string;
  readonly project?: string;
  readonly team?: string;

  /** Inclusive lower bound on `created_at`. */
  readonly from?: Date;
  /** Inclusive upper bound on `created_at`. */
  readonly to?: Date;

  /** Page size. */
  readonly limit?: number;
  /** Number of rows to skip. */
  readonly offset?: number;
}
|
||||||
|
|
||||||
|
// ─── Cost Types ────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
/** Aggregate cost and token statistics for one reporting period. */
export interface CostSummary {
  /** Name of the period the figures cover. */
  readonly period: string;
  readonly total_cost_usd: number;
  readonly total_saved_usd: number;
  readonly total_tokens_in: number;
  readonly total_tokens_out: number;
  readonly total_tokens_cached: number;
  readonly total_tokens_saved: number;
  readonly total_requests: number;
  /** Cache hits divided by total requests. */
  readonly cache_hit_rate: number;
  readonly avg_compression_ratio: number;
}
|
||||||
|
|
||||||
|
/** One group of a cost breakdown. */
export interface CostBreakdown {
  /** Column the breakdown was grouped by. */
  readonly group_by: string;
  /** Value of that column for this group. */
  readonly group_value: string;
  readonly cost_usd: number;
  readonly saved_usd: number;
  readonly request_count: number;
  readonly tokens_in: number;
  readonly tokens_out: number;
}
|
||||||
|
|
||||||
|
// ─── Pipeline Types ────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
/**
 * Mutable state carried through the request pipeline. Only `requestId` and
 * `startTime` are read-only; the remaining fields are optional and writable.
 */
export interface PipelineContext {
  readonly requestId: string;
  readonly startTime: number;

  request: ChatRequest;

  resolvedProvider?: ProviderName;
  resolvedModel?: string;

  cacheHit?: boolean;
  cachedResponse?: ChatResponse;

  compressionMode?: string;
  compressionRatio?: number;
  tokensSaved?: number;

  response?: ChatResponse;
  ticket?: Ticket;
  error?: string;
}
|
||||||
8
packages/core/tsconfig.json
Normal file
8
packages/core/tsconfig.json
Normal file
@ -0,0 +1,8 @@
|
|||||||
|
{
|
||||||
|
"extends": "../../tsconfig.json",
|
||||||
|
"compilerOptions": {
|
||||||
|
"outDir": "dist",
|
||||||
|
"rootDir": "src"
|
||||||
|
},
|
||||||
|
"include": ["src"]
|
||||||
|
}
|
||||||
14
packages/core/tsup.config.ts
Normal file
14
packages/core/tsup.config.ts
Normal file
@ -0,0 +1,14 @@
|
|||||||
|
import { defineConfig } from 'tsup';
|
||||||
|
|
||||||
|
export default defineConfig({
|
||||||
|
entry: ['src/**/*.ts'],
|
||||||
|
format: ['esm'],
|
||||||
|
target: 'node20',
|
||||||
|
outDir: 'dist',
|
||||||
|
clean: true,
|
||||||
|
sourcemap: true,
|
||||||
|
dts: true,
|
||||||
|
splitting: false,
|
||||||
|
bundle: false,
|
||||||
|
skipNodeModulesBundle: true,
|
||||||
|
});
|
||||||
25
packages/dashboard/package.json
Normal file
25
packages/dashboard/package.json
Normal file
@ -0,0 +1,25 @@
|
|||||||
|
{
|
||||||
|
"name": "@tokenvault/dashboard",
|
||||||
|
"version": "0.1.0",
|
||||||
|
"description": "TokenVault web dashboard — cost tracking, ticket browser, provider analytics",
|
||||||
|
"type": "module",
|
||||||
|
"main": "dist/server.js",
|
||||||
|
"license": "Apache-2.0",
|
||||||
|
"scripts": {
|
||||||
|
"build": "tsup",
|
||||||
|
"dev": "tsx watch src/server.ts",
|
||||||
|
"test": "vitest run",
|
||||||
|
"clean": "rm -rf dist"
|
||||||
|
},
|
||||||
|
"dependencies": {
|
||||||
|
"fastify": "^5.3.0",
|
||||||
|
"@fastify/static": "^8.1.0"
|
||||||
|
},
|
||||||
|
"devDependencies": {
|
||||||
|
"tsup": "^8.4.0",
|
||||||
|
"tsx": "^4.19.0",
|
||||||
|
"typescript": "^5.7.0",
|
||||||
|
"vitest": "^3.1.0",
|
||||||
|
"@types/node": "^22.0.0"
|
||||||
|
}
|
||||||
|
}
|
||||||
286
packages/dashboard/public/index.html
Normal file
286
packages/dashboard/public/index.html
Normal file
@ -0,0 +1,286 @@
|
|||||||
|
<!DOCTYPE html>
|
||||||
|
<html lang="en">
|
||||||
|
<head>
|
||||||
|
<meta charset="UTF-8">
|
||||||
|
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
||||||
|
<title>TokenVault — LLM Cost Intelligence</title>
|
||||||
|
<style>
|
||||||
|
:root {
|
||||||
|
--primary: #6366f1;
|
||||||
|
--primary-light: #818cf8;
|
||||||
|
--primary-dark: #4f46e5;
|
||||||
|
--bg: #f8fafc;
|
||||||
|
--surface: #ffffff;
|
||||||
|
--text: #1e293b;
|
||||||
|
--text-muted: #64748b;
|
||||||
|
--border: #e2e8f0;
|
||||||
|
--green: #22c55e;
|
||||||
|
--red: #ef4444;
|
||||||
|
--amber: #f59e0b;
|
||||||
|
--blue: #3b82f6;
|
||||||
|
--radius: 12px;
|
||||||
|
--shadow: 0 1px 3px rgba(0,0,0,0.08), 0 1px 2px rgba(0,0,0,0.04);
|
||||||
|
}
|
||||||
|
* { margin:0; padding:0; box-sizing:border-box; }
|
||||||
|
body { font-family:-apple-system,BlinkMacSystemFont,'Segoe UI',Roboto,sans-serif; background:var(--bg); color:var(--text); }
|
||||||
|
|
||||||
|
/* Header */
|
||||||
|
.header { background:var(--surface); border-bottom:1px solid var(--border); padding:16px 24px; display:flex; align-items:center; justify-content:space-between; }
|
||||||
|
.header h1 { font-size:20px; font-weight:700; display:flex; align-items:center; gap:8px; }
|
||||||
|
.header h1 span { color:var(--primary); }
|
||||||
|
.header .lang { display:flex; gap:4px; }
|
||||||
|
.header .lang button { padding:4px 10px; border:1px solid var(--border); border-radius:6px; background:var(--surface); cursor:pointer; font-size:12px; }
|
||||||
|
.header .lang button.active { background:var(--primary); color:#fff; border-color:var(--primary); }
|
||||||
|
|
||||||
|
/* Tabs */
|
||||||
|
.tabs { display:flex; gap:0; background:var(--surface); border-bottom:2px solid var(--border); padding:0 24px; overflow-x:auto; }
|
||||||
|
.tab { padding:12px 20px; cursor:pointer; font-size:14px; font-weight:500; color:var(--text-muted); border-bottom:2px solid transparent; margin-bottom:-2px; white-space:nowrap; }
|
||||||
|
.tab:hover { color:var(--text); }
|
||||||
|
.tab.active { color:var(--primary); border-bottom-color:var(--primary); }
|
||||||
|
|
||||||
|
/* Content */
|
||||||
|
.content { max-width:1400px; margin:0 auto; padding:24px; }
|
||||||
|
|
||||||
|
/* Cards */
|
||||||
|
.cards { display:grid; grid-template-columns:repeat(auto-fit, minmax(200px, 1fr)); gap:16px; margin-bottom:24px; }
|
||||||
|
.card { background:var(--surface); border-radius:var(--radius); padding:20px; box-shadow:var(--shadow); border:1px solid var(--border); }
|
||||||
|
.card .label { font-size:12px; font-weight:600; text-transform:uppercase; letter-spacing:0.5px; color:var(--text-muted); margin-bottom:4px; }
|
||||||
|
.card .value { font-size:28px; font-weight:700; }
|
||||||
|
.card .sub { font-size:12px; color:var(--text-muted); margin-top:4px; }
|
||||||
|
.card .value.green { color:var(--green); }
|
||||||
|
.card .value.primary { color:var(--primary); }
|
||||||
|
.card .value.amber { color:var(--amber); }
|
||||||
|
|
||||||
|
/* Table */
|
||||||
|
.table-wrap { background:var(--surface); border-radius:var(--radius); box-shadow:var(--shadow); border:1px solid var(--border); overflow-x:auto; }
|
||||||
|
table { width:100%; border-collapse:collapse; font-size:13px; }
|
||||||
|
th { background:#f1f5f9; text-align:left; padding:10px 14px; font-weight:600; font-size:11px; text-transform:uppercase; letter-spacing:0.5px; color:var(--text-muted); border-bottom:1px solid var(--border); }
|
||||||
|
td { padding:10px 14px; border-bottom:1px solid var(--border); }
|
||||||
|
tr:last-child td { border-bottom:none; }
|
||||||
|
tr:hover { background:#f8fafc; }
|
||||||
|
|
||||||
|
/* Badges */
|
||||||
|
.badge { display:inline-block; padding:2px 8px; border-radius:6px; font-size:11px; font-weight:600; }
|
||||||
|
.badge.completed { background:#dcfce7; color:#166534; }
|
||||||
|
.badge.cached { background:#dbeafe; color:#1e40af; }
|
||||||
|
.badge.failed { background:#fef2f2; color:#991b1b; }
|
||||||
|
.badge.pending_review { background:#fef3c7; color:#92400e; }
|
||||||
|
|
||||||
|
/* Provider badges */
|
||||||
|
.provider { display:inline-flex; align-items:center; gap:4px; padding:2px 8px; border-radius:6px; font-size:11px; font-weight:600; }
|
||||||
|
.provider.anthropic { background:#fdf4ff; color:#86198f; }
|
||||||
|
.provider.openai { background:#f0fdf4; color:#166534; }
|
||||||
|
.provider.ollama { background:#eff6ff; color:#1e40af; }
|
||||||
|
.provider.google { background:#fef9c3; color:#854d0e; }
|
||||||
|
.provider.groq { background:#fce7f3; color:#9d174d; }
|
||||||
|
|
||||||
|
/* Section */
|
||||||
|
.section { margin-bottom:24px; }
|
||||||
|
.section h2 { font-size:16px; font-weight:600; margin-bottom:12px; color:var(--text); display:flex; align-items:center; gap:8px; }
|
||||||
|
|
||||||
|
/* Charts placeholder */
|
||||||
|
.chart-placeholder { background:var(--surface); border-radius:var(--radius); padding:40px; text-align:center; color:var(--text-muted); border:1px solid var(--border); box-shadow:var(--shadow); }
|
||||||
|
.chart-row { display:grid; grid-template-columns:2fr 1fr; gap:16px; margin-bottom:24px; }
|
||||||
|
@media (max-width: 768px) { .chart-row { grid-template-columns:1fr; } }
|
||||||
|
|
||||||
|
/* Loading */
|
||||||
|
.loading { text-align:center; padding:40px; color:var(--text-muted); }
|
||||||
|
.spin { display:inline-block; width:20px; height:20px; border:2px solid var(--border); border-top-color:var(--primary); border-radius:50%; animation:spin 0.6s linear infinite; }
|
||||||
|
@keyframes spin { to { transform:rotate(360deg); } }
|
||||||
|
|
||||||
|
/* Refresh button */
|
||||||
|
.refresh { padding:6px 14px; background:var(--primary); color:#fff; border:none; border-radius:8px; cursor:pointer; font-size:13px; font-weight:500; }
|
||||||
|
.refresh:hover { background:var(--primary-dark); }
|
||||||
|
</style>
|
||||||
|
</head>
|
||||||
|
<body>
|
||||||
|
|
||||||
|
<div class="header">
|
||||||
|
<h1><span>Token</span>Vault <small style="font-size:12px;color:var(--text-muted);margin-left:4px">v0.1.0</small></h1>
|
||||||
|
<div style="display:flex;align-items:center;gap:12px">
|
||||||
|
<div class="lang">
|
||||||
|
<button class="active" onclick="setLang('en')">EN</button>
|
||||||
|
<button onclick="setLang('de')">DE</button>
|
||||||
|
</div>
|
||||||
|
<button class="refresh" onclick="loadAll()">Refresh</button>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<div class="tabs">
|
||||||
|
<div class="tab active" data-tab="overview">Overview</div>
|
||||||
|
<div class="tab" data-tab="tickets">Tickets</div>
|
||||||
|
<div class="tab" data-tab="cost">Cost Analysis</div>
|
||||||
|
<div class="tab" data-tab="providers">Providers</div>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<div class="content">
|
||||||
|
<!-- Overview Tab -->
|
||||||
|
<div id="tab-overview">
|
||||||
|
<div class="cards" id="stats-cards">
|
||||||
|
<div class="loading"><div class="spin"></div> Loading...</div>
|
||||||
|
</div>
|
||||||
|
<div class="chart-row">
|
||||||
|
<div class="chart-placeholder" id="cost-timeline">Cost timeline will appear here after requests are tracked</div>
|
||||||
|
<div class="chart-placeholder" id="provider-split">Provider split will appear here</div>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<!-- Tickets Tab -->
|
||||||
|
<div id="tab-tickets" style="display:none">
|
||||||
|
<div class="section">
|
||||||
|
<h2>Recent Tickets</h2>
|
||||||
|
<div class="table-wrap">
|
||||||
|
<table>
|
||||||
|
<thead>
|
||||||
|
<tr>
|
||||||
|
<th>Ticket</th>
|
||||||
|
<th>Provider</th>
|
||||||
|
<th>Model</th>
|
||||||
|
<th>Status</th>
|
||||||
|
<th>Tokens In</th>
|
||||||
|
<th>Tokens Out</th>
|
||||||
|
<th>Cached</th>
|
||||||
|
<th>Saved</th>
|
||||||
|
<th>Cost</th>
|
||||||
|
<th>Latency</th>
|
||||||
|
<th>Time</th>
|
||||||
|
</tr>
|
||||||
|
</thead>
|
||||||
|
<tbody id="ticket-rows">
|
||||||
|
<tr><td colspan="11" class="loading"><div class="spin"></div></td></tr>
|
||||||
|
</tbody>
|
||||||
|
</table>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<!-- Cost Tab -->
|
||||||
|
<div id="tab-cost" style="display:none">
|
||||||
|
<div class="cards" id="cost-cards"></div>
|
||||||
|
<div class="section">
|
||||||
|
<h2>Cost Breakdown by Provider</h2>
|
||||||
|
<div class="table-wrap">
|
||||||
|
<table>
|
||||||
|
<thead><tr><th>Provider</th><th>Requests</th><th>Tokens In</th><th>Tokens Out</th><th>Cost</th><th>Saved</th></tr></thead>
|
||||||
|
<tbody id="breakdown-rows"></tbody>
|
||||||
|
</table>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<!-- Providers Tab -->
|
||||||
|
<div id="tab-providers" style="display:none">
|
||||||
|
<div class="section">
|
||||||
|
<h2>Configured Providers</h2>
|
||||||
|
<div id="provider-list" class="cards"></div>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<script>
|
||||||
|
const API = '/api';
|
||||||
|
let lang = 'en';
|
||||||
|
const t = {
|
||||||
|
en: { totalCost:'Total Cost', totalSaved:'Total Saved', requests:'Requests', cacheRate:'Cache Hit Rate', tokensIn:'Tokens In', tokensOut:'Tokens Out' },
|
||||||
|
de: { totalCost:'Gesamtkosten', totalSaved:'Gespart', requests:'Anfragen', cacheRate:'Cache Hit Rate', tokensIn:'Tokens Ein', tokensOut:'Tokens Aus' },
|
||||||
|
};
|
||||||
|
|
||||||
|
/**
 * Switch the dashboard language and re-render.
 *
 * Stores the language code in the script-level `lang` variable, marks the
 * language button whose text equals the upper-cased code as active (and all
 * others as inactive), then reloads every panel via `loadAll()`.
 *
 * @param {string} l - Language code, e.g. 'en' or 'de'.
 */
function setLang(l) {
  lang = l;
  for (const button of document.querySelectorAll('.lang button')) {
    const isSelected = button.textContent === l.toUpperCase();
    button.classList.toggle('active', isSelected);
  }
  loadAll();
}
|
||||||
|
|
||||||
|
// Tab switching: clicking a `.tab` element highlights it, hides every element
// whose id starts with "tab-", and shows the panel named by the tab's
// `data-tab` attribute (id "tab-<name>").
for (const tab of document.querySelectorAll('.tab')) {
  tab.addEventListener('click', () => {
    for (const other of document.querySelectorAll('.tab')) {
      other.classList.remove('active');
    }
    tab.classList.add('active');

    for (const panel of document.querySelectorAll('[id^="tab-"]')) {
      panel.style.display = 'none';
    }
    document.getElementById('tab-' + tab.dataset.tab).style.display = 'block';
  });
}
|
||||||
|
|
||||||
|
/**
 * Format a dollar amount for display (without the currency symbol).
 *
 * - Amounts whose magnitude is below one cent use 6 decimals so tiny per-request
 *   costs stay visible; everything else uses 2 decimals.
 * - Exactly zero renders as "0.00" rather than "0.000000".
 * - Negative amounts are classified by magnitude, not by sign.
 * - Numeric strings are accepted (some database drivers return decimal
 *   columns as strings).
 * - Anything that is not a finite number renders as "0".
 *
 * @param {number|string|null|undefined} n - Amount in USD.
 * @returns {string} Formatted amount.
 */
function fmt(n) {
  const value = typeof n === 'string' && n.trim() !== '' ? Number(n) : n;
  if (typeof value !== 'number' || !Number.isFinite(value)) {
    return '0';
  }
  if (value === 0) {
    return '0.00';
  }
  return Math.abs(value) < 0.01 ? value.toFixed(6) : value.toFixed(2);
}
|
||||||
|
/**
 * Abbreviate a count with a K (thousands) or M (millions) suffix, one decimal.
 *
 * Missing or non-numeric input renders as "0" instead of leaking
 * "undefined"/"null"/"NaN" into the page. The millions threshold is 999,950
 * because anything at or above it would otherwise round up to "1000.0K".
 *
 * @param {number|string|null|undefined} n - Count to format.
 * @returns {string} e.g. "999", "1.5K", "2.5M".
 */
function fmtK(n) {
  const value = Number(n ?? 0);
  if (!Number.isFinite(value)) {
    return '0';
  }
  if (value >= 999950) {
    return (value / 1000000).toFixed(1) + 'M';
  }
  if (value >= 1000) {
    return (value / 1000).toFixed(1) + 'K';
  }
  return String(value);
}
|
||||||
|
/**
 * Format a timestamp as short month, day, hour and minute.
 *
 * Uses the German locale when the script-level `lang` is 'de' and US English
 * for any other value.
 *
 * @param {string|number|Date} d - Any value accepted by the Date constructor.
 * @returns {string} Locale-formatted date/time.
 */
function fmtTime(d) {
  const locale = lang === 'de' ? 'de-DE' : 'en-US';
  const options = { month: 'short', day: 'numeric', hour: '2-digit', minute: '2-digit' };
  return new Date(d).toLocaleString(locale, options);
}
|
||||||
|
|
||||||
|
/** Characters that must be replaced before text is interpolated into HTML. */
const HTML_ESCAPES = { '&': '&amp;', '<': '&lt;', '>': '&gt;', '"': '&quot;', "'": '&#39;' };

/**
 * Escape a value for safe use inside HTML text or a quoted attribute.
 * API payloads (model names, provider names, ...) originate from callers of
 * the proxy, so they must never reach `innerHTML` unescaped.
 *
 * @param {unknown} value - Value to render; null/undefined become ''.
 * @returns {string} HTML-safe text.
 */
function escapeHtml(value) {
  return String(value ?? '').replace(/[&<>"']/g, (ch) => HTML_ESCAPES[ch]);
}

/**
 * GET a JSON document from the dashboard API.
 *
 * @param {string} path - Path (including query string) below `API`.
 * @returns {Promise<any>} Parsed JSON body.
 * @throws {Error} When the response status is not 2xx, so an error payload is
 *   never rendered as if it were data.
 */
async function fetchJson(path) {
  const res = await fetch(API + path);
  if (!res.ok) {
    throw new Error(`TokenVault API ${path} responded with HTTP ${res.status}`);
  }
  return res.json();
}

/**
 * Render the overview cards (cost, savings, requests, cache rate, tokens) for
 * the current month into #stats-cards. Shows an "Offline" card on any failure.
 */
async function loadStats() {
  const target = document.getElementById('stats-cards');
  try {
    const data = await fetchJson('/tickets/stats?period=month');
    const labels = t[lang];
    // A missing rate renders as 0% rather than NaN%.
    const hitRatePercent = ((Number(data.cache_hit_rate) || 0) * 100).toFixed(0);
    target.innerHTML = `
      <div class="card"><div class="label">${labels.totalCost}</div><div class="value primary">$${fmt(data.total_cost_usd)}</div><div class="sub">Last 30 days</div></div>
      <div class="card"><div class="label">${labels.totalSaved}</div><div class="value green">$${fmt(data.total_saved_usd)}</div><div class="sub">Via compression + caching</div></div>
      <div class="card"><div class="label">${labels.requests}</div><div class="value">${escapeHtml(fmtK(data.total_requests))}</div><div class="sub">Tickets tracked</div></div>
      <div class="card"><div class="label">${labels.cacheRate}</div><div class="value amber">${hitRatePercent}%</div><div class="sub">Semantic cache hits</div></div>
      <div class="card"><div class="label">${labels.tokensIn}</div><div class="value">${escapeHtml(fmtK(data.total_tokens_in))}</div></div>
      <div class="card"><div class="label">${labels.tokensOut}</div><div class="value">${escapeHtml(fmtK(data.total_tokens_out))}</div></div>
    `;
  } catch (err) {
    console.warn('TokenVault dashboard: could not load stats', err);
    target.innerHTML = '<div class="card"><div class="label">Status</div><div class="value">Offline</div><div class="sub">TokenVault core not reachable</div></div>';
  }
}

/**
 * Render the 50 most recent tickets into #ticket-rows. Every API-supplied
 * string is escaped before insertion. Shows a placeholder row when there are
 * no tickets and an error row when the request fails.
 */
async function loadTickets() {
  const tbody = document.getElementById('ticket-rows');
  try {
    const data = await fetchJson('/tickets?limit=50');
    const rows = data.tickets.map((ticket) => `
      <tr>
        <td><strong>${escapeHtml(ticket.ticket_display)}</strong></td>
        <td><span class="provider ${escapeHtml(ticket.provider)}">${escapeHtml(ticket.provider)}</span></td>
        <td style="font-size:12px">${escapeHtml(ticket.model)}</td>
        <td><span class="badge ${escapeHtml(ticket.status)}">${escapeHtml(ticket.status)}</span></td>
        <td>${escapeHtml(fmtK(ticket.tokens_in))}</td>
        <td>${escapeHtml(fmtK(ticket.tokens_out))}</td>
        <td>${escapeHtml(fmtK(ticket.tokens_cached))}</td>
        <td style="color:var(--green)">${escapeHtml(fmtK(ticket.tokens_saved))}</td>
        <td>$${fmt(ticket.cost_usd)}</td>
        <td>${escapeHtml(ticket.latency_ms)}ms</td>
        <td style="font-size:11px;color:var(--text-muted)">${escapeHtml(fmtTime(ticket.created_at))}</td>
      </tr>
    `).join('');
    tbody.innerHTML = rows || '<tr><td colspan="11" style="text-align:center;color:var(--text-muted)">No tickets yet — send your first request through the proxy</td></tr>';
  } catch (err) {
    console.warn('TokenVault dashboard: could not load tickets', err);
    tbody.innerHTML = '<tr><td colspan="11" class="loading">Could not load tickets</td></tr>';
  }
}

/**
 * Render the cost cards (#cost-cards) and the per-provider breakdown table
 * (#breakdown-rows). Both requests are started before either is awaited so
 * they run in parallel. On failure an error row is shown instead of leaving
 * the panel silently empty.
 */
async function loadBreakdown() {
  try {
    const [stats, breakdown] = await Promise.all([
      fetchJson('/cost?period=month'),
      fetchJson('/cost/breakdown?group_by=provider'),
    ]);
    const labels = t[lang];
    const compressionPercent = ((Number(stats.avg_compression_ratio) || 0) * 100).toFixed(0);
    document.getElementById('cost-cards').innerHTML = `
      <div class="card"><div class="label">${labels.totalCost} (30d)</div><div class="value primary">$${fmt(stats.total_cost_usd)}</div></div>
      <div class="card"><div class="label">${labels.totalSaved}</div><div class="value green">$${fmt(stats.total_saved_usd)}</div></div>
      <div class="card"><div class="label">Avg Compression</div><div class="value">${compressionPercent}%</div></div>
    `;
    const rows = breakdown.map((entry) => `
      <tr>
        <td><span class="provider ${escapeHtml(entry.group_value)}">${escapeHtml(entry.group_value)}</span></td>
        <td>${escapeHtml(entry.request_count)}</td>
        <td>${escapeHtml(fmtK(entry.tokens_in))}</td>
        <td>${escapeHtml(fmtK(entry.tokens_out))}</td>
        <td>$${fmt(entry.cost_usd)}</td>
        <td style="color:var(--green)">$${fmt(entry.saved_usd)}</td>
      </tr>
    `).join('');
    document.getElementById('breakdown-rows').innerHTML = rows || '<tr><td colspan="6">No data yet</td></tr>';
  } catch (err) {
    console.warn('TokenVault dashboard: could not load cost breakdown', err);
    document.getElementById('breakdown-rows').innerHTML = '<tr><td colspan="6" class="loading">Could not load cost data</td></tr>';
  }
}

/**
 * Render one card per provider reported by the health endpoint into
 * #provider-list. On failure a short notice is shown instead of leaving the
 * panel silently empty.
 */
async function loadProviders() {
  const target = document.getElementById('provider-list');
  try {
    const data = await fetchJson('/health');
    target.innerHTML = data.providers.map((p) => `
      <div class="card">
        <div class="label">${escapeHtml(String(p.name ?? '').toUpperCase())}</div>
        <div class="value" style="font-size:18px">${p.configured ? '✓ Active' : '✗ Not configured'}</div>
        <div class="sub">${escapeHtml(p.models)} model${p.models !== 1 ? 's' : ''} available</div>
      </div>
    `).join('');
  } catch (err) {
    console.warn('TokenVault dashboard: could not load providers', err);
    target.innerHTML = '<div class="loading">Could not load providers</div>';
  }
}
|
||||||
|
|
||||||
|
/**
 * Kick off a refresh of every dashboard panel.
 *
 * The four loaders are started in a fixed order and not awaited, so this
 * function returns immediately while the panels update independently.
 */
function loadAll() {
  loadStats();
  loadTickets();
  loadBreakdown();
  loadProviders();
}
|
||||||
|
loadAll();
|
||||||
|
setInterval(loadAll, 30000);
|
||||||
|
</script>
|
||||||
|
</body>
|
||||||
|
</html>
|
||||||
51
packages/dashboard/src/server.ts
Normal file
51
packages/dashboard/src/server.ts
Normal file
@ -0,0 +1,51 @@
|
|||||||
|
import Fastify from 'fastify';
|
||||||
|
import fastifyStatic from '@fastify/static';
|
||||||
|
import { fileURLToPath } from 'node:url';
|
||||||
|
import { dirname, join } from 'node:path';
|
||||||
|
|
||||||
|
const __filename = fileURLToPath(import.meta.url);
|
||||||
|
const __dirname = dirname(__filename);
|
||||||
|
|
||||||
|
const PORT = parseInt(process.env['PORT'] ?? '3301', 10);
|
||||||
|
const CORE_URL = process.env['TOKENVAULT_CORE_URL'] ?? 'http://localhost:3300';
|
||||||
|
|
||||||
|
const app = Fastify({ logger: false });
|
||||||
|
|
||||||
|
// Serve static dashboard HTML
|
||||||
|
await app.register(fastifyStatic, {
|
||||||
|
root: join(__dirname, '..', 'public'),
|
||||||
|
prefix: '/',
|
||||||
|
});
|
||||||
|
|
||||||
|
// Proxy API requests to core.
//
// Each dashboard route maps to one GET endpoint on the core service. The
// upstream status code is forwarded so the browser can tell an error payload
// from real data, and a failure to reach (or parse a response from) core is
// reported as 502 with a JSON body instead of an unhandled exception.
const CORE_ROUTES = [
  // The health route never forwarded the query string; keep it that way.
  { route: '/api/health', corePath: '/health', forwardQuery: false },
  { route: '/api/tickets', corePath: '/v1/tickets', forwardQuery: true },
  { route: '/api/tickets/stats', corePath: '/v1/tickets/stats', forwardQuery: true },
  { route: '/api/cost', corePath: '/v1/cost', forwardQuery: true },
  { route: '/api/cost/breakdown', corePath: '/v1/cost/breakdown', forwardQuery: true },
] as const;

for (const { route, corePath, forwardQuery } of CORE_ROUTES) {
  app.get(route, async (req, reply) => {
    // req.url is path + query only, so a dummy base is needed to parse it.
    const qs = forwardQuery ? new URL(req.url, 'http://localhost').search : '';
    try {
      const upstream = await fetch(`${CORE_URL}${corePath}${qs}`);
      const body: unknown = await upstream.json();
      return reply.code(upstream.status).send(body);
    } catch (err) {
      const detail = err instanceof Error ? err.message : String(err);
      return reply.code(502).send({ error: 'TokenVault core not reachable', detail });
    }
  });
}
|
||||||
|
|
||||||
|
await app.listen({ port: PORT, host: '0.0.0.0' });
|
||||||
|
console.log(`TokenVault Dashboard running on http://localhost:${PORT}`);
|
||||||
8
packages/dashboard/tsconfig.json
Normal file
8
packages/dashboard/tsconfig.json
Normal file
@ -0,0 +1,8 @@
|
|||||||
|
{
|
||||||
|
"extends": "../../tsconfig.json",
|
||||||
|
"compilerOptions": {
|
||||||
|
"outDir": "dist",
|
||||||
|
"rootDir": "src"
|
||||||
|
},
|
||||||
|
"include": ["src"]
|
||||||
|
}
|
||||||
14
packages/dashboard/tsup.config.ts
Normal file
14
packages/dashboard/tsup.config.ts
Normal file
@ -0,0 +1,14 @@
|
|||||||
|
import { defineConfig } from 'tsup';
|
||||||
|
|
||||||
|
export default defineConfig({
|
||||||
|
entry: ['src/**/*.ts'],
|
||||||
|
format: ['esm'],
|
||||||
|
target: 'node20',
|
||||||
|
outDir: 'dist',
|
||||||
|
clean: true,
|
||||||
|
sourcemap: true,
|
||||||
|
dts: true,
|
||||||
|
splitting: false,
|
||||||
|
bundle: false,
|
||||||
|
skipNodeModulesBundle: true,
|
||||||
|
});
|
||||||
28
packages/mcp/package.json
Normal file
28
packages/mcp/package.json
Normal file
@ -0,0 +1,28 @@
|
|||||||
|
{
|
||||||
|
"name": "@tokenvault/mcp",
|
||||||
|
"version": "0.1.0",
|
||||||
|
"description": "TokenVault MCP server for IDE integration — compressed reads, ticket view, cost dashboard",
|
||||||
|
"type": "module",
|
||||||
|
"main": "dist/server.js",
|
||||||
|
"bin": {
|
||||||
|
"tokenvault-mcp": "dist/server.js"
|
||||||
|
},
|
||||||
|
"license": "Apache-2.0",
|
||||||
|
"scripts": {
|
||||||
|
"build": "tsup",
|
||||||
|
"dev": "tsx watch src/server.ts",
|
||||||
|
"test": "vitest run",
|
||||||
|
"clean": "rm -rf dist"
|
||||||
|
},
|
||||||
|
"dependencies": {
|
||||||
|
"@modelcontextprotocol/sdk": "^1.27.0",
|
||||||
|
"zod": "^3.24.0"
|
||||||
|
},
|
||||||
|
"devDependencies": {
|
||||||
|
"tsup": "^8.4.0",
|
||||||
|
"tsx": "^4.19.0",
|
||||||
|
"typescript": "^5.7.0",
|
||||||
|
"vitest": "^3.1.0",
|
||||||
|
"@types/node": "^22.0.0"
|
||||||
|
}
|
||||||
|
}
|
||||||
85
packages/mcp/src/server.ts
Normal file
85
packages/mcp/src/server.ts
Normal file
@ -0,0 +1,85 @@
|
|||||||
|
import { McpServer } from '@modelcontextprotocol/sdk/server/mcp.js';
|
||||||
|
import { StdioServerTransport } from '@modelcontextprotocol/sdk/server/stdio.js';
|
||||||
|
import { z } from 'zod';
|
||||||
|
|
||||||
|
const CORE_URL = process.env['TOKENVAULT_URL'] ?? 'http://localhost:3300';
|
||||||
|
|
||||||
|
/**
 * GET a JSON document from the TokenVault core service.
 *
 * @param path - Path (and optional query string) appended to `CORE_URL`.
 * @returns The parsed JSON body, typed as `T` without runtime validation.
 * @throws Error carrying the HTTP status code when the response is not 2xx.
 */
async function fetchCore<T>(path: string): Promise<T> {
  const response = await fetch(`${CORE_URL}${path}`);
  if (response.ok) {
    return response.json() as Promise<T>;
  }
  throw new Error(`TokenVault API error: ${response.status}`);
}
|
||||||
|
|
||||||
|
const server = new McpServer({
|
||||||
|
name: 'tokenvault',
|
||||||
|
version: '0.1.0',
|
||||||
|
});
|
||||||
|
|
||||||
|
// ─── tv_ticket: View and search tickets ──────────────────────────────────────
//
// Actions:
//   get   – fetch one ticket by `id` (an error result is returned if `id` is missing)
//   stats – aggregated statistics for `period` (defaults to 'today')
//   list  – tickets filtered by `provider` / `project`, at most `limit` (default 20)
//
// `id` and `period` are free-form strings supplied by the tool caller, so they
// are URL-encoded before being placed in the request path / query string.
server.tool(
  'tv_ticket',
  'View, search, and manage TokenVault tickets. Every LLM request = 1 ticket.',
  {
    action: z.enum(['list', 'get', 'stats']).describe('Action to perform'),
    id: z.string().optional().describe('Ticket ID (for get action)'),
    provider: z.string().optional().describe('Filter by provider'),
    project: z.string().optional().describe('Filter by project'),
    period: z.string().optional().describe('Stats period: today, week, month, all'),
    limit: z.number().optional().describe('Max results (default 20)'),
  },
  async ({ action, id, provider, project, period, limit }) => {
    // Wrap any payload as a pretty-printed JSON text result.
    const asText = (payload: unknown) => ({
      content: [{ type: 'text' as const, text: JSON.stringify(payload, null, 2) }],
    });

    if (action === 'get') {
      if (!id) {
        // Previously this fell through to a list query, which silently
        // answered a different question than the one asked.
        return {
          content: [{ type: 'text' as const, text: 'tv_ticket: the "get" action requires an "id".' }],
          isError: true,
        };
      }
      return asText(await fetchCore(`/v1/tickets/${encodeURIComponent(id)}`));
    }

    if (action === 'stats') {
      const query = new URLSearchParams({ period: period ?? 'today' });
      return asText(await fetchCore(`/v1/tickets/stats?${query}`));
    }

    const params = new URLSearchParams();
    if (provider) params.set('provider', provider);
    if (project) params.set('project', project);
    params.set('limit', String(limit ?? 20));
    return asText(await fetchCore(`/v1/tickets?${params}`));
  },
);
|
||||||
|
|
||||||
|
// ─── tv_cost: Cost dashboard ─────────────────────────────────────────────────
//
// Returns the cost summary for `period` (default 'month') as pretty-printed
// JSON under a heading; when `group_by` is given, a second section with the
// grouped breakdown is appended. Both requests are issued concurrently.
server.tool(
  'tv_cost',
  'Show cost dashboard: spend, savings, forecasts. Tracks every token across all LLM providers.',
  {
    period: z.enum(['today', 'week', 'month', 'all']).optional().describe('Time period'),
    group_by: z.enum(['provider', 'model', 'project', 'team']).optional().describe('Group breakdown by'),
  },
  async ({ period, group_by }) => {
    const summaryRequest = fetchCore(`/v1/cost?period=${period ?? 'month'}`);
    const breakdownRequest = group_by
      ? fetchCore(`/v1/cost/breakdown?group_by=${group_by}`)
      : Promise.resolve(null);
    const [summary, breakdown] = await Promise.all([summaryRequest, breakdownRequest]);

    let text = `# Cost Summary (${period ?? 'month'})\n${JSON.stringify(summary, null, 2)}`;
    if (breakdown) {
      // Blank line between the two sections.
      text += `\n\n# Breakdown by ${group_by}\n${JSON.stringify(breakdown, null, 2)}`;
    }
    return { content: [{ type: 'text' as const, text }] };
  },
);
|
||||||
|
|
||||||
|
// ─── tv_health: Service health ───────────────────────────────────────────────
//
// Takes no arguments; returns the core service's /health document as
// pretty-printed JSON text.
server.tool(
  'tv_health',
  'Check TokenVault service health and configured providers.',
  {},
  async () => {
    const report = JSON.stringify(await fetchCore('/health'), null, 2);
    return { content: [{ type: 'text' as const, text: report }] };
  },
);
|
||||||
|
|
||||||
|
// ─── Start ───────────────────────────────────────────────────────────────────
|
||||||
|
const transport = new StdioServerTransport();
|
||||||
|
await server.connect(transport);
|
||||||
8
packages/mcp/tsconfig.json
Normal file
8
packages/mcp/tsconfig.json
Normal file
@ -0,0 +1,8 @@
|
|||||||
|
{
|
||||||
|
"extends": "../../tsconfig.json",
|
||||||
|
"compilerOptions": {
|
||||||
|
"outDir": "dist",
|
||||||
|
"rootDir": "src"
|
||||||
|
},
|
||||||
|
"include": ["src"]
|
||||||
|
}
|
||||||
15
packages/mcp/tsup.config.ts
Normal file
15
packages/mcp/tsup.config.ts
Normal file
@ -0,0 +1,15 @@
|
|||||||
|
import { defineConfig } from 'tsup';
|
||||||
|
|
||||||
|
export default defineConfig({
|
||||||
|
entry: ['src/**/*.ts'],
|
||||||
|
format: ['esm'],
|
||||||
|
target: 'node20',
|
||||||
|
outDir: 'dist',
|
||||||
|
clean: true,
|
||||||
|
sourcemap: true,
|
||||||
|
dts: true,
|
||||||
|
splitting: false,
|
||||||
|
bundle: false,
|
||||||
|
skipNodeModulesBundle: true,
|
||||||
|
banner: { js: '#!/usr/bin/env node' },
|
||||||
|
});
|
||||||
2730
pnpm-lock.yaml
generated
Normal file
2730
pnpm-lock.yaml
generated
Normal file
File diff suppressed because it is too large
Load Diff
2
pnpm-workspace.yaml
Normal file
2
pnpm-workspace.yaml
Normal file
@ -0,0 +1,2 @@
|
|||||||
|
packages:
|
||||||
|
- 'packages/*'
|
||||||
17
tsconfig.json
Normal file
17
tsconfig.json
Normal file
@ -0,0 +1,17 @@
|
|||||||
|
{
|
||||||
|
"compilerOptions": {
|
||||||
|
"target": "ES2023",
|
||||||
|
"module": "Node16",
|
||||||
|
"moduleResolution": "Node16",
|
||||||
|
"strict": true,
|
||||||
|
"esModuleInterop": true,
|
||||||
|
"skipLibCheck": true,
|
||||||
|
"forceConsistentCasingInFileNames": true,
|
||||||
|
"resolveJsonModule": true,
|
||||||
|
"declaration": true,
|
||||||
|
"declarationMap": true,
|
||||||
|
"sourceMap": true,
|
||||||
|
"outDir": "dist",
|
||||||
|
"rootDir": "src"
|
||||||
|
}
|
||||||
|
}
|
||||||
Loading…
x
Reference in New Issue
Block a user