feat: proxy-side compression for all providers + full MCP tool suite

- message-compressor.ts: code-block-aware compression for Anthropic + OpenAI messages
- anthropic-proxy.ts: fix OAuth Bearer token forwarding (Claude Code subscription auth)
  + integrate proxy-side compression, track tokens_saved per ticket
- openai-proxy.ts: integrate proxy-side compression, track tokens_saved
- mcp/compression.ts: lean-ctx-style modes (full/signatures/map/aggressive/entropy)
  + RTK-style patterns for git/npm/cargo/docker/ls output
- mcp/server.ts: 8 tools — tv_read, tv_shell, tv_search, tv_compress, tv_update,
  tv_ticket, tv_cost, tv_health
- tv_update: auto-update lean-ctx (cargo install) + RTK (brew upgrade)
- scripts/update-compression-tools.sh + LaunchAgent plist: daily auto-update at 03:00
This commit is contained in:
Rene Fichtmueller 2026-04-14 23:13:56 +02:00
parent 7ea2d1e266
commit 587431a915
7 changed files with 1053 additions and 94 deletions

View File

@ -0,0 +1,168 @@
/**
* Proxy-side message compression applied to ALL providers before forwarding.
*
* Strategy: conservative, lossless-for-meaning compression that is safe to
* apply to any LLM message without changing semantics:
* 1. Preserve content inside fenced code blocks (``` / ~~~) verbatim
* 2. Collapse 2+ consecutive blank lines → 1 blank line outside code blocks
* 3. Strip trailing whitespace from every non-code line
* 4. Remove single-line HTML/XML comments (<!-- ... -->) outside code blocks
* 5. Collapse runs of identical separator lines (4+ separator characters,
*    e.g. ──── or ====) so that at most two copies remain
*
* Token estimation: 1 token ≈ 4 chars (GPT-3.5/4 / Claude approximation).
*/
/** Outcome of a {@link compressText} run together with its size bookkeeping. */
export interface CompressResult {
  /** Compressed text (the untouched input when it was too short to process). */
  text: string;
  /** Length of the input in UTF-16 code units. */
  originalChars: number;
  /** Length of `text` in UTF-16 code units. */
  compressedChars: number;
  /** Estimated tokens saved, assuming 4 characters per token; never negative. */
  estimatedTokensSaved: number;
}

/**
 * Compress a plain text string, preserving fenced code blocks verbatim.
 *
 * Outside code blocks this collapses runs of blank lines to one, keeps at most
 * two copies of a repeated separator line, removes single-line HTML/XML
 * comments and strips trailing whitespace. Trailing whitespace at the very end
 * of the result is removed as well.
 *
 * A fenced block is closed only by a fence that uses the same character as the
 * opening fence, is at least as long, and carries no info string. Any other
 * fence-looking line inside the block is treated as code, so nested examples
 * (e.g. ``` inside a ```` block, or ~~~ inside a ``` block) stay untouched.
 *
 * @param input Text to compress. Inputs shorter than 50 characters, and
 *   non-string values that reach here through an unchecked cast, are returned
 *   as-is with zero savings.
 * @returns The compressed text plus character counts and the estimated saving.
 */
export function compressText(input: string): CompressResult {
  if (typeof input !== 'string' || input.length < 50) {
    const size = typeof input === 'string' ? input.length : 0;
    return { text: input, originalChars: size, compressedChars: size, estimatedTokensSaved: 0 };
  }

  const fencePattern = /^(`{3,}|~{3,})/;
  const separatorPattern = /^[-=─━*#/]{4,}$/;
  const commentPattern = /<!--[\s\S]*?-->/g;

  const out: string[] = [];
  // Marker (e.g. "```" or "~~~~") that opened the current code block, if any.
  let openFence: string | null = null;
  let blankRun = 0;
  let sepLine: string | null = null;
  let sepRun = 0;

  for (const raw of input.split('\n')) {
    const trimmed = raw.trim();

    const fenceMatch = fencePattern.exec(trimmed);
    if (fenceMatch) {
      const marker = fenceMatch[1]!;
      const closesOpenFence =
        openFence !== null &&
        marker.charAt(0) === openFence.charAt(0) &&
        marker.length >= openFence.length &&
        trimmed.length === marker.length;
      if (openFence === null || closesOpenFence) {
        openFence = openFence === null ? marker : null;
        out.push(raw);
        blankRun = 0;
        sepLine = null;
        sepRun = 0;
        continue;
      }
      // Otherwise this is a fence-looking line inside an open block: code.
    }

    // Inside a code block — pass through verbatim.
    if (openFence !== null) {
      out.push(raw);
      continue;
    }

    // Keep only the first blank line of a run.
    if (trimmed === '') {
      blankRun++;
      if (blankRun === 1) out.push('');
      continue;
    }
    blankRun = 0;

    // Repeated identical separator lines: the first two copies survive.
    if (separatorPattern.test(trimmed)) {
      if (trimmed === sepLine) {
        sepRun++;
        if (sepRun <= 1) out.push(raw.trimEnd());
        continue;
      }
      sepLine = trimmed;
      sepRun = 0;
    } else {
      sepLine = null;
      sepRun = 0;
    }

    // Drop comments that open and close on this line, then trailing whitespace.
    out.push(raw.replace(commentPattern, '').trimEnd());
  }

  const compressed = out.join('\n').trimEnd();
  const originalChars = input.length;
  const compressedChars = compressed.length;
  const estimatedTokensSaved = Math.max(0, Math.floor((originalChars - compressedChars) / 4));
  return { text: compressed, originalChars, compressedChars, estimatedTokensSaved };
}
// ─── Anthropic format ────────────────────────────────────────────────────────
/** One entry of an Anthropic content array; only `text` blocks are compressed. */
type AnthropicContentBlock = { type: string; text?: string; [k: string]: unknown };
/** An Anthropic message whose content is a plain string or an array of blocks. */
type AnthropicMessage = { role: string; content: string | AnthropicContentBlock[]; [k: string]: unknown };

/** Result of compressing an Anthropic request body. */
export interface AnthropicCompressResult {
  /** Messages with string content and `text` blocks compressed. */
  messages: AnthropicMessage[];
  /** The (possibly compressed) top-level system prompt. */
  system?: string;
  /** Sum of the estimated token savings over messages and system prompt. */
  totalTokensSaved: number;
}

/**
 * Compress an Anthropic messages request body.
 *
 * String content and `text` blocks inside content arrays are run through
 * `compressText`; every other block and every other message field is passed
 * through untouched. Messages are copied, never mutated.
 *
 * @param messages The request's `messages` array.
 * @param system The request's top-level `system` value. Only a string is
 *   compressed. Callers obtain this value through a cast, so a non-string value
 *   (for example an array of system blocks) can arrive here; it is returned
 *   unchanged instead of being handed to the text compressor.
 * @returns The rewritten messages, the system value and the total saving.
 */
export function compressAnthropicBody(
  messages: AnthropicMessage[],
  system?: string,
): AnthropicCompressResult {
  let totalTokensSaved = 0;

  // Compress one piece of text and add its saving to the running total.
  const shrink = (text: string): string => {
    const result = compressText(text);
    totalTokensSaved += result.estimatedTokensSaved;
    return result.text;
  };

  const compressedMessages = messages.map((msg): AnthropicMessage => {
    const content = msg.content;
    if (typeof content === 'string') {
      return { ...msg, content: shrink(content) };
    }
    if (Array.isArray(content)) {
      const blocks = content.map((block: AnthropicContentBlock): AnthropicContentBlock =>
        block.type === 'text' && typeof block.text === 'string'
          ? { ...block, text: shrink(block.text) }
          : block,
      );
      return { ...msg, content: blocks };
    }
    return msg;
  });

  let compressedSystem = system;
  if (typeof system === 'string' && system) {
    compressedSystem = shrink(system);
  }

  return { messages: compressedMessages, system: compressedSystem, totalTokensSaved };
}
// ─── OpenAI format ───────────────────────────────────────────────────────────
/** A chat message as handled here: string content is compressed, anything else is left alone. */
type OpenAIMessage = { role: string; content: string | null; [k: string]: unknown };

/** Result of compressing an OpenAI messages array. */
export interface OpenAICompressResult {
  /** Messages in their original order, string content compressed. */
  messages: OpenAIMessage[];
  /** Sum of the estimated token savings over all messages. */
  totalTokensSaved: number;
}

/**
 * Compress the string content of every message in an OpenAI messages array.
 *
 * Messages whose `content` is not a string are returned as the same object;
 * compressed messages are shallow copies with only `content` replaced.
 *
 * @param messages The request's `messages` array.
 * @returns The rewritten messages and the total estimated token saving.
 */
export function compressOpenAIMessages(messages: OpenAIMessage[]): OpenAICompressResult {
  const compressedMessages: OpenAIMessage[] = [];
  let totalTokensSaved = 0;

  for (const msg of messages) {
    if (typeof msg.content !== 'string') {
      compressedMessages.push(msg);
      continue;
    }
    const { text, estimatedTokensSaved } = compressText(msg.content);
    totalTokensSaved += estimatedTokensSaved;
    compressedMessages.push({ ...msg, content: text });
  }

  return { messages: compressedMessages, totalTokensSaved };
}

View File

@ -2,14 +2,17 @@ import type { FastifyInstance } from 'fastify';
import { createTicket, hashContent } from '../tickets/ticket-service.js'; import { createTicket, hashContent } from '../tickets/ticket-service.js';
import { logger } from '../observability/logger.js'; import { logger } from '../observability/logger.js';
import { config } from '../config.js'; import { config } from '../config.js';
import { compressAnthropicBody } from '../compression/message-compressor.js';
// Anthropic pricing per 1M tokens (USD) // Anthropic pricing per 1M tokens (USD)
const ANTHROPIC_PRICING: Record<string, { input: number; output: number; cached?: number }> = { const ANTHROPIC_PRICING: Record<string, { input: number; output: number; cached?: number }> = {
'claude-opus-4-20250514': { input: 15.0, output: 75.0, cached: 1.50 }, 'claude-opus-4-20250514': { input: 15.0, output: 75.0, cached: 1.50 },
'claude-opus-4-5': { input: 15.0, output: 75.0, cached: 1.50 }, 'claude-opus-4-5': { input: 15.0, output: 75.0, cached: 1.50 },
'claude-sonnet-4-20250514': { input: 3.0, output: 15.0, cached: 0.30 }, 'claude-sonnet-4-20250514': { input: 3.0, output: 15.0, cached: 0.30 },
'claude-sonnet-4-6': { input: 3.0, output: 15.0, cached: 0.30 },
'claude-sonnet-4-5-20251001':{ input: 3.0, output: 15.0, cached: 0.30 }, 'claude-sonnet-4-5-20251001':{ input: 3.0, output: 15.0, cached: 0.30 },
'claude-haiku-3-5-20251022': { input: 0.80, output: 4.0, cached: 0.08 }, 'claude-haiku-3-5-20251022': { input: 0.80, output: 4.0, cached: 0.08 },
'claude-haiku-4-5': { input: 0.80, output: 4.0, cached: 0.08 },
'claude-haiku-3-20250307': { input: 0.25, output: 1.25, cached: 0.025 }, 'claude-haiku-3-20250307': { input: 0.25, output: 1.25, cached: 0.025 },
}; };
@ -22,24 +25,23 @@ function calcCost(model: string, inputTokens: number, outputTokens: number, cach
} }
/** /**
* Anthropic API passthrough enables ANTHROPIC_BASE_URL routing. * Anthropic API passthrough with proxy-side compression.
* *
* Claude Code (and any Anthropic SDK client) sends requests here when * Auth handling:
* ANTHROPIC_BASE_URL=https://tokenvault.fichtmueller.org is set. * x-api-key: sk-ant-* forward as x-api-key (API key users)
* We forward the request to Anthropic using the client's own API key, * Authorization: Bearer sk-ant-* forward as x-api-key (SDK Bearer format)
* then create a TokenVault ticket to track usage and cost. * Authorization: Bearer <oauth> forward as Authorization: Bearer (Claude Code subscription)
* fallback configured server API key
*/ */
export async function anthropicProxyRoutes(app: FastifyInstance): Promise<void> { export async function anthropicProxyRoutes(app: FastifyInstance): Promise<void> {
// ─── POST /v1/messages — Anthropic Messages API passthrough ──────────────
app.post('/v1/messages', async (req, reply) => { app.post('/v1/messages', async (req, reply) => {
const body = req.body as Record<string, unknown>; const body = req.body as Record<string, unknown>;
const model = (body['model'] as string) ?? 'claude-sonnet-4-20250514'; const model = (body['model'] as string) ?? 'claude-sonnet-4-20250514';
// Use the client's API key if provided, fall back to configured key const apiKeyHeader = req.headers['x-api-key'] as string | undefined;
const apiKey = (req.headers['x-api-key'] as string) const authHeader = req.headers['authorization'] as string | undefined;
|| (req.headers['authorization'] as string)?.replace(/^Bearer /, '') const configApiKey = config.providers.anthropic.apiKey;
|| config.providers.anthropic.apiKey;
const anthropicVersion = (req.headers['anthropic-version'] as string) ?? '2023-06-01'; const anthropicVersion = (req.headers['anthropic-version'] as string) ?? '2023-06-01';
const anthropicBeta = req.headers['anthropic-beta'] as string | undefined; const anthropicBeta = req.headers['anthropic-beta'] as string | undefined;
@ -48,26 +50,59 @@ export async function anthropicProxyRoutes(app: FastifyInstance): Promise<void>
const project = req.headers['x-tokenvault-project'] as string | undefined; const project = req.headers['x-tokenvault-project'] as string | undefined;
const team = req.headers['x-tokenvault-team'] as string | undefined; const team = req.headers['x-tokenvault-team'] as string | undefined;
const start = Date.now(); // ── Proxy-side compression ─────────────────────────────────────────────
const rawMessages = (body['messages'] as Array<{ role: string; content: unknown }>) ?? [];
const rawSystem = body['system'] as string | undefined;
const { messages: cMsgs, system: cSystem, totalTokensSaved: compressionSaved } =
compressAnthropicBody(
rawMessages as Parameters<typeof compressAnthropicBody>[0],
rawSystem,
);
const compressedBody: Record<string, unknown> = { ...body, messages: cMsgs };
if (cSystem !== undefined) compressedBody['system'] = cSystem;
// ── Forward to Anthropic ──────────────────────────────────────────────── // ── Auth forwarding — detect API key vs OAuth session ──────────────────
const forwardHeaders: Record<string, string> = { const forwardHeaders: Record<string, string> = {
'Content-Type': 'application/json', 'Content-Type': 'application/json',
'x-api-key': apiKey, 'anthropic-version': anthropicVersion,
'anthropic-version': anthropicVersion,
}; };
if (anthropicBeta) forwardHeaders['anthropic-beta'] = anthropicBeta; if (anthropicBeta) forwardHeaders['anthropic-beta'] = anthropicBeta;
let apiKeyForLogging = '(none)';
if (apiKeyHeader?.startsWith('sk-ant-')) {
// Direct API key in x-api-key
forwardHeaders['x-api-key'] = apiKeyHeader;
apiKeyForLogging = 'x-api-key:sk-ant-*';
} else if (authHeader) {
const bearerToken = authHeader.replace(/^Bearer\s+/i, '');
if (bearerToken.startsWith('sk-ant-')) {
// API key in Authorization: Bearer format (some SDK configs)
forwardHeaders['x-api-key'] = bearerToken;
apiKeyForLogging = 'bearer:sk-ant-*';
} else {
// OAuth session token (Claude Code subscription) — forward as-is
forwardHeaders['authorization'] = authHeader;
apiKeyForLogging = 'bearer:oauth-session';
}
} else if (apiKeyHeader) {
forwardHeaders['x-api-key'] = apiKeyHeader;
apiKeyForLogging = 'x-api-key:custom';
} else if (configApiKey) {
forwardHeaders['x-api-key'] = configApiKey;
apiKeyForLogging = 'config-key';
}
const start = Date.now();
const upstream = await fetch('https://api.anthropic.com/v1/messages', { const upstream = await fetch('https://api.anthropic.com/v1/messages', {
method: 'POST', method: 'POST',
headers: forwardHeaders, headers: forwardHeaders,
body: JSON.stringify(body), body: JSON.stringify(compressedBody),
}); });
const latency = Date.now() - start; const latency = Date.now() - start;
const responseBody = await upstream.json() as Record<string, unknown>; const responseBody = await upstream.json() as Record<string, unknown>;
// ── Track as TokenVault ticket (best-effort, non-blocking) ─────────────
if (upstream.ok) { if (upstream.ok) {
const usage = responseBody['usage'] as { const usage = responseBody['usage'] as {
input_tokens?: number; input_tokens?: number;
@ -82,8 +117,7 @@ export async function anthropicProxyRoutes(app: FastifyInstance): Promise<void>
const cachedTokens = usage.cache_read_input_tokens ?? 0; const cachedTokens = usage.cache_read_input_tokens ?? 0;
const cost = calcCost(model, inputTokens, outputTokens, cachedTokens); const cost = calcCost(model, inputTokens, outputTokens, cachedTokens);
const messages = (body['messages'] as Array<{ role: string; content: unknown }>) ?? []; const inputText = rawMessages.map(m => {
const inputText = messages.map(m => {
if (typeof m.content === 'string') return m.content; if (typeof m.content === 'string') return m.content;
if (Array.isArray(m.content)) return m.content.map((c: Record<string, unknown>) => c['text'] ?? '').join(''); if (Array.isArray(m.content)) return m.content.map((c: Record<string, unknown>) => c['text'] ?? '').join('');
return ''; return '';
@ -92,25 +126,26 @@ export async function anthropicProxyRoutes(app: FastifyInstance): Promise<void>
const outputText = outputContent.map(c => c.text ?? '').join(''); const outputText = outputContent.map(c => c.text ?? '').join('');
createTicket({ createTicket({
provider: 'anthropic', provider: 'anthropic',
model, model,
status: 'completed', status: 'completed',
tokens_in: inputTokens, tokens_in: inputTokens,
tokens_out: outputTokens, tokens_out: outputTokens,
tokens_cached: cachedTokens, tokens_cached: cachedTokens,
tokens_saved: 0, tokens_saved: compressionSaved,
cost_usd: cost, cost_usd: cost,
latency_ms: latency, latency_ms: latency,
cache_hit: cachedTokens > 0, cache_hit: cachedTokens > 0,
caller: caller ?? 'claude-code', caller: caller ?? 'claude-code',
project, project,
team, team,
input_hash: hashContent(inputText), input_hash: hashContent(inputText),
output_hash: hashContent(outputText), output_hash: hashContent(outputText),
}).catch(err => logger.warn({ err }, 'Failed to create ticket for Anthropic passthrough')); }).catch(err => logger.warn({ err }, 'Failed to create ticket'));
} }
} else { } else {
// Track failed requests too logger.warn({ model, status: upstream.status, auth: apiKeyForLogging }, 'Anthropic upstream error');
createTicket({ createTicket({
provider: 'anthropic', provider: 'anthropic',
model, model,
@ -122,21 +157,15 @@ export async function anthropicProxyRoutes(app: FastifyInstance): Promise<void>
caller: caller ?? 'claude-code', caller: caller ?? 'claude-code',
project, project,
team, team,
input_hash: hashContent(model), input_hash: hashContent(model),
output_hash: '', output_hash: '',
}).catch(() => { /* best effort */ }); }).catch(() => { /* best effort */ });
logger.warn({ model, status: upstream.status }, 'Anthropic upstream error');
} }
// ── Return Anthropic response as-is ────────────────────────────────────
reply.code(upstream.status); reply.code(upstream.status);
reply.header('Content-Type', 'application/json'); reply.header('Content-Type', 'application/json');
const reqId = upstream.headers.get('request-id');
// Pass through useful Anthropic response headers if (reqId) reply.header('request-id', reqId);
const anthropicReqId = upstream.headers.get('request-id');
if (anthropicReqId) reply.header('request-id', anthropicReqId);
return responseBody; return responseBody;
}); });

View File

@ -2,6 +2,7 @@ import type { FastifyInstance } from 'fastify';
import { createTicket, hashContent } from '../tickets/ticket-service.js'; import { createTicket, hashContent } from '../tickets/ticket-service.js';
import { logger } from '../observability/logger.js'; import { logger } from '../observability/logger.js';
import { config } from '../config.js'; import { config } from '../config.js';
import { compressOpenAIMessages } from '../compression/message-compressor.js';
// OpenAI pricing per 1M tokens (USD) // OpenAI pricing per 1M tokens (USD)
const OPENAI_PRICING: Record<string, { input: number; output: number; cached?: number }> = { const OPENAI_PRICING: Record<string, { input: number; output: number; cached?: number }> = {
@ -12,11 +13,12 @@ const OPENAI_PRICING: Record<string, { input: number; output: number; cached?: n
'gpt-3.5-turbo': { input: 0.50, output: 1.50 }, 'gpt-3.5-turbo': { input: 0.50, output: 1.50 },
'o1': { input: 15.0, output: 60.0 }, 'o1': { input: 15.0, output: 60.0 },
'o1-mini': { input: 3.0, output: 12.0 }, 'o1-mini': { input: 3.0, output: 12.0 },
'o3': { input: 10.0, output: 40.0 },
'o3-mini': { input: 1.10, output: 4.40 }, 'o3-mini': { input: 1.10, output: 4.40 },
'o4-mini': { input: 1.10, output: 4.40 },
}; };
function calcCost(model: string, inputTokens: number, outputTokens: number, cachedTokens: number): number { function calcCost(model: string, inputTokens: number, outputTokens: number, cachedTokens: number): number {
// Match model prefix (e.g. "gpt-4o-2024-11-20" → "gpt-4o")
const key = Object.keys(OPENAI_PRICING).find(k => model === k || model.startsWith(k + '-')) ?? null; const key = Object.keys(OPENAI_PRICING).find(k => model === k || model.startsWith(k + '-')) ?? null;
if (!key) return 0; if (!key) return 0;
const pricing = OPENAI_PRICING[key]!; const pricing = OPENAI_PRICING[key]!;
@ -26,23 +28,15 @@ function calcCost(model: string, inputTokens: number, outputTokens: number, cach
return Math.max(0, inputCost + outputCost + cacheCost); return Math.max(0, inputCost + outputCost + cacheCost);
} }
/**
* OpenAI API passthrough enables OPENAI_BASE_URL routing.
*
* Any OpenAI SDK client routes through here when
* OPENAI_BASE_URL=https://tokenvault.fichtmueller.org is set.
* Forwards to OpenAI with the client's API key and creates a TokenVault ticket.
*/
export async function openaiProxyRoutes(app: FastifyInstance): Promise<void> { export async function openaiProxyRoutes(app: FastifyInstance): Promise<void> {
// ─── POST /v1/chat/completions — OpenAI Chat API passthrough ───────────── // ─── POST /v1/chat/completions ────────────────────────────────────────────
app.post('/v1/chat/completions', async (req, reply) => { app.post('/v1/chat/completions', async (req, reply) => {
const body = req.body as Record<string, unknown>; const body = req.body as Record<string, unknown>;
const model = (body['model'] as string) ?? 'gpt-4o'; const model = (body['model'] as string) ?? 'gpt-4o';
// Use client's API key, fall back to configured key
const authHeader = req.headers['authorization'] as string | undefined; const authHeader = req.headers['authorization'] as string | undefined;
const apiKey = authHeader?.replace(/^Bearer /, '') const apiKey = authHeader?.replace(/^Bearer\s+/i, '')
|| config.providers.openai?.apiKey || config.providers.openai?.apiKey
|| ''; || '';
@ -51,63 +45,66 @@ export async function openaiProxyRoutes(app: FastifyInstance): Promise<void> {
const team = req.headers['x-tokenvault-team'] as string | undefined; const team = req.headers['x-tokenvault-team'] as string | undefined;
const orgId = req.headers['openai-organization'] as string | undefined; const orgId = req.headers['openai-organization'] as string | undefined;
const start = Date.now(); // ── Proxy-side compression ───────────────────────────────────────────
const rawMessages = (body['messages'] as Array<{ role: string; content: string | null }>) ?? [];
const { messages: cMsgs, totalTokensSaved: compressionSaved } = compressOpenAIMessages(rawMessages);
const compressedBody = { ...body, messages: cMsgs };
// ── Forward to OpenAI ───────────────────────────────────────────────────
const forwardHeaders: Record<string, string> = { const forwardHeaders: Record<string, string> = {
'Content-Type': 'application/json', 'Content-Type': 'application/json',
'Authorization': `Bearer ${apiKey}`, 'Authorization': `Bearer ${apiKey}`,
}; };
if (orgId) forwardHeaders['OpenAI-Organization'] = orgId; if (orgId) forwardHeaders['OpenAI-Organization'] = orgId;
const start = Date.now();
const upstream = await fetch('https://api.openai.com/v1/chat/completions', { const upstream = await fetch('https://api.openai.com/v1/chat/completions', {
method: 'POST', method: 'POST',
headers: forwardHeaders, headers: forwardHeaders,
body: JSON.stringify(body), body: JSON.stringify(compressedBody),
}); });
const latency = Date.now() - start; const latency = Date.now() - start;
const responseBody = await upstream.json() as Record<string, unknown>; const responseBody = await upstream.json() as Record<string, unknown>;
// ── Track as TokenVault ticket ──────────────────────────────────────────
if (upstream.ok) { if (upstream.ok) {
const usage = responseBody['usage'] as { const usage = responseBody['usage'] as {
prompt_tokens?: number; prompt_tokens?: number;
completion_tokens?: number; completion_tokens?: number;
total_tokens?: number;
prompt_tokens_details?: { cached_tokens?: number }; prompt_tokens_details?: { cached_tokens?: number };
} | undefined; } | undefined;
if (usage) { if (usage) {
const inputTokens = usage.prompt_tokens ?? 0; const inputTokens = usage.prompt_tokens ?? 0;
const outputTokens = usage.completion_tokens ?? 0; const outputTokens = usage.completion_tokens ?? 0;
const cachedTokens = usage.prompt_tokens_details?.cached_tokens ?? 0; const cachedTokens = usage.prompt_tokens_details?.cached_tokens ?? 0;
const cost = calcCost(model, inputTokens, outputTokens, cachedTokens); const cost = calcCost(model, inputTokens, outputTokens, cachedTokens);
const messages = (body['messages'] as Array<{ role: string; content: string }>) ?? []; const inputText = rawMessages.map(m => m.content ?? '').join('\n');
const inputText = messages.map(m => m.content).join('\n'); const choices = (responseBody['choices'] as Array<{ message?: { content?: string } }>) ?? [];
const choices = (responseBody['choices'] as Array<{ message?: { content?: string } }>) ?? [];
const outputText = choices.map(c => c.message?.content ?? '').join(''); const outputText = choices.map(c => c.message?.content ?? '').join('');
createTicket({ createTicket({
provider: 'openai', provider: 'openai',
model, model,
status: 'completed', status: 'completed',
tokens_in: inputTokens, tokens_in: inputTokens,
tokens_out: outputTokens, tokens_out: outputTokens,
tokens_cached: cachedTokens, tokens_cached: cachedTokens,
tokens_saved: 0, tokens_saved: compressionSaved,
cost_usd: cost, cost_usd: cost,
latency_ms: latency, latency_ms: latency,
cache_hit: cachedTokens > 0, cache_hit: cachedTokens > 0,
caller: caller ?? 'openai-sdk', caller: caller ?? 'openai-sdk',
project, project,
team, team,
input_hash: hashContent(inputText), input_hash: hashContent(inputText),
output_hash: hashContent(outputText), output_hash: hashContent(outputText),
}).catch(err => logger.warn({ err }, 'Failed to create ticket for OpenAI passthrough')); }).catch(err => logger.warn({ err }, 'Failed to create ticket for OpenAI passthrough'));
} }
} else { } else {
logger.warn({ model, status: upstream.status }, 'OpenAI upstream error');
createTicket({ createTicket({
provider: 'openai', provider: 'openai',
model, model,
@ -119,29 +116,20 @@ export async function openaiProxyRoutes(app: FastifyInstance): Promise<void> {
caller: caller ?? 'openai-sdk', caller: caller ?? 'openai-sdk',
project, project,
team, team,
input_hash: hashContent(model), input_hash: hashContent(model),
output_hash: '', output_hash: '',
}).catch(() => { /* best effort */ }); }).catch(() => { /* best effort */ });
logger.warn({ model, status: upstream.status }, 'OpenAI upstream error');
} }
// ── Return OpenAI response as-is + TokenVault headers ──────────────────
reply.code(upstream.status); reply.code(upstream.status);
reply.header('Content-Type', 'application/json'); reply.header('Content-Type', 'application/json');
if (upstream.ok) {
const ticket_num = (responseBody['tokenvault'] as Record<string, unknown> | undefined)?.['ticket_number'];
if (ticket_num) reply.header('X-TokenVault-Ticket', String(ticket_num));
}
return responseBody; return responseBody;
}); });
// ─── GET /v1/models — OpenAI models list passthrough ──────────────────── // ─── GET /v1/models ────────────────────────────────────────────────────────
app.get('/v1/models', async (req, reply) => { app.get('/v1/models', async (req, reply) => {
const authHeader = req.headers['authorization'] as string | undefined; const authHeader = req.headers['authorization'] as string | undefined;
const apiKey = authHeader?.replace(/^Bearer /, '') || config.providers.openai?.apiKey || ''; const apiKey = authHeader?.replace(/^Bearer\s+/i, '') || config.providers.openai?.apiKey || '';
const upstream = await fetch('https://api.openai.com/v1/models', { const upstream = await fetch('https://api.openai.com/v1/models', {
headers: { 'Authorization': `Bearer ${apiKey}` }, headers: { 'Authorization': `Bearer ${apiKey}` },
@ -151,4 +139,5 @@ export async function openaiProxyRoutes(app: FastifyInstance): Promise<void> {
reply.header('Content-Type', 'application/json'); reply.header('Content-Type', 'application/json');
return upstream.json(); return upstream.json();
}); });
} }

View File

@ -0,0 +1,393 @@
/**
* TokenVault MCP Compression Library
*
* Implements lean-ctx-style file compression modes + RTK-style shell
* output compression — all inline so the MCP server has zero extra deps.
*
* lean-ctx modes: full | signatures | map | aggressive | entropy | lines
* RTK patterns: git | npm | cargo | docker | general
*/
// ─── Shared primitives ────────────────────────────────────────────────────────
/**
 * Strip ANSI CSI escape sequences from a string.
 *
 * Matches the general CSI shape — ESC `[`, optional parameter bytes
 * (`0`–`?`), optional intermediate bytes (space–`/`) and one final byte
 * (`@`–`~`) — so colour codes, cursor movement, erase commands such as
 * `ESC[2J` and private modes such as `ESC[?25l` are all removed.
 *
 * @param s Text that may contain escape sequences.
 * @returns The text with every CSI sequence removed.
 */
export function stripAnsi(s: string): string {
  // eslint-disable-next-line no-control-regex
  return s.replace(/\x1B\[[0-?]*[ -\/]*[@-~]/g, '');
}
/**
 * Replace every run of three or more consecutive newlines with exactly two,
 * so at most one empty line survives between paragraphs.
 */
function collapseBlankLines(s: string): string {
  const excessNewlines = /\n\n\n+/g;
  return s.replace(excessNewlines, '\n\n');
}
/** Remove trailing spaces and tabs from the end of every line of `s`. */
function trimLines(s: string): string {
  const trailingBlanks = /[ \t]+$/gm;
  return s.replace(trailingBlanks, '');
}
/**
 * Basic safe compression: collapse blank-line runs to a single blank line and
 * strip trailing whitespace, leaving fenced code blocks untouched.
 *
 * A fenced block is closed only by a fence that uses the same character as the
 * opening fence, is at least as long, and carries no info string; any other
 * fence-looking line inside the block is kept as code. The final result is
 * trimmed at both ends.
 *
 * @param input Text to compress; empty input is returned as-is.
 * @returns The compressed text.
 */
export function compressBasic(input: string): string {
  if (!input) return input;

  const fencePattern = /^(`{3,}|~{3,})/;
  const out: string[] = [];
  // Marker that opened the current code block, or null outside code.
  let openFence: string | null = null;
  let blanks = 0;

  for (const raw of input.split('\n')) {
    const t = raw.trim();

    const fenceMatch = fencePattern.exec(t);
    if (fenceMatch) {
      const marker = fenceMatch[1]!;
      const closesOpenFence =
        openFence !== null &&
        marker.charAt(0) === openFence.charAt(0) &&
        marker.length >= openFence.length &&
        t.length === marker.length;
      if (openFence === null || closesOpenFence) {
        openFence = openFence === null ? marker : null;
        out.push(raw);
        blanks = 0;
        continue;
      }
      // Fence-looking line inside an open block: handled as code below.
    }

    if (openFence !== null) {
      out.push(raw);
      continue;
    }

    if (t === '') {
      blanks++;
      if (blanks <= 1) out.push('');
      continue;
    }

    blanks = 0;
    out.push(raw.trimEnd());
  }

  return out.join('\n').trim();
}
/**
 * Rough token estimate for a string, assuming about four characters per token
 * and rounding up.
 */
export function estimateTokens(s: string): number {
  const charsPerToken = 4;
  const chars = s.length;
  return Math.ceil(chars / charsPerToken);
}
// ─── lean-ctx Compression Modes ───────────────────────────────────────────────
/**
 * Extract function/class/interface signature lines from source code.
 *
 * Each matching line is emitted as `L<line number>: <line>` under a one-line
 * summary header. Comment lines (`//`, `#`, and `/* ... *\/` blocks), blank
 * lines and lone braces are ignored. A block comment that opens and closes on
 * the same line (such as a one-line doc comment) only skips that line, so the
 * declaration that follows it is still reported.
 *
 * @param content Source text to scan.
 * @param ext File extension including the dot (e.g. `.ts`); selects the
 *   pattern set. Unknown extensions use a generic declaration pattern.
 * @returns The signature listing, or `compressBasic(content)` when no line
 *   matched.
 */
function extractSignatures(content: string, ext: string): string {
  const lines = content.split('\n');
  const sigs: string[] = [];

  // Signature patterns by language family
  const patterns: RegExp[] = [];
  if (['.ts', '.tsx', '.js', '.jsx', '.mts', '.mjs'].includes(ext)) {
    patterns.push(
      /^(export\s+)?(async\s+)?function[\s*]+\w+/,
      /^(export\s+)?(abstract\s+)?class\s+\w+/,
      /^(export\s+)?interface\s+\w+/,
      /^(export\s+)?type\s+\w+\s*=/,
      /^(export\s+)?enum\s+\w+/,
      /^(export\s+)?(const|let|var)\s+\w+\s*[:=]/,
      /^\s+(public|private|protected|static|abstract|async|readonly)\s+\w+/,
      /^\s+(async\s+)?\w+\s*\([^)]*\)\s*[:{\-=]/,
    );
  } else if (['.py'].includes(ext)) {
    patterns.push(
      /^(async\s+)?def\s+\w+/,
      /^class\s+\w+/,
      /^@\w+/,
    );
  } else if (['.go'].includes(ext)) {
    patterns.push(
      /^func\s+/,
      /^type\s+\w+\s+(struct|interface)/,
      /^var\s+|^const\s+/,
    );
  } else if (['.rs'].includes(ext)) {
    patterns.push(
      /^(pub\s+)?(async\s+)?fn\s+\w+/,
      /^(pub\s+)?(struct|enum|trait|impl|type|mod)\s+\w+/,
    );
  } else if (['.java', '.kt'].includes(ext)) {
    patterns.push(
      /^\s*(public|private|protected|static|final|abstract|override)\s+/,
      /^(class|interface|enum|record|object)\s+\w+/,
    );
  }

  // Fallback: anything that looks like a declaration
  if (patterns.length === 0) {
    patterns.push(/^(function|class|def|fn|func|type|interface|const|let|var|export)\s+/);
  }

  let inComment = false;
  let lineNum = 0;
  for (const line of lines) {
    lineNum++;
    const t = line.trim();

    if (inComment) {
      if (t.includes('*/')) inComment = false;
      continue;
    }
    if (t.startsWith('/*')) {
      // Stay in comment mode only when the terminator is not on this line.
      // Searching from index 2 keeps "/*/" from counting as already closed.
      inComment = !t.includes('*/', 2);
      continue;
    }
    if (t.startsWith('//') || t.startsWith('#')) continue;
    if (t === '' || t === '{' || t === '}') continue;

    // Patterns run against the untrimmed line: several rely on indentation.
    if (patterns.some(p => p.test(line))) {
      sigs.push(`L${lineNum}: ${line.trimEnd()}`);
    }
  }

  return sigs.length > 0
    ? `// Signatures (${sigs.length} found, ${lines.length} total lines)\n${sigs.join('\n')}`
    : compressBasic(content);
}
/**
 * Map mode: list a file's imports, exports and top-level declarations.
 *
 * Import lines are kept whole; exports and declarations are reported as
 * `L<line number>: <first 80 characters of the trimmed line>`. Besides
 * `import ...`, `from '...'` and a leading `require(...)`, import detection
 * also covers Python `from module import name` and CommonJS
 * `const x = require(...)` assignments.
 *
 * @param content Source text to scan.
 * @param ext File extension; only echoed in the header line.
 * @returns A header line followed by the non-empty IMPORTS / EXPORTS /
 *   DECLARATIONS sections.
 */
function extractMap(content: string, ext: string): string {
  const lines = content.split('\n');
  const imports: string[] = [];
  const exports: string[] = [];
  const structs: string[] = [];

  const isImportLine = (t: string): boolean =>
    /^import\s/.test(t) ||
    /^from\s+['"]/.test(t) ||
    /^from\s+\S+\s+import\s/.test(t) ||
    /^require\s*\(/.test(t) ||
    /^(const|let|var)\s+[^=]+=\s*require\s*\(/.test(t);

  let lineNum = 0;
  for (const line of lines) {
    lineNum++;
    const t = line.trim();
    if (!t) continue;

    if (isImportLine(t)) {
      imports.push(line.trimEnd());
    } else if (/^export\s+(default\s+)?(function|class|const|let|var|type|interface|enum|async)/.test(t)) {
      exports.push(`L${lineNum}: ${t.slice(0, 80)}`);
    } else if (/^(function|class|interface|type|enum|struct|impl|trait|def\s|fn\s|func\s)/.test(t)) {
      structs.push(`L${lineNum}: ${t.slice(0, 80)}`);
    }
  }

  const parts: string[] = [`// File map (${lines.length} lines, ext=${ext})`];
  if (imports.length) parts.push(`\n// IMPORTS (${imports.length})\n${imports.join('\n')}`);
  if (exports.length) parts.push(`\n// EXPORTS (${exports.length})\n${exports.join('\n')}`);
  if (structs.length) parts.push(`\n// DECLARATIONS (${structs.length})\n${structs.join('\n')}`);
  return parts.join('\n');
}
/**
 * Aggressive mode: strip comments and blank lines, keeping only logic.
 *
 * Removes `/* ... *\/` block comments (single- or multi-line, including doc
 * comments), lines starting with `//` or `#`, and blank lines; trailing
 * whitespace is trimmed from the lines that remain. Content inside fenced
 * blocks (``` / ~~~) is kept verbatim, and a fence is closed only by a
 * matching fence (same character, at least as long, no info string).
 *
 * NOTE(review): every line starting with `#` is dropped regardless of file
 * type, which also removes things like C preprocessor directives — confirm
 * that is acceptable for the callers of this mode.
 *
 * @param content Source text to compress.
 * @returns The remaining lines joined with newlines.
 */
function compressAggressive(content: string): string {
  const fencePattern = /^(`{3,}|~{3,})/;
  const out: string[] = [];
  // Marker that opened the current fenced block, or null outside one.
  let openFence: string | null = null;
  let inBlockComment = false;

  for (const raw of content.split('\n')) {
    const t = raw.trim();

    const fenceMatch = fencePattern.exec(t);
    if (fenceMatch) {
      const marker = fenceMatch[1]!;
      const closesOpenFence =
        openFence !== null &&
        marker.charAt(0) === openFence.charAt(0) &&
        marker.length >= openFence.length &&
        t.length === marker.length;
      if (openFence === null || closesOpenFence) {
        openFence = openFence === null ? marker : null;
        out.push(raw);
        continue;
      }
      // Fence-looking line inside an open block: handled as code below.
    }
    if (openFence !== null) {
      out.push(raw);
      continue;
    }

    // Block comments, including doc comments
    if (inBlockComment) {
      if (t.includes('*/')) inBlockComment = false;
      continue;
    }
    if (t.startsWith('/*')) {
      // A comment that also closes on this line must not swallow what follows.
      inBlockComment = !t.includes('*/', 2);
      continue;
    }

    // Single-line comments (and shebangs)
    if (t.startsWith('//') || t.startsWith('#')) continue;
    if (t === '') continue;

    out.push(raw.trimEnd());
  }

  return out.join('\n');
}
/**
 * Pick a compression mode automatically from simple file statistics.
 *
 * Files whose extension is not a known code extension get `full`. For code
 * files, the share of lines that are empty or start with `//`, `#` or `*` is
 * measured: above 30% selects `aggressive`; otherwise content longer than
 * 5000 characters selects `signatures`, and anything else `full`.
 */
function detectMode(content: string, ext: string): 'signatures' | 'aggressive' | 'full' {
  const codeExts = ['.ts', '.tsx', '.js', '.jsx', '.py', '.go', '.rs', '.java', '.kt', '.cpp', '.c', '.h'];
  if (codeExts.indexOf(ext) === -1) return 'full';

  const lines = content.split('\n');
  let codeLineCount = 0;
  for (const l of lines) {
    const t = l.trim();
    if (t.length === 0) continue;
    if (t.startsWith('//') || t.startsWith('#') || t.startsWith('*')) continue;
    codeLineCount++;
  }

  const commentRatio = 1 - codeLineCount / Math.max(lines.length, 1);
  if (commentRatio > 0.3) {
    return 'aggressive';
  }
  return content.length > 5000 ? 'signatures' : 'full';
}
/**
 * Apply a lean-ctx compression mode to file content.
 *
 * `entropy` first asks `detectMode` for a concrete mode and then applies that
 * one; `full` and any unrecognised mode fall back to `compressBasic`.
 *
 * @param content File content to compress.
 * @param ext File extension, forwarded to the mode implementations.
 * @param mode Requested compression mode.
 * @returns The compressed content.
 */
export function applyMode(
  content: string,
  ext: string,
  mode: 'full' | 'signatures' | 'map' | 'aggressive' | 'entropy',
): string {
  if (mode === 'entropy') {
    const detected = detectMode(content, ext);
    return applyMode(content, ext, detected);
  }
  if (mode === 'signatures') {
    return extractSignatures(content, ext);
  }
  if (mode === 'map') {
    return extractMap(content, ext);
  }
  if (mode === 'aggressive') {
    return compressAggressive(content);
  }
  return compressBasic(content);
}
// ─── RTK Shell Compression Patterns ─────────────────────────────────────────
/**
 * Shrink `git log` output.
 *
 * Lines matching `commit <40 hex chars>` are cut to their first 15 characters
 * plus `...`. `Author:`, `Date:` and `Merge:` lines and blank lines are
 * removed. All other lines are kept with trailing whitespace trimmed.
 */
function compressGitLog(output: string): string {
  const commitHeader = /^commit [a-f0-9]{40}/;
  const metadata = /^Author:|^Date:|^Merge:/;
  const kept: string[] = [];

  output.split('\n').forEach((entry) => {
    const trimmed = entry.trim();
    if (commitHeader.test(trimmed)) {
      kept.push(entry.slice(0, 15) + '...');
      return;
    }
    const droppable = trimmed === '' || metadata.test(trimmed);
    if (!droppable) {
      kept.push(entry.trimEnd());
    }
  });

  return kept.join('\n');
}
/**
 * Summarise `git status` output as up to three groups — Staged, Changed,
 * Untracked — each showing a count and the first few entries.
 *
 * Both output flavours are understood:
 *   - Long format: the section headers ("Changes to be committed:",
 *     "Changes not staged for commit:", "Unmerged paths:", "Untracked files:")
 *     decide which group an entry belongs to. Entries seen before any header
 *     are treated as staged.
 *   - Short format (`XY path`): a non-blank X column means staged, a non-blank
 *     Y column means changed in the working tree, and `??` means untracked.
 *     An entry with both columns set is listed in both groups.
 *
 * @param output Raw `git status` text.
 * @returns The grouped summary, or `output` unchanged when nothing was recognised.
 */
function compressGitStatus(output: string): string {
  type Group = 'staged' | 'changed' | 'untracked';
  const groups: Record<Group, string[]> = { staged: [], changed: [], untracked: [] };
  const longEntry =
    /^\s+(modified|new file|deleted|renamed|copied|typechange|both modified|both added|both deleted|added by us|added by them|deleted by us|deleted by them):/;
  const shortEntry = /^([ MADRCTU])([ MADRCTU]) \S/;
  let section: Group | null = null;

  for (const l of output.split('\n')) {
    // Long-format section headers select the group for the entries below them.
    if (/^Changes to be committed:/.test(l)) { section = 'staged'; continue; }
    if (/^(Changes not staged for commit|Unmerged paths):/.test(l)) { section = 'changed'; continue; }
    if (/^Untracked files:/.test(l)) { section = 'untracked'; continue; }

    const t = l.trim();
    if (t === '') continue;
    // Indented "(use "git add" ...)" hint lines carry no file information.
    if (/^\s+\(/.test(l)) continue;

    // Long-format untracked entries are bare indented paths.
    if (section === 'untracked' && /^\s/.test(l)) { groups.untracked.push(t); continue; }

    if (longEntry.test(l)) {
      groups[section === 'changed' ? 'changed' : 'staged'].push(t);
      continue;
    }

    // Short format.
    if (/^\?\?/.test(l)) { groups.untracked.push(l.slice(3).trim()); continue; }
    const short = shortEntry.exec(l);
    if (short && (short[1] !== ' ' || short[2] !== ' ')) {
      if (short[1] !== ' ') groups.staged.push(t);
      if (short[2] !== ' ') groups.changed.push(t);
      continue;
    }
    // Indented status letter with arbitrary leading whitespace.
    if (/^\s+M\s|^\s+A\s|^\s+D\s/.test(l)) { groups.changed.push(t); continue; }

    // Any other unindented line (e.g. "no changes added to commit") ends the section.
    if (!/^\s/.test(l)) section = null;
  }

  const render = (label: string, items: string[], limit: number): string => {
    const more = items.length > limit ? `\n ...+${items.length - limit} more` : '';
    return `${label} (${items.length}):\n ${items.slice(0, limit).join('\n ')}${more}`;
  };

  const parts: string[] = [];
  if (groups.staged.length) parts.push(render('Staged', groups.staged, 5));
  if (groups.changed.length) parts.push(render('Changed', groups.changed, 5));
  if (groups.untracked.length) parts.push(render('Untracked', groups.untracked, 3));
  return parts.join('\n\n') || output;
}
/**
 * Turn the path part of a `diff --git` header ("a/<old> b/<new>") into a
 * display name: the bare path when both sides agree, "<old> -> <new>" when
 * they differ, and the input minus a leading "a/" when it cannot be parsed.
 */
function diffDisplayName(spec: string): string {
  // When both sides name the same path P, spec is "a/P b/P", so the
  // separating space sits exactly in the middle of an odd-length string.
  if (spec.length % 2 === 1) {
    const mid = (spec.length - 1) / 2;
    const left = spec.slice(0, mid);
    const right = spec.slice(mid + 1);
    if (
      spec[mid] === ' ' &&
      left.startsWith('a/') &&
      right.startsWith('b/') &&
      left.slice(2) === right.slice(2)
    ) {
      return left.slice(2);
    }
  }
  const renamed = /^a\/(.+?) b\/(.+)$/.exec(spec);
  if (renamed) return `${renamed[1]} -> ${renamed[2]}`;
  return spec.replace(/^a\//, '');
}

/**
 * Summarise a unified `git diff` as one line per file: the file name followed
 * by its added / removed line counts.
 *
 * Lines are only counted once a hunk header (`@@`) has been seen for the
 * current file. This keeps the `---` / `+++` file header lines out of the
 * totals while still counting changed content that itself starts with `--`
 * or `++`.
 *
 * @param output Raw diff text.
 * @returns "Changed files:" plus one line per file, or `output` unchanged when
 *          no `diff --git` header is present.
 */
function compressGitDiff(output: string): string {
  const files: string[] = [];
  let current = '';
  let adds = 0;
  let dels = 0;
  let inHunk = false;
  const flush = (): void => {
    if (current) files.push(`${current} (+${adds} -${dels})`);
  };

  for (const l of output.split('\n')) {
    if (l.startsWith('diff --git ')) {
      flush();
      current = diffDisplayName(l.slice('diff --git '.length));
      adds = 0;
      dels = 0;
      inHunk = false;
    } else if (l.startsWith('@@')) {
      inHunk = true;
    } else if (inHunk) {
      if (l.startsWith('+')) adds++;
      else if (l.startsWith('-')) dels++;
    }
  }
  flush();
  return files.length ? `Changed files:\n${files.join('\n')}` : output;
}
/**
 * Reduce npm output to its summary lines.
 *
 * After ANSI codes are stripped, a trimmed line is kept when it matches one of
 * the summary patterns (added/changed/found counts, npm warn/error, any
 * ERROR/WARN, "<n> package…") or starts with "up to date" / "audited".
 * When nothing matches, the first 20 ANSI-stripped lines are returned instead.
 */
function compressNpm(output: string): string {
  const summary = /^added \d+|^changed \d+|^found \d+|^npm warn|^npm error|ERROR|WARN|^\s*\d+ package/;
  const status = /^up to date|^audited/;

  const cleaned = output.split('\n').map(l => stripAnsi(l));
  const kept = cleaned
    .map(l => l.trim())
    .filter(t => summary.test(t) || status.test(t));

  if (kept.length) {
    return kept.join('\n');
  }
  return cleaned.slice(0, 20).join('\n');
}
/**
 * Reduce cargo output to diagnostics and the final status.
 *
 * After ANSI codes are stripped, a trimmed line is kept when it is an error
 * line, a `warning[...]` line, contains "Finished", is a diagnostic note
 * (`= …`) or source pointer (`--> …`), or is a warnings summary
 * ("… N warnings emitted" / "… generated N warnings"). Per-crate
 * Compiling/Checking lines are dropped. When nothing matches, the first 30
 * non-blank lines are returned.
 */
function compressCargo(output: string): string {
  // These patterns run against the TRIMMED line, so the note and pointer
  // prefixes must be anchored without the leading indent.
  const diagnostic = /^error|^warning\[|Finished|^= |^-->/;
  const warningSummary = /^warning: .+ (warnings? emitted|generated \d+ warnings?)/;

  const lines = output.split('\n').map(l => stripAnsi(l));
  const keep: string[] = [];
  for (const l of lines) {
    const t = l.trim();
    if (diagnostic.test(t) || warningSummary.test(t)) keep.push(t);
  }
  return keep.length ? keep.join('\n') : lines.filter(l => l.trim()).slice(0, 30).join('\n');
}
/**
 * Reduce docker output to build-step and result lines.
 *
 * After ANSI codes are stripped, a trimmed line is kept when it starts with
 * "Step <n>", "STEP <n>", "Successfully", "ERROR" or "FROM", or contains
 * "=> [". When nothing matches, the first 30 non-blank lines are returned.
 */
function compressDocker(output: string): string {
  const interesting = /^Step \d+|^STEP \d+|^Successfully|^ERROR|^FROM|=> \[/;
  const cleaned = output.split('\n').map(l => stripAnsi(l));

  const kept = cleaned.map(l => l.trim()).filter(t => interesting.test(t));
  if (kept.length) {
    return kept.join('\n');
  }
  return cleaned.filter(l => l.trim()).slice(0, 30).join('\n');
}
/**
 * Condense `ls -la` style listings to "name (size)" entries.
 *
 * Blank lines and lines starting with "total" are skipped. A row is split on
 * whitespace and only used when it has at least nine columns: column 5 is
 * taken as the size and columns 9 onward, re-joined with single spaces, as
 * the name. Rows starting with "d" get a "/" prefix, all others a space.
 * When no row qualifies the input is returned unchanged.
 */
function compressLs(output: string): string {
  const entries: string[] = [];

  output.split('\n').forEach((row) => {
    const ignorable = !row.trim() || row.startsWith('total');
    if (ignorable) return;

    const cols = row.split(/\s+/);
    if (cols.length < 9) return;

    const size = cols[4] ?? '';
    const name = cols.slice(8).join(' ');
    const marker = row.startsWith('d') ? '/' : ' ';
    entries.push(`${marker}${name} (${size})`);
  });

  return entries.length ? entries.join('\n') : output;
}
/**
 * Generic fallback compression for arbitrary command output.
 *
 * ANSI codes are stripped, trailing whitespace is trimmed, and each run of
 * consecutive identical non-blank lines is reduced to one copy followed by a
 * "[×N repeated]" marker. Blank lines are never counted as repeats; they are
 * passed through so that `collapseBlankLines` can deal with them.
 */
function compressGeneral(output: string): string {
  const lines = output.split('\n').map(l => stripAnsi(l));
  const out: string[] = [];
  // `undefined` (not '') so that a blank first line is not mistaken for a repeat.
  let prev: string | undefined;
  let dupCount = 0;

  const flushRepeats = (): void => {
    if (dupCount > 0) {
      out.push(` [×${dupCount + 1} repeated]`);
      dupCount = 0;
    }
  };

  for (const l of lines) {
    if (l === prev && l.trim() !== '') { dupCount++; continue; }
    flushRepeats();
    prev = l;
    out.push(l.trimEnd());
  }
  flushRepeats();
  return collapseBlankLines(out.join('\n'));
}
/**
 * RTK-style shell output compression.
 *
 * The command string is trimmed and lower-cased, then matched against an
 * ordered list of prefixes; the first hit decides which compressor runs and
 * which method name is reported. Anything unmatched goes through
 * `compressGeneral` with method "general". Token counts are estimated on the
 * ANSI-stripped output before and after compression.
 *
 * @param command The command whose output is being compressed.
 * @param output Raw command output.
 * @returns Compressed text, token estimates, and the method that was applied.
 */
export function compressShellOutput(command: string, output: string): {
  text: string;
  originalTokens: number;
  compressedTokens: number;
  savedTokens: number;
  method: string;
} {
  // Order matters: the specific git sub-commands must precede the generic "git " rule.
  const rules: ReadonlyArray<{ pattern: RegExp; method: string; run: (s: string) => string }> = [
    { pattern: /^git log/, method: 'git-log', run: compressGitLog },
    { pattern: /^git status/, method: 'git-status', run: compressGitStatus },
    { pattern: /^git diff/, method: 'git-diff', run: compressGitDiff },
    { pattern: /^git /, method: 'git-general', run: compressGeneral },
    { pattern: /^npm /, method: 'npm', run: compressNpm },
    { pattern: /^cargo /, method: 'cargo', run: compressCargo },
    { pattern: /^docker /, method: 'docker', run: compressDocker },
    { pattern: /^ls\b/, method: 'ls', run: compressLs },
  ];

  const clean = stripAnsi(output);
  const originalTokens = estimateTokens(clean);
  const normalized = command.trim().toLowerCase();

  const rule = rules.find(candidate => candidate.pattern.test(normalized));
  const method = rule ? rule.method : 'general';
  const text = rule ? rule.run(clean) : compressGeneral(clean);

  const compressedTokens = estimateTokens(text);
  return {
    text,
    originalTokens,
    compressedTokens,
    savedTokens: Math.max(0, originalTokens - compressedTokens),
    method,
  };
}
// ─── Generic compress-any-text ────────────────────────────────────────────────
/** Size and savings figures reported by `compressAny` for one compression run. */
export interface CompressStats {
  /** Length of the input text, in characters. */
  originalChars: number;
  /** Length of the compressed text, in characters. */
  compressedChars: number;
  /** Token estimate for the input text. */
  originalTokens: number;
  /** Token estimate for the compressed text. */
  compressedTokens: number;
  /** `originalTokens - compressedTokens`, floored at 0. */
  savedTokens: number;
  /** `savedTokens` as a percentage of `originalTokens` (0 when the input has no tokens). */
  savingsPct: number;
}
/**
 * Compress arbitrary text and report how much was saved.
 *
 * @param text Text to compress.
 * @param mode `'basic'` (default) uses `compressBasic`, `'aggressive'` uses
 *             `compressAggressive`, and `'shell'` uses `compressShellOutput`
 *             with an empty command string.
 * @returns The compressed text together with character and token statistics.
 */
export function compressAny(text: string, mode: 'basic' | 'aggressive' | 'shell' = 'basic'): {
  text: string;
  stats: CompressStats;
} {
  const originalTokens = estimateTokens(text);

  const compress = (): string => {
    switch (mode) {
      case 'aggressive':
        return compressAggressive(text);
      case 'shell':
        return compressShellOutput('', text).text;
      default:
        return compressBasic(text);
    }
  };
  const compressed = compress();

  const compressedTokens = estimateTokens(compressed);
  const savedTokens = Math.max(0, originalTokens - compressedTokens);
  const stats: CompressStats = {
    originalChars: text.length,
    compressedChars: compressed.length,
    originalTokens,
    compressedTokens,
    savedTokens,
    savingsPct: originalTokens > 0 ? (savedTokens / originalTokens) * 100 : 0,
  };
  return { text: compressed, stats };
}

View File

@ -1,6 +1,10 @@
import { McpServer } from '@modelcontextprotocol/sdk/server/mcp.js'; import { McpServer } from '@modelcontextprotocol/sdk/server/mcp.js';
import { StdioServerTransport } from '@modelcontextprotocol/sdk/server/stdio.js'; import { StdioServerTransport } from '@modelcontextprotocol/sdk/server/stdio.js';
import { z } from 'zod'; import { z } from 'zod';
import { execFileSync, execSync } from 'node:child_process';
import { readFile } from 'node:fs/promises';
import { extname, resolve } from 'node:path';
import { applyMode, compressShellOutput, compressAny, estimateTokens } from './compression.js';
const CORE_URL = process.env['TOKENVAULT_URL'] ?? 'http://localhost:3300'; const CORE_URL = process.env['TOKENVAULT_URL'] ?? 'http://localhost:3300';
@ -15,6 +19,188 @@ const server = new McpServer({
version: '0.1.0', version: '0.1.0',
}); });
// ─── tv_read: File reader with lean-ctx compression modes ────────────────────
server.tool(
  'tv_read',
  `Read a file with automatic token compression. Use INSTEAD of native Read for significant token savings.
Modes:
full Basic whitespace normalization (safe for all files, ~10-20% savings)
signatures Function/class/interface signatures only (~70% savings, great for exploring)
map Imports + exports + top-level structure (~60% savings, great for navigation)
aggressive Remove all comments, blank lines, docstrings (~40% savings, keep logic only)
entropy Auto-select mode based on file type and size (recommended default)
lines Read specific line range (use with 'lines' parameter)`,
  {
    path: z.string().describe('File path to read (absolute or relative)'),
    mode: z.enum(['full', 'signatures', 'map', 'aggressive', 'entropy', 'lines'])
      .optional()
      .describe('Compression mode (default: entropy)'),
    lines: z.string().optional().describe('Line range for lines mode, e.g. "10-50" or "100-200"'),
  },
  // Reads the file as UTF-8, then either returns a raw line range (when mode is
  // 'lines' or a `lines` range is supplied) or the content compressed by
  // `applyMode`. The first line of the reply is a bracketed header with token counts.
  async ({ path: filePath, mode = 'entropy', lines }) => {
    const absPath = resolve(filePath);
    const content = await readFile(absPath, 'utf-8');
    const ext = extname(absPath).toLowerCase();
    const originalTokens = estimateTokens(content);

    // Line range selection
    if (mode === 'lines' || lines) {
      const [startStr = '', endStr = ''] = (lines ?? '1-100').split('-');
      // parseInt yields NaN for missing or malformed parts ("abc", "10-"); fall
      // back to line 1 and a 100-line window instead of slicing with NaN.
      const parsedStart = parseInt(startStr, 10);
      const start = Number.isFinite(parsedStart) ? Math.max(1, parsedStart) : 1;
      const parsedEnd = parseInt(endStr, 10);
      const end = Number.isFinite(parsedEnd) ? Math.max(start, parsedEnd) : start + 99;

      const selected = content.split('\n').slice(start - 1, end).join('\n');
      const compTokens = estimateTokens(selected);
      return {
        content: [{
          type: 'text' as const,
          text: `[tv_read: ${filePath} lines ${start}-${end} | ${compTokens} tokens]\n\n${selected}`,
        }],
      };
    }

    const compressed = applyMode(content, ext, mode as 'full' | 'signatures' | 'map' | 'aggressive' | 'entropy');
    const compressedTokens = estimateTokens(compressed);
    const savedTokens = Math.max(0, originalTokens - compressedTokens);
    const savingsPct = originalTokens > 0 ? ((savedTokens / originalTokens) * 100).toFixed(0) : '0';
    return {
      content: [{
        type: 'text' as const,
        text: `[tv_read: ${filePath} | mode=${mode} | ${compressedTokens}/${originalTokens} tokens (${savingsPct}% saved)]\n\n${compressed}`,
      }],
    };
  },
);
// ─── tv_shell: Shell runner with RTK-style output compression ─────────────────
server.tool(
  'tv_shell',
  `Run a shell command and compress the output using RTK-style patterns.
Saves 60-90% tokens on git/npm/cargo/docker output.
Handles: git log/status/diff, npm install/audit, cargo build/test, docker build/ps, ls -la.`,
  {
    command: z.string().describe('Shell command to run'),
    compress: z.boolean().optional().describe('Apply RTK compression (default: true)'),
    cwd: z.string().optional().describe('Working directory (default: current directory)'),
    timeout: z.number().optional().describe('Timeout in milliseconds (default: 30000)'),
  },
  // Runs the command synchronously. On success only stdout is returned; when
  // the command throws, whatever stdout/stderr the error carries is used,
  // falling back to the stringified error.
  async ({ command, compress = true, cwd, timeout = 30_000 }) => {
    const runCommand = (): string => {
      try {
        return execSync(command, {
          encoding: 'utf-8',
          stdio: ['pipe', 'pipe', 'pipe'],
          cwd: cwd ? resolve(cwd) : process.cwd(),
          maxBuffer: 10 * 1024 * 1024,
          timeout,
        });
      } catch (err: unknown) {
        const failure = err as { stdout?: string; stderr?: string; message?: string };
        const captured = [failure.stdout, failure.stderr].filter(Boolean).join('\n');
        return captured || String(err);
      }
    };
    const rawOutput = runCommand();

    if (!compress) {
      return { content: [{ type: 'text' as const, text: rawOutput }] };
    }

    const { text, method, originalTokens, compressedTokens, savedTokens } =
      compressShellOutput(command, rawOutput);
    const pct = originalTokens > 0 ? ((savedTokens / originalTokens) * 100).toFixed(0) : '0';
    const header = `[tv_shell: ${command} | method=${method} | ${compressedTokens}/${originalTokens} tokens (${pct}% saved)]`;
    return {
      content: [{ type: 'text' as const, text: `${header}\n\n${text}` }],
    };
  },
);
// ─── tv_search: Code search with compact results ──────────────────────────────
server.tool(
  'tv_search',
  `Search code/files using ripgrep with compact, token-efficient results.
Use INSTEAD of native Grep for token savings.`,
  {
    pattern: z.string().describe('Regex pattern to search for'),
    path: z.string().optional().describe('Directory or file to search in (default: current dir)'),
    glob: z.string().optional().describe('File glob filter, e.g. "*.ts" or "src/**/*.py"'),
    context: z.number().optional().describe('Lines of context around each match (default: 0)'),
    max_results: z.number().optional().describe('Maximum number of results (default: 50)'),
    case_insensitive: z.boolean().optional().describe('Case-insensitive search (default: false)'),
  },
  // Runs ripgrep and returns at most `max_results` output lines behind a
  // bracketed header with token counts.
  async ({ pattern, path: searchPath, glob, context = 0, max_results = 50, case_insensitive }) => {
    const target = searchPath ? resolve(searchPath) : process.cwd();
    const limit = Math.max(1, Math.trunc(max_results));
    const contextLines = Math.max(0, Math.trunc(context));

    // Arguments are handed to rg as an argv array (no shell), so quotes,
    // `$(...)`, backticks etc. in `pattern` or `glob` are never interpreted.
    // `-e` keeps a pattern that starts with "-" from being read as a flag and
    // `--` does the same for the path.
    const args = [
      '--no-heading',
      '-n',
      ...(case_insensitive ? ['-i'] : []),
      ...(contextLines > 0 ? ['-C', String(contextLines)] : []),
      ...(glob ? ['--glob', glob] : []),
      '--max-count', String(limit),
      '-e', pattern,
      '--', target,
    ];

    let rawOutput: string;
    try {
      rawOutput = execFileSync('rg', args, {
        encoding: 'utf-8',
        stdio: ['pipe', 'pipe', 'pipe'],
        maxBuffer: 5 * 1024 * 1024,
      });
    } catch (err: unknown) {
      const e = err as { status?: number | null; stdout?: string; stderr?: string; message?: string };
      if (e.status === 1) {
        // ripgrep exits with status 1 when the search ran but matched nothing.
        rawOutput = '';
      } else {
        // Anything else (invalid regex, rg not installed, …) is surfaced
        // instead of being reported as an empty result.
        const reason = (e.stderr ?? '').trim() || e.message || String(err);
        return {
          content: [{
            type: 'text' as const,
            text: `[tv_search: "${pattern}" in ${target} | failed]\n\n${reason}`,
          }],
        };
      }
    }

    const lines = rawOutput.split('\n').filter(l => l.trim());
    const originalTokens = estimateTokens(rawOutput);
    const compactLines = lines.slice(0, limit);
    const compacted = compactLines.join('\n');
    const compressedTokens = estimateTokens(compacted);
    const truncated = lines.length > limit ? `\n[...${lines.length - limit} more matches]` : '';
    const body = compactLines.length > 0 ? compacted : 'No matches found';
    return {
      content: [{
        type: 'text' as const,
        text: `[tv_search: "${pattern}" in ${target} | ${compressedTokens}/${originalTokens} tokens | ${compactLines.length} matches]\n\n${body}${truncated}`,
      }],
    };
  },
);
// ─── tv_compress: Compress any text on demand ─────────────────────────────────
server.tool(
  'tv_compress',
  `Compress any text to reduce token count before sending to any LLM.
Useful for compressing pasted code, logs, documents, or tool output.`,
  {
    text: z.string().describe('Text to compress'),
    mode: z.enum(['basic', 'aggressive', 'shell'])
      .optional()
      .describe('Compression mode: basic (safe), aggressive (max), shell (RTK patterns)'),
  },
  // Delegates to `compressAny` and prefixes the result with a one-line stats
  // header followed by a blank line.
  async ({ text, mode = 'basic' }) => {
    const result = compressAny(text, mode);
    const { savedTokens, savingsPct, compressedTokens, originalTokens } = result.stats;
    const header = `[tv_compress: ${savedTokens} tokens saved (${savingsPct.toFixed(0)}%) | ${compressedTokens}/${originalTokens} tokens | mode=${mode}]`;
    return {
      content: [{
        type: 'text' as const,
        text: `${header}\n\n${result.text}`,
      }],
    };
  },
);
// ─── tv_ticket: View and search tickets ────────────────────────────────────── // ─── tv_ticket: View and search tickets ──────────────────────────────────────
server.tool( server.tool(
'tv_ticket', 'tv_ticket',
@ -32,12 +218,10 @@ server.tool(
const ticket = await fetchCore(`/v1/tickets/${id}`); const ticket = await fetchCore(`/v1/tickets/${id}`);
return { content: [{ type: 'text' as const, text: JSON.stringify(ticket, null, 2) }] }; return { content: [{ type: 'text' as const, text: JSON.stringify(ticket, null, 2) }] };
} }
if (action === 'stats') { if (action === 'stats') {
const stats = await fetchCore(`/v1/tickets/stats?period=${period ?? 'today'}`); const stats = await fetchCore(`/v1/tickets/stats?period=${period ?? 'today'}`);
return { content: [{ type: 'text' as const, text: JSON.stringify(stats, null, 2) }] }; return { content: [{ type: 'text' as const, text: JSON.stringify(stats, null, 2) }] };
} }
const params = new URLSearchParams(); const params = new URLSearchParams();
if (provider) params.set('provider', provider); if (provider) params.set('provider', provider);
if (project) params.set('project', project); if (project) params.set('project', project);
@ -60,11 +244,8 @@ server.tool(
fetchCore(`/v1/cost?period=${period ?? 'month'}`), fetchCore(`/v1/cost?period=${period ?? 'month'}`),
group_by ? fetchCore(`/v1/cost/breakdown?group_by=${group_by}`) : Promise.resolve(null), group_by ? fetchCore(`/v1/cost/breakdown?group_by=${group_by}`) : Promise.resolve(null),
]); ]);
const parts = [`# Cost Summary (${period ?? 'month'})\n${JSON.stringify(summary, null, 2)}`]; const parts = [`# Cost Summary (${period ?? 'month'})\n${JSON.stringify(summary, null, 2)}`];
if (breakdown) { if (breakdown) parts.push(`\n# Breakdown by ${group_by}\n${JSON.stringify(breakdown, null, 2)}`);
parts.push(`\n# Breakdown by ${group_by}\n${JSON.stringify(breakdown, null, 2)}`);
}
return { content: [{ type: 'text' as const, text: parts.join('\n') }] }; return { content: [{ type: 'text' as const, text: parts.join('\n') }] };
}, },
); );
@ -72,11 +253,111 @@ server.tool(
// ─── tv_health: Service health ─────────────────────────────────────────────── // ─── tv_health: Service health ───────────────────────────────────────────────
server.tool( server.tool(
'tv_health', 'tv_health',
'Check TokenVault service health and configured providers.', 'Check TokenVault service health, configured providers, and tool versions (lean-ctx, RTK).',
{}, {},
async () => { async () => {
const health = await fetchCore('/health'); const health = await fetchCore('/health');
return { content: [{ type: 'text' as const, text: JSON.stringify(health, null, 2) }] };
// Check lean-ctx version
let leanCtxVersion = 'unknown';
try {
leanCtxVersion = execSync('lean-ctx --version 2>/dev/null || echo unknown', {
encoding: 'utf-8', timeout: 5000,
}).trim();
} catch { /* ignore */ }
// Check RTK version
let rtkVersion = 'unknown';
try {
rtkVersion = execSync('rtk --version 2>/dev/null || echo unknown', {
encoding: 'utf-8', timeout: 5000,
}).trim();
} catch { /* ignore */ }
return {
content: [{
type: 'text' as const,
text: JSON.stringify({
...(health as object),
compression_tools: {
'lean-ctx': leanCtxVersion,
rtk: rtkVersion,
},
}, null, 2),
}],
};
},
);
// ─── tv_update: Auto-update lean-ctx and RTK ─────────────────────────────────
server.tool(
  'tv_update',
  `Check for and install updates to lean-ctx (cargo) and RTK (homebrew).
Run this periodically to keep compression tools up to date.`,
  {
    dry_run: z.boolean().optional().describe('Check for updates without installing (default: false)'),
    tool: z.enum(['all', 'lean-ctx', 'rtk']).optional().describe('Which tool to update (default: all)'),
  },
  // For each selected tool: record the installed version, then (unless
  // dry_run) run the installer and record the version again. A failure in one
  // tool is captured under "<tool>-error" and does not stop the other.
  async ({ dry_run = false, tool = 'all' }) => {
    const results: Record<string, string> = {};
    const selected = (name: 'lean-ctx' | 'rtk'): boolean => tool === 'all' || tool === name;
    // Prints `fallback` instead of failing when the binary is missing.
    const probeVersion = (binary: string, fallback: string): string =>
      execSync(`${binary} --version 2>/dev/null || echo ${fallback}`, {
        encoding: 'utf-8', timeout: 5000,
      }).trim();

    if (selected('lean-ctx')) {
      try {
        const before = probeVersion('lean-ctx', 'none');
        results['lean-ctx-current'] = before;
        if (dry_run) {
          results['lean-ctx-status'] = 'dry-run — would run: cargo install lean-ctx';
        } else {
          // Generous timeout: cargo may have to compile the crate.
          const installLog = execSync('cargo install lean-ctx 2>&1', {
            encoding: 'utf-8', timeout: 300_000,
          });
          const after = probeVersion('lean-ctx', 'unknown');
          results['lean-ctx-new'] = after;
          results['lean-ctx-status'] = before === after ? 'already-latest' : 'updated';
          results['lean-ctx-output'] = installLog.slice(-200); // tail of the installer output
        }
      } catch (err) {
        results['lean-ctx-error'] = String(err).slice(0, 200);
      }
    }

    if (selected('rtk')) {
      try {
        results['rtk-current'] = probeVersion('rtk', 'none');
        if (dry_run) {
          results['rtk-status'] = 'dry-run — would run: brew upgrade rtk';
        } else {
          const upgradeLog = execSync('brew upgrade rtk 2>&1 || brew install rtk 2>&1', {
            encoding: 'utf-8', timeout: 120_000,
          });
          results['rtk-new'] = probeVersion('rtk', 'unknown');
          results['rtk-status'] = upgradeLog.includes('already installed') ? 'already-latest' : 'updated';
        }
      } catch (err) {
        results['rtk-error'] = String(err).slice(0, 200);
      }
    }

    return {
      content: [{
        type: 'text' as const,
        text: `# tv_update results\n${JSON.stringify(results, null, 2)}`,
      }],
    };
  },
);

View File

@ -0,0 +1,39 @@
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
<!--
  launchd job definition that runs scripts/update-compression-tools.sh on a
  daily schedule.
  NOTE(review): every path below is absolute and points into one specific
  user's home directory; adjust them before installing on another machine.
-->
<plist version="1.0">
<dict>
<!-- Unique job identifier -->
<key>Label</key>
<string>org.tokenvault.update-compression-tools</string>
<!-- Command line: run the update script with bash -->
<key>ProgramArguments</key>
<array>
<string>/bin/bash</string>
<string>/Users/renefichtmueller/Desktop/Claude Code/tokenvault/scripts/update-compression-tools.sh</string>
</array>
<!-- Run daily at 03:00 AM -->
<key>StartCalendarInterval</key>
<dict>
<key>Hour</key>
<integer>3</integer>
<key>Minute</key>
<integer>0</integer>
</dict>
<!--
  stdout / stderr of the job are redirected to these files.
  NOTE(review): the script also appends its log lines to
  $HOME/Library/Logs/tokenvault-update.log via `tee -a`, which is the same
  file as StandardOutPath given the HOME set below, so lines may appear
  twice there. Confirm whether that is intended.
-->
<key>StandardOutPath</key>
<string>/Users/renefichtmueller/Library/Logs/tokenvault-update.log</string>
<key>StandardErrorPath</key>
<string>/Users/renefichtmueller/Library/Logs/tokenvault-update-error.log</string>
<!-- Environment for the job: PATH covers the Homebrew and cargo bin directories -->
<key>EnvironmentVariables</key>
<dict>
<key>PATH</key>
<string>/opt/homebrew/bin:/usr/local/bin:/usr/bin:/bin:/Users/renefichtmueller/.cargo/bin</string>
<key>HOME</key>
<string>/Users/renefichtmueller</string>
</dict>
<!-- Do not run when the job is loaded; wait for the scheduled time -->
<key>RunAtLoad</key>
<false/>
</dict>
</plist>

View File

@ -0,0 +1,60 @@
#!/usr/bin/env bash
# TokenVault — Auto-update lean-ctx (cargo) and RTK (homebrew)
# Runs daily via LaunchAgent: ~/Library/LaunchAgents/org.tokenvault.update-compression-tools.plist
#
# Logs: ~/Library/Logs/tokenvault-update.log
#
# For each tool the installed version is recorded, the package manager's
# update command is run, and the version is recorded again so the log shows
# whether anything changed. A failing update is logged and does not abort the
# rest of the script.
set -euo pipefail

LOG="$HOME/Library/Logs/tokenvault-update.log"
# Make sure the log directory exists; otherwise the first `tee -a` would fail
# and `set -e` would abort the run.
mkdir -p "$(dirname "$LOG")"

# Write a timestamped line to stdout and append it to $LOG. The timestamp is
# taken at call time so entries written after a long install show when they
# actually happened.
log() { echo "[$(date '+%Y-%m-%d %H:%M:%S')] $*" | tee -a "$LOG"; }

log "=== TokenVault compression tool update check ==="

# ── lean-ctx (Rust binary via cargo) ──────────────────────────────────────────
LEAN_CTX_BIN="$HOME/.cargo/bin/lean-ctx"
if [ -f "$LEAN_CTX_BIN" ]; then
  BEFORE=$("$LEAN_CTX_BIN" --version 2>/dev/null || echo "unknown")
  log "lean-ctx before: $BEFORE"
  # cargo install always fetches latest from crates.io.
  # With `pipefail` the pipeline fails when cargo fails, even though
  # tail/tee succeed; only the last 5 lines of cargo output are logged.
  if "$HOME/.cargo/bin/cargo" install lean-ctx 2>&1 | tail -5 | tee -a "$LOG"; then
    AFTER=$("$LEAN_CTX_BIN" --version 2>/dev/null || echo "unknown")
    log "lean-ctx after: $AFTER"
    if [ "$BEFORE" != "$AFTER" ]; then
      log "lean-ctx UPDATED: $BEFORE -> $AFTER"
    else
      log "lean-ctx already at latest"
    fi
  else
    log "lean-ctx update FAILED (cargo install returned error)"
  fi
else
  log "lean-ctx not found at $LEAN_CTX_BIN — skipping"
fi

# ── RTK (Homebrew) ────────────────────────────────────────────────────────────
BREW="/opt/homebrew/bin/brew"
if [ -f "$BREW" ]; then
  BEFORE=$(/opt/homebrew/bin/rtk --version 2>/dev/null || rtk --version 2>/dev/null || echo "unknown")
  log "rtk before: $BEFORE"
  if "$BREW" upgrade rtk 2>&1 | tee -a "$LOG"; then
    AFTER=$(/opt/homebrew/bin/rtk --version 2>/dev/null || rtk --version 2>/dev/null || echo "unknown")
    log "rtk after: $AFTER"
    if [ "$BEFORE" != "$AFTER" ]; then
      log "RTK UPDATED: $BEFORE -> $AFTER"
    else
      log "RTK already at latest"
    fi
  else
    # A non-zero exit does not tell "nothing to upgrade" apart from a real
    # failure (e.g. rtk not installed via brew), so the message says so; the
    # brew output captured above has the details.
    log "RTK not upgraded (brew upgrade returned non-zero: no update available or upgrade failed)"
  fi
else
  log "Homebrew not found at $BREW — skipping RTK update"
fi

log "=== Update check complete ==="