feat: proxy-side compression for all providers + full MCP tool suite
- message-compressor.ts: code-block-aware compression for Anthropic + OpenAI messages - anthropic-proxy.ts: fix OAuth Bearer token forwarding (Claude Code subscription auth) + integrate proxy-side compression, track tokens_saved per ticket - openai-proxy.ts: integrate proxy-side compression, track tokens_saved - mcp/compression.ts: lean-ctx-style modes (full/signatures/map/aggressive/entropy) + RTK-style patterns for git/npm/cargo/docker/ls output - mcp/server.ts: 8 tools — tv_read, tv_shell, tv_search, tv_compress, tv_update, tv_ticket, tv_cost, tv_health - tv_update: auto-update lean-ctx (cargo install) + RTK (brew upgrade) - scripts/update-compression-tools.sh + LaunchAgent plist: daily auto-update at 03:00
This commit is contained in:
parent
7ea2d1e266
commit
587431a915
168
packages/core/src/compression/message-compressor.ts
Normal file
168
packages/core/src/compression/message-compressor.ts
Normal file
@ -0,0 +1,168 @@
|
||||
/**
|
||||
* Proxy-side message compression — applied to ALL providers before forwarding.
|
||||
*
|
||||
* Strategy: conservative, lossless-for-meaning compression that is safe to
|
||||
* apply to any LLM message without changing semantics:
|
||||
* 1. Preserve content inside code blocks (``` / ~~~) verbatim
|
||||
* 2. Collapse every run of 2+ consecutive blank lines → 1 blank line outside code blocks
|
||||
* 3. Strip trailing whitespace from every non-code line
|
||||
* 4. Remove HTML/XML comments outside code blocks
|
||||
* 5. Collapse runs of 3+ identical separator lines (each 4+ chars, e.g. ────────) down to the first 2
|
||||
*
|
||||
* Token estimation: 1 token ≈ 4 chars (GPT-3.5/4 / Claude approximation).
|
||||
*/
|
||||
|
||||
/** Outcome of a {@link compressText} call. */
export interface CompressResult {
  /** Compressed text (the untouched input when it is empty or shorter than 50 characters). */
  text: string;
  /** `input.length` of the original string. */
  originalChars: number;
  /** `text.length` of the compressed string. */
  compressedChars: number;
  /** `floor((originalChars - compressedChars) / 4)`, clamped to be non-negative. */
  estimatedTokensSaved: number;
}

/**
 * Compress a plain text string, preserving fenced code blocks verbatim.
 *
 * Outside fenced code blocks the function:
 *  - collapses every run of blank lines to a single blank line (a line that
 *    holds nothing but single-line HTML/XML comments counts as blank),
 *  - removes `<!-- ... -->` comments that open and close on the same line,
 *  - strips trailing whitespace,
 *  - keeps only the first two lines of a run of identical separator lines.
 *
 * Fence handling: a fence opens on a line starting with 3+ backticks or
 * tildes (a backtick fence whose remainder contains another backtick is an
 * inline code span, not a fence). It closes only on a line made of the same
 * fence character, at least as long as the opener, with nothing after it.
 * A ``` line inside a ~~~ block therefore stays part of the code.
 * An unclosed fence keeps everything up to the end of the input verbatim.
 *
 * @param input Text to compress. Inputs that are empty or shorter than
 *   50 characters are returned unchanged.
 * @returns The compressed text together with size statistics.
 */
export function compressText(input: string): CompressResult {
  if (!input || input.length < 50) {
    return { text: input, originalChars: input.length, compressedChars: input.length, estimatedTokensSaved: 0 };
  }

  const fencePattern = /^(`{3,}|~{3,})([\s\S]*)$/;
  const separatorPattern = /^[-=─━*#/]{4,}$/;
  const htmlComment = /<!--[\s\S]*?-->/g;

  const out: string[] = [];
  let openFence: string | null = null; // marker of the currently open fence, if any
  let blankRun = 0;
  let sepLine: string | null = null;
  let sepRun = 0;

  for (const raw of input.split('\n')) {
    const trimmed = raw.trim();
    const fence = fencePattern.exec(trimmed);
    const marker = fence?.[1] ?? '';
    const afterMarker = fence?.[2] ?? '';

    // Inside a code block: pass through verbatim, close only on a matching fence.
    if (openFence !== null) {
      out.push(raw);
      const closes =
        marker !== '' &&
        marker[0] === openFence[0] &&
        marker.length >= openFence.length &&
        afterMarker.trim() === '';
      if (closes) openFence = null;
      continue;
    }

    // Opening fence. "```code``` rest" is an inline span, not a fence.
    const opens = marker !== '' && !(marker[0] === '`' && afterMarker.includes('`'));
    if (opens) {
      openFence = marker;
      out.push(raw);
      blankRun = 0;
      sepLine = null;
      sepRun = 0;
      continue;
    }

    // Blank (or comment-only) line handling — collapse 2+ blanks to 1.
    const withoutComments = raw.replace(htmlComment, '');
    if (withoutComments.trim() === '') {
      blankRun++;
      if (blankRun === 1) out.push('');
      continue;
    }
    blankRun = 0;

    // Collapse runs of identical separator lines (─, =, -, *, #, /): the
    // first occurrence falls through to the normal push below, the second is
    // pushed here, and any further repeats are dropped.
    if (separatorPattern.test(trimmed)) {
      if (trimmed === sepLine) {
        sepRun++;
        if (sepRun <= 1) out.push(raw.trimEnd());
        continue;
      }
      sepLine = trimmed;
      sepRun = 0;
    } else {
      sepLine = null;
      sepRun = 0;
    }

    out.push(withoutComments.trimEnd());
  }

  const compressed = out.join('\n').trimEnd();
  const originalChars = input.length;
  const compressedChars = compressed.length;
  const estimatedTokensSaved = Math.max(0, Math.floor((originalChars - compressedChars) / 4));

  return { text: compressed, originalChars, compressedChars, estimatedTokensSaved };
}
|
||||
|
||||
// ─── Anthropic format ────────────────────────────────────────────────────────
|
||||
|
||||
type AnthropicContentBlock = { type: string; text?: string; [k: string]: unknown };
type AnthropicMessage = { role: string; content: string | AnthropicContentBlock[]; [k: string]: unknown };
/** Top-level `system` prompt: a plain string or an array of content blocks. */
type AnthropicSystem = string | AnthropicContentBlock[];

/**
 * Result of {@link compressAnthropicBody}.
 *
 * `S` is the shape of the returned `system` prompt. It defaults to `string`,
 * so existing references to `AnthropicCompressResult` keep their meaning.
 */
export interface AnthropicCompressResult<S extends AnthropicSystem = string> {
  /** Messages with every string content / text block compressed. */
  messages: AnthropicMessage[];
  /** Compressed system prompt, or `undefined` when none was supplied. */
  system?: S;
  /** Sum of `estimatedTokensSaved` over everything that was compressed. */
  totalTokensSaved: number;
}

/**
 * Compress an Anthropic messages request body.
 *
 * Handles both string content and content-block arrays: string content is
 * compressed directly, and inside block arrays only blocks with
 * `type === 'text'` and a string `text` are compressed; every other block
 * (and every other message field) is passed through unchanged.
 *
 * The top-level `system` prompt is compressed too. It may be a string or an
 * array of content blocks; arrays are processed block by block exactly like
 * message content instead of being handed to the string compressor.
 *
 * The inputs are not mutated; changed messages and blocks are shallow copies.
 *
 * @param messages Conversation messages of the request body.
 * @param system Optional system prompt (string or block array).
 * @returns Compressed messages, compressed system prompt and the total
 *   estimated token saving.
 */
export function compressAnthropicBody(
  messages: AnthropicMessage[],
  system?: string,
): AnthropicCompressResult;
export function compressAnthropicBody(
  messages: AnthropicMessage[],
  system?: AnthropicSystem,
): AnthropicCompressResult<AnthropicSystem>;
export function compressAnthropicBody(
  messages: AnthropicMessage[],
  system?: AnthropicSystem,
): AnthropicCompressResult<AnthropicSystem> {
  let totalTokensSaved = 0;

  // Compress one string and account for the saving.
  const squeeze = (text: string): string => {
    const r = compressText(text);
    totalTokensSaved += r.estimatedTokensSaved;
    return r.text;
  };

  // Compress the text blocks of a block array, leaving other blocks as-is.
  const squeezeBlocks = (blocks: AnthropicContentBlock[]): AnthropicContentBlock[] =>
    blocks.map(block =>
      block.type === 'text' && typeof block.text === 'string'
        ? { ...block, text: squeeze(block.text) }
        : block,
    );

  const compressedMessages = messages.map((msg): AnthropicMessage => {
    const content = msg.content;
    if (typeof content === 'string') return { ...msg, content: squeeze(content) };
    if (Array.isArray(content)) return { ...msg, content: squeezeBlocks(content) };
    return msg;
  });

  let compressedSystem: AnthropicSystem | undefined = system;
  if (typeof system === 'string') {
    compressedSystem = squeeze(system);
  } else if (Array.isArray(system)) {
    compressedSystem = squeezeBlocks(system);
  }

  return { messages: compressedMessages, system: compressedSystem, totalTokensSaved };
}
|
||||
|
||||
// ─── OpenAI format ───────────────────────────────────────────────────────────
|
||||
|
||||
type OpenAIMessage = { role: string; content: string | null; [k: string]: unknown };

/** Result of {@link compressOpenAIMessages}. */
export interface OpenAICompressResult {
  /** Messages in their original order; string contents are compressed. */
  messages: OpenAIMessage[];
  /** Sum of the estimated token savings of all compressed messages. */
  totalTokensSaved: number;
}

/**
 * Compress an OpenAI chat messages array.
 *
 * Only messages whose `content` is a string are compressed (via
 * `compressText`); any other message is returned as the same object.
 * Compressed messages are shallow copies, so the input is not mutated.
 */
export function compressOpenAIMessages(messages: OpenAIMessage[]): OpenAICompressResult {
  let totalTokensSaved = 0;

  const compressedMessages = messages.map(message => {
    const { content } = message;
    if (typeof content !== 'string') {
      return message;
    }
    const { text, estimatedTokensSaved } = compressText(content);
    totalTokensSaved += estimatedTokensSaved;
    return { ...message, content: text };
  });

  return { messages: compressedMessages, totalTokensSaved };
}
|
||||
@ -2,14 +2,17 @@ import type { FastifyInstance } from 'fastify';
|
||||
import { createTicket, hashContent } from '../tickets/ticket-service.js';
|
||||
import { logger } from '../observability/logger.js';
|
||||
import { config } from '../config.js';
|
||||
import { compressAnthropicBody } from '../compression/message-compressor.js';
|
||||
|
||||
// Anthropic pricing per 1M tokens (USD)
|
||||
const ANTHROPIC_PRICING: Record<string, { input: number; output: number; cached?: number }> = {
|
||||
'claude-opus-4-20250514': { input: 15.0, output: 75.0, cached: 1.50 },
|
||||
'claude-opus-4-5': { input: 15.0, output: 75.0, cached: 1.50 },
|
||||
'claude-sonnet-4-20250514': { input: 3.0, output: 15.0, cached: 0.30 },
|
||||
'claude-sonnet-4-6': { input: 3.0, output: 15.0, cached: 0.30 },
|
||||
'claude-sonnet-4-5-20251001':{ input: 3.0, output: 15.0, cached: 0.30 },
|
||||
'claude-haiku-3-5-20251022': { input: 0.80, output: 4.0, cached: 0.08 },
|
||||
'claude-haiku-4-5': { input: 0.80, output: 4.0, cached: 0.08 },
|
||||
'claude-haiku-3-20250307': { input: 0.25, output: 1.25, cached: 0.025 },
|
||||
};
|
||||
|
||||
@ -22,24 +25,23 @@ function calcCost(model: string, inputTokens: number, outputTokens: number, cach
|
||||
}
|
||||
|
||||
/**
|
||||
* Anthropic API passthrough — enables ANTHROPIC_BASE_URL routing.
|
||||
* Anthropic API passthrough with proxy-side compression.
|
||||
*
|
||||
* Claude Code (and any Anthropic SDK client) sends requests here when
|
||||
* ANTHROPIC_BASE_URL=https://tokenvault.fichtmueller.org is set.
|
||||
* We forward the request to Anthropic using the client's own API key,
|
||||
* then create a TokenVault ticket to track usage and cost.
|
||||
* Auth handling:
|
||||
* x-api-key: sk-ant-* → forward as x-api-key (API key users)
|
||||
* Authorization: Bearer sk-ant-* → forward as x-api-key (SDK Bearer format)
|
||||
* Authorization: Bearer <oauth> → forward as Authorization: Bearer (Claude Code subscription)
|
||||
* fallback → configured server API key
|
||||
*/
|
||||
export async function anthropicProxyRoutes(app: FastifyInstance): Promise<void> {
|
||||
|
||||
// ─── POST /v1/messages — Anthropic Messages API passthrough ──────────────
|
||||
app.post('/v1/messages', async (req, reply) => {
|
||||
const body = req.body as Record<string, unknown>;
|
||||
const model = (body['model'] as string) ?? 'claude-sonnet-4-20250514';
|
||||
|
||||
// Use the client's API key if provided, fall back to configured key
|
||||
const apiKey = (req.headers['x-api-key'] as string)
|
||||
|| (req.headers['authorization'] as string)?.replace(/^Bearer /, '')
|
||||
|| config.providers.anthropic.apiKey;
|
||||
const apiKeyHeader = req.headers['x-api-key'] as string | undefined;
|
||||
const authHeader = req.headers['authorization'] as string | undefined;
|
||||
const configApiKey = config.providers.anthropic.apiKey;
|
||||
|
||||
const anthropicVersion = (req.headers['anthropic-version'] as string) ?? '2023-06-01';
|
||||
const anthropicBeta = req.headers['anthropic-beta'] as string | undefined;
|
||||
@ -48,26 +50,59 @@ export async function anthropicProxyRoutes(app: FastifyInstance): Promise<void>
|
||||
const project = req.headers['x-tokenvault-project'] as string | undefined;
|
||||
const team = req.headers['x-tokenvault-team'] as string | undefined;
|
||||
|
||||
const start = Date.now();
|
||||
// ── Proxy-side compression ─────────────────────────────────────────────
|
||||
const rawMessages = (body['messages'] as Array<{ role: string; content: unknown }>) ?? [];
|
||||
const rawSystem = body['system'] as string | undefined;
|
||||
const { messages: cMsgs, system: cSystem, totalTokensSaved: compressionSaved } =
|
||||
compressAnthropicBody(
|
||||
rawMessages as Parameters<typeof compressAnthropicBody>[0],
|
||||
rawSystem,
|
||||
);
|
||||
const compressedBody: Record<string, unknown> = { ...body, messages: cMsgs };
|
||||
if (cSystem !== undefined) compressedBody['system'] = cSystem;
|
||||
|
||||
// ── Forward to Anthropic ────────────────────────────────────────────────
|
||||
// ── Auth forwarding — detect API key vs OAuth session ──────────────────
|
||||
const forwardHeaders: Record<string, string> = {
|
||||
'Content-Type': 'application/json',
|
||||
'x-api-key': apiKey,
|
||||
'anthropic-version': anthropicVersion,
|
||||
'Content-Type': 'application/json',
|
||||
'anthropic-version': anthropicVersion,
|
||||
};
|
||||
if (anthropicBeta) forwardHeaders['anthropic-beta'] = anthropicBeta;
|
||||
|
||||
let apiKeyForLogging = '(none)';
|
||||
if (apiKeyHeader?.startsWith('sk-ant-')) {
|
||||
// Direct API key in x-api-key
|
||||
forwardHeaders['x-api-key'] = apiKeyHeader;
|
||||
apiKeyForLogging = 'x-api-key:sk-ant-*';
|
||||
} else if (authHeader) {
|
||||
const bearerToken = authHeader.replace(/^Bearer\s+/i, '');
|
||||
if (bearerToken.startsWith('sk-ant-')) {
|
||||
// API key in Authorization: Bearer format (some SDK configs)
|
||||
forwardHeaders['x-api-key'] = bearerToken;
|
||||
apiKeyForLogging = 'bearer:sk-ant-*';
|
||||
} else {
|
||||
// OAuth session token (Claude Code subscription) — forward as-is
|
||||
forwardHeaders['authorization'] = authHeader;
|
||||
apiKeyForLogging = 'bearer:oauth-session';
|
||||
}
|
||||
} else if (apiKeyHeader) {
|
||||
forwardHeaders['x-api-key'] = apiKeyHeader;
|
||||
apiKeyForLogging = 'x-api-key:custom';
|
||||
} else if (configApiKey) {
|
||||
forwardHeaders['x-api-key'] = configApiKey;
|
||||
apiKeyForLogging = 'config-key';
|
||||
}
|
||||
|
||||
const start = Date.now();
|
||||
|
||||
const upstream = await fetch('https://api.anthropic.com/v1/messages', {
|
||||
method: 'POST',
|
||||
headers: forwardHeaders,
|
||||
body: JSON.stringify(body),
|
||||
body: JSON.stringify(compressedBody),
|
||||
});
|
||||
|
||||
const latency = Date.now() - start;
|
||||
const responseBody = await upstream.json() as Record<string, unknown>;
|
||||
|
||||
// ── Track as TokenVault ticket (best-effort, non-blocking) ─────────────
|
||||
if (upstream.ok) {
|
||||
const usage = responseBody['usage'] as {
|
||||
input_tokens?: number;
|
||||
@ -82,8 +117,7 @@ export async function anthropicProxyRoutes(app: FastifyInstance): Promise<void>
|
||||
const cachedTokens = usage.cache_read_input_tokens ?? 0;
|
||||
const cost = calcCost(model, inputTokens, outputTokens, cachedTokens);
|
||||
|
||||
const messages = (body['messages'] as Array<{ role: string; content: unknown }>) ?? [];
|
||||
const inputText = messages.map(m => {
|
||||
const inputText = rawMessages.map(m => {
|
||||
if (typeof m.content === 'string') return m.content;
|
||||
if (Array.isArray(m.content)) return m.content.map((c: Record<string, unknown>) => c['text'] ?? '').join('');
|
||||
return '';
|
||||
@ -92,25 +126,26 @@ export async function anthropicProxyRoutes(app: FastifyInstance): Promise<void>
|
||||
const outputText = outputContent.map(c => c.text ?? '').join('');
|
||||
|
||||
createTicket({
|
||||
provider: 'anthropic',
|
||||
provider: 'anthropic',
|
||||
model,
|
||||
status: 'completed',
|
||||
tokens_in: inputTokens,
|
||||
tokens_out: outputTokens,
|
||||
status: 'completed',
|
||||
tokens_in: inputTokens,
|
||||
tokens_out: outputTokens,
|
||||
tokens_cached: cachedTokens,
|
||||
tokens_saved: 0,
|
||||
cost_usd: cost,
|
||||
latency_ms: latency,
|
||||
cache_hit: cachedTokens > 0,
|
||||
caller: caller ?? 'claude-code',
|
||||
tokens_saved: compressionSaved,
|
||||
cost_usd: cost,
|
||||
latency_ms: latency,
|
||||
cache_hit: cachedTokens > 0,
|
||||
caller: caller ?? 'claude-code',
|
||||
project,
|
||||
team,
|
||||
input_hash: hashContent(inputText),
|
||||
output_hash: hashContent(outputText),
|
||||
}).catch(err => logger.warn({ err }, 'Failed to create ticket for Anthropic passthrough'));
|
||||
input_hash: hashContent(inputText),
|
||||
output_hash: hashContent(outputText),
|
||||
}).catch(err => logger.warn({ err }, 'Failed to create ticket'));
|
||||
}
|
||||
} else {
|
||||
// Track failed requests too
|
||||
logger.warn({ model, status: upstream.status, auth: apiKeyForLogging }, 'Anthropic upstream error');
|
||||
|
||||
createTicket({
|
||||
provider: 'anthropic',
|
||||
model,
|
||||
@ -122,21 +157,15 @@ export async function anthropicProxyRoutes(app: FastifyInstance): Promise<void>
|
||||
caller: caller ?? 'claude-code',
|
||||
project,
|
||||
team,
|
||||
input_hash: hashContent(model),
|
||||
input_hash: hashContent(model),
|
||||
output_hash: '',
|
||||
}).catch(() => { /* best effort */ });
|
||||
|
||||
logger.warn({ model, status: upstream.status }, 'Anthropic upstream error');
|
||||
}
|
||||
|
||||
// ── Return Anthropic response as-is ────────────────────────────────────
|
||||
reply.code(upstream.status);
|
||||
reply.header('Content-Type', 'application/json');
|
||||
|
||||
// Pass through useful Anthropic response headers
|
||||
const anthropicReqId = upstream.headers.get('request-id');
|
||||
if (anthropicReqId) reply.header('request-id', anthropicReqId);
|
||||
|
||||
const reqId = upstream.headers.get('request-id');
|
||||
if (reqId) reply.header('request-id', reqId);
|
||||
return responseBody;
|
||||
});
|
||||
|
||||
|
||||
@ -2,6 +2,7 @@ import type { FastifyInstance } from 'fastify';
|
||||
import { createTicket, hashContent } from '../tickets/ticket-service.js';
|
||||
import { logger } from '../observability/logger.js';
|
||||
import { config } from '../config.js';
|
||||
import { compressOpenAIMessages } from '../compression/message-compressor.js';
|
||||
|
||||
// OpenAI pricing per 1M tokens (USD)
|
||||
const OPENAI_PRICING: Record<string, { input: number; output: number; cached?: number }> = {
|
||||
@ -12,11 +13,12 @@ const OPENAI_PRICING: Record<string, { input: number; output: number; cached?: n
|
||||
'gpt-3.5-turbo': { input: 0.50, output: 1.50 },
|
||||
'o1': { input: 15.0, output: 60.0 },
|
||||
'o1-mini': { input: 3.0, output: 12.0 },
|
||||
'o3': { input: 10.0, output: 40.0 },
|
||||
'o3-mini': { input: 1.10, output: 4.40 },
|
||||
'o4-mini': { input: 1.10, output: 4.40 },
|
||||
};
|
||||
|
||||
function calcCost(model: string, inputTokens: number, outputTokens: number, cachedTokens: number): number {
|
||||
// Match model prefix (e.g. "gpt-4o-2024-11-20" → "gpt-4o")
|
||||
const key = Object.keys(OPENAI_PRICING).find(k => model === k || model.startsWith(k + '-')) ?? null;
|
||||
if (!key) return 0;
|
||||
const pricing = OPENAI_PRICING[key]!;
|
||||
@ -26,23 +28,15 @@ function calcCost(model: string, inputTokens: number, outputTokens: number, cach
|
||||
return Math.max(0, inputCost + outputCost + cacheCost);
|
||||
}
|
||||
|
||||
/**
|
||||
* OpenAI API passthrough — enables OPENAI_BASE_URL routing.
|
||||
*
|
||||
* Any OpenAI SDK client routes through here when
|
||||
* OPENAI_BASE_URL=https://tokenvault.fichtmueller.org is set.
|
||||
* Forwards to OpenAI with the client's API key and creates a TokenVault ticket.
|
||||
*/
|
||||
export async function openaiProxyRoutes(app: FastifyInstance): Promise<void> {
|
||||
|
||||
// ─── POST /v1/chat/completions — OpenAI Chat API passthrough ─────────────
|
||||
// ─── POST /v1/chat/completions ────────────────────────────────────────────
|
||||
app.post('/v1/chat/completions', async (req, reply) => {
|
||||
const body = req.body as Record<string, unknown>;
|
||||
const body = req.body as Record<string, unknown>;
|
||||
const model = (body['model'] as string) ?? 'gpt-4o';
|
||||
|
||||
// Use client's API key, fall back to configured key
|
||||
const authHeader = req.headers['authorization'] as string | undefined;
|
||||
const apiKey = authHeader?.replace(/^Bearer /, '')
|
||||
const apiKey = authHeader?.replace(/^Bearer\s+/i, '')
|
||||
|| config.providers.openai?.apiKey
|
||||
|| '';
|
||||
|
||||
@ -51,63 +45,66 @@ export async function openaiProxyRoutes(app: FastifyInstance): Promise<void> {
|
||||
const team = req.headers['x-tokenvault-team'] as string | undefined;
|
||||
const orgId = req.headers['openai-organization'] as string | undefined;
|
||||
|
||||
const start = Date.now();
|
||||
// ── Proxy-side compression ───────────────────────────────────────────
|
||||
const rawMessages = (body['messages'] as Array<{ role: string; content: string | null }>) ?? [];
|
||||
const { messages: cMsgs, totalTokensSaved: compressionSaved } = compressOpenAIMessages(rawMessages);
|
||||
const compressedBody = { ...body, messages: cMsgs };
|
||||
|
||||
// ── Forward to OpenAI ───────────────────────────────────────────────────
|
||||
const forwardHeaders: Record<string, string> = {
|
||||
'Content-Type': 'application/json',
|
||||
'Authorization': `Bearer ${apiKey}`,
|
||||
};
|
||||
if (orgId) forwardHeaders['OpenAI-Organization'] = orgId;
|
||||
|
||||
const start = Date.now();
|
||||
|
||||
const upstream = await fetch('https://api.openai.com/v1/chat/completions', {
|
||||
method: 'POST',
|
||||
headers: forwardHeaders,
|
||||
body: JSON.stringify(body),
|
||||
body: JSON.stringify(compressedBody),
|
||||
});
|
||||
|
||||
const latency = Date.now() - start;
|
||||
const responseBody = await upstream.json() as Record<string, unknown>;
|
||||
|
||||
// ── Track as TokenVault ticket ──────────────────────────────────────────
|
||||
if (upstream.ok) {
|
||||
const usage = responseBody['usage'] as {
|
||||
prompt_tokens?: number;
|
||||
completion_tokens?: number;
|
||||
total_tokens?: number;
|
||||
prompt_tokens_details?: { cached_tokens?: number };
|
||||
} | undefined;
|
||||
|
||||
if (usage) {
|
||||
const inputTokens = usage.prompt_tokens ?? 0;
|
||||
const outputTokens = usage.completion_tokens ?? 0;
|
||||
const inputTokens = usage.prompt_tokens ?? 0;
|
||||
const outputTokens = usage.completion_tokens ?? 0;
|
||||
const cachedTokens = usage.prompt_tokens_details?.cached_tokens ?? 0;
|
||||
const cost = calcCost(model, inputTokens, outputTokens, cachedTokens);
|
||||
|
||||
const messages = (body['messages'] as Array<{ role: string; content: string }>) ?? [];
|
||||
const inputText = messages.map(m => m.content).join('\n');
|
||||
const choices = (responseBody['choices'] as Array<{ message?: { content?: string } }>) ?? [];
|
||||
const inputText = rawMessages.map(m => m.content ?? '').join('\n');
|
||||
const choices = (responseBody['choices'] as Array<{ message?: { content?: string } }>) ?? [];
|
||||
const outputText = choices.map(c => c.message?.content ?? '').join('');
|
||||
|
||||
createTicket({
|
||||
provider: 'openai',
|
||||
provider: 'openai',
|
||||
model,
|
||||
status: 'completed',
|
||||
tokens_in: inputTokens,
|
||||
tokens_out: outputTokens,
|
||||
status: 'completed',
|
||||
tokens_in: inputTokens,
|
||||
tokens_out: outputTokens,
|
||||
tokens_cached: cachedTokens,
|
||||
tokens_saved: 0,
|
||||
cost_usd: cost,
|
||||
latency_ms: latency,
|
||||
cache_hit: cachedTokens > 0,
|
||||
caller: caller ?? 'openai-sdk',
|
||||
tokens_saved: compressionSaved,
|
||||
cost_usd: cost,
|
||||
latency_ms: latency,
|
||||
cache_hit: cachedTokens > 0,
|
||||
caller: caller ?? 'openai-sdk',
|
||||
project,
|
||||
team,
|
||||
input_hash: hashContent(inputText),
|
||||
output_hash: hashContent(outputText),
|
||||
input_hash: hashContent(inputText),
|
||||
output_hash: hashContent(outputText),
|
||||
}).catch(err => logger.warn({ err }, 'Failed to create ticket for OpenAI passthrough'));
|
||||
}
|
||||
} else {
|
||||
logger.warn({ model, status: upstream.status }, 'OpenAI upstream error');
|
||||
|
||||
createTicket({
|
||||
provider: 'openai',
|
||||
model,
|
||||
@ -119,29 +116,20 @@ export async function openaiProxyRoutes(app: FastifyInstance): Promise<void> {
|
||||
caller: caller ?? 'openai-sdk',
|
||||
project,
|
||||
team,
|
||||
input_hash: hashContent(model),
|
||||
input_hash: hashContent(model),
|
||||
output_hash: '',
|
||||
}).catch(() => { /* best effort */ });
|
||||
|
||||
logger.warn({ model, status: upstream.status }, 'OpenAI upstream error');
|
||||
}
|
||||
|
||||
// ── Return OpenAI response as-is + TokenVault headers ──────────────────
|
||||
reply.code(upstream.status);
|
||||
reply.header('Content-Type', 'application/json');
|
||||
|
||||
if (upstream.ok) {
|
||||
const ticket_num = (responseBody['tokenvault'] as Record<string, unknown> | undefined)?.['ticket_number'];
|
||||
if (ticket_num) reply.header('X-TokenVault-Ticket', String(ticket_num));
|
||||
}
|
||||
|
||||
return responseBody;
|
||||
});
|
||||
|
||||
// ─── GET /v1/models — OpenAI models list passthrough ────────────────────
|
||||
// ─── GET /v1/models ────────────────────────────────────────────────────────
|
||||
app.get('/v1/models', async (req, reply) => {
|
||||
const authHeader = req.headers['authorization'] as string | undefined;
|
||||
const apiKey = authHeader?.replace(/^Bearer /, '') || config.providers.openai?.apiKey || '';
|
||||
const apiKey = authHeader?.replace(/^Bearer\s+/i, '') || config.providers.openai?.apiKey || '';
|
||||
|
||||
const upstream = await fetch('https://api.openai.com/v1/models', {
|
||||
headers: { 'Authorization': `Bearer ${apiKey}` },
|
||||
@ -151,4 +139,5 @@ export async function openaiProxyRoutes(app: FastifyInstance): Promise<void> {
|
||||
reply.header('Content-Type', 'application/json');
|
||||
return upstream.json();
|
||||
});
|
||||
|
||||
}
|
||||
|
||||
393
packages/mcp/src/compression.ts
Normal file
393
packages/mcp/src/compression.ts
Normal file
@ -0,0 +1,393 @@
|
||||
/**
|
||||
* TokenVault MCP Compression Library
|
||||
*
|
||||
* Implements lean-ctx-style file compression modes + RTK-style shell
|
||||
* output compression — all inline so the MCP server has zero extra deps.
|
||||
*
|
||||
* lean-ctx modes: full | signatures | map | aggressive | entropy | lines
|
||||
* RTK patterns: git | npm | cargo | docker | general
|
||||
*/
|
||||
|
||||
// ─── Shared primitives ────────────────────────────────────────────────────────
|
||||
|
||||
/**
 * Strip ANSI escape sequences from terminal output.
 *
 * Removes:
 *  - OSC sequences (`ESC ] ... BEL` or `ESC ] ... ESC \`), e.g. window titles
 *    and hyperlinks;
 *  - every CSI sequence (`ESC [` + parameter bytes + intermediate bytes +
 *    one final byte), which covers colours (`m`), cursor movement
 *    (`A`–`H`), erase (`J`, `K`), private modes such as `?25l` / `?25h`, etc.
 *
 * @param s Raw text, possibly containing escape sequences.
 * @returns `s` without the sequences listed above.
 */
export function stripAnsi(s: string): string {
  // eslint-disable-next-line no-control-regex
  const osc = /\x1B\][^\x07\x1B]*(?:\x07|\x1B\\)/g;
  // eslint-disable-next-line no-control-regex
  const csi = /\x1B\[[0-?]*[ -/]*[@-~]/g;
  return s.replace(osc, '').replace(csi, '');
}
|
||||
|
||||
/**
 * Collapse runs of blank lines.
 *
 * Every run of three or more consecutive `\n` characters (i.e. two or more
 * empty lines) is replaced by exactly two `\n` (one empty line). Lines that
 * contain only spaces or tabs are not treated as empty.
 */
function collapseBlankLines(s: string): string {
  const newlineRun = /\n{3,}/g;
  const collapsed = s.replace(newlineRun, '\n\n');
  return collapsed;
}
|
||||
|
||||
/**
 * Remove trailing spaces and tabs from every line of `s`.
 *
 * Line terminators themselves are kept; only the run of ` ` / `\t`
 * immediately before a line end (or the end of the string) is dropped.
 */
function trimLines(s: string): string {
  const trailingBlanks = /[ \t]+$/gm;
  const trimmed = s.replace(trailingBlanks, '');
  return trimmed;
}
|
||||
|
||||
/**
 * Basic safe compression: whitespace + blank lines (preserves code blocks).
 *
 * Outside fenced code blocks, trailing whitespace is stripped and every run
 * of blank lines is reduced to one blank line. Lines inside a fenced block
 * are copied verbatim. Finally the whole result is trimmed at both ends.
 *
 * Fence handling: a fence opens on a line starting with 3+ backticks or
 * tildes (a backtick fence whose remainder contains another backtick is an
 * inline code span, not a fence) and closes only on a line made of the same
 * fence character, at least as long as the opener, with nothing after it.
 * A ``` line inside a ~~~ block therefore does not end the block.
 *
 * @param input Text to compress; falsy input is returned as-is.
 * @returns The compressed text.
 */
export function compressBasic(input: string): string {
  if (!input) return input;

  const fencePattern = /^(`{3,}|~{3,})([\s\S]*)$/;
  const out: string[] = [];
  let openFence: string | null = null; // marker of the currently open fence, if any
  let blanks = 0;

  for (const raw of input.split('\n')) {
    const t = raw.trim();
    const fence = fencePattern.exec(t);
    const marker = fence?.[1] ?? '';
    const afterMarker = fence?.[2] ?? '';

    // Inside a code block: verbatim, close only on a matching fence.
    if (openFence !== null) {
      out.push(raw);
      const closes =
        marker !== '' &&
        marker[0] === openFence[0] &&
        marker.length >= openFence.length &&
        afterMarker.trim() === '';
      if (closes) openFence = null;
      continue;
    }

    // Opening fence ("```x``` rest" is an inline span, not a fence).
    if (marker !== '' && !(marker[0] === '`' && afterMarker.includes('`'))) {
      openFence = marker;
      out.push(raw);
      blanks = 0;
      continue;
    }

    if (t === '') {
      blanks++;
      if (blanks <= 1) out.push('');
      continue;
    }
    blanks = 0;
    out.push(raw.trimEnd());
  }

  return out.join('\n').trim();
}
|
||||
|
||||
/**
 * Rough token estimate for a string.
 *
 * Uses the heuristic "1 token ≈ 4 characters" and rounds up, so any
 * non-empty string counts as at least one token.
 */
export function estimateTokens(s: string): number {
  const charsPerToken = 4;
  const { length } = s;
  return Math.ceil(length / charsPerToken);
}
|
||||
|
||||
// ─── lean-ctx Compression Modes ───────────────────────────────────────────────
|
||||
|
||||
/**
 * Extract function/class/interface signatures from source code.
 *
 * The pattern set is chosen from the file extension (TS/JS, Python, Go,
 * Rust, Java/Kotlin); unknown extensions use a generic "starts with a
 * declaration keyword" pattern. Patterns are tested against the untrimmed
 * line, so patterns anchored with `^` and no leading `\s` only match
 * declarations that start in column 0.
 *
 * Skipped before matching: block comments (`/* ... *​/`, single- or
 * multi-line), lines starting with `//` or `#`, blank lines and lines that
 * consist of a single brace.
 *
 * @param content Source text.
 * @param ext File extension including the dot (e.g. `.ts`).
 * @returns A header line followed by one `L<line>: <source line>` entry per
 *   match, or `compressBasic(content)` when nothing matched.
 */
function extractSignatures(content: string, ext: string): string {
  const lines = content.split('\n');
  const sigs: string[] = [];

  // Signature patterns by language family
  const patterns: RegExp[] = [];

  if (['.ts', '.tsx', '.js', '.jsx', '.mts', '.mjs'].includes(ext)) {
    patterns.push(
      /^(export\s+)?(async\s+)?function[\s*]+\w+/,
      /^(export\s+)?(abstract\s+)?class\s+\w+/,
      /^(export\s+)?interface\s+\w+/,
      /^(export\s+)?type\s+\w+\s*=/,
      /^(export\s+)?enum\s+\w+/,
      /^(export\s+)?(const|let|var)\s+\w+\s*[:=]/,
      /^\s+(public|private|protected|static|abstract|async|readonly)\s+\w+/,
      /^\s+(async\s+)?\w+\s*\([^)]*\)\s*[:{\-=]/,
    );
  } else if (['.py'].includes(ext)) {
    patterns.push(
      /^(async\s+)?def\s+\w+/,
      /^class\s+\w+/,
      /^@\w+/,
    );
  } else if (['.go'].includes(ext)) {
    patterns.push(
      /^func\s+/,
      /^type\s+\w+\s+(struct|interface)/,
      /^var\s+|^const\s+/,
    );
  } else if (['.rs'].includes(ext)) {
    patterns.push(
      /^(pub\s+)?(async\s+)?fn\s+\w+/,
      /^(pub\s+)?(struct|enum|trait|impl|type|mod)\s+\w+/,
    );
  } else if (['.java', '.kt'].includes(ext)) {
    patterns.push(
      /^\s*(public|private|protected|static|final|abstract|override)\s+/,
      /^(class|interface|enum|record|object)\s+\w+/,
    );
  }

  // Fallback: anything that looks like a declaration
  if (patterns.length === 0) {
    patterns.push(/^(function|class|def|fn|func|type|interface|const|let|var|export)\s+/);
  }

  let inComment = false;
  let lineNum = 0;
  for (const line of lines) {
    lineNum++;
    const t = line.trim();

    // Inside a multi-line block comment: skip until the closing marker.
    if (inComment) {
      if (t.includes('*/')) inComment = false;
      continue;
    }
    // A block comment only stays open when it is not closed on the same
    // line; searching from index 2 keeps "/*/" from counting as closed.
    if (t.startsWith('/*')) {
      inComment = t.indexOf('*/', 2) === -1;
      continue;
    }
    if (t.startsWith('//') || t.startsWith('#')) continue;
    if (t === '' || t === '{' || t === '}') continue;

    if (patterns.some(p => p.test(line))) {
      sigs.push(`L${lineNum}: ${line.trimEnd()}`);
    }
  }

  return sigs.length > 0
    ? `// Signatures (${sigs.length} found, ${lines.length} total lines)\n${sigs.join('\n')}`
    : compressBasic(content);
}
|
||||
|
||||
/**
 * Map mode: imports + exports + top-level structure.
 *
 * Each non-blank line is trimmed and classified as at most one of:
 *  - import: starts with `import `, `from '…'` / `from "…"`, or `require(`
 *    (the full line is kept, minus trailing whitespace);
 *  - export: starts with `export` (optionally `default`) followed by a
 *    declaration keyword;
 *  - declaration: starts with a declaration keyword such as `function`,
 *    `class`, `type`, `struct`, `impl`, `def `, `fn `, `func `.
 *
 * Declaration keywords must be whole words and must not be followed directly
 * by `:`, `=`, `.`, `,`, `;`, `)` or `]`, so identifiers and properties like
 * `typeof x`, `classes.push()` or `type: string` are not reported.
 *
 * Export and declaration entries are rendered as `L<line>: <text>` with the
 * text cut to 80 characters.
 *
 * @param content Source text.
 * @param ext File extension; only echoed in the header line.
 * @returns A header line followed by the non-empty IMPORTS / EXPORTS /
 *   DECLARATIONS sections.
 */
function extractMap(content: string, ext: string): string {
  const importPattern = /^import\s|^from\s+['"]|^require\s*\(/;
  const exportPattern = /^export\s+(default\s+)?(function|class|const|let|var|type|interface|enum|async)/;
  const declarationPattern =
    /^(?:(?:function|class|interface|type|enum|struct|impl|trait)(?![\w$])(?!\s*[:=.,;)\]])|def\s|fn\s|func\s)/;

  const lines = content.split('\n');
  const imports: string[] = [];
  const exports: string[] = [];
  const structs: string[] = [];
  let lineNum = 0;

  for (const line of lines) {
    lineNum++;
    const t = line.trim();
    if (!t) continue;
    if (importPattern.test(t)) {
      imports.push(line.trimEnd());
    } else if (exportPattern.test(t)) {
      exports.push(`L${lineNum}: ${t.slice(0, 80)}`);
    } else if (declarationPattern.test(t)) {
      structs.push(`L${lineNum}: ${t.slice(0, 80)}`);
    }
  }

  const parts: string[] = [`// File map (${lines.length} lines, ext=${ext})`];
  if (imports.length) parts.push(`\n// IMPORTS (${imports.length})\n${imports.join('\n')}`);
  if (exports.length) parts.push(`\n// EXPORTS (${exports.length})\n${exports.join('\n')}`);
  if (structs.length) parts.push(`\n// DECLARATIONS (${structs.length})\n${structs.join('\n')}`);
  return parts.join('\n');
}
|
||||
|
||||
/**
 * Aggressive mode: strip comments, blank lines, docstrings — keep logic only.
 *
 * Line-based rules, applied outside fenced (``` / ~~~) blocks, which are
 * copied verbatim:
 *  - block comments starting at the beginning of a (trimmed) line are
 *    removed, whether they close on the same line (`/** x *​/`) or span
 *    several lines; the closing line is dropped as a whole;
 *  - lines starting with `//` or `#` are removed (this includes shebangs);
 *  - blank lines are removed;
 *  - every remaining line loses its trailing whitespace.
 *
 * Comments that start after code on the same line are left untouched.
 *
 * @param content Source text.
 * @returns The stripped text.
 */
function compressAggressive(content: string): string {
  const lines = content.split('\n');
  const out: string[] = [];
  let inCode = false;
  let inBlockComment = false;

  for (const raw of lines) {
    const t = raw.trim();
    if (/^(`{3,}|~{3,})/.test(t)) { inCode = !inCode; out.push(raw); continue; }
    if (inCode) { out.push(raw); continue; }

    // Inside a multi-line block comment: drop lines up to and including the closer.
    if (inBlockComment) {
      if (t.includes('*/')) inBlockComment = false;
      continue;
    }

    // Block comment opener (covers JSDoc). It only stays open when the
    // closer is not on the same line; searching from index 2 keeps "/*/"
    // from counting as closed.
    if (t.startsWith('/*')) {
      inBlockComment = t.indexOf('*/', 2) === -1;
      continue;
    }

    // Single-line comments (and shebang / `#`-prefixed lines)
    if (t.startsWith('//') || t.startsWith('#')) continue;

    if (t === '') continue;

    out.push(raw.trimEnd());
  }
  return out.join('\n');
}
|
||||
|
||||
/**
 * Heuristic mode selection used by the `entropy` mode.
 *
 * - Files whose extension is not in the code-extension list → `full`.
 * - Otherwise the share of non-code lines is computed. A line is non-code
 *   when it is blank or starts (after trimming) with `//`, `/*`, `#` or `*`.
 *   Above 30 % → `aggressive`.
 * - Otherwise content longer than 5000 characters → `signatures`.
 * - Otherwise → `full`.
 *
 * The extension list includes `.mts` / `.mjs` so that every TS/JS extension
 * understood by the signature extractor is also treated as code here.
 *
 * @param content File content.
 * @param ext File extension including the dot.
 */
function detectMode(content: string, ext: string): 'signatures' | 'aggressive' | 'full' {
  const codeExts = [
    '.ts', '.tsx', '.js', '.jsx', '.mts', '.mjs',
    '.py', '.go', '.rs', '.java', '.kt', '.cpp', '.c', '.h',
  ];
  if (!codeExts.includes(ext)) return 'full';

  const lines = content.split('\n');
  const codeLines = lines.filter(l => {
    const t = l.trim();
    return (
      t.length > 0 &&
      !t.startsWith('//') &&
      !t.startsWith('/*') &&
      !t.startsWith('#') &&
      !t.startsWith('*')
    );
  });
  // Blank lines count as non-code too, not only comments.
  const nonCodeRatio = 1 - (codeLines.length / Math.max(lines.length, 1));

  // High non-code ratio → aggressive removes it; large file → signatures
  if (nonCodeRatio > 0.3) return 'aggressive';
  if (content.length > 5000) return 'signatures';
  return 'full';
}
|
||||
|
||||
/**
 * Apply a lean-ctx compression mode to file content.
 *
 * - `signatures` → `extractSignatures`
 * - `map`        → `extractMap`
 * - `aggressive` → `compressAggressive`
 * - `entropy`    → picks a concrete mode with `detectMode` and re-dispatches
 * - `full` (and any unrecognised value) → `compressBasic`
 *
 * @param content File content.
 * @param ext File extension including the dot.
 * @param mode Requested compression mode.
 * @returns The compressed representation.
 */
export function applyMode(
  content: string,
  ext: string,
  mode: 'full' | 'signatures' | 'map' | 'aggressive' | 'entropy',
): string {
  if (mode === 'entropy') {
    const resolved = detectMode(content, ext);
    return applyMode(content, ext, resolved);
  }
  if (mode === 'signatures') {
    return extractSignatures(content, ext);
  }
  if (mode === 'map') {
    return extractMap(content, ext);
  }
  if (mode === 'aggressive') {
    return compressAggressive(content);
  }
  // 'full' and anything unexpected fall back to the basic compressor.
  return compressBasic(content);
}
|
||||
|
||||
// ─── RTK Shell Compression Patterns ─────────────────────────────────────────
|
||||
|
||||
/**
 * Condenses `git log` output.
 *
 * - `commit <40-hex>` lines are cut to their first 15 characters plus `...`;
 * - `Author:`, `Date:` and `Merge:` header lines are dropped;
 * - blank lines are dropped; every other line is kept without trailing whitespace.
 */
function compressGitLog(output: string): string {
  const kept: string[] = [];

  output.split('\n').forEach((rawLine) => {
    const trimmed = rawLine.trim();

    if (/^commit [a-f0-9]{40}/.test(trimmed)) {
      kept.push(rawLine.slice(0, 15) + '...');
      return;
    }

    const isHeader = /^Author:|^Date:|^Merge:/.test(trimmed);
    if (!isHeader && trimmed) kept.push(rawLine.trimEnd());
  });

  return kept.join('\n');
}
|
||||
|
||||
/**
 * Summarises `git status` output as counted groups instead of a full listing.
 *
 * Both output styles are understood:
 *  - long format: entries are assigned by the section header they appear under
 *    ("Changes to be committed", "Changes not staged for commit",
 *    "Untracked files"); an entry seen before any header is treated as staged;
 *  - short format (`XY path`): a non-blank X marks a staged change, a non-blank
 *    Y a working-tree change, `??` an untracked path. A path may appear in both
 *    the staged and the changed group.
 *
 * Each group lists its first few entries followed by a "+N more" line.
 *
 * @returns The summary, or `output` unchanged when nothing was recognised.
 */
function compressGitStatus(output: string): string {
  const staged: string[] = [];
  const changed: string[] = [];
  const untracked: string[] = [];
  let section: 'staged' | 'unstaged' | 'untracked' | null = null;

  for (const l of output.split('\n')) {
    // Long-format section headers decide where the following entries belong.
    if (/^Changes to be committed:/.test(l)) { section = 'staged'; continue; }
    if (/^Changes not staged for commit:/.test(l)) { section = 'unstaged'; continue; }
    if (/^Untracked files:/.test(l)) { section = 'untracked'; continue; }

    // Any other unindented, non-blank line ends the current long-format section.
    if (section !== null && l.trim() !== '' && !/^\s/.test(l)) section = null;

    // Long-format entry, e.g. "\tmodified:   src/a.ts".
    if (/^\s+(modified|new file|deleted|renamed|copied|typechange):/.test(l)) {
      (section === 'unstaged' ? changed : staged).push(l.trim());
      continue;
    }

    if (section === 'untracked') {
      // Untracked entries are bare indented paths; "(use ...)" lines are hints.
      const entry = l.trim();
      if (/^\s/.test(l) && entry !== '' && !entry.startsWith('(')) untracked.push(entry);
      continue;
    }

    if (section === null) {
      if (/^\?\?/.test(l)) { untracked.push(l.slice(3).trim()); continue; }

      // Short format: X = index status, Y = working-tree status.
      const short = /^([ MADRCTU])([ MADRCTU]) \S/.exec(l);
      if (short) {
        if (short[1] !== ' ') staged.push(l.trim());
        if (short[2] !== ' ') changed.push(l.trim());
      }
    }
  }

  const group = (title: string, items: string[], limit: number): string =>
    `${title} (${items.length}):\n ${items.slice(0, limit).join('\n ')}${items.length > limit ? `\n ...+${items.length - limit} more` : ''}`;

  const parts: string[] = [];
  if (staged.length) parts.push(group('Staged', staged, 5));
  if (changed.length) parts.push(group('Changed', changed, 5));
  if (untracked.length) parts.push(group('Untracked', untracked, 3));

  return parts.join('\n\n') || output;
}
|
||||
|
||||
/**
 * Reduces `git diff` output to one line per file with added/removed line counts.
 *
 * The file label is taken from the `diff --git a/<old> b/<new>` header: the path
 * once when both sides are equal, `old → new` when they differ. Headers that do
 * not have the `a/… b/…` shape keep the text after `diff --git `.
 * `+++` / `---` file-header lines are not counted as changes.
 *
 * @returns "Changed files:" followed by the per-file lines, or `output`
 *          unchanged when it contains no `diff --git` header.
 */
function compressGitDiff(output: string): string {
  const files: string[] = [];
  let current = '';
  let adds = 0;
  let dels = 0;

  const flush = (): void => {
    if (current) files.push(`${current} (+${adds} -${dels})`);
  };

  const labelFor = (header: string): string => {
    const rest = header.slice('diff --git '.length);

    // Unchanged path: "a/<p> b/<p>" splits exactly in the middle, which stays
    // unambiguous even when <p> itself contains " b/".
    const mid = (rest.length - 1) / 2;
    if (
      Number.isInteger(mid) &&
      rest.startsWith('a/') &&
      rest.slice(mid, mid + 3) === ' b/' &&
      rest.slice(2, mid) === rest.slice(mid + 3)
    ) {
      return rest.slice(2, mid);
    }

    const renamed = /^a\/(.+) b\/(.+)$/.exec(rest);
    if (renamed) {
      const [, from = '', to = ''] = renamed;
      return `${from} → ${to}`;
    }

    // Unknown header shape (quoted paths, custom prefixes): previous behaviour.
    return header.replace('diff --git a/', '');
  };

  for (const l of output.split('\n')) {
    if (l.startsWith('diff --git ')) {
      flush();
      current = labelFor(l);
      adds = 0;
      dels = 0;
    } else if (l.startsWith('+') && !l.startsWith('+++')) {
      adds++;
    } else if (l.startsWith('-') && !l.startsWith('---')) {
      dels++;
    }
  }
  flush();

  return files.length ? `Changed files:\n${files.join('\n')}` : output;
}
|
||||
|
||||
/**
 * Keeps only the summary, warning and error lines of npm output.
 *
 * Every line is passed through stripAnsi and trimmed before matching. When no
 * line matches, the first 20 stripped (untrimmed) lines are returned instead.
 */
function compressNpm(output: string): string {
  const summaryLine = /^added \d+|^changed \d+|^found \d+|^npm warn|^npm error|ERROR|WARN|^\s*\d+ package/;
  const statusLine = /^up to date|^audited/;

  const cleaned = output.split('\n').map((line) => stripAnsi(line));
  const kept = cleaned
    .map((line) => line.trim())
    .filter((trimmed) => summaryLine.test(trimmed) || statusLine.test(trimmed));

  if (kept.length > 0) return kept.join('\n');
  return cleaned.slice(0, 20).join('\n');
}
|
||||
|
||||
/**
 * Condenses cargo output to diagnostics and the final status.
 *
 * Kept (after stripAnsi and trimming):
 *  - error headers (`error…`) and coded warning headers (`warning[…`);
 *  - the `-->` location and `= note/help` lines belonging to a kept header;
 *  - the `Finished` line and the warnings summary line;
 * Per-crate Compiling/Checking lines and uncoded warnings are dropped.
 *
 * A blank line ends the current diagnostic, so location and note lines of a
 * dropped warning are not emitted without their header.
 *
 * @returns The kept lines, or the first 30 non-blank lines when nothing matched.
 */
function compressCargo(output: string): string {
  const lines = output.split('\n').map(l => stripAnsi(l));
  const keep: string[] = [];
  let inKeptDiagnostic = false;

  for (const l of lines) {
    const t = l.trim();
    if (t === '') { inKeptDiagnostic = false; continue; }

    const isSummary =
      /^warning: .+ warnings? emitted/.test(t) || /^warning: .+ generated \d+ warnings?/.test(t);

    if (/^error|^warning\[|Finished/.test(t) || isSummary) {
      keep.push(t);
      inKeptDiagnostic = /^error|^warning\[/.test(t);
    } else if (/^warning/.test(t)) {
      // Uncoded warning header: dropped, and so are its detail lines.
      inKeptDiagnostic = false;
    } else if (inKeptDiagnostic && /^= |^-->/.test(t)) {
      // `t` is trimmed, so these must be matched without leading whitespace.
      keep.push(t);
    }
  }

  return keep.length ? keep.join('\n') : lines.filter(l => l.trim()).slice(0, 30).join('\n');
}
|
||||
|
||||
/**
 * Keeps the build-step, FROM, success and error lines of docker output.
 *
 * Lines are passed through stripAnsi and trimmed before matching. When nothing
 * matches, the first 30 non-blank stripped lines are returned.
 */
function compressDocker(output: string): string {
  const interesting = /^Step \d+|^STEP \d+|^Successfully|^ERROR|^FROM|=> \[/;

  const cleaned = output.split('\n').map((line) => stripAnsi(line));
  const kept = cleaned
    .map((line) => line.trim())
    .filter((trimmed) => interesting.test(trimmed));

  if (kept.length > 0) return kept.join('\n');
  return cleaned.filter((line) => line.trim()).slice(0, 30).join('\n');
}
|
||||
|
||||
/**
 * Reduces `ls -la` style listings to "<name> (<size>)" per entry.
 *
 * Rows are split on whitespace; a row needs at least nine columns to count as
 * an entry. Column 5 is taken as the size and everything from column 9 onwards
 * as the name. Directories (rows starting with `d`) are prefixed with `/`,
 * everything else with a space. `total …` and blank rows are ignored.
 *
 * @returns The compact listing, or `output` unchanged when no row qualified.
 */
function compressLs(output: string): string {
  const entries = output
    .split('\n')
    .filter((row) => row.trim() !== '' && !row.startsWith('total'))
    .map((row) => ({ row, cols: row.split(/\s+/) }))
    .filter(({ cols }) => cols.length >= 9)
    .map(({ row, cols }) => {
      const marker = row.startsWith('d') ? '/' : ' ';
      const size = cols[4] ?? '';
      return `${marker}${cols.slice(8).join(' ')} (${size})`;
    });

  return entries.length > 0 ? entries.join('\n') : output;
}
|
||||
|
||||
/**
 * Generic shell-output compression: strips ANSI codes, folds runs of identical
 * consecutive lines into the line plus a ` [×N repeated]` marker, trims trailing
 * whitespace and finally hands the text to collapseBlankLines.
 *
 * Blank lines are never counted as repeats: they pass through untouched so that
 * collapseBlankLines can deal with them, and a leading blank line no longer
 * yields a repeat marker with no line in front of it.
 */
function compressGeneral(output: string): string {
  const lines = output.split('\n').map(l => stripAnsi(l));
  const out: string[] = [];
  // null = no comparable previous line (start of input, or the last line was blank).
  let prev: string | null = null;
  let dupCount = 0;

  const flushRepeats = (): void => {
    if (dupCount > 0) {
      out.push(` [×${dupCount + 1} repeated]`);
      dupCount = 0;
    }
  };

  for (const l of lines) {
    const isBlank = l.trim() === '';
    if (!isBlank && l === prev) { dupCount++; continue; }

    flushRepeats();
    prev = isBlank ? null : l;
    out.push(l.trimEnd());
  }
  flushRepeats();

  return collapseBlankLines(out.join('\n'));
}
|
||||
|
||||
/**
 * RTK-style shell output compression.
 *
 * The command (trimmed, lower-cased) is matched against an ordered rule table;
 * the first matching rule selects the compressor and the reported `method`.
 * Commands that match no rule use compressGeneral with method 'general'.
 * Token counts are estimated on the ANSI-stripped output and on the result;
 * `savedTokens` never goes below zero.
 *
 * @param command The command line that produced `output`.
 * @param output Raw command output.
 */
export function compressShellOutput(command: string, output: string): {
  text: string;
  originalTokens: number;
  compressedTokens: number;
  savedTokens: number;
  method: string;
} {
  const stripped = stripAnsi(output);
  const originalTokens = estimateTokens(stripped);
  const cmd = command.trim().toLowerCase();

  // Order matters: the specific git sub-commands must precede the generic git rule.
  const rules: ReadonlyArray<readonly [RegExp, string, (text: string) => string]> = [
    [/^git log/, 'git-log', compressGitLog],
    [/^git status/, 'git-status', compressGitStatus],
    [/^git diff/, 'git-diff', compressGitDiff],
    [/^git /, 'git-general', compressGeneral],
    [/^npm /, 'npm', compressNpm],
    [/^cargo /, 'cargo', compressCargo],
    [/^docker /, 'docker', compressDocker],
    [/^ls\b/, 'ls', compressLs],
  ];

  const matched = rules.find(([pattern]) => pattern.test(cmd));
  const method = matched ? matched[1] : 'general';
  const text = matched ? matched[2](stripped) : compressGeneral(stripped);

  const compressedTokens = estimateTokens(text);
  const savedTokens = Math.max(0, originalTokens - compressedTokens);

  return { text, originalTokens, compressedTokens, savedTokens, method };
}
|
||||
|
||||
// ─── Generic compress-any-text ────────────────────────────────────────────────
|
||||
|
||||
/** Size and savings figures reported by compressAny. */
export interface CompressStats {
  /** Length of the input text in characters. */
  originalChars: number;
  /** Length of the compressed text in characters. */
  compressedChars: number;
  /** Estimated token count of the input text. */
  originalTokens: number;
  /** Estimated token count of the compressed text. */
  compressedTokens: number;
  /** originalTokens − compressedTokens, never below zero. */
  savedTokens: number;
  /** savedTokens as a percentage of originalTokens (0 when the input has no tokens). */
  savingsPct: number;
}
|
||||
|
||||
/**
 * Compresses arbitrary text and reports how much was saved.
 *
 * - 'basic' (default): compressBasic
 * - 'aggressive': compressAggressive
 * - 'shell': compressShellOutput with an empty command, i.e. its general path
 *
 * @param text Text to compress.
 * @param mode Compression strategy.
 * @returns The compressed text plus character/token statistics.
 */
export function compressAny(text: string, mode: 'basic' | 'aggressive' | 'shell' = 'basic'): {
  text: string;
  stats: CompressStats;
} {
  const originalTokens = estimateTokens(text);

  const runCompressor = (): string => {
    switch (mode) {
      case 'aggressive':
        return compressAggressive(text);
      case 'shell':
        return compressShellOutput('', text).text;
      default:
        return compressBasic(text);
    }
  };
  const compressed = runCompressor();

  const compressedTokens = estimateTokens(compressed);
  const savedTokens = Math.max(0, originalTokens - compressedTokens);

  const stats: CompressStats = {
    originalChars: text.length,
    compressedChars: compressed.length,
    originalTokens,
    compressedTokens,
    savedTokens,
    savingsPct: originalTokens > 0 ? (savedTokens / originalTokens) * 100 : 0,
  };

  return { text: compressed, stats };
}
|
||||
@ -1,6 +1,10 @@
|
||||
import { McpServer } from '@modelcontextprotocol/sdk/server/mcp.js';
import { StdioServerTransport } from '@modelcontextprotocol/sdk/server/stdio.js';
import { z } from 'zod';
import { readFile } from 'node:fs/promises';
import { execFileSync, execSync } from 'node:child_process';
import { extname, resolve } from 'node:path';
import { applyMode, compressShellOutput, compressAny, estimateTokens } from './compression.js';
|
||||
|
||||
const CORE_URL = process.env['TOKENVAULT_URL'] ?? 'http://localhost:3300';
|
||||
|
||||
@ -15,6 +19,188 @@ const server = new McpServer({
|
||||
version: '0.1.0',
|
||||
});
|
||||
|
||||
// ─── tv_read: File reader with lean-ctx compression modes ────────────────────
|
||||
server.tool(
  'tv_read',
  `Read a file with automatic token compression. Use INSTEAD of native Read for significant token savings.

Modes:
full — Basic whitespace normalization (safe for all files, ~10-20% savings)
signatures — Function/class/interface signatures only (~70% savings, great for exploring)
map — Imports + exports + top-level structure (~60% savings, great for navigation)
aggressive — Remove all comments, blank lines, docstrings (~40% savings, keep logic only)
entropy — Auto-select mode based on file type and size (recommended default)
lines — Read specific line range (use with 'lines' parameter)`,
  {
    path: z.string().describe('File path to read (absolute or relative)'),
    mode: z.enum(['full', 'signatures', 'map', 'aggressive', 'entropy', 'lines'])
      .optional()
      .describe('Compression mode (default: entropy)'),
    lines: z.string().optional().describe('Line range for lines mode, e.g. "10-50" or "100-200"'),
  },
  // Reads the file as UTF-8 and returns either a raw line range or the content
  // compressed with the requested mode, prefixed with a one-line stats header.
  async ({ path: filePath, mode = 'entropy', lines }) => {
    const absPath = resolve(filePath);
    const content = await readFile(absPath, 'utf-8');

    // Line range selection: taken when mode is 'lines' OR a range was supplied.
    if (mode === 'lines' || lines) {
      const range = lines ?? '1-100';
      const [startStr = '', endStr = ''] = range.split('-');
      const parsedStart = parseInt(startStr, 10);

      // A malformed range used to produce NaN bounds and a silently empty result.
      if (Number.isNaN(parsedStart)) {
        return {
          content: [{
            type: 'text' as const,
            text: `[tv_read: invalid line range "${range}" — expected "START-END", e.g. "10-50"]`,
          }],
        };
      }

      const start = Math.max(1, parsedStart);
      const parsedEnd = parseInt(endStr, 10);
      // A missing or unparsable end ("10", "10-") means a 100-line window; the
      // end is never allowed to fall before the start.
      const end = Math.max(start, Number.isNaN(parsedEnd) ? start + 99 : parsedEnd);

      const slice = content.split('\n').slice(start - 1, end).join('\n');
      const compTokens = estimateTokens(slice);
      return {
        content: [{
          type: 'text' as const,
          text: `[tv_read: ${filePath} lines ${start}-${end} | ${compTokens} tokens]\n\n${slice}`,
        }],
      };
    }

    // 'lines' always returns above; this only narrows the type for applyMode.
    const compressionMode = mode === 'lines' ? 'entropy' : mode;
    const ext = extname(absPath).toLowerCase();
    const originalTokens = estimateTokens(content);
    const compressed = applyMode(content, ext, compressionMode);
    const compressedTokens = estimateTokens(compressed);
    const savedTokens = Math.max(0, originalTokens - compressedTokens);
    const savingsPct = originalTokens > 0 ? ((savedTokens / originalTokens) * 100).toFixed(0) : '0';

    return {
      content: [{
        type: 'text' as const,
        text: `[tv_read: ${filePath} | mode=${mode} | ${compressedTokens}/${originalTokens} tokens (${savingsPct}% saved)]\n\n${compressed}`,
      }],
    };
  },
);
|
||||
|
||||
// ─── tv_shell: Shell runner with RTK-style output compression ─────────────────
|
||||
server.tool(
  'tv_shell',
  `Run a shell command and compress the output using RTK-style patterns.
Saves 60-90% tokens on git/npm/cargo/docker output.
Handles: git log/status/diff, npm install/audit, cargo build/test, docker build/ps, ls -la.`,
  {
    command: z.string().describe('Shell command to run'),
    compress: z.boolean().optional().describe('Apply RTK compression (default: true)'),
    cwd: z.string().optional().describe('Working directory (default: current directory)'),
    timeout: z.number().optional().describe('Timeout in milliseconds (default: 30000)'),
  },
  // Runs the command synchronously through the shell. A failing command does not
  // throw out of the handler: its stdout/stderr are returned, and the exit status
  // (or terminating signal, e.g. on timeout) is reported so that a failure can be
  // told apart from a success.
  async ({ command, compress = true, cwd, timeout = 30_000 }) => {
    let rawOutput: string;
    let failure = '';
    try {
      rawOutput = execSync(command, {
        encoding: 'utf-8',
        stdio: ['pipe', 'pipe', 'pipe'],
        cwd: cwd ? resolve(cwd) : process.cwd(),
        maxBuffer: 10 * 1024 * 1024,
        timeout,
      });
    } catch (err: unknown) {
      const e = err as {
        stdout?: string;
        stderr?: string;
        status?: number | null;
        signal?: string | null;
      };
      rawOutput = [e.stdout, e.stderr].filter(Boolean).join('\n') || String(err);
      // status is null when the process was killed by a signal.
      failure = `exit=${e.status ?? e.signal ?? 'error'}`;
    }

    if (!compress) {
      const text = failure ? `[tv_shell: ${failure}]\n${rawOutput}` : rawOutput;
      return { content: [{ type: 'text' as const, text }] };
    }

    const result = compressShellOutput(command, rawOutput);
    const pct = result.originalTokens > 0
      ? ((result.savedTokens / result.originalTokens) * 100).toFixed(0)
      : '0';
    const failureNote = failure ? ` | ${failure}` : '';

    return {
      content: [{
        type: 'text' as const,
        text: `[tv_shell: ${command} | method=${result.method}${failureNote} | ${result.compressedTokens}/${result.originalTokens} tokens (${pct}% saved)]\n\n${result.text}`,
      }],
    };
  },
);
|
||||
|
||||
// ─── tv_search: Code search with compact results ──────────────────────────────
|
||||
server.tool(
  'tv_search',
  `Search code/files using ripgrep with compact, token-efficient results.
Use INSTEAD of native Grep for token savings.`,
  {
    pattern: z.string().describe('Regex pattern to search for'),
    path: z.string().optional().describe('Directory or file to search in (default: current dir)'),
    glob: z.string().optional().describe('File glob filter, e.g. "*.ts" or "src/**/*.py"'),
    context: z.number().optional().describe('Lines of context around each match (default: 0)'),
    max_results: z.number().optional().describe('Maximum number of results (default: 50)'),
    case_insensitive: z.boolean().optional().describe('Case-insensitive search (default: false)'),
  },
  // Runs ripgrep and returns at most `max_results` output lines.
  //
  // ripgrep is started WITHOUT a shell and with an argument array, so pattern,
  // glob and path are passed verbatim and cannot inject shell syntax
  // ($(...), backticks, ;, quotes).
  async ({ pattern, path: searchPath, glob, context = 0, max_results = 50, case_insensitive }) => {
    const target = searchPath ? resolve(searchPath) : process.cwd();
    const limit = Math.max(0, Math.trunc(max_results));

    const rgArgs = ['--no-heading', '-n'];
    if (case_insensitive) rgArgs.push('-i');
    if (context > 0) rgArgs.push('-C', String(Math.trunc(context)));
    if (glob) rgArgs.push('--glob', glob);
    // --max-count limits matches PER FILE; the overall cap is applied below.
    // -e keeps a pattern starting with '-' from being parsed as a flag; '--'
    // does the same for the path.
    rgArgs.push('--max-count', String(limit), '-e', pattern, '--', target);

    let rawOutput: string;
    let failure = '';
    try {
      rawOutput = execFileSync('rg', rgArgs, {
        encoding: 'utf-8',
        stdio: ['pipe', 'pipe', 'pipe'],
        maxBuffer: 5 * 1024 * 1024,
      });
    } catch (err: unknown) {
      const e = err as { stdout?: string; stderr?: string; status?: number | null; code?: string };
      rawOutput = e.stdout ?? '';
      // Exit status 1 is ripgrep's "no matches". Anything else (bad regex,
      // unreadable path, rg not installed) is an error and is reported.
      if (e.status !== 1) {
        failure = (e.stderr ?? '').trim() || e.code || String(err);
      }
    }

    const lines = rawOutput.split('\n').filter(l => l.trim());
    const originalTokens = estimateTokens(rawOutput);
    const compactLines = lines.slice(0, limit);
    const compacted = compactLines.join('\n');
    const compressedTokens = estimateTokens(compacted);
    const truncated = lines.length > limit ? `\n[...${lines.length - limit} more matches]` : '';

    const body = compactLines.length > 0 ? compacted : 'No matches found';
    const errorNote = failure ? `\n[rg error: ${failure}]` : '';

    return {
      content: [{
        type: 'text' as const,
        text: `[tv_search: "${pattern}" in ${target} | ${compressedTokens}/${originalTokens} tokens | ${compactLines.length} matches]\n\n${body}${truncated}${errorNote}`,
      }],
    };
  },
);
|
||||
|
||||
// ─── tv_compress: Compress any text on demand ─────────────────────────────────
|
||||
server.tool(
  'tv_compress',
  `Compress any text to reduce token count before sending to any LLM.
Useful for compressing pasted code, logs, documents, or tool output.`,
  {
    text: z.string().describe('Text to compress'),
    mode: z.enum(['basic', 'aggressive', 'shell'])
      .optional()
      .describe('Compression mode: basic (safe), aggressive (max), shell (RTK patterns)'),
  },
  // Delegates to compressAny and returns a stats header, a blank line, and the
  // compressed text.
  async ({ text, mode = 'basic' }) => {
    const result = compressAny(text, mode);
    const { savedTokens, savingsPct, compressedTokens, originalTokens } = result.stats;
    const header = `[tv_compress: ${savedTokens} tokens saved (${savingsPct.toFixed(0)}%) | ${compressedTokens}/${originalTokens} tokens | mode=${mode}]`;

    return {
      content: [{
        type: 'text' as const,
        text: `${header}\n\n${result.text}`,
      }],
    };
  },
);
|
||||
|
||||
// ─── tv_ticket: View and search tickets ──────────────────────────────────────
|
||||
server.tool(
|
||||
'tv_ticket',
|
||||
@ -32,12 +218,10 @@ server.tool(
|
||||
const ticket = await fetchCore(`/v1/tickets/${id}`);
|
||||
return { content: [{ type: 'text' as const, text: JSON.stringify(ticket, null, 2) }] };
|
||||
}
|
||||
|
||||
if (action === 'stats') {
|
||||
const stats = await fetchCore(`/v1/tickets/stats?period=${period ?? 'today'}`);
|
||||
return { content: [{ type: 'text' as const, text: JSON.stringify(stats, null, 2) }] };
|
||||
}
|
||||
|
||||
const params = new URLSearchParams();
|
||||
if (provider) params.set('provider', provider);
|
||||
if (project) params.set('project', project);
|
||||
@ -60,11 +244,8 @@ server.tool(
|
||||
fetchCore(`/v1/cost?period=${period ?? 'month'}`),
|
||||
group_by ? fetchCore(`/v1/cost/breakdown?group_by=${group_by}`) : Promise.resolve(null),
|
||||
]);
|
||||
|
||||
const parts = [`# Cost Summary (${period ?? 'month'})\n${JSON.stringify(summary, null, 2)}`];
|
||||
if (breakdown) {
|
||||
parts.push(`\n# Breakdown by ${group_by}\n${JSON.stringify(breakdown, null, 2)}`);
|
||||
}
|
||||
if (breakdown) parts.push(`\n# Breakdown by ${group_by}\n${JSON.stringify(breakdown, null, 2)}`);
|
||||
return { content: [{ type: 'text' as const, text: parts.join('\n') }] };
|
||||
},
|
||||
);
|
||||
@ -72,11 +253,111 @@ server.tool(
|
||||
// ─── tv_health: Service health ───────────────────────────────────────────────
|
||||
server.tool(
  'tv_health',
  'Check TokenVault service health, configured providers, and tool versions (lean-ctx, RTK).',
  {},
  // Returns the core service's /health payload extended with the locally
  // installed lean-ctx and rtk versions ('unknown' when a tool is missing or the
  // probe fails).
  async () => {
    const health = await fetchCore('/health');

    // Best-effort probe: a missing binary or a timeout must not fail the check.
    const probeVersion = (binary: 'lean-ctx' | 'rtk'): string => {
      try {
        return execSync(`${binary} --version 2>/dev/null || echo unknown`, {
          encoding: 'utf-8', timeout: 5000,
        }).trim();
      } catch {
        return 'unknown';
      }
    };

    return {
      content: [{
        type: 'text' as const,
        text: JSON.stringify({
          ...(health as object),
          compression_tools: {
            'lean-ctx': probeVersion('lean-ctx'),
            rtk: probeVersion('rtk'),
          },
        }, null, 2),
      }],
    };
  },
);
|
||||
|
||||
// ─── tv_update: Auto-update lean-ctx and RTK ─────────────────────────────────
|
||||
server.tool(
  'tv_update',
  `Check for and install updates to lean-ctx (cargo) and RTK (homebrew).
Run this periodically to keep compression tools up to date.`,
  {
    dry_run: z.boolean().optional().describe('Check for updates without installing (default: false)'),
    tool: z.enum(['all', 'lean-ctx', 'rtk']).optional().describe('Which tool to update (default: all)'),
  },
  // Updates the selected tools one after another and returns a flat key/value
  // report. Each tool is handled in its own try/catch, so a failure of one does
  // not prevent the other from being updated. With dry_run only the current
  // version and the command that would be run are reported.
  //
  // NOTE(review): the install commands run synchronously (up to 5 minutes for
  // cargo), which presumably blocks this process for that long.
  async ({ dry_run = false, tool = 'all' }) => {
    const results: Record<string, string> = {};

    // Prints `<binary> --version`, or `fallback` when the binary cannot be run.
    const probeVersion = (binary: 'lean-ctx' | 'rtk', fallback: string): string =>
      execSync(`${binary} --version 2>/dev/null || echo ${fallback}`, {
        encoding: 'utf-8', timeout: 5000,
      }).trim();

    // Both tools derive their status the same way: from the version before and
    // after the install command.
    const statusFor = (before: string, after: string): string => {
      if (after === 'unknown') return 'version-check-failed';
      return before === after ? 'already-latest' : 'updated';
    };

    if (tool === 'all' || tool === 'lean-ctx') {
      try {
        const currentVersion = probeVersion('lean-ctx', 'none');
        results['lean-ctx-current'] = currentVersion;

        if (!dry_run) {
          // cargo install always installs latest from crates.io
          const installOut = execSync('cargo install lean-ctx 2>&1', {
            encoding: 'utf-8', timeout: 300_000, // 5min for compile
          });
          const newVersion = probeVersion('lean-ctx', 'unknown');
          results['lean-ctx-new'] = newVersion;
          results['lean-ctx-status'] = statusFor(currentVersion, newVersion);
          results['lean-ctx-output'] = installOut.slice(-200); // last 200 chars of output
        } else {
          results['lean-ctx-status'] = 'dry-run — would run: cargo install lean-ctx';
        }
      } catch (err) {
        results['lean-ctx-error'] = String(err).slice(0, 200);
      }
    }

    if (tool === 'all' || tool === 'rtk') {
      try {
        const currentVersion = probeVersion('rtk', 'none');
        results['rtk-current'] = currentVersion;

        if (!dry_run) {
          const upgradeOut = execSync('brew upgrade rtk 2>&1 || brew install rtk 2>&1', {
            encoding: 'utf-8', timeout: 120_000,
          });
          const newVersion = probeVersion('rtk', 'unknown');
          results['rtk-new'] = newVersion;
          results['rtk-status'] = statusFor(currentVersion, newVersion);
          results['rtk-output'] = upgradeOut.slice(-200); // last 200 chars of output
        } else {
          results['rtk-status'] = 'dry-run — would run: brew upgrade rtk';
        }
      } catch (err) {
        results['rtk-error'] = String(err).slice(0, 200);
      }
    }

    return {
      content: [{
        type: 'text' as const,
        text: `# tv_update results\n${JSON.stringify(results, null, 2)}`,
      }],
    };
  },
);
|
||||
|
||||
|
||||
39
scripts/org.tokenvault.update-compression-tools.plist
Normal file
39
scripts/org.tokenvault.update-compression-tools.plist
Normal file
@ -0,0 +1,39 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
|
||||
<plist version="1.0">
|
||||
<dict>
|
||||
<key>Label</key>
|
||||
<string>org.tokenvault.update-compression-tools</string>
|
||||
|
||||
<key>ProgramArguments</key>
|
||||
<array>
|
||||
<string>/bin/bash</string>
|
||||
<string>/Users/renefichtmueller/Desktop/Claude Code/tokenvault/scripts/update-compression-tools.sh</string>
|
||||
</array>
|
||||
|
||||
<!-- Run daily at 03:00 AM -->
|
||||
<key>StartCalendarInterval</key>
|
||||
<dict>
|
||||
<key>Hour</key>
|
||||
<integer>3</integer>
|
||||
<key>Minute</key>
|
||||
<integer>0</integer>
|
||||
</dict>
|
||||
|
||||
<key>StandardOutPath</key>
|
||||
<string>/Users/renefichtmueller/Library/Logs/tokenvault-update.log</string>
|
||||
<key>StandardErrorPath</key>
|
||||
<string>/Users/renefichtmueller/Library/Logs/tokenvault-update-error.log</string>
|
||||
|
||||
<key>EnvironmentVariables</key>
|
||||
<dict>
|
||||
<key>PATH</key>
|
||||
<string>/opt/homebrew/bin:/usr/local/bin:/usr/bin:/bin:/Users/renefichtmueller/.cargo/bin</string>
|
||||
<key>HOME</key>
|
||||
<string>/Users/renefichtmueller</string>
|
||||
</dict>
|
||||
|
||||
<key>RunAtLoad</key>
|
||||
<false/>
|
||||
</dict>
|
||||
</plist>
|
||||
60
scripts/update-compression-tools.sh
Executable file
60
scripts/update-compression-tools.sh
Executable file
@ -0,0 +1,60 @@
|
||||
#!/usr/bin/env bash
# TokenVault — Auto-update lean-ctx (cargo) and RTK (homebrew)
# Runs daily via LaunchAgent: ~/Library/LaunchAgents/org.tokenvault.update-compression-tools.plist
#
# Logs: ~/Library/Logs/tokenvault-update.log

set -euo pipefail

LOG="$HOME/Library/Logs/tokenvault-update.log"
mkdir -p "$(dirname "$LOG")"

# Append one timestamped message to the log. The timestamp is taken per message
# so entries written after a long cargo build carry the real time. The message
# is echoed only when stdout is a terminal: the LaunchAgent redirects stdout
# into this same log file, so an unconditional `tee` would log every line twice.
log() {
  local line
  line="[$(date '+%Y-%m-%d %H:%M:%S')] $*"
  echo "$line" >> "$LOG"
  if [ -t 1 ]; then echo "$line"; fi
}

# Same idea for piped command output: log it, and show it on a terminal only.
emit() {
  if [ -t 1 ]; then tee -a "$LOG"; else cat >> "$LOG"; fi
}

log "=== TokenVault compression tool update check ==="

# ── lean-ctx (Rust binary via cargo) ──────────────────────────────────────────
LEAN_CTX_BIN="$HOME/.cargo/bin/lean-ctx"
if [ -f "$LEAN_CTX_BIN" ]; then
  BEFORE=$("$LEAN_CTX_BIN" --version 2>/dev/null || echo "unknown")
  log "lean-ctx before: $BEFORE"

  # cargo install always fetches latest from crates.io.
  # pipefail makes the `if` see cargo's exit status, not tail's.
  if "$HOME/.cargo/bin/cargo" install lean-ctx 2>&1 | tail -5 | emit; then
    AFTER=$("$LEAN_CTX_BIN" --version 2>/dev/null || echo "unknown")
    log "lean-ctx after: $AFTER"
    if [ "$BEFORE" != "$AFTER" ]; then
      log "lean-ctx UPDATED: $BEFORE → $AFTER"
    else
      log "lean-ctx already at latest"
    fi
  else
    log "lean-ctx update FAILED (cargo install returned error)"
  fi
else
  log "lean-ctx not found at $LEAN_CTX_BIN — skipping"
fi

# ── RTK (Homebrew) ────────────────────────────────────────────────────────────
BREW="/opt/homebrew/bin/brew"

# Prints the installed rtk version, or "unknown" when rtk cannot be run.
rtk_version() {
  /opt/homebrew/bin/rtk --version 2>/dev/null || rtk --version 2>/dev/null || echo "unknown"
}

if [ -f "$BREW" ]; then
  BEFORE=$(rtk_version)
  log "rtk before: $BEFORE"

  if "$BREW" upgrade rtk 2>&1 | emit; then
    AFTER=$(rtk_version)
    log "rtk after: $AFTER"
    if [ "$BEFORE" != "$AFTER" ]; then
      log "RTK UPDATED: $BEFORE → $AFTER"
    else
      log "RTK already at latest"
    fi
  else
    # A non-zero exit is reported as a failure (formula not installed, network
    # error, ...) rather than being logged as "already at latest".
    log "RTK update FAILED (brew upgrade returned error) — still at: $(rtk_version)"
  fi
else
  log "Homebrew not found at $BREW — skipping RTK update"
fi

log "=== Update check complete ==="
|
||||
Loading…
x
Reference in New Issue
Block a user