chore: commit deployed gateway state (dashboard, streaming, routing, bridges, cost-tracking)

Live production state on Erik that had drifted from Gitea — deployed across several
sessions but never committed. Excludes deploy/ecosystem.config.cjs (holds live tokens).

- dashboard: passive usage-report endpoint, per-device entries, CEST timezone, cost-panel rounding
- completion: SSE + HTTP/2 streaming
- pipeline: routing-rules, request-scorer, external-providers (subscription bridges)
- cost-tracking: tokenvault migration, cost-calculator, request-logger
- infra: docker-compose bridge env, server/health/tls, deps
This commit is contained in:
Rene Fichtmueller 2026-06-05 20:23:33 +00:00
parent 91384dbb2a
commit 0191c60b64
23 changed files with 6210 additions and 996 deletions

View File

@ -36,7 +36,6 @@ COPY --from=builder /app/packages/gateway/dist ./packages/gateway/dist
# Copy production node_modules
COPY --from=builder /app/node_modules ./node_modules
COPY --from=builder /app/packages/gateway/node_modules ./packages/gateway/node_modules 2>/dev/null || true
# Copy runtime assets (prompt templates, config)
COPY packages/gateway/prompts ./packages/gateway/prompts

View File

@ -4,15 +4,31 @@ services:
container_name: llm-gateway
ports:
- "3100:3100"
extra_hosts:
- "host.docker.internal:host-gateway"
environment:
NODE_ENV: production
PORT: "3100"
DATABASE_URL: "${DATABASE_URL}"
TIP_DATABASE_URL: "${TIP_DATABASE_URL}"
OLLAMA_URL: "http://192.168.178.169:11434"
OLLAMA_BASE_URL: "${OLLAMA_BASE_URL:-https://ollama.fichtmueller.org}"
CLAUDE_BRIDGE_ENABLED: "true"
CLAUDE_BRIDGE_URL: "${CLAUDE_BRIDGE_URL:-http://host.docker.internal:3250}"
CLAUDE_CODE_URL: "${CLAUDE_CODE_URL:-http://host.docker.internal:3250}"
OPENAI_BRIDGE_URL: "${OPENAI_BRIDGE_URL:-http://host.docker.internal:3251}"
CHATGPT_BRIDGE_URL: "${CHATGPT_BRIDGE_URL:-http://host.docker.internal:3251}"
COPILOT_BRIDGE_URL: "${COPILOT_BRIDGE_URL:-http://host.docker.internal:3252}"
GEMINI_BRIDGE_URL: "${GEMINI_BRIDGE_URL:-http://host.docker.internal:3254}"
CODEX_BRIDGE_URL: "${CODEX_BRIDGE_URL:-http://host.docker.internal:3253}"
OPENAI_CODEX_URL: "${OPENAI_CODEX_URL:-http://host.docker.internal:3253}"
AIDER_BRIDGE_URL: "${AIDER_BRIDGE_URL:-http://host.docker.internal:3256}"
SHIELDX_URL: "${SHIELDX_URL:-}"
GITEA_URL: "http://gitea.context-x.org"
LOG_LEVEL: "${LOG_LEVEL:-info}"
DASHBOARD_AUTH_TOKEN: "${DASHBOARD_AUTH_TOKEN:-}"
REFERENCE_INPUT_COST_PER_1K: "${REFERENCE_INPUT_COST_PER_1K:-0.005}"
REFERENCE_OUTPUT_COST_PER_1K: "${REFERENCE_OUTPUT_COST_PER_1K:-0.015}"
restart: unless-stopped
healthcheck:
test: ["CMD", "wget", "-q", "-O-", "http://localhost:3100/health/live"]

57
package-lock.json generated
View File

@ -11,10 +11,10 @@
"packages/*"
],
"dependencies": {
"jose": "^6.2.2"
"jose": "^6.2.3"
}
},
"../../../shieldx": {
"../../shieldx": {
"extraneous": true
},
"node_modules/@esbuild/darwin-arm64": {
@ -305,6 +305,10 @@
"resolved": "packages/codex-lsp-adapter",
"link": true
},
"node_modules/@llm-gateway/companion": {
"resolved": "packages/companion",
"link": true
},
"node_modules/@llm-gateway/ctx-health": {
"resolved": "packages/ctx-health",
"link": true
@ -321,6 +325,10 @@
"resolved": "packages/learning-integration",
"link": true
},
"node_modules/@llm-gateway/mcp-server": {
"resolved": "packages/mcp-server",
"link": true
},
"node_modules/@llm-gateway/prompt-optimizer": {
"resolved": "packages/prompt-optimizer",
"link": true
@ -1127,6 +1135,8 @@
},
"node_modules/fastify-plugin": {
"version": "5.1.0",
"resolved": "https://registry.npmjs.org/fastify-plugin/-/fastify-plugin-5.1.0.tgz",
"integrity": "sha512-FAIDA8eovSt5qcDgcBvDuX/v0Cjz0ohGhENZ/wpc3y+oZCY2afZ9Baqql3g/lC+OHRnciQol4ww7tuthOb9idw==",
"funding": [
{
"type": "github",
@ -1475,9 +1485,9 @@
}
},
"node_modules/jose": {
"version": "6.2.2",
"resolved": "https://registry.npmjs.org/jose/-/jose-6.2.2.tgz",
"integrity": "sha512-d7kPDd34KO/YnzaDOlikGpOurfF0ByC2sEV4cANCtdqLlTfBlw2p14O/5d/zv40gJPbIQxfES3nSx1/oYNyuZQ==",
"version": "6.2.3",
"resolved": "https://registry.npmjs.org/jose/-/jose-6.2.3.tgz",
"integrity": "sha512-YYVDInQKFJfR/xa3ojUTl8c2KoTwiL1R5Wg9YCydwH0x0B9grbzlg5HC7mMjCtUJjbQ/YnGEZIhI5tCgfTb4Hw==",
"license": "MIT",
"funding": {
"url": "https://github.com/sponsors/panva"
@ -3178,6 +3188,21 @@
"node": ">=0.4"
}
},
"node_modules/yaml": {
"version": "2.9.0",
"resolved": "https://registry.npmjs.org/yaml/-/yaml-2.9.0.tgz",
"integrity": "sha512-2AvhNX3mb8zd6Zy7INTtSpl1F15HW6Wnqj0srWlkKLcpYl/gMIMJiyuGq2KeI2YFxUPjdlB+3Lc10seMLtL4cA==",
"license": "ISC",
"bin": {
"yaml": "bin.mjs"
},
"engines": {
"node": ">= 14.6"
},
"funding": {
"url": "https://github.com/sponsors/eemeli"
}
},
"node_modules/yocto-queue": {
"version": "1.2.2",
"resolved": "https://registry.npmjs.org/yocto-queue/-/yocto-queue-1.2.2.tgz",
@ -4086,6 +4111,16 @@
}
}
},
"packages/companion": {
"name": "@llm-gateway/companion",
"version": "1.0.0",
"bin": {
"llm-gateway-companion": "bin/llm-gateway-companion.js"
},
"engines": {
"node": ">=18"
}
},
"packages/ctx-health": {
"name": "@llm-gateway/ctx-health",
"version": "1.0.0",
@ -4114,6 +4149,7 @@
"@fastify/static": "^8.3.0",
"ajv": "^8.17.1",
"fastify": "^5.8.5",
"fastify-plugin": "^5.1.0",
"franc": "^6.2.0",
"jose": "^5.4.0",
"js-yaml": "^4.1.0",
@ -4122,6 +4158,7 @@
"pg-boss": "^10.1.3",
"pino": "^9.5.0",
"prom-client": "^15.1.3",
"yaml": "^2.9.0",
"zod": "^3.23.8"
},
"devDependencies": {
@ -4448,6 +4485,16 @@
}
}
},
"packages/mcp-server": {
"name": "@llm-gateway/mcp-server",
"version": "1.0.0",
"bin": {
"llm-gateway-mcp": "bin/llm-gateway-mcp.js"
},
"engines": {
"node": ">=18"
}
},
"packages/prompt-optimizer": {
"name": "@llm-gateway/prompt-optimizer",
"version": "0.1.0",

View File

@ -18,6 +18,6 @@
"ctx-health:dev": "npm run dev --workspace=packages/ctx-health"
},
"dependencies": {
"jose": "^6.2.2"
"jose": "^6.2.3"
}
}

View File

@ -7,7 +7,8 @@
"build": "tsc && npm run build:copy-assets",
"build:copy-assets": "mkdir -p dist/db/migrations dist/config dist/public && cp -r src/db/migrations/*.sql dist/db/migrations/ 2>/dev/null || true && cp -r src/config/*.yaml dist/config/ 2>/dev/null || true && cp -r public/* dist/public/ 2>/dev/null || true",
"start": "node dist/server.js",
"test": "vitest"
"test": "vitest",
"prestart": "node scripts/check-build-drift.mjs"
},
"dependencies": {
"@fastify/cors": "^10.1.0",
@ -16,6 +17,7 @@
"@fastify/static": "^8.3.0",
"ajv": "^8.17.1",
"fastify": "^5.8.5",
"fastify-plugin": "^5.1.0",
"franc": "^6.2.0",
"jose": "^5.4.0",
"js-yaml": "^4.1.0",
@ -24,6 +26,7 @@
"pg-boss": "^10.1.3",
"pino": "^9.5.0",
"prom-client": "^15.1.3",
"yaml": "^2.9.0",
"zod": "^3.23.8"
},
"devDependencies": {

View File

@ -1,63 +1,105 @@
id: linkedin_post
version: "1.0.0"
version: "2.0.0"
task_type: linkedin_post
description: "LinkedIn teaser in Rene Fichtmueller's voice. Anti-AI, anti-marketing, technical, direct."
system_prompt: |
You are a professional LinkedIn content writer. Write engaging, authentic posts that sound human.
You write a single short LinkedIn post in Rene Fichtmueller's voice. Rene is a network/optics engineer who blogs at blog.fichtmueller.org. His voice is direct, technical, sometimes contrarian, never marketing.
Rules:
- Maximum 1300 characters (LinkedIn soft limit)
- No hashtag spam (max 3 relevant hashtags)
- No engagement-bait questions at the end
- No "In today's fast-paced world" openings
- Write in first person, direct and confident tone
- Include a clear value point or insight
- Current date: {{current_date}}
HARD RULES — do not violate:
- 2 to 3 short sentences. Maximum 4. Period.
- No hashtags. None. Not at the end, not anywhere.
- No emojis. Not even one.
- No engagement-bait. Do not end with "What do you think?", "Thoughts?", "Have you seen this?".
- No call-to-action language ("Check it out", "Read more", "Don't miss").
- No meta-references to the blog post itself: do not write "I wrote about this", "I published a piece", "I broke this down", "more in the article".
- End with the URL on its own line. Nothing after the URL.
BANNED PHRASES — never use any of these:
- delve, leverage, robust, journey, embark, paradigm, unlock, seamlessly, holistic, harness, foster, amplify, underscore, indelible, profound, intricate, meticulous, testament, vibrant, bespoke, encompass, hitherto, realm, utilize, synergy
- "leaving money on the table"
- "until it's too late"
- "the line item most X skip"
- "turns out"
- "the unexpected part is"
- "the gap between X and Y is wider than"
- "in today's fast-paced", "in the world of", "in the realm of"
- "it's important to note", "it's worth noting"
- "let's dive into", "let's explore"
- "the future of X", "the next generation of X" (unless quoting someone)
- "game-changer", "cutting-edge", "groundbreaking", "comprehensive"
TONE — match these traits:
- Specific numbers over generalities. 20W is better than "high power". 14 weeks is better than "long lead time".
- Named products, standards, RFCs when relevant. 400ZR+, RPKI, IEEE 802.3.
- First person ("I", "my", "we") where genuine.
- Short sentences. Period. Short sentences. Period.
- Concession sometimes: admit what you don't know or what surprised you.
- Closing line stands on its own. No qualifier, no hedge.
Current date: {{current_date}}
{{few_shot_examples}}
system_prompt_de: |
Du bist ein professioneller LinkedIn-Content-Writer. Schreibe authentische, menschlich klingende Beiträge.
Du schreibst einen kurzen LinkedIn-Post in der Stimme von Rene Fichtmueller. Direkt, technisch, manchmal contrarian, nie Marketing.
Regeln:
- Maximal 1300 Zeichen (LinkedIn Soft-Limit)
- Keine Hashtag-Spam (max. 3 relevante Hashtags)
- Keine Engagement-Bait-Fragen am Ende
- Keine Einstiege mit "In der heutigen schnelllebigen Welt"
- Schreibe in der Ich-Perspektive, direkt und selbstsicher
- Enthalte einen klaren Mehrwert oder Einblick
- Aktuelles Datum: {{current_date}}
HARTE REGELN — nie verletzen:
- 2 bis 3 kurze Sätze. Maximal 4. Punkt.
- Keine Hashtags. Keine. Nirgendwo.
- Keine Emojis. Auch nicht einer.
- Kein Engagement-Bait. Niemals enden mit "Was meint ihr?", "Eure Erfahrung?".
- Keine Call-to-Action-Sprache ("Schaut mal rein", "Hier mehr lesen").
- Keine Meta-Referenzen auf den Blog-Post: kein "Ich habe dazu geschrieben", "Mehr im Artikel".
- URL alleine in der letzten Zeile. Nichts danach.
VERBOTENE WORTE/PHRASEN:
- "leverage", "delve", "robust", "harness", "navigieren", "Reise", "Paradigma", "freischalten", "ganzheitlich", "Synergie", "umfassend"
- "in der heutigen schnelllebigen Welt"
- "es lohnt sich zu erwähnen"
TON:
- Konkrete Zahlen statt Verallgemeinerungen.
- Erste Person wo authentisch.
- Kurze Sätze. Punkt.
- Konzession wo möglich: zugeben was überrascht hat.
Aktuelles Datum: {{current_date}}
user_template: |
Topic/Input:
Article info:
{{input}}
Context:
{{user_context}}
Write a LinkedIn post:
Write the LinkedIn post now. 2-3 sentences. No hashtags. URL on last line. Stop after the URL.
user_template_de: |
Thema/Eingabe:
Artikel-Infos:
{{input}}
Kontext:
{{user_context}}
Schreibe einen LinkedIn-Beitrag:
Schreibe den LinkedIn-Post jetzt. 2-3 Sätze. Keine Hashtags. URL in der letzten Zeile. Stopp nach der URL.
few_shot_examples:
- user: "We deployed a new BGP monitoring system last week"
- user: "Article info:\nTitle: BGP Route Leak Hit My Network Last Tuesday\nExcerpt: A transit provider re-advertised our customer cone to the wrong peers. Detection took 4 hours. The mitigation took 14 minutes once we saw it.\nURL: https://blog.fichtmueller.org/example1/"
assistant: |
We spent 18 months dealing with BGP route leaks that cost us 4 hours of network visibility per incident.
A transit provider re-advertised our customer cone to the wrong peers last Tuesday. We caught it after 4 hours. The fix took 14 minutes once we saw it — the cost was the 4 hours we didn't.
Last week we deployed our own monitoring system. First result: caught a route leak from a transit provider within 3 minutes, before any customer noticed.
https://blog.fichtmueller.org/example1/
The fix wasn't sophisticated. It was systematic alerting on RPKI invalid routes combined with PeeringDB cross-referencing.
- user: "Article info:\nTitle: 400ZR Reach Math Nobody Shows You\nExcerpt: Datasheets quote a maximum reach number. That number is for a clean link with margin you don't have.\nURL: https://blog.fichtmueller.org/example2/"
assistant: |
The 400ZR datasheet number is a ceiling, not a target. Real-world links have eight connectors, two amplifier hops, and aged fiber — the spec assumes none of that.
If you're running a mid-size ISP and still relying on customer tickets to discover routing issues, that's the gap worth closing.
Build to the budget you actually have, not the one the marketing slide showed you.
#BGP #NetworkEngineering #ISP
https://blog.fichtmueller.org/example2/
- user: "Article info:\nTitle: ADHD Diary #008: The 47-Tabs Problem\nExcerpt: I closed 47 browser tabs Tuesday. By Wednesday I had 51 again. This is not a tool problem.\nURL: https://blog.fichtmueller.org/example3/"
assistant: |
I closed 47 browser tabs on Tuesday. By Wednesday morning I had 51 again.
This isn't a tool problem. Every tab is a "I might do this later" promise I made to myself. The fix isn't a better tab manager — it's saying no.
https://blog.fichtmueller.org/example3/
variables:
- input

File diff suppressed because it is too large Load Diff

View File

@ -1,7 +1,7 @@
# LLM Gateway Model Configuration
# Ollama base URL: http://192.168.178.169:11434
ollama_base_url: "https://ollama.fichtmueller.org"
ollama_base_url: "http://127.0.0.1:11434"
tiers:
fast:
@ -26,7 +26,7 @@ models:
qwen2.5:3b:
tier: fast
context_length: 32768
strengths: [classification, short_text, routing]
strengths: [classification, summarization, routing]
max_tokens_default: 512
qwen2.5:7b:
@ -35,83 +35,58 @@ models:
strengths: [classification, summarization, short_analysis]
max_tokens_default: 1024
phi3.5:3.8b:
qwen2.5:7b-instruct:
tier: fast
context_length: 128000
strengths: [classification, summarization]
context_length: 32768
strengths: [classification, summarization, short_analysis]
max_tokens_default: 1024
qwen2.5-coder:7b-instruct:
tier: fast
context_length: 32768
strengths: [code_generation, technical_analysis, routing]
max_tokens_default: 512
# ─── MAGATAMA — Fine-tuned Security Intelligence (Context X) ─────────────────
magatama:32b:
tier: large
context_length: 131072
strengths: [security_analysis, threat_intelligence, compliance, bgp_security, incident_response, nis2, ciso_reporting]
strengths: [security_analysis, threat_intelligence, compliance, bgp_security, incident_response, nis2, ciso_reporting, complex_writing, deep_analysis, technical]
max_tokens_default: 4096
description: "MAGATAMA まがたま — TEPPEKI 7-pillar security AI, fine-tuned on Qwen2.5-32B"
# Custom fine-tuned models (Context X)
ctxhealer:latest:
tier: medium
context_length: 32768
strengths: [infrastructure_diagnosis, root_cause_analysis, remediation_steps]
max_tokens_default: 1024
llama-guard3:1b:
tier: fast
context_length: 8192
strengths: [safety_classification, threat_detection]
max_tokens_default: 256
# Medium tier
qwen2.5:14b:
tier: medium
context_length: 131072
strengths: [general, writing, analysis, coding]
strengths: [general, writing, analysis, coding, dialogue]
max_tokens_default: 2048
mistral:7b:
magatama-llm-v2-0:latest:
tier: medium
context_length: 32768
strengths: [general, writing]
context_length: 131072
strengths: [general, writing, analysis, coding, dialogue]
max_tokens_default: 2048
llama3.2:8b:
tier: medium
context_length: 128000
strengths: [general, chat, analysis]
max_tokens_default: 2048
deepseek-r1:8b:
magatama-coder:latest:
tier: medium
context_length: 65536
strengths: [reasoning, analysis, coding]
strengths: [code_generation, technical_analysis, debugging]
max_tokens_default: 2048
# Large tier
qwen2.5:32b:
tier: large
context_length: 131072
strengths: [complex_writing, deep_analysis, technical]
max_tokens_default: 4096
llama3.3:70b:
tier: large
context_length: 128000
strengths: [complex_reasoning, long_form, research]
max_tokens_default: 4096
deepseek-r1:32b:
tier: large
context_length: 131072
strengths: [chain_of_thought, complex_reasoning]
strengths: [complex_writing, deep_analysis, technical, security_analysis]
max_tokens_default: 4096
# Fallback chains per tier
fallback_chains:
fast: [qwen2.5:3b, qwen2.5:7b, phi3.5:3.8b]
medium: [qwen2.5:14b, mistral:7b, llama3.2:8b]
large: [qwen2.5:32b, llama3.3:70b, deepseek-r1:32b]
code_generation: [deepseek-r1:32b, qwen2.5:32b, llama3.3:70b]
fast: [qwen2.5:7b-instruct, qwen2.5-coder:7b-instruct]
medium: [magatama-llm-v2-0:latest, magatama-coder:latest, qwen2.5:7b-instruct]
large: [magatama:32b, magatama-llm-v2-0:latest]
code_generation: [magatama-coder:latest, qwen2.5-coder:7b-instruct]
# Cross-tier fallback when primary tier fails
tier_fallback:

View File

@ -1110,7 +1110,7 @@ routing_rules:
# ─── CONTENT / LINKEDIN ──────────────────────────────────────────────────────
linkedin_post:
model: qwen2.5:32b
model: fo-blog-v10
tier: large
prompt_template: linkedin_post
temperature: 0.7
@ -1118,7 +1118,7 @@ routing_rules:
output_format: text
requires_fact_check: false
validators: [banlist, language, length, question_closer]
callers: [n8n, internal]
callers: [n8n, internal, linkedin-distributor]
linkedin_comment:
model: qwen2.5:14b

View File

@ -3,7 +3,7 @@
-- Purpose: Track token compression and cost analytics
-- PostgreSQL compatible version (version 16+)
-- Table: Token compression metrics (LeanCTX, RTK)
-- Table: Token compression metrics (LLM Gateway)
CREATE TABLE IF NOT EXISTS tokenvault_metrics (
id SERIAL PRIMARY KEY,
file_path VARCHAR(255),

View File

@ -1,12 +1,12 @@
-- Tokenvault & Cost Tracking Schema Extensions
-- Created: 2026-04-19
-- Purpose: Track token compression (LeanCTX + RTK) and cost analytics
-- Purpose: Track token compression (LLM Gateway) and cost analytics
-- Table: Token compression metrics (LeanCTX, RTK)
-- Table: Token compression metrics (LLM Gateway)
CREATE TABLE IF NOT EXISTS tokenvault_metrics (
id SERIAL PRIMARY KEY,
file_path VARCHAR(255),
mode VARCHAR(50), -- 'lean-aggressive', 'lean-map', 'rtk-max', etc.
mode VARCHAR(50), -- 'gateway-aggressive', 'gateway-map', 'gateway-trim', etc.
tokens_before INT,
tokens_after INT,
savings_pct DECIMAL(5,2),
@ -26,7 +26,7 @@ CREATE TABLE IF NOT EXISTS cost_analytics (
agent_id VARCHAR(50), -- 'claude-code', 'qwen-reviewer', etc.
tokens_in INT,
tokens_out INT,
tokens_compressed INT, -- After LeanCTX + RTK
tokens_compressed INT, -- After LLM Gateway compression
cost_usd DECIMAL(10,6),
cost_saved_usd DECIMAL(10,6),
provider VARCHAR(50), -- 'ollama', 'cerebras', 'groq', 'claude', etc.

View File

@ -109,6 +109,11 @@ export class RequestLogger {
cost_usd: number;
latency_ms: number;
fallback_used: boolean;
compression_mode?: string;
compression_tokens_before?: number;
compression_tokens_after?: number;
compression_tokens_saved?: number;
compression_savings_pct?: number;
error_message?: string;
created_at: string;
}>
@ -116,22 +121,35 @@ export class RequestLogger {
const result = await this.db.query(
`
SELECT
request_id,
caller_id as caller,
task_type,
model,
status,
confidence_score,
tokens_in,
tokens_out,
cost_usd,
latency_ms,
fallback_used,
error_message,
created_at
FROM request_tracking
WHERE created_at > NOW() - MAKE_INTERVAL(hours => $1)
ORDER BY created_at DESC
rt.request_id,
rt.caller_id as caller,
rt.task_type,
rt.model,
rt.status,
rt.confidence_score,
rt.tokens_in,
rt.tokens_out,
rt.cost_usd,
rt.latency_ms,
rt.fallback_used,
tv.mode as compression_mode,
tv.tokens_before as compression_tokens_before,
tv.tokens_after as compression_tokens_after,
GREATEST(COALESCE(tv.tokens_before, 0) - COALESCE(tv.tokens_after, 0), 0) as compression_tokens_saved,
tv.savings_pct as compression_savings_pct,
rt.error_message,
rt.created_at
FROM request_tracking rt
LEFT JOIN LATERAL (
SELECT mode, tokens_before, tokens_after, savings_pct
FROM tokenvault_metrics
WHERE tool_used = 'gateway'
AND file_path = rt.request_id
ORDER BY created_at DESC
LIMIT 1
) tv ON true
WHERE rt.created_at > NOW() - MAKE_INTERVAL(hours => $1)
ORDER BY rt.created_at DESC
LIMIT $2
`,
[offsetHours, limit]
@ -149,6 +167,11 @@ export class RequestLogger {
cost_usd: row.cost_usd,
latency_ms: row.latency_ms,
fallback_used: row.fallback_used,
compression_mode: row.compression_mode,
compression_tokens_before: row.compression_tokens_before ? parseInt(row.compression_tokens_before, 10) : undefined,
compression_tokens_after: row.compression_tokens_after ? parseInt(row.compression_tokens_after, 10) : undefined,
compression_tokens_saved: row.compression_tokens_saved ? parseInt(row.compression_tokens_saved, 10) : 0,
compression_savings_pct: row.compression_savings_pct ? parseFloat(row.compression_savings_pct) : 0,
error_message: row.error_message,
created_at: row.created_at
}));
@ -160,6 +183,17 @@ export class RequestLogger {
async getMetrics(bucketMinutes: number = 60): Promise<{
total_requests: number;
total_cost: number;
estimated_api_cost: number;
estimated_api_cost_avoided: number;
total_tokens_in: number;
total_tokens_out: number;
total_tokens: number;
compression_operations: number;
compression_tokens_before: number;
compression_tokens_after: number;
compression_tokens_saved: number;
compression_rate: number;
cache_hit_rate: number;
avg_latency: number;
success_rate: number;
avg_confidence: number;
@ -177,13 +211,15 @@ export class RequestLogger {
`
SELECT
COUNT(*) as total_requests,
SUM(cost_usd) as total_cost,
AVG(latency_ms) as avg_latency,
SUM(CASE WHEN status = 'approved' THEN 1 ELSE 0 END)::FLOAT / COUNT(*) as success_rate,
AVG(confidence_score) as avg_confidence,
SUM(CASE WHEN fallback_used = true THEN 1 ELSE 0 END)::FLOAT / COUNT(*) as fallback_percentage
COALESCE(SUM(cost_usd), 0) as total_cost,
COALESCE(SUM(tokens_in), 0) as total_tokens_in,
COALESCE(SUM(tokens_out), 0) as total_tokens_out,
COALESCE(AVG(latency_ms), 0) as avg_latency,
CASE WHEN COUNT(*) = 0 THEN 0 ELSE SUM(CASE WHEN status = 'approved' THEN 1 ELSE 0 END)::FLOAT / COUNT(*) END as success_rate,
COALESCE(AVG(confidence_score), 0) as avg_confidence,
CASE WHEN COUNT(*) = 0 THEN 0 ELSE SUM(CASE WHEN fallback_used = true THEN 1 ELSE 0 END)::FLOAT / COUNT(*) END as fallback_percentage
FROM request_tracking
WHERE created_at > NOW() - MAKE_INTERVAL(mins => $1)
WHERE created_at > NOW() - ($1 * INTERVAL '1 minute')
`,
[bucketMinutes]
);
@ -192,7 +228,7 @@ export class RequestLogger {
`
SELECT caller_id as caller, COUNT(*) as count
FROM request_tracking
WHERE created_at > NOW() - MAKE_INTERVAL(mins => $1)
WHERE created_at > NOW() - ($1 * INTERVAL '1 minute')
GROUP BY caller_id
ORDER BY count DESC
LIMIT 5
@ -204,7 +240,7 @@ export class RequestLogger {
`
SELECT model, COUNT(*) as count
FROM request_tracking
WHERE created_at > NOW() - MAKE_INTERVAL(mins => $1)
WHERE created_at > NOW() - ($1 * INTERVAL '1 minute')
GROUP BY model
ORDER BY count DESC
LIMIT 5
@ -224,11 +260,47 @@ export class RequestLogger {
[bucketMinutes]
);
const compressionResult = await this.db.query(
`
SELECT
COUNT(*) as operations,
COALESCE(SUM(tokens_before), 0) as tokens_before,
COALESCE(SUM(tokens_after), 0) as tokens_after,
COALESCE(SUM(GREATEST(tokens_before - tokens_after, 0)), 0) as tokens_saved
FROM tokenvault_metrics
WHERE tool_used = 'gateway'
AND created_at > NOW() - ($1 * INTERVAL '1 minute')
`,
[bucketMinutes]
);
const metrics = metricsResult.rows[0];
const totalTokensIn = parseInt(metrics.total_tokens_in, 10) || 0;
const totalTokensOut = parseInt(metrics.total_tokens_out, 10) || 0;
const totalTokens = totalTokensIn + totalTokensOut;
const compression = compressionResult.rows[0] ?? {};
const compressionTokensBefore = parseInt(compression.tokens_before, 10) || 0;
const compressionTokensAfter = parseInt(compression.tokens_after, 10) || 0;
const compressionTokensSaved = parseInt(compression.tokens_saved, 10) || 0;
const referenceInputCostPer1k = parseFloat(process.env['REFERENCE_INPUT_COST_PER_1K'] ?? '0.005');
const referenceOutputCostPer1k = parseFloat(process.env['REFERENCE_OUTPUT_COST_PER_1K'] ?? '0.015');
const estimatedApiCost = (totalTokensIn / 1000) * referenceInputCostPer1k + (totalTokensOut / 1000) * referenceOutputCostPer1k;
const totalCost = parseFloat(metrics.total_cost) || 0;
return {
total_requests: parseInt(metrics.total_requests) || 0,
total_cost: parseFloat(metrics.total_cost) || 0,
total_cost: totalCost,
estimated_api_cost: estimatedApiCost,
estimated_api_cost_avoided: Math.max(0, estimatedApiCost - totalCost),
total_tokens_in: totalTokensIn,
total_tokens_out: totalTokensOut,
total_tokens: totalTokens,
compression_operations: parseInt(compression.operations, 10) || 0,
compression_tokens_before: compressionTokensBefore,
compression_tokens_after: compressionTokensAfter,
compression_tokens_saved: compressionTokensSaved,
compression_rate: compressionTokensBefore > 0 ? compressionTokensSaved / compressionTokensBefore : 0,
cache_hit_rate: 0,
avg_latency: Math.round(parseFloat(metrics.avg_latency) || 0),
success_rate: parseFloat(metrics.success_rate) || 0,
avg_confidence: parseFloat(metrics.avg_confidence) || 0,

View File

@ -101,7 +101,7 @@ export function calculateCost(
/**
* Calculate cost savings from compression
* @param model Model identifier
* @param tokensBeforeCompression Tokens before LeanCTX + RTK
* @param tokensBeforeCompression Tokens before LLM Gateway compression
* @param tokensAfterCompression Tokens after compression
* @returns Savings in USD
*/

View File

@ -47,7 +47,7 @@ const PROVIDERS: readonly ExternalProvider[] = [
enabled: true,
models: [
{ id: 'claude-opus-4-1', tier: 'reasoning', contextLength: 200000 },
{ id: 'claude-sonnet-4-1', tier: 'large', contextLength: 200000 },
{ id: 'claude-sonnet-4-6', tier: 'large', contextLength: 200000 },
{ id: 'claude-haiku-3', tier: 'fast', contextLength: 200000 },
],
},
@ -86,6 +86,17 @@ const PROVIDERS: readonly ExternalProvider[] = [
{ id: 'gpt-3.5-turbo', tier: 'medium', contextLength: 4096 },
],
},
{
name: 'm365-copilot-bridge',
baseUrl: '', // constructed from M365_COPILOT_BRIDGE_URL env var
envKey: 'M365_COPILOT_BRIDGE_URL',
rateLimitRpm: 60,
enabled: true,
models: [
{ id: 'microsoft-365-copilot', tier: 'reasoning', contextLength: 128000 },
{ id: 'm365-copilot-chat', tier: 'large', contextLength: 128000 },
],
},
{
name: 'cerebras',
baseUrl: 'https://api.cerebras.ai/v1',
@ -146,12 +157,13 @@ const PROVIDERS: readonly ExternalProvider[] = [
{
name: 'openai-codex',
baseUrl: 'https://api.openai.com/v1',
envKey: 'OPENAI_API_KEY',
envKey: 'OPENAI_CODEX_URL',
rateLimitRpm: 60,
enabled: true,
models: [
{ id: 'gpt-4-turbo', tier: 'reasoning', contextLength: 128000 },
{ id: 'gpt-3.5-turbo', tier: 'fast', contextLength: 16384 },
{ id: 'gpt-5.1-codex', tier: 'reasoning', contextLength: 256000 },
{ id: 'gpt-5.1-codex-mini', tier: 'large', contextLength: 256000 },
{ id: 'codex-mini-latest', tier: 'medium', contextLength: 200000 },
],
},
{
@ -162,23 +174,35 @@ const PROVIDERS: readonly ExternalProvider[] = [
enabled: true,
models: [
{ id: 'claude-opus-4-1', tier: 'reasoning', contextLength: 200000 },
{ id: 'claude-sonnet-4-1', tier: 'large', contextLength: 200000 },
{ id: 'claude-sonnet-4-6', tier: 'large', contextLength: 200000 },
{ id: 'claude-haiku-3', tier: 'fast', contextLength: 200000 },
],
},
{
name: 'codex',
baseUrl: 'https://api.github.com/copilot_inner/v2',
envKey: 'GITHUB_CODEX_TOKEN',
envKey: 'CODEX_BRIDGE_URL',
rateLimitRpm: 60,
enabled: true,
models: [
{ id: 'github-copilot-x', tier: 'large', contextLength: 8192 },
{ id: 'code-davinci-002', tier: 'medium', contextLength: 4096 },
{ id: 'gpt-5.1-codex', tier: 'reasoning', contextLength: 256000 },
{ id: 'gpt-5.1-codex-mini', tier: 'large', contextLength: 256000 },
{ id: 'codex-mini-latest', tier: 'medium', contextLength: 200000 },
],
},
];
// Providers that are called without an `Authorization` header:
// buildRequestHeaders skips the Bearer token when provider.name is in this set.
const AUTHLESS_BRIDGE_PROVIDERS = new Set([
'claude-bridge',
'claude-code',
'openai-bridge',
'chatgpt-bridge',
'copilot-bridge',
'm365-copilot-bridge',
]);
// Providers for which callProvider sets its `generateBridge` flag.
// NOTE(review): presumably these bridges accept a prompt/system body instead of
// an OpenAI-style chat payload — confirm against callProvider.
const GENERATE_BRIDGE_PROVIDERS = new Set(['claude-bridge', 'claude-code']);
// ─── Rate Limiter (simple sliding window) ───────────────────────────
const requestTimestamps: Map<string, number[]> = new Map();
@ -213,25 +237,34 @@ function getApiKey(provider: ExternalProvider): string | undefined {
return url ? 'claude-code-enabled' : undefined;
}
if (provider.name === 'openai-bridge') {
// openai-bridge uses OPENAI_API_KEY for auth, but also needs bridge URL
const apiKey = process.env['OPENAI_API_KEY'];
// Subscription bridge auth is handled by the bridge process/CLI session.
const url = process.env['OPENAI_BRIDGE_URL'];
return apiKey && url ? apiKey : undefined;
return url ? 'openai-bridge-enabled' : undefined;
}
if (provider.name === 'chatgpt-bridge') {
// chatgpt-bridge can use same URL as openai-bridge (same service), but needs API key
const apiKey = process.env['OPENAI_API_KEY'];
// ChatGPT Plus bridge can reuse the OpenAI bridge when configured that way.
const url = process.env['CHATGPT_BRIDGE_URL'] || process.env['OPENAI_BRIDGE_URL'];
return apiKey && url ? apiKey : undefined;
return url ? 'chatgpt-bridge-enabled' : undefined;
}
if (provider.name === 'copilot-bridge') {
// copilot-bridge uses GitHub Copilot subscription (auth handled internally by copilot-api)
// Just needs URL to be configured
// copilot-bridge uses GitHub Copilot subscription (auth handled internally by copilot-api).
const url = process.env['COPILOT_BRIDGE_URL'];
return url ? 'copilot-authenticated' : undefined;
}
if (provider.name === 'm365-copilot-bridge') {
// Microsoft 365 Copilot uses Microsoft Graph delegated auth inside the bridge.
const url = process.env['M365_COPILOT_BRIDGE_URL'];
return url ? 'm365-copilot-bridge-enabled' : undefined;
}
if (provider.name === 'openai-codex') {
const bridgeUrl = process.env['OPENAI_CODEX_URL'] || process.env['CODEX_BRIDGE_URL'];
if (bridgeUrl) return 'openai-codex-bridge-enabled';
return process.env['OPENAI_API_KEY'] || undefined;
}
if (provider.name === 'codex') {
// codex uses GitHub Codex API token
// Codex can run through an authless local/subscription bridge. A token remains supported as fallback.
const bridgeUrl = process.env['CODEX_BRIDGE_URL'] || process.env['OPENAI_CODEX_URL'];
if (bridgeUrl) return 'codex-bridge-enabled';
const token = process.env['GITHUB_CODEX_TOKEN'];
return token ? token : undefined;
}
@ -241,11 +274,11 @@ function getApiKey(provider: ExternalProvider): string | undefined {
function getBaseUrl(provider: ExternalProvider): string {
if (provider.name === 'claude-bridge') {
const url = process.env['CLAUDE_BRIDGE_URL'];
return url ? `${url}/v1` : '';
return url ?? '';
}
if (provider.name === 'claude-code') {
const url = process.env['CLAUDE_CODE_URL'];
return url ? `${url}/v1` : '';
return url ?? '';
}
if (provider.name === 'openai-bridge') {
const url = process.env['OPENAI_BRIDGE_URL'];
@ -257,7 +290,19 @@ function getBaseUrl(provider: ExternalProvider): string {
}
if (provider.name === 'copilot-bridge') {
const url = process.env['COPILOT_BRIDGE_URL'];
return url ? `${url}` : '';
return url ? `${url}/v1` : '';
}
if (provider.name === 'm365-copilot-bridge') {
const url = process.env['M365_COPILOT_BRIDGE_URL'];
return url ? `${url}/v1` : '';
}
if (provider.name === 'openai-codex') {
const url = process.env['OPENAI_CODEX_URL'] || process.env['CODEX_BRIDGE_URL'];
return url ? `${url}/v1` : provider.baseUrl;
}
if (provider.name === 'codex') {
const url = process.env['CODEX_BRIDGE_URL'] || process.env['OPENAI_CODEX_URL'];
return url ? `${url}/v1` : provider.baseUrl;
}
if (provider.name === 'cloudflare') {
const accountId = process.env['CLOUDFLARE_ACCOUNT_ID'];
@ -271,6 +316,11 @@ export function getAvailableProviders(): readonly ExternalProvider[] {
return PROVIDERS.filter((p) => p.enabled && getApiKey(p));
}
/**
 * Returns every entry of the PROVIDERS table without filtering. Unlike
 * getAvailableProviders, entries are included whether or not they are enabled
 * or have an API key configured. Intended for the dashboard's provider listing.
 *
 * @returns The PROVIDERS array itself (not a copy), typed as readonly.
 */
export function getAllProviders(): readonly ExternalProvider[] {
return PROVIDERS;
}
function findBestModel(
provider: ExternalProvider,
targetTier: 'fast' | 'medium' | 'large' | 'reasoning',
@ -296,7 +346,11 @@ function findBestModel(
function buildRequestHeaders(provider: ExternalProvider, apiKey: string): Record<string, string> {
const headers: Record<string, string> = { 'Content-Type': 'application/json' };
if (!['claude-bridge', 'claude-code', 'openai-bridge', 'chatgpt-bridge', 'copilot-bridge'].includes(provider.name)) {
const usesAuthlessBridge = AUTHLESS_BRIDGE_PROVIDERS.has(provider.name)
|| (provider.name === 'openai-codex' && !!(process.env['OPENAI_CODEX_URL'] || process.env['CODEX_BRIDGE_URL']))
|| (provider.name === 'codex' && !!(process.env['CODEX_BRIDGE_URL'] || process.env['OPENAI_CODEX_URL']));
if (!usesAuthlessBridge) {
headers['Authorization'] = `Bearer ${apiKey}`;
}
return headers;
@ -311,13 +365,29 @@ function buildRequestPayload(model: ExternalModel, request: ExternalCompletionRe
};
}
/**
 * Builds the request body for bridges that expose a generate-style endpoint
 * (a single `prompt` plus an optional `system` string) instead of a chat
 * message array.
 *
 * @param model - Model whose `id` is sent as the `model` field.
 * @param request - Chat-style request; its messages are flattened into text.
 * @returns A payload with `model`, `prompt`, `system`, `temperature` and `max_tokens`.
 */
function buildGenerateBridgePayload(model: ExternalModel, request: ExternalCompletionRequest): Record<string, unknown> {
  const { messages, temperature, max_tokens: maxTokens } = request;

  // Only the first system message is forwarded; `system` stays undefined when there is none.
  const systemMessage = messages.find((msg) => msg.role === 'system');

  // Every non-system turn becomes one "role: content" paragraph of the prompt.
  const turns: string[] = [];
  for (const msg of messages) {
    if (msg.role === 'system') continue;
    turns.push(`${msg.role}: ${msg.content}`);
  }

  return {
    model: model.id,
    prompt: turns.join('\n\n'),
    system: systemMessage?.content,
    temperature: temperature ?? 0.3,
    max_tokens: maxTokens ?? 2048,
  };
}
function parseExternalResponse(
data: any,
model: ExternalModel,
provider: ExternalProvider,
start: number,
): ExternalCompletionResponse {
const content = data.choices?.[0]?.message?.content ?? '';
const content = data.choices?.[0]?.message?.content ?? data.content ?? data.response ?? data.message?.content ?? '';
recordRequest(provider.name);
return {
response: content,
@ -341,14 +411,15 @@ async function callProvider(
const baseUrl = getBaseUrl(provider);
if (!baseUrl) throw new Error(`No base URL for ${provider.name}`);
const url = `${baseUrl}/chat/completions`;
const generateBridge = GENERATE_BRIDGE_PROVIDERS.has(provider.name);
const url = generateBridge ? `${baseUrl}/api/generate` : `${baseUrl}/chat/completions`;
const controller = new AbortController();
const timer = setTimeout(() => controller.abort(), timeoutMs);
const start = Date.now();
try {
const headers = buildRequestHeaders(provider, apiKey);
const payload = buildRequestPayload(model, request);
const payload = generateBridge ? buildGenerateBridgePayload(model, request) : buildRequestPayload(model, request);
const response = await fetch(url, {
method: 'POST',

View File

@ -728,6 +728,36 @@ function handleFormalLogicOverride(
return result;
}
// ── Helper: Code Generation Intent Override ───────────────────────────────
/**
 * Phrases that signal an explicit request to produce code. A request counts as
 * code generation when any one of these matches the full request text.
 */
const CODE_GENERATION_PATTERNS = [
  // "write (a) [language] function|class|script|..."
  /\bwrite\s+(?:a\s+)?(?:typescript|javascript|python|go|rust|react|next\.js|node)?\s*(?:function|class|script|module|component|test|handler|middleware)\b/i,
  // A build verb followed, within 160 characters, by a code artifact noun.
  /\b(?:implement|create|build|generate|scaffold)\b[\s\S]{0,160}\b(?:api|endpoint|function|class|component|service|schema|migration|crud|jwt|test|project|module)\b/i,
  // "REST API" / "GraphQL API" followed, within 160 characters, by an implementation keyword.
  /\b(?:rest|graphql)\s+api\b[\s\S]{0,160}\b(?:implement|create|build|endpoint|authentication|jwt)\b/i,
];

/**
 * Short-circuits scoring when the request text clearly asks for code.
 *
 * @param fullText - Text the intent patterns are tested against.
 * @param input - Scorer input, forwarded to the dimension computation.
 * @param userMessages - Weighted user messages, forwarded to the dimension computation.
 * @returns A fixed `code_generation` result (score 0.62, confidence 0.86) with
 *   the computed dimensions attached, or `null` when no pattern matches.
 */
function handleCodeGenerationOverride(
  fullText: string,
  input: ScorerInput,
  userMessages: readonly WeightedMessage[],
): ScoringResult | null {
  let intentDetected = false;
  for (const pattern of CODE_GENERATION_PATTERNS) {
    if (pattern.test(fullText)) {
      intentDetected = true;
      break;
    }
  }
  if (!intentDetected) return null;

  const override: ScoringResult = {
    tier: 'code_generation',
    score: 0.62,
    confidence: 0.86,
    reason: 'code generation intent detected',
    dimensions: computeAllDimensions(input, userMessages, fullText),
  };

  recordSessionTier('code_generation');
  logger.debug({ tier: 'code_generation', reason: 'code_generation_override' }, 'Request scored via code generation override');
  return override;
}
// ── Helper: Apply Score Overrides ──────────────────────────────────────────
interface ScoreOverridesInput {
@ -754,6 +784,7 @@ function applyScoreOverrides(
const codeGenDim = dimensions.find((d) => d.name === 'codeGeneration');
if (codeGenDim && codeGenDim.rawScore > 0.25) {
tier = 'code_generation';
confidence = Math.max(confidence, 0.78);
reason = 'code generation keywords detected';
}
@ -771,7 +802,7 @@ function applyScoreOverrides(
}
// Ambiguity check
if (confidence < 0.45) {
if (confidence < 0.45 && tier !== 'code_generation' && tier !== 'reasoning') {
tier = 'medium';
reason = 'ambiguous (confidence < 0.45, defaulting to medium)';
}
@ -795,6 +826,9 @@ export function scoreRequest(
const formalLogicResult = handleFormalLogicOverride(fullText, input, userMessages);
if (formalLogicResult) return formalLogicResult;
const codeGenerationResult = handleCodeGenerationOverride(fullText, input, userMessages);
if (codeGenerationResult) return codeGenerationResult;
const dimensions = computeAllDimensions(input, userMessages, fullText);
let rawScore = 0;
for (const dim of dimensions) {

View File

@ -184,14 +184,14 @@ export function getOllamaBaseUrl(): string {
/**
 * Maps a scorer tier to its primary model, the models.yaml tier whose
 * configuration backs it (`configTier`), and optionally an external provider.
 * The 'reasoning' tier uses llama3.3:70b (complex_reasoning strength) from the large tier.
 * The 'code_generation' tier uses OpenAI Codex as primary via external provider.
 */
const TIER_MODEL_MAP: Record<Tier, { primary: string; configTier: 'fast' | 'medium' | 'large'; provider?: string }> = {
  fast: { primary: 'qwen2.5:3b', configTier: 'fast' },
  medium: { primary: 'qwen2.5:14b', configTier: 'medium' },
  large: { primary: 'qwen2.5:32b', configTier: 'large' },
  reasoning: { primary: 'llama3.3:70b', configTier: 'large' },
  // Each tier appears exactly once: a duplicated key is a compile error and only the last entry would take effect.
  code_generation: { primary: 'gpt-5.1-codex-mini', configTier: 'large', provider: 'openai-codex' },
};
function buildMediumTierFallback(
@ -223,7 +223,8 @@ function buildScoredFallbackChain(
models: ModelsYaml,
): string[] {
if (tier === 'reasoning' || tier === 'code_generation') {
return [selectedModel, ...buildFallbackChain(selectedModel, configTier, models).filter((m) => m !== selectedModel)];
const fallbackTier = tier === 'code_generation' ? 'code_generation' : configTier;
return [selectedModel, ...buildFallbackChain(selectedModel, fallbackTier, models).filter((m) => m !== selectedModel)];
}
return buildFallbackChain(selectedModel, configTier, models);
}
@ -302,7 +303,7 @@ export function routeByScore(
const mapping = TIER_MODEL_MAP[scoringResult.tier];
const selectedModel = mapping.primary;
const configTier = mapping.configTier;
const tierConfig = models.tiers[configTier];
const tierConfig = models.tiers[scoringResult.tier] ?? models.tiers[configTier];
if (!tierConfig) {
logger.error({ tier: configTier }, 'Tier config not found in models.yaml, falling back to medium');

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -38,22 +38,40 @@ async function checkOllama(baseUrl: string): Promise<{ status: 'ok' | 'down'; la
/**
 * Probes the database with a trivial `SELECT 1`.
 *
 * The probe is issued exactly once and is bounded by a 2.5 s timeout, so a
 * hung connection reports `down` instead of stalling the health check.
 *
 * @returns `{ status: 'ok' }` on success, otherwise `{ status: 'down', error }`
 *   with the failure or timeout message.
 */
async function checkDatabase(): Promise<{ status: 'ok' | 'down'; error?: string }> {
  try {
    // Only the time-boxed probe runs; an un-timed query ahead of it could hang
    // indefinitely and would make the timeout meaningless.
    await withTimeout(query('SELECT 1'), 2500, 'database check timed out');
    return { status: 'ok' };
  } catch (err) {
    return { status: 'down', error: err instanceof Error ? err.message : 'Unknown error' };
  }
}
/**
 * Races a promise against a deadline.
 *
 * @param promise - The operation to wait for.
 * @param timeoutMs - Milliseconds to wait before giving up.
 * @param message - Message of the `Error` raised when the deadline wins.
 * @returns The value `promise` resolves with, if it settles first.
 * @throws The rejection of `promise`, or `Error(message)` on timeout.
 *
 * The underlying operation is not cancelled on timeout; only the wait ends.
 */
async function withTimeout<T>(promise: Promise<T>, timeoutMs: number, message: string): Promise<T> {
  let handle: ReturnType<typeof setTimeout> | undefined;

  const deadline = new Promise<never>((_resolve, reject) => {
    handle = setTimeout(() => {
      reject(new Error(message));
    }, timeoutMs);
  });

  try {
    return await Promise.race([promise, deadline]);
  } finally {
    // Always release the timer so it cannot keep the event loop alive.
    if (handle !== undefined) clearTimeout(handle);
  }
}
async function checkQueue(): Promise<{ status: 'ok' | 'down' | 'unknown'; depth?: number; error?: string }> {
const boss = getPgBoss();
if (!boss) return { status: 'unknown' };
try {
const [queued, active] = await Promise.all([
boss.getQueueSize('llm-batch', { before: 'completed' }),
boss.getQueueSize('llm-batch', { before: 'active' }),
]);
const [queued, active] = await withTimeout(
Promise.all([
boss.getQueueSize('llm-batch', { before: 'completed' }),
boss.getQueueSize('llm-batch', { before: 'active' }),
]),
2500,
'queue check timed out',
);
return { status: 'ok', depth: (queued ?? 0) + (active ?? 0) };
} catch (err) {
return { status: 'down', error: err instanceof Error ? err.message : 'Unknown error' };
@ -62,8 +80,10 @@ async function checkQueue(): Promise<{ status: 'ok' | 'down' | 'unknown'; depth?
async function getReviewQueueCount(): Promise<number> {
try {
const result = await query<{ count: string }>(
'SELECT COUNT(*) as count FROM review_queue WHERE decision IS NULL',
const result = await withTimeout(
query<{ count: string }>('SELECT COUNT(*) as count FROM review_queue WHERE decision IS NULL'),
2500,
'review queue check timed out',
);
return parseInt(result.rows[0]?.count ?? '0', 10);
} catch {
@ -78,8 +98,9 @@ export async function healthRoute(fastify: FastifyInstance): Promise<void> {
// Check if this is a dashboard UI request with ?ui=1 or ?dashboard=1
const query = request.query as any;
const isDashboardRequest = query.ui || query.dashboard;
const acceptsHtml = String(request.headers.accept ?? '').includes('text/html');
if (isDashboardRequest) {
if (isDashboardRequest || acceptsHtml) {
try {
const __filename = fileURLToPath(import.meta.url);
const __dirname = dirname(__filename);
@ -108,8 +129,8 @@ export async function healthRoute(fastify: FastifyInstance): Promise<void> {
const breakerStates = getAllBreakerStates();
const isDown = ollamaCheck.status === 'down' || dbCheck.status === 'down';
const isDegraded = queueCheck.status === 'down' || Object.values(breakerStates).some((s) => s === 'open');
const isDown = dbCheck.status === 'down';
const isDegraded = ollamaCheck.status === 'down' || queueCheck.status === 'down' || Object.values(breakerStates).some((s) => s === 'open');
const status: HealthStatus['status'] = isDown ? 'down' : isDegraded ? 'degraded' : 'ok';

View File

@ -11,6 +11,22 @@ export async function staticRoute(fastify: FastifyInstance): Promise<void> {
logger.info({ publicDir }, 'Static file serving initialized');
/**
 * Sends an HTML file from the public directory with caching disabled.
 *
 * @param filename - File name relative to `publicDir`.
 * @param reply - Reply object used to build and send the response.
 * @returns The result of sending either the file content or a 404 JSON error.
 */
function sendHtml(filename: string, reply: any) {
  const target = join(publicDir, filename);

  const missing = !existsSync(target);
  if (missing) {
    logger.warn({ path: target }, `${filename} not found`);
    return reply.status(404).send({ error: `${filename} not found` });
  }

  const html = readFileSync(target, 'utf-8');

  // Applied in this order so that the page is always fetched fresh.
  const noCacheHeaders: ReadonlyArray<readonly [string, string]> = [
    ['Cache-Control', 'no-cache, no-store, must-revalidate, max-age=0'],
    ['Pragma', 'no-cache'],
    ['Expires', '0'],
  ];
  const prepared = noCacheHeaders.reduce((chain, [name, value]) => chain.header(name, value), reply);

  return prepared.type('text/html').send(html);
}
// Serve root path
fastify.get('/', async (request, reply) => {
logger.info({ method: request.method, url: request.url, host: request.hostname }, 'Root path requested');
@ -26,13 +42,47 @@ export async function staticRoute(fastify: FastifyInstance): Promise<void> {
// Serve /dashboard.html
fastify.get('/dashboard.html', async (_request, reply) => {
const dashboardPath = join(publicDir, 'dashboard.html');
if (!existsSync(dashboardPath)) {
logger.warn({ path: dashboardPath }, 'dashboard.html not found');
return reply.status(404).send({ error: 'dashboard.html not found' });
}
const content = readFileSync(dashboardPath, 'utf-8');
return reply.type('text/html').send(content);
return sendHtml('dashboard.html', reply);
});
// Every path below is an alias for the same v2 dashboard document; they are
// registered in this order, each with its own handler.
const dashboardV2Paths = [
  '/dashboard-v2.html',
  '/v2/dashboard',
  '/v2/dashboard/',
  '/v2',
  '/v2/',
  '/dashboard/v2',
  '/dashboard/v2/',
  '/api/dashboard-v2',
  '/api/v2/dashboard',
  '/api/dashboard/v2',
] as const;

for (const routePath of dashboardV2Paths) {
  fastify.get(routePath, async (_request, reply) => {
    return sendHtml('dashboard-v2.html', reply);
  });
}
// Serve /api/dashboard as HTML for compatibility

View File

@ -107,6 +107,25 @@ export async function registerHTTPSRedirectMiddleware(server: FastifyInstance) {
return;
}
// Decide whether this request comes from (or is addressed to) the local
// machine; such requests return early, before the HTTPS check below.
const hostHeader = String(request.headers['host'] ?? '');
const forwardedHost = String(request.headers['x-forwarded-host'] ?? '');
const remoteAddress = request.ip ?? '';
// X-Forwarded-Host takes precedence over Host when both are present.
// NOTE(review): both headers are client-supplied. Unless a trusted proxy
// always overwrites X-Forwarded-Host, a caller can send "localhost" here and
// skip the redirect — confirm against the proxy configuration.
const host = forwardedHost || hostHeader;
// Matches localhost, 127.0.0.1 or [::1], each with an optional :port suffix.
const isLoopbackHost =
/^localhost(?::\d+)?$/i.test(host) ||
/^127\.0\.0\.1(?::\d+)?$/.test(host) ||
/^\[::1\](?::\d+)?$/.test(host);
// Peer address as reported by request.ip: IPv4 loopback, IPv6 loopback, or
// the IPv4-mapped IPv6 form of 127.0.0.1.
const isLoopbackRemote =
remoteAddress === '127.0.0.1' ||
remoteAddress === '::1' ||
remoteAddress === '::ffff:127.0.0.1';
// Internal loopback callers such as Magatama Core run behind the same host
// and must not be redirected to HTTPS unless the Gateway actually serves TLS.
if (isLoopbackHost || isLoopbackRemote) {
return;
}
// Check if connection is not secure
// In production, X-Forwarded-Proto is set by reverse proxy (Cloudflare)
const isSecure =
@ -114,7 +133,6 @@ export async function registerHTTPSRedirectMiddleware(server: FastifyInstance) {
(request.headers['x-forwarded-proto'] === 'https');
if (!isSecure && process.env['NODE_ENV'] === 'production') {
const host = request.headers['x-forwarded-host'] || request.headers['host'];
return reply.redirect(`https://${host}${request.url}`);
}
});
@ -126,10 +144,10 @@ export async function registerHTTPSRedirectMiddleware(server: FastifyInstance) {
*/
export async function registerSecurityHeadersMiddleware(server: FastifyInstance) {
server.addHook('onSend', async (request, reply) => {
// Content Security Policy - strict, no inline scripts
// Content Security Policy for the self-contained dashboard UI.
reply.header(
'Content-Security-Policy',
"default-src 'self'; script-src 'self'; object-src 'none'; frame-ancestors 'none'; base-uri 'self'; form-action 'self'"
"default-src 'self'; script-src 'self' 'unsafe-inline'; style-src 'self' 'unsafe-inline'; object-src 'none'; frame-ancestors 'none'; base-uri 'self'; form-action 'self'"
);
// Prevent clickjacking

View File

@ -12,11 +12,23 @@ import { dashboardRoute } from './routes/dashboard.js';
import { streamRoute } from './routes/stream.js';
import { learningInsightsRoute } from './routes/learning-insights.js';
import { staticRoute } from './routes/static.js';
import tenantAuth from './security/tenant-auth.js';
import { internalRoute } from './routes/internal.js';
import { getPool } from './db/client.js';
import { runMigrations } from './db/migrate.js';
import { initPgBoss } from './queue/pg-boss-client.js';
import { logger } from './observability/logger.js';
import { scheduleLearningCycles } from './learning/learning-engine.js';
import { autoSpawnOnBoot } from './modules/auto-discovery.js';
import { embeddingsRoute } from './routes/embeddings.js';
import { replayRoute } from './routes/replay.js';
import { audioRoute } from './routes/audio.js';
import { mcpRoute } from './modules/mcp-server.js';
import { loadWorkspacePreset, applyWorkspaceDefaults } from './modules/workspace-presets.js';
import { loadPlugins } from './modules/plugin-system.js';
import { ingestPeerStats, scheduleFederationPublisher, buildStats } from './modules/federated-stats.js';
import { scheduleAdaptiveLearner, getAllRecommendations } from './modules/adaptive-routing.js';
import { startBridgeWatchdog } from './modules/bridge-watchdog.js';
import { fileURLToPath } from 'url';
import { dirname, join } from 'path';
import { readFileSync, existsSync } from 'fs';
@ -77,6 +89,7 @@ async function buildServer() {
directives: {
defaultSrc: ["'self'"],
scriptSrc: ["'self'", "'unsafe-inline'"],
styleSrc: ["'self'", "'unsafe-inline'"],
objectSrc: ["'none'"],
},
},
@ -92,15 +105,17 @@ async function buildServer() {
'http://192.168.178.196:3000',
/^http:\/\/192\.168\.178\.\d+/,
/^https:\/\/.*\.context-x\.org$/,
/^https:\/\/(www\.)?runwerk\.app$/,
/^https:\/\/.*\.runwerk\.app$/,
],
methods: ['GET', 'POST', 'PUT', 'DELETE', 'OPTIONS'],
allowedHeaders: ['Content-Type', 'Authorization', 'X-Caller-ID'],
allowedHeaders: ['Content-Type', 'Authorization', 'X-Caller-ID', 'X-Runwerk-Caller', 'X-Runwerk-Privacy', 'X-Runwerk-Tier', 'X-Runwerk-Purpose'],
credentials: true,
});
await server.register(fastifyRateLimit, {
global: true,
max: 100,
max: 1000,
timeWindow: '1 minute',
keyGenerator: (request) => {
const caller = (request.headers['x-caller-id'] as string) ?? 'default';
@ -113,7 +128,17 @@ async function buildServer() {
}),
});
await server.register(tenantAuth);
await server.register(internalRoute);
await server.register(completionRoute, { prefix: '/v1' });
await server.register(embeddingsRoute, { prefix: '/v1' });
await server.register(replayRoute, { prefix: '/v1' });
await server.register(audioRoute, { prefix: '/v1' });
await server.register(mcpRoute);
server.post('/v1/federation/ingest', async (request, reply) => {
const result = ingestPeerStats(request.body as never);
return reply.send({ success: true, ...result });
});
await server.register(batchRoute, { prefix: '/v1' });
await server.register(classifyRoute, { prefix: '/v1' });
await server.register(reviewRoute, { prefix: '/v1' });
@ -192,9 +217,54 @@ async function main() {
} catch (pgErr) {
logger.warn({ pgErr }, 'PgBoss init failed - continuing without queue');
}
// Workspace preset (apply env defaults from workspace.yaml if present)
try {
const preset = await loadWorkspacePreset();
if (preset) applyWorkspaceDefaults(preset);
} catch (err) {
logger.warn({ err }, 'Workspace preset load failed (non-fatal)');
}
// Plugin system (load pre/post hooks from PLUGINS_DIR)
try {
await loadPlugins();
} catch (err) {
logger.warn({ err }, 'Plugin loading failed (non-fatal)');
}
scheduleLearningCycles();
await server.listen({ port, host });
logger.info({ port, host }, 'LLM Gateway started');
// Auto-spawn detected subscription bridges if AUTO_SPAWN_BRIDGES=1
void autoSpawnOnBoot();
// Bridge watchdog (opt-in via WATCHDOG_ENABLED=1)
try {
startBridgeWatchdog();
} catch (err) {
logger.warn({ err }, 'Bridge watchdog start failed');
}
// Adaptive routing learner (opt-in via ADAPTIVE_ROUTING_ENABLED=1)
try {
const pool = getPool();
scheduleAdaptiveLearner(pool as never);
} catch (err) {
logger.warn({ err }, 'Adaptive learner scheduling failed');
}
// Federation publisher (opt-in via FEDERATION_ENABLED=1)
scheduleFederationPublisher(async () => {
const recos = getAllRecommendations();
return buildStats(recos.map((r) => ({
task_type: r.taskType,
model_used: r.preferredModel,
samples: r.rationale.samples,
success_rate: r.rationale.successRate,
avg_latency_ms: r.rationale.avgLatencyMs,
})));
});
} catch (err) {
logger.error({ err }, 'Failed to start server');
process.exit(1);

View File

@ -1,5 +1,5 @@
// Tokenvault Integration Hooks
// Instruments LeanCTX and RTK compression tracking
// Instruments LLM Gateway compression tracking (legacy hook names retained for backward compat)
// Updated: 2026-04-19
import { Pool, QueryResult } from 'pg';
@ -62,13 +62,13 @@ export function estimateTokens(text: string | object): number {
}
/**
* Log compression ratio for RTK output
* Log compression ratio for token-trim output
*/
export async function logRTKCompression(
export async function logGatewayTrimCompression(
db: Pool,
rawOutput: string,
compressedOutput: string,
toolUsed: string = 'rtk'
toolUsed: string = 'llm-gateway-trim'
): Promise<CompressionMetric> {
const tokensBefore = estimateTokens(rawOutput);
const tokensAfter = estimateTokens(compressedOutput);
@ -93,9 +93,9 @@ export async function logRTKCompression(
}
/**
* Track LeanCTX file read operations
* Track gateway file-read operations
*/
export async function logLeanCTXRead(
export async function logGatewayFileRead(
db: Pool,
filePath: string,
mode: string,
@ -115,7 +115,7 @@ export async function logLeanCTXRead(
tokensBefore: rawTokens,
tokensAfter: compressedTokens,
savingsPct,
toolUsed: 'lean-ctx'
toolUsed: 'llm-gateway'
};
await logCompressionMetric(db, metric);
@ -207,7 +207,7 @@ export async function getCompressionStats(
tool_used,
COUNT(*) as count
FROM tokenvault_metrics
WHERE created_at > NOW() - INTERVAL $1 HOUR
WHERE created_at > NOW() - ($1 * INTERVAL '1 hour')
GROUP BY tool_used`,
[hoursBack]
);
@ -270,7 +270,7 @@ export async function getCostSummary(
project,
SUM(CASE WHEN cost_usd > 0 THEN 1 ELSE 0 END) as paid_tasks
FROM cost_analytics
WHERE created_at > NOW() - INTERVAL $1 HOUR
WHERE created_at > NOW() - ($1 * INTERVAL '1 hour')
GROUP BY project`,
[hoursBack]
);