feat: merge Gitea main (injection-defense, bridges, dashboard) + Erik WIP features

Reconcile 6-week divergence: Gitea main (injection-defense, output-defense,
prompt-guard-client, admin-auth, start-with-env, dashboard-v2, savings-calculator,
race-mode, gamification + 13 more modules) merged with Erik's deployed features
(usage-report endpoint, per-device entries, CEST timezone, cost-panel, bridge routing).
ecosystem.config.cjs excluded (live token, never commit).
This commit is contained in:
Rene Fichtmueller 2026-06-05 21:07:57 +00:00
parent c53e0d2165
commit c7c457ae2a
21 changed files with 940 additions and 171 deletions

View File

@ -36,7 +36,6 @@ COPY --from=builder /app/packages/gateway/dist ./packages/gateway/dist
# Copy production node_modules # Copy production node_modules
COPY --from=builder /app/node_modules ./node_modules COPY --from=builder /app/node_modules ./node_modules
COPY --from=builder /app/packages/gateway/node_modules ./packages/gateway/node_modules 2>/dev/null || true
# Copy runtime assets (prompt templates, config) # Copy runtime assets (prompt templates, config)
COPY packages/gateway/prompts ./packages/gateway/prompts COPY packages/gateway/prompts ./packages/gateway/prompts

View File

@ -4,15 +4,31 @@ services:
container_name: llm-gateway container_name: llm-gateway
ports: ports:
- "3100:3100" - "3100:3100"
extra_hosts:
- "host.docker.internal:host-gateway"
environment: environment:
NODE_ENV: production NODE_ENV: production
PORT: "3100" PORT: "3100"
DATABASE_URL: "${DATABASE_URL}" DATABASE_URL: "${DATABASE_URL}"
TIP_DATABASE_URL: "${TIP_DATABASE_URL}" TIP_DATABASE_URL: "${TIP_DATABASE_URL}"
OLLAMA_URL: "http://192.168.178.169:11434" OLLAMA_URL: "http://192.168.178.169:11434"
OLLAMA_BASE_URL: "${OLLAMA_BASE_URL:-https://ollama.fichtmueller.org}"
CLAUDE_BRIDGE_ENABLED: "true"
CLAUDE_BRIDGE_URL: "${CLAUDE_BRIDGE_URL:-http://host.docker.internal:3250}"
CLAUDE_CODE_URL: "${CLAUDE_CODE_URL:-http://host.docker.internal:3250}"
OPENAI_BRIDGE_URL: "${OPENAI_BRIDGE_URL:-http://host.docker.internal:3251}"
CHATGPT_BRIDGE_URL: "${CHATGPT_BRIDGE_URL:-http://host.docker.internal:3251}"
COPILOT_BRIDGE_URL: "${COPILOT_BRIDGE_URL:-http://host.docker.internal:3252}"
GEMINI_BRIDGE_URL: "${GEMINI_BRIDGE_URL:-http://host.docker.internal:3254}"
CODEX_BRIDGE_URL: "${CODEX_BRIDGE_URL:-http://host.docker.internal:3253}"
OPENAI_CODEX_URL: "${OPENAI_CODEX_URL:-http://host.docker.internal:3253}"
AIDER_BRIDGE_URL: "${AIDER_BRIDGE_URL:-http://host.docker.internal:3256}"
SHIELDX_URL: "${SHIELDX_URL:-}" SHIELDX_URL: "${SHIELDX_URL:-}"
GITEA_URL: "http://gitea.context-x.org" GITEA_URL: "http://gitea.context-x.org"
LOG_LEVEL: "${LOG_LEVEL:-info}" LOG_LEVEL: "${LOG_LEVEL:-info}"
DASHBOARD_AUTH_TOKEN: "${DASHBOARD_AUTH_TOKEN:-}"
REFERENCE_INPUT_COST_PER_1K: "${REFERENCE_INPUT_COST_PER_1K:-0.005}"
REFERENCE_OUTPUT_COST_PER_1K: "${REFERENCE_OUTPUT_COST_PER_1K:-0.015}"
restart: unless-stopped restart: unless-stopped
healthcheck: healthcheck:
test: ["CMD", "wget", "-q", "-O-", "http://localhost:3100/health/live"] test: ["CMD", "wget", "-q", "-O-", "http://localhost:3100/health/live"]

57
package-lock.json generated
View File

@ -11,10 +11,10 @@
"packages/*" "packages/*"
], ],
"dependencies": { "dependencies": {
"jose": "^6.2.2" "jose": "^6.2.3"
} }
}, },
"../../../shieldx": { "../../shieldx": {
"extraneous": true "extraneous": true
}, },
"node_modules/@esbuild/darwin-arm64": { "node_modules/@esbuild/darwin-arm64": {
@ -305,6 +305,10 @@
"resolved": "packages/codex-lsp-adapter", "resolved": "packages/codex-lsp-adapter",
"link": true "link": true
}, },
"node_modules/@llm-gateway/companion": {
"resolved": "packages/companion",
"link": true
},
"node_modules/@llm-gateway/ctx-health": { "node_modules/@llm-gateway/ctx-health": {
"resolved": "packages/ctx-health", "resolved": "packages/ctx-health",
"link": true "link": true
@ -321,6 +325,10 @@
"resolved": "packages/learning-integration", "resolved": "packages/learning-integration",
"link": true "link": true
}, },
"node_modules/@llm-gateway/mcp-server": {
"resolved": "packages/mcp-server",
"link": true
},
"node_modules/@llm-gateway/prompt-optimizer": { "node_modules/@llm-gateway/prompt-optimizer": {
"resolved": "packages/prompt-optimizer", "resolved": "packages/prompt-optimizer",
"link": true "link": true
@ -1127,6 +1135,8 @@
}, },
"node_modules/fastify-plugin": { "node_modules/fastify-plugin": {
"version": "5.1.0", "version": "5.1.0",
"resolved": "https://registry.npmjs.org/fastify-plugin/-/fastify-plugin-5.1.0.tgz",
"integrity": "sha512-FAIDA8eovSt5qcDgcBvDuX/v0Cjz0ohGhENZ/wpc3y+oZCY2afZ9Baqql3g/lC+OHRnciQol4ww7tuthOb9idw==",
"funding": [ "funding": [
{ {
"type": "github", "type": "github",
@ -1475,9 +1485,9 @@
} }
}, },
"node_modules/jose": { "node_modules/jose": {
"version": "6.2.2", "version": "6.2.3",
"resolved": "https://registry.npmjs.org/jose/-/jose-6.2.2.tgz", "resolved": "https://registry.npmjs.org/jose/-/jose-6.2.3.tgz",
"integrity": "sha512-d7kPDd34KO/YnzaDOlikGpOurfF0ByC2sEV4cANCtdqLlTfBlw2p14O/5d/zv40gJPbIQxfES3nSx1/oYNyuZQ==", "integrity": "sha512-YYVDInQKFJfR/xa3ojUTl8c2KoTwiL1R5Wg9YCydwH0x0B9grbzlg5HC7mMjCtUJjbQ/YnGEZIhI5tCgfTb4Hw==",
"license": "MIT", "license": "MIT",
"funding": { "funding": {
"url": "https://github.com/sponsors/panva" "url": "https://github.com/sponsors/panva"
@ -3178,6 +3188,21 @@
"node": ">=0.4" "node": ">=0.4"
} }
}, },
"node_modules/yaml": {
"version": "2.9.0",
"resolved": "https://registry.npmjs.org/yaml/-/yaml-2.9.0.tgz",
"integrity": "sha512-2AvhNX3mb8zd6Zy7INTtSpl1F15HW6Wnqj0srWlkKLcpYl/gMIMJiyuGq2KeI2YFxUPjdlB+3Lc10seMLtL4cA==",
"license": "ISC",
"bin": {
"yaml": "bin.mjs"
},
"engines": {
"node": ">= 14.6"
},
"funding": {
"url": "https://github.com/sponsors/eemeli"
}
},
"node_modules/yocto-queue": { "node_modules/yocto-queue": {
"version": "1.2.2", "version": "1.2.2",
"resolved": "https://registry.npmjs.org/yocto-queue/-/yocto-queue-1.2.2.tgz", "resolved": "https://registry.npmjs.org/yocto-queue/-/yocto-queue-1.2.2.tgz",
@ -4086,6 +4111,16 @@
} }
} }
}, },
"packages/companion": {
"name": "@llm-gateway/companion",
"version": "1.0.0",
"bin": {
"llm-gateway-companion": "bin/llm-gateway-companion.js"
},
"engines": {
"node": ">=18"
}
},
"packages/ctx-health": { "packages/ctx-health": {
"name": "@llm-gateway/ctx-health", "name": "@llm-gateway/ctx-health",
"version": "1.0.0", "version": "1.0.0",
@ -4114,6 +4149,7 @@
"@fastify/static": "^8.3.0", "@fastify/static": "^8.3.0",
"ajv": "^8.17.1", "ajv": "^8.17.1",
"fastify": "^5.8.5", "fastify": "^5.8.5",
"fastify-plugin": "^5.1.0",
"franc": "^6.2.0", "franc": "^6.2.0",
"jose": "^5.4.0", "jose": "^5.4.0",
"js-yaml": "^4.1.0", "js-yaml": "^4.1.0",
@ -4122,6 +4158,7 @@
"pg-boss": "^10.1.3", "pg-boss": "^10.1.3",
"pino": "^9.5.0", "pino": "^9.5.0",
"prom-client": "^15.1.3", "prom-client": "^15.1.3",
"yaml": "^2.9.0",
"zod": "^3.23.8" "zod": "^3.23.8"
}, },
"devDependencies": { "devDependencies": {
@ -4448,6 +4485,16 @@
} }
} }
}, },
"packages/mcp-server": {
"name": "@llm-gateway/mcp-server",
"version": "1.0.0",
"bin": {
"llm-gateway-mcp": "bin/llm-gateway-mcp.js"
},
"engines": {
"node": ">=18"
}
},
"packages/prompt-optimizer": { "packages/prompt-optimizer": {
"name": "@llm-gateway/prompt-optimizer", "name": "@llm-gateway/prompt-optimizer",
"version": "0.1.0", "version": "0.1.0",

View File

@ -18,6 +18,6 @@
"ctx-health:dev": "npm run dev --workspace=packages/ctx-health" "ctx-health:dev": "npm run dev --workspace=packages/ctx-health"
}, },
"dependencies": { "dependencies": {
"jose": "^6.2.2" "jose": "^6.2.3"
} }
} }

View File

@ -7,7 +7,8 @@
"build": "tsc && npm run build:copy-assets", "build": "tsc && npm run build:copy-assets",
"build:copy-assets": "mkdir -p dist/db/migrations dist/config dist/public && cp -r src/db/migrations/*.sql dist/db/migrations/ 2>/dev/null || true && cp -r src/config/*.yaml dist/config/ 2>/dev/null || true && cp -r public/* dist/public/ 2>/dev/null || true", "build:copy-assets": "mkdir -p dist/db/migrations dist/config dist/public && cp -r src/db/migrations/*.sql dist/db/migrations/ 2>/dev/null || true && cp -r src/config/*.yaml dist/config/ 2>/dev/null || true && cp -r public/* dist/public/ 2>/dev/null || true",
"start": "node dist/server.js", "start": "node dist/server.js",
"test": "vitest" "test": "vitest",
"prestart": "node scripts/check-build-drift.mjs"
}, },
"dependencies": { "dependencies": {
"@fastify/cors": "^10.1.0", "@fastify/cors": "^10.1.0",
@ -16,6 +17,7 @@
"@fastify/static": "^8.3.0", "@fastify/static": "^8.3.0",
"ajv": "^8.17.1", "ajv": "^8.17.1",
"fastify": "^5.8.5", "fastify": "^5.8.5",
"fastify-plugin": "^5.1.0",
"franc": "^6.2.0", "franc": "^6.2.0",
"jose": "^5.4.0", "jose": "^5.4.0",
"js-yaml": "^4.1.0", "js-yaml": "^4.1.0",
@ -24,6 +26,7 @@
"pg-boss": "^10.1.3", "pg-boss": "^10.1.3",
"pino": "^9.5.0", "pino": "^9.5.0",
"prom-client": "^15.1.3", "prom-client": "^15.1.3",
"yaml": "^2.9.0",
"zod": "^3.23.8" "zod": "^3.23.8"
}, },
"devDependencies": { "devDependencies": {

View File

@ -1,63 +1,105 @@
id: linkedin_post id: linkedin_post
version: "1.0.0" version: "2.0.0"
task_type: linkedin_post task_type: linkedin_post
description: "LinkedIn teaser in Rene Fichtmueller's voice. Anti-AI, anti-marketing, technical, direct."
system_prompt: | system_prompt: |
You are a professional LinkedIn content writer. Write engaging, authentic posts that sound human. You write a single short LinkedIn post in Rene Fichtmueller's voice. Rene is a network/optics engineer who blogs at blog.fichtmueller.org. His voice is direct, technical, sometimes contrarian, never marketing.
Rules: HARD RULES — do not violate:
- Maximum 1300 characters (LinkedIn soft limit) - 2 to 3 short sentences. Maximum 4. Period.
- No hashtag spam (max 3 relevant hashtags) - No hashtags. None. Not at the end, not anywhere.
- No engagement-bait questions at the end - No emojis. Not even one.
- No "In today's fast-paced world" openings - No engagement-bait. Do not end with "What do you think?", "Thoughts?", "Have you seen this?".
- Write in first person, direct and confident tone - No call-to-action language ("Check it out", "Read more", "Don't miss").
- Include a clear value point or insight - No meta-references to the blog post itself: do not write "I wrote about this", "I published a piece", "I broke this down", "more in the article".
- Current date: {{current_date}} - End with the URL on its own line. Nothing after the URL.
BANNED PHRASES — never use any of these:
- delve, leverage, robust, journey, embark, paradigm, unlock, seamlessly, holistic, harness, foster, amplify, underscore, indelible, profound, intricate, meticulous, testament, vibrant, bespoke, encompass, hitherto, realm, utilize, synergy
- "leaving money on the table"
- "until it's too late"
- "the line item most X skip"
- "turns out"
- "the unexpected part is"
- "the gap between X and Y is wider than"
- "in today's fast-paced", "in the world of", "in the realm of"
- "it's important to note", "it's worth noting"
- "let's dive into", "let's explore"
- "the future of X", "the next generation of X" (unless quoting someone)
- "game-changer", "cutting-edge", "groundbreaking", "comprehensive"
TONE — match these traits:
- Specific numbers over generalities. 20W is better than "high power". 14 weeks is better than "long lead time".
- Named products, standards, RFCs when relevant. 400ZR+, RPKI, IEEE 802.3.
- First person ("I", "my", "we") where genuine.
- Short sentences. Period. Short sentences. Period.
- Concession sometimes: admit what you don't know or what surprised you.
- Closing line stands on its own. No qualifier, no hedge.
Current date: {{current_date}}
{{few_shot_examples}} {{few_shot_examples}}
system_prompt_de: | system_prompt_de: |
Du bist ein professioneller LinkedIn-Content-Writer. Schreibe authentische, menschlich klingende Beiträge. Du schreibst einen kurzen LinkedIn-Post in der Stimme von Rene Fichtmueller. Direkt, technisch, manchmal contrarian, nie Marketing.
Regeln: HARTE REGELN — nie verletzen:
- Maximal 1300 Zeichen (LinkedIn Soft-Limit) - 2 bis 3 kurze Sätze. Maximal 4. Punkt.
- Keine Hashtag-Spam (max. 3 relevante Hashtags) - Keine Hashtags. Keine. Nirgendwo.
- Keine Engagement-Bait-Fragen am Ende - Keine Emojis. Auch nicht einer.
- Keine Einstiege mit "In der heutigen schnelllebigen Welt" - Kein Engagement-Bait. Niemals enden mit "Was meint ihr?", "Eure Erfahrung?".
- Schreibe in der Ich-Perspektive, direkt und selbstsicher - Keine Call-to-Action-Sprache ("Schaut mal rein", "Hier mehr lesen").
- Enthalte einen klaren Mehrwert oder Einblick - Keine Meta-Referenzen auf den Blog-Post: kein "Ich habe dazu geschrieben", "Mehr im Artikel".
- Aktuelles Datum: {{current_date}} - URL alleine in der letzten Zeile. Nichts danach.
VERBOTENE WORTE/PHRASEN:
- "leverage", "delve", "robust", "harness", "navigieren", "Reise", "Paradigma", "freischalten", "ganzheitlich", "Synergie", "umfassend"
- "in der heutigen schnelllebigen Welt"
- "es lohnt sich zu erwähnen"
TON:
- Konkrete Zahlen statt Verallgemeinerungen.
- Erste Person wo authentisch.
- Kurze Sätze. Punkt.
- Konzession wo möglich: zugeben was überrascht hat.
Aktuelles Datum: {{current_date}}
user_template: | user_template: |
Topic/Input: Article info:
{{input}} {{input}}
Context: Write the LinkedIn post now. 2-3 sentences. No hashtags. URL on last line. Stop after the URL.
{{user_context}}
Write a LinkedIn post:
user_template_de: | user_template_de: |
Thema/Eingabe: Artikel-Infos:
{{input}} {{input}}
Kontext: Schreibe den LinkedIn-Post jetzt. 2-3 Sätze. Keine Hashtags. URL in der letzten Zeile. Stopp nach der URL.
{{user_context}}
Schreibe einen LinkedIn-Beitrag:
few_shot_examples: few_shot_examples:
- user: "We deployed a new BGP monitoring system last week" - user: "Article info:\nTitle: BGP Route Leak Hit My Network Last Tuesday\nExcerpt: A transit provider re-advertised our customer cone to the wrong peers. Detection took 4 hours. The mitigation took 14 minutes once we saw it.\nURL: https://blog.fichtmueller.org/example1/"
assistant: | assistant: |
We spent 18 months dealing with BGP route leaks that cost us 4 hours of network visibility per incident. A transit provider re-advertised our customer cone to the wrong peers last Tuesday. We caught it after 4 hours. The fix took 14 minutes once we saw it — the cost was the 4 hours we didn't.
Last week we deployed our own monitoring system. First result: caught a route leak from a transit provider within 3 minutes, before any customer noticed. https://blog.fichtmueller.org/example1/
The fix wasn't sophisticated. It was systematic alerting on RPKI invalid routes combined with PeeringDB cross-referencing. - user: "Article info:\nTitle: 400ZR Reach Math Nobody Shows You\nExcerpt: Datasheets quote a maximum reach number. That number is for a clean link with margin you don't have.\nURL: https://blog.fichtmueller.org/example2/"
assistant: |
The 400ZR datasheet number is a ceiling, not a target. Real-world links have eight connectors, two amplifier hops, and aged fiber — the spec assumes none of that.
If you're running a mid-size ISP and still relying on customer tickets to discover routing issues, that's the gap worth closing. Build to the budget you actually have, not the one the marketing slide showed you.
#BGP #NetworkEngineering #ISP https://blog.fichtmueller.org/example2/
- user: "Article info:\nTitle: ADHD Diary #008: The 47-Tabs Problem\nExcerpt: I closed 47 browser tabs Tuesday. By Wednesday I had 51 again. This is not a tool problem.\nURL: https://blog.fichtmueller.org/example3/"
assistant: |
I closed 47 browser tabs on Tuesday. By Wednesday morning I had 51 again.
This isn't a tool problem: every tab is an "I might do this later" promise I made to myself. The fix isn't a better tab manager — it's saying no.
https://blog.fichtmueller.org/example3/
variables: variables:
- input - input

View File

@ -1,6 +1,10 @@
<!DOCTYPE html> <!DOCTYPE html>
<html lang="en"> <html lang="en">
<head> <head>
<script>
/* Force timestamps to Europe/Berlin (CEST/CET, auto-DST) */
(function(){var TZ="Europe/Berlin";["toLocaleString","toLocaleTimeString","toLocaleDateString"].forEach(function(fn){var o=Date.prototype[fn];Date.prototype[fn]=function(l,op){op=Object.assign({},op||{},{timeZone:TZ});return o.call(this,l||"de-DE",op);};});})();
</script>
<meta charset="UTF-8"> <meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0"> <meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>llm.gateway / workbench</title> <title>llm.gateway / workbench</title>
@ -127,13 +131,11 @@
/* ─── Tab navigation ──────────────────────────────────────────────────── */ /* ─── Tab navigation ──────────────────────────────────────────────────── */
.tabs { .tabs {
display: flex; display: flex;
flex-wrap: wrap;
gap: 0; gap: 0;
border-bottom: 1px solid var(--line); border-bottom: 1px solid var(--line);
margin: 0 0 28px; margin: 0 0 28px;
overflow-x: auto;
scrollbar-width: none;
} }
.tabs::-webkit-scrollbar { display: none; }
.tab-trigger { .tab-trigger {
background: none; background: none;
border: none; border: none;
@ -498,7 +500,7 @@
body.hide-empty-providers .wallet-card[data-status="unknown"] { display: none; } body.hide-empty-providers .wallet-card[data-status="unknown"] { display: none; }
/* In Simple Mode, hide the noisy "5-axis" header explainer */ /* In Simple Mode, hide the noisy "5-axis" header explainer */
body.simple-mode .h-section .h-meta:contains('Lean-CTX') { display: none; } body.simple-mode .h-section .h-meta:contains('LLM Gateway') { display: none; }
/* ─── Hero (Buddy + Savings + Cost-VS) ───────────────────────────────── */ /* ─── Hero (Buddy + Savings + Cost-VS) ───────────────────────────────── */
.hero-grid { .hero-grid {
@ -1066,6 +1068,99 @@
font-size: 0.8rem; font-size: 0.8rem;
} }
/* ─── Discover Panel ──────────────────────────────────────────────── */

/* Responsive card grid: as many columns as fit at a 280px minimum width. */
.discover-grid {
  display: grid;
  grid-template-columns: repeat(auto-fit, minmax(280px, 1fr));
  gap: 12px;
  margin-bottom: 16px;
}

/* One card per discovery category. */
.discover-card {
  border: 1px solid var(--line-2);
  border-radius: 10px;
  padding: 12px 14px;
  background: var(--surface-1, rgba(255,255,255,0.02));
}

/* Small uppercase caption above the counter. */
.discover-card-title {
  font-size: 0.72rem;
  color: var(--text-muted, #888);
  text-transform: uppercase;
  letter-spacing: 0.08em;
  margin-bottom: 4px;
}

/* Large monospace counter line. */
.discover-card-stat {
  font-family: var(--mono);
  font-size: 1.4rem;
  color: var(--accent);
  margin-bottom: 8px;
}

/* Per-item result list inside a card. */
.discover-card-list {
  list-style: none;
  padding: 0;
  margin: 0;
  font-size: 0.78rem;
  font-family: var(--mono);
}

/* Each row: label left, status right, hairline separator on top. */
.discover-card-list li {
  padding: 4px 0;
  border-top: 1px solid var(--line-1, rgba(255,255,255,0.05));
  display: flex;
  justify-content: space-between;
  align-items: center;
}

/* The first row has nothing above it to separate from. */
.discover-card-list li:first-child {
  border-top: none;
}

/* Status colours: accent for the "ok" marker, dimmed for the "no" marker. */
.discover-card-list .disc-ok {
  color: var(--accent);
}

.discover-card-list .disc-no {
  color: var(--text-muted, #888);
  opacity: 0.6;
}
/* ─── API Tab ──────────────────────────────────────────────────────── */

/* Endpoint card: bordered box holding a header row and a code snippet. */
.api-card {
  border: 1px solid var(--line-2);
  border-radius: 10px;
  padding: 14px 16px;
  margin-bottom: 14px;
  background: var(--surface-1, rgba(255,255,255,0.02));
}

/* Header row: method badge, path, tag line, copy button; wraps when narrow. */
.api-card-head {
  display: flex;
  align-items: center;
  gap: 10px;
  flex-wrap: wrap;
  margin-bottom: 10px;
}

/* HTTP method badge. */
.api-method {
  font-family: var(--mono);
  font-size: 0.7rem;
  font-weight: 700;
  padding: 3px 8px;
  border-radius: 4px;
  background: var(--accent);
  color: #fff;
  letter-spacing: 0.05em;
}

/* Endpoint path. */
.api-path {
  font-family: var(--mono);
  font-size: 0.92rem;
  color: var(--text);
}

/* Descriptive tag; flex: 1 lets it take the remaining width of the header row. */
.api-tag {
  font-size: 0.72rem;
  color: var(--text-muted, #888);
  font-style: italic;
  flex: 1;
}

/* Code snippet box: whitespace preserved, scrolls horizontally. */
.api-snippet {
  font-family: var(--mono);
  font-size: 0.8rem;
  background: var(--surface-2, rgba(0,0,0,0.25));
  border: 1px solid var(--line-1, rgba(255,255,255,0.05));
  padding: 12px 14px;
  border-radius: 6px;
  overflow-x: auto;
  white-space: pre;
  color: var(--text);
  margin: 0;
}

/* Strip any inline-code chrome from <code> nested in a snippet box. */
.api-snippet code {
  background: transparent;
  padding: 0;
}

/*
 * Compact copy button.
 * NOTE(review): this rule is declared before the `.btn` rule in the Buttons
 * section; if `.btn` also sets padding or font-size, it wins at equal
 * specificity and these values never apply — confirm against `.btn`.
 */
.api-copy {
  padding: 4px 12px;
  font-size: 0.7rem;
}

/* Try-it-out playground container. */
.api-tryout {
  border: 1px solid var(--line-2);
  border-radius: 10px;
  padding: 14px 16px;
  background: var(--surface-1, rgba(255,255,255,0.02));
}

/* Wrapping flex row inside the playground. */
.api-tryout-row {
  display: flex;
  flex-wrap: wrap;
  align-items: center;
}

/* Bordered wrapper so the bridge table scrolls horizontally when too wide. */
.api-bridge-table-wrap {
  overflow-x: auto;
  border: 1px solid var(--line-2);
  border-radius: 10px;
}

.api-bridge-table {
  width: 100%;
  border-collapse: collapse;
  font-size: 0.85rem;
}

/* Shared cell box model for header and body cells. */
.api-bridge-table th,
.api-bridge-table td {
  padding: 10px 12px;
  text-align: left;
  border-bottom: 1px solid var(--line-1, rgba(255,255,255,0.05));
}

/* Header cells: small, muted, uppercase. */
.api-bridge-table th {
  font-weight: 600;
  color: var(--text-muted, #888);
  text-transform: uppercase;
  letter-spacing: 0.05em;
  font-size: 0.72rem;
}

/* The last row needs no divider; the wrapper border closes the table. */
.api-bridge-table tr:last-child td {
  border-bottom: none;
}

/* Status cell, with colour modifiers for ok / err. */
.api-bridge-status {
  font-family: var(--mono);
  font-size: 0.78rem;
}

.api-bridge-status.ok {
  color: var(--accent);
}

.api-bridge-status.err {
  color: #e34;
}
/* ─── Buttons ────────────────────────────────────────────────────────── */ /* ─── Buttons ────────────────────────────────────────────────────────── */
.btn { .btn {
font-family: var(--mono); font-family: var(--mono);
@ -1338,6 +1433,7 @@
<button class="tab-trigger" data-tab="leaderboard" role="tab" title="Race-mode results — fastest model leaderboard if you ran multi-model races"><span class="tab-num">08</span>races <span class="tab-badge" id="leaderboardTabBadge">·</span></button> <button class="tab-trigger" data-tab="leaderboard" role="tab" title="Race-mode results — fastest model leaderboard if you ran multi-model races"><span class="tab-num">08</span>races <span class="tab-badge" id="leaderboardTabBadge">·</span></button>
<button class="tab-trigger" data-tab="share" role="tab" title="Generate an embeddable SVG card showing your savings (for blog/Twitter/README)"><span class="tab-num">09</span>share</button> <button class="tab-trigger" data-tab="share" role="tab" title="Generate an embeddable SVG card showing your savings (for blog/Twitter/README)"><span class="tab-num">09</span>share</button>
<button class="tab-trigger" data-tab="report" role="tab" title="Generate a printable monthly PDF report"><span class="tab-num">10</span>report</button> <button class="tab-trigger" data-tab="report" role="tab" title="Generate a printable monthly PDF report"><span class="tab-num">10</span>report</button>
<button class="tab-trigger" data-tab="api" role="tab" title="API reference — copy-paste curl/SDK examples for OpenAI-compat, Anthropic-compat, native"><span class="tab-num">11</span>api</button>
</nav> </nav>
<!-- ─── Tab: Overview ────────────────────────────────────────────────── --> <!-- ─── Tab: Overview ────────────────────────────────────────────────── -->
@ -1356,7 +1452,7 @@
<div class="hero-counter"><span id="heroTokensSavedCombined">0</span><span style="font-size:1.1rem;color:var(--dim);font-weight:400;margin-left:8px;">tokens</span></div> <div class="hero-counter"><span id="heroTokensSavedCombined">0</span><span style="font-size:1.1rem;color:var(--dim);font-weight:400;margin-left:8px;">tokens</span></div>
<div class="hero-layer-breakdown" id="heroLayerBreakdown"> <div class="hero-layer-breakdown" id="heroLayerBreakdown">
<div class="layer-row"><span class="layer-name">⚡ Gateway (LLM calls)</span><span class="layer-val" id="heroTokensSaved">0</span></div> <div class="layer-row"><span class="layer-name">⚡ Gateway (LLM calls)</span><span class="layer-val" id="heroTokensSaved">0</span></div>
<div class="layer-row" id="heroLeanCtxRow" style="display:none;"><span class="layer-name">🗜 Lean-CTX (tool calls)</span><span class="layer-val" id="heroLeanCtxTokens"></span></div> <div class="layer-row" id="heroExternalToolRow" style="display:none;"><span class="layer-name">🗜 External tool compression (legacy)</span><span class="layer-val" id="heroExternalToolTokens"></span></div>
</div> </div>
<div class="hero-row"> <div class="hero-row">
<div class="hero-pill"> <div class="hero-pill">
@ -1392,8 +1488,8 @@
</div> </div>
</div> </div>
<!-- ─── Five-Axis Savings Breakdown — what makes us better than Lean-CTX ── --> <!-- ─── Five-Axis Savings Breakdown — full savings breakdown ── -->
<h2 class="h-section">Savings Sources <span class="h-meta">we measure 5 axes — Lean-CTX measures 1</span></h2> <h2 class="h-section">Savings Sources <span class="h-meta">5 measurement axes across all calls</span></h2>
<div class="savings-axes" id="savingsAxes"> <div class="savings-axes" id="savingsAxes">
<div class="loading">loading</div> <div class="loading">loading</div>
</div> </div>
@ -1472,8 +1568,34 @@
<strong>auto-gateway</strong> <span id="subsAutoState">detection only</span> <strong>auto-gateway</strong> <span id="subsAutoState">detection only</span>
— installed CLI subscriptions are wrapped into HTTP bridges and exposed via <code>/v1/chat/completions</code> — installed CLI subscriptions are wrapped into HTTP bridges and exposed via <code>/v1/chat/completions</code>
</div> </div>
<button class="btn btn-sm primary" id="subsSpawnBtn" type="button">⟳ spawn missing bridges</button> <div style="display: flex; gap: 8px;">
<button class="btn btn-sm" id="discoverFullBtn" type="button" title="Full-system scan: CLIs + local LLMs + API keys, then auto-spawn any detected bridges">⚡ discover & connect all</button>
<button class="btn btn-sm primary" id="subsSpawnBtn" type="button">⟳ spawn missing bridges</button>
</div>
</div> </div>
<!-- ── Full discovery report (populated by discover button) ────────── -->
<div id="discoverReportWrap" style="display: none; margin-bottom: 14px;">
<h2 class="h-section">Discovery Report <span class="h-meta" id="discoverReportMeta"></span></h2>
<div class="discover-grid">
<div class="discover-card">
<div class="discover-card-title">CLI Subscriptions</div>
<div class="discover-card-stat"><span id="discCntSubs">0</span> detected</div>
<ul class="discover-card-list" id="discListSubs"></ul>
</div>
<div class="discover-card">
<div class="discover-card-title">Local LLM Servers</div>
<div class="discover-card-stat"><span id="discCntLocal">0</span> running</div>
<ul class="discover-card-list" id="discListLocal"></ul>
</div>
<div class="discover-card">
<div class="discover-card-title">API-Key Providers</div>
<div class="discover-card-stat"><span id="discCntKeys">0</span> configured</div>
<ul class="discover-card-list" id="discListKeys"></ul>
</div>
</div>
</div>
<div class="subs-grid" id="subscriptionsList"> <div class="subs-grid" id="subscriptionsList">
<div class="loading">discovering installed subscriptions</div> <div class="loading">discovering installed subscriptions</div>
</div> </div>
@ -1567,6 +1689,11 @@
<div class="metric-value" id="cacheHitRate">0<span class="metric-unit">%</span></div> <div class="metric-value" id="cacheHitRate">0<span class="metric-unit">%</span></div>
<div class="metric-change">hits ÷ total req</div> <div class="metric-change">hits ÷ total req</div>
</div> </div>
<div class="metric">
<div class="metric-label">compressed since last restart</div>
<div class="metric-value" id="compressedSinceRestart">0</div>
<div class="metric-change" id="compressedSinceRestartMeta">— · — ops · since —</div>
</div>
</div> </div>
<h2 class="h-section">Top Caching Callers <span class="h-meta">most savings</span></h2> <h2 class="h-section">Top Caching Callers <span class="h-meta">most savings</span></h2>
@ -1675,6 +1802,136 @@
<div class="share-hint">Tip: in the report window, press <code>Cmd/Ctrl+P</code> → "Save as PDF". The report is fully styled for A4 print.</div> <div class="share-hint">Tip: in the report window, press <code>Cmd/Ctrl+P</code> → "Save as PDF". The report is fully styled for A4 print.</div>
</section> </section>
<!-- ─── Tab: API Reference ─────────────────────────────────────────── -->
<section class="tab-panel" data-tab="api">
<h2 class="h-section">API Reference <span class="h-meta">all endpoints route through compression + caller tracking</span></h2>
<div class="api-intro" style="margin: 8px 0 16px; color: var(--text-muted, #888); font-size: 13px; line-height: 1.5;">
The LLM Gateway exposes three POST endpoints and one GET. Every call is logged in
<em>activity</em>, compressed when input ≥ 700 tokens, and routed via <code>routing-rules.yaml</code>
to the right subscription bridge (Claude Code, ChatGPT, Copilot, M365 Copilot, Codex) or local Ollama.
</div>
<!-- ── Endpoint card: OpenAI-compatible ─────────────────────────── -->
<div class="api-card" data-endpoint="chat">
<div class="api-card-head">
<span class="api-method">POST</span>
<code class="api-path">/v1/chat/completions</code>
<span class="api-tag">OpenAI-compatible · works with `openai` SDK</span>
<button class="btn ghost api-copy" data-target="api-snippet-chat" type="button">copy</button>
</div>
<pre id="api-snippet-chat" class="api-snippet"><code>curl https://llm-gateway.context-x.org/v1/chat/completions \
-H "Content-Type: application/json" \
-d '{
"model": "claude-sonnet-4.6",
"messages": [{"role": "user", "content": "hi"}]
}'</code></pre>
</div>
<!-- ── Endpoint card: Anthropic-compatible ──────────────────────── -->
<div class="api-card" data-endpoint="messages">
<div class="api-card-head">
<span class="api-method">POST</span>
<code class="api-path">/v1/messages</code>
<span class="api-tag">Anthropic-compatible · works with `@anthropic-ai/sdk`</span>
<button class="btn ghost api-copy" data-target="api-snippet-messages" type="button">copy</button>
</div>
<pre id="api-snippet-messages" class="api-snippet"><code>curl https://llm-gateway.context-x.org/v1/messages \
-H "Content-Type: application/json" \
-d '{
"model": "claude-sonnet-4.6",
"messages": [{"role": "user", "content": "hi"}],
"max_tokens": 1024
}'</code></pre>
</div>
<!-- ── Endpoint card: Native ────────────────────────────────────── -->
<div class="api-card" data-endpoint="completion">
<div class="api-card-head">
<span class="api-method">POST</span>
<code class="api-path">/v1/completion</code>
<span class="api-tag">native — full caller-tracking + compression options</span>
<button class="btn ghost api-copy" data-target="api-snippet-completion" type="button">copy</button>
</div>
<pre id="api-snippet-completion" class="api-snippet"><code>curl https://llm-gateway.context-x.org/v1/completion \
-H "Content-Type: application/json" \
-d '{
"caller": "my-app",
"task_type": "generic_qa",
"input": "your prompt here",
"options": { "compression": { "enabled": true, "mode": "auto" } }
}'</code></pre>
</div>
<!-- ── Endpoint card: Models list ───────────────────────────────── -->
<div class="api-card" data-endpoint="models">
<div class="api-card-head">
<span class="api-method">GET</span>
<code class="api-path">/v1/models</code>
<span class="api-tag">list every model the gateway can route to</span>
<button class="btn ghost api-copy" data-target="api-snippet-models" type="button">copy</button>
</div>
<pre id="api-snippet-models" class="api-snippet"><code>curl https://llm-gateway.context-x.org/v1/models</code></pre>
</div>
<!-- ── Try-It-Out playground ────────────────────────────────────── -->
<h2 class="h-section" style="margin-top: 28px;">Try it out <span class="h-meta">live POST against the gateway</span></h2>
<div class="api-tryout">
<div class="api-tryout-row">
<label class="settings-row-label">Endpoint:
<select id="apiTryEndpoint" class="settings-input" style="width: 220px; margin-left: 8px;">
<option value="/v1/completion">/v1/completion (native)</option>
<option value="/v1/chat/completions">/v1/chat/completions (OpenAI)</option>
<option value="/v1/messages">/v1/messages (Anthropic)</option>
</select>
</label>
<label class="settings-row-label" style="margin-left: 18px;">Model:
<input id="apiTryModel" class="settings-input" type="text" value="claude-sonnet-4.6" style="width: 200px; margin-left: 8px;">
</label>
</div>
<label class="settings-row-label" style="display: block; margin-top: 10px;">Prompt:
<textarea id="apiTryPrompt" class="settings-input" rows="4" style="width: 100%; margin-top: 6px;" placeholder="Type your prompt — long inputs (≥700 tokens) will be compressed automatically.">Say hello in three different languages.</textarea>
</label>
<div style="margin-top: 10px;">
<button class="btn primary" id="apiTryRun" type="button">send request</button>
<span id="apiTryStatus" style="margin-left: 12px; font-size: 12px; color: var(--text-muted, #888);"></span>
</div>
<div id="apiTryResultWrap" style="margin-top: 14px; display: none;">
<div class="api-tryout-meta" id="apiTryMeta" style="font-size: 12px; color: var(--text-muted, #888); margin-bottom: 6px;"></div>
<pre class="api-snippet"><code id="apiTryResult"></code></pre>
</div>
</div>
<!-- ── Bridge mapping (model → subscription) ────────────────────── -->
<h2 class="h-section" style="margin-top: 28px;">Model → Bridge Mapping <span class="h-meta">which subscription each model alias routes to</span></h2>
<div class="api-bridge-table-wrap">
<table class="api-bridge-table" id="apiBridgeTable">
<thead>
<tr>
<th>Model alias</th>
<th>Bridge</th>
<th>Subscription used</th>
<th>Port</th>
<th>Status</th>
</tr>
</thead>
<tbody>
<tr><td><code>claude-sonnet-4.6</code>, <code>claude-haiku</code>, <code>claude-opus</code></td><td>claude-bridge</td><td>Claude Code Max (OAuth)</td><td>3250</td><td class="api-bridge-status" data-bridge="claude-bridge"></td></tr>
<tr><td><code>gpt-4o</code>, <code>gpt-4.1</code>, <code>gpt-5.x</code></td><td>openai-bridge</td><td>ChatGPT Plus / Pro</td><td>3251</td><td class="api-bridge-status" data-bridge="openai-bridge"></td></tr>
<tr><td><code>copilot-gpt-4o</code>, <code>copilot-claude-3.7</code></td><td>copilot-bridge</td><td>GitHub Copilot</td><td>3252</td><td class="api-bridge-status" data-bridge="copilot-bridge"></td></tr>
<tr><td><code>codex-mini</code>, <code>gpt-5.1-codex-mini</code></td><td>codex-bridge</td><td>OpenAI Codex CLI</td><td>3253</td><td class="api-bridge-status" data-bridge="codex-bridge"></td></tr>
<tr><td><code>m365-copilot</code></td><td>m365-copilot-bridge</td><td>Microsoft 365 Copilot</td><td>3257</td><td class="api-bridge-status" data-bridge="m365-copilot-bridge"></td></tr>
<tr><td><code>qwen2.5:3b / 7b / 14b / 32b</code>, <code>magatama:32b</code>, <code>magatama-coder</code></td><td>ollama (Mac Studio)</td><td>local — no cost</td><td>11434</td><td class="api-bridge-status" data-bridge="ollama"></td></tr>
</tbody>
</table>
</div>
<div class="share-hint" style="margin-top: 12px;">
The gateway picks the bridge from <code>routing-rules.yaml</code> based on <code>task_type</code> and the
requested <code>model</code>. You can also hit a bridge directly (e.g. <code>http://82.165.222.127:3250/v1/messages</code>)
— but then you bypass compression, savings tracking, and the routing rules.
</div>
</section>
<!-- ─── Caller Deep-Dive Modal ───────────────────────────────────── --> <!-- ─── Caller Deep-Dive Modal ───────────────────────────────────── -->
<div class="modal-overlay" id="callerModal" role="dialog" aria-modal="true"> <div class="modal-overlay" id="callerModal" role="dialog" aria-modal="true">
<div class="modal" style="max-width: 900px;"> <div class="modal" style="max-width: 900px;">
@ -1883,6 +2140,9 @@
el.innerHTML = clients.map(client => { el.innerHTML = clients.map(client => {
const lastSeen = client.lastSeen ? new Date(client.lastSeen).toLocaleString() : 'never'; const lastSeen = client.lastSeen ? new Date(client.lastSeen).toLocaleString() : 'never';
const callerList = client.callers?.length ? client.callers.join(', ') : 'no caller id seen'; const callerList = client.callers?.length ? client.callers.join(', ') : 'no caller id seen';
const bridgeState = client.bridgeProvider
? `${client.bridgeProvider}: ${client.bridgeStatus || 'not configured'}${client.bridgeDetail ? ` (${client.bridgeDetail})` : ''}`
: 'bridge: OpenAI-compatible / manual client config';
return ` return `
<div class="client-item"> <div class="client-item">
<div class="client-top"> <div class="client-top">
@ -1892,6 +2152,7 @@
<div class="client-meta"> <div class="client-meta">
<div><strong>${formatNumber(client.requestCount)}</strong> requests · <strong>${formatNumber(client.tokensSaved)}</strong> saved</div> <div><strong>${formatNumber(client.requestCount)}</strong> requests · <strong>${formatNumber(client.tokensSaved)}</strong> saved</div>
<div title="${escapeHtml(callerList)}">caller: ${escapeHtml(callerList)}</div> <div title="${escapeHtml(callerList)}">caller: ${escapeHtml(callerList)}</div>
<div title="${escapeHtml(bridgeState)}">gateway: ${escapeHtml(bridgeState)}</div>
<div>last: ${escapeHtml(lastSeen)}</div> <div>last: ${escapeHtml(lastSeen)}</div>
</div> </div>
</div> </div>
@ -2137,6 +2398,60 @@
`; `;
} }
// ─── Full Discovery: CLIs + Local LLMs + API Keys ────────────────────
// Click handler for the "discover" button: POSTs to /api/dashboard/discover,
// then renders the returned report into three lists (CLI subscriptions, local
// LLM servers, API-key providers) and refreshes the subscriptions panel.
// While the request is in flight the button is disabled; its label shows the
// outcome for 3 seconds before being restored.
document.getElementById('discoverFullBtn')?.addEventListener('click', async () => {
  const btn = document.getElementById('discoverFullBtn');
  const wrap = document.getElementById('discoverReportWrap');
  const meta = document.getElementById('discoverReportMeta');
  // Every report field below comes from the server response and is written
  // through innerHTML, so it is HTML-escaped first (same helper the client
  // list uses). Missing fields render as an empty string.
  const esc = (value) => escapeHtml(String(value ?? ''));
  btn.disabled = true;
  const orig = btn.textContent;
  btn.textContent = '⏳ scanning…';
  try {
    const res = await apiFetch(`${API_BASE}/api/dashboard/discover`, { method: 'POST' });
    const payload = await res.json();
    if (!payload.success) throw new Error(payload.error || 'discovery failed');
    const r = payload.data.report;
    const spawnedCount = payload.data.spawnedCount;
    wrap.style.display = 'block';
    // textContent: no escaping needed for the summary line.
    meta.textContent = `host: ${r.host} · scanned: ${new Date(r.generatedAt).toLocaleTimeString()} · ${spawnedCount} bridges spawned · ${r.summary.totalProviders} total providers, ${r.summary.totalRoutableModels} models`;

    // CLI subscriptions
    document.getElementById('discCntSubs').textContent = r.subscriptions.detected;
    document.getElementById('discListSubs').innerHTML = r.subscriptions.items.map(s => `
      <li>
        <span>${esc(s.descriptor.label)}</span>
        <span class="${s.installed ? 'disc-ok' : 'disc-no'}">${s.installed ? (s.authenticated === true ? '✓ auth' : (s.authenticated === false ? '⚠ unauth' : '?')) : '—'}</span>
      </li>
    `).join('');

    // Local LLM servers
    document.getElementById('discCntLocal').textContent = r.localLLMs.detected;
    document.getElementById('discListLocal').innerHTML = r.localLLMs.items.map(l => `
      <li>
        <span>${esc(l.label)}<br><span style="font-size:0.66rem;opacity:0.6;">${esc(l.url)}</span></span>
        <span class="${l.detected ? 'disc-ok' : 'disc-no'}">${l.detected ? `✓ ${l.models.length} models · ${esc(l.latencyMs)}ms` : '— offline'}</span>
      </li>
    `).join('');

    // API-key providers
    document.getElementById('discCntKeys').textContent = r.apiKeys.configured;
    document.getElementById('discListKeys').innerHTML = r.apiKeys.items.map(k => `
      <li>
        <span>${esc(k.label)}<br><span style="font-size:0.66rem;opacity:0.6;">${esc(k.envKey)}</span></span>
        <span class="${k.configured ? 'disc-ok' : 'disc-no'}">${k.configured ? '✓ set' : '— missing'}</span>
      </li>
    `).join('');

    btn.textContent = `✓ found ${r.summary.totalProviders}`;
    await loadSubscriptions();
  } catch (e) {
    // Surface the failure on the button itself (textContent, so no escaping).
    btn.textContent = `✗ ${e.message}`;
  } finally {
    // Keep the result label visible briefly, then restore the idle state.
    setTimeout(() => { btn.disabled = false; btn.textContent = orig; }, 3000);
  }
});
document.getElementById('subsSpawnBtn').addEventListener('click', async () => { document.getElementById('subsSpawnBtn').addEventListener('click', async () => {
const btn = document.getElementById('subsSpawnBtn'); const btn = document.getElementById('subsSpawnBtn');
btn.disabled = true; btn.disabled = true;
@ -2235,7 +2550,7 @@
document.getElementById('routingModeBadge').textContent = s.routingMode; document.getElementById('routingModeBadge').textContent = s.routingMode;
// UI mode toggles // UI mode toggles
const ui = s.ui ?? { simpleMode: true, hideEmptyProviders: true, showTooltips: true }; const ui = s.ui ?? { simpleMode: false, hideEmptyProviders: true, showTooltips: true };
document.getElementById('uiSimpleMode').checked = !!ui.simpleMode; document.getElementById('uiSimpleMode').checked = !!ui.simpleMode;
document.getElementById('uiHideEmpty').checked = !!ui.hideEmptyProviders; document.getElementById('uiHideEmpty').checked = !!ui.hideEmptyProviders;
document.getElementById('uiTooltips').checked = !!ui.showTooltips; document.getElementById('uiTooltips').checked = !!ui.showTooltips;
@ -2394,6 +2709,11 @@
document.getElementById('cacheEntries').textContent = formatNumber(s.uniqueEntries); document.getElementById('cacheEntries').textContent = formatNumber(s.uniqueEntries);
document.getElementById('tokensPrevented').textContent = formatNumber(s.totalTokensSaved); document.getElementById('tokensPrevented').textContent = formatNumber(s.totalTokensSaved);
document.getElementById('cacheHitRate').innerHTML = s.hitRatePercent.toFixed(1) + '<span class="metric-unit">%</span>'; document.getElementById('cacheHitRate').innerHTML = s.hitRatePercent.toFixed(1) + '<span class="metric-unit">%</span>';
const sr = s.sinceRestart || {};
document.getElementById('compressedSinceRestart').textContent = formatNumber(sr.tokensSaved || 0);
const sinceLabel = sr.sinceISO ? new Date(sr.sinceISO).toLocaleString() : '—';
const pctTxt = (sr.savingsPct || 0).toFixed(1) + '%';
document.getElementById('compressedSinceRestartMeta').textContent = pctTxt + ' · ' + (sr.operations || 0) + ' ops · since ' + sinceLabel;
// Tab badge // Tab badge
document.getElementById('savingsTabBadge').textContent = s.totalHits > 0 ? formatCost(s.totalCostSaved) : '·'; document.getElementById('savingsTabBadge').textContent = s.totalHits > 0 ? formatCost(s.totalCostSaved) : '·';
@ -2618,9 +2938,9 @@
`; `;
} }
// Try to fetch Lean-CTX stats from localhost:3333 (browser-side, not server-side) // Try to fetch external tool stats from localhost:3333 (legacy compat) (browser-side, not server-side)
// Returns null if Lean-CTX not running OR dashboard browsed from different machine. // Returns null if no external tool runs there.
async function fetchLeanCtxStats() { async function fetchExternalToolStats() {
try { try {
const ctrl = new AbortController(); const ctrl = new AbortController();
setTimeout(() => ctrl.abort(), 1500); setTimeout(() => ctrl.abort(), 1500);
@ -2645,20 +2965,20 @@
document.getElementById('heroCacheHits').textContent = s.totalHits; document.getElementById('heroCacheHits').textContent = s.totalHits;
document.getElementById('heroSavingsRate').textContent = `${s.hitRatePercent || 0}%`; document.getElementById('heroSavingsRate').textContent = `${s.hitRatePercent || 0}%`;
// Lean-CTX integration: pull from localhost:3333 if available // Optional external-tool integration: pull from localhost:3333 if running
const leanCtx = await fetchLeanCtxStats(); const externalTool = await fetchExternalToolStats();
const combined = gatewayTokens + (leanCtx?.saved || 0); const combined = gatewayTokens + (externalTool?.saved || 0);
document.getElementById('heroTokensSavedCombined').textContent = formatNumber(combined); document.getElementById('heroTokensSavedCombined').textContent = formatNumber(combined);
if (leanCtx) { if (externalTool) {
document.getElementById('heroLeanCtxRow').style.display = 'flex'; document.getElementById('heroExternalToolRow').style.display = 'flex';
document.getElementById('heroLeanCtxTokens').textContent = formatNumber(leanCtx.saved); document.getElementById('heroExternalToolTokens').textContent = formatNumber(externalTool.saved);
} else { } else {
document.getElementById('heroLeanCtxRow').style.display = 'none'; document.getElementById('heroExternalToolRow').style.display = 'none';
} }
document.getElementById('costWithout').textContent = formatCost(c.costWithoutGateway || 0); document.getElementById('costWithout').textContent = formatCost(c.costWithoutGateway || 0);
document.getElementById('costWith').textContent = formatCost(c.costWithGateway || 0); document.getElementById('costWith').textContent = formatCost(c.costWithGateway || 0);
const saved = (c.costWithoutGateway || 0) - (c.costWithGateway || 0); const saved = (c.costWithoutGateway || 0) - (c.costWithGateway || 0);
document.getElementById('costSavedLine').textContent = formatCost(saved); document.getElementById('costSavedLine').textContent = (saved < 0 ? '-$' : '$') + Math.abs(saved).toFixed(2);
document.getElementById('costSavedPercent').textContent = `${(c.effectiveSavingsPercent || 0).toFixed(1)}%`; document.getElementById('costSavedPercent').textContent = `${(c.effectiveSavingsPercent || 0).toFixed(1)}%`;
// 5-axis savings // 5-axis savings
@ -3070,9 +3390,98 @@
if (target === 'memory') loadMemoryGraph(); if (target === 'memory') loadMemoryGraph();
if (target === 'leaderboard') loadLeaderboard(); if (target === 'leaderboard') loadLeaderboard();
if (target === 'share') refreshShareCard(); if (target === 'share') refreshShareCard();
if (target === 'api') refreshApiBridgeStatus();
}); });
}); });
// ─── API Tab — copy buttons, try-it-out, bridge status ────────────────
function copyToClipboard(text) {
if (navigator.clipboard?.writeText) return navigator.clipboard.writeText(text);
const ta = document.createElement('textarea');
ta.value = text; document.body.appendChild(ta); ta.select();
document.execCommand('copy'); document.body.removeChild(ta);
return Promise.resolve();
}
// Wire every ".api-copy" button: on click, copy the inner text of the element
// whose id is given in the button's data-target attribute, then flash a
// confirmation on the button for 1.4 s.
document.querySelectorAll('.api-copy').forEach(btn => {
  // Capture the idle label once, at wiring time. Reading it inside the click
  // handler would pick up "copied ✓" on a second click within the flash window
  // and leave the button stuck on that label.
  const idleLabel = btn.textContent;
  let resetTimer;
  btn.addEventListener('click', async () => {
    const targetId = btn.dataset.target;
    const snippet = document.getElementById(targetId)?.innerText || '';
    let copied = true;
    try {
      await copyToClipboard(snippet);
    } catch {
      // The clipboard write can reject (e.g. permission denied); report it on
      // the button instead of leaving an unhandled rejection.
      copied = false;
    }
    btn.textContent = copied ? 'copied ✓' : 'copy failed';
    // Restart the reset timer so rapid clicks do not restore the label early.
    clearTimeout(resetTimer);
    resetTimer = setTimeout(() => { btn.textContent = idleLabel; }, 1400);
  });
});
// "Try it out" runner on the API tab: builds a request body that matches the
// selected endpoint, POSTs it to the gateway, and shows HTTP status, round-trip
// time, compression metadata (when the response carries any) and the raw JSON.
document.getElementById('apiTryRun')?.addEventListener('click', async () => {
  const byId = (id) => document.getElementById(id);
  const endpoint = byId('apiTryEndpoint').value;
  const model = byId('apiTryModel').value || 'claude-sonnet-4.6';
  const prompt = byId('apiTryPrompt').value || '';
  const status = byId('apiTryStatus');
  const meta = byId('apiTryMeta');
  const wrap = byId('apiTryResultWrap');
  const out = byId('apiTryResult');

  if (prompt.trim() === '') {
    status.textContent = 'add a prompt first';
    return;
  }

  // Each endpoint expects its own payload shape.
  let body;
  switch (endpoint) {
    case '/v1/completion':
      body = { caller: 'dashboard-tryout', task_type: 'generic_qa', input: prompt, options: { compression: { enabled: true, mode: 'auto' } } };
      break;
    case '/v1/chat/completions':
      body = { model, messages: [{ role: 'user', content: prompt }] };
      break;
    default:
      body = { model, messages: [{ role: 'user', content: prompt }], max_tokens: 1024 };
  }

  status.textContent = 'sending…';
  const t0 = performance.now();
  try {
    const url = (API_BASE || location.origin) + endpoint;
    const res = await fetch(url, {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify(body),
    });
    const dtMs = Math.round(performance.now() - t0);
    // A non-JSON response body is shown as an empty object rather than failing.
    const json = await res.json().catch(() => ({}));
    status.textContent = `${res.status} ${res.statusText} · ${dtMs} ms`;

    // Compression info may sit at the top level or under `metadata`.
    const c = json?.compression || json?.metadata?.compression || null;
    meta.textContent = c
      ? `compression: applied=${c.applied} · method=${c.method} · before=${c.tokens_before} after=${c.tokens_after} saved=${c.tokens_saved}`
      : 'no compression metadata in response';

    out.textContent = JSON.stringify(json, null, 2);
    wrap.style.display = 'block';
  } catch (err) {
    status.textContent = 'error: ' + (err.message || err);
  }
});
/**
 * Refresh the status column of the bridge table on the API tab.
 *
 * Fetches /api/dashboard/providers, merges the `subscription` and `local`
 * provider groups, and for every ".api-bridge-status" cell looks up the
 * provider named in its data-bridge attribute:
 *   - enabled and status === 'configured' → "✓ online" (class "ok")
 *   - found but not ready                 → its status, or "disabled" (class "err")
 *   - not found                           → "unknown" (class "err")
 *
 * Best-effort: a non-OK response leaves the table untouched, and a failure is
 * logged to the console rather than thrown.
 */
async function refreshApiBridgeStatus() {
  try {
    const res = await fetch((API_BASE || location.origin) + '/api/dashboard/providers');
    if (!res.ok) return;
    const json = await res.json();
    const grouped = json?.data?.grouped;
    const allProviders = [
      ...(grouped?.subscription || []),
      ...(grouped?.local || []),
    ];
    document.querySelectorAll('.api-bridge-status').forEach(cell => {
      // This runs on every visit to the API tab, so clear the previous state
      // class first; otherwise a bridge that changed state keeps both classes.
      const show = (label, stateClass) => {
        cell.classList.remove('ok', 'err');
        cell.textContent = label;
        cell.classList.add(stateClass);
      };
      const name = cell.dataset.bridge;
      const p = allProviders.find(x => x.name === name);
      if (!p) {
        show('unknown', 'err');
      } else if (p.enabled && p.status === 'configured') {
        show('✓ online', 'ok');
      } else {
        show(p.status || 'disabled', 'err');
      }
    });
  } catch (err) {
    // Non-fatal: the table keeps its last rendered state.
    console.warn('bridge status refresh failed', err);
  }
}
// ─── Init ──────────────────────────────────────────────────────────── // ─── Init ────────────────────────────────────────────────────────────
async function init() { async function init() {
await checkHealth(); await checkHealth();
@ -3090,7 +3499,7 @@
if (payload.success) { if (payload.success) {
document.getElementById('routingModeBadge').textContent = payload.data.routingMode; document.getElementById('routingModeBadge').textContent = payload.data.routingMode;
// Apply UI mode (Simple Mode etc.) immediately on load // Apply UI mode (Simple Mode etc.) immediately on load
applyUiMode(payload.data.ui ?? { simpleMode: true, hideEmptyProviders: true, showTooltips: true }); applyUiMode(payload.data.ui ?? { simpleMode: false, hideEmptyProviders: true, showTooltips: true });
} }
} catch (e) { /* non-fatal */ } } catch (e) { /* non-fatal */ }

View File

@ -1,7 +1,7 @@
# LLM Gateway Model Configuration # LLM Gateway Model Configuration
# Ollama base URL: http://192.168.178.169:11434 # Ollama base URL: http://192.168.178.169:11434
ollama_base_url: "https://ollama.fichtmueller.org" ollama_base_url: "http://127.0.0.1:11434"
tiers: tiers:
fast: fast:
@ -26,7 +26,7 @@ models:
qwen2.5:3b: qwen2.5:3b:
tier: fast tier: fast
context_length: 32768 context_length: 32768
strengths: [classification, short_text, routing] strengths: [classification, summarization, routing]
max_tokens_default: 512 max_tokens_default: 512
qwen2.5:7b: qwen2.5:7b:
@ -35,83 +35,58 @@ models:
strengths: [classification, summarization, short_analysis] strengths: [classification, summarization, short_analysis]
max_tokens_default: 1024 max_tokens_default: 1024
phi3.5:3.8b: qwen2.5:7b-instruct:
tier: fast tier: fast
context_length: 128000 context_length: 32768
strengths: [classification, summarization] strengths: [classification, summarization, short_analysis]
max_tokens_default: 1024
qwen2.5-coder:7b-instruct:
tier: fast
context_length: 32768
strengths: [code_generation, technical_analysis, routing]
max_tokens_default: 512 max_tokens_default: 512
# ─── MAGATAMA — Fine-tuned Security Intelligence (Context X) ───────────────── # ─── MAGATAMA — Fine-tuned Security Intelligence (Context X) ─────────────────
magatama:32b: magatama:32b:
tier: large tier: large
context_length: 131072 context_length: 131072
strengths: [security_analysis, threat_intelligence, compliance, bgp_security, incident_response, nis2, ciso_reporting] strengths: [security_analysis, threat_intelligence, compliance, bgp_security, incident_response, nis2, ciso_reporting, complex_writing, deep_analysis, technical]
max_tokens_default: 4096 max_tokens_default: 4096
description: "MAGATAMA まがたま — TEPPEKI 7-pillar security AI, fine-tuned on Qwen2.5-32B" description: "MAGATAMA まがたま — TEPPEKI 7-pillar security AI, fine-tuned on Qwen2.5-32B"
# Custom fine-tuned models (Context X)
ctxhealer:latest:
tier: medium
context_length: 32768
strengths: [infrastructure_diagnosis, root_cause_analysis, remediation_steps]
max_tokens_default: 1024
llama-guard3:1b:
tier: fast
context_length: 8192
strengths: [safety_classification, threat_detection]
max_tokens_default: 256
# Medium tier # Medium tier
qwen2.5:14b: qwen2.5:14b:
tier: medium tier: medium
context_length: 131072 context_length: 131072
strengths: [general, writing, analysis, coding] strengths: [general, writing, analysis, coding, dialogue]
max_tokens_default: 2048 max_tokens_default: 2048
mistral:7b: magatama-llm-v2-0:latest:
tier: medium tier: medium
context_length: 32768 context_length: 131072
strengths: [general, writing] strengths: [general, writing, analysis, coding, dialogue]
max_tokens_default: 2048 max_tokens_default: 2048
llama3.2:8b: magatama-coder:latest:
tier: medium
context_length: 128000
strengths: [general, chat, analysis]
max_tokens_default: 2048
deepseek-r1:8b:
tier: medium tier: medium
context_length: 65536 context_length: 65536
strengths: [reasoning, analysis, coding] strengths: [code_generation, technical_analysis, debugging]
max_tokens_default: 2048 max_tokens_default: 2048
# Large tier # Large tier
qwen2.5:32b: qwen2.5:32b:
tier: large tier: large
context_length: 131072 context_length: 131072
strengths: [complex_writing, deep_analysis, technical] strengths: [complex_writing, deep_analysis, technical, security_analysis]
max_tokens_default: 4096
llama3.3:70b:
tier: large
context_length: 128000
strengths: [complex_reasoning, long_form, research]
max_tokens_default: 4096
deepseek-r1:32b:
tier: large
context_length: 131072
strengths: [chain_of_thought, complex_reasoning]
max_tokens_default: 4096 max_tokens_default: 4096
# Fallback chains per tier # Fallback chains per tier
fallback_chains: fallback_chains:
fast: [qwen2.5:3b, qwen2.5:7b, phi3.5:3.8b] fast: [qwen2.5:7b-instruct, qwen2.5-coder:7b-instruct]
medium: [qwen2.5:14b, mistral:7b, llama3.2:8b] medium: [magatama-llm-v2-0:latest, magatama-coder:latest, qwen2.5:7b-instruct]
large: [qwen2.5:32b, llama3.3:70b, deepseek-r1:32b] large: [magatama:32b, magatama-llm-v2-0:latest]
code_generation: [deepseek-r1:32b, qwen2.5:32b, llama3.3:70b] code_generation: [magatama-coder:latest, qwen2.5-coder:7b-instruct]
# Cross-tier fallback when primary tier fails # Cross-tier fallback when primary tier fails
tier_fallback: tier_fallback:

View File

@ -1110,7 +1110,7 @@ routing_rules:
# ─── CONTENT / LINKEDIN ────────────────────────────────────────────────────── # ─── CONTENT / LINKEDIN ──────────────────────────────────────────────────────
linkedin_post: linkedin_post:
model: qwen2.5:32b model: fo-blog-v10
tier: large tier: large
prompt_template: linkedin_post prompt_template: linkedin_post
temperature: 0.7 temperature: 0.7
@ -1118,7 +1118,7 @@ routing_rules:
output_format: text output_format: text
requires_fact_check: false requires_fact_check: false
validators: [banlist, language, length, question_closer] validators: [banlist, language, length, question_closer]
callers: [n8n, internal] callers: [n8n, internal, linkedin-distributor]
linkedin_comment: linkedin_comment:
model: qwen2.5:14b model: qwen2.5:14b

View File

@ -3,7 +3,7 @@
-- Purpose: Track token compression and cost analytics -- Purpose: Track token compression and cost analytics
-- PostgreSQL compatible version (version 16+) -- PostgreSQL compatible version (version 16+)
-- Table: Token compression metrics (LeanCTX, RTK) -- Table: Token compression metrics (LLM Gateway)
CREATE TABLE IF NOT EXISTS tokenvault_metrics ( CREATE TABLE IF NOT EXISTS tokenvault_metrics (
id SERIAL PRIMARY KEY, id SERIAL PRIMARY KEY,
file_path VARCHAR(255), file_path VARCHAR(255),

View File

@ -1,12 +1,12 @@
-- Tokenvault & Cost Tracking Schema Extensions -- Tokenvault & Cost Tracking Schema Extensions
-- Created: 2026-04-19 -- Created: 2026-04-19
-- Purpose: Track token compression (LeanCTX + RTK) and cost analytics -- Purpose: Track token compression (LLM Gateway) and cost analytics
-- Table: Token compression metrics (LeanCTX, RTK) -- Table: Token compression metrics (LLM Gateway)
CREATE TABLE IF NOT EXISTS tokenvault_metrics ( CREATE TABLE IF NOT EXISTS tokenvault_metrics (
id SERIAL PRIMARY KEY, id SERIAL PRIMARY KEY,
file_path VARCHAR(255), file_path VARCHAR(255),
mode VARCHAR(50), -- 'lean-aggressive', 'lean-map', 'rtk-max', etc. mode VARCHAR(50), -- 'gateway-aggressive', 'gateway-map', 'gateway-trim', etc.
tokens_before INT, tokens_before INT,
tokens_after INT, tokens_after INT,
savings_pct DECIMAL(5,2), savings_pct DECIMAL(5,2),
@ -26,7 +26,7 @@ CREATE TABLE IF NOT EXISTS cost_analytics (
agent_id VARCHAR(50), -- 'claude-code', 'qwen-reviewer', etc. agent_id VARCHAR(50), -- 'claude-code', 'qwen-reviewer', etc.
tokens_in INT, tokens_in INT,
tokens_out INT, tokens_out INT,
tokens_compressed INT, -- After LeanCTX + RTK tokens_compressed INT, -- After LLM Gateway compression
cost_usd DECIMAL(10,6), cost_usd DECIMAL(10,6),
cost_saved_usd DECIMAL(10,6), cost_saved_usd DECIMAL(10,6),
provider VARCHAR(50), -- 'ollama', 'cerebras', 'groq', 'claude', etc. provider VARCHAR(50), -- 'ollama', 'cerebras', 'groq', 'claude', etc.

View File

@ -101,7 +101,7 @@ export function calculateCost(
/** /**
* Calculate cost savings from compression * Calculate cost savings from compression
* @param model Model identifier * @param model Model identifier
* @param tokensBeforeCompression Tokens before LeanCTX + RTK * @param tokensBeforeCompression Tokens before LLM Gateway compression
* @param tokensAfterCompression Tokens after compression * @param tokensAfterCompression Tokens after compression
* @returns Savings in USD * @returns Savings in USD
*/ */

View File

@ -47,7 +47,7 @@ const PROVIDERS: readonly ExternalProvider[] = [
enabled: true, enabled: true,
models: [ models: [
{ id: 'claude-opus-4-1', tier: 'reasoning', contextLength: 200000 }, { id: 'claude-opus-4-1', tier: 'reasoning', contextLength: 200000 },
{ id: 'claude-sonnet-4-1', tier: 'large', contextLength: 200000 }, { id: 'claude-sonnet-4-6', tier: 'large', contextLength: 200000 },
{ id: 'claude-haiku-3', tier: 'fast', contextLength: 200000 }, { id: 'claude-haiku-3', tier: 'fast', contextLength: 200000 },
], ],
}, },
@ -174,7 +174,7 @@ const PROVIDERS: readonly ExternalProvider[] = [
enabled: true, enabled: true,
models: [ models: [
{ id: 'claude-opus-4-1', tier: 'reasoning', contextLength: 200000 }, { id: 'claude-opus-4-1', tier: 'reasoning', contextLength: 200000 },
{ id: 'claude-sonnet-4-1', tier: 'large', contextLength: 200000 }, { id: 'claude-sonnet-4-6', tier: 'large', contextLength: 200000 },
{ id: 'claude-haiku-3', tier: 'fast', contextLength: 200000 }, { id: 'claude-haiku-3', tier: 'fast', contextLength: 200000 },
], ],
}, },

View File

@ -728,6 +728,36 @@ function handleFormalLogicOverride(
return result; return result;
} }
// ── Helper: Code Generation Intent Override ───────────────────────────────
// Case-insensitive patterns that signal an explicit request to produce code.
// handleCodeGenerationOverride treats a match of ANY one pattern as sufficient.
// NOTE(review): patterns 2 and 3 allow up to 160 arbitrary characters between
// the two keywords, and the noun lists contain general words ("test",
// "project", "service"), so non-code prose such as "create ... project" also
// matches — confirm this breadth is intended.
const CODE_GENERATION_PATTERNS = [
// 1) "write" + optional "a" + optional language/framework + a code-artifact noun.
/\bwrite\s+(?:a\s+)?(?:typescript|javascript|python|go|rust|react|next\.js|node)?\s*(?:function|class|script|module|component|test|handler|middleware)\b/i,
// 2) A construction verb followed, within 160 characters, by a code-artifact noun.
/\b(?:implement|create|build|generate|scaffold)\b[\s\S]{0,160}\b(?:api|endpoint|function|class|component|service|schema|migration|crud|jwt|test|project|module)\b/i,
// 3) "REST API" / "GraphQL API" followed, within 160 characters, by an implementation keyword.
/\b(?:rest|graphql)\s+api\b[\s\S]{0,160}\b(?:implement|create|build|endpoint|authentication|jwt)\b/i,
];
/**
 * Scoring short-circuit for explicit code-generation requests.
 *
 * @param fullText     Text tested against CODE_GENERATION_PATTERNS.
 * @param input        Scorer input, forwarded to computeAllDimensions.
 * @param userMessages Weighted user messages, forwarded to computeAllDimensions.
 * @returns A fixed `code_generation` result (score 0.62, confidence 0.86) when
 *          any pattern matches, otherwise `null` so the caller continues with
 *          regular scoring.
 */
function handleCodeGenerationOverride(
  fullText: string,
  input: ScorerInput,
  userMessages: readonly WeightedMessage[],
): ScoringResult | null {
  const hasCodeGenIntent = CODE_GENERATION_PATTERNS.some((pattern) => pattern.test(fullText));
  if (!hasCodeGenIntent) {
    return null;
  }

  // Dimensions are still computed so the result carries the full breakdown.
  const override: ScoringResult = {
    tier: 'code_generation',
    score: 0.62,
    confidence: 0.86,
    reason: 'code generation intent detected',
    dimensions: computeAllDimensions(input, userMessages, fullText),
  };

  recordSessionTier('code_generation');
  logger.debug(
    { tier: 'code_generation', reason: 'code_generation_override' },
    'Request scored via code generation override',
  );
  return override;
}
// ── Helper: Apply Score Overrides ────────────────────────────────────────── // ── Helper: Apply Score Overrides ──────────────────────────────────────────
interface ScoreOverridesInput { interface ScoreOverridesInput {
@ -754,6 +784,7 @@ function applyScoreOverrides(
const codeGenDim = dimensions.find((d) => d.name === 'codeGeneration'); const codeGenDim = dimensions.find((d) => d.name === 'codeGeneration');
if (codeGenDim && codeGenDim.rawScore > 0.25) { if (codeGenDim && codeGenDim.rawScore > 0.25) {
tier = 'code_generation'; tier = 'code_generation';
confidence = Math.max(confidence, 0.78);
reason = 'code generation keywords detected'; reason = 'code generation keywords detected';
} }
@ -771,7 +802,7 @@ function applyScoreOverrides(
} }
// Ambiguity check // Ambiguity check
if (confidence < 0.45) { if (confidence < 0.45 && tier !== 'code_generation' && tier !== 'reasoning') {
tier = 'medium'; tier = 'medium';
reason = 'ambiguous (confidence < 0.45, defaulting to medium)'; reason = 'ambiguous (confidence < 0.45, defaulting to medium)';
} }
@ -795,6 +826,9 @@ export function scoreRequest(
const formalLogicResult = handleFormalLogicOverride(fullText, input, userMessages); const formalLogicResult = handleFormalLogicOverride(fullText, input, userMessages);
if (formalLogicResult) return formalLogicResult; if (formalLogicResult) return formalLogicResult;
const codeGenerationResult = handleCodeGenerationOverride(fullText, input, userMessages);
if (codeGenerationResult) return codeGenerationResult;
const dimensions = computeAllDimensions(input, userMessages, fullText); const dimensions = computeAllDimensions(input, userMessages, fullText);
let rawScore = 0; let rawScore = 0;
for (const dim of dimensions) { for (const dim of dimensions) {

View File

@ -184,14 +184,14 @@ export function getOllamaBaseUrl(): string {
/** /**
* Maps a scorer tier to the best primary model and its fallback chain. * Maps a scorer tier to the best primary model and its fallback chain.
* The 'reasoning' tier uses llama3.3:70b (complex_reasoning strength) from the large tier. * The 'reasoning' tier uses llama3.3:70b (complex_reasoning strength) from the large tier.
* The 'code_generation' tier uses OpenAI Codex (gpt-4-turbo) as primary via external provider. * The 'code_generation' tier uses OpenAI Codex as primary via external provider.
*/ */
const TIER_MODEL_MAP: Record<Tier, { primary: string; configTier: 'fast' | 'medium' | 'large'; provider?: string }> = { const TIER_MODEL_MAP: Record<Tier, { primary: string; configTier: 'fast' | 'medium' | 'large'; provider?: string }> = {
fast: { primary: 'qwen2.5:3b', configTier: 'fast' }, fast: { primary: 'qwen2.5:3b', configTier: 'fast' },
medium: { primary: 'qwen2.5:14b', configTier: 'medium' }, medium: { primary: 'qwen2.5:14b', configTier: 'medium' },
large: { primary: 'qwen2.5:32b', configTier: 'large' }, large: { primary: 'qwen2.5:32b', configTier: 'large' },
reasoning: { primary: 'llama3.3:70b', configTier: 'large' }, reasoning: { primary: 'llama3.3:70b', configTier: 'large' },
code_generation: { primary: 'gpt-4-turbo', configTier: 'large', provider: 'openai-codex' }, code_generation: { primary: 'gpt-5.1-codex-mini', configTier: 'large', provider: 'openai-codex' },
}; };
function buildMediumTierFallback( function buildMediumTierFallback(
@ -223,7 +223,8 @@ function buildScoredFallbackChain(
models: ModelsYaml, models: ModelsYaml,
): string[] { ): string[] {
if (tier === 'reasoning' || tier === 'code_generation') { if (tier === 'reasoning' || tier === 'code_generation') {
return [selectedModel, ...buildFallbackChain(selectedModel, configTier, models).filter((m) => m !== selectedModel)]; const fallbackTier = tier === 'code_generation' ? 'code_generation' : configTier;
return [selectedModel, ...buildFallbackChain(selectedModel, fallbackTier, models).filter((m) => m !== selectedModel)];
} }
return buildFallbackChain(selectedModel, configTier, models); return buildFallbackChain(selectedModel, configTier, models);
} }
@ -302,7 +303,7 @@ export function routeByScore(
const mapping = TIER_MODEL_MAP[scoringResult.tier]; const mapping = TIER_MODEL_MAP[scoringResult.tier];
const selectedModel = mapping.primary; const selectedModel = mapping.primary;
const configTier = mapping.configTier; const configTier = mapping.configTier;
const tierConfig = models.tiers[configTier]; const tierConfig = models.tiers[scoringResult.tier] ?? models.tiers[configTier];
if (!tierConfig) { if (!tierConfig) {
logger.error({ tier: configTier }, 'Tier config not found in models.yaml, falling back to medium'); logger.error({ tier: configTier }, 'Tier config not found in models.yaml, falling back to medium');

View File

@ -127,6 +127,27 @@ function shouldBypassResponseCache(caller: string): boolean {
|| normalized.includes('copilot'); || normalized.includes('copilot');
} }
/**
 * Prepare request text for the Prompt-Guard classifier.
 *
 * Removes a leading chat-role label ("user:", "assistant:", "system:",
 * "developer:", any case) together with the whitespace after it from the start
 * of every line, then trims the result.
 *
 * @param input Raw request text.
 * @returns The stripped text, or the untouched `input` when stripping would
 *          leave nothing.
 */
function inputForPromptGuard(input: string): string {
  const roleLabelAtLineStart = /^(user|assistant|system|developer):\s*/gim;
  const stripped = input.replace(roleLabelAtLineStart, '').trim();
  if (stripped) {
    return stripped;
  }
  return input;
}
/**
 * Decide whether a request should be sent to the Prompt-Guard classifier.
 *
 * @param input Raw request text.
 * @param scan  Result of the injection scan; only `scan.matches` is read.
 * @returns `true` when the scan recorded any match, or when the role-stripped,
 *          NFKC-normalized text hits one of the suspicion patterns below;
 *          `false` otherwise.
 */
function shouldRunPromptGuard(input: string, scan: InjectionScanResult): boolean {
  if (scan.matches.length > 0) {
    return true;
  }

  const normalized = inputForPromptGuard(input).normalize('NFKC');
  const suspicionPatterns = [
    // Instruction-override phrasing ("ignore ... instructions", "bypass ... safety").
    /\b(?:ignore|disregard|forget|override|bypass|jailbreak)\b[\s\S]{0,120}\b(?:instructions?|rules?|prompt|policy|safety)\b/i,
    // Persona / privilege switch.
    /\b(?:you\s+are\s+now|act\s+as|pretend\s+to\s+be|developer\s+mode|root\s+administrator|runtime\s+controller|security\s+auditor)\b/i,
    // Requests to expose prompts or other internals.
    /\b(?:show|print|dump|reveal|output)\b[\s\S]{0,160}\b(?:system\s+prompt|developer\s+prompt|hidden|runtime|memory|tools?|filters?|policy|classifier|chain-of-thought|reasoning)\b/i,
    // Credentials followed by a disclosure or login verb (English and German terms).
    /\b(?:passwords?|passw(?:o|ö)rter|credentials?|api\s*keys?|tokens?|secrets?)\b[\s\S]{0,160}\b(?:print|show|write|paste|send|share|reveal|chat|anmelden|log\s*in)\b/i,
    // Encoding or execution keywords.
    /\b(?:base64|rot13|hex\s+encoded|decode|execute|run\s+this)\b/i,
    // Zero-width and bidirectional control characters.
    /[\u200B-\u200F\u202A-\u202E\u2060-\u2064\uFEFF]/,
    // Run of 40+ base64-alphabet characters.
    /\b[A-Za-z0-9+/]{40,}={0,2}\b/,
    // Run of 16+ hex byte pairs.
    /\b(?:[0-9a-fA-F]{2}){16,}\b/,
  ];

  for (const pattern of suspicionPatterns) {
    if (pattern.test(normalized)) {
      return true;
    }
  }
  return false;
}
const ChatMessageSchema = z.object({ const ChatMessageSchema = z.object({
role: z.string().min(1), role: z.string().min(1),
content: z.union([z.string(), z.array(z.unknown()), z.null()]).optional(), content: z.union([z.string(), z.array(z.unknown()), z.null()]).optional(),
@ -428,8 +449,8 @@ async function executeCompletion(body: CompletionRequest, startMs: number, callI
} }
// ─── Layer 2: ML classifier (Prompt-Guard sidecar) ──────────────────── // ─── Layer 2: ML classifier (Prompt-Guard sidecar) ────────────────────
if (!injectionScan.detected && isPromptGuardConfigured() && body.input.length >= getPromptGuardMinLen()) { if (!injectionScan.detected && isPromptGuardConfigured() && body.input.length >= getPromptGuardMinLen() && shouldRunPromptGuard(body.input, injectionScan)) {
const pg = await callPromptGuard(body.input); const pg = await callPromptGuard(inputForPromptGuard(body.input));
if (pg.available && pg.label === 'INJECTION' && pg.score >= getPromptGuardThreshold()) { if (pg.available && pg.label === 'INJECTION' && pg.score >= getPromptGuardThreshold()) {
logger.warn( logger.warn(
{ caller, callId, pg_score: pg.score, pg_latency_ms: pg.latencyMs }, { caller, callId, pg_score: pg.score, pg_latency_ms: pg.latencyMs },

View File

@ -9,6 +9,7 @@ import { createRequestLogger } from '../modules/request-logger.js';
import { globalRequestStream } from '../modules/request-stream.js'; import { globalRequestStream } from '../modules/request-stream.js';
import { getAvailableProviders, getAllProviders } from '../pipeline/external-providers.js'; import { getAvailableProviders, getAllProviders } from '../pipeline/external-providers.js';
import { discoverSubscriptions } from '../modules/subscription-discovery.js'; import { discoverSubscriptions } from '../modules/subscription-discovery.js';
import { runDiscovery, runDiscoveryAndSpawn } from '../modules/auto-discovery.js';
import { getRunningBridges, spawnDetectedBridges } from '../modules/bridge-spawner.js'; import { getRunningBridges, spawnDetectedBridges } from '../modules/bridge-spawner.js';
import { getPublicSettings, saveSettings, SettingsPatchSchema } from '../modules/settings-store.js'; import { getPublicSettings, saveSettings, SettingsPatchSchema } from '../modules/settings-store.js';
import { import {
@ -18,6 +19,10 @@ import {
pruneStaleCacheEntries, pruneStaleCacheEntries,
} from '../modules/response-cache.js'; } from '../modules/response-cache.js';
import { getComprehensiveSavings } from '../modules/savings-calculator.js'; import { getComprehensiveSavings } from '../modules/savings-calculator.js';
// Captured once at module load — represents the gateway-process start time
// for the 'compressed since last restart' tile in the dashboard.
const SERVER_STARTED_AT_ISO = new Date().toISOString();
import { import {
getBuddyState, getBuddyState,
getAchievements, getAchievements,
@ -106,6 +111,22 @@ type ProviderRuntime = {
}; };
const CLIENT_CATALOG = [ const CLIENT_CATALOG = [
{
id: 'macbook-claude-code',
label: 'MacBook (Claude Code)',
patterns: ['claude-code-laptop'],
commands: [],
paths: [],
processPatterns: [],
},
{
id: 'macstudio-claude-code',
label: 'Mac Studio (Claude Code)',
patterns: ['claude-code-macstudio', 'claude-code-studio'],
commands: [],
paths: [],
processPatterns: [],
},
{ {
id: 'codex-desktop', id: 'codex-desktop',
label: 'Codex Desktop / CLI', label: 'Codex Desktop / CLI',
@ -158,6 +179,17 @@ const CLIENT_CATALOG = [
type ClientStatus = 'live' | 'running' | 'installed' | 'not-connected'; type ClientStatus = 'live' | 'running' | 'installed' | 'not-connected';
// Maps each CLIENT_CATALOG entry id to the provider name used to look up that
// client's bridge runtime. An `undefined` value means no bridge runtime lookup
// is performed for the client (only the provider name field is reported, unset).
// NOTE(review): 'codex' and 'codex-bridge' do not appear among the bridge URL
// keys / display labels visible in this change (those use 'openai-codex' and
// 'copilot-codex') — confirm these names resolve to a real provider.
// NOTE(review): the key type only restricts keys to catalog ids if
// CLIENT_CATALOG is declared `as const`; otherwise it widens to `string` — confirm.
const CLIENT_BRIDGE_PROVIDERS: Record<(typeof CLIENT_CATALOG)[number]['id'], string | undefined> = {
'macbook-claude-code': undefined,
'macstudio-claude-code': undefined,
'codex-desktop': 'codex',
'claude-desktop': 'claude-code',
'microsoft-copilot': 'm365-copilot-bridge',
'github-copilot': 'copilot-bridge',
'openai-compatible': undefined,
'chatgpt': 'codex-bridge',
};
function expandUserPath(path: string): string { function expandUserPath(path: string): string {
return path.startsWith('~/') ? `${homedir()}/${path.slice(2)}` : path; return path.startsWith('~/') ? `${homedir()}/${path.slice(2)}` : path;
} }
@ -217,8 +249,22 @@ async function getGatewayClientCoverage(hoursBack: number = 24): Promise<Array<{
tokensSaved: number; tokensSaved: number;
source: 'gateway' | 'local-detection' | 'none'; source: 'gateway' | 'local-detection' | 'none';
detectionSignals: string[]; detectionSignals: string[];
bridgeProvider?: string;
bridgeStatus?: string;
bridgeHealthy?: boolean;
bridgeDetail?: string;
}>> { }>> {
const detections = await getLocalDesktopDetections(); const detections = await getLocalDesktopDetections();
const bridgeRuntimes = Object.fromEntries(await Promise.all(CLIENT_CATALOG.map(async (client) => {
const providerName = CLIENT_BRIDGE_PROVIDERS[client.id];
return [
client.id,
{
providerName,
...(providerName ? await providerRuntime(providerName) : {}),
},
] as const;
})));
let callers: Array<{ caller: string; requestCount: number; lastSeen?: string; tokensIn: number; tokensSaved: number }> = []; let callers: Array<{ caller: string; requestCount: number; lastSeen?: string; tokensIn: number; tokensSaved: number }> = [];
try { try {
@ -257,12 +303,19 @@ async function getGatewayClientCoverage(hoursBack: number = 24): Promise<Array<{
logger.warn({ error }, 'Client gateway traffic lookup failed, returning local desktop detections only'); logger.warn({ error }, 'Client gateway traffic lookup failed, returning local desktop detections only');
} }
// First-match-wins: a caller is assigned to the first (most specific) catalog
// entry it matches, so device-specific entries (MacBook/Mac Studio) take a
// caller before the generic 'claude-desktop' bucket — no double counting.
const assignedCallers = new Set<string>();
return CLIENT_CATALOG.map((client) => { return CLIENT_CATALOG.map((client) => {
const detection = detections[client.id]; const detection = detections[client.id];
const bridgeRuntime = bridgeRuntimes[client.id];
const matched = callers.filter((row) => { const matched = callers.filter((row) => {
if (assignedCallers.has(row.caller)) return false;
const caller = row.caller.toLowerCase(); const caller = row.caller.toLowerCase();
return client.patterns.some((pattern) => caller.includes(pattern)); return client.patterns.some((pattern) => caller.includes(pattern));
}); });
matched.forEach((row) => assignedCallers.add(row.caller));
const requestCount = matched.reduce((sum, row) => sum + row.requestCount, 0); const requestCount = matched.reduce((sum, row) => sum + row.requestCount, 0);
const tokensIn = matched.reduce((sum, row) => sum + row.tokensIn, 0); const tokensIn = matched.reduce((sum, row) => sum + row.tokensIn, 0);
const tokensSaved = matched.reduce((sum, row) => sum + row.tokensSaved, 0); const tokensSaved = matched.reduce((sum, row) => sum + row.tokensSaved, 0);
@ -283,6 +336,10 @@ async function getGatewayClientCoverage(hoursBack: number = 24): Promise<Array<{
tokensSaved, tokensSaved,
source: requestCount > 0 ? 'gateway' : detection?.installed ? 'local-detection' : 'none', source: requestCount > 0 ? 'gateway' : detection?.installed ? 'local-detection' : 'none',
detectionSignals: detection?.signals ?? [], detectionSignals: detection?.signals ?? [],
bridgeProvider: bridgeRuntime?.providerName,
bridgeStatus: bridgeRuntime?.runtimeStatus,
bridgeHealthy: bridgeRuntime?.runtimeHealthy,
bridgeDetail: bridgeRuntime?.runtimeDetail,
}; };
}); });
} }
@ -291,8 +348,6 @@ function bridgeHealthUrl(providerName: string): string | undefined {
const bridgeUrls: Record<string, string | undefined> = { const bridgeUrls: Record<string, string | undefined> = {
'claude-bridge': process.env['CLAUDE_BRIDGE_URL'], 'claude-bridge': process.env['CLAUDE_BRIDGE_URL'],
'claude-code': process.env['CLAUDE_CODE_URL'] || process.env['CLAUDE_BRIDGE_URL'], 'claude-code': process.env['CLAUDE_CODE_URL'] || process.env['CLAUDE_BRIDGE_URL'],
'openai-bridge': process.env['OPENAI_BRIDGE_URL'],
'chatgpt-bridge': process.env['CHATGPT_BRIDGE_URL'] || process.env['OPENAI_BRIDGE_URL'],
'copilot-bridge': process.env['COPILOT_BRIDGE_URL'], 'copilot-bridge': process.env['COPILOT_BRIDGE_URL'],
'm365-copilot-bridge': process.env['M365_COPILOT_BRIDGE_URL'], 'm365-copilot-bridge': process.env['M365_COPILOT_BRIDGE_URL'],
'openai-codex': process.env['OPENAI_CODEX_URL'] || process.env['CODEX_BRIDGE_URL'], 'openai-codex': process.env['OPENAI_CODEX_URL'] || process.env['CODEX_BRIDGE_URL'],
@ -575,7 +630,7 @@ export async function dashboardRoute(fastify: FastifyInstance): Promise<void> {
const configuredProviders = providers.filter((provider) => provider.enabled && !!process.env[provider.envKey]); const configuredProviders = providers.filter((provider) => provider.enabled && !!process.env[provider.envKey]);
const localProviders = providers.filter((provider) => provider.name.toLowerCase().includes('ollama')); const localProviders = providers.filter((provider) => provider.name.toLowerCase().includes('ollama'));
const subscriptionProviders = providers.filter((provider) => const subscriptionProviders = providers.filter((provider) =>
['claude-bridge', 'claude-code', 'openai-bridge', 'chatgpt-bridge', 'copilot-bridge', 'm365-copilot-bridge', 'codex', 'openai-codex'] ['claude-bridge', 'copilot-bridge', 'm365-copilot-bridge', 'openai-codex']
.includes(provider.name) .includes(provider.name)
); );
@ -883,12 +938,10 @@ export async function dashboardRoute(fastify: FastifyInstance): Promise<void> {
const displayLabels: Record<string, string> = { const displayLabels: Record<string, string> = {
'claude-bridge': 'Claude Code Subscription (Bridge)', 'claude-bridge': 'Claude Code Subscription (Bridge)',
'claude-code': 'Claude Code Direct', 'claude-code': 'Claude Code Direct',
'openai-bridge': 'OpenAI ChatGPT Subscription (Bridge)',
'chatgpt-bridge': 'ChatGPT Plus Subscription (Bridge)',
'copilot-bridge': 'GitHub Copilot Subscription', 'copilot-bridge': 'GitHub Copilot Subscription',
'm365-copilot-bridge': 'Microsoft 365 Copilot Subscription', 'm365-copilot-bridge': 'Microsoft 365 Copilot Subscription',
'codex': 'GitHub Copilot Codex (Inner API)', 'copilot-codex': 'GitHub Copilot (Codex Inner API)',
'openai-codex': 'OpenAI API (Codex / GPT)', 'openai-codex': 'OpenAI (ChatGPT + Codex)',
'cerebras': 'Cerebras (Free Tier)', 'cerebras': 'Cerebras (Free Tier)',
'groq': 'Groq (Free Tier)', 'groq': 'Groq (Free Tier)',
'mistral': 'Mistral AI (Free Tier)', 'mistral': 'Mistral AI (Free Tier)',
@ -898,9 +951,8 @@ export async function dashboardRoute(fastify: FastifyInstance): Promise<void> {
// Subscription providers (paid via login/subscription, NOT free-tier API) // Subscription providers (paid via login/subscription, NOT free-tier API)
const subscriptionNames = new Set([ const subscriptionNames = new Set([
'claude-bridge', 'claude-code', 'claude-bridge',
'openai-bridge', 'chatgpt-bridge', 'copilot-bridge', 'm365-copilot-bridge', 'openai-codex'
'copilot-bridge', 'm365-copilot-bridge', 'codex', 'openai-codex'
]); ]);
// Categorize all providers (independent of API-key presence) // Categorize all providers (independent of API-key presence)
@ -1073,6 +1125,36 @@ export async function dashboardRoute(fastify: FastifyInstance): Promise<void> {
} }
}); });
// ─── Full-System Auto-Discovery ─────────────────────────────────────────
// GET  /api/dashboard/discover — runs a discovery scan and returns the
//                                unified report (no bridges are spawned here)
// POST /api/dashboard/discover — runs discovery and spawns bridges
fastify.get('/api/dashboard/discover', dashboardAuth, async (_request: FastifyRequest, reply: FastifyReply) => {
  try {
    // Scan first, then wrap the report in the standard success envelope.
    return reply.send({ success: true, data: await runDiscovery() });
  } catch (scanError) {
    // Any failure (scan or send) is logged and reported as a generic 500.
    logger.error({ error: scanError }, 'Discovery scan failed');
    return reply.status(500).send({ success: false, error: 'Discovery scan failed' });
  }
});
// POST /api/dashboard/discover — runs discovery, spawns bridges, and returns
// the report together with the spawned list and its length.
fastify.post('/api/dashboard/discover', dashboardAuth, async (_request: FastifyRequest, reply: FastifyReply) => {
  try {
    const { report, spawned } = await runDiscoveryAndSpawn();
    const data = { report, spawned, spawnedCount: spawned.length };
    return reply.send({ success: true, data });
  } catch (spawnError) {
    // Any failure (discovery, spawn or send) is logged and reported as a generic 500.
    logger.error({ error: spawnError }, 'Discovery + spawn failed');
    return reply.status(500).send({ success: false, error: 'Discovery + spawn failed' });
  }
});
// POST /api/dashboard/subscriptions/spawn — trigger auto-spawn of detected bridges. // POST /api/dashboard/subscriptions/spawn — trigger auto-spawn of detected bridges.
// Returns the list of bridges that were spawned (or already running). // Returns the list of bridges that were spawned (or already running).
fastify.post('/api/dashboard/subscriptions/spawn', dashboardAuth, async (_request: FastifyRequest, reply: FastifyReply) => { fastify.post('/api/dashboard/subscriptions/spawn', dashboardAuth, async (_request: FastifyRequest, reply: FastifyReply) => {
@ -1180,7 +1262,8 @@ export async function dashboardRoute(fastify: FastifyInstance): Promise<void> {
effectiveSavingsPercent, effectiveSavingsPercent,
totals: comprehensive.totals, totals: comprehensive.totals,
}, },
}, // Compression since this gateway process started — resets at each restart.
},
series, series,
}, },
meta: { hours, bucket_minutes: bucketMin, timestamp: new Date().toISOString() }, meta: { hours, bucket_minutes: bucketMin, timestamp: new Date().toISOString() },
@ -1638,4 +1721,45 @@ export async function dashboardRoute(fastify: FastifyInstance): Promise<void> {
return reply.status(500).send({ error: 'Failed to serve dashboard UI' }); return reply.status(500).send({ error: 'Failed to serve dashboard UI' });
} }
}); });
// Passive usage import: lets clients that talk DIRECTLY to a provider (e.g. the
// laptop's Claude Code -> api.anthropic.com) report their usage so they appear in
// clients/costs WITHOUT routing traffic through the gateway. The reported caller
// is matched against the CLIENT_CATALOG patterns; the default caller
// 'claude-code-laptop' is the pattern of the 'macbook-claude-code' entry.
//
// Request body (JSON, every field optional except at least one token count):
//   caller, model         — labels, truncated to 120 characters
//   tokens_in, tokens_out — non-negative counts (fractions are floored)
//   cost_usd              — non-negative number
//   day                   — usage day as YYYY-MM-DD, read as UTC; defaults to today
// Responses: 200 with the imported values, 400 on invalid input, 500 on failure.
fastify.post('/api/dashboard/usage/report', dashboardAuth, async (request: FastifyRequest, reply: FastifyReply) => {
  try {
    const body = (request.body ?? {}) as Record<string, unknown>;

    // Only strings and numbers are usable as labels; anything else (objects,
    // arrays, booleans) falls back to the default instead of becoming
    // '[object Object]' in the request id.
    const toLabel = (value: unknown, fallback: string): string => {
      const text = typeof value === 'string' || typeof value === 'number' ? String(value) : fallback;
      return text.slice(0, 120);
    };
    // Non-numeric, non-finite (NaN/Infinity) and negative inputs collapse to 0
    // so they can never reach the database.
    const toAmount = (value: unknown): number => {
      const parsed = Number(value);
      return Number.isFinite(parsed) && parsed > 0 ? parsed : 0;
    };

    const caller = toLabel(body.caller, 'claude-code-laptop');
    const model = toLabel(body.model, 'claude-code');
    const tokensIn = Math.floor(toAmount(body.tokens_in));
    const tokensOut = Math.floor(toAmount(body.tokens_out));
    const costUsd = toAmount(body.cost_usd);
    if (tokensIn === 0 && tokensOut === 0) {
      return reply.status(400).send({ success: false, error: 'tokens_in or tokens_out required' });
    }

    // Validate the usage day up front: a malformed value would otherwise turn
    // into an Invalid Date, fail inside the database call and surface as a 500.
    const day = body.day ?? new Date().toISOString().slice(0, 10);
    if (typeof day !== 'string' || !/^\d{4}-\d{2}-\d{2}$/.test(day)) {
      return reply.status(400).send({ success: false, error: 'day must be formatted as YYYY-MM-DD' });
    }
    const dayEnd = new Date(`${day}T23:59:59Z`);
    // The round-trip comparison rejects dates the parser would roll over
    // (e.g. 2026-02-31) as well as values it cannot parse at all.
    if (Number.isNaN(dayEnd.getTime()) || dayEnd.toISOString().slice(0, 10) !== day) {
      return reply.status(400).send({ success: false, error: 'day must be a valid calendar date' });
    }

    // Stamp the row with the ACTUAL usage day so lastSeen = when tokens were
    // used, not when the export ran. Cap at "now" so today's still-growing day
    // reads as current/live.
    const usedAt = dayEnd.getTime() > Date.now() ? new Date() : dayEnd;

    const db = getPool();
    const requestId = `usage-import:${caller}:${model}:${day}`;
    // Upsert by request_id (one row per caller/model/day): re-reporting an
    // in-progress day updates its totals instead of creating duplicates.
    // NOTE(review): UPDATE-then-INSERT is not atomic; two concurrent first
    // reports for the same key could both insert — confirm whether request_id
    // carries a unique constraint.
    const updated = await db.query(
      `UPDATE request_tracking SET tokens_in=$1, tokens_out=$2, cost_usd=$3, created_at=$4 WHERE request_id=$5`,
      [tokensIn, tokensOut, costUsd, usedAt, requestId]
    );
    if (updated.rowCount === 0) {
      const requestLogger = createRequestLogger(db);
      await requestLogger.logRequest(requestId, caller, 'usage_import', model, 'approved', tokensIn, tokensOut, costUsd, 0);
      // The follow-up UPDATE backdates created_at on the freshly logged row to the usage day.
      await db.query(`UPDATE request_tracking SET created_at=$1 WHERE request_id=$2`, [usedAt, requestId]);
    }
    return reply.status(200).send({ success: true, imported: { caller, model, day, tokensIn, tokensOut, costUsd, usedAt } });
  } catch (error) {
    logger.error({ error }, 'Failed to import usage report');
    return reply.status(500).send({ success: false, error: 'Failed to import usage report' });
  }
});
} }

View File

@ -38,22 +38,40 @@ async function checkOllama(baseUrl: string): Promise<{ status: 'ok' | 'down'; la
async function checkDatabase(): Promise<{ status: 'ok' | 'down'; error?: string }> { async function checkDatabase(): Promise<{ status: 'ok' | 'down'; error?: string }> {
try { try {
await query('SELECT 1'); await withTimeout(query('SELECT 1'), 2500, 'database check timed out');
return { status: 'ok' }; return { status: 'ok' };
} catch (err) { } catch (err) {
return { status: 'down', error: err instanceof Error ? err.message : 'Unknown error' }; return { status: 'down', error: err instanceof Error ? err.message : 'Unknown error' };
} }
} }
/**
 * Races `promise` against a timer.
 *
 * @param promise   The operation to wait for.
 * @param timeoutMs Milliseconds to wait before giving up.
 * @param message   Message of the Error used to reject on timeout.
 * @returns The settled value of `promise` if it settles first; its rejection is
 *          passed through unchanged.
 * @throws Error with `message` when the timer fires first. The underlying
 *         operation is not cancelled — only the wait is abandoned.
 */
async function withTimeout<T>(promise: Promise<T>, timeoutMs: number, message: string): Promise<T> {
  let handle: ReturnType<typeof setTimeout> | undefined;

  // A promise that can only ever reject, once the deadline passes.
  const deadline = new Promise<never>((_resolve, reject) => {
    handle = setTimeout(() => {
      reject(new Error(message));
    }, timeoutMs);
  });

  // Whichever side wins, drop the pending timer so it cannot keep the
  // event loop alive.
  return Promise.race([promise, deadline]).finally(() => {
    if (handle !== undefined) clearTimeout(handle);
  });
}
async function checkQueue(): Promise<{ status: 'ok' | 'down' | 'unknown'; depth?: number; error?: string }> { async function checkQueue(): Promise<{ status: 'ok' | 'down' | 'unknown'; depth?: number; error?: string }> {
const boss = getPgBoss(); const boss = getPgBoss();
if (!boss) return { status: 'unknown' }; if (!boss) return { status: 'unknown' };
try { try {
const [queued, active] = await Promise.all([ const [queued, active] = await withTimeout(
boss.getQueueSize('llm-batch', { before: 'completed' }), Promise.all([
boss.getQueueSize('llm-batch', { before: 'active' }), boss.getQueueSize('llm-batch', { before: 'completed' }),
]); boss.getQueueSize('llm-batch', { before: 'active' }),
]),
2500,
'queue check timed out',
);
return { status: 'ok', depth: (queued ?? 0) + (active ?? 0) }; return { status: 'ok', depth: (queued ?? 0) + (active ?? 0) };
} catch (err) { } catch (err) {
return { status: 'down', error: err instanceof Error ? err.message : 'Unknown error' }; return { status: 'down', error: err instanceof Error ? err.message : 'Unknown error' };
@ -62,8 +80,10 @@ async function checkQueue(): Promise<{ status: 'ok' | 'down' | 'unknown'; depth?
async function getReviewQueueCount(): Promise<number> { async function getReviewQueueCount(): Promise<number> {
try { try {
const result = await query<{ count: string }>( const result = await withTimeout(
'SELECT COUNT(*) as count FROM review_queue WHERE decision IS NULL', query<{ count: string }>('SELECT COUNT(*) as count FROM review_queue WHERE decision IS NULL'),
2500,
'review queue check timed out',
); );
return parseInt(result.rows[0]?.count ?? '0', 10); return parseInt(result.rows[0]?.count ?? '0', 10);
} catch { } catch {
@ -109,8 +129,8 @@ export async function healthRoute(fastify: FastifyInstance): Promise<void> {
const breakerStates = getAllBreakerStates(); const breakerStates = getAllBreakerStates();
const isDown = ollamaCheck.status === 'down' || dbCheck.status === 'down'; const isDown = dbCheck.status === 'down';
const isDegraded = queueCheck.status === 'down' || Object.values(breakerStates).some((s) => s === 'open'); const isDegraded = ollamaCheck.status === 'down' || queueCheck.status === 'down' || Object.values(breakerStates).some((s) => s === 'open');
const status: HealthStatus['status'] = isDown ? 'down' : isDegraded ? 'degraded' : 'ok'; const status: HealthStatus['status'] = isDown ? 'down' : isDegraded ? 'degraded' : 'ok';

View File

@ -107,9 +107,22 @@ export async function registerHTTPSRedirectMiddleware(server: FastifyInstance) {
return; return;
} }
// Skip for localhost/loopback callers (infra-health, fix-engine, internal services) const hostHeader = String(request.headers['host'] ?? '');
const reqHost = String(request.headers['host'] ?? ''); const forwardedHost = String(request.headers['x-forwarded-host'] ?? '');
if (reqHost.startsWith('localhost') || reqHost.startsWith('127.0.0.1')) { const remoteAddress = request.ip ?? '';
const host = forwardedHost || hostHeader;
const isLoopbackHost =
/^localhost(?::\d+)?$/i.test(host) ||
/^127\.0\.0\.1(?::\d+)?$/.test(host) ||
/^\[::1\](?::\d+)?$/.test(host);
const isLoopbackRemote =
remoteAddress === '127.0.0.1' ||
remoteAddress === '::1' ||
remoteAddress === '::ffff:127.0.0.1';
// Internal loopback callers such as Magatama Core run behind the same host
// and must not be redirected to HTTPS unless the Gateway actually serves TLS.
if (isLoopbackHost || isLoopbackRemote) {
return; return;
} }
@ -120,7 +133,6 @@ export async function registerHTTPSRedirectMiddleware(server: FastifyInstance) {
(request.headers['x-forwarded-proto'] === 'https'); (request.headers['x-forwarded-proto'] === 'https');
if (!isSecure && process.env['NODE_ENV'] === 'production') { if (!isSecure && process.env['NODE_ENV'] === 'production') {
const host = request.headers['x-forwarded-host'] || request.headers['host'];
return reply.redirect(`https://${host}${request.url}`); return reply.redirect(`https://${host}${request.url}`);
} }
}); });
@ -132,14 +144,11 @@ export async function registerHTTPSRedirectMiddleware(server: FastifyInstance) {
*/ */
export async function registerSecurityHeadersMiddleware(server: FastifyInstance) { export async function registerSecurityHeadersMiddleware(server: FastifyInstance) {
server.addHook('onSend', async (request, reply) => { server.addHook('onSend', async (request, reply) => {
// Content Security Policy — route handlers may set a narrower CSP before this hook. // Content Security Policy for the self-contained dashboard UI.
// Default allows 'unsafe-inline' for the dashboard UI. reply.header(
if (!reply.getHeader('Content-Security-Policy')) { 'Content-Security-Policy',
reply.header( "default-src 'self'; script-src 'self' 'unsafe-inline'; style-src 'self' 'unsafe-inline'; object-src 'none'; frame-ancestors 'none'; base-uri 'self'; form-action 'self'"
'Content-Security-Policy', );
"default-src 'self'; script-src 'self' 'unsafe-inline'; style-src 'self' 'unsafe-inline'; object-src 'none'; frame-ancestors 'none'; base-uri 'self'; form-action 'self'"
);
}
// Prevent clickjacking // Prevent clickjacking
reply.header('X-Frame-Options', 'DENY'); reply.header('X-Frame-Options', 'DENY');

View File

@ -12,11 +12,23 @@ import { dashboardRoute } from './routes/dashboard.js';
import { streamRoute } from './routes/stream.js'; import { streamRoute } from './routes/stream.js';
import { learningInsightsRoute } from './routes/learning-insights.js'; import { learningInsightsRoute } from './routes/learning-insights.js';
import { staticRoute } from './routes/static.js'; import { staticRoute } from './routes/static.js';
import tenantAuth from './security/tenant-auth.js';
import { internalRoute } from './routes/internal.js';
import { getPool } from './db/client.js'; import { getPool } from './db/client.js';
import { runMigrations } from './db/migrate.js'; import { runMigrations } from './db/migrate.js';
import { initPgBoss } from './queue/pg-boss-client.js'; import { initPgBoss } from './queue/pg-boss-client.js';
import { logger } from './observability/logger.js'; import { logger } from './observability/logger.js';
import { scheduleLearningCycles } from './learning/learning-engine.js'; import { scheduleLearningCycles } from './learning/learning-engine.js';
import { autoSpawnOnBoot } from './modules/auto-discovery.js';
import { embeddingsRoute } from './routes/embeddings.js';
import { replayRoute } from './routes/replay.js';
import { audioRoute } from './routes/audio.js';
import { mcpRoute } from './modules/mcp-server.js';
import { loadWorkspacePreset, applyWorkspaceDefaults } from './modules/workspace-presets.js';
import { loadPlugins } from './modules/plugin-system.js';
import { ingestPeerStats, scheduleFederationPublisher, buildStats } from './modules/federated-stats.js';
import { scheduleAdaptiveLearner, getAllRecommendations } from './modules/adaptive-routing.js';
import { startBridgeWatchdog } from './modules/bridge-watchdog.js';
import { fileURLToPath } from 'url'; import { fileURLToPath } from 'url';
import { dirname, join } from 'path'; import { dirname, join } from 'path';
import { readFileSync, existsSync } from 'fs'; import { readFileSync, existsSync } from 'fs';
@ -93,9 +105,11 @@ async function buildServer() {
'http://192.168.178.196:3000', 'http://192.168.178.196:3000',
/^http:\/\/192\.168\.178\.\d+/, /^http:\/\/192\.168\.178\.\d+/,
/^https:\/\/.*\.context-x\.org$/, /^https:\/\/.*\.context-x\.org$/,
/^https:\/\/(www\.)?runwerk\.app$/,
/^https:\/\/.*\.runwerk\.app$/,
], ],
methods: ['GET', 'POST', 'PUT', 'DELETE', 'OPTIONS'], methods: ['GET', 'POST', 'PUT', 'DELETE', 'OPTIONS'],
allowedHeaders: ['Content-Type', 'Authorization', 'X-Caller-ID'], allowedHeaders: ['Content-Type', 'Authorization', 'X-Caller-ID', 'X-Runwerk-Caller', 'X-Runwerk-Privacy', 'X-Runwerk-Tier', 'X-Runwerk-Purpose'],
credentials: true, credentials: true,
}); });
@ -114,7 +128,17 @@ async function buildServer() {
}), }),
}); });
await server.register(tenantAuth);
await server.register(internalRoute);
await server.register(completionRoute, { prefix: '/v1' }); await server.register(completionRoute, { prefix: '/v1' });
await server.register(embeddingsRoute, { prefix: '/v1' });
await server.register(replayRoute, { prefix: '/v1' });
await server.register(audioRoute, { prefix: '/v1' });
await server.register(mcpRoute);
// Federation ingest: hands the raw request body to ingestPeerStats and echoes
// its result inside a success envelope.
// NOTE(review): the body is passed through unvalidated (`as never`) and this
// handler has no try/catch of its own — confirm ingestPeerStats validates input.
server.post('/v1/federation/ingest', async (request, reply) => {
  const ingestOutcome = ingestPeerStats(request.body as never);
  const payload = { success: true, ...ingestOutcome };
  return reply.send(payload);
});
await server.register(batchRoute, { prefix: '/v1' }); await server.register(batchRoute, { prefix: '/v1' });
await server.register(classifyRoute, { prefix: '/v1' }); await server.register(classifyRoute, { prefix: '/v1' });
await server.register(reviewRoute, { prefix: '/v1' }); await server.register(reviewRoute, { prefix: '/v1' });
@ -193,9 +217,54 @@ async function main() {
} catch (pgErr) { } catch (pgErr) {
logger.warn({ pgErr }, 'PgBoss init failed - continuing without queue'); logger.warn({ pgErr }, 'PgBoss init failed - continuing without queue');
} }
// Workspace preset (apply env defaults from workspace.yaml if present)
try {
const preset = await loadWorkspacePreset();
if (preset) applyWorkspaceDefaults(preset);
} catch (err) {
logger.warn({ err }, 'Workspace preset load failed (non-fatal)');
}
// Plugin system (load pre/post hooks from PLUGINS_DIR)
try {
await loadPlugins();
} catch (err) {
logger.warn({ err }, 'Plugin loading failed (non-fatal)');
}
scheduleLearningCycles(); scheduleLearningCycles();
await server.listen({ port, host }); await server.listen({ port, host });
logger.info({ port, host }, 'LLM Gateway started'); logger.info({ port, host }, 'LLM Gateway started');
// Auto-spawn detected subscription bridges if AUTO_SPAWN_BRIDGES=1
void autoSpawnOnBoot();
// Bridge watchdog (opt-in via WATCHDOG_ENABLED=1)
try {
startBridgeWatchdog();
} catch (err) {
logger.warn({ err }, 'Bridge watchdog start failed');
}
// Adaptive routing learner (opt-in via ADAPTIVE_ROUTING_ENABLED=1)
try {
const pool = getPool();
scheduleAdaptiveLearner(pool as never);
} catch (err) {
logger.warn({ err }, 'Adaptive learner scheduling failed');
}
// Federation publisher (opt-in via FEDERATION_ENABLED=1)
scheduleFederationPublisher(async () => {
const recos = getAllRecommendations();
return buildStats(recos.map((r) => ({
task_type: r.taskType,
model_used: r.preferredModel,
samples: r.rationale.samples,
success_rate: r.rationale.successRate,
avg_latency_ms: r.rationale.avgLatencyMs,
})));
});
} catch (err) { } catch (err) {
logger.error({ err }, 'Failed to start server'); logger.error({ err }, 'Failed to start server');
process.exit(1); process.exit(1);

View File

@ -1,5 +1,5 @@
// Tokenvault Integration Hooks // Tokenvault Integration Hooks
// Instruments LeanCTX and RTK compression tracking // Instruments LLM Gateway compression tracking (legacy hook names retained for backward compat)
// Updated: 2026-04-19 // Updated: 2026-04-19
import { Pool, QueryResult } from 'pg'; import { Pool, QueryResult } from 'pg';
@ -62,13 +62,13 @@ export function estimateTokens(text: string | object): number {
} }
/** /**
* Log compression ratio for RTK output * Log compression ratio for token-trim output
*/ */
export async function logRTKCompression( export async function logGatewayTrimCompression(
db: Pool, db: Pool,
rawOutput: string, rawOutput: string,
compressedOutput: string, compressedOutput: string,
toolUsed: string = 'rtk' toolUsed: string = 'llm-gateway-trim'
): Promise<CompressionMetric> { ): Promise<CompressionMetric> {
const tokensBefore = estimateTokens(rawOutput); const tokensBefore = estimateTokens(rawOutput);
const tokensAfter = estimateTokens(compressedOutput); const tokensAfter = estimateTokens(compressedOutput);
@ -93,9 +93,9 @@ export async function logRTKCompression(
} }
/** /**
* Track LeanCTX file read operations * Track gateway file-read operations
*/ */
export async function logLeanCTXRead( export async function logGatewayFileRead(
db: Pool, db: Pool,
filePath: string, filePath: string,
mode: string, mode: string,
@ -115,7 +115,7 @@ export async function logLeanCTXRead(
tokensBefore: rawTokens, tokensBefore: rawTokens,
tokensAfter: compressedTokens, tokensAfter: compressedTokens,
savingsPct, savingsPct,
toolUsed: 'lean-ctx' toolUsed: 'llm-gateway'
}; };
await logCompressionMetric(db, metric); await logCompressionMetric(db, metric);
@ -207,7 +207,7 @@ export async function getCompressionStats(
tool_used, tool_used,
COUNT(*) as count COUNT(*) as count
FROM tokenvault_metrics FROM tokenvault_metrics
WHERE created_at > NOW() - INTERVAL $1 HOUR WHERE created_at > NOW() - ($1 * INTERVAL '1 hour')
GROUP BY tool_used`, GROUP BY tool_used`,
[hoursBack] [hoursBack]
); );
@ -270,7 +270,7 @@ export async function getCostSummary(
project, project,
SUM(CASE WHEN cost_usd > 0 THEN 1 ELSE 0 END) as paid_tasks SUM(CASE WHEN cost_usd > 0 THEN 1 ELSE 0 END) as paid_tasks
FROM cost_analytics FROM cost_analytics
WHERE created_at > NOW() - INTERVAL $1 HOUR WHERE created_at > NOW() - ($1 * INTERVAL '1 hour')
GROUP BY project`, GROUP BY project`,
[hoursBack] [hoursBack]
); );