feat: merge Gitea main (injection-defense, bridges, dashboard) + Erik WIP features
Reconcile 6-week divergence: Gitea main (injection-defense, output-defense, prompt-guard-client, admin-auth, start-with-env, dashboard-v2, savings-calculator, race-mode, gamification + 13 more modules) merged with Erik's deployed features (usage-report endpoint, per-device entries, CEST timezone, cost-panel, bridge routing). ecosystem.config.cjs excluded (live token, never commit).
This commit is contained in:
parent
c53e0d2165
commit
c7c457ae2a
@ -36,7 +36,6 @@ COPY --from=builder /app/packages/gateway/dist ./packages/gateway/dist
|
||||
|
||||
# Copy production node_modules
|
||||
COPY --from=builder /app/node_modules ./node_modules
|
||||
COPY --from=builder /app/packages/gateway/node_modules ./packages/gateway/node_modules 2>/dev/null || true
|
||||
|
||||
# Copy runtime assets (prompt templates, config)
|
||||
COPY packages/gateway/prompts ./packages/gateway/prompts
|
||||
|
||||
@ -4,15 +4,31 @@ services:
|
||||
container_name: llm-gateway
|
||||
ports:
|
||||
- "3100:3100"
|
||||
extra_hosts:
|
||||
- "host.docker.internal:host-gateway"
|
||||
environment:
|
||||
NODE_ENV: production
|
||||
PORT: "3100"
|
||||
DATABASE_URL: "${DATABASE_URL}"
|
||||
TIP_DATABASE_URL: "${TIP_DATABASE_URL}"
|
||||
OLLAMA_URL: "http://192.168.178.169:11434"
|
||||
OLLAMA_BASE_URL: "${OLLAMA_BASE_URL:-https://ollama.fichtmueller.org}"
|
||||
CLAUDE_BRIDGE_ENABLED: "true"
|
||||
CLAUDE_BRIDGE_URL: "${CLAUDE_BRIDGE_URL:-http://host.docker.internal:3250}"
|
||||
CLAUDE_CODE_URL: "${CLAUDE_CODE_URL:-http://host.docker.internal:3250}"
|
||||
OPENAI_BRIDGE_URL: "${OPENAI_BRIDGE_URL:-http://host.docker.internal:3251}"
|
||||
CHATGPT_BRIDGE_URL: "${CHATGPT_BRIDGE_URL:-http://host.docker.internal:3251}"
|
||||
COPILOT_BRIDGE_URL: "${COPILOT_BRIDGE_URL:-http://host.docker.internal:3252}"
|
||||
GEMINI_BRIDGE_URL: "${GEMINI_BRIDGE_URL:-http://host.docker.internal:3254}"
|
||||
CODEX_BRIDGE_URL: "${CODEX_BRIDGE_URL:-http://host.docker.internal:3253}"
|
||||
OPENAI_CODEX_URL: "${OPENAI_CODEX_URL:-http://host.docker.internal:3253}"
|
||||
AIDER_BRIDGE_URL: "${AIDER_BRIDGE_URL:-http://host.docker.internal:3256}"
|
||||
SHIELDX_URL: "${SHIELDX_URL:-}"
|
||||
GITEA_URL: "http://gitea.context-x.org"
|
||||
LOG_LEVEL: "${LOG_LEVEL:-info}"
|
||||
DASHBOARD_AUTH_TOKEN: "${DASHBOARD_AUTH_TOKEN:-}"
|
||||
REFERENCE_INPUT_COST_PER_1K: "${REFERENCE_INPUT_COST_PER_1K:-0.005}"
|
||||
REFERENCE_OUTPUT_COST_PER_1K: "${REFERENCE_OUTPUT_COST_PER_1K:-0.015}"
|
||||
restart: unless-stopped
|
||||
healthcheck:
|
||||
test: ["CMD", "wget", "-q", "-O-", "http://localhost:3100/health/live"]
|
||||
|
||||
57
package-lock.json
generated
57
package-lock.json
generated
@ -11,10 +11,10 @@
|
||||
"packages/*"
|
||||
],
|
||||
"dependencies": {
|
||||
"jose": "^6.2.2"
|
||||
"jose": "^6.2.3"
|
||||
}
|
||||
},
|
||||
"../../../shieldx": {
|
||||
"../../shieldx": {
|
||||
"extraneous": true
|
||||
},
|
||||
"node_modules/@esbuild/darwin-arm64": {
|
||||
@ -305,6 +305,10 @@
|
||||
"resolved": "packages/codex-lsp-adapter",
|
||||
"link": true
|
||||
},
|
||||
"node_modules/@llm-gateway/companion": {
|
||||
"resolved": "packages/companion",
|
||||
"link": true
|
||||
},
|
||||
"node_modules/@llm-gateway/ctx-health": {
|
||||
"resolved": "packages/ctx-health",
|
||||
"link": true
|
||||
@ -321,6 +325,10 @@
|
||||
"resolved": "packages/learning-integration",
|
||||
"link": true
|
||||
},
|
||||
"node_modules/@llm-gateway/mcp-server": {
|
||||
"resolved": "packages/mcp-server",
|
||||
"link": true
|
||||
},
|
||||
"node_modules/@llm-gateway/prompt-optimizer": {
|
||||
"resolved": "packages/prompt-optimizer",
|
||||
"link": true
|
||||
@ -1127,6 +1135,8 @@
|
||||
},
|
||||
"node_modules/fastify-plugin": {
|
||||
"version": "5.1.0",
|
||||
"resolved": "https://registry.npmjs.org/fastify-plugin/-/fastify-plugin-5.1.0.tgz",
|
||||
"integrity": "sha512-FAIDA8eovSt5qcDgcBvDuX/v0Cjz0ohGhENZ/wpc3y+oZCY2afZ9Baqql3g/lC+OHRnciQol4ww7tuthOb9idw==",
|
||||
"funding": [
|
||||
{
|
||||
"type": "github",
|
||||
@ -1475,9 +1485,9 @@
|
||||
}
|
||||
},
|
||||
"node_modules/jose": {
|
||||
"version": "6.2.2",
|
||||
"resolved": "https://registry.npmjs.org/jose/-/jose-6.2.2.tgz",
|
||||
"integrity": "sha512-d7kPDd34KO/YnzaDOlikGpOurfF0ByC2sEV4cANCtdqLlTfBlw2p14O/5d/zv40gJPbIQxfES3nSx1/oYNyuZQ==",
|
||||
"version": "6.2.3",
|
||||
"resolved": "https://registry.npmjs.org/jose/-/jose-6.2.3.tgz",
|
||||
"integrity": "sha512-YYVDInQKFJfR/xa3ojUTl8c2KoTwiL1R5Wg9YCydwH0x0B9grbzlg5HC7mMjCtUJjbQ/YnGEZIhI5tCgfTb4Hw==",
|
||||
"license": "MIT",
|
||||
"funding": {
|
||||
"url": "https://github.com/sponsors/panva"
|
||||
@ -3178,6 +3188,21 @@
|
||||
"node": ">=0.4"
|
||||
}
|
||||
},
|
||||
"node_modules/yaml": {
|
||||
"version": "2.9.0",
|
||||
"resolved": "https://registry.npmjs.org/yaml/-/yaml-2.9.0.tgz",
|
||||
"integrity": "sha512-2AvhNX3mb8zd6Zy7INTtSpl1F15HW6Wnqj0srWlkKLcpYl/gMIMJiyuGq2KeI2YFxUPjdlB+3Lc10seMLtL4cA==",
|
||||
"license": "ISC",
|
||||
"bin": {
|
||||
"yaml": "bin.mjs"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">= 14.6"
|
||||
},
|
||||
"funding": {
|
||||
"url": "https://github.com/sponsors/eemeli"
|
||||
}
|
||||
},
|
||||
"node_modules/yocto-queue": {
|
||||
"version": "1.2.2",
|
||||
"resolved": "https://registry.npmjs.org/yocto-queue/-/yocto-queue-1.2.2.tgz",
|
||||
@ -4086,6 +4111,16 @@
|
||||
}
|
||||
}
|
||||
},
|
||||
"packages/companion": {
|
||||
"name": "@llm-gateway/companion",
|
||||
"version": "1.0.0",
|
||||
"bin": {
|
||||
"llm-gateway-companion": "bin/llm-gateway-companion.js"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">=18"
|
||||
}
|
||||
},
|
||||
"packages/ctx-health": {
|
||||
"name": "@llm-gateway/ctx-health",
|
||||
"version": "1.0.0",
|
||||
@ -4114,6 +4149,7 @@
|
||||
"@fastify/static": "^8.3.0",
|
||||
"ajv": "^8.17.1",
|
||||
"fastify": "^5.8.5",
|
||||
"fastify-plugin": "^5.1.0",
|
||||
"franc": "^6.2.0",
|
||||
"jose": "^5.4.0",
|
||||
"js-yaml": "^4.1.0",
|
||||
@ -4122,6 +4158,7 @@
|
||||
"pg-boss": "^10.1.3",
|
||||
"pino": "^9.5.0",
|
||||
"prom-client": "^15.1.3",
|
||||
"yaml": "^2.9.0",
|
||||
"zod": "^3.23.8"
|
||||
},
|
||||
"devDependencies": {
|
||||
@ -4448,6 +4485,16 @@
|
||||
}
|
||||
}
|
||||
},
|
||||
"packages/mcp-server": {
|
||||
"name": "@llm-gateway/mcp-server",
|
||||
"version": "1.0.0",
|
||||
"bin": {
|
||||
"llm-gateway-mcp": "bin/llm-gateway-mcp.js"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">=18"
|
||||
}
|
||||
},
|
||||
"packages/prompt-optimizer": {
|
||||
"name": "@llm-gateway/prompt-optimizer",
|
||||
"version": "0.1.0",
|
||||
|
||||
@ -18,6 +18,6 @@
|
||||
"ctx-health:dev": "npm run dev --workspace=packages/ctx-health"
|
||||
},
|
||||
"dependencies": {
|
||||
"jose": "^6.2.2"
|
||||
"jose": "^6.2.3"
|
||||
}
|
||||
}
|
||||
|
||||
@ -7,7 +7,8 @@
|
||||
"build": "tsc && npm run build:copy-assets",
|
||||
"build:copy-assets": "mkdir -p dist/db/migrations dist/config dist/public && cp -r src/db/migrations/*.sql dist/db/migrations/ 2>/dev/null || true && cp -r src/config/*.yaml dist/config/ 2>/dev/null || true && cp -r public/* dist/public/ 2>/dev/null || true",
|
||||
"start": "node dist/server.js",
|
||||
"test": "vitest"
|
||||
"test": "vitest",
|
||||
"prestart": "node scripts/check-build-drift.mjs"
|
||||
},
|
||||
"dependencies": {
|
||||
"@fastify/cors": "^10.1.0",
|
||||
@ -16,6 +17,7 @@
|
||||
"@fastify/static": "^8.3.0",
|
||||
"ajv": "^8.17.1",
|
||||
"fastify": "^5.8.5",
|
||||
"fastify-plugin": "^5.1.0",
|
||||
"franc": "^6.2.0",
|
||||
"jose": "^5.4.0",
|
||||
"js-yaml": "^4.1.0",
|
||||
@ -24,6 +26,7 @@
|
||||
"pg-boss": "^10.1.3",
|
||||
"pino": "^9.5.0",
|
||||
"prom-client": "^15.1.3",
|
||||
"yaml": "^2.9.0",
|
||||
"zod": "^3.23.8"
|
||||
},
|
||||
"devDependencies": {
|
||||
|
||||
@ -1,63 +1,105 @@
|
||||
id: linkedin_post
|
||||
version: "1.0.0"
|
||||
version: "2.0.0"
|
||||
task_type: linkedin_post
|
||||
description: "LinkedIn teaser in Rene Fichtmueller's voice. Anti-AI, anti-marketing, technical, direct."
|
||||
|
||||
system_prompt: |
|
||||
You are a professional LinkedIn content writer. Write engaging, authentic posts that sound human.
|
||||
You write a single short LinkedIn post in Rene Fichtmueller's voice. Rene is a network/optics engineer who blogs at blog.fichtmueller.org. His voice is direct, technical, sometimes contrarian, never marketing.
|
||||
|
||||
Rules:
|
||||
- Maximum 1300 characters (LinkedIn soft limit)
|
||||
- No hashtag spam (max 3 relevant hashtags)
|
||||
- No engagement-bait questions at the end
|
||||
- No "In today's fast-paced world" openings
|
||||
- Write in first person, direct and confident tone
|
||||
- Include a clear value point or insight
|
||||
- Current date: {{current_date}}
|
||||
HARD RULES — do not violate:
|
||||
- 2 to 3 short sentences. Maximum 4. Period.
|
||||
- No hashtags. None. Not at the end, not anywhere.
|
||||
- No emojis. Not even one.
|
||||
- No engagement-bait. Do not end with "What do you think?", "Thoughts?", "Have you seen this?".
|
||||
- No call-to-action language ("Check it out", "Read more", "Don't miss").
|
||||
- No meta-references to the blog post itself: do not write "I wrote about this", "I published a piece", "I broke this down", "more in the article".
|
||||
- End with the URL on its own line. Nothing after the URL.
|
||||
|
||||
BANNED PHRASES — never use any of these:
|
||||
- delve, leverage, robust, journey, embark, paradigm, unlock, seamlessly, holistic, harness, foster, amplify, underscore, indelible, profound, intricate, meticulous, testament, vibrant, bespoke, encompass, hitherto, realm, utilize, synergy
|
||||
- "leaving money on the table"
|
||||
- "until it's too late"
|
||||
- "the line item most X skip"
|
||||
- "turns out"
|
||||
- "the unexpected part is"
|
||||
- "the gap between X and Y is wider than"
|
||||
- "in today's fast-paced", "in the world of", "in the realm of"
|
||||
- "it's important to note", "it's worth noting"
|
||||
- "let's dive into", "let's explore"
|
||||
- "the future of X", "the next generation of X" (unless quoting someone)
|
||||
- "game-changer", "cutting-edge", "groundbreaking", "comprehensive"
|
||||
|
||||
TONE — match these traits:
|
||||
- Specific numbers over generalities. 20W is better than "high power". 14 weeks is better than "long lead time".
|
||||
- Named products, standards, RFCs when relevant. 400ZR+, RPKI, IEEE 802.3.
|
||||
- First person ("I", "my", "we") where genuine.
|
||||
- Short sentences. Period. Short sentences. Period.
|
||||
- Concession sometimes: admit what you don't know or what surprised you.
|
||||
- Closing line stands on its own. No qualifier, no hedge.
|
||||
|
||||
Current date: {{current_date}}
|
||||
|
||||
{{few_shot_examples}}
|
||||
|
||||
system_prompt_de: |
|
||||
Du bist ein professioneller LinkedIn-Content-Writer. Schreibe authentische, menschlich klingende Beiträge.
|
||||
Du schreibst einen kurzen LinkedIn-Post in der Stimme von Rene Fichtmueller. Direkt, technisch, manchmal contrarian, nie Marketing.
|
||||
|
||||
Regeln:
|
||||
- Maximal 1300 Zeichen (LinkedIn Soft-Limit)
|
||||
- Keine Hashtag-Spam (max. 3 relevante Hashtags)
|
||||
- Keine Engagement-Bait-Fragen am Ende
|
||||
- Keine Einstiege mit "In der heutigen schnelllebigen Welt"
|
||||
- Schreibe in der Ich-Perspektive, direkt und selbstsicher
|
||||
- Enthalte einen klaren Mehrwert oder Einblick
|
||||
- Aktuelles Datum: {{current_date}}
|
||||
HARTE REGELN — nie verletzen:
|
||||
- 2 bis 3 kurze Sätze. Maximal 4. Punkt.
|
||||
- Keine Hashtags. Keine. Nirgendwo.
|
||||
- Keine Emojis. Auch nicht einer.
|
||||
- Kein Engagement-Bait. Niemals enden mit "Was meint ihr?", "Eure Erfahrung?".
|
||||
- Keine Call-to-Action-Sprache ("Schaut mal rein", "Hier mehr lesen").
|
||||
- Keine Meta-Referenzen auf den Blog-Post: kein "Ich habe dazu geschrieben", "Mehr im Artikel".
|
||||
- URL alleine in der letzten Zeile. Nichts danach.
|
||||
|
||||
VERBOTENE WORTE/PHRASEN:
|
||||
- "leverage", "delve", "robust", "harness", "navigieren", "Reise", "Paradigma", "freischalten", "ganzheitlich", "Synergie", "umfassend"
|
||||
- "in der heutigen schnelllebigen Welt"
|
||||
- "es lohnt sich zu erwähnen"
|
||||
|
||||
TON:
|
||||
- Konkrete Zahlen statt Verallgemeinerungen.
|
||||
- Erste Person wo authentisch.
|
||||
- Kurze Sätze. Punkt.
|
||||
- Konzession wo möglich: zugeben was überrascht hat.
|
||||
|
||||
Aktuelles Datum: {{current_date}}
|
||||
|
||||
user_template: |
|
||||
Topic/Input:
|
||||
Article info:
|
||||
{{input}}
|
||||
|
||||
Context:
|
||||
{{user_context}}
|
||||
|
||||
Write a LinkedIn post:
|
||||
Write the LinkedIn post now. 2-3 sentences. No hashtags. URL on last line. Stop after the URL.
|
||||
|
||||
user_template_de: |
|
||||
Thema/Eingabe:
|
||||
Artikel-Infos:
|
||||
{{input}}
|
||||
|
||||
Kontext:
|
||||
{{user_context}}
|
||||
|
||||
Schreibe einen LinkedIn-Beitrag:
|
||||
Schreibe den LinkedIn-Post jetzt. 2-3 Sätze. Keine Hashtags. URL in der letzten Zeile. Stopp nach der URL.
|
||||
|
||||
few_shot_examples:
|
||||
- user: "We deployed a new BGP monitoring system last week"
|
||||
- user: "Article info:\nTitle: BGP Route Leak Hit My Network Last Tuesday\nExcerpt: A transit provider re-advertised our customer cone to the wrong peers. Detection took 4 hours. The mitigation took 14 minutes once we saw it.\nURL: https://blog.fichtmueller.org/example1/"
|
||||
assistant: |
|
||||
We spent 18 months dealing with BGP route leaks that cost us 4 hours of network visibility per incident.
|
||||
A transit provider re-advertised our customer cone to the wrong peers last Tuesday. We caught it after 4 hours. The fix took 14 minutes once we saw it — the cost was the 4 hours we didn't.
|
||||
|
||||
Last week we deployed our own monitoring system. First result: caught a route leak from a transit provider within 3 minutes, before any customer noticed.
|
||||
https://blog.fichtmueller.org/example1/
|
||||
|
||||
The fix wasn't sophisticated. It was systematic alerting on RPKI invalid routes combined with PeeringDB cross-referencing.
|
||||
- user: "Article info:\nTitle: 400ZR Reach Math Nobody Shows You\nExcerpt: Datasheets quote a maximum reach number. That number is for a clean link with margin you don't have.\nURL: https://blog.fichtmueller.org/example2/"
|
||||
assistant: |
|
||||
The 400ZR datasheet number is a ceiling, not a target. Real-world links have eight connectors, two amplifier hops, and aged fiber — the spec assumes none of that.
|
||||
|
||||
If you're running a mid-size ISP and still relying on customer tickets to discover routing issues, that's the gap worth closing.
|
||||
Build to the budget you actually have, not the one the marketing slide showed you.
|
||||
|
||||
#BGP #NetworkEngineering #ISP
|
||||
https://blog.fichtmueller.org/example2/
|
||||
|
||||
- user: "Article info:\nTitle: ADHD Diary #008: The 47-Tabs Problem\nExcerpt: I closed 47 browser tabs Tuesday. By Wednesday I had 51 again. This is not a tool problem.\nURL: https://blog.fichtmueller.org/example3/"
|
||||
assistant: |
|
||||
I closed 47 browser tabs on Tuesday. By Wednesday morning I had 51 again.
|
||||
|
||||
This isn't a tool problem. Every tab is a "I might do this later" promise I made to myself. The fix isn't a better tab manager — it's saying no.
|
||||
|
||||
https://blog.fichtmueller.org/example3/
|
||||
|
||||
variables:
|
||||
- input
|
||||
|
||||
@ -1,6 +1,10 @@
|
||||
<!DOCTYPE html>
|
||||
<html lang="en">
|
||||
<head>
|
||||
<script>
|
||||
/* Force timestamps to Europe/Berlin (CEST/CET, auto-DST) */
|
||||
(function () {
  // Every locale-formatting call on Date is pinned to this IANA zone, so
  // CET/CEST switching is handled by the runtime's timezone data.
  var BERLIN_ZONE = "Europe/Berlin";
  var FALLBACK_LOCALE = "de-DE";

  // Wrap one Date.prototype formatter: keep the caller's options, but always
  // overwrite `timeZone`, and fall back to the German locale when none is given.
  function pinToBerlin(methodName) {
    var nativeFormatter = Date.prototype[methodName];
    Date.prototype[methodName] = function (locale, options) {
      var forcedOptions = Object.assign({}, options || {}, { timeZone: BERLIN_ZONE });
      return nativeFormatter.call(this, locale || FALLBACK_LOCALE, forcedOptions);
    };
  }

  ["toLocaleString", "toLocaleTimeString", "toLocaleDateString"].forEach(function (methodName) {
    pinToBerlin(methodName);
  });
})();
|
||||
</script>
|
||||
<meta charset="UTF-8">
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
||||
<title>llm.gateway / workbench</title>
|
||||
@ -127,13 +131,11 @@
|
||||
/* ─── Tab navigation ──────────────────────────────────────────────────── */
|
||||
.tabs {
|
||||
display: flex;
|
||||
flex-wrap: wrap;
|
||||
gap: 0;
|
||||
border-bottom: 1px solid var(--line);
|
||||
margin: 0 0 28px;
|
||||
overflow-x: auto;
|
||||
scrollbar-width: none;
|
||||
}
|
||||
.tabs::-webkit-scrollbar { display: none; }
|
||||
.tab-trigger {
|
||||
background: none;
|
||||
border: none;
|
||||
@ -498,7 +500,7 @@
|
||||
body.hide-empty-providers .wallet-card[data-status="unknown"] { display: none; }
|
||||
|
||||
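/* In Simple Mode, hide the noisy "5-axis" header explainer. NOTE(review): :contains() is a jQuery extension, not a standard CSS pseudo-class — browsers discard a rule with an invalid selector, so this never hides anything. Give the explainer .h-meta a dedicated class and select on that instead. */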
|
||||
body.simple-mode .h-section .h-meta:contains('Lean-CTX') { display: none; }
|
||||
body.simple-mode .h-section .h-meta:contains('LLM Gateway') { display: none; }
|
||||
|
||||
/* ─── Hero (Buddy + Savings + Cost-VS) ───────────────────────────────── */
|
||||
.hero-grid {
|
||||
@ -1066,6 +1068,99 @@
|
||||
font-size: 0.8rem;
|
||||
}
|
||||
|
||||
/* ─── Discover Panel ──────────────────────────────────────────────── */
|
||||
.discover-grid {
|
||||
display: grid; grid-template-columns: repeat(auto-fit, minmax(280px, 1fr));
|
||||
gap: 12px; margin-bottom: 16px;
|
||||
}
|
||||
.discover-card {
|
||||
border: 1px solid var(--line-2);
|
||||
border-radius: 10px;
|
||||
padding: 12px 14px;
|
||||
background: var(--surface-1, rgba(255,255,255,0.02));
|
||||
}
|
||||
.discover-card-title {
|
||||
font-size: 0.72rem; color: var(--text-muted, #888);
|
||||
text-transform: uppercase; letter-spacing: 0.08em;
|
||||
margin-bottom: 4px;
|
||||
}
|
||||
.discover-card-stat {
|
||||
font-family: var(--mono); font-size: 1.4rem;
|
||||
color: var(--accent); margin-bottom: 8px;
|
||||
}
|
||||
.discover-card-list {
|
||||
list-style: none; padding: 0; margin: 0;
|
||||
font-size: 0.78rem; font-family: var(--mono);
|
||||
}
|
||||
.discover-card-list li {
|
||||
padding: 4px 0;
|
||||
border-top: 1px solid var(--line-1, rgba(255,255,255,0.05));
|
||||
display: flex; justify-content: space-between; align-items: center;
|
||||
}
|
||||
.discover-card-list li:first-child { border-top: none; }
|
||||
.discover-card-list .disc-ok { color: var(--accent); }
|
||||
.discover-card-list .disc-no { color: var(--text-muted, #888); opacity: 0.6; }
|
||||
|
||||
/* ─── API Tab ──────────────────────────────────────────────────────── */
|
||||
.api-card {
|
||||
border: 1px solid var(--line-2);
|
||||
border-radius: 10px;
|
||||
padding: 14px 16px;
|
||||
margin-bottom: 14px;
|
||||
background: var(--surface-1, rgba(255,255,255,0.02));
|
||||
}
|
||||
.api-card-head {
|
||||
display: flex; align-items: center; gap: 10px; flex-wrap: wrap;
|
||||
margin-bottom: 10px;
|
||||
}
|
||||
.api-method {
|
||||
font-family: var(--mono); font-size: 0.7rem; font-weight: 700;
|
||||
padding: 3px 8px; border-radius: 4px;
|
||||
background: var(--accent); color: #fff; letter-spacing: 0.05em;
|
||||
}
|
||||
.api-path {
|
||||
font-family: var(--mono); font-size: 0.92rem;
|
||||
color: var(--text);
|
||||
}
|
||||
.api-tag {
|
||||
font-size: 0.72rem; color: var(--text-muted, #888);
|
||||
font-style: italic; flex: 1;
|
||||
}
|
||||
.api-snippet {
|
||||
font-family: var(--mono); font-size: 0.8rem;
|
||||
background: var(--surface-2, rgba(0,0,0,0.25));
|
||||
border: 1px solid var(--line-1, rgba(255,255,255,0.05));
|
||||
padding: 12px 14px; border-radius: 6px;
|
||||
overflow-x: auto; white-space: pre;
|
||||
color: var(--text); margin: 0;
|
||||
}
|
||||
.api-snippet code { background: transparent; padding: 0; }
|
||||
.api-copy { padding: 4px 12px; font-size: 0.7rem; }
|
||||
|
||||
.api-tryout {
|
||||
border: 1px solid var(--line-2);
|
||||
border-radius: 10px;
|
||||
padding: 14px 16px;
|
||||
background: var(--surface-1, rgba(255,255,255,0.02));
|
||||
}
|
||||
.api-tryout-row { display: flex; flex-wrap: wrap; align-items: center; }
|
||||
|
||||
.api-bridge-table-wrap { overflow-x: auto; border: 1px solid var(--line-2); border-radius: 10px; }
|
||||
.api-bridge-table {
|
||||
width: 100%; border-collapse: collapse; font-size: 0.85rem;
|
||||
}
|
||||
.api-bridge-table th, .api-bridge-table td {
|
||||
padding: 10px 12px; text-align: left; border-bottom: 1px solid var(--line-1, rgba(255,255,255,0.05));
|
||||
}
|
||||
.api-bridge-table th {
|
||||
font-weight: 600; color: var(--text-muted, #888);
|
||||
text-transform: uppercase; letter-spacing: 0.05em; font-size: 0.72rem;
|
||||
}
|
||||
.api-bridge-table tr:last-child td { border-bottom: none; }
|
||||
.api-bridge-status { font-family: var(--mono); font-size: 0.78rem; }
|
||||
.api-bridge-status.ok { color: var(--accent); }
|
||||
.api-bridge-status.err { color: #e34; }
|
||||
|
||||
/* ─── Buttons ────────────────────────────────────────────────────────── */
|
||||
.btn {
|
||||
font-family: var(--mono);
|
||||
@ -1338,6 +1433,7 @@
|
||||
<button class="tab-trigger" data-tab="leaderboard" role="tab" title="Race-mode results — fastest model leaderboard if you ran multi-model races"><span class="tab-num">08</span>races <span class="tab-badge" id="leaderboardTabBadge">·</span></button>
|
||||
<button class="tab-trigger" data-tab="share" role="tab" title="Generate an embeddable SVG card showing your savings (for blog/Twitter/README)"><span class="tab-num">09</span>share</button>
|
||||
<button class="tab-trigger" data-tab="report" role="tab" title="Generate a printable monthly PDF report"><span class="tab-num">10</span>report</button>
|
||||
<button class="tab-trigger" data-tab="api" role="tab" title="API reference — copy-paste curl/SDK examples for OpenAI-compat, Anthropic-compat, native"><span class="tab-num">11</span>api</button>
|
||||
</nav>
|
||||
|
||||
<!-- ─── Tab: Overview ────────────────────────────────────────────────── -->
|
||||
@ -1356,7 +1452,7 @@
|
||||
<div class="hero-counter"><span id="heroTokensSavedCombined">0</span><span style="font-size:1.1rem;color:var(--dim);font-weight:400;margin-left:8px;">tokens</span></div>
|
||||
<div class="hero-layer-breakdown" id="heroLayerBreakdown">
|
||||
<div class="layer-row"><span class="layer-name">⚡ Gateway (LLM calls)</span><span class="layer-val" id="heroTokensSaved">0</span></div>
|
||||
<div class="layer-row" id="heroLeanCtxRow" style="display:none;"><span class="layer-name">🗜 Lean-CTX (tool calls)</span><span class="layer-val" id="heroLeanCtxTokens">—</span></div>
|
||||
<div class="layer-row" id="heroExternalToolRow" style="display:none;"><span class="layer-name">🗜 External tool compression (legacy)</span><span class="layer-val" id="heroExternalToolTokens">—</span></div>
|
||||
</div>
|
||||
<div class="hero-row">
|
||||
<div class="hero-pill">
|
||||
@ -1392,8 +1488,8 @@
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<!-- ─── Five-Axis Savings Breakdown — what makes us better than Lean-CTX ── -->
|
||||
<h2 class="h-section">Savings Sources <span class="h-meta">we measure 5 axes — Lean-CTX measures 1</span></h2>
|
||||
<!-- ─── Five-Axis Savings Breakdown — full savings breakdown ── -->
|
||||
<h2 class="h-section">Savings Sources <span class="h-meta">5 measurement axes across all calls</span></h2>
|
||||
<div class="savings-axes" id="savingsAxes">
|
||||
<div class="loading">loading</div>
|
||||
</div>
|
||||
@ -1472,8 +1568,34 @@
|
||||
<strong>auto-gateway</strong> <span id="subsAutoState">detection only</span>
|
||||
— installed CLI subscriptions are wrapped into HTTP bridges and exposed via <code>/v1/chat/completions</code>
|
||||
</div>
|
||||
<button class="btn btn-sm primary" id="subsSpawnBtn" type="button">⟳ spawn missing bridges</button>
|
||||
<div style="display: flex; gap: 8px;">
|
||||
<button class="btn btn-sm" id="discoverFullBtn" type="button" title="Full-system scan: CLIs + local LLMs + API keys, then auto-spawn any detected bridges">⚡ discover & connect all</button>
|
||||
<button class="btn btn-sm primary" id="subsSpawnBtn" type="button">⟳ spawn missing bridges</button>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<!-- ── Full discovery report (populated by discover button) ────────── -->
|
||||
<div id="discoverReportWrap" style="display: none; margin-bottom: 14px;">
|
||||
<h2 class="h-section">Discovery Report <span class="h-meta" id="discoverReportMeta">—</span></h2>
|
||||
<div class="discover-grid">
|
||||
<div class="discover-card">
|
||||
<div class="discover-card-title">CLI Subscriptions</div>
|
||||
<div class="discover-card-stat"><span id="discCntSubs">0</span> detected</div>
|
||||
<ul class="discover-card-list" id="discListSubs"></ul>
|
||||
</div>
|
||||
<div class="discover-card">
|
||||
<div class="discover-card-title">Local LLM Servers</div>
|
||||
<div class="discover-card-stat"><span id="discCntLocal">0</span> running</div>
|
||||
<ul class="discover-card-list" id="discListLocal"></ul>
|
||||
</div>
|
||||
<div class="discover-card">
|
||||
<div class="discover-card-title">API-Key Providers</div>
|
||||
<div class="discover-card-stat"><span id="discCntKeys">0</span> configured</div>
|
||||
<ul class="discover-card-list" id="discListKeys"></ul>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div class="subs-grid" id="subscriptionsList">
|
||||
<div class="loading">discovering installed subscriptions</div>
|
||||
</div>
|
||||
@ -1567,6 +1689,11 @@
|
||||
<div class="metric-value" id="cacheHitRate">0<span class="metric-unit">%</span></div>
|
||||
<div class="metric-change">hits ÷ total req</div>
|
||||
</div>
|
||||
<div class="metric">
|
||||
<div class="metric-label">compressed since last restart</div>
|
||||
<div class="metric-value" id="compressedSinceRestart">0</div>
|
||||
<div class="metric-change" id="compressedSinceRestartMeta">— · — ops · since —</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<h2 class="h-section">Top Caching Callers <span class="h-meta">most savings</span></h2>
|
||||
@ -1675,6 +1802,136 @@
|
||||
<div class="share-hint">Tip: in the report window, press <code>Cmd/Ctrl+P</code> → "Save as PDF". The report is fully styled for A4 print.</div>
|
||||
</section>
|
||||
|
||||
<!-- ─── Tab: API Reference ─────────────────────────────────────────── -->
|
||||
<section class="tab-panel" data-tab="api">
|
||||
<h2 class="h-section">API Reference <span class="h-meta">all endpoints route through compression + caller tracking</span></h2>
|
||||
|
||||
<div class="api-intro" style="margin: 8px 0 16px; color: var(--text-muted, #888); font-size: 13px; line-height: 1.5;">
|
||||
The LLM Gateway exposes three POST endpoints and one GET. Every call is logged in
|
||||
<em>activity</em>, compressed when input ≥ 700 tokens, and routed via <code>routing-rules.yaml</code>
|
||||
to the right subscription bridge (Claude Code, ChatGPT, Copilot, M365 Copilot, Codex) or local Ollama.
|
||||
</div>
|
||||
|
||||
<!-- ── Endpoint card: OpenAI-compatible ─────────────────────────── -->
|
||||
<div class="api-card" data-endpoint="chat">
|
||||
<div class="api-card-head">
|
||||
<span class="api-method">POST</span>
|
||||
<code class="api-path">/v1/chat/completions</code>
|
||||
<span class="api-tag">OpenAI-compatible · works with `openai` SDK</span>
|
||||
<button class="btn ghost api-copy" data-target="api-snippet-chat" type="button">copy</button>
|
||||
</div>
|
||||
<pre id="api-snippet-chat" class="api-snippet"><code>curl https://llm-gateway.context-x.org/v1/chat/completions \
|
||||
-H "Content-Type: application/json" \
|
||||
-d '{
|
||||
"model": "claude-sonnet-4.6",
|
||||
"messages": [{"role": "user", "content": "hi"}]
|
||||
}'</code></pre>
|
||||
</div>
|
||||
|
||||
<!-- ── Endpoint card: Anthropic-compatible ──────────────────────── -->
|
||||
<div class="api-card" data-endpoint="messages">
|
||||
<div class="api-card-head">
|
||||
<span class="api-method">POST</span>
|
||||
<code class="api-path">/v1/messages</code>
|
||||
<span class="api-tag">Anthropic-compatible · works with `@anthropic-ai/sdk`</span>
|
||||
<button class="btn ghost api-copy" data-target="api-snippet-messages" type="button">copy</button>
|
||||
</div>
|
||||
<pre id="api-snippet-messages" class="api-snippet"><code>curl https://llm-gateway.context-x.org/v1/messages \
|
||||
-H "Content-Type: application/json" \
|
||||
-d '{
|
||||
"model": "claude-sonnet-4.6",
|
||||
"messages": [{"role": "user", "content": "hi"}],
|
||||
"max_tokens": 1024
|
||||
}'</code></pre>
|
||||
</div>
|
||||
|
||||
<!-- ── Endpoint card: Native ────────────────────────────────────── -->
|
||||
<div class="api-card" data-endpoint="completion">
|
||||
<div class="api-card-head">
|
||||
<span class="api-method">POST</span>
|
||||
<code class="api-path">/v1/completion</code>
|
||||
<span class="api-tag">native — full caller-tracking + compression options</span>
|
||||
<button class="btn ghost api-copy" data-target="api-snippet-completion" type="button">copy</button>
|
||||
</div>
|
||||
<pre id="api-snippet-completion" class="api-snippet"><code>curl https://llm-gateway.context-x.org/v1/completion \
|
||||
-H "Content-Type: application/json" \
|
||||
-d '{
|
||||
"caller": "my-app",
|
||||
"task_type": "generic_qa",
|
||||
"input": "your prompt here",
|
||||
"options": { "compression": { "enabled": true, "mode": "auto" } }
|
||||
}'</code></pre>
|
||||
</div>
|
||||
|
||||
<!-- ── Endpoint card: Models list ───────────────────────────────── -->
|
||||
<div class="api-card" data-endpoint="models">
|
||||
<div class="api-card-head">
|
||||
<span class="api-method">GET</span>
|
||||
<code class="api-path">/v1/models</code>
|
||||
<span class="api-tag">list every model the gateway can route to</span>
|
||||
<button class="btn ghost api-copy" data-target="api-snippet-models" type="button">copy</button>
|
||||
</div>
|
||||
<pre id="api-snippet-models" class="api-snippet"><code>curl https://llm-gateway.context-x.org/v1/models</code></pre>
|
||||
</div>
|
||||
|
||||
<!-- ── Try-It-Out playground ────────────────────────────────────── -->
|
||||
<h2 class="h-section" style="margin-top: 28px;">Try it out <span class="h-meta">live POST against the gateway</span></h2>
|
||||
<div class="api-tryout">
|
||||
<div class="api-tryout-row">
|
||||
<label class="settings-row-label">Endpoint:
|
||||
<select id="apiTryEndpoint" class="settings-input" style="width: 220px; margin-left: 8px;">
|
||||
<option value="/v1/completion">/v1/completion (native)</option>
|
||||
<option value="/v1/chat/completions">/v1/chat/completions (OpenAI)</option>
|
||||
<option value="/v1/messages">/v1/messages (Anthropic)</option>
|
||||
</select>
|
||||
</label>
|
||||
<label class="settings-row-label" style="margin-left: 18px;">Model:
|
||||
<input id="apiTryModel" class="settings-input" type="text" value="claude-sonnet-4.6" style="width: 200px; margin-left: 8px;">
|
||||
</label>
|
||||
</div>
|
||||
<label class="settings-row-label" style="display: block; margin-top: 10px;">Prompt:
|
||||
<textarea id="apiTryPrompt" class="settings-input" rows="4" style="width: 100%; margin-top: 6px;" placeholder="Type your prompt — long inputs (>700 tokens) will be compressed automatically.">Say hello in three different languages.</textarea>
|
||||
</label>
|
||||
<div style="margin-top: 10px;">
|
||||
<button class="btn primary" id="apiTryRun" type="button">send request</button>
|
||||
<span id="apiTryStatus" style="margin-left: 12px; font-size: 12px; color: var(--text-muted, #888);"></span>
|
||||
</div>
|
||||
<div id="apiTryResultWrap" style="margin-top: 14px; display: none;">
|
||||
<div class="api-tryout-meta" id="apiTryMeta" style="font-size: 12px; color: var(--text-muted, #888); margin-bottom: 6px;"></div>
|
||||
<pre class="api-snippet"><code id="apiTryResult"></code></pre>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<!-- ── Bridge mapping (model → subscription) ────────────────────── -->
|
||||
<h2 class="h-section" style="margin-top: 28px;">Model → Bridge Mapping <span class="h-meta">which subscription each model alias routes to</span></h2>
|
||||
<div class="api-bridge-table-wrap">
|
||||
<table class="api-bridge-table" id="apiBridgeTable">
|
||||
<thead>
|
||||
<tr>
|
||||
<th>Model alias</th>
|
||||
<th>Bridge</th>
|
||||
<th>Subscription used</th>
|
||||
<th>Port</th>
|
||||
<th>Status</th>
|
||||
</tr>
|
||||
</thead>
|
||||
<tbody>
|
||||
<tr><td><code>claude-sonnet-4.6</code>, <code>claude-haiku</code>, <code>claude-opus</code></td><td>claude-bridge</td><td>Claude Code Max (OAuth)</td><td>3250</td><td class="api-bridge-status" data-bridge="claude-bridge">—</td></tr>
|
||||
<tr><td><code>gpt-4o</code>, <code>gpt-4.1</code>, <code>gpt-5.x</code></td><td>openai-bridge</td><td>ChatGPT Plus / Pro</td><td>3251</td><td class="api-bridge-status" data-bridge="openai-bridge">—</td></tr>
|
||||
<tr><td><code>copilot-gpt-4o</code>, <code>copilot-claude-3.7</code></td><td>copilot-bridge</td><td>GitHub Copilot</td><td>3252</td><td class="api-bridge-status" data-bridge="copilot-bridge">—</td></tr>
|
||||
<tr><td><code>codex-mini</code>, <code>gpt-5.1-codex-mini</code></td><td>codex-bridge</td><td>OpenAI Codex CLI</td><td>3253</td><td class="api-bridge-status" data-bridge="codex-bridge">—</td></tr>
|
||||
<tr><td><code>m365-copilot</code></td><td>m365-copilot-bridge</td><td>Microsoft 365 Copilot</td><td>3257</td><td class="api-bridge-status" data-bridge="m365-copilot-bridge">—</td></tr>
|
||||
<tr><td><code>qwen2.5:3b / 7b / 14b / 32b</code>, <code>magatama:32b</code>, <code>magatama-coder</code></td><td>ollama (Mac Studio)</td><td>local — no cost</td><td>11434</td><td class="api-bridge-status" data-bridge="ollama">—</td></tr>
|
||||
</tbody>
|
||||
</table>
|
||||
</div>
|
||||
<div class="share-hint" style="margin-top: 12px;">
|
||||
The gateway picks the bridge from <code>routing-rules.yaml</code> based on <code>task_type</code> and the
|
||||
requested <code>model</code>. You can also hit a bridge directly (e.g. <code>http://82.165.222.127:3250/v1/messages</code>)
|
||||
— but then you bypass compression, savings tracking, and the routing rules.
|
||||
</div>
|
||||
</section>
|
||||
|
||||
<!-- ─── Caller Deep-Dive Modal ───────────────────────────────────── -->
|
||||
<div class="modal-overlay" id="callerModal" role="dialog" aria-modal="true">
|
||||
<div class="modal" style="max-width: 900px;">
|
||||
@ -1883,6 +2140,9 @@
|
||||
el.innerHTML = clients.map(client => {
|
||||
const lastSeen = client.lastSeen ? new Date(client.lastSeen).toLocaleString() : 'never';
|
||||
const callerList = client.callers?.length ? client.callers.join(', ') : 'no caller id seen';
|
||||
const bridgeState = client.bridgeProvider
|
||||
? `${client.bridgeProvider}: ${client.bridgeStatus || 'not configured'}${client.bridgeDetail ? ` (${client.bridgeDetail})` : ''}`
|
||||
: 'bridge: OpenAI-compatible / manual client config';
|
||||
return `
|
||||
<div class="client-item">
|
||||
<div class="client-top">
|
||||
@ -1892,6 +2152,7 @@
|
||||
<div class="client-meta">
|
||||
<div><strong>${formatNumber(client.requestCount)}</strong> requests · <strong>${formatNumber(client.tokensSaved)}</strong> saved</div>
|
||||
<div title="${escapeHtml(callerList)}">caller: ${escapeHtml(callerList)}</div>
|
||||
<div title="${escapeHtml(bridgeState)}">gateway: ${escapeHtml(bridgeState)}</div>
|
||||
<div>last: ${escapeHtml(lastSeen)}</div>
|
||||
</div>
|
||||
</div>
|
||||
@ -2137,6 +2398,60 @@
|
||||
`;
|
||||
}
|
||||
|
||||
// ─── Full Discovery: CLIs + Local LLMs + API Keys ────────────────────
|
||||
// Click handler for the "full discovery" button: POSTs to the discovery
// endpoint, then renders the three result lists (CLI subscriptions, local LLM
// servers, API-key providers) and refreshes the subscriptions panel.
// The button shows progress / result text and is restored 3 s after the run.
document.getElementById('discoverFullBtn')?.addEventListener('click', async () => {
  const btn = document.getElementById('discoverFullBtn');
  const wrap = document.getElementById('discoverReportWrap');
  const meta = document.getElementById('discoverReportMeta');
  // Everything in the report comes from the server response and is written
  // via innerHTML below, so each interpolated value is HTML-escaped first.
  const esc = (value) => escapeHtml(String(value));
  btn.disabled = true;
  const orig = btn.textContent;
  btn.textContent = '⏳ scanning…';
  try {
    const res = await apiFetch(`${API_BASE}/api/dashboard/discover`, { method: 'POST' });
    const payload = await res.json();
    if (!payload.success) throw new Error(payload.error || 'discovery failed');
    const r = payload.data.report;
    const spawnedCount = payload.data.spawnedCount;

    wrap.style.display = 'block';
    // textContent: no escaping needed for the summary line.
    meta.textContent = `host: ${r.host} · scanned: ${new Date(r.generatedAt).toLocaleTimeString()} · ${spawnedCount} bridges spawned · ${r.summary.totalProviders} total providers, ${r.summary.totalRoutableModels} models`;

    // CLI subscriptions
    document.getElementById('discCntSubs').textContent = r.subscriptions.detected;
    document.getElementById('discListSubs').innerHTML = r.subscriptions.items.map(s => `
      <li>
        <span>${esc(s.descriptor.label)}</span>
        <span class="${s.installed ? 'disc-ok' : 'disc-no'}">${s.installed ? (s.authenticated === true ? '✓ auth' : (s.authenticated === false ? '⚠ unauth' : '?')) : '—'}</span>
      </li>
    `).join('');

    // Local LLM servers
    document.getElementById('discCntLocal').textContent = r.localLLMs.detected;
    document.getElementById('discListLocal').innerHTML = r.localLLMs.items.map(l => `
      <li>
        <span>${esc(l.label)}<br><span style="font-size:0.66rem;opacity:0.6;">${esc(l.url)}</span></span>
        <span class="${l.detected ? 'disc-ok' : 'disc-no'}">${l.detected ? `✓ ${esc(l.models.length)} models · ${esc(l.latencyMs)}ms` : '— offline'}</span>
      </li>
    `).join('');

    // API-key providers
    document.getElementById('discCntKeys').textContent = r.apiKeys.configured;
    document.getElementById('discListKeys').innerHTML = r.apiKeys.items.map(k => `
      <li>
        <span>${esc(k.label)}<br><span style="font-size:0.66rem;opacity:0.6;">${esc(k.envKey)}</span></span>
        <span class="${k.configured ? 'disc-ok' : 'disc-no'}">${k.configured ? '✓ set' : '— missing'}</span>
      </li>
    `).join('');

    btn.textContent = `✓ found ${r.summary.totalProviders}`;
    await loadSubscriptions();
  } catch (e) {
    // Surface the failure on the button itself; it is reset by the timer below.
    btn.textContent = `✗ ${e.message}`;
  } finally {
    // Keep the result/error label visible briefly, then restore the button.
    setTimeout(() => { btn.disabled = false; btn.textContent = orig; }, 3000);
  }
});
|
||||
|
||||
document.getElementById('subsSpawnBtn').addEventListener('click', async () => {
|
||||
const btn = document.getElementById('subsSpawnBtn');
|
||||
btn.disabled = true;
|
||||
@ -2235,7 +2550,7 @@
|
||||
document.getElementById('routingModeBadge').textContent = s.routingMode;
|
||||
|
||||
// UI mode toggles
|
||||
const ui = s.ui ?? { simpleMode: true, hideEmptyProviders: true, showTooltips: true };
|
||||
const ui = s.ui ?? { simpleMode: false, hideEmptyProviders: true, showTooltips: true };
|
||||
document.getElementById('uiSimpleMode').checked = !!ui.simpleMode;
|
||||
document.getElementById('uiHideEmpty').checked = !!ui.hideEmptyProviders;
|
||||
document.getElementById('uiTooltips').checked = !!ui.showTooltips;
|
||||
@ -2394,6 +2709,11 @@
|
||||
document.getElementById('cacheEntries').textContent = formatNumber(s.uniqueEntries);
|
||||
document.getElementById('tokensPrevented').textContent = formatNumber(s.totalTokensSaved);
|
||||
document.getElementById('cacheHitRate').innerHTML = s.hitRatePercent.toFixed(1) + '<span class="metric-unit">%</span>';
|
||||
const sr = s.sinceRestart || {};
|
||||
document.getElementById('compressedSinceRestart').textContent = formatNumber(sr.tokensSaved || 0);
|
||||
const sinceLabel = sr.sinceISO ? new Date(sr.sinceISO).toLocaleString() : '—';
|
||||
const pctTxt = (sr.savingsPct || 0).toFixed(1) + '%';
|
||||
document.getElementById('compressedSinceRestartMeta').textContent = pctTxt + ' · ' + (sr.operations || 0) + ' ops · since ' + sinceLabel;
|
||||
|
||||
// Tab badge
|
||||
document.getElementById('savingsTabBadge').textContent = s.totalHits > 0 ? formatCost(s.totalCostSaved) : '·';
|
||||
@ -2618,9 +2938,9 @@
|
||||
`;
|
||||
}
|
||||
|
||||
// Try to fetch Lean-CTX stats from localhost:3333 (browser-side, not server-side)
|
||||
// Returns null if Lean-CTX not running OR dashboard browsed from different machine.
|
||||
async function fetchLeanCtxStats() {
|
||||
// Try to fetch external tool stats from localhost:3333 (legacy compat) (browser-side, not server-side)
|
||||
// Returns null if no external tool runs there.
|
||||
async function fetchExternalToolStats() {
|
||||
try {
|
||||
const ctrl = new AbortController();
|
||||
setTimeout(() => ctrl.abort(), 1500);
|
||||
@ -2645,20 +2965,20 @@
|
||||
document.getElementById('heroCacheHits').textContent = s.totalHits;
|
||||
document.getElementById('heroSavingsRate').textContent = `${s.hitRatePercent || 0}%`;
|
||||
|
||||
// Lean-CTX integration: pull from localhost:3333 if available
|
||||
const leanCtx = await fetchLeanCtxStats();
|
||||
const combined = gatewayTokens + (leanCtx?.saved || 0);
|
||||
// Optional external-tool integration: pull from localhost:3333 if running
|
||||
const externalTool = await fetchExternalToolStats();
|
||||
const combined = gatewayTokens + (externalTool?.saved || 0);
|
||||
document.getElementById('heroTokensSavedCombined').textContent = formatNumber(combined);
|
||||
if (leanCtx) {
|
||||
document.getElementById('heroLeanCtxRow').style.display = 'flex';
|
||||
document.getElementById('heroLeanCtxTokens').textContent = formatNumber(leanCtx.saved);
|
||||
if (externalTool) {
|
||||
document.getElementById('heroExternalToolRow').style.display = 'flex';
|
||||
document.getElementById('heroExternalToolTokens').textContent = formatNumber(externalTool.saved);
|
||||
} else {
|
||||
document.getElementById('heroLeanCtxRow').style.display = 'none';
|
||||
document.getElementById('heroExternalToolRow').style.display = 'none';
|
||||
}
|
||||
document.getElementById('costWithout').textContent = formatCost(c.costWithoutGateway || 0);
|
||||
document.getElementById('costWith').textContent = formatCost(c.costWithGateway || 0);
|
||||
const saved = (c.costWithoutGateway || 0) - (c.costWithGateway || 0);
|
||||
document.getElementById('costSavedLine').textContent = formatCost(saved);
|
||||
document.getElementById('costSavedLine').textContent = (saved < 0 ? '-$' : '$') + Math.abs(saved).toFixed(2);
|
||||
document.getElementById('costSavedPercent').textContent = `${(c.effectiveSavingsPercent || 0).toFixed(1)}%`;
|
||||
|
||||
// 5-axis savings
|
||||
@ -3070,9 +3390,98 @@
|
||||
if (target === 'memory') loadMemoryGraph();
|
||||
if (target === 'leaderboard') loadLeaderboard();
|
||||
if (target === 'share') refreshShareCard();
|
||||
if (target === 'api') refreshApiBridgeStatus();
|
||||
});
|
||||
});
|
||||
|
||||
// ─── API Tab — copy buttons, try-it-out, bridge status ────────────────
|
||||
/**
 * Copy `text` to the clipboard.
 *
 * Uses navigator.clipboard.writeText when the browser exposes it; otherwise
 * falls back to a temporary <textarea> and document.execCommand('copy').
 * Both paths return a promise so callers can simply await the result.
 */
function copyToClipboard(text) {
  const clipboard = navigator.clipboard;
  if (clipboard?.writeText) {
    return clipboard.writeText(text);
  }

  // Fallback path: select the text inside a throwaway textarea and copy it.
  const scratch = document.createElement('textarea');
  scratch.value = text;
  document.body.appendChild(scratch);
  scratch.select();
  document.execCommand('copy');
  document.body.removeChild(scratch);
  return Promise.resolve();
}
|
||||
// Wire every ".api-copy" button: copy the text of the element named by the
// button's data-target attribute, then show a short confirmation label.
for (const copyBtn of document.querySelectorAll('.api-copy')) {
  copyBtn.addEventListener('click', async () => {
    const source = document.getElementById(copyBtn.dataset.target);
    // A missing target (or empty text) copies an empty string.
    await copyToClipboard(source?.innerText || '');

    const previousLabel = copyBtn.textContent;
    copyBtn.textContent = 'copied ✓';
    setTimeout(() => {
      copyBtn.textContent = previousLabel;
    }, 1400);
  });
}
|
||||
|
||||
// "Try it out" panel: POST the prompt to the selected endpoint, then show the
// HTTP status with round-trip time, any compression metadata found in the
// response, and the pretty-printed JSON body.
document.getElementById('apiTryRun')?.addEventListener('click', async () => {
  const el = (id) => document.getElementById(id);
  const endpoint = el('apiTryEndpoint').value;
  const model = el('apiTryModel').value || 'claude-sonnet-4.6';
  const prompt = el('apiTryPrompt').value || '';
  const status = el('apiTryStatus');
  const meta = el('apiTryMeta');
  const wrap = el('apiTryResultWrap');
  const out = el('apiTryResult');

  if (!prompt.trim()) {
    status.textContent = 'add a prompt first';
    return;
  }

  // The request payload shape depends on which endpoint is being exercised.
  const buildBody = () => {
    switch (endpoint) {
      case '/v1/completion':
        return { caller: 'dashboard-tryout', task_type: 'generic_qa', input: prompt, options: { compression: { enabled: true, mode: 'auto' } } };
      case '/v1/chat/completions':
        return { model, messages: [{ role: 'user', content: prompt }] };
      default:
        return { model, messages: [{ role: 'user', content: prompt }], max_tokens: 1024 };
    }
  };
  const body = buildBody();

  status.textContent = 'sending…';
  const startedAt = performance.now();
  try {
    const response = await fetch((API_BASE || location.origin) + endpoint, {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify(body),
    });
    const elapsedMs = Math.round(performance.now() - startedAt);
    // A non-JSON reply is rendered as an empty object rather than failing.
    const json = await response.json().catch(() => ({}));
    status.textContent = `${response.status} ${response.statusText} · ${elapsedMs} ms`;

    // Compression info may sit at the top level or under `metadata`.
    const compression = json?.compression || json?.metadata?.compression || null;
    meta.textContent = compression
      ? `compression: applied=${compression.applied} · method=${compression.method} · before=${compression.tokens_before} after=${compression.tokens_after} saved=${compression.tokens_saved}`
      : 'no compression metadata in response';

    out.textContent = JSON.stringify(json, null, 2);
    wrap.style.display = 'block';
  } catch (err) {
    status.textContent = 'error: ' + (err.message || err);
  }
});
|
||||
|
||||
/**
 * Refresh the status column of the API-tab bridge table.
 *
 * Fetches /api/dashboard/providers, merges the `subscription` and `local`
 * provider groups, and for every `.api-bridge-status` cell looks up the
 * provider named by the cell's data-bridge attribute:
 *   - enabled and status === 'configured' → "✓ online" with class `ok`
 *   - otherwise                            → the provider status (or
 *     "disabled") with class `err`
 *   - no matching provider                 → "unknown" with class `err`
 *
 * Failures (network error, non-2xx response, malformed JSON) are deliberately
 * ignored so the table simply keeps its previous contents.
 */
async function refreshApiBridgeStatus() {
  try {
    const res = await fetch((API_BASE || location.origin) + '/api/dashboard/providers');
    if (!res.ok) return;
    const json = await res.json();
    const grouped = json?.data?.grouped;
    const allProviders = [
      ...(grouped?.subscription || []),
      ...(grouped?.local || []),
    ];
    document.querySelectorAll('.api-bridge-status').forEach(cell => {
      // This function runs on every visit to the API tab; clear the class left
      // by the previous run so a cell never carries both `ok` and `err`.
      cell.classList.remove('ok', 'err');

      const name = cell.dataset.bridge;
      const p = allProviders.find(x => x.name === name);
      if (!p) {
        cell.textContent = 'unknown';
        cell.classList.add('err');
        return;
      }
      if (p.enabled && p.status === 'configured') {
        cell.textContent = '✓ online';
        cell.classList.add('ok');
      } else {
        cell.textContent = p.status || 'disabled';
        cell.classList.add('err');
      }
    });
  } catch {
    /* best-effort: leave the table untouched when the lookup fails */
  }
}
|
||||
|
||||
// ─── Init ────────────────────────────────────────────────────────────
|
||||
async function init() {
|
||||
await checkHealth();
|
||||
@ -3090,7 +3499,7 @@
|
||||
if (payload.success) {
|
||||
document.getElementById('routingModeBadge').textContent = payload.data.routingMode;
|
||||
// Apply UI mode (Simple Mode etc.) immediately on load
|
||||
applyUiMode(payload.data.ui ?? { simpleMode: true, hideEmptyProviders: true, showTooltips: true });
|
||||
applyUiMode(payload.data.ui ?? { simpleMode: false, hideEmptyProviders: true, showTooltips: true });
|
||||
}
|
||||
} catch (e) { /* non-fatal */ }
|
||||
|
||||
|
||||
@ -1,7 +1,7 @@
|
||||
# LLM Gateway Model Configuration
|
||||
# The Ollama base URL is configured via `ollama_base_url` below.
|
||||
|
||||
ollama_base_url: "https://ollama.fichtmueller.org"
|
||||
ollama_base_url: "http://127.0.0.1:11434"
|
||||
|
||||
tiers:
|
||||
fast:
|
||||
@ -26,7 +26,7 @@ models:
|
||||
qwen2.5:3b:
|
||||
tier: fast
|
||||
context_length: 32768
|
||||
strengths: [classification, short_text, routing]
|
||||
strengths: [classification, summarization, routing]
|
||||
max_tokens_default: 512
|
||||
|
||||
qwen2.5:7b:
|
||||
@ -35,83 +35,58 @@ models:
|
||||
strengths: [classification, summarization, short_analysis]
|
||||
max_tokens_default: 1024
|
||||
|
||||
phi3.5:3.8b:
|
||||
qwen2.5:7b-instruct:
|
||||
tier: fast
|
||||
context_length: 128000
|
||||
strengths: [classification, summarization]
|
||||
context_length: 32768
|
||||
strengths: [classification, summarization, short_analysis]
|
||||
max_tokens_default: 1024
|
||||
|
||||
qwen2.5-coder:7b-instruct:
|
||||
tier: fast
|
||||
context_length: 32768
|
||||
strengths: [code_generation, technical_analysis, routing]
|
||||
max_tokens_default: 512
|
||||
|
||||
# ─── MAGATAMA — Fine-tuned Security Intelligence (Context X) ─────────────────
|
||||
magatama:32b:
|
||||
tier: large
|
||||
context_length: 131072
|
||||
strengths: [security_analysis, threat_intelligence, compliance, bgp_security, incident_response, nis2, ciso_reporting]
|
||||
strengths: [security_analysis, threat_intelligence, compliance, bgp_security, incident_response, nis2, ciso_reporting, complex_writing, deep_analysis, technical]
|
||||
max_tokens_default: 4096
|
||||
description: "MAGATAMA まがたま — TEPPEKI 7-pillar security AI, fine-tuned on Qwen2.5-32B"
|
||||
|
||||
# Custom fine-tuned models (Context X)
|
||||
ctxhealer:latest:
|
||||
tier: medium
|
||||
context_length: 32768
|
||||
strengths: [infrastructure_diagnosis, root_cause_analysis, remediation_steps]
|
||||
max_tokens_default: 1024
|
||||
|
||||
llama-guard3:1b:
|
||||
tier: fast
|
||||
context_length: 8192
|
||||
strengths: [safety_classification, threat_detection]
|
||||
max_tokens_default: 256
|
||||
|
||||
# Medium tier
|
||||
qwen2.5:14b:
|
||||
tier: medium
|
||||
context_length: 131072
|
||||
strengths: [general, writing, analysis, coding]
|
||||
strengths: [general, writing, analysis, coding, dialogue]
|
||||
max_tokens_default: 2048
|
||||
|
||||
mistral:7b:
|
||||
magatama-llm-v2-0:latest:
|
||||
tier: medium
|
||||
context_length: 32768
|
||||
strengths: [general, writing]
|
||||
context_length: 131072
|
||||
strengths: [general, writing, analysis, coding, dialogue]
|
||||
max_tokens_default: 2048
|
||||
|
||||
llama3.2:8b:
|
||||
tier: medium
|
||||
context_length: 128000
|
||||
strengths: [general, chat, analysis]
|
||||
max_tokens_default: 2048
|
||||
|
||||
deepseek-r1:8b:
|
||||
magatama-coder:latest:
|
||||
tier: medium
|
||||
context_length: 65536
|
||||
strengths: [reasoning, analysis, coding]
|
||||
strengths: [code_generation, technical_analysis, debugging]
|
||||
max_tokens_default: 2048
|
||||
|
||||
# Large tier
|
||||
qwen2.5:32b:
|
||||
tier: large
|
||||
context_length: 131072
|
||||
strengths: [complex_writing, deep_analysis, technical]
|
||||
max_tokens_default: 4096
|
||||
|
||||
llama3.3:70b:
|
||||
tier: large
|
||||
context_length: 128000
|
||||
strengths: [complex_reasoning, long_form, research]
|
||||
max_tokens_default: 4096
|
||||
|
||||
deepseek-r1:32b:
|
||||
tier: large
|
||||
context_length: 131072
|
||||
strengths: [chain_of_thought, complex_reasoning]
|
||||
strengths: [complex_writing, deep_analysis, technical, security_analysis]
|
||||
max_tokens_default: 4096
|
||||
|
||||
# Fallback chains per tier
|
||||
fallback_chains:
|
||||
fast: [qwen2.5:3b, qwen2.5:7b, phi3.5:3.8b]
|
||||
medium: [qwen2.5:14b, mistral:7b, llama3.2:8b]
|
||||
large: [qwen2.5:32b, llama3.3:70b, deepseek-r1:32b]
|
||||
code_generation: [deepseek-r1:32b, qwen2.5:32b, llama3.3:70b]
|
||||
fast: [qwen2.5:7b-instruct, qwen2.5-coder:7b-instruct]
|
||||
medium: [magatama-llm-v2-0:latest, magatama-coder:latest, qwen2.5:7b-instruct]
|
||||
large: [magatama:32b, magatama-llm-v2-0:latest]
|
||||
code_generation: [magatama-coder:latest, qwen2.5-coder:7b-instruct]
|
||||
|
||||
# Cross-tier fallback when primary tier fails
|
||||
tier_fallback:
|
||||
|
||||
@ -1110,7 +1110,7 @@ routing_rules:
|
||||
|
||||
# ─── CONTENT / LINKEDIN ──────────────────────────────────────────────────────
|
||||
linkedin_post:
|
||||
model: qwen2.5:32b
|
||||
model: fo-blog-v10
|
||||
tier: large
|
||||
prompt_template: linkedin_post
|
||||
temperature: 0.7
|
||||
@ -1118,7 +1118,7 @@ routing_rules:
|
||||
output_format: text
|
||||
requires_fact_check: false
|
||||
validators: [banlist, language, length, question_closer]
|
||||
callers: [n8n, internal]
|
||||
callers: [n8n, internal, linkedin-distributor]
|
||||
|
||||
linkedin_comment:
|
||||
model: qwen2.5:14b
|
||||
|
||||
@ -3,7 +3,7 @@
|
||||
-- Purpose: Track token compression and cost analytics
|
||||
-- PostgreSQL compatible version (version 16+)
|
||||
|
||||
-- Table: Token compression metrics (LeanCTX, RTK)
|
||||
-- Table: Token compression metrics (LLM Gateway)
|
||||
CREATE TABLE IF NOT EXISTS tokenvault_metrics (
|
||||
id SERIAL PRIMARY KEY,
|
||||
file_path VARCHAR(255),
|
||||
|
||||
@ -1,12 +1,12 @@
|
||||
-- Tokenvault & Cost Tracking Schema Extensions
|
||||
-- Created: 2026-04-19
|
||||
-- Purpose: Track token compression (LeanCTX + RTK) and cost analytics
|
||||
-- Purpose: Track token compression (LLM Gateway) and cost analytics
|
||||
|
||||
-- Table: Token compression metrics (LeanCTX, RTK)
|
||||
-- Table: Token compression metrics (LLM Gateway)
|
||||
CREATE TABLE IF NOT EXISTS tokenvault_metrics (
|
||||
id SERIAL PRIMARY KEY,
|
||||
file_path VARCHAR(255),
|
||||
mode VARCHAR(50), -- 'lean-aggressive', 'lean-map', 'rtk-max', etc.
|
||||
mode VARCHAR(50), -- 'gateway-aggressive', 'gateway-map', 'gateway-trim', etc.
|
||||
tokens_before INT,
|
||||
tokens_after INT,
|
||||
savings_pct DECIMAL(5,2),
|
||||
@ -26,7 +26,7 @@ CREATE TABLE IF NOT EXISTS cost_analytics (
|
||||
agent_id VARCHAR(50), -- 'claude-code', 'qwen-reviewer', etc.
|
||||
tokens_in INT,
|
||||
tokens_out INT,
|
||||
tokens_compressed INT, -- After LeanCTX + RTK
|
||||
tokens_compressed INT, -- After LLM Gateway compression
|
||||
cost_usd DECIMAL(10,6),
|
||||
cost_saved_usd DECIMAL(10,6),
|
||||
provider VARCHAR(50), -- 'ollama', 'cerebras', 'groq', 'claude', etc.
|
||||
|
||||
@ -101,7 +101,7 @@ export function calculateCost(
|
||||
/**
|
||||
* Calculate cost savings from compression
|
||||
* @param model Model identifier
|
||||
* @param tokensBeforeCompression Tokens before LeanCTX + RTK
|
||||
* @param tokensBeforeCompression Tokens before LLM Gateway compression
|
||||
* @param tokensAfterCompression Tokens after compression
|
||||
* @returns Savings in USD
|
||||
*/
|
||||
|
||||
@ -47,7 +47,7 @@ const PROVIDERS: readonly ExternalProvider[] = [
|
||||
enabled: true,
|
||||
models: [
|
||||
{ id: 'claude-opus-4-1', tier: 'reasoning', contextLength: 200000 },
|
||||
{ id: 'claude-sonnet-4-1', tier: 'large', contextLength: 200000 },
|
||||
{ id: 'claude-sonnet-4-6', tier: 'large', contextLength: 200000 },
|
||||
{ id: 'claude-haiku-3', tier: 'fast', contextLength: 200000 },
|
||||
],
|
||||
},
|
||||
@ -174,7 +174,7 @@ const PROVIDERS: readonly ExternalProvider[] = [
|
||||
enabled: true,
|
||||
models: [
|
||||
{ id: 'claude-opus-4-1', tier: 'reasoning', contextLength: 200000 },
|
||||
{ id: 'claude-sonnet-4-1', tier: 'large', contextLength: 200000 },
|
||||
{ id: 'claude-sonnet-4-6', tier: 'large', contextLength: 200000 },
|
||||
{ id: 'claude-haiku-3', tier: 'fast', contextLength: 200000 },
|
||||
],
|
||||
},
|
||||
|
||||
@ -728,6 +728,36 @@ function handleFormalLogicOverride(
|
||||
return result;
|
||||
}
|
||||
|
||||
// ── Helper: Code Generation Intent Override ───────────────────────────────
|
||||
|
||||
/**
 * Case-insensitive patterns that signal an explicit request to produce code.
 * A request whose text matches any one of them is treated as code generation.
 */
const CODE_GENERATION_PATTERNS: readonly RegExp[] = [
  // "write [a] [<language/framework>] function|class|script|…"
  /\bwrite\s+(?:a\s+)?(?:typescript|javascript|python|go|rust|react|next\.js|node)?\s*(?:function|class|script|module|component|test|handler|middleware)\b/i,
  // A construction verb followed, within 160 characters, by a code artifact noun.
  /\b(?:implement|create|build|generate|scaffold)\b[\s\S]{0,160}\b(?:api|endpoint|function|class|component|service|schema|migration|crud|jwt|test|project|module)\b/i,
  // "REST API" / "GraphQL API" followed, within 160 characters, by an implementation cue.
  /\b(?:rest|graphql)\s+api\b[\s\S]{0,160}\b(?:implement|create|build|endpoint|authentication|jwt)\b/i,
];
|
||||
|
||||
/**
 * Scoring shortcut for requests that explicitly ask for code.
 *
 * When `fullText` matches any entry of CODE_GENERATION_PATTERNS the request is
 * assigned the `code_generation` tier with fixed score (0.62) and confidence
 * (0.86); the dimensions are still computed so the result carries them. The
 * tier is recorded for the session and a debug line is logged.
 *
 * @param fullText     Text the patterns are tested against.
 * @param input        Scorer input, forwarded to computeAllDimensions.
 * @param userMessages Weighted user messages, forwarded to computeAllDimensions.
 * @returns The override result, or null when no pattern matches.
 */
function handleCodeGenerationOverride(
  fullText: string,
  input: ScorerInput,
  userMessages: readonly WeightedMessage[],
): ScoringResult | null {
  const hasCodeGenIntent = CODE_GENERATION_PATTERNS.some((pattern) => pattern.test(fullText));
  if (!hasCodeGenIntent) return null;

  const override: ScoringResult = {
    tier: 'code_generation',
    score: 0.62,
    confidence: 0.86,
    reason: 'code generation intent detected',
    dimensions: computeAllDimensions(input, userMessages, fullText),
  };

  recordSessionTier('code_generation');
  logger.debug(
    { tier: 'code_generation', reason: 'code_generation_override' },
    'Request scored via code generation override',
  );
  return override;
}
|
||||
|
||||
// ── Helper: Apply Score Overrides ──────────────────────────────────────────
|
||||
|
||||
interface ScoreOverridesInput {
|
||||
@ -754,6 +784,7 @@ function applyScoreOverrides(
|
||||
const codeGenDim = dimensions.find((d) => d.name === 'codeGeneration');
|
||||
if (codeGenDim && codeGenDim.rawScore > 0.25) {
|
||||
tier = 'code_generation';
|
||||
confidence = Math.max(confidence, 0.78);
|
||||
reason = 'code generation keywords detected';
|
||||
}
|
||||
|
||||
@ -771,7 +802,7 @@ function applyScoreOverrides(
|
||||
}
|
||||
|
||||
// Ambiguity check
|
||||
if (confidence < 0.45) {
|
||||
if (confidence < 0.45 && tier !== 'code_generation' && tier !== 'reasoning') {
|
||||
tier = 'medium';
|
||||
reason = 'ambiguous (confidence < 0.45, defaulting to medium)';
|
||||
}
|
||||
@ -795,6 +826,9 @@ export function scoreRequest(
|
||||
const formalLogicResult = handleFormalLogicOverride(fullText, input, userMessages);
|
||||
if (formalLogicResult) return formalLogicResult;
|
||||
|
||||
const codeGenerationResult = handleCodeGenerationOverride(fullText, input, userMessages);
|
||||
if (codeGenerationResult) return codeGenerationResult;
|
||||
|
||||
const dimensions = computeAllDimensions(input, userMessages, fullText);
|
||||
let rawScore = 0;
|
||||
for (const dim of dimensions) {
|
||||
|
||||
@ -184,14 +184,14 @@ export function getOllamaBaseUrl(): string {
|
||||
/**
|
||||
* Maps a scorer tier to the best primary model and its fallback chain.
|
||||
* The 'reasoning' tier uses llama3.3:70b (complex_reasoning strength) from the large tier.
|
||||
* The 'code_generation' tier uses OpenAI Codex (gpt-4-turbo) as primary via external provider.
|
||||
* The 'code_generation' tier uses OpenAI Codex as primary via external provider.
|
||||
*/
|
||||
const TIER_MODEL_MAP: Record<Tier, { primary: string; configTier: 'fast' | 'medium' | 'large'; provider?: string }> = {
|
||||
fast: { primary: 'qwen2.5:3b', configTier: 'fast' },
|
||||
medium: { primary: 'qwen2.5:14b', configTier: 'medium' },
|
||||
large: { primary: 'qwen2.5:32b', configTier: 'large' },
|
||||
reasoning: { primary: 'llama3.3:70b', configTier: 'large' },
|
||||
code_generation: { primary: 'gpt-4-turbo', configTier: 'large', provider: 'openai-codex' },
|
||||
code_generation: { primary: 'gpt-5.1-codex-mini', configTier: 'large', provider: 'openai-codex' },
|
||||
};
|
||||
|
||||
function buildMediumTierFallback(
|
||||
@ -223,7 +223,8 @@ function buildScoredFallbackChain(
|
||||
models: ModelsYaml,
|
||||
): string[] {
|
||||
if (tier === 'reasoning' || tier === 'code_generation') {
|
||||
return [selectedModel, ...buildFallbackChain(selectedModel, configTier, models).filter((m) => m !== selectedModel)];
|
||||
const fallbackTier = tier === 'code_generation' ? 'code_generation' : configTier;
|
||||
return [selectedModel, ...buildFallbackChain(selectedModel, fallbackTier, models).filter((m) => m !== selectedModel)];
|
||||
}
|
||||
return buildFallbackChain(selectedModel, configTier, models);
|
||||
}
|
||||
@ -302,7 +303,7 @@ export function routeByScore(
|
||||
const mapping = TIER_MODEL_MAP[scoringResult.tier];
|
||||
const selectedModel = mapping.primary;
|
||||
const configTier = mapping.configTier;
|
||||
const tierConfig = models.tiers[configTier];
|
||||
const tierConfig = models.tiers[scoringResult.tier] ?? models.tiers[configTier];
|
||||
|
||||
if (!tierConfig) {
|
||||
logger.error({ tier: configTier }, 'Tier config not found in models.yaml, falling back to medium');
|
||||
|
||||
@ -127,6 +127,27 @@ function shouldBypassResponseCache(caller: string): boolean {
|
||||
|| normalized.includes('copilot');
|
||||
}
|
||||
|
||||
/**
 * Prepares request text for the prompt-guard classifier by removing chat-role
 * prefixes ("user:", "assistant:", "system:", "developer:", any case) from the
 * start of each line and trimming the result.
 *
 * @param input Raw request text.
 * @returns The stripped text, or the original `input` when stripping leaves
 *          nothing behind.
 */
function inputForPromptGuard(input: string): string {
  const rolePrefix = /^(user|assistant|system|developer):\s*/gim;
  const stripped = input.replace(rolePrefix, '').trim();
  return stripped.length > 0 ? stripped : input;
}
|
||||
|
||||
/**
 * Decides whether the request is suspicious enough to be sent to the
 * prompt-guard classifier.
 *
 * Returns true when the earlier injection scan recorded any match, or when the
 * role-stripped, NFKC-normalized input matches one of the trigger patterns
 * below; otherwise false.
 *
 * @param input Raw request text.
 * @param scan  Result of the earlier injection scan.
 */
function shouldRunPromptGuard(input: string, scan: InjectionScanResult): boolean {
  if (scan.matches.length > 0) return true;

  const normalized = inputForPromptGuard(input).normalize('NFKC');
  const triggers: readonly RegExp[] = [
    // Override verbs followed (within 120 chars) by instructions/rules/prompt/policy/safety.
    /\b(?:ignore|disregard|forget|override|bypass|jailbreak)\b[\s\S]{0,120}\b(?:instructions?|rules?|prompt|policy|safety)\b/i,
    // Role / persona switching phrases.
    /\b(?:you\s+are\s+now|act\s+as|pretend\s+to\s+be|developer\s+mode|root\s+administrator|runtime\s+controller|security\s+auditor)\b/i,
    // Disclosure verbs followed (within 160 chars) by internal targets.
    /\b(?:show|print|dump|reveal|output)\b[\s\S]{0,160}\b(?:system\s+prompt|developer\s+prompt|hidden|runtime|memory|tools?|filters?|policy|classifier|chain-of-thought|reasoning)\b/i,
    // Credential terms followed (within 160 chars) by a disclosure/login verb.
    /\b(?:passwords?|passw(?:o|ö)rter|credentials?|api\s*keys?|tokens?|secrets?)\b[\s\S]{0,160}\b(?:print|show|write|paste|send|share|reveal|chat|anmelden|log\s*in)\b/i,
    // Encoding / execution keywords.
    /\b(?:base64|rot13|hex\s+encoded|decode|execute|run\s+this)\b/i,
    // Invisible formatting characters (zero-width and bidirectional controls).
    /[\u200B-\u200F\u202A-\u202E\u2060-\u2064\uFEFF]/,
    // A run of 40+ base64-alphabet characters.
    /\b[A-Za-z0-9+/]{40,}={0,2}\b/,
    // A run of 16+ hex byte pairs (32+ hex digits).
    /\b(?:[0-9a-fA-F]{2}){16,}\b/,
  ];

  for (const trigger of triggers) {
    if (trigger.test(normalized)) return true;
  }
  return false;
}
|
||||
|
||||
const ChatMessageSchema = z.object({
|
||||
role: z.string().min(1),
|
||||
content: z.union([z.string(), z.array(z.unknown()), z.null()]).optional(),
|
||||
@ -428,8 +449,8 @@ async function executeCompletion(body: CompletionRequest, startMs: number, callI
|
||||
}
|
||||
|
||||
// ─── Layer 2: ML classifier (Prompt-Guard sidecar) ────────────────────
|
||||
if (!injectionScan.detected && isPromptGuardConfigured() && body.input.length >= getPromptGuardMinLen()) {
|
||||
const pg = await callPromptGuard(body.input);
|
||||
if (!injectionScan.detected && isPromptGuardConfigured() && body.input.length >= getPromptGuardMinLen() && shouldRunPromptGuard(body.input, injectionScan)) {
|
||||
const pg = await callPromptGuard(inputForPromptGuard(body.input));
|
||||
if (pg.available && pg.label === 'INJECTION' && pg.score >= getPromptGuardThreshold()) {
|
||||
logger.warn(
|
||||
{ caller, callId, pg_score: pg.score, pg_latency_ms: pg.latencyMs },
|
||||
|
||||
@ -9,6 +9,7 @@ import { createRequestLogger } from '../modules/request-logger.js';
|
||||
import { globalRequestStream } from '../modules/request-stream.js';
|
||||
import { getAvailableProviders, getAllProviders } from '../pipeline/external-providers.js';
|
||||
import { discoverSubscriptions } from '../modules/subscription-discovery.js';
|
||||
import { runDiscovery, runDiscoveryAndSpawn } from '../modules/auto-discovery.js';
|
||||
import { getRunningBridges, spawnDetectedBridges } from '../modules/bridge-spawner.js';
|
||||
import { getPublicSettings, saveSettings, SettingsPatchSchema } from '../modules/settings-store.js';
|
||||
import {
|
||||
@ -18,6 +19,10 @@ import {
|
||||
pruneStaleCacheEntries,
|
||||
} from '../modules/response-cache.js';
|
||||
import { getComprehensiveSavings } from '../modules/savings-calculator.js';
|
||||
|
||||
// Captured once at module load — represents the gateway-process start time
|
||||
// for the 'compressed since last restart' tile in the dashboard.
|
||||
const SERVER_STARTED_AT_ISO = new Date().toISOString();
|
||||
import {
|
||||
getBuddyState,
|
||||
getAchievements,
|
||||
@ -106,6 +111,22 @@ type ProviderRuntime = {
|
||||
};
|
||||
|
||||
const CLIENT_CATALOG = [
|
||||
{
|
||||
id: 'macbook-claude-code',
|
||||
label: 'MacBook (Claude Code)',
|
||||
patterns: ['claude-code-laptop'],
|
||||
commands: [],
|
||||
paths: [],
|
||||
processPatterns: [],
|
||||
},
|
||||
{
|
||||
id: 'macstudio-claude-code',
|
||||
label: 'Mac Studio (Claude Code)',
|
||||
patterns: ['claude-code-macstudio', 'claude-code-studio'],
|
||||
commands: [],
|
||||
paths: [],
|
||||
processPatterns: [],
|
||||
},
|
||||
{
|
||||
id: 'codex-desktop',
|
||||
label: 'Codex Desktop / CLI',
|
||||
@ -158,6 +179,17 @@ const CLIENT_CATALOG = [
|
||||
|
||||
type ClientStatus = 'live' | 'running' | 'installed' | 'not-connected';
|
||||
|
||||
/**
 * Bridge provider name for each CLIENT_CATALOG entry, keyed by client id.
 * `undefined` means the client has no bridge; callers then skip the bridge
 * runtime lookup for that client.
 *
 * NOTE(review): 'codex' and 'codex-bridge' are not among the provider names
 * visible in bridgeHealthUrl ('openai-codex', 'chatgpt-bridge', …) — confirm
 * that providerRuntime recognizes them.
 */
const CLIENT_BRIDGE_PROVIDERS: Record<(typeof CLIENT_CATALOG)[number]['id'], string | undefined> = {
  // Device-specific Claude Code entries: no bridge.
  'macbook-claude-code': undefined,
  'macstudio-claude-code': undefined,

  // Desktop / CLI clients backed by a bridge.
  'codex-desktop': 'codex',
  'claude-desktop': 'claude-code',
  'microsoft-copilot': 'm365-copilot-bridge',
  'github-copilot': 'copilot-bridge',

  // Generic OpenAI-compatible clients are configured manually: no bridge.
  'openai-compatible': undefined,

  'chatgpt': 'codex-bridge',
};
|
||||
|
||||
/**
 * Expands a leading "~/" to the current user's home directory.
 * Any path that does not start with "~/" (including a bare "~") is returned
 * unchanged.
 */
function expandUserPath(path: string): string {
  if (!path.startsWith('~/')) {
    return path;
  }
  const relativeToHome = path.slice(2);
  return `${homedir()}/${relativeToHome}`;
}
|
||||
@ -217,8 +249,22 @@ async function getGatewayClientCoverage(hoursBack: number = 24): Promise<Array<{
|
||||
tokensSaved: number;
|
||||
source: 'gateway' | 'local-detection' | 'none';
|
||||
detectionSignals: string[];
|
||||
bridgeProvider?: string;
|
||||
bridgeStatus?: string;
|
||||
bridgeHealthy?: boolean;
|
||||
bridgeDetail?: string;
|
||||
}>> {
|
||||
const detections = await getLocalDesktopDetections();
|
||||
const bridgeRuntimes = Object.fromEntries(await Promise.all(CLIENT_CATALOG.map(async (client) => {
|
||||
const providerName = CLIENT_BRIDGE_PROVIDERS[client.id];
|
||||
return [
|
||||
client.id,
|
||||
{
|
||||
providerName,
|
||||
...(providerName ? await providerRuntime(providerName) : {}),
|
||||
},
|
||||
] as const;
|
||||
})));
|
||||
let callers: Array<{ caller: string; requestCount: number; lastSeen?: string; tokensIn: number; tokensSaved: number }> = [];
|
||||
|
||||
try {
|
||||
@ -257,12 +303,19 @@ async function getGatewayClientCoverage(hoursBack: number = 24): Promise<Array<{
|
||||
logger.warn({ error }, 'Client gateway traffic lookup failed, returning local desktop detections only');
|
||||
}
|
||||
|
||||
// First-match-wins: a caller is assigned to the first (most specific) catalog
|
||||
// entry it matches, so device-specific entries (MacBook/Mac Studio) take a
|
||||
// caller before the generic 'claude-desktop' bucket — no double counting.
|
||||
const assignedCallers = new Set<string>();
|
||||
return CLIENT_CATALOG.map((client) => {
|
||||
const detection = detections[client.id];
|
||||
const bridgeRuntime = bridgeRuntimes[client.id];
|
||||
const matched = callers.filter((row) => {
|
||||
if (assignedCallers.has(row.caller)) return false;
|
||||
const caller = row.caller.toLowerCase();
|
||||
return client.patterns.some((pattern) => caller.includes(pattern));
|
||||
});
|
||||
matched.forEach((row) => assignedCallers.add(row.caller));
|
||||
const requestCount = matched.reduce((sum, row) => sum + row.requestCount, 0);
|
||||
const tokensIn = matched.reduce((sum, row) => sum + row.tokensIn, 0);
|
||||
const tokensSaved = matched.reduce((sum, row) => sum + row.tokensSaved, 0);
|
||||
@ -283,6 +336,10 @@ async function getGatewayClientCoverage(hoursBack: number = 24): Promise<Array<{
|
||||
tokensSaved,
|
||||
source: requestCount > 0 ? 'gateway' : detection?.installed ? 'local-detection' : 'none',
|
||||
detectionSignals: detection?.signals ?? [],
|
||||
bridgeProvider: bridgeRuntime?.providerName,
|
||||
bridgeStatus: bridgeRuntime?.runtimeStatus,
|
||||
bridgeHealthy: bridgeRuntime?.runtimeHealthy,
|
||||
bridgeDetail: bridgeRuntime?.runtimeDetail,
|
||||
};
|
||||
});
|
||||
}
|
||||
@ -291,8 +348,6 @@ function bridgeHealthUrl(providerName: string): string | undefined {
|
||||
const bridgeUrls: Record<string, string | undefined> = {
|
||||
'claude-bridge': process.env['CLAUDE_BRIDGE_URL'],
|
||||
'claude-code': process.env['CLAUDE_CODE_URL'] || process.env['CLAUDE_BRIDGE_URL'],
|
||||
'openai-bridge': process.env['OPENAI_BRIDGE_URL'],
|
||||
'chatgpt-bridge': process.env['CHATGPT_BRIDGE_URL'] || process.env['OPENAI_BRIDGE_URL'],
|
||||
'copilot-bridge': process.env['COPILOT_BRIDGE_URL'],
|
||||
'm365-copilot-bridge': process.env['M365_COPILOT_BRIDGE_URL'],
|
||||
'openai-codex': process.env['OPENAI_CODEX_URL'] || process.env['CODEX_BRIDGE_URL'],
|
||||
@ -575,7 +630,7 @@ export async function dashboardRoute(fastify: FastifyInstance): Promise<void> {
|
||||
const configuredProviders = providers.filter((provider) => provider.enabled && !!process.env[provider.envKey]);
|
||||
const localProviders = providers.filter((provider) => provider.name.toLowerCase().includes('ollama'));
|
||||
const subscriptionProviders = providers.filter((provider) =>
|
||||
['claude-bridge', 'claude-code', 'openai-bridge', 'chatgpt-bridge', 'copilot-bridge', 'm365-copilot-bridge', 'codex', 'openai-codex']
|
||||
['claude-bridge', 'copilot-bridge', 'm365-copilot-bridge', 'openai-codex']
|
||||
.includes(provider.name)
|
||||
);
|
||||
|
||||
@ -883,12 +938,10 @@ export async function dashboardRoute(fastify: FastifyInstance): Promise<void> {
|
||||
const displayLabels: Record<string, string> = {
|
||||
'claude-bridge': 'Claude Code Subscription (Bridge)',
|
||||
'claude-code': 'Claude Code Direct',
|
||||
'openai-bridge': 'OpenAI ChatGPT Subscription (Bridge)',
|
||||
'chatgpt-bridge': 'ChatGPT Plus Subscription (Bridge)',
|
||||
'copilot-bridge': 'GitHub Copilot Subscription',
|
||||
'm365-copilot-bridge': 'Microsoft 365 Copilot Subscription',
|
||||
'codex': 'GitHub Copilot Codex (Inner API)',
|
||||
'openai-codex': 'OpenAI API (Codex / GPT)',
|
||||
'copilot-codex': 'GitHub Copilot (Codex Inner API)',
|
||||
'openai-codex': 'OpenAI (ChatGPT + Codex)',
|
||||
'cerebras': 'Cerebras (Free Tier)',
|
||||
'groq': 'Groq (Free Tier)',
|
||||
'mistral': 'Mistral AI (Free Tier)',
|
||||
@ -898,9 +951,8 @@ export async function dashboardRoute(fastify: FastifyInstance): Promise<void> {
|
||||
|
||||
// Subscription providers (paid via login/subscription, NOT free-tier API)
|
||||
const subscriptionNames = new Set([
|
||||
'claude-bridge', 'claude-code',
|
||||
'openai-bridge', 'chatgpt-bridge',
|
||||
'copilot-bridge', 'm365-copilot-bridge', 'codex', 'openai-codex'
|
||||
'claude-bridge',
|
||||
'copilot-bridge', 'm365-copilot-bridge', 'openai-codex'
|
||||
]);
|
||||
|
||||
// Categorize all providers (independent of API-key presence)
|
||||
@ -1073,6 +1125,36 @@ export async function dashboardRoute(fastify: FastifyInstance): Promise<void> {
|
||||
}
|
||||
});
|
||||
|
||||
// ─── Full-System Auto-Discovery ─────────────────────────────────────────
|
||||
// GET /api/dashboard/discover → unified report (read-only)
|
||||
// POST /api/dashboard/discover → discover + spawn bridges
|
||||
fastify.get('/api/dashboard/discover', dashboardAuth, async (_request: FastifyRequest, reply: FastifyReply) => {
  try {
    // Run the scan and hand the report straight back in the standard envelope.
    return reply.send({ success: true, data: await runDiscovery() });
  } catch (scanError) {
    // Log the underlying cause; the client only gets a generic message.
    logger.error({ error: scanError }, 'Discovery scan failed');
    return reply.status(500).send({ success: false, error: 'Discovery scan failed' });
  }
});
|
||||
|
||||
fastify.post('/api/dashboard/discover', dashboardAuth, async (_request: FastifyRequest, reply: FastifyReply) => {
  try {
    const { report, spawned } = await runDiscoveryAndSpawn();
    // spawnedCount is derived from the spawned list so clients need not count it themselves.
    const data = { report, spawned, spawnedCount: spawned.length };
    return reply.send({ success: true, data });
  } catch (spawnError) {
    // Log the underlying cause; the client only gets a generic message.
    logger.error({ error: spawnError }, 'Discovery + spawn failed');
    return reply.status(500).send({ success: false, error: 'Discovery + spawn failed' });
  }
});
|
||||
|
||||
// POST /api/dashboard/subscriptions/spawn — trigger auto-spawn of detected bridges.
|
||||
// Returns the list of bridges that were spawned (or already running).
|
||||
fastify.post('/api/dashboard/subscriptions/spawn', dashboardAuth, async (_request: FastifyRequest, reply: FastifyReply) => {
|
||||
@ -1180,7 +1262,8 @@ export async function dashboardRoute(fastify: FastifyInstance): Promise<void> {
|
||||
effectiveSavingsPercent,
|
||||
totals: comprehensive.totals,
|
||||
},
|
||||
},
|
||||
// Compression since this gateway process started — resets at each restart.
|
||||
},
|
||||
series,
|
||||
},
|
||||
meta: { hours, bucket_minutes: bucketMin, timestamp: new Date().toISOString() },
|
||||
@ -1638,4 +1721,45 @@ export async function dashboardRoute(fastify: FastifyInstance): Promise<void> {
|
||||
return reply.status(500).send({ error: 'Failed to serve dashboard UI' });
|
||||
}
|
||||
});
|
||||
|
||||
// Passive usage import: lets clients that talk DIRECTLY to a provider (e.g. the
|
||||
// laptop's Claude Code -> api.anthropic.com) report their usage so they appear in
|
||||
// clients/costs WITHOUT routing traffic through the gateway. A caller containing
|
||||
// 'claude-code' matches the CLIENT_CATALOG 'claude-desktop' entry.
|
||||
fastify.post('/api/dashboard/usage/report', dashboardAuth, async (request: FastifyRequest, reply: FastifyReply) => {
  try {
    const body = (request.body ?? {}) as Record<string, unknown>;

    // Coerce untrusted input to a finite, non-negative number. NaN, ±Infinity and
    // negatives all collapse to 0 so they can never reach the database.
    const toNonNegative = (value: unknown): number => {
      const parsed = Number(value);
      return Number.isFinite(parsed) && parsed > 0 ? parsed : 0;
    };

    const caller = String(body.caller ?? 'claude-code-laptop').slice(0, 120);
    const model = String(body.model ?? 'claude-code').slice(0, 120);
    const tokensIn = Math.floor(toNonNegative(body.tokens_in));
    const tokensOut = Math.floor(toNonNegative(body.tokens_out));
    const costUsd = toNonNegative(body.cost_usd);
    const day = String(body.day ?? new Date().toISOString().slice(0, 10)).slice(0, 32);

    if (tokensIn === 0 && tokensOut === 0) {
      return reply.status(400).send({ success: false, error: 'tokens_in or tokens_out required' });
    }

    // Stamp the row with the ACTUAL usage day so lastSeen = when tokens were
    // used, not when the export ran.
    const dayEnd = new Date(`${day}T23:59:59Z`);

    // Reject a malformed day up front: an Invalid Date would otherwise be handed
    // to the database and surface as an opaque 500 instead of a client error.
    if (!/^\d{4}-\d{2}-\d{2}$/.test(day) || Number.isNaN(dayEnd.getTime())) {
      return reply.status(400).send({ success: false, error: 'day must be a valid YYYY-MM-DD date' });
    }

    // Cap at "now" so today's still-growing day reads as current/live.
    const usedAt = dayEnd.getTime() > Date.now() ? new Date() : dayEnd;

    const db = getPool();
    const requestId = `usage-import:${caller}:${model}:${day}`;

    // Upsert by request_id (one row per caller/model/day): re-reporting an
    // in-progress day updates its totals instead of creating duplicates.
    // NOTE(review): UPDATE-then-INSERT is not atomic; two concurrent first reports
    // for the same key could both see rowCount 0. Confirm request_id is unique
    // in request_tracking, or switch to INSERT ... ON CONFLICT.
    const updated = await db.query(
      `UPDATE request_tracking SET tokens_in=$1, tokens_out=$2, cost_usd=$3, created_at=$4 WHERE request_id=$5`,
      [tokensIn, tokensOut, costUsd, usedAt, requestId]
    );

    if (updated.rowCount === 0) {
      const requestLogger = createRequestLogger(db);
      await requestLogger.logRequest(requestId, caller, 'usage_import', model, 'approved', tokensIn, tokensOut, costUsd, 0);
      // Follow-up UPDATE sets created_at on the freshly logged row to the usage day.
      await db.query(`UPDATE request_tracking SET created_at=$1 WHERE request_id=$2`, [usedAt, requestId]);
    }

    return reply.status(200).send({ success: true, imported: { caller, model, day, tokensIn, tokensOut, costUsd, usedAt } });
  } catch (error) {
    logger.error({ error }, 'Failed to import usage report');
    return reply.status(500).send({ success: false, error: 'Failed to import usage report' });
  }
});
|
||||
}
|
||||
|
||||
@ -38,22 +38,40 @@ async function checkOllama(baseUrl: string): Promise<{ status: 'ok' | 'down'; la
|
||||
|
||||
async function checkDatabase(): Promise<{ status: 'ok' | 'down'; error?: string }> {
|
||||
try {
|
||||
await query('SELECT 1');
|
||||
await withTimeout(query('SELECT 1'), 2500, 'database check timed out');
|
||||
return { status: 'ok' };
|
||||
} catch (err) {
|
||||
return { status: 'down', error: err instanceof Error ? err.message : 'Unknown error' };
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Settles with the outcome of `promise`, unless `timeoutMs` milliseconds pass
 * first, in which case it rejects with `new Error(message)`.
 *
 * The underlying operation is not cancelled on timeout; only the wait is
 * abandoned. The timer is always cleared once the race settles.
 *
 * @param promise - The operation to wait for.
 * @param timeoutMs - Maximum time to wait, in milliseconds.
 * @param message - Error message used when the deadline is hit.
 * @returns The resolved value of `promise`.
 */
async function withTimeout<T>(promise: Promise<T>, timeoutMs: number, message: string): Promise<T> {
  let pendingTimer: ReturnType<typeof setTimeout> | undefined;

  // The executor runs synchronously, so the timer is armed before the race starts.
  const deadline = new Promise<T>((_resolve, reject) => {
    pendingTimer = setTimeout(() => {
      reject(new Error(message));
    }, timeoutMs);
  });

  try {
    return await Promise.race([promise, deadline]);
  } finally {
    // Clear the timer whichever side won, so it cannot keep the event loop alive.
    if (pendingTimer !== undefined) {
      clearTimeout(pendingTimer);
    }
  }
}
|
||||
|
||||
async function checkQueue(): Promise<{ status: 'ok' | 'down' | 'unknown'; depth?: number; error?: string }> {
|
||||
const boss = getPgBoss();
|
||||
if (!boss) return { status: 'unknown' };
|
||||
|
||||
try {
|
||||
const [queued, active] = await Promise.all([
|
||||
boss.getQueueSize('llm-batch', { before: 'completed' }),
|
||||
boss.getQueueSize('llm-batch', { before: 'active' }),
|
||||
]);
|
||||
const [queued, active] = await withTimeout(
|
||||
Promise.all([
|
||||
boss.getQueueSize('llm-batch', { before: 'completed' }),
|
||||
boss.getQueueSize('llm-batch', { before: 'active' }),
|
||||
]),
|
||||
2500,
|
||||
'queue check timed out',
|
||||
);
|
||||
return { status: 'ok', depth: (queued ?? 0) + (active ?? 0) };
|
||||
} catch (err) {
|
||||
return { status: 'down', error: err instanceof Error ? err.message : 'Unknown error' };
|
||||
@ -62,8 +80,10 @@ async function checkQueue(): Promise<{ status: 'ok' | 'down' | 'unknown'; depth?
|
||||
|
||||
async function getReviewQueueCount(): Promise<number> {
|
||||
try {
|
||||
const result = await query<{ count: string }>(
|
||||
'SELECT COUNT(*) as count FROM review_queue WHERE decision IS NULL',
|
||||
const result = await withTimeout(
|
||||
query<{ count: string }>('SELECT COUNT(*) as count FROM review_queue WHERE decision IS NULL'),
|
||||
2500,
|
||||
'review queue check timed out',
|
||||
);
|
||||
return parseInt(result.rows[0]?.count ?? '0', 10);
|
||||
} catch {
|
||||
@ -109,8 +129,8 @@ export async function healthRoute(fastify: FastifyInstance): Promise<void> {
|
||||
|
||||
const breakerStates = getAllBreakerStates();
|
||||
|
||||
const isDown = ollamaCheck.status === 'down' || dbCheck.status === 'down';
|
||||
const isDegraded = queueCheck.status === 'down' || Object.values(breakerStates).some((s) => s === 'open');
|
||||
const isDown = dbCheck.status === 'down';
|
||||
const isDegraded = ollamaCheck.status === 'down' || queueCheck.status === 'down' || Object.values(breakerStates).some((s) => s === 'open');
|
||||
|
||||
const status: HealthStatus['status'] = isDown ? 'down' : isDegraded ? 'degraded' : 'ok';
|
||||
|
||||
|
||||
@ -107,9 +107,22 @@ export async function registerHTTPSRedirectMiddleware(server: FastifyInstance) {
|
||||
return;
|
||||
}
|
||||
|
||||
// Skip for localhost/loopback callers (infra-health, fix-engine, internal services)
|
||||
const reqHost = String(request.headers['host'] ?? '');
|
||||
if (reqHost.startsWith('localhost') || reqHost.startsWith('127.0.0.1')) {
|
||||
const hostHeader = String(request.headers['host'] ?? '');
|
||||
const forwardedHost = String(request.headers['x-forwarded-host'] ?? '');
|
||||
const remoteAddress = request.ip ?? '';
|
||||
const host = forwardedHost || hostHeader;
|
||||
const isLoopbackHost =
|
||||
/^localhost(?::\d+)?$/i.test(host) ||
|
||||
/^127\.0\.0\.1(?::\d+)?$/.test(host) ||
|
||||
/^\[::1\](?::\d+)?$/.test(host);
|
||||
const isLoopbackRemote =
|
||||
remoteAddress === '127.0.0.1' ||
|
||||
remoteAddress === '::1' ||
|
||||
remoteAddress === '::ffff:127.0.0.1';
|
||||
|
||||
// Internal loopback callers such as Magatama Core run behind the same host
|
||||
// and must not be redirected to HTTPS unless the Gateway actually serves TLS.
|
||||
if (isLoopbackHost || isLoopbackRemote) {
|
||||
return;
|
||||
}
|
||||
|
||||
@ -120,7 +133,6 @@ export async function registerHTTPSRedirectMiddleware(server: FastifyInstance) {
|
||||
(request.headers['x-forwarded-proto'] === 'https');
|
||||
|
||||
if (!isSecure && process.env['NODE_ENV'] === 'production') {
|
||||
const host = request.headers['x-forwarded-host'] || request.headers['host'];
|
||||
return reply.redirect(`https://${host}${request.url}`);
|
||||
}
|
||||
});
|
||||
@ -132,14 +144,11 @@ export async function registerHTTPSRedirectMiddleware(server: FastifyInstance) {
|
||||
*/
|
||||
export async function registerSecurityHeadersMiddleware(server: FastifyInstance) {
|
||||
server.addHook('onSend', async (request, reply) => {
|
||||
// Content Security Policy — route handlers may set a narrower CSP before this hook.
|
||||
// Default allows 'unsafe-inline' for the dashboard UI.
|
||||
if (!reply.getHeader('Content-Security-Policy')) {
|
||||
reply.header(
|
||||
'Content-Security-Policy',
|
||||
"default-src 'self'; script-src 'self' 'unsafe-inline'; style-src 'self' 'unsafe-inline'; object-src 'none'; frame-ancestors 'none'; base-uri 'self'; form-action 'self'"
|
||||
);
|
||||
}
|
||||
// Content Security Policy for the self-contained dashboard UI.
|
||||
reply.header(
|
||||
'Content-Security-Policy',
|
||||
"default-src 'self'; script-src 'self' 'unsafe-inline'; style-src 'self' 'unsafe-inline'; object-src 'none'; frame-ancestors 'none'; base-uri 'self'; form-action 'self'"
|
||||
);
|
||||
|
||||
// Prevent clickjacking
|
||||
reply.header('X-Frame-Options', 'DENY');
|
||||
|
||||
@ -12,11 +12,23 @@ import { dashboardRoute } from './routes/dashboard.js';
|
||||
import { streamRoute } from './routes/stream.js';
|
||||
import { learningInsightsRoute } from './routes/learning-insights.js';
|
||||
import { staticRoute } from './routes/static.js';
|
||||
import tenantAuth from './security/tenant-auth.js';
|
||||
import { internalRoute } from './routes/internal.js';
|
||||
import { getPool } from './db/client.js';
|
||||
import { runMigrations } from './db/migrate.js';
|
||||
import { initPgBoss } from './queue/pg-boss-client.js';
|
||||
import { logger } from './observability/logger.js';
|
||||
import { scheduleLearningCycles } from './learning/learning-engine.js';
|
||||
import { autoSpawnOnBoot } from './modules/auto-discovery.js';
|
||||
import { embeddingsRoute } from './routes/embeddings.js';
|
||||
import { replayRoute } from './routes/replay.js';
|
||||
import { audioRoute } from './routes/audio.js';
|
||||
import { mcpRoute } from './modules/mcp-server.js';
|
||||
import { loadWorkspacePreset, applyWorkspaceDefaults } from './modules/workspace-presets.js';
|
||||
import { loadPlugins } from './modules/plugin-system.js';
|
||||
import { ingestPeerStats, scheduleFederationPublisher, buildStats } from './modules/federated-stats.js';
|
||||
import { scheduleAdaptiveLearner, getAllRecommendations } from './modules/adaptive-routing.js';
|
||||
import { startBridgeWatchdog } from './modules/bridge-watchdog.js';
|
||||
import { fileURLToPath } from 'url';
|
||||
import { dirname, join } from 'path';
|
||||
import { readFileSync, existsSync } from 'fs';
|
||||
@ -93,9 +105,11 @@ async function buildServer() {
|
||||
'http://192.168.178.196:3000',
|
||||
/^http:\/\/192\.168\.178\.\d+/,
|
||||
/^https:\/\/.*\.context-x\.org$/,
|
||||
/^https:\/\/(www\.)?runwerk\.app$/,
|
||||
/^https:\/\/.*\.runwerk\.app$/,
|
||||
],
|
||||
methods: ['GET', 'POST', 'PUT', 'DELETE', 'OPTIONS'],
|
||||
allowedHeaders: ['Content-Type', 'Authorization', 'X-Caller-ID'],
|
||||
allowedHeaders: ['Content-Type', 'Authorization', 'X-Caller-ID', 'X-Runwerk-Caller', 'X-Runwerk-Privacy', 'X-Runwerk-Tier', 'X-Runwerk-Purpose'],
|
||||
credentials: true,
|
||||
});
|
||||
|
||||
@ -114,7 +128,17 @@ async function buildServer() {
|
||||
}),
|
||||
});
|
||||
|
||||
await server.register(tenantAuth);
|
||||
await server.register(internalRoute);
|
||||
await server.register(completionRoute, { prefix: '/v1' });
|
||||
await server.register(embeddingsRoute, { prefix: '/v1' });
|
||||
await server.register(replayRoute, { prefix: '/v1' });
|
||||
await server.register(audioRoute, { prefix: '/v1' });
|
||||
await server.register(mcpRoute);
|
||||
server.post('/v1/federation/ingest', async (request, reply) => {
|
||||
const result = ingestPeerStats(request.body as never);
|
||||
return reply.send({ success: true, ...result });
|
||||
});
|
||||
await server.register(batchRoute, { prefix: '/v1' });
|
||||
await server.register(classifyRoute, { prefix: '/v1' });
|
||||
await server.register(reviewRoute, { prefix: '/v1' });
|
||||
@ -193,9 +217,54 @@ async function main() {
|
||||
} catch (pgErr) {
|
||||
logger.warn({ pgErr }, 'PgBoss init failed - continuing without queue');
|
||||
}
|
||||
// Workspace preset (apply env defaults from workspace.yaml if present)
|
||||
try {
|
||||
const preset = await loadWorkspacePreset();
|
||||
if (preset) applyWorkspaceDefaults(preset);
|
||||
} catch (err) {
|
||||
logger.warn({ err }, 'Workspace preset load failed (non-fatal)');
|
||||
}
|
||||
|
||||
// Plugin system (load pre/post hooks from PLUGINS_DIR)
|
||||
try {
|
||||
await loadPlugins();
|
||||
} catch (err) {
|
||||
logger.warn({ err }, 'Plugin loading failed (non-fatal)');
|
||||
}
|
||||
|
||||
scheduleLearningCycles();
|
||||
await server.listen({ port, host });
|
||||
logger.info({ port, host }, 'LLM Gateway started');
|
||||
|
||||
// Auto-spawn detected subscription bridges if AUTO_SPAWN_BRIDGES=1
|
||||
void autoSpawnOnBoot();
|
||||
|
||||
// Bridge watchdog (opt-in via WATCHDOG_ENABLED=1)
|
||||
try {
|
||||
startBridgeWatchdog();
|
||||
} catch (err) {
|
||||
logger.warn({ err }, 'Bridge watchdog start failed');
|
||||
}
|
||||
|
||||
// Adaptive routing learner (opt-in via ADAPTIVE_ROUTING_ENABLED=1)
|
||||
try {
|
||||
const pool = getPool();
|
||||
scheduleAdaptiveLearner(pool as never);
|
||||
} catch (err) {
|
||||
logger.warn({ err }, 'Adaptive learner scheduling failed');
|
||||
}
|
||||
|
||||
// Federation publisher (opt-in via FEDERATION_ENABLED=1)
|
||||
scheduleFederationPublisher(async () => {
|
||||
const recos = getAllRecommendations();
|
||||
return buildStats(recos.map((r) => ({
|
||||
task_type: r.taskType,
|
||||
model_used: r.preferredModel,
|
||||
samples: r.rationale.samples,
|
||||
success_rate: r.rationale.successRate,
|
||||
avg_latency_ms: r.rationale.avgLatencyMs,
|
||||
})));
|
||||
});
|
||||
} catch (err) {
|
||||
logger.error({ err }, 'Failed to start server');
|
||||
process.exit(1);
|
||||
|
||||
@ -1,5 +1,5 @@
|
||||
// Tokenvault Integration Hooks
|
||||
// Instruments LeanCTX and RTK compression tracking
|
||||
// Instruments LLM Gateway compression tracking (legacy hook names retained for backward compat)
|
||||
// Updated: 2026-04-19
|
||||
|
||||
import { Pool, QueryResult } from 'pg';
|
||||
@ -62,13 +62,13 @@ export function estimateTokens(text: string | object): number {
|
||||
}
|
||||
|
||||
/**
|
||||
* Log compression ratio for RTK output
|
||||
* Log compression ratio for token-trim output
|
||||
*/
|
||||
export async function logRTKCompression(
|
||||
export async function logGatewayTrimCompression(
|
||||
db: Pool,
|
||||
rawOutput: string,
|
||||
compressedOutput: string,
|
||||
toolUsed: string = 'rtk'
|
||||
toolUsed: string = 'llm-gateway-trim'
|
||||
): Promise<CompressionMetric> {
|
||||
const tokensBefore = estimateTokens(rawOutput);
|
||||
const tokensAfter = estimateTokens(compressedOutput);
|
||||
@ -93,9 +93,9 @@ export async function logRTKCompression(
|
||||
}
|
||||
|
||||
/**
|
||||
* Track LeanCTX file read operations
|
||||
* Track gateway file-read operations
|
||||
*/
|
||||
export async function logLeanCTXRead(
|
||||
export async function logGatewayFileRead(
|
||||
db: Pool,
|
||||
filePath: string,
|
||||
mode: string,
|
||||
@ -115,7 +115,7 @@ export async function logLeanCTXRead(
|
||||
tokensBefore: rawTokens,
|
||||
tokensAfter: compressedTokens,
|
||||
savingsPct,
|
||||
toolUsed: 'lean-ctx'
|
||||
toolUsed: 'llm-gateway'
|
||||
};
|
||||
|
||||
await logCompressionMetric(db, metric);
|
||||
@ -207,7 +207,7 @@ export async function getCompressionStats(
|
||||
tool_used,
|
||||
COUNT(*) as count
|
||||
FROM tokenvault_metrics
|
||||
WHERE created_at > NOW() - INTERVAL $1 HOUR
|
||||
WHERE created_at > NOW() - ($1 * INTERVAL '1 hour')
|
||||
GROUP BY tool_used`,
|
||||
[hoursBack]
|
||||
);
|
||||
@ -270,7 +270,7 @@ export async function getCostSummary(
|
||||
project,
|
||||
SUM(CASE WHEN cost_usd > 0 THEN 1 ELSE 0 END) as paid_tasks
|
||||
FROM cost_analytics
|
||||
WHERE created_at > NOW() - INTERVAL $1 HOUR
|
||||
WHERE created_at > NOW() - ($1 * INTERVAL '1 hour')
|
||||
GROUP BY project`,
|
||||
[hoursBack]
|
||||
);
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user