From c7c457ae2a5232816f9ea06ed3ef304e58c83dfd Mon Sep 17 00:00:00 2001 From: Rene Fichtmueller Date: Fri, 5 Jun 2026 21:07:57 +0000 Subject: [PATCH] feat: merge Gitea main (injection-defense, bridges, dashboard) + Erik WIP features Reconcile 6-week divergence: Gitea main (injection-defense, output-defense, prompt-guard-client, admin-auth, start-with-env, dashboard-v2, savings-calculator, race-mode, gamification + 13 more modules) merged with Erik's deployed features (usage-report endpoint, per-device entries, CEST timezone, cost-panel, bridge routing). ecosystem.config.cjs excluded (live token, never commit). --- Dockerfile | 1 - docker-compose.yaml | 16 + package-lock.json | 57 ++- package.json | 2 +- packages/gateway/package.json | 5 +- .../prompts/templates/linkedin_post.yaml | 112 +++-- packages/gateway/public/dashboard.html | 451 +++++++++++++++++- packages/gateway/src/config/models.yaml | 71 +-- .../gateway/src/config/routing-rules.yaml | 4 +- .../002-tokenvault-cost-tracking.sql | 2 +- packages/gateway/src/db/schema-extensions.sql | 8 +- .../src/observability/cost-calculator.ts | 2 +- .../src/pipeline/external-providers.ts | 4 +- .../gateway/src/pipeline/request-scorer.ts | 36 +- packages/gateway/src/pipeline/router.ts | 9 +- packages/gateway/src/routes/completion.ts | 25 +- packages/gateway/src/routes/dashboard.ts | 146 +++++- packages/gateway/src/routes/health.ts | 38 +- packages/gateway/src/security/tls-config.ts | 33 +- packages/gateway/src/server.ts | 71 ++- .../gateway/src/utils/tokenvault-hooks.ts | 18 +- 21 files changed, 940 insertions(+), 171 deletions(-) diff --git a/Dockerfile b/Dockerfile index 865d4bc..58a421b 100644 --- a/Dockerfile +++ b/Dockerfile @@ -36,7 +36,6 @@ COPY --from=builder /app/packages/gateway/dist ./packages/gateway/dist # Copy production node_modules COPY --from=builder /app/node_modules ./node_modules -COPY --from=builder /app/packages/gateway/node_modules ./packages/gateway/node_modules 2>/dev/null || true # Copy runtime assets (prompt templates, config) COPY packages/gateway/prompts ./packages/gateway/prompts diff --git a/docker-compose.yaml b/docker-compose.yaml index 68f5c9b..618febe 100644 --- a/docker-compose.yaml +++ b/docker-compose.yaml @@ -4,15 +4,31 @@ services: container_name: llm-gateway ports: - "3100:3100" + extra_hosts: + - "host.docker.internal:host-gateway" environment: NODE_ENV: production PORT: "3100" DATABASE_URL: "${DATABASE_URL}" TIP_DATABASE_URL: "${TIP_DATABASE_URL}" OLLAMA_URL: "http://192.168.178.169:11434" + OLLAMA_BASE_URL: "${OLLAMA_BASE_URL:-https://ollama.fichtmueller.org}" + CLAUDE_BRIDGE_ENABLED: "true" + CLAUDE_BRIDGE_URL: "${CLAUDE_BRIDGE_URL:-http://host.docker.internal:3250}" + CLAUDE_CODE_URL: "${CLAUDE_CODE_URL:-http://host.docker.internal:3250}" + OPENAI_BRIDGE_URL: "${OPENAI_BRIDGE_URL:-http://host.docker.internal:3251}" + CHATGPT_BRIDGE_URL: "${CHATGPT_BRIDGE_URL:-http://host.docker.internal:3251}" + COPILOT_BRIDGE_URL: "${COPILOT_BRIDGE_URL:-http://host.docker.internal:3252}" + GEMINI_BRIDGE_URL: "${GEMINI_BRIDGE_URL:-http://host.docker.internal:3254}" + CODEX_BRIDGE_URL: "${CODEX_BRIDGE_URL:-http://host.docker.internal:3253}" + OPENAI_CODEX_URL: "${OPENAI_CODEX_URL:-http://host.docker.internal:3253}" + AIDER_BRIDGE_URL: "${AIDER_BRIDGE_URL:-http://host.docker.internal:3256}" SHIELDX_URL: "${SHIELDX_URL:-}" GITEA_URL: "http://gitea.context-x.org" LOG_LEVEL: "${LOG_LEVEL:-info}" + DASHBOARD_AUTH_TOKEN: "${DASHBOARD_AUTH_TOKEN:-}" + REFERENCE_INPUT_COST_PER_1K: "${REFERENCE_INPUT_COST_PER_1K:-0.005}" + REFERENCE_OUTPUT_COST_PER_1K: "${REFERENCE_OUTPUT_COST_PER_1K:-0.015}" restart: unless-stopped healthcheck: test: ["CMD", "wget", "-q", "-O-", "http://localhost:3100/health/live"] diff --git a/package-lock.json b/package-lock.json index f0aba6d..7a066da 100644 --- a/package-lock.json +++ b/package-lock.json @@ -11,10 +11,10 @@ "packages/*" ], "dependencies": { - "jose": "^6.2.2" + "jose": "^6.2.3" } }, - "../../../shieldx": { + "../../shieldx": { "extraneous": true }, "node_modules/@esbuild/darwin-arm64": { @@ -305,6 +305,10 @@ "resolved": "packages/codex-lsp-adapter", "link": true }, + "node_modules/@llm-gateway/companion": { + "resolved": "packages/companion", + "link": true + }, "node_modules/@llm-gateway/ctx-health": { "resolved": "packages/ctx-health", "link": true @@ -321,6 +325,10 @@ "resolved": "packages/learning-integration", "link": true }, + "node_modules/@llm-gateway/mcp-server": { + "resolved": "packages/mcp-server", + "link": true + }, "node_modules/@llm-gateway/prompt-optimizer": { "resolved": "packages/prompt-optimizer", "link": true @@ -1127,6 +1135,8 @@ }, "node_modules/fastify-plugin": { "version": "5.1.0", + "resolved": "https://registry.npmjs.org/fastify-plugin/-/fastify-plugin-5.1.0.tgz", + "integrity": "sha512-FAIDA8eovSt5qcDgcBvDuX/v0Cjz0ohGhENZ/wpc3y+oZCY2afZ9Baqql3g/lC+OHRnciQol4ww7tuthOb9idw==", "funding": [ { "type": "github", @@ -1475,9 +1485,9 @@ } }, "node_modules/jose": { - "version": "6.2.2", - "resolved": "https://registry.npmjs.org/jose/-/jose-6.2.2.tgz", - "integrity": "sha512-d7kPDd34KO/YnzaDOlikGpOurfF0ByC2sEV4cANCtdqLlTfBlw2p14O/5d/zv40gJPbIQxfES3nSx1/oYNyuZQ==", + "version": "6.2.3", + "resolved": "https://registry.npmjs.org/jose/-/jose-6.2.3.tgz", + "integrity": "sha512-YYVDInQKFJfR/xa3ojUTl8c2KoTwiL1R5Wg9YCydwH0x0B9grbzlg5HC7mMjCtUJjbQ/YnGEZIhI5tCgfTb4Hw==", "license": "MIT", "funding": { "url": "https://github.com/sponsors/panva" @@ -3178,6 +3188,21 @@ "node": ">=0.4" } }, + "node_modules/yaml": { + "version": "2.9.0", + "resolved": "https://registry.npmjs.org/yaml/-/yaml-2.9.0.tgz", + "integrity": "sha512-2AvhNX3mb8zd6Zy7INTtSpl1F15HW6Wnqj0srWlkKLcpYl/gMIMJiyuGq2KeI2YFxUPjdlB+3Lc10seMLtL4cA==", + "license": "ISC", + "bin": { + "yaml": "bin.mjs" + }, + "engines": { + "node": ">= 14.6" + }, + "funding": { + "url": "https://github.com/sponsors/eemeli" + } + }, "node_modules/yocto-queue": { "version": "1.2.2", "resolved": "https://registry.npmjs.org/yocto-queue/-/yocto-queue-1.2.2.tgz", @@ -4086,6 +4111,16 @@ } } }, + "packages/companion": { + "name": "@llm-gateway/companion", + "version": "1.0.0", + "bin": { + "llm-gateway-companion": "bin/llm-gateway-companion.js" + }, + "engines": { + "node": ">=18" + } + }, "packages/ctx-health": { "name": "@llm-gateway/ctx-health", "version": "1.0.0", @@ -4114,6 +4149,7 @@ "@fastify/static": "^8.3.0", "ajv": "^8.17.1", "fastify": "^5.8.5", + "fastify-plugin": "^5.1.0", "franc": "^6.2.0", "jose": "^5.4.0", "js-yaml": "^4.1.0", @@ -4122,6 +4158,7 @@ "pg-boss": "^10.1.3", "pino": "^9.5.0", "prom-client": "^15.1.3", + "yaml": "^2.9.0", "zod": "^3.23.8" }, "devDependencies": { @@ -4448,6 +4485,16 @@ } } }, + "packages/mcp-server": { + "name": "@llm-gateway/mcp-server", + "version": "1.0.0", + "bin": { + "llm-gateway-mcp": "bin/llm-gateway-mcp.js" + }, + "engines": { + "node": ">=18" + } + }, "packages/prompt-optimizer": { "name": "@llm-gateway/prompt-optimizer", "version": "0.1.0", diff --git a/package.json b/package.json index b24c7ac..41552a6 100644 --- a/package.json +++ b/package.json @@ -18,6 +18,6 @@ "ctx-health:dev": "npm run dev --workspace=packages/ctx-health" }, "dependencies": { - "jose": "^6.2.2" + "jose": "^6.2.3" } } diff --git a/packages/gateway/package.json b/packages/gateway/package.json index 1527ac7..22ba807 100644 --- a/packages/gateway/package.json +++ b/packages/gateway/package.json @@ -7,7 +7,8 @@ "build": "tsc && npm run build:copy-assets", "build:copy-assets": "mkdir -p dist/db/migrations dist/config dist/public && cp -r src/db/migrations/*.sql dist/db/migrations/ 2>/dev/null || true && cp -r src/config/*.yaml dist/config/ 2>/dev/null || true && cp -r public/* dist/public/ 2>/dev/null || true", "start": "node dist/server.js", - "test": "vitest" + "test": "vitest", + "prestart": "node scripts/check-build-drift.mjs" }, "dependencies": { "@fastify/cors": "^10.1.0", @@ -16,6 +17,7 @@ "@fastify/static": "^8.3.0", "ajv": "^8.17.1", "fastify": "^5.8.5", + "fastify-plugin": "^5.1.0", "franc": "^6.2.0", "jose": "^5.4.0", "js-yaml": "^4.1.0", @@ -24,6 +26,7 @@ "pg-boss": "^10.1.3", "pino": "^9.5.0", "prom-client": "^15.1.3", + "yaml": "^2.9.0", "zod": "^3.23.8" }, "devDependencies": { diff --git a/packages/gateway/prompts/templates/linkedin_post.yaml b/packages/gateway/prompts/templates/linkedin_post.yaml index 7c3a598..e98adb2 100644 --- a/packages/gateway/prompts/templates/linkedin_post.yaml +++ b/packages/gateway/prompts/templates/linkedin_post.yaml @@ -1,63 +1,105 @@ id: linkedin_post -version: "1.0.0" +version: "2.0.0" task_type: linkedin_post +description: "LinkedIn teaser in Rene Fichtmueller's voice. Anti-AI, anti-marketing, technical, direct." system_prompt: | - You are a professional LinkedIn content writer. Write engaging, authentic posts that sound human. + You write a single short LinkedIn post in Rene Fichtmueller's voice. Rene is a network/optics engineer who blogs at blog.fichtmueller.org. His voice is direct, technical, sometimes contrarian, never marketing. - Rules: - - Maximum 1300 characters (LinkedIn soft limit) - - No hashtag spam (max 3 relevant hashtags) - - No engagement-bait questions at the end - - No "In today's fast-paced world" openings - - Write in first person, direct and confident tone - - Include a clear value point or insight - - Current date: {{current_date}} + HARD RULES — do not violate: + - 2 to 3 short sentences. Maximum 4. Period. + - No hashtags. None. Not at the end, not anywhere. + - No emojis. Not even one. + - No engagement-bait. Do not end with "What do you think?", "Thoughts?", "Have you seen this?". + - No call-to-action language ("Check it out", "Read more", "Don't miss"). + - No meta-references to the blog post itself: do not write "I wrote about this", "I published a piece", "I broke this down", "more in the article". + - End with the URL on its own line. Nothing after the URL. + + BANNED PHRASES — never use any of these: + - delve, leverage, robust, journey, embark, paradigm, unlock, seamlessly, holistic, harness, foster, amplify, underscore, indelible, profound, intricate, meticulous, testament, vibrant, bespoke, encompass, hitherto, realm, utilize, synergy + - "leaving money on the table" + - "until it's too late" + - "the line item most X skip" + - "turns out" + - "the unexpected part is" + - "the gap between X and Y is wider than" + - "in today's fast-paced", "in the world of", "in the realm of" + - "it's important to note", "it's worth noting" + - "let's dive into", "let's explore" + - "the future of X", "the next generation of X" (unless quoting someone) + - "game-changer", "cutting-edge", "groundbreaking", "comprehensive" + + TONE — match these traits: + - Specific numbers over generalities. 20W is better than "high power". 14 weeks is better than "long lead time". + - Named products, standards, RFCs when relevant. 400ZR+, RPKI, IEEE 802.3. + - First person ("I", "my", "we") where genuine. + - Short sentences. Period. Short sentences. Period. + - Concession sometimes: admit what you don't know or what surprised you. + - Closing line stands on its own. No qualifier, no hedge. + + Current date: {{current_date}} {{few_shot_examples}} system_prompt_de: | - Du bist ein professioneller LinkedIn-Content-Writer. Schreibe authentische, menschlich klingende Beiträge. + Du schreibst einen kurzen LinkedIn-Post in der Stimme von Rene Fichtmueller. Direkt, technisch, manchmal contrarian, nie Marketing. - Regeln: - - Maximal 1300 Zeichen (LinkedIn Soft-Limit) - - Keine Hashtag-Spam (max. 3 relevante Hashtags) - - Keine Engagement-Bait-Fragen am Ende - - Keine Einstiege mit "In der heutigen schnelllebigen Welt" - - Schreibe in der Ich-Perspektive, direkt und selbstsicher - - Enthalte einen klaren Mehrwert oder Einblick - - Aktuelles Datum: {{current_date}} + HARTE REGELN — nie verletzen: + - 2 bis 3 kurze Sätze. Maximal 4. Punkt. + - Keine Hashtags. Keine. Nirgendwo. + - Keine Emojis. Auch nicht einer. + - Kein Engagement-Bait. Niemals enden mit "Was meint ihr?", "Eure Erfahrung?". + - Keine Call-to-Action-Sprache ("Schaut mal rein", "Hier mehr lesen"). + - Keine Meta-Referenzen auf den Blog-Post: kein "Ich habe dazu geschrieben", "Mehr im Artikel". + - URL alleine in der letzten Zeile. Nichts danach. + + VERBOTENE WORTE/PHRASEN: + - "leverage", "delve", "robust", "harness", "navigieren", "Reise", "Paradigma", "freischalten", "ganzheitlich", "Synergie", "umfassend" + - "in der heutigen schnelllebigen Welt" + - "es lohnt sich zu erwähnen" + + TON: + - Konkrete Zahlen statt Verallgemeinerungen. + - Erste Person wo authentisch. + - Kurze Sätze. Punkt. + - Konzession wo möglich: zugeben was überrascht hat. + + Aktuelles Datum: {{current_date}} user_template: | - Topic/Input: + Article info: {{input}} - Context: - {{user_context}} - - Write a LinkedIn post: + Write the LinkedIn post now. 2-3 sentences. No hashtags. URL on last line. Stop after the URL. user_template_de: | - Thema/Eingabe: + Artikel-Infos: {{input}} - Kontext: - {{user_context}} - - Schreibe einen LinkedIn-Beitrag: + Schreibe den LinkedIn-Post jetzt. 2-3 Sätze. Keine Hashtags. URL in der letzten Zeile. Stopp nach der URL. few_shot_examples: - - user: "We deployed a new BGP monitoring system last week" + - user: "Article info:\nTitle: BGP Route Leak Hit My Network Last Tuesday\nExcerpt: A transit provider re-advertised our customer cone to the wrong peers. Detection took 4 hours. The mitigation took 14 minutes once we saw it.\nURL: https://blog.fichtmueller.org/example1/" assistant: | - We spent 18 months dealing with BGP route leaks that cost us 4 hours of network visibility per incident. + A transit provider re-advertised our customer cone to the wrong peers last Tuesday. We caught it after 4 hours. The fix took 14 minutes once we saw it — the cost was the 4 hours we didn't. - Last week we deployed our own monitoring system. First result: caught a route leak from a transit provider within 3 minutes, before any customer noticed. + https://blog.fichtmueller.org/example1/ - The fix wasn't sophisticated. It was systematic alerting on RPKI invalid routes combined with PeeringDB cross-referencing. + - user: "Article info:\nTitle: 400ZR Reach Math Nobody Shows You\nExcerpt: Datasheets quote a maximum reach number. That number is for a clean link with margin you don't have.\nURL: https://blog.fichtmueller.org/example2/" + assistant: | + The 400ZR datasheet number is a ceiling, not a target. Real-world links have eight connectors, two amplifier hops, and aged fiber — the spec assumes none of that. - If you're running a mid-size ISP and still relying on customer tickets to discover routing issues, that's the gap worth closing. + Build to the budget you actually have, not the one the marketing slide showed you. - #BGP #NetworkEngineering #ISP + https://blog.fichtmueller.org/example2/ + + - user: "Article info:\nTitle: ADHD Diary #008: The 47-Tabs Problem\nExcerpt: I closed 47 browser tabs Tuesday. By Wednesday I had 51 again. This is not a tool problem.\nURL: https://blog.fichtmueller.org/example3/" + assistant: | + I closed 47 browser tabs on Tuesday. By Wednesday morning I had 51 again. + + This isn't a tool problem. Every tab is a "I might do this later" promise I made to myself. The fix isn't a better tab manager — it's saying no. + + https://blog.fichtmueller.org/example3/ variables: - input diff --git a/packages/gateway/public/dashboard.html b/packages/gateway/public/dashboard.html index ca6958e..7d231f0 100644 --- a/packages/gateway/public/dashboard.html +++ b/packages/gateway/public/dashboard.html @@ -1,6 +1,10 @@ + llm.gateway / workbench @@ -127,13 +131,11 @@ /* ─── Tab navigation ──────────────────────────────────────────────────── */ .tabs { display: flex; + flex-wrap: wrap; gap: 0; border-bottom: 1px solid var(--line); margin: 0 0 28px; - overflow-x: auto; - scrollbar-width: none; } - .tabs::-webkit-scrollbar { display: none; } .tab-trigger { background: none; border: none; @@ -498,7 +500,7 @@ body.hide-empty-providers .wallet-card[data-status="unknown"] { display: none; } /* In Simple Mode, hide the noisy "5-axis" header explainer */ - body.simple-mode .h-section .h-meta:contains('Lean-CTX') { display: none; } + body.simple-mode .h-section .h-meta:contains('LLM Gateway') { display: none; } /* ─── Hero (Buddy + Savings + Cost-VS) ───────────────────────────────── */ .hero-grid { @@ -1066,6 +1068,99 @@ font-size: 0.8rem; } + /* ─── Discover Panel ──────────────────────────────────────────────── */ + .discover-grid { + display: grid; grid-template-columns: repeat(auto-fit, minmax(280px, 1fr)); + gap: 12px; margin-bottom: 16px; + } + .discover-card { + border: 1px solid var(--line-2); + border-radius: 10px; + padding: 12px 14px; + background: var(--surface-1, rgba(255,255,255,0.02)); + } + .discover-card-title { + font-size: 0.72rem; color: var(--text-muted, #888); + text-transform: uppercase; letter-spacing: 0.08em; + margin-bottom: 4px; + } + .discover-card-stat { + font-family: var(--mono); font-size: 1.4rem; + color: var(--accent); margin-bottom: 8px; + } + .discover-card-list { + list-style: none; padding: 0; margin: 0; + font-size: 0.78rem; font-family: var(--mono); + } + .discover-card-list li { + padding: 4px 0; + border-top: 1px solid var(--line-1, rgba(255,255,255,0.05)); + display: flex; justify-content: space-between; align-items: center; + } + .discover-card-list li:first-child { border-top: none; } + .discover-card-list .disc-ok { color: var(--accent); } + .discover-card-list .disc-no { color: var(--text-muted, #888); opacity: 0.6; } + + /* ─── API Tab ──────────────────────────────────────────────────────── */ + .api-card { + border: 1px solid var(--line-2); + border-radius: 10px; + padding: 14px 16px; + margin-bottom: 14px; + background: var(--surface-1, rgba(255,255,255,0.02)); + } + .api-card-head { + display: flex; align-items: center; gap: 10px; flex-wrap: wrap; + margin-bottom: 10px; + } + .api-method { + font-family: var(--mono); font-size: 0.7rem; font-weight: 700; + padding: 3px 8px; border-radius: 4px; + background: var(--accent); color: #fff; letter-spacing: 0.05em; + } + .api-path { + font-family: var(--mono); font-size: 0.92rem; + color: var(--text); + } + .api-tag { + font-size: 0.72rem; color: var(--text-muted, #888); + font-style: italic; flex: 1; + } + .api-snippet { + font-family: var(--mono); font-size: 0.8rem; + background: var(--surface-2, rgba(0,0,0,0.25)); + border: 1px solid var(--line-1, rgba(255,255,255,0.05)); + padding: 12px 14px; border-radius: 6px; + overflow-x: auto; white-space: pre; + color: var(--text); margin: 0; + } + .api-snippet code { background: transparent; padding: 0; } + .api-copy { padding: 4px 12px; font-size: 0.7rem; } + + .api-tryout { + border: 1px solid var(--line-2); + border-radius: 10px; + padding: 14px 16px; + background: var(--surface-1, rgba(255,255,255,0.02)); + } + .api-tryout-row { display: flex; flex-wrap: wrap; align-items: center; } + + .api-bridge-table-wrap { overflow-x: auto; border: 1px solid var(--line-2); border-radius: 10px; } + .api-bridge-table { + width: 100%; border-collapse: collapse; font-size: 0.85rem; + } + .api-bridge-table th, .api-bridge-table td { + padding: 10px 12px; text-align: left; border-bottom: 1px solid var(--line-1, rgba(255,255,255,0.05)); + } + .api-bridge-table th { + font-weight: 600; color: var(--text-muted, #888); + text-transform: uppercase; letter-spacing: 0.05em; font-size: 0.72rem; + } + .api-bridge-table tr:last-child td { border-bottom: none; } + .api-bridge-status { font-family: var(--mono); font-size: 0.78rem; } + .api-bridge-status.ok { color: var(--accent); } + .api-bridge-status.err { color: #e34; } + /* ─── Buttons ────────────────────────────────────────────────────────── */ .btn { font-family: var(--mono); @@ -1338,6 +1433,7 @@ + @@ -1356,7 +1452,7 @@
0tokens
⚡ Gateway (LLM calls)0
- +
@@ -1392,8 +1488,8 @@
- -

Savings Sources we measure 5 axes — Lean-CTX measures 1

+ +

Savings Sources 5 measurement axes across all calls

loading
@@ -1472,8 +1568,34 @@ auto-gateway detection only — installed CLI subscriptions are wrapped into HTTP bridges and exposed via /v1/chat/completions - +
+ + +
+ + + +
discovering installed subscriptions
@@ -1567,6 +1689,11 @@
0%
hits ÷ total req
+
+
compressed since last restart
+
0
+
— · — ops · since —
+

Top Caching Callers most savings

@@ -1675,6 +1802,136 @@
Tip: in the report window, press Cmd/Ctrl+P → "Save as PDF". The report is fully styled for A4 print.
+ +
+

API Reference all endpoints route through compression + caller tracking

+ +
+ The LLM Gateway exposes three POST endpoints and one GET. Every call is logged in + activity, compressed when input ≥ 700 tokens, and routed via routing-rules.yaml + to the right subscription bridge (Claude Code, ChatGPT, Copilot, M365 Copilot, Codex) or local Ollama. +
+ + +
+
+ POST + /v1/chat/completions + OpenAI-compatible · works with `openai` SDK + +
+
curl https://llm-gateway.context-x.org/v1/chat/completions \
+  -H "Content-Type: application/json" \
+  -d '{
+    "model": "claude-sonnet-4.6",
+    "messages": [{"role": "user", "content": "hi"}]
+  }'
+
+ + +
+
+ POST + /v1/messages + Anthropic-compatible · works with `@anthropic-ai/sdk` + +
+
curl https://llm-gateway.context-x.org/v1/messages \
+  -H "Content-Type: application/json" \
+  -d '{
+    "model": "claude-sonnet-4.6",
+    "messages": [{"role": "user", "content": "hi"}],
+    "max_tokens": 1024
+  }'
+
+ + +
+
+ POST + /v1/completion + native — full caller-tracking + compression options + +
+
curl https://llm-gateway.context-x.org/v1/completion \
+  -H "Content-Type: application/json" \
+  -d '{
+    "caller": "my-app",
+    "task_type": "generic_qa",
+    "input": "your prompt here",
+    "options": { "compression": { "enabled": true, "mode": "auto" } }
+  }'
+
+ + +
+
+ GET + /v1/models + list every model the gateway can route to + +
+
curl https://llm-gateway.context-x.org/v1/models
+
+ + +

Try it out live POST against the gateway

+
+
+ + +
+ +
+ + +
+ +
+ + +

Model → Bridge Mapping which subscription each model alias routes to

+
+ + + + + + + + + + + + + + + + + + +
Model aliasBridgeSubscription usedPortStatus
claude-sonnet-4.6, claude-haiku, claude-opusclaude-bridgeClaude Code Max (OAuth)3250
gpt-4o, gpt-4.1, gpt-5.xopenai-bridgeChatGPT Plus / Pro3251
copilot-gpt-4o, copilot-claude-3.7copilot-bridgeGitHub Copilot3252
codex-mini, gpt-5.1-codex-minicodex-bridgeOpenAI Codex CLI3253
m365-copilotm365-copilot-bridgeMicrosoft 365 Copilot3257
qwen2.5:3b / 7b / 14b / 32b, magatama:32b, magatama-coderollama (Mac Studio)local — no cost11434
+
+ +
+