chore: commit deployed gateway state (dashboard, streaming, routing, bridges, cost-tracking)
Live production state on Erik that had drifted from Gitea — deployed across several sessions but never committed. Excludes deploy/ecosystem.config.cjs (holds live tokens).
- dashboard: passive usage-report endpoint, per-device entries, CEST timezone, cost-panel rounding
- completion: SSE + HTTP/2 streaming
- pipeline: routing-rules, request-scorer, external-providers (subscription bridges)
- cost-tracking: tokenvault migration, cost-calculator, request-logger
- infra: docker-compose bridge env, server/health/tls, deps
This commit is contained in:
parent
91384dbb2a
commit
0191c60b64
@ -36,7 +36,6 @@ COPY --from=builder /app/packages/gateway/dist ./packages/gateway/dist
|
|||||||
|
|
||||||
# Copy production node_modules
|
# Copy production node_modules
|
||||||
COPY --from=builder /app/node_modules ./node_modules
|
COPY --from=builder /app/node_modules ./node_modules
|
||||||
COPY --from=builder /app/packages/gateway/node_modules ./packages/gateway/node_modules 2>/dev/null || true
|
|
||||||
|
|
||||||
# Copy runtime assets (prompt templates, config)
|
# Copy runtime assets (prompt templates, config)
|
||||||
COPY packages/gateway/prompts ./packages/gateway/prompts
|
COPY packages/gateway/prompts ./packages/gateway/prompts
|
||||||
|
|||||||
@ -4,15 +4,31 @@ services:
|
|||||||
container_name: llm-gateway
|
container_name: llm-gateway
|
||||||
ports:
|
ports:
|
||||||
- "3100:3100"
|
- "3100:3100"
|
||||||
|
extra_hosts:
|
||||||
|
- "host.docker.internal:host-gateway"
|
||||||
environment:
|
environment:
|
||||||
NODE_ENV: production
|
NODE_ENV: production
|
||||||
PORT: "3100"
|
PORT: "3100"
|
||||||
DATABASE_URL: "${DATABASE_URL}"
|
DATABASE_URL: "${DATABASE_URL}"
|
||||||
TIP_DATABASE_URL: "${TIP_DATABASE_URL}"
|
TIP_DATABASE_URL: "${TIP_DATABASE_URL}"
|
||||||
OLLAMA_URL: "http://192.168.178.169:11434"
|
OLLAMA_URL: "http://192.168.178.169:11434"
|
||||||
|
OLLAMA_BASE_URL: "${OLLAMA_BASE_URL:-https://ollama.fichtmueller.org}"
|
||||||
|
CLAUDE_BRIDGE_ENABLED: "true"
|
||||||
|
CLAUDE_BRIDGE_URL: "${CLAUDE_BRIDGE_URL:-http://host.docker.internal:3250}"
|
||||||
|
CLAUDE_CODE_URL: "${CLAUDE_CODE_URL:-http://host.docker.internal:3250}"
|
||||||
|
OPENAI_BRIDGE_URL: "${OPENAI_BRIDGE_URL:-http://host.docker.internal:3251}"
|
||||||
|
CHATGPT_BRIDGE_URL: "${CHATGPT_BRIDGE_URL:-http://host.docker.internal:3251}"
|
||||||
|
COPILOT_BRIDGE_URL: "${COPILOT_BRIDGE_URL:-http://host.docker.internal:3252}"
|
||||||
|
GEMINI_BRIDGE_URL: "${GEMINI_BRIDGE_URL:-http://host.docker.internal:3254}"
|
||||||
|
CODEX_BRIDGE_URL: "${CODEX_BRIDGE_URL:-http://host.docker.internal:3253}"
|
||||||
|
OPENAI_CODEX_URL: "${OPENAI_CODEX_URL:-http://host.docker.internal:3253}"
|
||||||
|
AIDER_BRIDGE_URL: "${AIDER_BRIDGE_URL:-http://host.docker.internal:3256}"
|
||||||
SHIELDX_URL: "${SHIELDX_URL:-}"
|
SHIELDX_URL: "${SHIELDX_URL:-}"
|
||||||
GITEA_URL: "http://gitea.context-x.org"
|
GITEA_URL: "http://gitea.context-x.org"
|
||||||
LOG_LEVEL: "${LOG_LEVEL:-info}"
|
LOG_LEVEL: "${LOG_LEVEL:-info}"
|
||||||
|
DASHBOARD_AUTH_TOKEN: "${DASHBOARD_AUTH_TOKEN:-}"
|
||||||
|
REFERENCE_INPUT_COST_PER_1K: "${REFERENCE_INPUT_COST_PER_1K:-0.005}"
|
||||||
|
REFERENCE_OUTPUT_COST_PER_1K: "${REFERENCE_OUTPUT_COST_PER_1K:-0.015}"
|
||||||
restart: unless-stopped
|
restart: unless-stopped
|
||||||
healthcheck:
|
healthcheck:
|
||||||
test: ["CMD", "wget", "-q", "-O-", "http://localhost:3100/health/live"]
|
test: ["CMD", "wget", "-q", "-O-", "http://localhost:3100/health/live"]
|
||||||
|
|||||||
57
package-lock.json
generated
57
package-lock.json
generated
@ -11,10 +11,10 @@
|
|||||||
"packages/*"
|
"packages/*"
|
||||||
],
|
],
|
||||||
"dependencies": {
|
"dependencies": {
|
||||||
"jose": "^6.2.2"
|
"jose": "^6.2.3"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"../../../shieldx": {
|
"../../shieldx": {
|
||||||
"extraneous": true
|
"extraneous": true
|
||||||
},
|
},
|
||||||
"node_modules/@esbuild/darwin-arm64": {
|
"node_modules/@esbuild/darwin-arm64": {
|
||||||
@ -305,6 +305,10 @@
|
|||||||
"resolved": "packages/codex-lsp-adapter",
|
"resolved": "packages/codex-lsp-adapter",
|
||||||
"link": true
|
"link": true
|
||||||
},
|
},
|
||||||
|
"node_modules/@llm-gateway/companion": {
|
||||||
|
"resolved": "packages/companion",
|
||||||
|
"link": true
|
||||||
|
},
|
||||||
"node_modules/@llm-gateway/ctx-health": {
|
"node_modules/@llm-gateway/ctx-health": {
|
||||||
"resolved": "packages/ctx-health",
|
"resolved": "packages/ctx-health",
|
||||||
"link": true
|
"link": true
|
||||||
@ -321,6 +325,10 @@
|
|||||||
"resolved": "packages/learning-integration",
|
"resolved": "packages/learning-integration",
|
||||||
"link": true
|
"link": true
|
||||||
},
|
},
|
||||||
|
"node_modules/@llm-gateway/mcp-server": {
|
||||||
|
"resolved": "packages/mcp-server",
|
||||||
|
"link": true
|
||||||
|
},
|
||||||
"node_modules/@llm-gateway/prompt-optimizer": {
|
"node_modules/@llm-gateway/prompt-optimizer": {
|
||||||
"resolved": "packages/prompt-optimizer",
|
"resolved": "packages/prompt-optimizer",
|
||||||
"link": true
|
"link": true
|
||||||
@ -1127,6 +1135,8 @@
|
|||||||
},
|
},
|
||||||
"node_modules/fastify-plugin": {
|
"node_modules/fastify-plugin": {
|
||||||
"version": "5.1.0",
|
"version": "5.1.0",
|
||||||
|
"resolved": "https://registry.npmjs.org/fastify-plugin/-/fastify-plugin-5.1.0.tgz",
|
||||||
|
"integrity": "sha512-FAIDA8eovSt5qcDgcBvDuX/v0Cjz0ohGhENZ/wpc3y+oZCY2afZ9Baqql3g/lC+OHRnciQol4ww7tuthOb9idw==",
|
||||||
"funding": [
|
"funding": [
|
||||||
{
|
{
|
||||||
"type": "github",
|
"type": "github",
|
||||||
@ -1475,9 +1485,9 @@
|
|||||||
}
|
}
|
||||||
},
|
},
|
||||||
"node_modules/jose": {
|
"node_modules/jose": {
|
||||||
"version": "6.2.2",
|
"version": "6.2.3",
|
||||||
"resolved": "https://registry.npmjs.org/jose/-/jose-6.2.2.tgz",
|
"resolved": "https://registry.npmjs.org/jose/-/jose-6.2.3.tgz",
|
||||||
"integrity": "sha512-d7kPDd34KO/YnzaDOlikGpOurfF0ByC2sEV4cANCtdqLlTfBlw2p14O/5d/zv40gJPbIQxfES3nSx1/oYNyuZQ==",
|
"integrity": "sha512-YYVDInQKFJfR/xa3ojUTl8c2KoTwiL1R5Wg9YCydwH0x0B9grbzlg5HC7mMjCtUJjbQ/YnGEZIhI5tCgfTb4Hw==",
|
||||||
"license": "MIT",
|
"license": "MIT",
|
||||||
"funding": {
|
"funding": {
|
||||||
"url": "https://github.com/sponsors/panva"
|
"url": "https://github.com/sponsors/panva"
|
||||||
@ -3178,6 +3188,21 @@
|
|||||||
"node": ">=0.4"
|
"node": ">=0.4"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
|
"node_modules/yaml": {
|
||||||
|
"version": "2.9.0",
|
||||||
|
"resolved": "https://registry.npmjs.org/yaml/-/yaml-2.9.0.tgz",
|
||||||
|
"integrity": "sha512-2AvhNX3mb8zd6Zy7INTtSpl1F15HW6Wnqj0srWlkKLcpYl/gMIMJiyuGq2KeI2YFxUPjdlB+3Lc10seMLtL4cA==",
|
||||||
|
"license": "ISC",
|
||||||
|
"bin": {
|
||||||
|
"yaml": "bin.mjs"
|
||||||
|
},
|
||||||
|
"engines": {
|
||||||
|
"node": ">= 14.6"
|
||||||
|
},
|
||||||
|
"funding": {
|
||||||
|
"url": "https://github.com/sponsors/eemeli"
|
||||||
|
}
|
||||||
|
},
|
||||||
"node_modules/yocto-queue": {
|
"node_modules/yocto-queue": {
|
||||||
"version": "1.2.2",
|
"version": "1.2.2",
|
||||||
"resolved": "https://registry.npmjs.org/yocto-queue/-/yocto-queue-1.2.2.tgz",
|
"resolved": "https://registry.npmjs.org/yocto-queue/-/yocto-queue-1.2.2.tgz",
|
||||||
@ -4086,6 +4111,16 @@
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
|
"packages/companion": {
|
||||||
|
"name": "@llm-gateway/companion",
|
||||||
|
"version": "1.0.0",
|
||||||
|
"bin": {
|
||||||
|
"llm-gateway-companion": "bin/llm-gateway-companion.js"
|
||||||
|
},
|
||||||
|
"engines": {
|
||||||
|
"node": ">=18"
|
||||||
|
}
|
||||||
|
},
|
||||||
"packages/ctx-health": {
|
"packages/ctx-health": {
|
||||||
"name": "@llm-gateway/ctx-health",
|
"name": "@llm-gateway/ctx-health",
|
||||||
"version": "1.0.0",
|
"version": "1.0.0",
|
||||||
@ -4114,6 +4149,7 @@
|
|||||||
"@fastify/static": "^8.3.0",
|
"@fastify/static": "^8.3.0",
|
||||||
"ajv": "^8.17.1",
|
"ajv": "^8.17.1",
|
||||||
"fastify": "^5.8.5",
|
"fastify": "^5.8.5",
|
||||||
|
"fastify-plugin": "^5.1.0",
|
||||||
"franc": "^6.2.0",
|
"franc": "^6.2.0",
|
||||||
"jose": "^5.4.0",
|
"jose": "^5.4.0",
|
||||||
"js-yaml": "^4.1.0",
|
"js-yaml": "^4.1.0",
|
||||||
@ -4122,6 +4158,7 @@
|
|||||||
"pg-boss": "^10.1.3",
|
"pg-boss": "^10.1.3",
|
||||||
"pino": "^9.5.0",
|
"pino": "^9.5.0",
|
||||||
"prom-client": "^15.1.3",
|
"prom-client": "^15.1.3",
|
||||||
|
"yaml": "^2.9.0",
|
||||||
"zod": "^3.23.8"
|
"zod": "^3.23.8"
|
||||||
},
|
},
|
||||||
"devDependencies": {
|
"devDependencies": {
|
||||||
@ -4448,6 +4485,16 @@
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
|
"packages/mcp-server": {
|
||||||
|
"name": "@llm-gateway/mcp-server",
|
||||||
|
"version": "1.0.0",
|
||||||
|
"bin": {
|
||||||
|
"llm-gateway-mcp": "bin/llm-gateway-mcp.js"
|
||||||
|
},
|
||||||
|
"engines": {
|
||||||
|
"node": ">=18"
|
||||||
|
}
|
||||||
|
},
|
||||||
"packages/prompt-optimizer": {
|
"packages/prompt-optimizer": {
|
||||||
"name": "@llm-gateway/prompt-optimizer",
|
"name": "@llm-gateway/prompt-optimizer",
|
||||||
"version": "0.1.0",
|
"version": "0.1.0",
|
||||||
|
|||||||
@ -18,6 +18,6 @@
|
|||||||
"ctx-health:dev": "npm run dev --workspace=packages/ctx-health"
|
"ctx-health:dev": "npm run dev --workspace=packages/ctx-health"
|
||||||
},
|
},
|
||||||
"dependencies": {
|
"dependencies": {
|
||||||
"jose": "^6.2.2"
|
"jose": "^6.2.3"
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@ -7,7 +7,8 @@
|
|||||||
"build": "tsc && npm run build:copy-assets",
|
"build": "tsc && npm run build:copy-assets",
|
||||||
"build:copy-assets": "mkdir -p dist/db/migrations dist/config dist/public && cp -r src/db/migrations/*.sql dist/db/migrations/ 2>/dev/null || true && cp -r src/config/*.yaml dist/config/ 2>/dev/null || true && cp -r public/* dist/public/ 2>/dev/null || true",
|
"build:copy-assets": "mkdir -p dist/db/migrations dist/config dist/public && cp -r src/db/migrations/*.sql dist/db/migrations/ 2>/dev/null || true && cp -r src/config/*.yaml dist/config/ 2>/dev/null || true && cp -r public/* dist/public/ 2>/dev/null || true",
|
||||||
"start": "node dist/server.js",
|
"start": "node dist/server.js",
|
||||||
"test": "vitest"
|
"test": "vitest",
|
||||||
|
"prestart": "node scripts/check-build-drift.mjs"
|
||||||
},
|
},
|
||||||
"dependencies": {
|
"dependencies": {
|
||||||
"@fastify/cors": "^10.1.0",
|
"@fastify/cors": "^10.1.0",
|
||||||
@ -16,6 +17,7 @@
|
|||||||
"@fastify/static": "^8.3.0",
|
"@fastify/static": "^8.3.0",
|
||||||
"ajv": "^8.17.1",
|
"ajv": "^8.17.1",
|
||||||
"fastify": "^5.8.5",
|
"fastify": "^5.8.5",
|
||||||
|
"fastify-plugin": "^5.1.0",
|
||||||
"franc": "^6.2.0",
|
"franc": "^6.2.0",
|
||||||
"jose": "^5.4.0",
|
"jose": "^5.4.0",
|
||||||
"js-yaml": "^4.1.0",
|
"js-yaml": "^4.1.0",
|
||||||
@ -24,6 +26,7 @@
|
|||||||
"pg-boss": "^10.1.3",
|
"pg-boss": "^10.1.3",
|
||||||
"pino": "^9.5.0",
|
"pino": "^9.5.0",
|
||||||
"prom-client": "^15.1.3",
|
"prom-client": "^15.1.3",
|
||||||
|
"yaml": "^2.9.0",
|
||||||
"zod": "^3.23.8"
|
"zod": "^3.23.8"
|
||||||
},
|
},
|
||||||
"devDependencies": {
|
"devDependencies": {
|
||||||
|
|||||||
@ -1,63 +1,105 @@
|
|||||||
id: linkedin_post
|
id: linkedin_post
|
||||||
version: "1.0.0"
|
version: "2.0.0"
|
||||||
task_type: linkedin_post
|
task_type: linkedin_post
|
||||||
|
description: "LinkedIn teaser in Rene Fichtmueller's voice. Anti-AI, anti-marketing, technical, direct."
|
||||||
|
|
||||||
system_prompt: |
|
system_prompt: |
|
||||||
You are a professional LinkedIn content writer. Write engaging, authentic posts that sound human.
|
You write a single short LinkedIn post in Rene Fichtmueller's voice. Rene is a network/optics engineer who blogs at blog.fichtmueller.org. His voice is direct, technical, sometimes contrarian, never marketing.
|
||||||
|
|
||||||
Rules:
|
HARD RULES — do not violate:
|
||||||
- Maximum 1300 characters (LinkedIn soft limit)
|
- 2 to 3 short sentences. Maximum 4. Period.
|
||||||
- No hashtag spam (max 3 relevant hashtags)
|
- No hashtags. None. Not at the end, not anywhere.
|
||||||
- No engagement-bait questions at the end
|
- No emojis. Not even one.
|
||||||
- No "In today's fast-paced world" openings
|
- No engagement-bait. Do not end with "What do you think?", "Thoughts?", "Have you seen this?".
|
||||||
- Write in first person, direct and confident tone
|
- No call-to-action language ("Check it out", "Read more", "Don't miss").
|
||||||
- Include a clear value point or insight
|
- No meta-references to the blog post itself: do not write "I wrote about this", "I published a piece", "I broke this down", "more in the article".
|
||||||
- Current date: {{current_date}}
|
- End with the URL on its own line. Nothing after the URL.
|
||||||
|
|
||||||
|
BANNED PHRASES — never use any of these:
|
||||||
|
- delve, leverage, robust, journey, embark, paradigm, unlock, seamlessly, holistic, harness, foster, amplify, underscore, indelible, profound, intricate, meticulous, testament, vibrant, bespoke, encompass, hitherto, realm, utilize, synergy
|
||||||
|
- "leaving money on the table"
|
||||||
|
- "until it's too late"
|
||||||
|
- "the line item most X skip"
|
||||||
|
- "turns out"
|
||||||
|
- "the unexpected part is"
|
||||||
|
- "the gap between X and Y is wider than"
|
||||||
|
- "in today's fast-paced", "in the world of", "in the realm of"
|
||||||
|
- "it's important to note", "it's worth noting"
|
||||||
|
- "let's dive into", "let's explore"
|
||||||
|
- "the future of X", "the next generation of X" (unless quoting someone)
|
||||||
|
- "game-changer", "cutting-edge", "groundbreaking", "comprehensive"
|
||||||
|
|
||||||
|
TONE — match these traits:
|
||||||
|
- Specific numbers over generalities. 20W is better than "high power". 14 weeks is better than "long lead time".
|
||||||
|
- Named products, standards, RFCs when relevant. 400ZR+, RPKI, IEEE 802.3.
|
||||||
|
- First person ("I", "my", "we") where genuine.
|
||||||
|
- Short sentences. Period. Short sentences. Period.
|
||||||
|
- Concession sometimes: admit what you don't know or what surprised you.
|
||||||
|
- Closing line stands on its own. No qualifier, no hedge.
|
||||||
|
|
||||||
|
Current date: {{current_date}}
|
||||||
|
|
||||||
{{few_shot_examples}}
|
{{few_shot_examples}}
|
||||||
|
|
||||||
system_prompt_de: |
|
system_prompt_de: |
|
||||||
Du bist ein professioneller LinkedIn-Content-Writer. Schreibe authentische, menschlich klingende Beiträge.
|
Du schreibst einen kurzen LinkedIn-Post in der Stimme von Rene Fichtmueller. Direkt, technisch, manchmal contrarian, nie Marketing.
|
||||||
|
|
||||||
Regeln:
|
HARTE REGELN — nie verletzen:
|
||||||
- Maximal 1300 Zeichen (LinkedIn Soft-Limit)
|
- 2 bis 3 kurze Sätze. Maximal 4. Punkt.
|
||||||
- Keine Hashtag-Spam (max. 3 relevante Hashtags)
|
- Keine Hashtags. Keine. Nirgendwo.
|
||||||
- Keine Engagement-Bait-Fragen am Ende
|
- Keine Emojis. Auch nicht einer.
|
||||||
- Keine Einstiege mit "In der heutigen schnelllebigen Welt"
|
- Kein Engagement-Bait. Niemals enden mit "Was meint ihr?", "Eure Erfahrung?".
|
||||||
- Schreibe in der Ich-Perspektive, direkt und selbstsicher
|
- Keine Call-to-Action-Sprache ("Schaut mal rein", "Hier mehr lesen").
|
||||||
- Enthalte einen klaren Mehrwert oder Einblick
|
- Keine Meta-Referenzen auf den Blog-Post: kein "Ich habe dazu geschrieben", "Mehr im Artikel".
|
||||||
- Aktuelles Datum: {{current_date}}
|
- URL alleine in der letzten Zeile. Nichts danach.
|
||||||
|
|
||||||
|
VERBOTENE WORTE/PHRASEN:
|
||||||
|
- "leverage", "delve", "robust", "harness", "navigieren", "Reise", "Paradigma", "freischalten", "ganzheitlich", "Synergie", "umfassend"
|
||||||
|
- "in der heutigen schnelllebigen Welt"
|
||||||
|
- "es lohnt sich zu erwähnen"
|
||||||
|
|
||||||
|
TON:
|
||||||
|
- Konkrete Zahlen statt Verallgemeinerungen.
|
||||||
|
- Erste Person wo authentisch.
|
||||||
|
- Kurze Sätze. Punkt.
|
||||||
|
- Konzession wo möglich: zugeben was überrascht hat.
|
||||||
|
|
||||||
|
Aktuelles Datum: {{current_date}}
|
||||||
|
|
||||||
user_template: |
|
user_template: |
|
||||||
Topic/Input:
|
Article info:
|
||||||
{{input}}
|
{{input}}
|
||||||
|
|
||||||
Context:
|
Write the LinkedIn post now. 2-3 sentences. No hashtags. URL on last line. Stop after the URL.
|
||||||
{{user_context}}
|
|
||||||
|
|
||||||
Write a LinkedIn post:
|
|
||||||
|
|
||||||
user_template_de: |
|
user_template_de: |
|
||||||
Thema/Eingabe:
|
Artikel-Infos:
|
||||||
{{input}}
|
{{input}}
|
||||||
|
|
||||||
Kontext:
|
Schreibe den LinkedIn-Post jetzt. 2-3 Sätze. Keine Hashtags. URL in der letzten Zeile. Stopp nach der URL.
|
||||||
{{user_context}}
|
|
||||||
|
|
||||||
Schreibe einen LinkedIn-Beitrag:
|
|
||||||
|
|
||||||
few_shot_examples:
|
few_shot_examples:
|
||||||
- user: "We deployed a new BGP monitoring system last week"
|
- user: "Article info:\nTitle: BGP Route Leak Hit My Network Last Tuesday\nExcerpt: A transit provider re-advertised our customer cone to the wrong peers. Detection took 4 hours. The mitigation took 14 minutes once we saw it.\nURL: https://blog.fichtmueller.org/example1/"
|
||||||
assistant: |
|
assistant: |
|
||||||
We spent 18 months dealing with BGP route leaks that cost us 4 hours of network visibility per incident.
|
A transit provider re-advertised our customer cone to the wrong peers last Tuesday. We caught it after 4 hours. The fix took 14 minutes once we saw it — the cost was the 4 hours we didn't.
|
||||||
|
|
||||||
Last week we deployed our own monitoring system. First result: caught a route leak from a transit provider within 3 minutes, before any customer noticed.
|
https://blog.fichtmueller.org/example1/
|
||||||
|
|
||||||
The fix wasn't sophisticated. It was systematic alerting on RPKI invalid routes combined with PeeringDB cross-referencing.
|
- user: "Article info:\nTitle: 400ZR Reach Math Nobody Shows You\nExcerpt: Datasheets quote a maximum reach number. That number is for a clean link with margin you don't have.\nURL: https://blog.fichtmueller.org/example2/"
|
||||||
|
assistant: |
|
||||||
|
The 400ZR datasheet number is a ceiling, not a target. Real-world links have eight connectors, two amplifier hops, and aged fiber — the spec assumes none of that.
|
||||||
|
|
||||||
If you're running a mid-size ISP and still relying on customer tickets to discover routing issues, that's the gap worth closing.
|
Build to the budget you actually have, not the one the marketing slide showed you.
|
||||||
|
|
||||||
#BGP #NetworkEngineering #ISP
|
https://blog.fichtmueller.org/example2/
|
||||||
|
|
||||||
|
- user: "Article info:\nTitle: ADHD Diary #008: The 47-Tabs Problem\nExcerpt: I closed 47 browser tabs Tuesday. By Wednesday I had 51 again. This is not a tool problem.\nURL: https://blog.fichtmueller.org/example3/"
|
||||||
|
assistant: |
|
||||||
|
I closed 47 browser tabs on Tuesday. By Wednesday morning I had 51 again.
|
||||||
|
|
||||||
|
This isn't a tool problem. Every tab is a "I might do this later" promise I made to myself. The fix isn't a better tab manager — it's saying no.
|
||||||
|
|
||||||
|
https://blog.fichtmueller.org/example3/
|
||||||
|
|
||||||
variables:
|
variables:
|
||||||
- input
|
- input
|
||||||
|
|||||||
File diff suppressed because it is too large
Load Diff
@ -1,7 +1,7 @@
|
|||||||
# LLM Gateway Model Configuration
|
# LLM Gateway Model Configuration
|
||||||
# Ollama base URL: http://192.168.178.169:11434
|
# Ollama base URL: http://192.168.178.169:11434
|
||||||
|
|
||||||
ollama_base_url: "https://ollama.fichtmueller.org"
|
ollama_base_url: "http://127.0.0.1:11434"
|
||||||
|
|
||||||
tiers:
|
tiers:
|
||||||
fast:
|
fast:
|
||||||
@ -26,7 +26,7 @@ models:
|
|||||||
qwen2.5:3b:
|
qwen2.5:3b:
|
||||||
tier: fast
|
tier: fast
|
||||||
context_length: 32768
|
context_length: 32768
|
||||||
strengths: [classification, short_text, routing]
|
strengths: [classification, summarization, routing]
|
||||||
max_tokens_default: 512
|
max_tokens_default: 512
|
||||||
|
|
||||||
qwen2.5:7b:
|
qwen2.5:7b:
|
||||||
@ -35,83 +35,58 @@ models:
|
|||||||
strengths: [classification, summarization, short_analysis]
|
strengths: [classification, summarization, short_analysis]
|
||||||
max_tokens_default: 1024
|
max_tokens_default: 1024
|
||||||
|
|
||||||
phi3.5:3.8b:
|
qwen2.5:7b-instruct:
|
||||||
tier: fast
|
tier: fast
|
||||||
context_length: 128000
|
context_length: 32768
|
||||||
strengths: [classification, summarization]
|
strengths: [classification, summarization, short_analysis]
|
||||||
|
max_tokens_default: 1024
|
||||||
|
|
||||||
|
qwen2.5-coder:7b-instruct:
|
||||||
|
tier: fast
|
||||||
|
context_length: 32768
|
||||||
|
strengths: [code_generation, technical_analysis, routing]
|
||||||
max_tokens_default: 512
|
max_tokens_default: 512
|
||||||
|
|
||||||
# ─── MAGATAMA — Fine-tuned Security Intelligence (Context X) ─────────────────
|
# ─── MAGATAMA — Fine-tuned Security Intelligence (Context X) ─────────────────
|
||||||
magatama:32b:
|
magatama:32b:
|
||||||
tier: large
|
tier: large
|
||||||
context_length: 131072
|
context_length: 131072
|
||||||
strengths: [security_analysis, threat_intelligence, compliance, bgp_security, incident_response, nis2, ciso_reporting]
|
strengths: [security_analysis, threat_intelligence, compliance, bgp_security, incident_response, nis2, ciso_reporting, complex_writing, deep_analysis, technical]
|
||||||
max_tokens_default: 4096
|
max_tokens_default: 4096
|
||||||
description: "MAGATAMA まがたま — TEPPEKI 7-pillar security AI, fine-tuned on Qwen2.5-32B"
|
description: "MAGATAMA まがたま — TEPPEKI 7-pillar security AI, fine-tuned on Qwen2.5-32B"
|
||||||
|
|
||||||
# Custom fine-tuned models (Context X)
|
|
||||||
ctxhealer:latest:
|
|
||||||
tier: medium
|
|
||||||
context_length: 32768
|
|
||||||
strengths: [infrastructure_diagnosis, root_cause_analysis, remediation_steps]
|
|
||||||
max_tokens_default: 1024
|
|
||||||
|
|
||||||
llama-guard3:1b:
|
|
||||||
tier: fast
|
|
||||||
context_length: 8192
|
|
||||||
strengths: [safety_classification, threat_detection]
|
|
||||||
max_tokens_default: 256
|
|
||||||
|
|
||||||
# Medium tier
|
# Medium tier
|
||||||
qwen2.5:14b:
|
qwen2.5:14b:
|
||||||
tier: medium
|
tier: medium
|
||||||
context_length: 131072
|
context_length: 131072
|
||||||
strengths: [general, writing, analysis, coding]
|
strengths: [general, writing, analysis, coding, dialogue]
|
||||||
max_tokens_default: 2048
|
max_tokens_default: 2048
|
||||||
|
|
||||||
mistral:7b:
|
magatama-llm-v2-0:latest:
|
||||||
tier: medium
|
tier: medium
|
||||||
context_length: 32768
|
context_length: 131072
|
||||||
strengths: [general, writing]
|
strengths: [general, writing, analysis, coding, dialogue]
|
||||||
max_tokens_default: 2048
|
max_tokens_default: 2048
|
||||||
|
|
||||||
llama3.2:8b:
|
magatama-coder:latest:
|
||||||
tier: medium
|
|
||||||
context_length: 128000
|
|
||||||
strengths: [general, chat, analysis]
|
|
||||||
max_tokens_default: 2048
|
|
||||||
|
|
||||||
deepseek-r1:8b:
|
|
||||||
tier: medium
|
tier: medium
|
||||||
context_length: 65536
|
context_length: 65536
|
||||||
strengths: [reasoning, analysis, coding]
|
strengths: [code_generation, technical_analysis, debugging]
|
||||||
max_tokens_default: 2048
|
max_tokens_default: 2048
|
||||||
|
|
||||||
# Large tier
|
# Large tier
|
||||||
qwen2.5:32b:
|
qwen2.5:32b:
|
||||||
tier: large
|
tier: large
|
||||||
context_length: 131072
|
context_length: 131072
|
||||||
strengths: [complex_writing, deep_analysis, technical]
|
strengths: [complex_writing, deep_analysis, technical, security_analysis]
|
||||||
max_tokens_default: 4096
|
|
||||||
|
|
||||||
llama3.3:70b:
|
|
||||||
tier: large
|
|
||||||
context_length: 128000
|
|
||||||
strengths: [complex_reasoning, long_form, research]
|
|
||||||
max_tokens_default: 4096
|
|
||||||
|
|
||||||
deepseek-r1:32b:
|
|
||||||
tier: large
|
|
||||||
context_length: 131072
|
|
||||||
strengths: [chain_of_thought, complex_reasoning]
|
|
||||||
max_tokens_default: 4096
|
max_tokens_default: 4096
|
||||||
|
|
||||||
# Fallback chains per tier
|
# Fallback chains per tier
|
||||||
fallback_chains:
|
fallback_chains:
|
||||||
fast: [qwen2.5:3b, qwen2.5:7b, phi3.5:3.8b]
|
fast: [qwen2.5:7b-instruct, qwen2.5-coder:7b-instruct]
|
||||||
medium: [qwen2.5:14b, mistral:7b, llama3.2:8b]
|
medium: [magatama-llm-v2-0:latest, magatama-coder:latest, qwen2.5:7b-instruct]
|
||||||
large: [qwen2.5:32b, llama3.3:70b, deepseek-r1:32b]
|
large: [magatama:32b, magatama-llm-v2-0:latest]
|
||||||
code_generation: [deepseek-r1:32b, qwen2.5:32b, llama3.3:70b]
|
code_generation: [magatama-coder:latest, qwen2.5-coder:7b-instruct]
|
||||||
|
|
||||||
# Cross-tier fallback when primary tier fails
|
# Cross-tier fallback when primary tier fails
|
||||||
tier_fallback:
|
tier_fallback:
|
||||||
|
|||||||
@ -1110,7 +1110,7 @@ routing_rules:
|
|||||||
|
|
||||||
# ─── CONTENT / LINKEDIN ──────────────────────────────────────────────────────
|
# ─── CONTENT / LINKEDIN ──────────────────────────────────────────────────────
|
||||||
linkedin_post:
|
linkedin_post:
|
||||||
model: qwen2.5:32b
|
model: fo-blog-v10
|
||||||
tier: large
|
tier: large
|
||||||
prompt_template: linkedin_post
|
prompt_template: linkedin_post
|
||||||
temperature: 0.7
|
temperature: 0.7
|
||||||
@ -1118,7 +1118,7 @@ routing_rules:
|
|||||||
output_format: text
|
output_format: text
|
||||||
requires_fact_check: false
|
requires_fact_check: false
|
||||||
validators: [banlist, language, length, question_closer]
|
validators: [banlist, language, length, question_closer]
|
||||||
callers: [n8n, internal]
|
callers: [n8n, internal, linkedin-distributor]
|
||||||
|
|
||||||
linkedin_comment:
|
linkedin_comment:
|
||||||
model: qwen2.5:14b
|
model: qwen2.5:14b
|
||||||
|
|||||||
@ -3,7 +3,7 @@
|
|||||||
-- Purpose: Track token compression and cost analytics
|
-- Purpose: Track token compression and cost analytics
|
||||||
-- PostgreSQL compatible version (version 16+)
|
-- PostgreSQL compatible version (version 16+)
|
||||||
|
|
||||||
-- Table: Token compression metrics (LeanCTX, RTK)
|
-- Table: Token compression metrics (LLM Gateway)
|
||||||
CREATE TABLE IF NOT EXISTS tokenvault_metrics (
|
CREATE TABLE IF NOT EXISTS tokenvault_metrics (
|
||||||
id SERIAL PRIMARY KEY,
|
id SERIAL PRIMARY KEY,
|
||||||
file_path VARCHAR(255),
|
file_path VARCHAR(255),
|
||||||
|
|||||||
@ -1,12 +1,12 @@
|
|||||||
-- Tokenvault & Cost Tracking Schema Extensions
|
-- Tokenvault & Cost Tracking Schema Extensions
|
||||||
-- Created: 2026-04-19
|
-- Created: 2026-04-19
|
||||||
-- Purpose: Track token compression (LeanCTX + RTK) and cost analytics
|
-- Purpose: Track token compression (LLM Gateway) and cost analytics
|
||||||
|
|
||||||
-- Table: Token compression metrics (LeanCTX, RTK)
|
-- Table: Token compression metrics (LLM Gateway)
|
||||||
CREATE TABLE IF NOT EXISTS tokenvault_metrics (
|
CREATE TABLE IF NOT EXISTS tokenvault_metrics (
|
||||||
id SERIAL PRIMARY KEY,
|
id SERIAL PRIMARY KEY,
|
||||||
file_path VARCHAR(255),
|
file_path VARCHAR(255),
|
||||||
mode VARCHAR(50), -- 'lean-aggressive', 'lean-map', 'rtk-max', etc.
|
mode VARCHAR(50), -- 'gateway-aggressive', 'gateway-map', 'gateway-trim', etc.
|
||||||
tokens_before INT,
|
tokens_before INT,
|
||||||
tokens_after INT,
|
tokens_after INT,
|
||||||
savings_pct DECIMAL(5,2),
|
savings_pct DECIMAL(5,2),
|
||||||
@ -26,7 +26,7 @@ CREATE TABLE IF NOT EXISTS cost_analytics (
|
|||||||
agent_id VARCHAR(50), -- 'claude-code', 'qwen-reviewer', etc.
|
agent_id VARCHAR(50), -- 'claude-code', 'qwen-reviewer', etc.
|
||||||
tokens_in INT,
|
tokens_in INT,
|
||||||
tokens_out INT,
|
tokens_out INT,
|
||||||
tokens_compressed INT, -- After LeanCTX + RTK
|
tokens_compressed INT, -- After LLM Gateway compression
|
||||||
cost_usd DECIMAL(10,6),
|
cost_usd DECIMAL(10,6),
|
||||||
cost_saved_usd DECIMAL(10,6),
|
cost_saved_usd DECIMAL(10,6),
|
||||||
provider VARCHAR(50), -- 'ollama', 'cerebras', 'groq', 'claude', etc.
|
provider VARCHAR(50), -- 'ollama', 'cerebras', 'groq', 'claude', etc.
|
||||||
|
|||||||
@ -109,6 +109,11 @@ export class RequestLogger {
|
|||||||
cost_usd: number;
|
cost_usd: number;
|
||||||
latency_ms: number;
|
latency_ms: number;
|
||||||
fallback_used: boolean;
|
fallback_used: boolean;
|
||||||
|
compression_mode?: string;
|
||||||
|
compression_tokens_before?: number;
|
||||||
|
compression_tokens_after?: number;
|
||||||
|
compression_tokens_saved?: number;
|
||||||
|
compression_savings_pct?: number;
|
||||||
error_message?: string;
|
error_message?: string;
|
||||||
created_at: string;
|
created_at: string;
|
||||||
}>
|
}>
|
||||||
@ -116,22 +121,35 @@ export class RequestLogger {
|
|||||||
const result = await this.db.query(
|
const result = await this.db.query(
|
||||||
`
|
`
|
||||||
SELECT
|
SELECT
|
||||||
request_id,
|
rt.request_id,
|
||||||
caller_id as caller,
|
rt.caller_id as caller,
|
||||||
task_type,
|
rt.task_type,
|
||||||
model,
|
rt.model,
|
||||||
status,
|
rt.status,
|
||||||
confidence_score,
|
rt.confidence_score,
|
||||||
tokens_in,
|
rt.tokens_in,
|
||||||
tokens_out,
|
rt.tokens_out,
|
||||||
cost_usd,
|
rt.cost_usd,
|
||||||
latency_ms,
|
rt.latency_ms,
|
||||||
fallback_used,
|
rt.fallback_used,
|
||||||
error_message,
|
tv.mode as compression_mode,
|
||||||
created_at
|
tv.tokens_before as compression_tokens_before,
|
||||||
FROM request_tracking
|
tv.tokens_after as compression_tokens_after,
|
||||||
WHERE created_at > NOW() - MAKE_INTERVAL(hours => $1)
|
GREATEST(COALESCE(tv.tokens_before, 0) - COALESCE(tv.tokens_after, 0), 0) as compression_tokens_saved,
|
||||||
ORDER BY created_at DESC
|
tv.savings_pct as compression_savings_pct,
|
||||||
|
rt.error_message,
|
||||||
|
rt.created_at
|
||||||
|
FROM request_tracking rt
|
||||||
|
LEFT JOIN LATERAL (
|
||||||
|
SELECT mode, tokens_before, tokens_after, savings_pct
|
||||||
|
FROM tokenvault_metrics
|
||||||
|
WHERE tool_used = 'gateway'
|
||||||
|
AND file_path = rt.request_id
|
||||||
|
ORDER BY created_at DESC
|
||||||
|
LIMIT 1
|
||||||
|
) tv ON true
|
||||||
|
WHERE rt.created_at > NOW() - MAKE_INTERVAL(hours => $1)
|
||||||
|
ORDER BY rt.created_at DESC
|
||||||
LIMIT $2
|
LIMIT $2
|
||||||
`,
|
`,
|
||||||
[offsetHours, limit]
|
[offsetHours, limit]
|
||||||
@ -149,6 +167,11 @@ export class RequestLogger {
|
|||||||
cost_usd: row.cost_usd,
|
cost_usd: row.cost_usd,
|
||||||
latency_ms: row.latency_ms,
|
latency_ms: row.latency_ms,
|
||||||
fallback_used: row.fallback_used,
|
fallback_used: row.fallback_used,
|
||||||
|
compression_mode: row.compression_mode,
|
||||||
|
compression_tokens_before: row.compression_tokens_before ? parseInt(row.compression_tokens_before, 10) : undefined,
|
||||||
|
compression_tokens_after: row.compression_tokens_after ? parseInt(row.compression_tokens_after, 10) : undefined,
|
||||||
|
compression_tokens_saved: row.compression_tokens_saved ? parseInt(row.compression_tokens_saved, 10) : 0,
|
||||||
|
compression_savings_pct: row.compression_savings_pct ? parseFloat(row.compression_savings_pct) : 0,
|
||||||
error_message: row.error_message,
|
error_message: row.error_message,
|
||||||
created_at: row.created_at
|
created_at: row.created_at
|
||||||
}));
|
}));
|
||||||
@ -160,6 +183,17 @@ export class RequestLogger {
|
|||||||
async getMetrics(bucketMinutes: number = 60): Promise<{
|
async getMetrics(bucketMinutes: number = 60): Promise<{
|
||||||
total_requests: number;
|
total_requests: number;
|
||||||
total_cost: number;
|
total_cost: number;
|
||||||
|
estimated_api_cost: number;
|
||||||
|
estimated_api_cost_avoided: number;
|
||||||
|
total_tokens_in: number;
|
||||||
|
total_tokens_out: number;
|
||||||
|
total_tokens: number;
|
||||||
|
compression_operations: number;
|
||||||
|
compression_tokens_before: number;
|
||||||
|
compression_tokens_after: number;
|
||||||
|
compression_tokens_saved: number;
|
||||||
|
compression_rate: number;
|
||||||
|
cache_hit_rate: number;
|
||||||
avg_latency: number;
|
avg_latency: number;
|
||||||
success_rate: number;
|
success_rate: number;
|
||||||
avg_confidence: number;
|
avg_confidence: number;
|
||||||
@ -177,13 +211,15 @@ export class RequestLogger {
|
|||||||
`
|
`
|
||||||
SELECT
|
SELECT
|
||||||
COUNT(*) as total_requests,
|
COUNT(*) as total_requests,
|
||||||
SUM(cost_usd) as total_cost,
|
COALESCE(SUM(cost_usd), 0) as total_cost,
|
||||||
AVG(latency_ms) as avg_latency,
|
COALESCE(SUM(tokens_in), 0) as total_tokens_in,
|
||||||
SUM(CASE WHEN status = 'approved' THEN 1 ELSE 0 END)::FLOAT / COUNT(*) as success_rate,
|
COALESCE(SUM(tokens_out), 0) as total_tokens_out,
|
||||||
AVG(confidence_score) as avg_confidence,
|
COALESCE(AVG(latency_ms), 0) as avg_latency,
|
||||||
SUM(CASE WHEN fallback_used = true THEN 1 ELSE 0 END)::FLOAT / COUNT(*) as fallback_percentage
|
CASE WHEN COUNT(*) = 0 THEN 0 ELSE SUM(CASE WHEN status = 'approved' THEN 1 ELSE 0 END)::FLOAT / COUNT(*) END as success_rate,
|
||||||
|
COALESCE(AVG(confidence_score), 0) as avg_confidence,
|
||||||
|
CASE WHEN COUNT(*) = 0 THEN 0 ELSE SUM(CASE WHEN fallback_used = true THEN 1 ELSE 0 END)::FLOAT / COUNT(*) END as fallback_percentage
|
||||||
FROM request_tracking
|
FROM request_tracking
|
||||||
WHERE created_at > NOW() - MAKE_INTERVAL(mins => $1)
|
WHERE created_at > NOW() - ($1 * INTERVAL '1 minute')
|
||||||
`,
|
`,
|
||||||
[bucketMinutes]
|
[bucketMinutes]
|
||||||
);
|
);
|
||||||
@ -192,7 +228,7 @@ export class RequestLogger {
|
|||||||
`
|
`
|
||||||
SELECT caller_id as caller, COUNT(*) as count
|
SELECT caller_id as caller, COUNT(*) as count
|
||||||
FROM request_tracking
|
FROM request_tracking
|
||||||
WHERE created_at > NOW() - MAKE_INTERVAL(mins => $1)
|
WHERE created_at > NOW() - ($1 * INTERVAL '1 minute')
|
||||||
GROUP BY caller_id
|
GROUP BY caller_id
|
||||||
ORDER BY count DESC
|
ORDER BY count DESC
|
||||||
LIMIT 5
|
LIMIT 5
|
||||||
@ -204,7 +240,7 @@ export class RequestLogger {
|
|||||||
`
|
`
|
||||||
SELECT model, COUNT(*) as count
|
SELECT model, COUNT(*) as count
|
||||||
FROM request_tracking
|
FROM request_tracking
|
||||||
WHERE created_at > NOW() - MAKE_INTERVAL(mins => $1)
|
WHERE created_at > NOW() - ($1 * INTERVAL '1 minute')
|
||||||
GROUP BY model
|
GROUP BY model
|
||||||
ORDER BY count DESC
|
ORDER BY count DESC
|
||||||
LIMIT 5
|
LIMIT 5
|
||||||
@ -224,11 +260,47 @@ export class RequestLogger {
|
|||||||
[bucketMinutes]
|
[bucketMinutes]
|
||||||
);
|
);
|
||||||
|
|
||||||
|
const compressionResult = await this.db.query(
|
||||||
|
`
|
||||||
|
SELECT
|
||||||
|
COUNT(*) as operations,
|
||||||
|
COALESCE(SUM(tokens_before), 0) as tokens_before,
|
||||||
|
COALESCE(SUM(tokens_after), 0) as tokens_after,
|
||||||
|
COALESCE(SUM(GREATEST(tokens_before - tokens_after, 0)), 0) as tokens_saved
|
||||||
|
FROM tokenvault_metrics
|
||||||
|
WHERE tool_used = 'gateway'
|
||||||
|
AND created_at > NOW() - ($1 * INTERVAL '1 minute')
|
||||||
|
`,
|
||||||
|
[bucketMinutes]
|
||||||
|
);
|
||||||
|
|
||||||
const metrics = metricsResult.rows[0];
|
const metrics = metricsResult.rows[0];
|
||||||
|
const totalTokensIn = parseInt(metrics.total_tokens_in, 10) || 0;
|
||||||
|
const totalTokensOut = parseInt(metrics.total_tokens_out, 10) || 0;
|
||||||
|
const totalTokens = totalTokensIn + totalTokensOut;
|
||||||
|
const compression = compressionResult.rows[0] ?? {};
|
||||||
|
const compressionTokensBefore = parseInt(compression.tokens_before, 10) || 0;
|
||||||
|
const compressionTokensAfter = parseInt(compression.tokens_after, 10) || 0;
|
||||||
|
const compressionTokensSaved = parseInt(compression.tokens_saved, 10) || 0;
|
||||||
|
const referenceInputCostPer1k = parseFloat(process.env['REFERENCE_INPUT_COST_PER_1K'] ?? '0.005');
|
||||||
|
const referenceOutputCostPer1k = parseFloat(process.env['REFERENCE_OUTPUT_COST_PER_1K'] ?? '0.015');
|
||||||
|
const estimatedApiCost = (totalTokensIn / 1000) * referenceInputCostPer1k + (totalTokensOut / 1000) * referenceOutputCostPer1k;
|
||||||
|
const totalCost = parseFloat(metrics.total_cost) || 0;
|
||||||
|
|
||||||
return {
|
return {
|
||||||
total_requests: parseInt(metrics.total_requests) || 0,
|
total_requests: parseInt(metrics.total_requests) || 0,
|
||||||
total_cost: parseFloat(metrics.total_cost) || 0,
|
total_cost: totalCost,
|
||||||
|
estimated_api_cost: estimatedApiCost,
|
||||||
|
estimated_api_cost_avoided: Math.max(0, estimatedApiCost - totalCost),
|
||||||
|
total_tokens_in: totalTokensIn,
|
||||||
|
total_tokens_out: totalTokensOut,
|
||||||
|
total_tokens: totalTokens,
|
||||||
|
compression_operations: parseInt(compression.operations, 10) || 0,
|
||||||
|
compression_tokens_before: compressionTokensBefore,
|
||||||
|
compression_tokens_after: compressionTokensAfter,
|
||||||
|
compression_tokens_saved: compressionTokensSaved,
|
||||||
|
compression_rate: compressionTokensBefore > 0 ? compressionTokensSaved / compressionTokensBefore : 0,
|
||||||
|
cache_hit_rate: 0,
|
||||||
avg_latency: Math.round(parseFloat(metrics.avg_latency) || 0),
|
avg_latency: Math.round(parseFloat(metrics.avg_latency) || 0),
|
||||||
success_rate: parseFloat(metrics.success_rate) || 0,
|
success_rate: parseFloat(metrics.success_rate) || 0,
|
||||||
avg_confidence: parseFloat(metrics.avg_confidence) || 0,
|
avg_confidence: parseFloat(metrics.avg_confidence) || 0,
|
||||||
|
|||||||
@ -101,7 +101,7 @@ export function calculateCost(
|
|||||||
/**
|
/**
|
||||||
* Calculate cost savings from compression
|
* Calculate cost savings from compression
|
||||||
* @param model Model identifier
|
* @param model Model identifier
|
||||||
* @param tokensBeforeCompression Tokens before LeanCTX + RTK
|
* @param tokensBeforeCompression Tokens before LLM Gateway compression
|
||||||
* @param tokensAfterCompression Tokens after compression
|
* @param tokensAfterCompression Tokens after compression
|
||||||
* @returns Savings in USD
|
* @returns Savings in USD
|
||||||
*/
|
*/
|
||||||
|
|||||||
@ -47,7 +47,7 @@ const PROVIDERS: readonly ExternalProvider[] = [
|
|||||||
enabled: true,
|
enabled: true,
|
||||||
models: [
|
models: [
|
||||||
{ id: 'claude-opus-4-1', tier: 'reasoning', contextLength: 200000 },
|
{ id: 'claude-opus-4-1', tier: 'reasoning', contextLength: 200000 },
|
||||||
{ id: 'claude-sonnet-4-1', tier: 'large', contextLength: 200000 },
|
{ id: 'claude-sonnet-4-6', tier: 'large', contextLength: 200000 },
|
||||||
{ id: 'claude-haiku-3', tier: 'fast', contextLength: 200000 },
|
{ id: 'claude-haiku-3', tier: 'fast', contextLength: 200000 },
|
||||||
],
|
],
|
||||||
},
|
},
|
||||||
@ -86,6 +86,17 @@ const PROVIDERS: readonly ExternalProvider[] = [
|
|||||||
{ id: 'gpt-3.5-turbo', tier: 'medium', contextLength: 4096 },
|
{ id: 'gpt-3.5-turbo', tier: 'medium', contextLength: 4096 },
|
||||||
],
|
],
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
name: 'm365-copilot-bridge',
|
||||||
|
baseUrl: '', // constructed from M365_COPILOT_BRIDGE_URL env var
|
||||||
|
envKey: 'M365_COPILOT_BRIDGE_URL',
|
||||||
|
rateLimitRpm: 60,
|
||||||
|
enabled: true,
|
||||||
|
models: [
|
||||||
|
{ id: 'microsoft-365-copilot', tier: 'reasoning', contextLength: 128000 },
|
||||||
|
{ id: 'm365-copilot-chat', tier: 'large', contextLength: 128000 },
|
||||||
|
],
|
||||||
|
},
|
||||||
{
|
{
|
||||||
name: 'cerebras',
|
name: 'cerebras',
|
||||||
baseUrl: 'https://api.cerebras.ai/v1',
|
baseUrl: 'https://api.cerebras.ai/v1',
|
||||||
@ -146,12 +157,13 @@ const PROVIDERS: readonly ExternalProvider[] = [
|
|||||||
{
|
{
|
||||||
name: 'openai-codex',
|
name: 'openai-codex',
|
||||||
baseUrl: 'https://api.openai.com/v1',
|
baseUrl: 'https://api.openai.com/v1',
|
||||||
envKey: 'OPENAI_API_KEY',
|
envKey: 'OPENAI_CODEX_URL',
|
||||||
rateLimitRpm: 60,
|
rateLimitRpm: 60,
|
||||||
enabled: true,
|
enabled: true,
|
||||||
models: [
|
models: [
|
||||||
{ id: 'gpt-4-turbo', tier: 'reasoning', contextLength: 128000 },
|
{ id: 'gpt-5.1-codex', tier: 'reasoning', contextLength: 256000 },
|
||||||
{ id: 'gpt-3.5-turbo', tier: 'fast', contextLength: 16384 },
|
{ id: 'gpt-5.1-codex-mini', tier: 'large', contextLength: 256000 },
|
||||||
|
{ id: 'codex-mini-latest', tier: 'medium', contextLength: 200000 },
|
||||||
],
|
],
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@ -162,23 +174,35 @@ const PROVIDERS: readonly ExternalProvider[] = [
|
|||||||
enabled: true,
|
enabled: true,
|
||||||
models: [
|
models: [
|
||||||
{ id: 'claude-opus-4-1', tier: 'reasoning', contextLength: 200000 },
|
{ id: 'claude-opus-4-1', tier: 'reasoning', contextLength: 200000 },
|
||||||
{ id: 'claude-sonnet-4-1', tier: 'large', contextLength: 200000 },
|
{ id: 'claude-sonnet-4-6', tier: 'large', contextLength: 200000 },
|
||||||
{ id: 'claude-haiku-3', tier: 'fast', contextLength: 200000 },
|
{ id: 'claude-haiku-3', tier: 'fast', contextLength: 200000 },
|
||||||
],
|
],
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
name: 'codex',
|
name: 'codex',
|
||||||
baseUrl: 'https://api.github.com/copilot_inner/v2',
|
baseUrl: 'https://api.github.com/copilot_inner/v2',
|
||||||
envKey: 'GITHUB_CODEX_TOKEN',
|
envKey: 'CODEX_BRIDGE_URL',
|
||||||
rateLimitRpm: 60,
|
rateLimitRpm: 60,
|
||||||
enabled: true,
|
enabled: true,
|
||||||
models: [
|
models: [
|
||||||
{ id: 'github-copilot-x', tier: 'large', contextLength: 8192 },
|
{ id: 'gpt-5.1-codex', tier: 'reasoning', contextLength: 256000 },
|
||||||
{ id: 'code-davinci-002', tier: 'medium', contextLength: 4096 },
|
{ id: 'gpt-5.1-codex-mini', tier: 'large', contextLength: 256000 },
|
||||||
|
{ id: 'codex-mini-latest', tier: 'medium', contextLength: 200000 },
|
||||||
],
|
],
|
||||||
},
|
},
|
||||||
];
|
];
|
||||||
|
|
||||||
|
/**
 * Provider names for which buildRequestHeaders() omits the `Authorization`
 * header. getApiKey() returns a placeholder marker string for these providers
 * once their bridge URL environment variable is set.
 */
const AUTHLESS_BRIDGE_PROVIDERS: Set<string> = new Set<string>([
  'claude-bridge',
  'claude-code',
  'openai-bridge',
  'chatgpt-bridge',
  'copilot-bridge',
  'm365-copilot-bridge',
]);

/**
 * Provider names that callProvider() reaches through `<baseUrl>/api/generate`
 * with a flattened prompt payload instead of `<baseUrl>/chat/completions`.
 */
const GENERATE_BRIDGE_PROVIDERS: Set<string> = new Set<string>([
  'claude-bridge',
  'claude-code',
]);
|
||||||
|
|
||||||
// ─── Rate Limiter (simple sliding window) ───────────────────────────
|
// ─── Rate Limiter (simple sliding window) ───────────────────────────
|
||||||
|
|
||||||
const requestTimestamps: Map<string, number[]> = new Map();
|
const requestTimestamps: Map<string, number[]> = new Map();
|
||||||
@ -213,25 +237,34 @@ function getApiKey(provider: ExternalProvider): string | undefined {
|
|||||||
return url ? 'claude-code-enabled' : undefined;
|
return url ? 'claude-code-enabled' : undefined;
|
||||||
}
|
}
|
||||||
if (provider.name === 'openai-bridge') {
|
if (provider.name === 'openai-bridge') {
|
||||||
// openai-bridge uses OPENAI_API_KEY for auth, but also needs bridge URL
|
// Subscription bridge auth is handled by the bridge process/CLI session.
|
||||||
const apiKey = process.env['OPENAI_API_KEY'];
|
|
||||||
const url = process.env['OPENAI_BRIDGE_URL'];
|
const url = process.env['OPENAI_BRIDGE_URL'];
|
||||||
return apiKey && url ? apiKey : undefined;
|
return url ? 'openai-bridge-enabled' : undefined;
|
||||||
}
|
}
|
||||||
if (provider.name === 'chatgpt-bridge') {
|
if (provider.name === 'chatgpt-bridge') {
|
||||||
// chatgpt-bridge can use same URL as openai-bridge (same service), but needs API key
|
// ChatGPT Plus bridge can reuse the OpenAI bridge when configured that way.
|
||||||
const apiKey = process.env['OPENAI_API_KEY'];
|
|
||||||
const url = process.env['CHATGPT_BRIDGE_URL'] || process.env['OPENAI_BRIDGE_URL'];
|
const url = process.env['CHATGPT_BRIDGE_URL'] || process.env['OPENAI_BRIDGE_URL'];
|
||||||
return apiKey && url ? apiKey : undefined;
|
return url ? 'chatgpt-bridge-enabled' : undefined;
|
||||||
}
|
}
|
||||||
if (provider.name === 'copilot-bridge') {
|
if (provider.name === 'copilot-bridge') {
|
||||||
// copilot-bridge uses GitHub Copilot subscription (auth handled internally by copilot-api)
|
// copilot-bridge uses GitHub Copilot subscription (auth handled internally by copilot-api).
|
||||||
// Just needs URL to be configured
|
|
||||||
const url = process.env['COPILOT_BRIDGE_URL'];
|
const url = process.env['COPILOT_BRIDGE_URL'];
|
||||||
return url ? 'copilot-authenticated' : undefined;
|
return url ? 'copilot-authenticated' : undefined;
|
||||||
}
|
}
|
||||||
|
if (provider.name === 'm365-copilot-bridge') {
|
||||||
|
// Microsoft 365 Copilot uses Microsoft Graph delegated auth inside the bridge.
|
||||||
|
const url = process.env['M365_COPILOT_BRIDGE_URL'];
|
||||||
|
return url ? 'm365-copilot-bridge-enabled' : undefined;
|
||||||
|
}
|
||||||
|
if (provider.name === 'openai-codex') {
|
||||||
|
const bridgeUrl = process.env['OPENAI_CODEX_URL'] || process.env['CODEX_BRIDGE_URL'];
|
||||||
|
if (bridgeUrl) return 'openai-codex-bridge-enabled';
|
||||||
|
return process.env['OPENAI_API_KEY'] || undefined;
|
||||||
|
}
|
||||||
if (provider.name === 'codex') {
|
if (provider.name === 'codex') {
|
||||||
// codex uses GitHub Codex API token
|
// Codex can run through an authless local/subscription bridge. A token remains supported as fallback.
|
||||||
|
const bridgeUrl = process.env['CODEX_BRIDGE_URL'] || process.env['OPENAI_CODEX_URL'];
|
||||||
|
if (bridgeUrl) return 'codex-bridge-enabled';
|
||||||
const token = process.env['GITHUB_CODEX_TOKEN'];
|
const token = process.env['GITHUB_CODEX_TOKEN'];
|
||||||
return token ? token : undefined;
|
return token ? token : undefined;
|
||||||
}
|
}
|
||||||
@ -241,11 +274,11 @@ function getApiKey(provider: ExternalProvider): string | undefined {
|
|||||||
function getBaseUrl(provider: ExternalProvider): string {
|
function getBaseUrl(provider: ExternalProvider): string {
|
||||||
if (provider.name === 'claude-bridge') {
|
if (provider.name === 'claude-bridge') {
|
||||||
const url = process.env['CLAUDE_BRIDGE_URL'];
|
const url = process.env['CLAUDE_BRIDGE_URL'];
|
||||||
return url ? `${url}/v1` : '';
|
return url ?? '';
|
||||||
}
|
}
|
||||||
if (provider.name === 'claude-code') {
|
if (provider.name === 'claude-code') {
|
||||||
const url = process.env['CLAUDE_CODE_URL'];
|
const url = process.env['CLAUDE_CODE_URL'];
|
||||||
return url ? `${url}/v1` : '';
|
return url ?? '';
|
||||||
}
|
}
|
||||||
if (provider.name === 'openai-bridge') {
|
if (provider.name === 'openai-bridge') {
|
||||||
const url = process.env['OPENAI_BRIDGE_URL'];
|
const url = process.env['OPENAI_BRIDGE_URL'];
|
||||||
@ -257,7 +290,19 @@ function getBaseUrl(provider: ExternalProvider): string {
|
|||||||
}
|
}
|
||||||
if (provider.name === 'copilot-bridge') {
|
if (provider.name === 'copilot-bridge') {
|
||||||
const url = process.env['COPILOT_BRIDGE_URL'];
|
const url = process.env['COPILOT_BRIDGE_URL'];
|
||||||
return url ? `${url}` : '';
|
return url ? `${url}/v1` : '';
|
||||||
|
}
|
||||||
|
if (provider.name === 'm365-copilot-bridge') {
|
||||||
|
const url = process.env['M365_COPILOT_BRIDGE_URL'];
|
||||||
|
return url ? `${url}/v1` : '';
|
||||||
|
}
|
||||||
|
if (provider.name === 'openai-codex') {
|
||||||
|
const url = process.env['OPENAI_CODEX_URL'] || process.env['CODEX_BRIDGE_URL'];
|
||||||
|
return url ? `${url}/v1` : provider.baseUrl;
|
||||||
|
}
|
||||||
|
if (provider.name === 'codex') {
|
||||||
|
const url = process.env['CODEX_BRIDGE_URL'] || process.env['OPENAI_CODEX_URL'];
|
||||||
|
return url ? `${url}/v1` : provider.baseUrl;
|
||||||
}
|
}
|
||||||
if (provider.name === 'cloudflare') {
|
if (provider.name === 'cloudflare') {
|
||||||
const accountId = process.env['CLOUDFLARE_ACCOUNT_ID'];
|
const accountId = process.env['CLOUDFLARE_ACCOUNT_ID'];
|
||||||
@ -271,6 +316,11 @@ export function getAvailableProviders(): readonly ExternalProvider[] {
|
|||||||
return PROVIDERS.filter((p) => p.enabled && getApiKey(p));
|
return PROVIDERS.filter((p) => p.enabled && getApiKey(p));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Lists every entry of the static provider table, unfiltered.
 *
 * Unlike getAvailableProviders(), no check on `enabled` or on a resolvable
 * API key is applied, so disabled or unconfigured providers are included.
 * Intended for dashboard listing.
 *
 * @returns The complete provider table as a read-only array.
 */
export function getAllProviders(): readonly ExternalProvider[] {
  const everyProvider: readonly ExternalProvider[] = PROVIDERS;
  return everyProvider;
}
|
||||||
|
|
||||||
function findBestModel(
|
function findBestModel(
|
||||||
provider: ExternalProvider,
|
provider: ExternalProvider,
|
||||||
targetTier: 'fast' | 'medium' | 'large' | 'reasoning',
|
targetTier: 'fast' | 'medium' | 'large' | 'reasoning',
|
||||||
@ -296,7 +346,11 @@ function findBestModel(
|
|||||||
|
|
||||||
function buildRequestHeaders(provider: ExternalProvider, apiKey: string): Record<string, string> {
|
function buildRequestHeaders(provider: ExternalProvider, apiKey: string): Record<string, string> {
|
||||||
const headers: Record<string, string> = { 'Content-Type': 'application/json' };
|
const headers: Record<string, string> = { 'Content-Type': 'application/json' };
|
||||||
if (!['claude-bridge', 'claude-code', 'openai-bridge', 'chatgpt-bridge', 'copilot-bridge'].includes(provider.name)) {
|
const usesAuthlessBridge = AUTHLESS_BRIDGE_PROVIDERS.has(provider.name)
|
||||||
|
|| (provider.name === 'openai-codex' && !!(process.env['OPENAI_CODEX_URL'] || process.env['CODEX_BRIDGE_URL']))
|
||||||
|
|| (provider.name === 'codex' && !!(process.env['CODEX_BRIDGE_URL'] || process.env['OPENAI_CODEX_URL']));
|
||||||
|
|
||||||
|
if (!usesAuthlessBridge) {
|
||||||
headers['Authorization'] = `Bearer ${apiKey}`;
|
headers['Authorization'] = `Bearer ${apiKey}`;
|
||||||
}
|
}
|
||||||
return headers;
|
return headers;
|
||||||
@ -311,13 +365,29 @@ function buildRequestPayload(model: ExternalModel, request: ExternalCompletionRe
|
|||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Builds the request body for providers that callProvider() reaches through
 * the prompt-style `/api/generate` endpoint.
 *
 * Chat messages are flattened: every system message goes into `system`, and
 * all remaining messages become a `role: content` transcript joined by blank
 * lines in `prompt`.
 *
 * @param model   Model whose `id` is sent as `model`.
 * @param request Chat-style request to flatten.
 * @returns Payload with `model`, `prompt`, `system`, `temperature` (default
 *          0.3) and `max_tokens` (default 2048). `system` is `undefined` when
 *          the request carries no system message.
 */
function buildGenerateBridgePayload(model: ExternalModel, request: ExternalCompletionRequest): Record<string, unknown> {
  // Collect ALL system messages. Keeping only the first one would silently
  // drop the rest, because system messages are also excluded from the prompt.
  const systemParts = request.messages
    .filter((m) => m.role === 'system')
    .map((m) => m.content);
  const system = systemParts.length > 0 ? systemParts.join('\n\n') : undefined;

  const prompt = request.messages
    .filter((m) => m.role !== 'system')
    .map((m) => `${m.role}: ${m.content}`)
    .join('\n\n');

  return {
    model: model.id,
    prompt,
    system,
    // `??` rather than `||` so an explicit 0 is respected.
    temperature: request.temperature ?? 0.3,
    max_tokens: request.max_tokens ?? 2048,
  };
}
|
||||||
|
|
||||||
function parseExternalResponse(
|
function parseExternalResponse(
|
||||||
data: any,
|
data: any,
|
||||||
model: ExternalModel,
|
model: ExternalModel,
|
||||||
provider: ExternalProvider,
|
provider: ExternalProvider,
|
||||||
start: number,
|
start: number,
|
||||||
): ExternalCompletionResponse {
|
): ExternalCompletionResponse {
|
||||||
const content = data.choices?.[0]?.message?.content ?? '';
|
const content = data.choices?.[0]?.message?.content ?? data.content ?? data.response ?? data.message?.content ?? '';
|
||||||
recordRequest(provider.name);
|
recordRequest(provider.name);
|
||||||
return {
|
return {
|
||||||
response: content,
|
response: content,
|
||||||
@ -341,14 +411,15 @@ async function callProvider(
|
|||||||
const baseUrl = getBaseUrl(provider);
|
const baseUrl = getBaseUrl(provider);
|
||||||
if (!baseUrl) throw new Error(`No base URL for ${provider.name}`);
|
if (!baseUrl) throw new Error(`No base URL for ${provider.name}`);
|
||||||
|
|
||||||
const url = `${baseUrl}/chat/completions`;
|
const generateBridge = GENERATE_BRIDGE_PROVIDERS.has(provider.name);
|
||||||
|
const url = generateBridge ? `${baseUrl}/api/generate` : `${baseUrl}/chat/completions`;
|
||||||
const controller = new AbortController();
|
const controller = new AbortController();
|
||||||
const timer = setTimeout(() => controller.abort(), timeoutMs);
|
const timer = setTimeout(() => controller.abort(), timeoutMs);
|
||||||
const start = Date.now();
|
const start = Date.now();
|
||||||
|
|
||||||
try {
|
try {
|
||||||
const headers = buildRequestHeaders(provider, apiKey);
|
const headers = buildRequestHeaders(provider, apiKey);
|
||||||
const payload = buildRequestPayload(model, request);
|
const payload = generateBridge ? buildGenerateBridgePayload(model, request) : buildRequestPayload(model, request);
|
||||||
|
|
||||||
const response = await fetch(url, {
|
const response = await fetch(url, {
|
||||||
method: 'POST',
|
method: 'POST',
|
||||||
|
|||||||
@ -728,6 +728,36 @@ function handleFormalLogicOverride(
|
|||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// ── Helper: Code Generation Intent Override ───────────────────────────────
|
||||||
|
|
||||||
|
/**
 * Case-insensitive patterns that handleCodeGenerationOverride() tests against
 * the full request text; a single match is enough to trigger the override.
 */
const CODE_GENERATION_PATTERNS: RegExp[] = [
  // "write [a] [<language/framework>] <artifact>", e.g. "write a python script".
  /\bwrite\s+(?:a\s+)?(?:typescript|javascript|python|go|rust|react|next\.js|node)?\s*(?:function|class|script|module|component|test|handler|middleware)\b/i,
  // A construction verb followed within 160 characters by an artifact noun.
  /\b(?:implement|create|build|generate|scaffold)\b[\s\S]{0,160}\b(?:api|endpoint|function|class|component|service|schema|migration|crud|jwt|test|project|module)\b/i,
  // "rest api" / "graphql api" followed within 160 characters by a build or auth keyword.
  /\b(?:rest|graphql)\s+api\b[\s\S]{0,160}\b(?:implement|create|build|endpoint|authentication|jwt)\b/i,
];
|
||||||
|
|
||||||
|
/**
 * Shortcut for requests whose text matches one of CODE_GENERATION_PATTERNS.
 *
 * On a match the request is assigned the `code_generation` tier with a fixed
 * score (0.62) and confidence (0.86); dimensions are still computed so the
 * result carries them. The tier is recorded for the session and a debug line
 * is logged.
 *
 * @param fullText     Text the patterns are tested against.
 * @param input        Scorer input forwarded to computeAllDimensions().
 * @param userMessages Weighted messages forwarded to computeAllDimensions().
 * @returns The override result, or `null` when no pattern matches.
 */
function handleCodeGenerationOverride(
  fullText: string,
  input: ScorerInput,
  userMessages: readonly WeightedMessage[],
): ScoringResult | null {
  const matchesIntent = CODE_GENERATION_PATTERNS.some((re) => re.test(fullText));

  if (matchesIntent) {
    const override: ScoringResult = {
      tier: 'code_generation',
      score: 0.62,
      confidence: 0.86,
      reason: 'code generation intent detected',
      dimensions: computeAllDimensions(input, userMessages, fullText),
    };

    recordSessionTier('code_generation');
    logger.debug(
      { tier: 'code_generation', reason: 'code_generation_override' },
      'Request scored via code generation override',
    );

    return override;
  }

  return null;
}
|
||||||
|
|
||||||
// ── Helper: Apply Score Overrides ──────────────────────────────────────────
|
// ── Helper: Apply Score Overrides ──────────────────────────────────────────
|
||||||
|
|
||||||
interface ScoreOverridesInput {
|
interface ScoreOverridesInput {
|
||||||
@ -754,6 +784,7 @@ function applyScoreOverrides(
|
|||||||
const codeGenDim = dimensions.find((d) => d.name === 'codeGeneration');
|
const codeGenDim = dimensions.find((d) => d.name === 'codeGeneration');
|
||||||
if (codeGenDim && codeGenDim.rawScore > 0.25) {
|
if (codeGenDim && codeGenDim.rawScore > 0.25) {
|
||||||
tier = 'code_generation';
|
tier = 'code_generation';
|
||||||
|
confidence = Math.max(confidence, 0.78);
|
||||||
reason = 'code generation keywords detected';
|
reason = 'code generation keywords detected';
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -771,7 +802,7 @@ function applyScoreOverrides(
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Ambiguity check
|
// Ambiguity check
|
||||||
if (confidence < 0.45) {
|
if (confidence < 0.45 && tier !== 'code_generation' && tier !== 'reasoning') {
|
||||||
tier = 'medium';
|
tier = 'medium';
|
||||||
reason = 'ambiguous (confidence < 0.45, defaulting to medium)';
|
reason = 'ambiguous (confidence < 0.45, defaulting to medium)';
|
||||||
}
|
}
|
||||||
@ -795,6 +826,9 @@ export function scoreRequest(
|
|||||||
const formalLogicResult = handleFormalLogicOverride(fullText, input, userMessages);
|
const formalLogicResult = handleFormalLogicOverride(fullText, input, userMessages);
|
||||||
if (formalLogicResult) return formalLogicResult;
|
if (formalLogicResult) return formalLogicResult;
|
||||||
|
|
||||||
|
const codeGenerationResult = handleCodeGenerationOverride(fullText, input, userMessages);
|
||||||
|
if (codeGenerationResult) return codeGenerationResult;
|
||||||
|
|
||||||
const dimensions = computeAllDimensions(input, userMessages, fullText);
|
const dimensions = computeAllDimensions(input, userMessages, fullText);
|
||||||
let rawScore = 0;
|
let rawScore = 0;
|
||||||
for (const dim of dimensions) {
|
for (const dim of dimensions) {
|
||||||
|
|||||||
@ -184,14 +184,14 @@ export function getOllamaBaseUrl(): string {
|
|||||||
/**
|
/**
|
||||||
* Maps a scorer tier to the best primary model and its fallback chain.
|
* Maps a scorer tier to the best primary model and its fallback chain.
|
||||||
* The 'reasoning' tier uses llama3.3:70b (complex_reasoning strength) from the large tier.
|
* The 'reasoning' tier uses llama3.3:70b (complex_reasoning strength) from the large tier.
|
||||||
* The 'code_generation' tier uses OpenAI Codex (gpt-4-turbo) as primary via external provider.
|
* The 'code_generation' tier uses OpenAI Codex as primary via external provider.
|
||||||
*/
|
*/
|
||||||
const TIER_MODEL_MAP: Record<Tier, { primary: string; configTier: 'fast' | 'medium' | 'large'; provider?: string }> = {
|
const TIER_MODEL_MAP: Record<Tier, { primary: string; configTier: 'fast' | 'medium' | 'large'; provider?: string }> = {
|
||||||
fast: { primary: 'qwen2.5:3b', configTier: 'fast' },
|
fast: { primary: 'qwen2.5:3b', configTier: 'fast' },
|
||||||
medium: { primary: 'qwen2.5:14b', configTier: 'medium' },
|
medium: { primary: 'qwen2.5:14b', configTier: 'medium' },
|
||||||
large: { primary: 'qwen2.5:32b', configTier: 'large' },
|
large: { primary: 'qwen2.5:32b', configTier: 'large' },
|
||||||
reasoning: { primary: 'llama3.3:70b', configTier: 'large' },
|
reasoning: { primary: 'llama3.3:70b', configTier: 'large' },
|
||||||
code_generation: { primary: 'gpt-4-turbo', configTier: 'large', provider: 'openai-codex' },
|
code_generation: { primary: 'gpt-5.1-codex-mini', configTier: 'large', provider: 'openai-codex' },
|
||||||
};
|
};
|
||||||
|
|
||||||
function buildMediumTierFallback(
|
function buildMediumTierFallback(
|
||||||
@ -223,7 +223,8 @@ function buildScoredFallbackChain(
|
|||||||
models: ModelsYaml,
|
models: ModelsYaml,
|
||||||
): string[] {
|
): string[] {
|
||||||
if (tier === 'reasoning' || tier === 'code_generation') {
|
if (tier === 'reasoning' || tier === 'code_generation') {
|
||||||
return [selectedModel, ...buildFallbackChain(selectedModel, configTier, models).filter((m) => m !== selectedModel)];
|
const fallbackTier = tier === 'code_generation' ? 'code_generation' : configTier;
|
||||||
|
return [selectedModel, ...buildFallbackChain(selectedModel, fallbackTier, models).filter((m) => m !== selectedModel)];
|
||||||
}
|
}
|
||||||
return buildFallbackChain(selectedModel, configTier, models);
|
return buildFallbackChain(selectedModel, configTier, models);
|
||||||
}
|
}
|
||||||
@ -302,7 +303,7 @@ export function routeByScore(
|
|||||||
const mapping = TIER_MODEL_MAP[scoringResult.tier];
|
const mapping = TIER_MODEL_MAP[scoringResult.tier];
|
||||||
const selectedModel = mapping.primary;
|
const selectedModel = mapping.primary;
|
||||||
const configTier = mapping.configTier;
|
const configTier = mapping.configTier;
|
||||||
const tierConfig = models.tiers[configTier];
|
const tierConfig = models.tiers[scoringResult.tier] ?? models.tiers[configTier];
|
||||||
|
|
||||||
if (!tierConfig) {
|
if (!tierConfig) {
|
||||||
logger.error({ tier: configTier }, 'Tier config not found in models.yaml, falling back to medium');
|
logger.error({ tier: configTier }, 'Tier config not found in models.yaml, falling back to medium');
|
||||||
|
|||||||
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@ -38,22 +38,40 @@ async function checkOllama(baseUrl: string): Promise<{ status: 'ok' | 'down'; la
|
|||||||
|
|
||||||
async function checkDatabase(): Promise<{ status: 'ok' | 'down'; error?: string }> {
|
async function checkDatabase(): Promise<{ status: 'ok' | 'down'; error?: string }> {
|
||||||
try {
|
try {
|
||||||
await query('SELECT 1');
|
await withTimeout(query('SELECT 1'), 2500, 'database check timed out');
|
||||||
return { status: 'ok' };
|
return { status: 'ok' };
|
||||||
} catch (err) {
|
} catch (err) {
|
||||||
return { status: 'down', error: err instanceof Error ? err.message : 'Unknown error' };
|
return { status: 'down', error: err instanceof Error ? err.message : 'Unknown error' };
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Awaits `promise`, but rejects with `new Error(message)` if it has not
 * settled within `timeoutMs` milliseconds.
 *
 * The timer is always cleared once the race settles, so a fast result leaves
 * no pending timeout behind. The wrapped promise itself is not cancelled.
 *
 * @param promise   Operation to guard.
 * @param timeoutMs Deadline in milliseconds.
 * @param message   Message of the Error raised when the deadline wins.
 * @returns The value `promise` resolves to.
 * @throws The rejection of `promise`, or the timeout Error.
 */
async function withTimeout<T>(promise: Promise<T>, timeoutMs: number, message: string): Promise<T> {
  let pendingTimer: NodeJS.Timeout | undefined;

  // Never resolves; it can only reject when the deadline fires.
  const deadline = new Promise<T>((_unused, reject) => {
    pendingTimer = setTimeout(() => {
      reject(new Error(message));
    }, timeoutMs);
  });

  try {
    const settled = await Promise.race([promise, deadline]);
    return settled;
  } finally {
    if (pendingTimer) {
      clearTimeout(pendingTimer);
    }
  }
}
|
||||||
|
|
||||||
async function checkQueue(): Promise<{ status: 'ok' | 'down' | 'unknown'; depth?: number; error?: string }> {
|
async function checkQueue(): Promise<{ status: 'ok' | 'down' | 'unknown'; depth?: number; error?: string }> {
|
||||||
const boss = getPgBoss();
|
const boss = getPgBoss();
|
||||||
if (!boss) return { status: 'unknown' };
|
if (!boss) return { status: 'unknown' };
|
||||||
|
|
||||||
try {
|
try {
|
||||||
const [queued, active] = await Promise.all([
|
const [queued, active] = await withTimeout(
|
||||||
boss.getQueueSize('llm-batch', { before: 'completed' }),
|
Promise.all([
|
||||||
boss.getQueueSize('llm-batch', { before: 'active' }),
|
boss.getQueueSize('llm-batch', { before: 'completed' }),
|
||||||
]);
|
boss.getQueueSize('llm-batch', { before: 'active' }),
|
||||||
|
]),
|
||||||
|
2500,
|
||||||
|
'queue check timed out',
|
||||||
|
);
|
||||||
return { status: 'ok', depth: (queued ?? 0) + (active ?? 0) };
|
return { status: 'ok', depth: (queued ?? 0) + (active ?? 0) };
|
||||||
} catch (err) {
|
} catch (err) {
|
||||||
return { status: 'down', error: err instanceof Error ? err.message : 'Unknown error' };
|
return { status: 'down', error: err instanceof Error ? err.message : 'Unknown error' };
|
||||||
@ -62,8 +80,10 @@ async function checkQueue(): Promise<{ status: 'ok' | 'down' | 'unknown'; depth?
|
|||||||
|
|
||||||
async function getReviewQueueCount(): Promise<number> {
|
async function getReviewQueueCount(): Promise<number> {
|
||||||
try {
|
try {
|
||||||
const result = await query<{ count: string }>(
|
const result = await withTimeout(
|
||||||
'SELECT COUNT(*) as count FROM review_queue WHERE decision IS NULL',
|
query<{ count: string }>('SELECT COUNT(*) as count FROM review_queue WHERE decision IS NULL'),
|
||||||
|
2500,
|
||||||
|
'review queue check timed out',
|
||||||
);
|
);
|
||||||
return parseInt(result.rows[0]?.count ?? '0', 10);
|
return parseInt(result.rows[0]?.count ?? '0', 10);
|
||||||
} catch {
|
} catch {
|
||||||
@ -78,8 +98,9 @@ export async function healthRoute(fastify: FastifyInstance): Promise<void> {
|
|||||||
// Check if this is a dashboard UI request with ?ui=1 or ?dashboard=1
|
// Check if this is a dashboard UI request with ?ui=1 or ?dashboard=1
|
||||||
const query = request.query as any;
|
const query = request.query as any;
|
||||||
const isDashboardRequest = query.ui || query.dashboard;
|
const isDashboardRequest = query.ui || query.dashboard;
|
||||||
|
const acceptsHtml = String(request.headers.accept ?? '').includes('text/html');
|
||||||
|
|
||||||
if (isDashboardRequest) {
|
if (isDashboardRequest || acceptsHtml) {
|
||||||
try {
|
try {
|
||||||
const __filename = fileURLToPath(import.meta.url);
|
const __filename = fileURLToPath(import.meta.url);
|
||||||
const __dirname = dirname(__filename);
|
const __dirname = dirname(__filename);
|
||||||
@ -108,8 +129,8 @@ export async function healthRoute(fastify: FastifyInstance): Promise<void> {
|
|||||||
|
|
||||||
const breakerStates = getAllBreakerStates();
|
const breakerStates = getAllBreakerStates();
|
||||||
|
|
||||||
const isDown = ollamaCheck.status === 'down' || dbCheck.status === 'down';
|
const isDown = dbCheck.status === 'down';
|
||||||
const isDegraded = queueCheck.status === 'down' || Object.values(breakerStates).some((s) => s === 'open');
|
const isDegraded = ollamaCheck.status === 'down' || queueCheck.status === 'down' || Object.values(breakerStates).some((s) => s === 'open');
|
||||||
|
|
||||||
const status: HealthStatus['status'] = isDown ? 'down' : isDegraded ? 'degraded' : 'ok';
|
const status: HealthStatus['status'] = isDown ? 'down' : isDegraded ? 'degraded' : 'ok';
|
||||||
|
|
||||||
|
|||||||
@ -11,6 +11,22 @@ export async function staticRoute(fastify: FastifyInstance): Promise<void> {
|
|||||||
|
|
||||||
logger.info({ publicDir }, 'Static file serving initialized');
|
logger.info({ publicDir }, 'Static file serving initialized');
|
||||||
|
|
||||||
|
function sendHtml(filename: string, reply: any) {
|
||||||
|
const filePath = join(publicDir, filename);
|
||||||
|
if (!existsSync(filePath)) {
|
||||||
|
logger.warn({ path: filePath }, `${filename} not found`);
|
||||||
|
return reply.status(404).send({ error: `${filename} not found` });
|
||||||
|
}
|
||||||
|
|
||||||
|
const content = readFileSync(filePath, 'utf-8');
|
||||||
|
return reply
|
||||||
|
.header('Cache-Control', 'no-cache, no-store, must-revalidate, max-age=0')
|
||||||
|
.header('Pragma', 'no-cache')
|
||||||
|
.header('Expires', '0')
|
||||||
|
.type('text/html')
|
||||||
|
.send(content);
|
||||||
|
}
|
||||||
|
|
||||||
// Serve root path
|
// Serve root path
|
||||||
fastify.get('/', async (request, reply) => {
|
fastify.get('/', async (request, reply) => {
|
||||||
logger.info({ method: request.method, url: request.url, host: request.hostname }, 'Root path requested');
|
logger.info({ method: request.method, url: request.url, host: request.hostname }, 'Root path requested');
|
||||||
@ -26,13 +42,47 @@ export async function staticRoute(fastify: FastifyInstance): Promise<void> {
|
|||||||
|
|
||||||
// Serve /dashboard.html
|
// Serve /dashboard.html
|
||||||
fastify.get('/dashboard.html', async (_request, reply) => {
|
fastify.get('/dashboard.html', async (_request, reply) => {
|
||||||
const dashboardPath = join(publicDir, 'dashboard.html');
|
return sendHtml('dashboard.html', reply);
|
||||||
if (!existsSync(dashboardPath)) {
|
});
|
||||||
logger.warn({ path: dashboardPath }, 'dashboard.html not found');
|
|
||||||
return reply.status(404).send({ error: 'dashboard.html not found' });
|
fastify.get('/dashboard-v2.html', async (_request, reply) => {
|
||||||
}
|
return sendHtml('dashboard-v2.html', reply);
|
||||||
const content = readFileSync(dashboardPath, 'utf-8');
|
});
|
||||||
return reply.type('text/html').send(content);
|
|
||||||
|
fastify.get('/v2/dashboard', async (_request, reply) => {
|
||||||
|
return sendHtml('dashboard-v2.html', reply);
|
||||||
|
});
|
||||||
|
|
||||||
|
fastify.get('/v2/dashboard/', async (_request, reply) => {
|
||||||
|
return sendHtml('dashboard-v2.html', reply);
|
||||||
|
});
|
||||||
|
|
||||||
|
fastify.get('/v2', async (_request, reply) => {
|
||||||
|
return sendHtml('dashboard-v2.html', reply);
|
||||||
|
});
|
||||||
|
|
||||||
|
fastify.get('/v2/', async (_request, reply) => {
|
||||||
|
return sendHtml('dashboard-v2.html', reply);
|
||||||
|
});
|
||||||
|
|
||||||
|
fastify.get('/dashboard/v2', async (_request, reply) => {
|
||||||
|
return sendHtml('dashboard-v2.html', reply);
|
||||||
|
});
|
||||||
|
|
||||||
|
fastify.get('/dashboard/v2/', async (_request, reply) => {
|
||||||
|
return sendHtml('dashboard-v2.html', reply);
|
||||||
|
});
|
||||||
|
|
||||||
|
fastify.get('/api/dashboard-v2', async (_request, reply) => {
|
||||||
|
return sendHtml('dashboard-v2.html', reply);
|
||||||
|
});
|
||||||
|
|
||||||
|
fastify.get('/api/v2/dashboard', async (_request, reply) => {
|
||||||
|
return sendHtml('dashboard-v2.html', reply);
|
||||||
|
});
|
||||||
|
|
||||||
|
fastify.get('/api/dashboard/v2', async (_request, reply) => {
|
||||||
|
return sendHtml('dashboard-v2.html', reply);
|
||||||
});
|
});
|
||||||
|
|
||||||
// Serve /api/dashboard as HTML for compatibility
|
// Serve /api/dashboard as HTML for compatibility
|
||||||
|
|||||||
@ -107,6 +107,25 @@ export async function registerHTTPSRedirectMiddleware(server: FastifyInstance) {
|
|||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
const hostHeader = String(request.headers['host'] ?? '');
|
||||||
|
const forwardedHost = String(request.headers['x-forwarded-host'] ?? '');
|
||||||
|
const remoteAddress = request.ip ?? '';
|
||||||
|
const host = forwardedHost || hostHeader;
|
||||||
|
const isLoopbackHost =
|
||||||
|
/^localhost(?::\d+)?$/i.test(host) ||
|
||||||
|
/^127\.0\.0\.1(?::\d+)?$/.test(host) ||
|
||||||
|
/^\[::1\](?::\d+)?$/.test(host);
|
||||||
|
const isLoopbackRemote =
|
||||||
|
remoteAddress === '127.0.0.1' ||
|
||||||
|
remoteAddress === '::1' ||
|
||||||
|
remoteAddress === '::ffff:127.0.0.1';
|
||||||
|
|
||||||
|
// Internal loopback callers such as Magatama Core run behind the same host
|
||||||
|
// and must not be redirected to HTTPS unless the Gateway actually serves TLS.
|
||||||
|
if (isLoopbackHost || isLoopbackRemote) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
// Check if connection is not secure
|
// Check if connection is not secure
|
||||||
// In production, X-Forwarded-Proto is set by reverse proxy (Cloudflare)
|
// In production, X-Forwarded-Proto is set by reverse proxy (Cloudflare)
|
||||||
const isSecure =
|
const isSecure =
|
||||||
@ -114,7 +133,6 @@ export async function registerHTTPSRedirectMiddleware(server: FastifyInstance) {
|
|||||||
(request.headers['x-forwarded-proto'] === 'https');
|
(request.headers['x-forwarded-proto'] === 'https');
|
||||||
|
|
||||||
if (!isSecure && process.env['NODE_ENV'] === 'production') {
|
if (!isSecure && process.env['NODE_ENV'] === 'production') {
|
||||||
const host = request.headers['x-forwarded-host'] || request.headers['host'];
|
|
||||||
return reply.redirect(`https://${host}${request.url}`);
|
return reply.redirect(`https://${host}${request.url}`);
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
@ -126,10 +144,10 @@ export async function registerHTTPSRedirectMiddleware(server: FastifyInstance) {
|
|||||||
*/
|
*/
|
||||||
export async function registerSecurityHeadersMiddleware(server: FastifyInstance) {
|
export async function registerSecurityHeadersMiddleware(server: FastifyInstance) {
|
||||||
server.addHook('onSend', async (request, reply) => {
|
server.addHook('onSend', async (request, reply) => {
|
||||||
// Content Security Policy - strict, no inline scripts
|
// Content Security Policy for the self-contained dashboard UI.
|
||||||
reply.header(
|
reply.header(
|
||||||
'Content-Security-Policy',
|
'Content-Security-Policy',
|
||||||
"default-src 'self'; script-src 'self'; object-src 'none'; frame-ancestors 'none'; base-uri 'self'; form-action 'self'"
|
"default-src 'self'; script-src 'self' 'unsafe-inline'; style-src 'self' 'unsafe-inline'; object-src 'none'; frame-ancestors 'none'; base-uri 'self'; form-action 'self'"
|
||||||
);
|
);
|
||||||
|
|
||||||
// Prevent clickjacking
|
// Prevent clickjacking
|
||||||
|
|||||||
@ -12,11 +12,23 @@ import { dashboardRoute } from './routes/dashboard.js';
|
|||||||
import { streamRoute } from './routes/stream.js';
|
import { streamRoute } from './routes/stream.js';
|
||||||
import { learningInsightsRoute } from './routes/learning-insights.js';
|
import { learningInsightsRoute } from './routes/learning-insights.js';
|
||||||
import { staticRoute } from './routes/static.js';
|
import { staticRoute } from './routes/static.js';
|
||||||
|
import tenantAuth from './security/tenant-auth.js';
|
||||||
|
import { internalRoute } from './routes/internal.js';
|
||||||
import { getPool } from './db/client.js';
|
import { getPool } from './db/client.js';
|
||||||
import { runMigrations } from './db/migrate.js';
|
import { runMigrations } from './db/migrate.js';
|
||||||
import { initPgBoss } from './queue/pg-boss-client.js';
|
import { initPgBoss } from './queue/pg-boss-client.js';
|
||||||
import { logger } from './observability/logger.js';
|
import { logger } from './observability/logger.js';
|
||||||
import { scheduleLearningCycles } from './learning/learning-engine.js';
|
import { scheduleLearningCycles } from './learning/learning-engine.js';
|
||||||
|
import { autoSpawnOnBoot } from './modules/auto-discovery.js';
|
||||||
|
import { embeddingsRoute } from './routes/embeddings.js';
|
||||||
|
import { replayRoute } from './routes/replay.js';
|
||||||
|
import { audioRoute } from './routes/audio.js';
|
||||||
|
import { mcpRoute } from './modules/mcp-server.js';
|
||||||
|
import { loadWorkspacePreset, applyWorkspaceDefaults } from './modules/workspace-presets.js';
|
||||||
|
import { loadPlugins } from './modules/plugin-system.js';
|
||||||
|
import { ingestPeerStats, scheduleFederationPublisher, buildStats } from './modules/federated-stats.js';
|
||||||
|
import { scheduleAdaptiveLearner, getAllRecommendations } from './modules/adaptive-routing.js';
|
||||||
|
import { startBridgeWatchdog } from './modules/bridge-watchdog.js';
|
||||||
import { fileURLToPath } from 'url';
|
import { fileURLToPath } from 'url';
|
||||||
import { dirname, join } from 'path';
|
import { dirname, join } from 'path';
|
||||||
import { readFileSync, existsSync } from 'fs';
|
import { readFileSync, existsSync } from 'fs';
|
||||||
@ -77,6 +89,7 @@ async function buildServer() {
|
|||||||
directives: {
|
directives: {
|
||||||
defaultSrc: ["'self'"],
|
defaultSrc: ["'self'"],
|
||||||
scriptSrc: ["'self'", "'unsafe-inline'"],
|
scriptSrc: ["'self'", "'unsafe-inline'"],
|
||||||
|
styleSrc: ["'self'", "'unsafe-inline'"],
|
||||||
objectSrc: ["'none'"],
|
objectSrc: ["'none'"],
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
@ -92,15 +105,17 @@ async function buildServer() {
|
|||||||
'http://192.168.178.196:3000',
|
'http://192.168.178.196:3000',
|
||||||
/^http:\/\/192\.168\.178\.\d+/,
|
/^http:\/\/192\.168\.178\.\d+/,
|
||||||
/^https:\/\/.*\.context-x\.org$/,
|
/^https:\/\/.*\.context-x\.org$/,
|
||||||
|
/^https:\/\/(www\.)?runwerk\.app$/,
|
||||||
|
/^https:\/\/.*\.runwerk\.app$/,
|
||||||
],
|
],
|
||||||
methods: ['GET', 'POST', 'PUT', 'DELETE', 'OPTIONS'],
|
methods: ['GET', 'POST', 'PUT', 'DELETE', 'OPTIONS'],
|
||||||
allowedHeaders: ['Content-Type', 'Authorization', 'X-Caller-ID'],
|
allowedHeaders: ['Content-Type', 'Authorization', 'X-Caller-ID', 'X-Runwerk-Caller', 'X-Runwerk-Privacy', 'X-Runwerk-Tier', 'X-Runwerk-Purpose'],
|
||||||
credentials: true,
|
credentials: true,
|
||||||
});
|
});
|
||||||
|
|
||||||
await server.register(fastifyRateLimit, {
|
await server.register(fastifyRateLimit, {
|
||||||
global: true,
|
global: true,
|
||||||
max: 100,
|
max: 1000,
|
||||||
timeWindow: '1 minute',
|
timeWindow: '1 minute',
|
||||||
keyGenerator: (request) => {
|
keyGenerator: (request) => {
|
||||||
const caller = (request.headers['x-caller-id'] as string) ?? 'default';
|
const caller = (request.headers['x-caller-id'] as string) ?? 'default';
|
||||||
@ -113,7 +128,17 @@ async function buildServer() {
|
|||||||
}),
|
}),
|
||||||
});
|
});
|
||||||
|
|
||||||
|
await server.register(tenantAuth);
|
||||||
|
await server.register(internalRoute);
|
||||||
await server.register(completionRoute, { prefix: '/v1' });
|
await server.register(completionRoute, { prefix: '/v1' });
|
||||||
|
await server.register(embeddingsRoute, { prefix: '/v1' });
|
||||||
|
await server.register(replayRoute, { prefix: '/v1' });
|
||||||
|
await server.register(audioRoute, { prefix: '/v1' });
|
||||||
|
await server.register(mcpRoute);
|
||||||
|
server.post('/v1/federation/ingest', async (request, reply) => {
|
||||||
|
const result = ingestPeerStats(request.body as never);
|
||||||
|
return reply.send({ success: true, ...result });
|
||||||
|
});
|
||||||
await server.register(batchRoute, { prefix: '/v1' });
|
await server.register(batchRoute, { prefix: '/v1' });
|
||||||
await server.register(classifyRoute, { prefix: '/v1' });
|
await server.register(classifyRoute, { prefix: '/v1' });
|
||||||
await server.register(reviewRoute, { prefix: '/v1' });
|
await server.register(reviewRoute, { prefix: '/v1' });
|
||||||
@ -192,9 +217,54 @@ async function main() {
|
|||||||
} catch (pgErr) {
|
} catch (pgErr) {
|
||||||
logger.warn({ pgErr }, 'PgBoss init failed - continuing without queue');
|
logger.warn({ pgErr }, 'PgBoss init failed - continuing without queue');
|
||||||
}
|
}
|
||||||
|
// Workspace preset (apply env defaults from workspace.yaml if present)
|
||||||
|
try {
|
||||||
|
const preset = await loadWorkspacePreset();
|
||||||
|
if (preset) applyWorkspaceDefaults(preset);
|
||||||
|
} catch (err) {
|
||||||
|
logger.warn({ err }, 'Workspace preset load failed (non-fatal)');
|
||||||
|
}
|
||||||
|
|
||||||
|
// Plugin system (load pre/post hooks from PLUGINS_DIR)
|
||||||
|
try {
|
||||||
|
await loadPlugins();
|
||||||
|
} catch (err) {
|
||||||
|
logger.warn({ err }, 'Plugin loading failed (non-fatal)');
|
||||||
|
}
|
||||||
|
|
||||||
scheduleLearningCycles();
|
scheduleLearningCycles();
|
||||||
await server.listen({ port, host });
|
await server.listen({ port, host });
|
||||||
logger.info({ port, host }, 'LLM Gateway started');
|
logger.info({ port, host }, 'LLM Gateway started');
|
||||||
|
|
||||||
|
// Auto-spawn detected subscription bridges if AUTO_SPAWN_BRIDGES=1
|
||||||
|
void autoSpawnOnBoot();
|
||||||
|
|
||||||
|
// Bridge watchdog (opt-in via WATCHDOG_ENABLED=1)
|
||||||
|
try {
|
||||||
|
startBridgeWatchdog();
|
||||||
|
} catch (err) {
|
||||||
|
logger.warn({ err }, 'Bridge watchdog start failed');
|
||||||
|
}
|
||||||
|
|
||||||
|
// Adaptive routing learner (opt-in via ADAPTIVE_ROUTING_ENABLED=1)
|
||||||
|
try {
|
||||||
|
const pool = getPool();
|
||||||
|
scheduleAdaptiveLearner(pool as never);
|
||||||
|
} catch (err) {
|
||||||
|
logger.warn({ err }, 'Adaptive learner scheduling failed');
|
||||||
|
}
|
||||||
|
|
||||||
|
// Federation publisher (opt-in via FEDERATION_ENABLED=1)
|
||||||
|
scheduleFederationPublisher(async () => {
|
||||||
|
const recos = getAllRecommendations();
|
||||||
|
return buildStats(recos.map((r) => ({
|
||||||
|
task_type: r.taskType,
|
||||||
|
model_used: r.preferredModel,
|
||||||
|
samples: r.rationale.samples,
|
||||||
|
success_rate: r.rationale.successRate,
|
||||||
|
avg_latency_ms: r.rationale.avgLatencyMs,
|
||||||
|
})));
|
||||||
|
});
|
||||||
} catch (err) {
|
} catch (err) {
|
||||||
logger.error({ err }, 'Failed to start server');
|
logger.error({ err }, 'Failed to start server');
|
||||||
process.exit(1);
|
process.exit(1);
|
||||||
|
|||||||
@ -1,5 +1,5 @@
|
|||||||
// Tokenvault Integration Hooks
|
// Tokenvault Integration Hooks
|
||||||
// Instruments LeanCTX and RTK compression tracking
|
// Instruments LLM Gateway compression tracking (legacy hook names retained for backward compat)
|
||||||
// Updated: 2026-04-19
|
// Updated: 2026-04-19
|
||||||
|
|
||||||
import { Pool, QueryResult } from 'pg';
|
import { Pool, QueryResult } from 'pg';
|
||||||
@ -62,13 +62,13 @@ export function estimateTokens(text: string | object): number {
|
|||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Log compression ratio for RTK output
|
* Log compression ratio for token-trim output
|
||||||
*/
|
*/
|
||||||
export async function logRTKCompression(
|
export async function logGatewayTrimCompression(
|
||||||
db: Pool,
|
db: Pool,
|
||||||
rawOutput: string,
|
rawOutput: string,
|
||||||
compressedOutput: string,
|
compressedOutput: string,
|
||||||
toolUsed: string = 'rtk'
|
toolUsed: string = 'llm-gateway-trim'
|
||||||
): Promise<CompressionMetric> {
|
): Promise<CompressionMetric> {
|
||||||
const tokensBefore = estimateTokens(rawOutput);
|
const tokensBefore = estimateTokens(rawOutput);
|
||||||
const tokensAfter = estimateTokens(compressedOutput);
|
const tokensAfter = estimateTokens(compressedOutput);
|
||||||
@ -93,9 +93,9 @@ export async function logRTKCompression(
|
|||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Track LeanCTX file read operations
|
* Track gateway file-read operations
|
||||||
*/
|
*/
|
||||||
export async function logLeanCTXRead(
|
export async function logGatewayFileRead(
|
||||||
db: Pool,
|
db: Pool,
|
||||||
filePath: string,
|
filePath: string,
|
||||||
mode: string,
|
mode: string,
|
||||||
@ -115,7 +115,7 @@ export async function logLeanCTXRead(
|
|||||||
tokensBefore: rawTokens,
|
tokensBefore: rawTokens,
|
||||||
tokensAfter: compressedTokens,
|
tokensAfter: compressedTokens,
|
||||||
savingsPct,
|
savingsPct,
|
||||||
toolUsed: 'lean-ctx'
|
toolUsed: 'llm-gateway'
|
||||||
};
|
};
|
||||||
|
|
||||||
await logCompressionMetric(db, metric);
|
await logCompressionMetric(db, metric);
|
||||||
@ -207,7 +207,7 @@ export async function getCompressionStats(
|
|||||||
tool_used,
|
tool_used,
|
||||||
COUNT(*) as count
|
COUNT(*) as count
|
||||||
FROM tokenvault_metrics
|
FROM tokenvault_metrics
|
||||||
WHERE created_at > NOW() - INTERVAL $1 HOUR
|
WHERE created_at > NOW() - ($1 * INTERVAL '1 hour')
|
||||||
GROUP BY tool_used`,
|
GROUP BY tool_used`,
|
||||||
[hoursBack]
|
[hoursBack]
|
||||||
);
|
);
|
||||||
@ -270,7 +270,7 @@ export async function getCostSummary(
|
|||||||
project,
|
project,
|
||||||
SUM(CASE WHEN cost_usd > 0 THEN 1 ELSE 0 END) as paid_tasks
|
SUM(CASE WHEN cost_usd > 0 THEN 1 ELSE 0 END) as paid_tasks
|
||||||
FROM cost_analytics
|
FROM cost_analytics
|
||||||
WHERE created_at > NOW() - INTERVAL $1 HOUR
|
WHERE created_at > NOW() - ($1 * INTERVAL '1 hour')
|
||||||
GROUP BY project`,
|
GROUP BY project`,
|
||||||
[hoursBack]
|
[hoursBack]
|
||||||
);
|
);
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user