feat: publish llm gateway v2 dashboard alongside restored workbench
This commit is contained in:
parent
e272105bcf
commit
060b846d9b
426
AI_CONTROL_PLANE_SYSTEM_DESIGN.md
Normal file
426
AI_CONTROL_PLANE_SYSTEM_DESIGN.md
Normal file
@ -0,0 +1,426 @@
|
|||||||
|
# AI Control Plane System Design
|
||||||
|
|
||||||
|
## 1. Purpose
|
||||||
|
|
||||||
|
LLM Gateway is a deterministic, observable, policy-driven routing layer for AI execution with memory and cost control.
|
||||||
|
|
||||||
|
It routes requests from clients to the right model, provider, agent, or tool based on:
|
||||||
|
|
||||||
|
- policy
|
||||||
|
- cost
|
||||||
|
- availability
|
||||||
|
- context
|
||||||
|
- memory
|
||||||
|
- trust level
|
||||||
|
- historical route success
|
||||||
|
|
||||||
|
It also provides:
|
||||||
|
|
||||||
|
- full observability through immutable receipts
|
||||||
|
- reproducible AI runs
|
||||||
|
- shared memory persistence
|
||||||
|
- route memory
|
||||||
|
- token and cost optimization
|
||||||
|
|
||||||
|
## 2. High-Level Architecture
|
||||||
|
|
||||||
|
```text
|
||||||
|
Input Layer
|
||||||
|
clients, APIs, MCP, internal connectors
|
||||||
|
|
|
||||||
|
v
|
||||||
|
Control Plane
|
||||||
|
trust routing, policy, compression, memory, provider routing
|
||||||
|
|
|
||||||
|
v
|
||||||
|
Execution Layer
|
||||||
|
local models, external providers, tools, services
|
||||||
|
|
|
||||||
|
v
|
||||||
|
Output
|
||||||
|
response to caller
|
||||||
|
|
|
||||||
|
v
|
||||||
|
Receipts + Memory Update
|
||||||
|
|
||||||
|
Side System:
|
||||||
|
Memory Layer
|
||||||
|
global memory, project memory, route memory, semantic cache
|
||||||
|
```
|
||||||
|
|
||||||
|
## 3. Components
|
||||||
|
|
||||||
|
### 3.1 Client Entry
|
||||||
|
|
||||||
|
Clients connect via API, MCP, OpenAI-compatible endpoints, or internal connectors.
|
||||||
|
|
||||||
|
Supported client targets:
|
||||||
|
|
||||||
|
- Codex
|
||||||
|
- Claude Code
|
||||||
|
- ChatGPT
|
||||||
|
- Cursor
|
||||||
|
- VS Code and Continue-style IDEs
|
||||||
|
- automation pipelines
|
||||||
|
- n8n
|
||||||
|
- internal services
|
||||||
|
|
||||||
|
Each request should include:
|
||||||
|
|
||||||
|
- payload: prompt, input, files, tool call, or task
|
||||||
|
- metadata: user, project, agent, task type
|
||||||
|
- optional routing hints
|
||||||
|
- optional policy hints
|
||||||
|
|
||||||
|
### 3.2 Trust Router
|
||||||
|
|
||||||
|
The Trust Router is the first decision point.
|
||||||
|
|
||||||
|
Responsibilities:
|
||||||
|
|
||||||
|
- validate client identity
|
||||||
|
- assign trust level
|
||||||
|
- classify request type
|
||||||
|
- classify data sensitivity
|
||||||
|
- apply initial routing hints
|
||||||
|
- attach enriched request context
|
||||||
|
|
||||||
|
Example classification labels:
|
||||||
|
|
||||||
|
- code
|
||||||
|
- infra
|
||||||
|
- legal
|
||||||
|
- security
|
||||||
|
- general
|
||||||
|
- document
|
||||||
|
- automation
|
||||||
|
|
||||||
|
Output:
|
||||||
|
|
||||||
|
- enriched request context
|
||||||
|
- trust score
|
||||||
|
- sensitivity label
|
||||||
|
- classification label
|
||||||
|
|
||||||
|
### 3.3 Policy Engine
|
||||||
|
|
||||||
|
The Policy Engine is the core decision system.
|
||||||
|
|
||||||
|
It evaluates:
|
||||||
|
|
||||||
|
- data sensitivity
|
||||||
|
- allowed providers
|
||||||
|
- allowed models
|
||||||
|
- allowed tools
|
||||||
|
- cost constraints
|
||||||
|
- project rules
|
||||||
|
- compliance rules
|
||||||
|
- offline/simulation/live mode
|
||||||
|
|
||||||
|
Example policies:
|
||||||
|
|
||||||
|
- never send legal data to public APIs
|
||||||
|
- prefer local models for internal code
|
||||||
|
- use external models only if confidence is below a threshold
|
||||||
|
- block requests containing secrets
|
||||||
|
- require admin override for production deployment tools
|
||||||
|
|
||||||
|
Output:
|
||||||
|
|
||||||
|
- allowed routes
|
||||||
|
- blocked routes
|
||||||
|
- required redactions
|
||||||
|
- execution constraints
|
||||||
|
- policy decision log
|
||||||
|
|
||||||
|
### 3.4 Memory Query
|
||||||
|
|
||||||
|
Memory is queried before compression and execution.
|
||||||
|
|
||||||
|
Memory sources:
|
||||||
|
|
||||||
|
- project memory
|
||||||
|
- global memory
|
||||||
|
- route memory
|
||||||
|
- semantic cache
|
||||||
|
- handoffs
|
||||||
|
- receipts
|
||||||
|
- reproducible runs
|
||||||
|
|
||||||
|
Output:
|
||||||
|
|
||||||
|
- relevant memory context
|
||||||
|
- prior decisions
|
||||||
|
- route hints
|
||||||
|
- cache candidates
|
||||||
|
|
||||||
|
### 3.5 Compression Engine
|
||||||
|
|
||||||
|
The Compression Engine optimizes request and memory context before execution.
|
||||||
|
|
||||||
|
Functions:
|
||||||
|
|
||||||
|
- token reduction
|
||||||
|
- context deduplication
|
||||||
|
- semantic summarization
|
||||||
|
- cache lookup
|
||||||
|
- prompt/context packaging
|
||||||
|
- token budget enforcement
|
||||||
|
|
||||||
|
Input:
|
||||||
|
|
||||||
|
- raw request
|
||||||
|
- policy constraints
|
||||||
|
- memory context
|
||||||
|
- target model context budget
|
||||||
|
|
||||||
|
Output:
|
||||||
|
|
||||||
|
- compressed payload
|
||||||
|
- token metrics before and after
|
||||||
|
- cache hit or miss
|
||||||
|
- compression receipt data
|
||||||
|
|
||||||
|
### 3.6 Provider Router
|
||||||
|
|
||||||
|
The Provider Router makes the final execution decision.
|
||||||
|
|
||||||
|
It selects:
|
||||||
|
|
||||||
|
- local model
|
||||||
|
- external provider
|
||||||
|
- AI client/agent
|
||||||
|
- tool execution
|
||||||
|
- fallback route
|
||||||
|
|
||||||
|
Criteria:
|
||||||
|
|
||||||
|
- policy constraints
|
||||||
|
- trust level
|
||||||
|
- cost
|
||||||
|
- latency
|
||||||
|
- availability
|
||||||
|
- model capability
|
||||||
|
- route memory
|
||||||
|
- benchmark results
|
||||||
|
- agent reputation
|
||||||
|
|
||||||
|
Output:
|
||||||
|
|
||||||
|
- selected execution target
|
||||||
|
- fallback routes
|
||||||
|
- route explanation
|
||||||
|
|
||||||
|
### 3.7 Execution Layer
|
||||||
|
|
||||||
|
The Execution Layer handles actual processing.
|
||||||
|
|
||||||
|
Execution target types:
|
||||||
|
|
||||||
|
- local models such as Ollama, LM Studio, LocalAI, llama.cpp, vLLM
|
||||||
|
- external APIs such as OpenAI, Anthropic, Mistral, Groq, OpenRouter
|
||||||
|
- AI clients such as Claude Code, Codex, Cursor, ChatGPT adapters
|
||||||
|
- tools, scripts, workflows, and internal services
|
||||||
|
|
||||||
|
Execution returns:
|
||||||
|
|
||||||
|
- raw response
|
||||||
|
- latency
|
||||||
|
- token usage
|
||||||
|
- provider metadata
|
||||||
|
- errors
|
||||||
|
- tool call results
|
||||||
|
|
||||||
|
### 3.8 Receipt Engine
|
||||||
|
|
||||||
|
The Receipt Engine creates an immutable trace for each request.
|
||||||
|
|
||||||
|
Receipts include:
|
||||||
|
|
||||||
|
- request id
|
||||||
|
- input summary or redacted input
|
||||||
|
- trust decisions
|
||||||
|
- policy decisions
|
||||||
|
- memory refs
|
||||||
|
- compression results
|
||||||
|
- selected model/provider/tool
|
||||||
|
- fallback chain
|
||||||
|
- response summary or full response depending on policy
|
||||||
|
- token usage
|
||||||
|
- cost estimate
|
||||||
|
- timestamps
|
||||||
|
- errors
|
||||||
|
- blocked routes
|
||||||
|
|
||||||
|
Receipts are immutable once written and are persisted in the receipts database.
|
||||||
|
|
||||||
|
### 3.9 Memory Layer
|
||||||
|
|
||||||
|
Memory is separate from execution but connected to routing and compression.
|
||||||
|
|
||||||
|
Memory types:
|
||||||
|
|
||||||
|
1. Project memory
|
||||||
|
- task history
|
||||||
|
- decisions
|
||||||
|
- context
|
||||||
|
- handoffs
|
||||||
|
|
||||||
|
2. Global memory
|
||||||
|
- shared knowledge
|
||||||
|
- user/team preferences
|
||||||
|
- reusable runbooks
|
||||||
|
|
||||||
|
3. Route memory
|
||||||
|
- routing decisions
|
||||||
|
- success and failure patterns
|
||||||
|
- optimization feedback
|
||||||
|
|
||||||
|
4. Semantic cache
|
||||||
|
- previous responses
|
||||||
|
- embedding lookup
|
||||||
|
- prompt/result reuse
|
||||||
|
|
||||||
|
Memory is:
|
||||||
|
|
||||||
|
- append-only by default
|
||||||
|
- queryable
|
||||||
|
- versioned where possible
|
||||||
|
- used during routing and compression
|
||||||
|
|
||||||
|
### 3.10 Route Reflector Memory
|
||||||
|
|
||||||
|
Route Reflector Memory is specialized route memory inspired by BGP route reflectors.
|
||||||
|
|
||||||
|
Functions:
|
||||||
|
|
||||||
|
- learns optimal AI routes
|
||||||
|
- shares routing knowledge across clients
|
||||||
|
- improves future routing decisions
|
||||||
|
- records fallback success and failures
|
||||||
|
- contributes to Provider Router decisions
|
||||||
|
|
||||||
|
Examples:
|
||||||
|
|
||||||
|
- code debugging works best through Codex plus local validation
|
||||||
|
- private infra diagnostics should route to local models
|
||||||
|
- long-form reasoning performs better on selected external models
|
||||||
|
- JSON extraction for project X has best success on model Y
|
||||||
|
|
||||||
|
## 4. Data Flow
|
||||||
|
|
||||||
|
1. Client sends request.
|
||||||
|
2. Trust Router classifies request and assigns trust.
|
||||||
|
3. Policy Engine filters allowed routes.
|
||||||
|
4. Memory Layer is queried for context and prior route knowledge.
|
||||||
|
5. Compression Engine optimizes payload.
|
||||||
|
6. Provider Router selects execution target and fallback chain.
|
||||||
|
7. Execution Layer processes request.
|
||||||
|
8. Response is returned to client.
|
||||||
|
9. Receipt Engine generates immutable receipt.
|
||||||
|
10. Memory Layer is updated with outcome.
|
||||||
|
11. Route Reflector Memory updates routing knowledge.
|
||||||
|
|
||||||
|
## 5. Modes Of Operation
|
||||||
|
|
||||||
|
### Live Mode
|
||||||
|
|
||||||
|
- real execution
|
||||||
|
- full routing active
|
||||||
|
- receipts stored
|
||||||
|
- memory updated
|
||||||
|
|
||||||
|
### Simulation Mode
|
||||||
|
|
||||||
|
- no real execution
|
||||||
|
- shows trust decisions
|
||||||
|
- shows policy decisions
|
||||||
|
- shows selected route and fallbacks
|
||||||
|
- estimates cost and tokens
|
||||||
|
- useful for testing policies
|
||||||
|
|
||||||
|
### Offline Mode
|
||||||
|
|
||||||
|
- only local models allowed
|
||||||
|
- no external provider calls
|
||||||
|
- remote sync disabled unless explicitly allowed
|
||||||
|
- receipts marked as offline
|
||||||
|
|
||||||
|
## 6. Control Functions
|
||||||
|
|
||||||
|
The system supports:
|
||||||
|
|
||||||
|
- trace request
|
||||||
|
- replay request
|
||||||
|
- force route
|
||||||
|
- override policy as admin
|
||||||
|
- inspect receipts
|
||||||
|
- inspect memory
|
||||||
|
- simulate routing
|
||||||
|
- compare routes
|
||||||
|
- inspect provider availability
|
||||||
|
- inspect route memory
|
||||||
|
|
||||||
|
## 7. Storage
|
||||||
|
|
||||||
|
Required storage components:
|
||||||
|
|
||||||
|
- receipts database: immutable logs
|
||||||
|
- memory database: structured + vector
|
||||||
|
- policy definitions
|
||||||
|
- routing history
|
||||||
|
- route reflector memory
|
||||||
|
- semantic cache
|
||||||
|
- reproducible run artifacts
|
||||||
|
|
||||||
|
Recommended default:
|
||||||
|
|
||||||
|
- SQLite for personal mode
|
||||||
|
- Postgres plus pgvector for team/server mode
|
||||||
|
- Git/Gitea as durable memory sync and audit transport
|
||||||
|
|
||||||
|
## 8. Metrics
|
||||||
|
|
||||||
|
System tracks:
|
||||||
|
|
||||||
|
- token usage
|
||||||
|
- compression ratio
|
||||||
|
- cache hit rate
|
||||||
|
- latency per provider
|
||||||
|
- cost per request
|
||||||
|
- routing success rate
|
||||||
|
- fallback rate
|
||||||
|
- trust level distribution
|
||||||
|
- blocked route count
|
||||||
|
- policy override count
|
||||||
|
- agent reputation
|
||||||
|
- benchmark scores
|
||||||
|
|
||||||
|
## 9. Security Model
|
||||||
|
|
||||||
|
- strict policy enforcement before external calls
|
||||||
|
- data classification at entry
|
||||||
|
- local-first routing possible
|
||||||
|
- sensitive data never leaves the system when policy blocks the route
|
||||||
|
- secrets are never synced to memory
|
||||||
|
- audit trail via receipts
|
||||||
|
- consent ledger for tool, memory, and provider permissions
|
||||||
|
- safe config writer for external tool setup
|
||||||
|
|
||||||
|
## 10. Extensibility
|
||||||
|
|
||||||
|
The system supports:
|
||||||
|
|
||||||
|
- new providers
|
||||||
|
- new local models
|
||||||
|
- new tools
|
||||||
|
- new MCP resources
|
||||||
|
- new policy rules
|
||||||
|
- custom routing logic
|
||||||
|
- custom memory backends
|
||||||
|
- custom benchmarks
|
||||||
|
- custom data source connectors
|
||||||
|
|
||||||
|
## 11. Core Idea
|
||||||
|
|
||||||
|
LLM Gateway is a deterministic, observable, policy-driven routing layer for AI execution with memory and cost control.
|
||||||
1270
OPEN_SOURCE_BLUEPRINT.md
Normal file
1270
OPEN_SOURCE_BLUEPRINT.md
Normal file
File diff suppressed because it is too large
Load Diff
66
OPEN_SOURCE_FEATURE_MATRIX.md
Normal file
66
OPEN_SOURCE_FEATURE_MATRIX.md
Normal file
@ -0,0 +1,66 @@
|
|||||||
|
# Open Source Feature Matrix
|
||||||
|
|
||||||
|
## Legend
|
||||||
|
|
||||||
|
- `ready`: exists and is usable with cleanup
|
||||||
|
- `partial`: exists but needs extraction/hardening
|
||||||
|
- `missing`: must be built
|
||||||
|
|
||||||
|
| Feature | Current | OSS Target | Priority |
|
||||||
|
|---|---|---|---:|
|
||||||
|
| Fastify gateway | ready | keep | P0 |
|
||||||
|
| Client SDK | ready | keep + docs | P0 |
|
||||||
|
| Health checks | ready | keep + doctor | P0 |
|
||||||
|
| Dashboard | partial | topology-first app | P1 |
|
||||||
|
| Ollama routing | ready | generic local provider | P0 |
|
||||||
|
| LM Studio detection | missing | discovery provider | P0 |
|
||||||
|
| LocalAI/llama.cpp/vLLM detection | missing | discovery provider | P0 |
|
||||||
|
| Hosted provider registry | partial | provider adapters + consent | P0 |
|
||||||
|
| OpenAI-compatible API | partial | first-class adapter | P0 |
|
||||||
|
| MCP server | missing | first-class | P0 |
|
||||||
|
| Claude Code integration | partial | MCP + bridge | P0 |
|
||||||
|
| Codex integration | partial | MCP + LSP | P0 |
|
||||||
|
| ChatGPT integration | missing | export/import + adapter docs | P1 |
|
||||||
|
| Cursor/VS Code integration | missing | safe config writer | P1 |
|
||||||
|
| n8n integration | missing | workflow pack | P1 |
|
||||||
|
| Trust Router | missing | core | P0 |
|
||||||
|
| Policy Engine | missing | provider/model/tool constraints | P0 |
|
||||||
|
| Provider Router | partial | final route + fallback decision | P0 |
|
||||||
|
| Context Receipt | missing | core | P0 |
|
||||||
|
| Shared Gitea Memory | missing | core | P0 |
|
||||||
|
| Route Reflector Memory | missing | routing memory | P0 |
|
||||||
|
| AI Handoff Protocol | partial | core | P0 |
|
||||||
|
| Consent Ledger | missing | core | P0 |
|
||||||
|
| Setup Doctor | missing | CLI + UI | P0 |
|
||||||
|
| Safe Config Writer | missing | CLI + UI | P0 |
|
||||||
|
| Offline Mode | missing | policy mode | P0 |
|
||||||
|
| Simulation Mode | missing | dry-run routing decisions | P0 |
|
||||||
|
| Compression/token saving | partial | first-class engine | P1 |
|
||||||
|
| Semantic cache | missing | optional | P1 |
|
||||||
|
| Capability Benchmark Lab | missing | routing input | P1 |
|
||||||
|
| Agent Reputation Score | missing | routing input | P1 |
|
||||||
|
| Reproducible Runs | missing | audit/eval | P1 |
|
||||||
|
| Integration Marketplace | missing | local catalog | P1 |
|
||||||
|
| Data connectors | missing | scoped connectors | P1 |
|
||||||
|
| Team Mode | missing | RBAC/admin | P2 |
|
||||||
|
| Prompt/agent versioning | partial | Git-backed | P2 |
|
||||||
|
| Import wizard | missing | guided migration | P2 |
|
||||||
|
|
||||||
|
## Public Positioning
|
||||||
|
|
||||||
|
Do not position this as another LiteLLM clone.
|
||||||
|
|
||||||
|
Positioning:
|
||||||
|
|
||||||
|
> Adaptive LLM Gateway discovers your local and hosted AI stack, connects it through a secure MCP and OpenAI-compatible control plane, and gives every agent shared memory, policy, receipts, compression, and routing.
|
||||||
|
|
||||||
|
Core differentiators:
|
||||||
|
|
||||||
|
- AI environment discovery
|
||||||
|
- Trust Router
|
||||||
|
- Context Receipts
|
||||||
|
- Shared Git/Gitea Memory
|
||||||
|
- AI Handoff Protocol
|
||||||
|
- Consent Ledger
|
||||||
|
- Reproducible AI Runs
|
||||||
|
- model and agent benchmark learning
|
||||||
133
OPEN_SOURCE_GAP_ANALYSIS.md
Normal file
133
OPEN_SOURCE_GAP_ANALYSIS.md
Normal file
@ -0,0 +1,133 @@
|
|||||||
|
# Open Source Gap Analysis
|
||||||
|
|
||||||
|
This document maps the current Context-X LLM Gateway to the planned open-source Adaptive LLM Gateway.
|
||||||
|
|
||||||
|
## Current Strengths
|
||||||
|
|
||||||
|
Already present in the repository:
|
||||||
|
|
||||||
|
| Area | Current State | Notes |
|
||||||
|
|---|---|---|
|
||||||
|
| Gateway API | Present | Fastify gateway in `packages/gateway`. |
|
||||||
|
| Completion API | Present | Main route: `/v1/completion`. |
|
||||||
|
| Classification | Present | `/v1/classify` and pre-classifier pipeline. |
|
||||||
|
| Batch jobs | Present | `/v1/batch` and PgBoss queue integration. |
|
||||||
|
| Health checks | Present | `/health`, `/health/live`, `/health/ready`. |
|
||||||
|
| Metrics | Present | Prometheus metrics and dashboard metrics. |
|
||||||
|
| Dashboard | Present | Operational dashboard exists in `packages/gateway/public`. |
|
||||||
|
| Routing rules | Present | YAML routing rules and model tiers. |
|
||||||
|
| Local model routing | Present | Ollama-based routing and fallback chains. |
|
||||||
|
| Hosted providers | Partial | External provider registry exists. Needs OSS cleanup and discovery. |
|
||||||
|
| Cost tracking | Present | Cost analytics, token tracking, cost stream. |
|
||||||
|
| Compression accounting | Partial | TokenVault/cost hooks exist. Needs first-class compression engine. |
|
||||||
|
| Learning engine | Present | Learning cycles, model performance tracking, fine-tuner package. |
|
||||||
|
| Client SDK | Present | `@llm-gateway/client`. |
|
||||||
|
| OpenAI compatibility | Partial | `chatgpt-api-adapter` and `openai-bridge` exist. Needs clean OSS path. |
|
||||||
|
| Codex integration | Partial | `packages/codex-lsp-adapter` exists. Needs production hardening. |
|
||||||
|
| Claude Code integration | Partial | `packages/claude-code-bridge` exists. Needs MCP-first flow. |
|
||||||
|
| LightRAG/RAG | Present | LightRAG sidecar exists. Needs generic connector story. |
|
||||||
|
| Handoff sync | Partial | `sync/` handoff folder exists. Needs protocol and tools. |
|
||||||
|
| Gitea use | Present internally | Needs generic Gitea memory backend. |
|
||||||
|
|
||||||
|
## Missing For Open Source
|
||||||
|
|
||||||
|
These features need to be added or extracted:
|
||||||
|
|
||||||
|
| Feature | Status | Priority | Target Package/Area |
|
||||||
|
|---|---|---:|---|
|
||||||
|
| First-run setup wizard | Missing | P0 | `packages/cli`, `packages/discovery` |
|
||||||
|
| Local AI discovery | Missing | P0 | `packages/discovery` |
|
||||||
|
| Public provider discovery | Partial | P0 | `packages/discovery`, `packages/providers` |
|
||||||
|
| AI client detection | Missing | P0 | `packages/discovery` |
|
||||||
|
| MCP server | Missing | P0 | `packages/mcp-server` |
|
||||||
|
| Trust Router | Missing | P0 | `packages/trust-router` |
|
||||||
|
| Consent Ledger | Missing | P0 | `packages/consent-ledger` |
|
||||||
|
| Shared Gitea Memory | Missing | P0 | `packages/memory-sync` |
|
||||||
|
| Context Receipt | Missing | P0 | `packages/context-receipts` |
|
||||||
|
| AI Handoff Protocol | Partial | P0 | `packages/handoff` |
|
||||||
|
| Safe Config Writer | Missing | P0 | `packages/config-writer` |
|
||||||
|
| Setup Doctor | Missing | P0 | `packages/doctor` |
|
||||||
|
| Offline Mode | Missing | P0 | gateway config/policy |
|
||||||
|
| Capability Benchmark Lab | Missing | P1 | `packages/benchmark-lab` |
|
||||||
|
| Agent Reputation Score | Missing | P1 | `packages/agent-reputation` |
|
||||||
|
| Reproducible Runs | Missing | P1 | `packages/run-ledger` |
|
||||||
|
| Visual Topology Map | Missing | P1 | dashboard UI/API |
|
||||||
|
| Integration Marketplace | Missing | P1 | `packages/integrations` + UI |
|
||||||
|
| Data source connectors | Missing | P1 | `packages/connectors` |
|
||||||
|
| Context Compression Engine | Partial | P1 | `packages/context-compression` |
|
||||||
|
| Semantic cache | Missing/mentioned | P1 | `packages/cache` |
|
||||||
|
| Team mode | Missing | P2 | auth/policy/admin UI |
|
||||||
|
| Prompt/agent versioning | Partial | P2 | memory/git/prompt registry |
|
||||||
|
| Migration/import wizard | Missing | P2 | `packages/import-wizard` |
|
||||||
|
|
||||||
|
## Context-X Assumptions To Remove
|
||||||
|
|
||||||
|
Before public release, remove or move behind an example profile:
|
||||||
|
|
||||||
|
- hardcoded `context-x.org` domains
|
||||||
|
- hardcoded `fichtmueller.org` Ollama endpoint
|
||||||
|
- Erik-specific paths such as `/opt/llm-gateway`
|
||||||
|
- private project callers and templates as defaults
|
||||||
|
- internal IP assumptions
|
||||||
|
- private training data
|
||||||
|
- private bridge assumptions
|
||||||
|
- secret-looking examples
|
||||||
|
- Context-X branding as default OSS UI
|
||||||
|
|
||||||
|
Keep them as:
|
||||||
|
|
||||||
|
```text
|
||||||
|
examples/profiles/context-x/
|
||||||
|
```
|
||||||
|
|
||||||
|
or as a private deployment overlay.
|
||||||
|
|
||||||
|
## Proposed New Packages
|
||||||
|
|
||||||
|
```text
|
||||||
|
packages/
|
||||||
|
cli/ # init, doctor, integrate, import, mode
|
||||||
|
discovery/ # detects models, clients, runtimes, providers
|
||||||
|
mcp-server/ # MCP tools/resources
|
||||||
|
trust-router/ # sensitivity + policy routing
|
||||||
|
consent-ledger/ # append-only permissions ledger
|
||||||
|
memory-sync/ # local/git/gitea memory backend
|
||||||
|
handoff/ # AI Handoff Protocol schema + helpers
|
||||||
|
context-receipts/ # receipts and audit artifacts
|
||||||
|
config-writer/ # safe config diffs and rollback
|
||||||
|
benchmark-lab/ # model/agent benchmark suite
|
||||||
|
agent-reputation/ # agent scorecards
|
||||||
|
run-ledger/ # reproducible AI runs
|
||||||
|
context-compression/ # compression + token budget manager
|
||||||
|
integrations/ # integration catalog manifests
|
||||||
|
connectors/ # data source connectors
|
||||||
|
import-wizard/ # migration/import helpers
|
||||||
|
```
|
||||||
|
|
||||||
|
## MVP Cut
|
||||||
|
|
||||||
|
The first useful OSS release should not try to ship everything.
|
||||||
|
|
||||||
|
MVP must include:
|
||||||
|
|
||||||
|
- CLI with `init`, `doctor`, `start`, `integrate`
|
||||||
|
- local AI discovery: Ollama + LM Studio + OpenAI-compatible `/v1/models`
|
||||||
|
- provider env discovery with consent
|
||||||
|
- MCP server with safe gateway and memory tools
|
||||||
|
- Trust Router with four trust levels
|
||||||
|
- Gitea/Git memory backend
|
||||||
|
- Context Receipts
|
||||||
|
- AI Handoff Protocol
|
||||||
|
- Safe Config Writer
|
||||||
|
- Offline Mode
|
||||||
|
- basic topology dashboard
|
||||||
|
|
||||||
|
MVP can defer:
|
||||||
|
|
||||||
|
- full benchmark lab
|
||||||
|
- team RBAC
|
||||||
|
- all data connectors
|
||||||
|
- full import wizard
|
||||||
|
- advanced compression comparisons
|
||||||
|
- agent reputation automation
|
||||||
|
|
||||||
212
OPEN_SOURCE_IMPLEMENTATION_ROADMAP.md
Normal file
212
OPEN_SOURCE_IMPLEMENTATION_ROADMAP.md
Normal file
@ -0,0 +1,212 @@
|
|||||||
|
# Open Source Implementation Roadmap
|
||||||
|
|
||||||
|
## Phase 0: Sanitize And Productize
|
||||||
|
|
||||||
|
Goal: make the current codebase safe to publish and understandable outside Context-X.
|
||||||
|
|
||||||
|
Tasks:
|
||||||
|
|
||||||
|
- Add OSS name and package naming decision.
|
||||||
|
- Move Context-X-only files into `examples/profiles/context-x/`.
|
||||||
|
- Add `.env.example` without private domains or secrets.
|
||||||
|
- Replace hardcoded defaults with generated config.
|
||||||
|
- Add license, contributing guide, security policy, and public README.
|
||||||
|
- Run secret scan and dependency/license audit.
|
||||||
|
- Decide which training data can be published.
|
||||||
|
|
||||||
|
Exit criteria:
|
||||||
|
|
||||||
|
- Fresh clone can install without private services.
|
||||||
|
- No private domains or internal IPs are required for default startup.
|
||||||
|
- Public README explains local-only setup.
|
||||||
|
|
||||||
|
## Phase 1: Adaptive Init
|
||||||
|
|
||||||
|
Goal: detect the user's AI environment and create config.
|
||||||
|
|
||||||
|
Packages:
|
||||||
|
|
||||||
|
- `packages/cli`
|
||||||
|
- `packages/discovery`
|
||||||
|
- `packages/config-writer`
|
||||||
|
|
||||||
|
Commands:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
adaptive-llm-gateway init
|
||||||
|
adaptive-llm-gateway doctor
|
||||||
|
adaptive-llm-gateway integrate <target>
|
||||||
|
adaptive-llm-gateway mode offline
|
||||||
|
adaptive-llm-gateway simulate <request-file>
|
||||||
|
```
|
||||||
|
|
||||||
|
Detection targets:
|
||||||
|
|
||||||
|
- Ollama
|
||||||
|
- LM Studio
|
||||||
|
- LocalAI
|
||||||
|
- llama.cpp server
|
||||||
|
- vLLM
|
||||||
|
- Open WebUI
|
||||||
|
- OpenAI-compatible endpoints
|
||||||
|
- OpenAI/Anthropic/Groq/Mistral/OpenRouter env keys
|
||||||
|
- Claude Code
|
||||||
|
- Codex
|
||||||
|
- Cursor
|
||||||
|
- VS Code
|
||||||
|
- Continue.dev
|
||||||
|
- n8n
|
||||||
|
- Docker containers
|
||||||
|
- Git/Gitea availability
|
||||||
|
|
||||||
|
Exit criteria:
|
||||||
|
|
||||||
|
- `init` writes `~/.adaptive-llm-gateway/config.yaml`.
|
||||||
|
- No external integration is enabled without approval.
|
||||||
|
- `doctor` reports actionable health and setup status.
|
||||||
|
|
||||||
|
## Phase 2: Trust, Consent, Receipts
|
||||||
|
|
||||||
|
Goal: every request goes through policy and produces an audit artifact.
|
||||||
|
|
||||||
|
Packages:
|
||||||
|
|
||||||
|
- `packages/trust-router`
|
||||||
|
- `packages/policy-engine`
|
||||||
|
- `packages/consent-ledger`
|
||||||
|
- `packages/context-receipts`
|
||||||
|
- `packages/run-ledger`
|
||||||
|
- `packages/provider-router`
|
||||||
|
|
||||||
|
Features:
|
||||||
|
|
||||||
|
- four trust levels: public, internal, confidential, secret
|
||||||
|
- local-only/offline routing mode
|
||||||
|
- simulation mode with no execution
|
||||||
|
- provider router route constraints and fallbacks
|
||||||
|
- append-only consent ledger
|
||||||
|
- receipt for context used, blocked, redacted, routed
|
||||||
|
- reproducible run folder
|
||||||
|
|
||||||
|
Exit criteria:
|
||||||
|
|
||||||
|
- External providers are blocked for confidential/secret data by default.
|
||||||
|
- Receipts can be viewed from CLI and dashboard.
|
||||||
|
- Consent changes are append-only and reversible: a grant is revoked by appending a new ledger entry, never by editing or deleting history.
|
||||||
|
|
||||||
|
## Phase 3: Shared Memory And MCP
|
||||||
|
|
||||||
|
Goal: make the gateway the shared memory and tool layer for all AI clients.
|
||||||
|
|
||||||
|
Packages:
|
||||||
|
|
||||||
|
- `packages/memory-sync`
|
||||||
|
- `packages/handoff`
|
||||||
|
- `packages/mcp-server`
|
||||||
|
- `packages/route-reflector-memory`
|
||||||
|
|
||||||
|
Features:
|
||||||
|
|
||||||
|
- local memory repo
|
||||||
|
- Git/Gitea sync
|
||||||
|
- typed memory folders
|
||||||
|
- MCP tools for memory and gateway calls
|
||||||
|
- AI Handoff Protocol
|
||||||
|
- Route Reflector Memory for routing outcomes
|
||||||
|
- conflict-safe append-first writes
|
||||||
|
|
||||||
|
MCP tools:
|
||||||
|
|
||||||
|
- `gateway.complete`
|
||||||
|
- `gateway.chat`
|
||||||
|
- `gateway.health`
|
||||||
|
- `gateway.route_preview`
|
||||||
|
- `memory.search`
|
||||||
|
- `memory.read`
|
||||||
|
- `memory.write`
|
||||||
|
- `memory.append_session`
|
||||||
|
- `memory.record_decision`
|
||||||
|
- `memory.record_task`
|
||||||
|
- `memory.pull`
|
||||||
|
- `memory.push`
|
||||||
|
|
||||||
|
Exit criteria:
|
||||||
|
|
||||||
|
- Claude Code and Codex can access the same memory through MCP.
|
||||||
|
- Handoffs are stored in Git/Gitea.
|
||||||
|
- Memory sync refuses to commit secrets.
|
||||||
|
|
||||||
|
## Phase 4: Compression And Knowledge
|
||||||
|
|
||||||
|
Goal: reduce token use and retrieve only the right context.
|
||||||
|
|
||||||
|
Packages:
|
||||||
|
|
||||||
|
- `packages/context-compression`
|
||||||
|
- `packages/connectors`
|
||||||
|
- `packages/cache`
|
||||||
|
|
||||||
|
Features:
|
||||||
|
|
||||||
|
- token budget manager
|
||||||
|
- session compaction
|
||||||
|
- repo/doc summarization
|
||||||
|
- memory dedupe
|
||||||
|
- semantic cache
|
||||||
|
- SQLite vector default
|
||||||
|
- Postgres/Qdrant optional
|
||||||
|
- approved data source connectors
|
||||||
|
|
||||||
|
Exit criteria:
|
||||||
|
|
||||||
|
- Context packages include budget, source refs, and compression stats.
|
||||||
|
- Receipts show compressed-from and final token counts.
|
||||||
|
- Indexing requires explicit allowed roots.
|
||||||
|
|
||||||
|
## Phase 5: Benchmarking And Reputation
|
||||||
|
|
||||||
|
Goal: route based on evidence instead of static assumptions.
|
||||||
|
|
||||||
|
Packages:
|
||||||
|
|
||||||
|
- `packages/benchmark-lab`
|
||||||
|
- `packages/agent-reputation`
|
||||||
|
|
||||||
|
Features:
|
||||||
|
|
||||||
|
- model capability tests
|
||||||
|
- agent scorecards
|
||||||
|
- latency/cost/quality tracking
|
||||||
|
- JSON reliability test
|
||||||
|
- code patch/test benchmark
|
||||||
|
- local vs hosted comparison
|
||||||
|
|
||||||
|
Exit criteria:
|
||||||
|
|
||||||
|
- Provider Router can use benchmark scores.
|
||||||
|
- Dashboard shows model and agent strengths.
|
||||||
|
- Routing decisions explain benchmark influence.
|
||||||
|
|
||||||
|
## Phase 6: Product UI
|
||||||
|
|
||||||
|
Goal: turn the operational dashboard into a usable OSS app.
|
||||||
|
|
||||||
|
UI areas:
|
||||||
|
|
||||||
|
- Topology
|
||||||
|
- Models
|
||||||
|
- Agents
|
||||||
|
- Memory
|
||||||
|
- Policies
|
||||||
|
- Receipts
|
||||||
|
- Benchmarks
|
||||||
|
- Costs
|
||||||
|
- Integrations
|
||||||
|
- Doctor
|
||||||
|
- Settings
|
||||||
|
|
||||||
|
Exit criteria:
|
||||||
|
|
||||||
|
- First screen is topology/status.
|
||||||
|
- User can enable integrations from UI with diff preview.
|
||||||
|
- User can inspect receipts and memory sync status.
|
||||||
728
packages/gateway/public/dashboard-v2.html
Normal file
728
packages/gateway/public/dashboard-v2.html
Normal file
@ -0,0 +1,728 @@
|
|||||||
|
<!DOCTYPE html>
|
||||||
|
<html lang="en">
|
||||||
|
<head>
|
||||||
|
<meta charset="UTF-8">
|
||||||
|
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
||||||
|
<title>llm.gateway Workbench</title>
|
||||||
|
<style>
|
||||||
|
:root {
|
||||||
|
color-scheme: light;
|
||||||
|
--bg: #f4f7f9;
|
||||||
|
--paper: #fbfcfd;
|
||||||
|
--panel: #f8fafb;
|
||||||
|
--line: #ccd6df;
|
||||||
|
--line-dark: #aebdc8;
|
||||||
|
--text: #27323d;
|
||||||
|
--muted: #718090;
|
||||||
|
--soft: #8b98a7;
|
||||||
|
--green: #2f7d71;
|
||||||
|
--green-soft: #e3f2ef;
|
||||||
|
--amber: #a05c2b;
|
||||||
|
--amber-soft: #fff1e7;
|
||||||
|
--red: #9f3f3a;
|
||||||
|
--red-soft: #ffe9e7;
|
||||||
|
--blue-soft: #eaf2f8;
|
||||||
|
--shadow: 0 16px 50px rgba(43, 61, 74, 0.08);
|
||||||
|
--mono: "SFMono-Regular", "Cascadia Code", "Roboto Mono", Consolas, monospace;
|
||||||
|
--sans: Inter, ui-sans-serif, system-ui, -apple-system, BlinkMacSystemFont, "Segoe UI", sans-serif;
|
||||||
|
}
|
||||||
|
|
||||||
|
* { box-sizing: border-box; }
|
||||||
|
|
||||||
|
body {
|
||||||
|
margin: 0;
|
||||||
|
min-height: 100vh;
|
||||||
|
background:
|
||||||
|
linear-gradient(90deg, rgba(39, 50, 61, 0.025) 1px, transparent 1px),
|
||||||
|
linear-gradient(rgba(39, 50, 61, 0.025) 1px, transparent 1px),
|
||||||
|
var(--bg);
|
||||||
|
background-size: 24px 24px;
|
||||||
|
color: var(--text);
|
||||||
|
font-family: var(--sans);
|
||||||
|
font-size: 14px;
|
||||||
|
letter-spacing: 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
.shell {
|
||||||
|
max-width: 1560px;
|
||||||
|
margin: 0 auto;
|
||||||
|
padding: 38px 38px 96px;
|
||||||
|
}
|
||||||
|
|
||||||
|
.mono, .eyebrow, .tab, .badge, th, .status-line, .metric-label {
|
||||||
|
font-family: var(--mono);
|
||||||
|
}
|
||||||
|
|
||||||
|
header {
|
||||||
|
display: grid;
|
||||||
|
grid-template-columns: 1fr auto;
|
||||||
|
align-items: start;
|
||||||
|
gap: 24px;
|
||||||
|
border-bottom: 1px solid var(--line);
|
||||||
|
padding-bottom: 18px;
|
||||||
|
}
|
||||||
|
|
||||||
|
.brand {
|
||||||
|
display: flex;
|
||||||
|
align-items: center;
|
||||||
|
gap: 12px;
|
||||||
|
}
|
||||||
|
|
||||||
|
.mark {
|
||||||
|
width: 9px;
|
||||||
|
height: 9px;
|
||||||
|
background: var(--green);
|
||||||
|
margin-left: 2px;
|
||||||
|
}
|
||||||
|
|
||||||
|
h1 {
|
||||||
|
margin: 0;
|
||||||
|
font-size: 18px;
|
||||||
|
line-height: 1;
|
||||||
|
font-family: var(--mono);
|
||||||
|
letter-spacing: -0.02em;
|
||||||
|
}
|
||||||
|
|
||||||
|
.crumb {
|
||||||
|
color: var(--soft);
|
||||||
|
font-family: var(--mono);
|
||||||
|
font-size: 12px;
|
||||||
|
text-transform: uppercase;
|
||||||
|
letter-spacing: 0.14em;
|
||||||
|
margin-left: 8px;
|
||||||
|
}
|
||||||
|
|
||||||
|
.btn {
|
||||||
|
border: 1px solid var(--line-dark);
|
||||||
|
background: transparent;
|
||||||
|
color: var(--text);
|
||||||
|
height: 31px;
|
||||||
|
padding: 0 14px;
|
||||||
|
font-family: var(--mono);
|
||||||
|
font-size: 12px;
|
||||||
|
letter-spacing: 0.12em;
|
||||||
|
text-transform: uppercase;
|
||||||
|
cursor: pointer;
|
||||||
|
}
|
||||||
|
|
||||||
|
.btn:hover { background: var(--paper); border-color: var(--green); color: var(--green); }
|
||||||
|
|
||||||
|
.status-strip {
|
||||||
|
display: flex;
|
||||||
|
align-items: center;
|
||||||
|
justify-content: space-between;
|
||||||
|
gap: 18px;
|
||||||
|
border-bottom: 1px solid var(--line);
|
||||||
|
padding: 20px 0;
|
||||||
|
margin-bottom: 18px;
|
||||||
|
}
|
||||||
|
|
||||||
|
.status-group {
|
||||||
|
display: flex;
|
||||||
|
align-items: center;
|
||||||
|
gap: 18px;
|
||||||
|
flex-wrap: wrap;
|
||||||
|
}
|
||||||
|
|
||||||
|
.status-line {
|
||||||
|
display: flex;
|
||||||
|
align-items: center;
|
||||||
|
gap: 8px;
|
||||||
|
color: var(--muted);
|
||||||
|
font-size: 12px;
|
||||||
|
letter-spacing: 0.08em;
|
||||||
|
text-transform: uppercase;
|
||||||
|
border-right: 1px solid var(--line);
|
||||||
|
padding-right: 18px;
|
||||||
|
}
|
||||||
|
|
||||||
|
.status-line:last-child { border-right: 0; }
|
||||||
|
.status-line strong { color: var(--text); font-weight: 700; text-transform: none; letter-spacing: 0; }
|
||||||
|
|
||||||
|
.dot {
|
||||||
|
width: 17px;
|
||||||
|
height: 17px;
|
||||||
|
border-radius: 50%;
|
||||||
|
background: var(--green-soft);
|
||||||
|
position: relative;
|
||||||
|
border: 1px solid #c6ddd8;
|
||||||
|
}
|
||||||
|
|
||||||
|
.dot::after {
|
||||||
|
content: "";
|
||||||
|
position: absolute;
|
||||||
|
width: 7px;
|
||||||
|
height: 7px;
|
||||||
|
border-radius: 50%;
|
||||||
|
background: var(--green);
|
||||||
|
left: 4px;
|
||||||
|
top: 4px;
|
||||||
|
}
|
||||||
|
|
||||||
|
.dot.warn { background: var(--amber-soft); border-color: #eccdb5; }
|
||||||
|
.dot.warn::after { background: var(--amber); }
|
||||||
|
.dot.bad { background: var(--red-soft); border-color: #efc1bd; }
|
||||||
|
.dot.bad::after { background: var(--red); }
|
||||||
|
|
||||||
|
.tabs {
|
||||||
|
display: flex;
|
||||||
|
gap: 0;
|
||||||
|
border-bottom: 1px solid var(--line);
|
||||||
|
margin-bottom: 34px;
|
||||||
|
overflow-x: auto;
|
||||||
|
}
|
||||||
|
|
||||||
|
.tab {
|
||||||
|
min-width: 128px;
|
||||||
|
padding: 0 18px 16px;
|
||||||
|
color: var(--muted);
|
||||||
|
text-decoration: none;
|
||||||
|
font-size: 12px;
|
||||||
|
letter-spacing: 0.08em;
|
||||||
|
white-space: nowrap;
|
||||||
|
border-bottom: 1px solid transparent;
|
||||||
|
}
|
||||||
|
|
||||||
|
.tab.active {
|
||||||
|
color: var(--green);
|
||||||
|
border-bottom-color: var(--green);
|
||||||
|
}
|
||||||
|
|
||||||
|
.section-head {
|
||||||
|
display: grid;
|
||||||
|
grid-template-columns: 1fr auto;
|
||||||
|
align-items: center;
|
||||||
|
border-bottom: 1px solid var(--line);
|
||||||
|
margin: 0 0 16px;
|
||||||
|
min-height: 44px;
|
||||||
|
}
|
||||||
|
|
||||||
|
.section-head::before {
|
||||||
|
content: "";
|
||||||
|
width: 20px;
|
||||||
|
height: 2px;
|
||||||
|
background: var(--green);
|
||||||
|
display: block;
|
||||||
|
align-self: end;
|
||||||
|
margin-bottom: 13px;
|
||||||
|
}
|
||||||
|
|
||||||
|
.section-title {
|
||||||
|
grid-column: 1 / -1;
|
||||||
|
text-align: center;
|
||||||
|
font-family: var(--mono);
|
||||||
|
font-size: 12px;
|
||||||
|
color: var(--muted);
|
||||||
|
letter-spacing: 0.35em;
|
||||||
|
text-transform: uppercase;
|
||||||
|
margin-top: -21px;
|
||||||
|
pointer-events: none;
|
||||||
|
}
|
||||||
|
|
||||||
|
.section-note {
|
||||||
|
justify-self: end;
|
||||||
|
font-family: var(--mono);
|
||||||
|
color: var(--soft);
|
||||||
|
font-size: 12px;
|
||||||
|
letter-spacing: 0.08em;
|
||||||
|
margin-top: -22px;
|
||||||
|
}
|
||||||
|
|
||||||
|
.coverage {
|
||||||
|
display: grid;
|
||||||
|
grid-template-columns: repeat(5, minmax(0, 1fr));
|
||||||
|
gap: 12px;
|
||||||
|
margin-bottom: 34px;
|
||||||
|
}
|
||||||
|
|
||||||
|
.tile {
|
||||||
|
border: 1px solid var(--line);
|
||||||
|
background: rgba(255, 255, 255, 0.58);
|
||||||
|
min-height: 120px;
|
||||||
|
padding: 15px 16px;
|
||||||
|
box-shadow: var(--shadow);
|
||||||
|
}
|
||||||
|
|
||||||
|
.tile-head {
|
||||||
|
display: flex;
|
||||||
|
justify-content: space-between;
|
||||||
|
align-items: start;
|
||||||
|
gap: 10px;
|
||||||
|
margin-bottom: 14px;
|
||||||
|
}
|
||||||
|
|
||||||
|
.tile-title {
|
||||||
|
font-weight: 800;
|
||||||
|
line-height: 1.25;
|
||||||
|
word-break: break-word;
|
||||||
|
}
|
||||||
|
|
||||||
|
.badge {
|
||||||
|
border: 1px solid #dfbda6;
|
||||||
|
color: var(--amber);
|
||||||
|
background: var(--amber-soft);
|
||||||
|
padding: 4px 8px;
|
||||||
|
font-size: 10px;
|
||||||
|
letter-spacing: 0.12em;
|
||||||
|
text-transform: uppercase;
|
||||||
|
white-space: nowrap;
|
||||||
|
}
|
||||||
|
|
||||||
|
.badge.ready {
|
||||||
|
border-color: #b7d8d1;
|
||||||
|
color: var(--green);
|
||||||
|
background: var(--green-soft);
|
||||||
|
}
|
||||||
|
|
||||||
|
.tile-meta {
|
||||||
|
font-family: var(--mono);
|
||||||
|
color: #596777;
|
||||||
|
line-height: 1.45;
|
||||||
|
font-size: 12px;
|
||||||
|
}
|
||||||
|
|
||||||
|
.metrics {
|
||||||
|
display: grid;
|
||||||
|
grid-template-columns: repeat(6, minmax(0, 1fr));
|
||||||
|
gap: 10px;
|
||||||
|
margin-bottom: 34px;
|
||||||
|
}
|
||||||
|
|
||||||
|
.metric {
|
||||||
|
border: 1px solid var(--line);
|
||||||
|
background: rgba(255,255,255,0.45);
|
||||||
|
padding: 13px 15px;
|
||||||
|
min-height: 86px;
|
||||||
|
}
|
||||||
|
|
||||||
|
.metric-label {
|
||||||
|
color: var(--muted);
|
||||||
|
font-size: 11px;
|
||||||
|
letter-spacing: 0.13em;
|
||||||
|
text-transform: uppercase;
|
||||||
|
margin-bottom: 11px;
|
||||||
|
}
|
||||||
|
|
||||||
|
.metric-value {
|
||||||
|
font-family: var(--mono);
|
||||||
|
font-size: 22px;
|
||||||
|
font-weight: 800;
|
||||||
|
}
|
||||||
|
|
||||||
|
.workbench {
|
||||||
|
display: grid;
|
||||||
|
grid-template-columns: 1.15fr 0.85fr;
|
||||||
|
gap: 18px;
|
||||||
|
margin-bottom: 34px;
|
||||||
|
}
|
||||||
|
|
||||||
|
.panel {
|
||||||
|
border: 1px solid var(--line);
|
||||||
|
background: rgba(255,255,255,0.48);
|
||||||
|
box-shadow: var(--shadow);
|
||||||
|
}
|
||||||
|
|
||||||
|
.panel-title {
|
||||||
|
display: flex;
|
||||||
|
align-items: center;
|
||||||
|
justify-content: space-between;
|
||||||
|
gap: 12px;
|
||||||
|
border-bottom: 1px solid var(--line);
|
||||||
|
padding: 12px 16px;
|
||||||
|
font-family: var(--mono);
|
||||||
|
color: var(--muted);
|
||||||
|
text-transform: uppercase;
|
||||||
|
letter-spacing: 0.18em;
|
||||||
|
font-size: 11px;
|
||||||
|
}
|
||||||
|
|
||||||
|
.panel-body { padding: 16px; }
|
||||||
|
|
||||||
|
.route-stack {
|
||||||
|
display: grid;
|
||||||
|
gap: 10px;
|
||||||
|
}
|
||||||
|
|
||||||
|
.route {
|
||||||
|
display: grid;
|
||||||
|
grid-template-columns: 164px 1fr auto;
|
||||||
|
align-items: center;
|
||||||
|
gap: 12px;
|
||||||
|
border: 1px solid var(--line);
|
||||||
|
background: var(--paper);
|
||||||
|
padding: 11px 12px;
|
||||||
|
min-height: 52px;
|
||||||
|
}
|
||||||
|
|
||||||
|
.route-name {
|
||||||
|
font-weight: 800;
|
||||||
|
}
|
||||||
|
|
||||||
|
.route-desc {
|
||||||
|
font-family: var(--mono);
|
||||||
|
color: var(--muted);
|
||||||
|
font-size: 12px;
|
||||||
|
line-height: 1.35;
|
||||||
|
}
|
||||||
|
|
||||||
|
table {
|
||||||
|
width: 100%;
|
||||||
|
border-collapse: collapse;
|
||||||
|
border: 1px solid var(--line);
|
||||||
|
background: rgba(255,255,255,0.5);
|
||||||
|
}
|
||||||
|
|
||||||
|
th, td {
|
||||||
|
border-bottom: 1px solid var(--line);
|
||||||
|
padding: 13px 16px;
|
||||||
|
text-align: left;
|
||||||
|
font-size: 12px;
|
||||||
|
}
|
||||||
|
|
||||||
|
th {
|
||||||
|
color: var(--muted);
|
||||||
|
background: rgba(39, 50, 61, 0.045);
|
||||||
|
text-transform: uppercase;
|
||||||
|
letter-spacing: 0.13em;
|
||||||
|
font-weight: 600;
|
||||||
|
}
|
||||||
|
|
||||||
|
td {
|
||||||
|
font-family: var(--mono);
|
||||||
|
color: #4d5c69;
|
||||||
|
}
|
||||||
|
|
||||||
|
.empty {
|
||||||
|
height: 112px;
|
||||||
|
text-align: center;
|
||||||
|
color: var(--soft);
|
||||||
|
font-family: var(--mono);
|
||||||
|
font-size: 13px;
|
||||||
|
letter-spacing: 0.04em;
|
||||||
|
}
|
||||||
|
|
||||||
|
.fixed-status {
|
||||||
|
position: fixed;
|
||||||
|
right: 18px;
|
||||||
|
bottom: 18px;
|
||||||
|
border: 1px solid var(--line-dark);
|
||||||
|
background: var(--paper);
|
||||||
|
padding: 10px 14px;
|
||||||
|
font-family: var(--mono);
|
||||||
|
font-size: 12px;
|
||||||
|
color: var(--muted);
|
||||||
|
box-shadow: var(--shadow);
|
||||||
|
}
|
||||||
|
|
||||||
|
.fixed-status span {
|
||||||
|
display: inline-block;
|
||||||
|
width: 8px;
|
||||||
|
height: 8px;
|
||||||
|
background: var(--green);
|
||||||
|
margin-right: 8px;
|
||||||
|
}
|
||||||
|
|
||||||
|
@media (max-width: 1200px) {
|
||||||
|
.coverage { grid-template-columns: repeat(3, minmax(0, 1fr)); }
|
||||||
|
.metrics { grid-template-columns: repeat(3, minmax(0, 1fr)); }
|
||||||
|
.workbench { grid-template-columns: 1fr; }
|
||||||
|
}
|
||||||
|
|
||||||
|
@media (max-width: 760px) {
|
||||||
|
.shell { padding: 22px 16px 80px; }
|
||||||
|
header, .status-strip { grid-template-columns: 1fr; display: grid; }
|
||||||
|
.coverage, .metrics { grid-template-columns: 1fr; }
|
||||||
|
.route { grid-template-columns: 1fr; }
|
||||||
|
.section-note { display: none; }
|
||||||
|
}
|
||||||
|
</style>
|
||||||
|
</head>
|
||||||
|
<body>
|
||||||
|
<div class="shell">
|
||||||
|
<header>
|
||||||
|
<div class="brand">
|
||||||
|
<span class="mark"></span>
|
||||||
|
<h1>llm.gateway</h1>
|
||||||
|
<span class="crumb">/ gateway workbench · open source preview</span>
|
||||||
|
</div>
|
||||||
|
<button class="btn" id="settingsBtn">⊙ settings</button>
|
||||||
|
</header>
|
||||||
|
|
||||||
|
<div class="status-strip">
|
||||||
|
<div class="status-group">
|
||||||
|
<div class="status-line"><span class="dot" id="dbDot"></span> DB <strong id="dbStatus">checking</strong></div>
|
||||||
|
<div class="status-line"><span class="dot" id="pollDot"></span> Poll <strong>live</strong></div>
|
||||||
|
<div class="status-line">Interval <strong>15s</strong></div>
|
||||||
|
</div>
|
||||||
|
<div class="status-line">Mode <strong id="modeStatus">auto</strong></div>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<nav class="tabs">
|
||||||
|
<a class="tab" href="#overview">01 overview</a>
|
||||||
|
<a class="tab" href="#providers">02 providers</a>
|
||||||
|
<a class="tab" href="#policies">03 routing</a>
|
||||||
|
<a class="tab active" href="#activity">04 activity</a>
|
||||||
|
<a class="tab" href="#savings">05 savings</a>
|
||||||
|
<a class="tab" href="#memory">06 memory</a>
|
||||||
|
<a class="tab" href="#doctor">07 doctor</a>
|
||||||
|
</nav>
|
||||||
|
|
||||||
|
<section id="overview">
|
||||||
|
<div class="section-head">
|
||||||
|
<div class="section-title">gateway coverage</div>
|
||||||
|
<div class="section-note">existing adapters plus open-source targets</div>
|
||||||
|
</div>
|
||||||
|
<div class="coverage" id="coverage"></div>
|
||||||
|
</section>
|
||||||
|
|
||||||
|
<section id="activity">
|
||||||
|
<div class="section-head">
|
||||||
|
<div class="section-title">gateway metrics</div>
|
||||||
|
<div class="section-note">traffic · providers · savings · readiness</div>
|
||||||
|
</div>
|
||||||
|
<div class="metrics" id="metrics"></div>
|
||||||
|
</section>
|
||||||
|
|
||||||
|
<section class="workbench">
|
||||||
|
<div class="panel" id="policies">
|
||||||
|
<div class="panel-title">
|
||||||
|
<span>request pipeline</span>
|
||||||
|
<span>gateway core</span>
|
||||||
|
</div>
|
||||||
|
<div class="panel-body">
|
||||||
|
<div class="route-stack" id="pipeline"></div>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
<div class="panel" id="memory">
|
||||||
|
<div class="panel-title">
|
||||||
|
<span>open-source extensions</span>
|
||||||
|
<span>roadmap</span>
|
||||||
|
</div>
|
||||||
|
<div class="panel-body">
|
||||||
|
<div class="route-stack" id="memoryRoutes"></div>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
</section>
|
||||||
|
|
||||||
|
<section>
|
||||||
|
<div class="section-head">
|
||||||
|
<div class="section-title">recent requests</div>
|
||||||
|
<div class="section-note">live polling</div>
|
||||||
|
</div>
|
||||||
|
<div style="display:flex; gap:6px; margin-bottom:16px;">
|
||||||
|
<button class="btn" data-hours="24">last 24h</button>
|
||||||
|
<button class="btn" data-hours="168">last 7d</button>
|
||||||
|
<button class="btn" data-hours="720">last 30d</button>
|
||||||
|
</div>
|
||||||
|
<table>
|
||||||
|
<thead>
|
||||||
|
<tr>
|
||||||
|
<th>request id</th>
|
||||||
|
<th>caller</th>
|
||||||
|
<th>model</th>
|
||||||
|
<th>status</th>
|
||||||
|
<th>ctx before</th>
|
||||||
|
<th>ctx sent</th>
|
||||||
|
<th>saved</th>
|
||||||
|
<th>compression</th>
|
||||||
|
<th>cost</th>
|
||||||
|
<th>latency</th>
|
||||||
|
</tr>
|
||||||
|
</thead>
|
||||||
|
<tbody id="requests">
|
||||||
|
<tr><td class="empty" colspan="10">loading gateway traffic</td></tr>
|
||||||
|
</tbody>
|
||||||
|
</table>
|
||||||
|
</section>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<div class="fixed-status"><span></span><strong id="fixedStatus">connected</strong></div>
|
||||||
|
|
||||||
|
<script>
|
||||||
|
// Base URL for every dashboard request: the origin the page itself was loaded from.
const { origin: API } = window.location;

// Look-back window, in hours, sent with the recent-requests query.
let selectedHours = 24;

// Coverage tiles as [display name, key, route note] tuples.
// The key is matched against topology node ids (either `client-<key>` or `<key>`).
const clients = [
  ['OpenAI-compatible API', 'openai-api', 'already usable by most tools'],
  ['Ollama / Local models', 'ollama', 'local-first provider path'],
  ['Codex / CLI clients', 'codex', 'planned MCP helper'],
  ['Claude Code', 'claude-code', 'planned MCP bridge'],
  ['ChatGPT / OpenAI', 'chatgpt', 'API key or export workflow'],
  ['Cursor / VS Code', 'cursor', 'OpenAI-compatible base URL'],
];

// Short captions shown on the metric cards, keyed by topology summary field name.
const metricLabels = {
  detectedClients: 'adapters',
  localModels: 'local',
  providersConfigured: 'providers',
  trustPolicies: 'rules',
  memoryBackends: 'memory',
  plannedModules: 'extensions',
};
|
||||||
|
|
||||||
|
/**
 * Escape a value so it can be interpolated into HTML markup safely.
 *
 * Every renderer on this page builds markup with template strings and assigns
 * it to `innerHTML`, so the five HTML-significant characters must be replaced
 * by their entities; mapping each character to itself would leave API-supplied
 * text free to inject markup.
 *
 * @param {*} value Any value; `null` and `undefined` become the empty string,
 *   everything else is converted with `String()`.
 * @returns {string} The escaped text.
 */
function esc(value) {
  const entities = {
    '&': '&amp;',
    '<': '&lt;',
    '>': '&gt;',
    '"': '&quot;',
    "'": '&#39;',
  };
  return String(value ?? '').replace(/[&<>"']/g, (c) => entities[c]);
}
|
||||||
|
|
||||||
|
/**
 * GET a JSON document from the gateway, bypassing the HTTP cache.
 *
 * @param {string} path Path (and query string) appended to the `API` origin.
 * @returns {Promise<*>} The parsed JSON body.
 * @throws {Error} With message "<path> <status>" when the response is not ok.
 */
async function getJson(path) {
  const url = `${API}${path}`;
  const response = await fetch(url, {
    cache: 'no-store',
    headers: { Accept: 'application/json' },
  });
  if (response.ok) {
    return response.json();
  }
  throw new Error(`${path} ${response.status}`);
}
|
||||||
|
|
||||||
|
/**
 * Update the "DB" indicator dot and its label in the status strip.
 *
 * @param {string} status 'connected' or 'degraded'; any other value is shown
 *   as offline.
 */
function setDbStatus(status) {
  const indicator = document.getElementById('dbDot');
  const caption = document.getElementById('dbStatus');

  // Anything that is not explicitly recognised falls through to "offline".
  let dotClass = 'dot bad';
  let text = 'offline';
  switch (status) {
    case 'connected':
      dotClass = 'dot';
      text = 'connected';
      break;
    case 'degraded':
      dotClass = 'dot warn';
      text = 'degraded';
      break;
    default:
      break;
  }

  indicator.className = dotClass;
  caption.textContent = text;
}
|
||||||
|
|
||||||
|
/**
 * Render one coverage tile per entry of `clients` into `#coverage`.
 *
 * A client is badged "ready" when the topology contains a node whose status
 * is 'ready' or 'online' and whose id is either `client-<key>` or `<key>`.
 *
 * @param {{nodes?: Array<{id: string, status: string}>}} topology Topology payload.
 */
function renderCoverage(topology) {
  const liveIds = new Set();
  for (const node of topology.nodes || []) {
    if (node.status === 'ready' || node.status === 'online') {
      liveIds.add(node.id);
    }
  }

  const tiles = [];
  for (const [name, key, note] of clients) {
    const ready = liveIds.has(`client-${key}`) || liveIds.has(key);
    tiles.push(`
      <article class="tile">
        <div class="tile-head">
          <div class="tile-title">${esc(name)}</div>
          <div class="badge ${ready ? 'ready' : ''}">${ready ? 'ready' : 'not connected'}</div>
        </div>
        <div class="tile-meta">
          0 requests · 0 saved<br>
          status: discovery pending<br>
          route: ${esc(note)}<br>
          last: never
        </div>
      </article>
    `);
  }

  document.getElementById('coverage').innerHTML = tiles.join('');
}
|
||||||
|
|
||||||
|
/**
 * Render one metric card per key of the summary object into `#metrics`.
 *
 * Card captions come from `metricLabels`; keys without an entry there are
 * shown under their raw name.
 *
 * @param {Object<string, *>} summary Topology summary counters.
 */
function renderMetrics(summary) {
  const cards = Object.entries(summary).map(([key, value]) => {
    const caption = metricLabels[key] || key;
    return `
      <div class="metric">
        <div class="metric-label">${esc(caption)}</div>
        <div class="metric-value">${esc(value)}</div>
      </div>
    `;
  });
  document.getElementById('metrics').innerHTML = cards.join('');
}
|
||||||
|
|
||||||
|
/**
 * Fill the "request pipeline" panel (`#pipeline`) and the "open-source
 * extensions" panel (`#memoryRoutes`) from fixed, built-in lists.
 *
 * @param {*} topology Accepted for signature parity with the other renderers;
 *   this function does not read it.
 */
function renderPipeline(topology) {
  const steps = [
    ['Client Entry', 'OpenAI-compatible requests from apps, agents, and scripts'],
    ['Gateway Router', 'model selection, fallback, budgets, latency preference'],
    ['Provider Layer', 'Ollama, OpenAI, Anthropic, Groq, Mistral, OpenRouter'],
    ['Compression', 'existing token savings plus semantic cache roadmap'],
    ['Receipts', 'trace request, route, model, tokens, cost, latency'],
    ['Memory', 'optional shared project memory for handoff between AI tools'],
  ];
  const stepRows = steps.map(([name, desc], index) => {
    const ordinal = String(index + 1).padStart(2, '0');
    // The first three stages are badged "core", the remainder "next".
    const stage = index < 3 ? 'core' : 'next';
    return `
      <div class="route">
        <div class="route-name">${ordinal} ${esc(name)}</div>
        <div class="route-desc">${esc(desc)}</div>
        <div class="badge ready">${stage}</div>
      </div>
    `;
  });
  document.getElementById('pipeline').innerHTML = stepRows.join('');

  const extensions = [
    ['MCP server', 'Expose gateway status, providers, receipts, and memory to Codex, Claude Code, Cursor, and automations.', 'next'],
    ['Shared memory', 'Optional Git/Gitea-backed project memory for decisions, handoffs, receipts, and reusable context.', 'next'],
    ['Trust routing', 'Small policy layer for local-first routing, sensitive-data blocking, and provider allowlists.', 'next'],
    ['Setup doctor', 'Detect local tools, env vars, models, ports, and missing config without changing user files silently.', 'next'],
    ['Context receipts', 'Human-readable proof of what context was used, compressed, redacted, and routed.', 'planned'],
  ];
  const extensionRows = extensions.map(([name, desc, state]) => {
    return `
      <div class="route">
        <div class="route-name">${esc(name)}</div>
        <div class="route-desc">${esc(desc)}</div>
        <div class="badge ready">${esc(state)}</div>
      </div>
    `;
  });
  document.getElementById('memoryRoutes').innerHTML = extensionRows.join('');
}
|
||||||
|
|
||||||
|
/**
 * Render the "recent requests" table body (`#requests`).
 *
 * Shows a single placeholder row when there is nothing to display; otherwise
 * renders at most the first 40 rows.
 *
 * @param {Array<Object>|null|undefined} rows Request records. Fields read:
 *   request_id / id, caller, model, status, tokens_in, tokens_out,
 *   tokens_saved, compression, cost_usd, latency_ms.
 */
function renderRequests(rows) {
  const body = document.getElementById('requests');
  if (!rows || rows.length === 0) {
    body.innerHTML = '<tr><td class="empty" colspan="10">no requests in selected timeframe</td></tr>';
    return;
  }
  body.innerHTML = rows.slice(0, 40).map((r) => {
    // Coerce to a string before slicing: a numeric id has no .slice and would
    // otherwise throw, aborting the render of the whole table.
    const shortId = String(r.request_id || r.id || '').slice(0, 12);
    return `
      <tr>
        <td>${esc(shortId)}</td>
        <td>${esc(r.caller || 'unknown')}</td>
        <td>${esc(r.model || 'n/a')}</td>
        <td>${esc(r.status || 'n/a')}</td>
        <td>${esc(r.tokens_in || 0)}</td>
        <td>${esc((r.tokens_in || 0) + (r.tokens_out || 0))}</td>
        <td>${esc(r.tokens_saved || 0)}</td>
        <td>${esc(r.compression || 'n/a')}</td>
        <td>$${Number(r.cost_usd || 0).toFixed(4)}</td>
        <td>${esc(r.latency_ms || 0)}ms</td>
      </tr>
    `;
  }).join('');
}
|
||||||
|
|
||||||
|
/**
 * Fetch the topology payload and redraw everything derived from it: the mode
 * label, the coverage tiles, the metric cards and the pipeline panels.
 *
 * Errors from the fetch or from rendering propagate to the caller.
 *
 * @returns {Promise<void>}
 */
async function loadTopology() {
  const { data: topology } = await getJson('/api/dashboard/topology');

  // 'hybrid-safe' is presented to the user as "auto"; other modes are shown verbatim.
  const modeLabel = topology.mode === 'hybrid-safe' ? 'auto' : topology.mode;
  document.getElementById('modeStatus').textContent = modeLabel;

  renderCoverage(topology);
  renderMetrics(topology.summary);
  renderPipeline(topology);
}
|
||||||
|
|
||||||
|
/**
 * Query `/health` and reflect the result on the DB indicator.
 *
 * - overall status 'ok'            -> connected
 * - otherwise, ollama check 'ok'   -> degraded
 * - otherwise                      -> offline
 * - any error along the way        -> degraded
 *
 * Never rejects for a failed health request; the failure is shown instead.
 *
 * @returns {Promise<void>}
 */
async function loadHealth() {
  try {
    const { status, checks } = await getJson('/health');
    if (status === 'ok') {
      setDbStatus('connected');
      return;
    }
    const ollamaOk = checks?.ollama?.status === 'ok';
    setDbStatus(ollamaOk ? 'degraded' : 'offline');
  } catch {
    setDbStatus('degraded');
  }
}
|
||||||
|
|
||||||
|
/**
 * Fetch up to 50 requests from the last `selectedHours` hours and render them.
 * On any failure the table falls back to its empty state.
 *
 * @returns {Promise<void>}
 */
async function loadRequests() {
  const query = `/api/dashboard/requests?limit=50&hours=${selectedHours}`;
  try {
    const payload = await getJson(query);
    renderRequests(payload.data || []);
  } catch {
    renderRequests([]);
  }
}
|
||||||
|
|
||||||
|
/**
 * Run the topology, health and requests loaders concurrently and, once all
 * of them have succeeded, mark the page as connected.
 *
 * Rejects if any loader rejects; the status indicators are left untouched in
 * that case.
 *
 * @returns {Promise<void>}
 */
async function refreshAll() {
  const loaders = [loadTopology(), loadHealth(), loadRequests()];
  await Promise.all(loaders);

  const banner = document.getElementById('fixedStatus');
  banner.textContent = 'connected';
  const pollIndicator = document.getElementById('pollDot');
  pollIndicator.className = 'dot';
}
|
||||||
|
|
||||||
|
// Time-range buttons: remember the chosen window and reload the request table.
document.querySelectorAll('[data-hours]').forEach((button) => {
  button.addEventListener('click', () => {
    selectedHours = Number(button.dataset.hours || 24);
    loadRequests();
  });
});

// Settings is a placeholder for now: it only describes what will be configurable.
document.getElementById('settingsBtn').addEventListener('click', () => {
  alert('Settings preview: providers, subscriptions, local models, budgets, memory backend, and OpenAI-compatible base URL.');
});

/**
 * Run one refresh cycle and show a degraded state if it fails.
 *
 * Both the initial load and every timer tick go through this wrapper, so a
 * failed poll always updates the indicators and never surfaces as an unhandled
 * promise rejection. A later successful refresh resets the indicators, because
 * refreshAll() writes "connected" on success.
 */
function pollGateway() {
  refreshAll().catch(() => {
    document.getElementById('fixedStatus').textContent = 'degraded';
    document.getElementById('pollDot').className = 'dot warn';
  });
}

pollGateway();
setInterval(pollGateway, 15000);
|
||||||
|
</script>
|
||||||
|
</body>
|
||||||
|
</html>
|
||||||
File diff suppressed because it is too large
Load Diff
@ -58,6 +58,233 @@ interface AlertData {
|
|||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/** A single box in the dashboard topology graph. */
interface TopologyNode {
  /** Identifier that edges refer to through their `from` / `to` fields. */
  id: string;
  /** Human-readable name. */
  label: string;
  /** Category of the node. */
  kind:
    | 'client'
    | 'gateway'
    | 'policy'
    | 'memory'
    | 'model'
    | 'provider'
    | 'tool'
    | 'planned';
  /** Current availability state. */
  status:
    | 'online'
    | 'ready'
    | 'detected'
    | 'planned'
    | 'blocked'
    | 'offline';
  /** Trust classification; `'n/a'` where none applies. */
  trust:
    | 'public'
    | 'internal'
    | 'confidential'
    | 'secret'
    | 'n/a';
  /** Short free-text explanation of the node. */
  description: string;
  /** Optional named values attached to the node. */
  metrics?: Record<string, string | number | boolean>;
}
|
||||||
|
|
||||||
|
/** A directed connection between two topology nodes. */
interface TopologyEdge {
  /** Id of the node the edge starts at. */
  from: string;
  /** Id of the node the edge ends at. */
  to: string;
  /** Caption describing what flows along the edge. */
  label: string;
  /** State of the connection. */
  status:
    | 'active'
    | 'ready'
    | 'planned'
    | 'blocked';
}
|
||||||
|
|
||||||
|
/** Payload describing the gateway topology shown on the dashboard. */
interface DashboardTopology {
  /** Product name. */
  product: string;
  /** One-line product description. */
  tagline: string;
  /** Operating mode of the gateway. */
  mode:
    | 'online'
    | 'offline'
    | 'hybrid-safe';
  /** Timestamp string recording when the payload was built. */
  generatedAt: string;
  /** Headline counters. */
  summary: {
    detectedClients: number;
    localModels: number;
    providersConfigured: number;
    trustPolicies: number;
    memoryBackends: number;
    plannedModules: number;
  };
  /** Graph nodes. */
  nodes: TopologyNode[];
  /** Graph edges; `from` / `to` hold node ids. */
  edges: TopologyEdge[];
  /** One entry per trust level with its route and action. */
  trustLevels: {
    level: string;
    route: string;
    action: string;
  }[];
  /** Receipt entries. */
  receipts: {
    id: string;
    trust: string;
    route: string;
    protected: string;
    tokens: string;
  }[];
  /** Roadmap entries, one per module. */
  roadmap: {
    module: string;
    status: string;
    priority: string;
  }[];
}
|
||||||
|
|
||||||
|
async function getDashboardTopology(): Promise<DashboardTopology> {
|
||||||
|
let availableProviders: ReturnType<typeof getAvailableProviders> = [];
|
||||||
|
try {
|
||||||
|
availableProviders = getAvailableProviders();
|
||||||
|
} catch (err) {
|
||||||
|
logger.warn({ err }, 'Provider discovery failed while building topology');
|
||||||
|
}
|
||||||
|
|
||||||
|
const configuredProviders = availableProviders.filter((provider) => {
|
||||||
|
if (!provider.enabled) return false;
|
||||||
|
if (provider.name === 'claude-bridge') {
|
||||||
|
return process.env['CLAUDE_BRIDGE_ENABLED'] === 'true' && Boolean(process.env['CLAUDE_BRIDGE_URL']);
|
||||||
|
}
|
||||||
|
if (['claude-code', 'openai-bridge', 'chatgpt-bridge', 'copilot-bridge', 'codex'].includes(provider.name)) {
|
||||||
|
return Boolean(process.env[provider.envKey]) || Boolean(process.env[`${provider.envKey.replace(/_KEY|_TOKEN/, '')}_URL`]);
|
||||||
|
}
|
||||||
|
return Boolean(process.env[provider.envKey]);
|
||||||
|
});
|
||||||
|
|
||||||
|
const providerNodes: TopologyNode[] = availableProviders.slice(0, 8).map((provider) => ({
|
||||||
|
id: `provider-${provider.name}`,
|
||||||
|
label: provider.name,
|
||||||
|
kind: provider.name.includes('bridge') || provider.name === 'codex' ? 'provider' : 'provider',
|
||||||
|
status: configuredProviders.some((p) => p.name === provider.name) ? 'ready' : 'detected',
|
||||||
|
trust: provider.name.includes('bridge') ? 'internal' : 'public',
|
||||||
|
description: `${provider.models.length} model routes, ${provider.rateLimitRpm} rpm`,
|
||||||
|
metrics: {
|
||||||
|
models: provider.models.length,
|
||||||
|
rpm: provider.rateLimitRpm,
|
||||||
|
},
|
||||||
|
}));
|
||||||
|
|
||||||
|
const nodes: TopologyNode[] = [
|
||||||
|
{
|
||||||
|
id: 'openai-api',
|
||||||
|
label: 'OpenAI-compatible API',
|
||||||
|
kind: 'client',
|
||||||
|
status: 'online',
|
||||||
|
trust: 'internal',
|
||||||
|
description: 'Existing gateway entrypoint for apps that can use a custom base URL.',
|
||||||
|
},
|
||||||
|
{
|
||||||
|
id: 'client-claude-code',
|
||||||
|
label: 'Claude Code',
|
||||||
|
kind: 'client',
|
||||||
|
status: 'planned',
|
||||||
|
trust: 'internal',
|
||||||
|
description: 'Optional MCP bridge for code work.',
|
||||||
|
},
|
||||||
|
{
|
||||||
|
id: 'client-codex',
|
||||||
|
label: 'Codex',
|
||||||
|
kind: 'client',
|
||||||
|
status: 'planned',
|
||||||
|
trust: 'internal',
|
||||||
|
description: 'Optional MCP helper and OpenAI-compatible client flow.',
|
||||||
|
},
|
||||||
|
{
|
||||||
|
id: 'client-chatgpt',
|
||||||
|
label: 'ChatGPT',
|
||||||
|
kind: 'client',
|
||||||
|
status: 'planned',
|
||||||
|
trust: 'public',
|
||||||
|
description: 'Export importer and optional browser/adapter flow.',
|
||||||
|
},
|
||||||
|
{
|
||||||
|
id: 'client-cursor',
|
||||||
|
label: 'Cursor / VS Code',
|
||||||
|
kind: 'client',
|
||||||
|
status: 'planned',
|
||||||
|
trust: 'internal',
|
||||||
|
description: 'Works through OpenAI-compatible base URL where supported.',
|
||||||
|
},
|
||||||
|
{
|
||||||
|
id: 'gateway',
|
||||||
|
label: 'LLM Gateway',
|
||||||
|
kind: 'gateway',
|
||||||
|
status: 'online',
|
||||||
|
trust: 'n/a',
|
||||||
|
description: 'Existing core: OpenAI-compatible API, routing, completions, metrics, fallback.',
|
||||||
|
},
|
||||||
|
{
|
||||||
|
id: 'trust-router',
|
||||||
|
label: 'Trust Router',
|
||||||
|
kind: 'policy',
|
||||||
|
status: 'planned',
|
||||||
|
trust: 'n/a',
|
||||||
|
description: 'Small policy layer for local-first routing and provider allowlists.',
|
||||||
|
},
|
||||||
|
{
|
||||||
|
id: 'context-receipts',
|
||||||
|
label: 'Context Receipts',
|
||||||
|
kind: 'policy',
|
||||||
|
status: 'planned',
|
||||||
|
trust: 'n/a',
|
||||||
|
description: 'Shows what context was used, compressed, redacted, and routed.',
|
||||||
|
},
|
||||||
|
{
|
||||||
|
id: 'memory-gitea',
|
||||||
|
label: 'Shared Git Memory',
|
||||||
|
kind: 'memory',
|
||||||
|
status: 'planned',
|
||||||
|
trust: 'confidential',
|
||||||
|
description: 'Optional Git/Gitea-backed memory for AI handoffs and project decisions.',
|
||||||
|
},
|
||||||
|
{
|
||||||
|
id: 'mcp-server',
|
||||||
|
label: 'MCP Control Plane',
|
||||||
|
kind: 'tool',
|
||||||
|
status: 'planned',
|
||||||
|
trust: 'internal',
|
||||||
|
description: 'Gateway, memory, repo, and policy tools exposed through MCP.',
|
||||||
|
},
|
||||||
|
{
|
||||||
|
id: 'ollama',
|
||||||
|
label: 'Ollama / Local Models',
|
||||||
|
kind: 'model',
|
||||||
|
status: 'ready',
|
||||||
|
trust: 'confidential',
|
||||||
|
description: 'Local-first model runtime for private and offline work.',
|
||||||
|
},
|
||||||
|
...providerNodes,
|
||||||
|
];
|
||||||
|
|
||||||
|
const edges: TopologyEdge[] = [
|
||||||
|
{ from: 'openai-api', to: 'gateway', label: 'OpenAI-compatible API', status: 'ready' },
|
||||||
|
{ from: 'client-claude-code', to: 'mcp-server', label: 'MCP tools/resources', status: 'planned' },
|
||||||
|
{ from: 'client-codex', to: 'mcp-server', label: 'MCP tools/resources', status: 'planned' },
|
||||||
|
{ from: 'client-chatgpt', to: 'gateway', label: 'export/import + OpenAI adapter', status: 'planned' },
|
||||||
|
{ from: 'client-cursor', to: 'gateway', label: 'custom base URL', status: 'planned' },
|
||||||
|
{ from: 'mcp-server', to: 'gateway', label: 'tool calls', status: 'planned' },
|
||||||
|
{ from: 'gateway', to: 'trust-router', label: 'policy decision', status: 'planned' },
|
||||||
|
{ from: 'trust-router', to: 'ollama', label: 'confidential/local route', status: 'ready' },
|
||||||
|
{ from: 'trust-router', to: 'memory-gitea', label: 'memory read/write', status: 'planned' },
|
||||||
|
{ from: 'gateway', to: 'context-receipts', label: 'audit artifact', status: 'planned' },
|
||||||
|
...providerNodes.map((node) => ({
|
||||||
|
from: 'trust-router',
|
||||||
|
to: node.id,
|
||||||
|
label: node.trust === 'public' ? 'public route' : 'approved route',
|
||||||
|
status: node.status === 'ready' ? 'ready' : 'planned',
|
||||||
|
} as TopologyEdge)),
|
||||||
|
];
|
||||||
|
|
||||||
|
const plannedModules = [
|
||||||
|
'Trust Router',
|
||||||
|
'Context Receipts',
|
||||||
|
'Shared Gitea Memory',
|
||||||
|
'AI Handoff Protocol',
|
||||||
|
'Consent Ledger',
|
||||||
|
'Setup Doctor',
|
||||||
|
'Safe Config Writer',
|
||||||
|
'Benchmark Lab',
|
||||||
|
'Agent Reputation',
|
||||||
|
'Compression Engine',
|
||||||
|
];
|
||||||
|
|
||||||
|
return {
|
||||||
|
product: 'llm.gateway',
|
||||||
|
tagline: 'OpenAI-compatible LLM Gateway with routing, savings, receipts, and optional shared memory',
|
||||||
|
mode: process.env['BLACKHOLE_OFFLINE_MODE'] === 'true' ? 'offline' : 'hybrid-safe',
|
||||||
|
generatedAt: new Date().toISOString(),
|
||||||
|
summary: {
|
||||||
|
detectedClients: 6,
|
||||||
|
localModels: 1,
|
||||||
|
providersConfigured: configuredProviders.length,
|
||||||
|
trustPolicies: 4,
|
||||||
|
memoryBackends: 1,
|
||||||
|
plannedModules: plannedModules.length,
|
||||||
|
},
|
||||||
|
nodes,
|
||||||
|
edges,
|
||||||
|
trustLevels: [
|
||||||
|
{ level: 'public', route: 'any enabled provider', action: 'allow' },
|
||||||
|
{ level: 'internal', route: 'local or approved provider', action: 'route with receipt' },
|
||||||
|
{ level: 'confidential', route: 'local-first', action: 'block public providers' },
|
||||||
|
{ level: 'secret', route: 'none', action: 'redact or block' },
|
||||||
|
],
|
||||||
|
receipts: [
|
||||||
|
{ id: 'ctxr-demo-001', trust: 'internal', route: 'Claude Code -> MCP -> local model', protected: '.env, tokens, private keys', tokens: '13.2k -> 4.2k' },
|
||||||
|
{ id: 'ctxr-demo-002', trust: 'confidential', route: 'Codex -> Gateway -> Ollama', protected: 'customer names, internal hosts', tokens: '8.4k -> 3.1k' },
|
||||||
|
{ id: 'ctxr-demo-003', trust: 'public', route: 'OpenAI-compatible app -> Gateway -> hosted model', protected: 'none detected', tokens: '2.0k -> 1.8k' },
|
||||||
|
],
|
||||||
|
roadmap: plannedModules.map((module, index) => ({
|
||||||
|
module,
|
||||||
|
status: index < 4 ? 'next' : 'planned',
|
||||||
|
priority: index < 7 ? 'P0' : 'P1',
|
||||||
|
})),
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Get dashboard summary stats for a time window
|
* Get dashboard summary stats for a time window
|
||||||
*/
|
*/
|
||||||
@ -306,6 +533,24 @@ async function getAlerts(): Promise<AlertData> {
|
|||||||
}
|
}
|
||||||
|
|
||||||
export async function dashboardRoute(fastify: FastifyInstance): Promise<void> {
|
export async function dashboardRoute(fastify: FastifyInstance): Promise<void> {
|
||||||
|
fastify.get('/api/dashboard/topology', async (_request: FastifyRequest, reply: FastifyReply) => {
|
||||||
|
try {
|
||||||
|
return reply.send({
|
||||||
|
success: true,
|
||||||
|
data: await getDashboardTopology(),
|
||||||
|
meta: {
|
||||||
|
timestamp: new Date().toISOString(),
|
||||||
|
},
|
||||||
|
});
|
||||||
|
} catch (error) {
|
||||||
|
logger.error({ error }, 'Failed to fetch topology');
|
||||||
|
return reply.status(500).send({
|
||||||
|
success: false,
|
||||||
|
error: 'Failed to fetch topology',
|
||||||
|
});
|
||||||
|
}
|
||||||
|
});
|
||||||
|
|
||||||
// Dashboard summary endpoint
|
// Dashboard summary endpoint
|
||||||
fastify.get('/api/dashboard/summary', async (request: FastifyRequest, reply: FastifyReply) => {
|
fastify.get('/api/dashboard/summary', async (request: FastifyRequest, reply: FastifyReply) => {
|
||||||
const hours = (request.query as any).hours ?? 24;
|
const hours = (request.query as any).hours ?? 24;
|
||||||
@ -618,8 +863,7 @@ export async function dashboardRoute(fastify: FastifyInstance): Promise<void> {
|
|||||||
}
|
}
|
||||||
});
|
});
|
||||||
|
|
||||||
// Dashboard UI endpoint (served at /api/dashboard/index for Cloudflare tunnel compatibility)
|
async function serveDashboardHtml(reply: FastifyReply, filename = 'dashboard.html', endpoint = '/dashboard') {
|
||||||
fastify.get('/api/dashboard/index', async (_request: FastifyRequest, reply: FastifyReply) => {
|
|
||||||
try {
|
try {
|
||||||
const { fileURLToPath } = await import('url');
|
const { fileURLToPath } = await import('url');
|
||||||
const { dirname, join } = await import('path');
|
const { dirname, join } = await import('path');
|
||||||
@ -628,84 +872,52 @@ export async function dashboardRoute(fastify: FastifyInstance): Promise<void> {
|
|||||||
const __filename = fileURLToPath(import.meta.url);
|
const __filename = fileURLToPath(import.meta.url);
|
||||||
const __dirname = dirname(__filename);
|
const __dirname = dirname(__filename);
|
||||||
const publicDir = join(__dirname, '..', '..', 'public');
|
const publicDir = join(__dirname, '..', '..', 'public');
|
||||||
const dashboardPath = join(publicDir, 'dashboard.html');
|
const dashboardPath = join(publicDir, filename);
|
||||||
|
|
||||||
if (!existsSync(dashboardPath)) {
|
if (!existsSync(dashboardPath)) {
|
||||||
logger.warn({ path: dashboardPath }, 'dashboard.html not found');
|
logger.warn({ path: dashboardPath, endpoint }, 'dashboard html not found');
|
||||||
return reply.status(404).send({ error: 'dashboard.html not found' });
|
return reply.status(404).send({ error: `${filename} not found` });
|
||||||
}
|
}
|
||||||
|
|
||||||
const content = readFileSync(dashboardPath, 'utf-8');
|
const content = readFileSync(dashboardPath, 'utf-8');
|
||||||
logger.info({ size: content.length }, 'Serving dashboard from /api/dashboard/ui');
|
logger.info({ size: content.length, endpoint, filename }, 'Serving dashboard html');
|
||||||
return reply.type('text/html').send(content);
|
|
||||||
} catch (error) {
|
|
||||||
logger.error({ error }, 'Failed to serve dashboard UI');
|
|
||||||
return reply.status(500).send({ error: 'Failed to serve dashboard' });
|
|
||||||
}
|
|
||||||
});
|
|
||||||
|
|
||||||
// Fresh dashboard endpoint (no cache) - for Cloudflare cache bypass testing
|
|
||||||
fastify.get('/dashboard', async (_request: FastifyRequest, reply: FastifyReply) => {
|
|
||||||
try {
|
|
||||||
const { fileURLToPath } = await import('url');
|
|
||||||
const { dirname, join } = await import('path');
|
|
||||||
const { readFileSync, existsSync } = await import('fs');
|
|
||||||
|
|
||||||
const __filename = fileURLToPath(import.meta.url);
|
|
||||||
const __dirname = dirname(__filename);
|
|
||||||
const publicDir = join(__dirname, '..', '..', 'public');
|
|
||||||
const dashboardPath = join(publicDir, 'dashboard.html');
|
|
||||||
|
|
||||||
if (!existsSync(dashboardPath)) {
|
|
||||||
logger.warn({ path: dashboardPath }, 'dashboard.html not found');
|
|
||||||
return reply.status(404).send({ error: 'dashboard.html not found' });
|
|
||||||
}
|
|
||||||
|
|
||||||
const content = readFileSync(dashboardPath, 'utf-8');
|
|
||||||
logger.info({ size: content.length }, 'Serving dashboard from /dashboard');
|
|
||||||
return reply
|
return reply
|
||||||
.header('Cache-Control', 'no-cache, no-store, must-revalidate, max-age=0')
|
.header('Cache-Control', 'no-cache, no-store, must-revalidate, max-age=0')
|
||||||
.header('Pragma', 'no-cache')
|
.header('Pragma', 'no-cache')
|
||||||
.header('Expires', '0')
|
.header('Expires', '0')
|
||||||
.type('text/html')
|
.type('text/html; charset=utf-8')
|
||||||
.send(content);
|
.send(content);
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
logger.error({ error }, 'Failed to serve dashboard');
|
logger.error({ error, endpoint, filename }, 'Failed to serve dashboard html');
|
||||||
return reply.status(500).send({ error: 'Failed to serve dashboard' });
|
return reply.status(500).send({ error: 'Failed to serve dashboard' });
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Dashboard UI endpoint (served at /api/dashboard/index for Cloudflare tunnel compatibility)
|
||||||
|
fastify.get('/api/dashboard/index', async (_request: FastifyRequest, reply: FastifyReply) => {
|
||||||
|
return serveDashboardHtml(reply, 'dashboard.html', '/api/dashboard/index');
|
||||||
|
});
|
||||||
|
|
||||||
|
// Fresh dashboard endpoint (no cache) - keeps the original Version 1 dashboard online.
|
||||||
|
fastify.get('/dashboard', async (_request: FastifyRequest, reply: FastifyReply) => {
|
||||||
|
return serveDashboardHtml(reply, 'dashboard.html', '/dashboard');
|
||||||
|
});
|
||||||
|
|
||||||
|
// Version 2 dashboard preview - open-source workbench without replacing Version 1.
|
||||||
|
fastify.get('/v2/dashboard', async (_request: FastifyRequest, reply: FastifyReply) => {
|
||||||
|
return serveDashboardHtml(reply, 'dashboard-v2.html', '/v2/dashboard');
|
||||||
|
});
|
||||||
|
|
||||||
|
fastify.get('/dashboard-v2', async (_request: FastifyRequest, reply: FastifyReply) => {
|
||||||
|
return serveDashboardHtml(reply, 'dashboard-v2.html', '/dashboard-v2');
|
||||||
});
|
});
|
||||||
|
|
||||||
// Cloudflare cache bypass endpoint - new URL that won't be cached by Cloudflare
|
// Cloudflare cache bypass endpoint - new URL that won't be cached by Cloudflare
|
||||||
fastify.get('/api/dashboard/ui', async (_request: FastifyRequest, reply: FastifyReply) => {
|
fastify.get('/api/dashboard/ui', async (_request: FastifyRequest, reply: FastifyReply) => {
|
||||||
try {
|
return serveDashboardHtml(reply, 'dashboard.html', '/api/dashboard/ui');
|
||||||
const { fileURLToPath } = await import('url');
|
});
|
||||||
const { dirname, join } = await import('path');
|
|
||||||
const { readFileSync, existsSync } = await import('fs');
|
|
||||||
|
|
||||||
const __filename = fileURLToPath(import.meta.url);
|
fastify.get('/api/dashboard/v2', async (_request: FastifyRequest, reply: FastifyReply) => {
|
||||||
const __dirname = dirname(__filename);
|
return serveDashboardHtml(reply, 'dashboard-v2.html', '/api/dashboard/v2');
|
||||||
const publicDir = join(__dirname, '..', '..', 'public');
|
|
||||||
const dashboardPath = join(publicDir, 'dashboard.html');
|
|
||||||
|
|
||||||
if (!existsSync(dashboardPath)) {
|
|
||||||
logger.warn({ path: dashboardPath }, 'dashboard.html not found at /api/dashboard/ui');
|
|
||||||
return reply.status(404).send({ error: 'dashboard.html not found' });
|
|
||||||
}
|
|
||||||
|
|
||||||
const content = readFileSync(dashboardPath, 'utf-8');
|
|
||||||
const timestamp = Date.now();
|
|
||||||
logger.info({ size: content.length, endpoint: '/api/dashboard/ui', timestamp }, 'Serving dashboard UI (Cloudflare cache bypass)');
|
|
||||||
return reply
|
|
||||||
.header('Cache-Control', 'no-cache, no-store, must-revalidate, max-age=0, public')
|
|
||||||
.header('Pragma', 'no-cache')
|
|
||||||
.header('Expires', '0')
|
|
||||||
.header('ETag', `"ui-${timestamp}"`)
|
|
||||||
.header('X-Cache-Bypass', 'true')
|
|
||||||
.type('text/html; charset=utf-8')
|
|
||||||
.send(content);
|
|
||||||
} catch (error) {
|
|
||||||
logger.error({ error }, 'Failed to serve dashboard UI');
|
|
||||||
return reply.status(500).send({ error: 'Failed to serve dashboard UI' });
|
|
||||||
}
|
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|||||||
@ -78,8 +78,9 @@ export async function healthRoute(fastify: FastifyInstance): Promise<void> {
|
|||||||
// Check if this is a dashboard UI request with ?ui=1 or ?dashboard=1
|
// Check if this is a dashboard UI request with ?ui=1 or ?dashboard=1
|
||||||
const query = request.query as any;
|
const query = request.query as any;
|
||||||
const isDashboardRequest = query.ui || query.dashboard;
|
const isDashboardRequest = query.ui || query.dashboard;
|
||||||
|
const acceptsHtml = String(request.headers.accept ?? '').includes('text/html');
|
||||||
|
|
||||||
if (isDashboardRequest) {
|
if (isDashboardRequest || acceptsHtml) {
|
||||||
try {
|
try {
|
||||||
const __filename = fileURLToPath(import.meta.url);
|
const __filename = fileURLToPath(import.meta.url);
|
||||||
const __dirname = dirname(__filename);
|
const __dirname = dirname(__filename);
|
||||||
|
|||||||
@ -126,10 +126,10 @@ export async function registerHTTPSRedirectMiddleware(server: FastifyInstance) {
|
|||||||
*/
|
*/
|
||||||
export async function registerSecurityHeadersMiddleware(server: FastifyInstance) {
|
export async function registerSecurityHeadersMiddleware(server: FastifyInstance) {
|
||||||
server.addHook('onSend', async (request, reply) => {
|
server.addHook('onSend', async (request, reply) => {
|
||||||
// Content Security Policy - strict, no inline scripts
|
// Content Security Policy for the self-contained dashboard UI.
|
||||||
reply.header(
|
reply.header(
|
||||||
'Content-Security-Policy',
|
'Content-Security-Policy',
|
||||||
"default-src 'self'; script-src 'self'; object-src 'none'; frame-ancestors 'none'; base-uri 'self'; form-action 'self'"
|
"default-src 'self'; script-src 'self' 'unsafe-inline'; style-src 'self' 'unsafe-inline'; object-src 'none'; frame-ancestors 'none'; base-uri 'self'; form-action 'self'"
|
||||||
);
|
);
|
||||||
|
|
||||||
// Prevent clickjacking
|
// Prevent clickjacking
|
||||||
|
|||||||
@ -77,6 +77,7 @@ async function buildServer() {
|
|||||||
directives: {
|
directives: {
|
||||||
defaultSrc: ["'self'"],
|
defaultSrc: ["'self'"],
|
||||||
scriptSrc: ["'self'", "'unsafe-inline'"],
|
scriptSrc: ["'self'", "'unsafe-inline'"],
|
||||||
|
styleSrc: ["'self'", "'unsafe-inline'"],
|
||||||
objectSrc: ["'none'"],
|
objectSrc: ["'none'"],
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user