@@ -1883,6 +2140,9 @@
el.innerHTML = clients.map(client => {
const lastSeen = client.lastSeen ? new Date(client.lastSeen).toLocaleString() : 'never';
const callerList = client.callers?.length ? client.callers.join(', ') : 'no caller id seen';
+ const bridgeState = client.bridgeProvider
+ ? `${client.bridgeProvider}: ${client.bridgeStatus || 'not configured'}${client.bridgeDetail ? ` (${client.bridgeDetail})` : ''}`
+ : 'bridge: OpenAI-compatible / manual client config';
return `
@@ -2137,6 +2398,60 @@
`;
}
+ // ─── Full Discovery: CLIs + Local LLMs + API Keys ────────────────────
+ document.getElementById('discoverFullBtn')?.addEventListener('click', async () => { // full scan: POST discover, render the three result lists, then reload subscriptions
+ const btn = document.getElementById('discoverFullBtn');
+ const wrap = document.getElementById('discoverReportWrap');
+ const meta = document.getElementById('discoverReportMeta');
+ btn.disabled = true; // stays disabled until the timer in `finally` re-enables it
+ const orig = btn.textContent;
+ btn.textContent = '⏳ scanning…';
+ try {
+ const res = await apiFetch(`${API_BASE}/api/dashboard/discover`, { method: 'POST' });
+ const payload = await res.json();
+ if (!payload.success) throw new Error(payload.error || 'discovery failed'); // routed to the catch below, which shows the message on the button
+ const r = payload.data.report;
+ const spawnedCount = payload.data.spawnedCount;
+
+ wrap.style.display = 'block';
+ meta.textContent = `host: ${r.host} · scanned: ${new Date(r.generatedAt).toLocaleTimeString()} · ${spawnedCount} bridges spawned · ${r.summary.totalProviders} total providers, ${r.summary.totalRoutableModels} models`;
+
+ // CLI subscriptions — NOTE(review): report fields are interpolated into innerHTML unescaped here and in the two lists below; confirm they are trusted or escape them
+ document.getElementById('discCntSubs').textContent = r.subscriptions.detected;
+ document.getElementById('discListSubs').innerHTML = r.subscriptions.items.map(s => `
+
+ ${s.descriptor.label}
+ ${s.installed ? (s.authenticated === true ? '✓ auth' : (s.authenticated === false ? '⚠ unauth' : '?')) : '—'}
+
+ `).join('');
+
+ // Local LLM servers
+ document.getElementById('discCntLocal').textContent = r.localLLMs.detected;
+ document.getElementById('discListLocal').innerHTML = r.localLLMs.items.map(l => `
+
+ ${l.label}
${l.url}
+ ${l.detected ? `✓ ${l.models.length} models · ${l.latencyMs}ms` : '— offline'}
+
+ `).join('');
+
+ // API-key providers
+ document.getElementById('discCntKeys').textContent = r.apiKeys.configured;
+ document.getElementById('discListKeys').innerHTML = r.apiKeys.items.map(k => `
+
+ ${k.label}
${k.envKey}
+ ${k.configured ? '✓ set' : '— missing'}
+
+ `).join('');
+
+ btn.textContent = `✓ found ${r.summary.totalProviders}`;
+ await loadSubscriptions();
+ } catch (e) {
+ btn.textContent = `✗ ${e.message}`;
+ } finally {
+ setTimeout(() => { btn.disabled = false; btn.textContent = orig; }, 3000); // keep the result/error label visible for 3 s before restoring the original text
+ }
+ });
+
document.getElementById('subsSpawnBtn').addEventListener('click', async () => {
const btn = document.getElementById('subsSpawnBtn');
btn.disabled = true;
@@ -2235,7 +2550,7 @@
document.getElementById('routingModeBadge').textContent = s.routingMode;
// UI mode toggles
- const ui = s.ui ?? { simpleMode: true, hideEmptyProviders: true, showTooltips: true };
+ const ui = s.ui ?? { simpleMode: false, hideEmptyProviders: true, showTooltips: true };
document.getElementById('uiSimpleMode').checked = !!ui.simpleMode;
document.getElementById('uiHideEmpty').checked = !!ui.hideEmptyProviders;
document.getElementById('uiTooltips').checked = !!ui.showTooltips;
@@ -2394,6 +2709,11 @@
document.getElementById('cacheEntries').textContent = formatNumber(s.uniqueEntries);
document.getElementById('tokensPrevented').textContent = formatNumber(s.totalTokensSaved);
document.getElementById('cacheHitRate').innerHTML = s.hitRatePercent.toFixed(1) + '
%';
+ const sr = s.sinceRestart || {};
+ document.getElementById('compressedSinceRestart').textContent = formatNumber(sr.tokensSaved || 0);
+ const sinceLabel = sr.sinceISO ? new Date(sr.sinceISO).toLocaleString() : '—';
+ const pctTxt = (sr.savingsPct || 0).toFixed(1) + '%';
+ document.getElementById('compressedSinceRestartMeta').textContent = pctTxt + ' · ' + (sr.operations || 0) + ' ops · since ' + sinceLabel;
// Tab badge
document.getElementById('savingsTabBadge').textContent = s.totalHits > 0 ? formatCost(s.totalCostSaved) : '·';
@@ -2618,9 +2938,9 @@
`;
}
- // Try to fetch Lean-CTX stats from localhost:3333 (browser-side, not server-side)
- // Returns null if Lean-CTX not running OR dashboard browsed from different machine.
- async function fetchLeanCtxStats() {
+ // Try to fetch external-tool stats from localhost:3333 (legacy compat). The request is made browser-side, not server-side.
+ // Returns null if no external tool runs there OR the dashboard is browsed from a different machine.
+ async function fetchExternalToolStats() {
try {
const ctrl = new AbortController();
setTimeout(() => ctrl.abort(), 1500);
@@ -2645,20 +2965,20 @@
document.getElementById('heroCacheHits').textContent = s.totalHits;
document.getElementById('heroSavingsRate').textContent = `${s.hitRatePercent || 0}%`;
- // Lean-CTX integration: pull from localhost:3333 if available
- const leanCtx = await fetchLeanCtxStats();
- const combined = gatewayTokens + (leanCtx?.saved || 0);
+ // Optional external-tool integration: pull from localhost:3333 if running
+ const externalTool = await fetchExternalToolStats();
+ const combined = gatewayTokens + (externalTool?.saved || 0);
document.getElementById('heroTokensSavedCombined').textContent = formatNumber(combined);
- if (leanCtx) {
- document.getElementById('heroLeanCtxRow').style.display = 'flex';
- document.getElementById('heroLeanCtxTokens').textContent = formatNumber(leanCtx.saved);
+ if (externalTool) {
+ document.getElementById('heroExternalToolRow').style.display = 'flex';
+ document.getElementById('heroExternalToolTokens').textContent = formatNumber(externalTool.saved);
} else {
- document.getElementById('heroLeanCtxRow').style.display = 'none';
+ document.getElementById('heroExternalToolRow').style.display = 'none';
}
document.getElementById('costWithout').textContent = formatCost(c.costWithoutGateway || 0);
document.getElementById('costWith').textContent = formatCost(c.costWithGateway || 0);
const saved = (c.costWithoutGateway || 0) - (c.costWithGateway || 0);
- document.getElementById('costSavedLine').textContent = formatCost(saved);
+ document.getElementById('costSavedLine').textContent = (saved < 0 ? '-$' : '$') + Math.abs(saved).toFixed(2);
document.getElementById('costSavedPercent').textContent = `${(c.effectiveSavingsPercent || 0).toFixed(1)}%`;
// 5-axis savings
@@ -3070,9 +3390,98 @@
if (target === 'memory') loadMemoryGraph();
if (target === 'leaderboard') loadLeaderboard();
if (target === 'share') refreshShareCard();
+ if (target === 'api') refreshApiBridgeStatus();
});
});
+ // ─── API Tab — copy buttons, try-it-out, bridge status ────────────────
+ function copyToClipboard(text) {
+ if (navigator.clipboard?.writeText) return navigator.clipboard.writeText(text);
+ const ta = document.createElement('textarea');
+ ta.value = text; document.body.appendChild(ta); ta.select();
+ document.execCommand('copy'); document.body.removeChild(ta);
+ return Promise.resolve();
+ }
+ document.querySelectorAll('.api-copy').forEach(btn => {
+ btn.addEventListener('click', async () => {
+ const targetId = btn.dataset.target;
+ const snippet = document.getElementById(targetId)?.innerText || '';
+ await copyToClipboard(snippet);
+ const orig = btn.textContent;
+ btn.textContent = 'copied ✓';
+ setTimeout(() => { btn.textContent = orig; }, 1400);
+ });
+ });
+
+ document.getElementById('apiTryRun')?.addEventListener('click', async () => {
+ const endpoint = document.getElementById('apiTryEndpoint').value;
+ const model = document.getElementById('apiTryModel').value || 'claude-sonnet-4.6';
+ const prompt = document.getElementById('apiTryPrompt').value || '';
+ const status = document.getElementById('apiTryStatus');
+ const meta = document.getElementById('apiTryMeta');
+ const wrap = document.getElementById('apiTryResultWrap');
+ const out = document.getElementById('apiTryResult');
+ if (!prompt.trim()) { status.textContent = 'add a prompt first'; return; }
+
+ let body;
+ if (endpoint === '/v1/completion') {
+ body = { caller: 'dashboard-tryout', task_type: 'generic_qa', input: prompt, options: { compression: { enabled: true, mode: 'auto' } } };
+ } else if (endpoint === '/v1/chat/completions') {
+ body = { model, messages: [{ role: 'user', content: prompt }] };
+ } else {
+ body = { model, messages: [{ role: 'user', content: prompt }], max_tokens: 1024 };
+ }
+
+ status.textContent = 'sending…';
+ const t0 = performance.now();
+ try {
+ const res = await fetch((API_BASE || location.origin) + endpoint, {
+ method: 'POST',
+ headers: { 'Content-Type': 'application/json' },
+ body: JSON.stringify(body),
+ });
+ const dtMs = Math.round(performance.now() - t0);
+ const json = await res.json().catch(() => ({}));
+ status.textContent = `${res.status} ${res.statusText} · ${dtMs} ms`;
+ const c = json?.compression || (json?.metadata?.compression) || null;
+ if (c) {
+ meta.textContent = `compression: applied=${c.applied} · method=${c.method} · before=${c.tokens_before} after=${c.tokens_after} saved=${c.tokens_saved}`;
+ } else {
+ meta.textContent = 'no compression metadata in response';
+ }
+ out.textContent = JSON.stringify(json, null, 2);
+ wrap.style.display = 'block';
+ } catch (err) {
+ status.textContent = 'error: ' + (err.message || err);
+ }
+ });
+
+ async function refreshApiBridgeStatus() {
+ try {
+ const res = await fetch((API_BASE || location.origin) + '/api/dashboard/providers');
+ if (!res.ok) return;
+ const json = await res.json();
+ const allProviders = [
+ ...((json?.data?.grouped?.subscription) || []),
+ ...((json?.data?.grouped?.local) || []),
+ ];
+ document.querySelectorAll('.api-bridge-status').forEach(cell => {
+ const name = cell.dataset.bridge;
+ const p = allProviders.find(x => x.name === name);
+ if (!p) { cell.textContent = 'unknown'; cell.classList.add('err'); return; }
+ if (p.enabled && p.status === 'configured') {
+ cell.textContent = '✓ online';
+ cell.classList.add('ok');
+ } else {
+ cell.textContent = p.status || 'disabled';
+ cell.classList.add('err');
+ }
+ });
+ } catch {
+ /* silent */
+ }
+ }
+
// ─── Init ────────────────────────────────────────────────────────────
async function init() {
await checkHealth();
@@ -3090,7 +3499,7 @@
if (payload.success) {
document.getElementById('routingModeBadge').textContent = payload.data.routingMode;
// Apply UI mode (Simple Mode etc.) immediately on load
- applyUiMode(payload.data.ui ?? { simpleMode: true, hideEmptyProviders: true, showTooltips: true });
+ applyUiMode(payload.data.ui ?? { simpleMode: false, hideEmptyProviders: true, showTooltips: true });
}
} catch (e) { /* non-fatal */ }
diff --git a/packages/gateway/src/config/models.yaml b/packages/gateway/src/config/models.yaml
index 4a3e46c..7f028e3 100644
--- a/packages/gateway/src/config/models.yaml
+++ b/packages/gateway/src/config/models.yaml
@@ -1,7 +1,7 @@
# LLM Gateway Model Configuration
# Ollama base URL: http://192.168.178.169:11434
-ollama_base_url: "https://ollama.fichtmueller.org"
+ollama_base_url: "http://127.0.0.1:11434"
tiers:
fast:
@@ -26,7 +26,7 @@ models:
qwen2.5:3b:
tier: fast
context_length: 32768
- strengths: [classification, short_text, routing]
+ strengths: [classification, summarization, routing]
max_tokens_default: 512
qwen2.5:7b:
@@ -35,83 +35,58 @@ models:
strengths: [classification, summarization, short_analysis]
max_tokens_default: 1024
- phi3.5:3.8b:
+ qwen2.5:7b-instruct:
tier: fast
- context_length: 128000
- strengths: [classification, summarization]
+ context_length: 32768
+ strengths: [classification, summarization, short_analysis]
+ max_tokens_default: 1024
+
+ qwen2.5-coder:7b-instruct:
+ tier: fast
+ context_length: 32768
+ strengths: [code_generation, technical_analysis, routing]
max_tokens_default: 512
# ─── MAGATAMA — Fine-tuned Security Intelligence (Context X) ─────────────────
magatama:32b:
tier: large
context_length: 131072
- strengths: [security_analysis, threat_intelligence, compliance, bgp_security, incident_response, nis2, ciso_reporting]
+ strengths: [security_analysis, threat_intelligence, compliance, bgp_security, incident_response, nis2, ciso_reporting, complex_writing, deep_analysis, technical]
max_tokens_default: 4096
description: "MAGATAMA まがたま — TEPPEKI 7-pillar security AI, fine-tuned on Qwen2.5-32B"
- # Custom fine-tuned models (Context X)
- ctxhealer:latest:
- tier: medium
- context_length: 32768
- strengths: [infrastructure_diagnosis, root_cause_analysis, remediation_steps]
- max_tokens_default: 1024
-
- llama-guard3:1b:
- tier: fast
- context_length: 8192
- strengths: [safety_classification, threat_detection]
- max_tokens_default: 256
-
# Medium tier
qwen2.5:14b:
tier: medium
context_length: 131072
- strengths: [general, writing, analysis, coding]
+ strengths: [general, writing, analysis, coding, dialogue]
max_tokens_default: 2048
- mistral:7b:
+ magatama-llm-v2-0:latest:
tier: medium
- context_length: 32768
- strengths: [general, writing]
+ context_length: 131072
+ strengths: [general, writing, analysis, coding, dialogue]
max_tokens_default: 2048
- llama3.2:8b:
- tier: medium
- context_length: 128000
- strengths: [general, chat, analysis]
- max_tokens_default: 2048
-
- deepseek-r1:8b:
+ magatama-coder:latest:
tier: medium
context_length: 65536
- strengths: [reasoning, analysis, coding]
+ strengths: [code_generation, technical_analysis, debugging]
max_tokens_default: 2048
# Large tier
qwen2.5:32b:
tier: large
context_length: 131072
- strengths: [complex_writing, deep_analysis, technical]
- max_tokens_default: 4096
-
- llama3.3:70b:
- tier: large
- context_length: 128000
- strengths: [complex_reasoning, long_form, research]
- max_tokens_default: 4096
-
- deepseek-r1:32b:
- tier: large
- context_length: 131072
- strengths: [chain_of_thought, complex_reasoning]
+ strengths: [complex_writing, deep_analysis, technical, security_analysis]
max_tokens_default: 4096
# Fallback chains per tier
fallback_chains:
- fast: [qwen2.5:3b, qwen2.5:7b, phi3.5:3.8b]
- medium: [qwen2.5:14b, mistral:7b, llama3.2:8b]
- large: [qwen2.5:32b, llama3.3:70b, deepseek-r1:32b]
- code_generation: [deepseek-r1:32b, qwen2.5:32b, llama3.3:70b]
+ fast: [qwen2.5:7b-instruct, qwen2.5-coder:7b-instruct]
+ medium: [magatama-llm-v2-0:latest, magatama-coder:latest, qwen2.5:7b-instruct]
+ large: [magatama:32b, magatama-llm-v2-0:latest]
+ code_generation: [magatama-coder:latest, qwen2.5-coder:7b-instruct]
# Cross-tier fallback when primary tier fails
tier_fallback:
diff --git a/packages/gateway/src/config/routing-rules.yaml b/packages/gateway/src/config/routing-rules.yaml
index c35cca2..aa6d6b6 100644
--- a/packages/gateway/src/config/routing-rules.yaml
+++ b/packages/gateway/src/config/routing-rules.yaml
@@ -1110,7 +1110,7 @@ routing_rules:
# ─── CONTENT / LINKEDIN ──────────────────────────────────────────────────────
linkedin_post:
- model: qwen2.5:32b
+ model: fo-blog-v10
tier: large
prompt_template: linkedin_post
temperature: 0.7
@@ -1118,7 +1118,7 @@ routing_rules:
output_format: text
requires_fact_check: false
validators: [banlist, language, length, question_closer]
- callers: [n8n, internal]
+ callers: [n8n, internal, linkedin-distributor]
linkedin_comment:
model: qwen2.5:14b
diff --git a/packages/gateway/src/db/migrations/002-tokenvault-cost-tracking.sql b/packages/gateway/src/db/migrations/002-tokenvault-cost-tracking.sql
index b3d85b7..c8ba81c 100644
--- a/packages/gateway/src/db/migrations/002-tokenvault-cost-tracking.sql
+++ b/packages/gateway/src/db/migrations/002-tokenvault-cost-tracking.sql
@@ -3,7 +3,7 @@
-- Purpose: Track token compression and cost analytics
-- PostgreSQL compatible version (version 16+)
--- Table: Token compression metrics (LeanCTX, RTK)
+-- Table: Token compression metrics (LLM Gateway)
CREATE TABLE IF NOT EXISTS tokenvault_metrics (
id SERIAL PRIMARY KEY,
file_path VARCHAR(255),
diff --git a/packages/gateway/src/db/schema-extensions.sql b/packages/gateway/src/db/schema-extensions.sql
index 4efdba7..714a203 100644
--- a/packages/gateway/src/db/schema-extensions.sql
+++ b/packages/gateway/src/db/schema-extensions.sql
@@ -1,12 +1,12 @@
-- Tokenvault & Cost Tracking Schema Extensions
-- Created: 2026-04-19
--- Purpose: Track token compression (LeanCTX + RTK) and cost analytics
+-- Purpose: Track token compression (LLM Gateway) and cost analytics
--- Table: Token compression metrics (LeanCTX, RTK)
+-- Table: Token compression metrics (LLM Gateway)
CREATE TABLE IF NOT EXISTS tokenvault_metrics (
id SERIAL PRIMARY KEY,
file_path VARCHAR(255),
- mode VARCHAR(50), -- 'lean-aggressive', 'lean-map', 'rtk-max', etc.
+ mode VARCHAR(50), -- 'gateway-aggressive', 'gateway-map', 'gateway-trim', etc.
tokens_before INT,
tokens_after INT,
savings_pct DECIMAL(5,2),
@@ -26,7 +26,7 @@ CREATE TABLE IF NOT EXISTS cost_analytics (
agent_id VARCHAR(50), -- 'claude-code', 'qwen-reviewer', etc.
tokens_in INT,
tokens_out INT,
- tokens_compressed INT, -- After LeanCTX + RTK
+ tokens_compressed INT, -- After LLM Gateway compression
cost_usd DECIMAL(10,6),
cost_saved_usd DECIMAL(10,6),
provider VARCHAR(50), -- 'ollama', 'cerebras', 'groq', 'claude', etc.
diff --git a/packages/gateway/src/observability/cost-calculator.ts b/packages/gateway/src/observability/cost-calculator.ts
index 4ca992b..59df50a 100644
--- a/packages/gateway/src/observability/cost-calculator.ts
+++ b/packages/gateway/src/observability/cost-calculator.ts
@@ -101,7 +101,7 @@ export function calculateCost(
/**
* Calculate cost savings from compression
* @param model Model identifier
- * @param tokensBeforeCompression Tokens before LeanCTX + RTK
+ * @param tokensBeforeCompression Tokens before LLM Gateway compression
* @param tokensAfterCompression Tokens after compression
* @returns Savings in USD
*/
diff --git a/packages/gateway/src/pipeline/external-providers.ts b/packages/gateway/src/pipeline/external-providers.ts
index 305cdae..aaf3643 100644
--- a/packages/gateway/src/pipeline/external-providers.ts
+++ b/packages/gateway/src/pipeline/external-providers.ts
@@ -47,7 +47,7 @@ const PROVIDERS: readonly ExternalProvider[] = [
enabled: true,
models: [
{ id: 'claude-opus-4-1', tier: 'reasoning', contextLength: 200000 },
- { id: 'claude-sonnet-4-1', tier: 'large', contextLength: 200000 },
+ { id: 'claude-sonnet-4-6', tier: 'large', contextLength: 200000 },
{ id: 'claude-haiku-3', tier: 'fast', contextLength: 200000 },
],
},
@@ -174,7 +174,7 @@ const PROVIDERS: readonly ExternalProvider[] = [
enabled: true,
models: [
{ id: 'claude-opus-4-1', tier: 'reasoning', contextLength: 200000 },
- { id: 'claude-sonnet-4-1', tier: 'large', contextLength: 200000 },
+ { id: 'claude-sonnet-4-6', tier: 'large', contextLength: 200000 },
{ id: 'claude-haiku-3', tier: 'fast', contextLength: 200000 },
],
},
diff --git a/packages/gateway/src/pipeline/request-scorer.ts b/packages/gateway/src/pipeline/request-scorer.ts
index 6f81d25..7998c10 100644
--- a/packages/gateway/src/pipeline/request-scorer.ts
+++ b/packages/gateway/src/pipeline/request-scorer.ts
@@ -728,6 +728,36 @@ function handleFormalLogicOverride(
return result;
}
+// ── Helper: Code Generation Intent Override ───────────────────────────────
+
+/** Phrasings that are treated as an explicit request to produce code. */
+const CODE_GENERATION_PATTERNS = [
+  /\bwrite\s+(?:a\s+)?(?:typescript|javascript|python|go|rust|react|next\.js|node)?\s*(?:function|class|script|module|component|test|handler|middleware)\b/i,
+  /\b(?:implement|create|build|generate|scaffold)\b[\s\S]{0,160}\b(?:api|endpoint|function|class|component|service|schema|migration|crud|jwt|test|project|module)\b/i,
+  /\b(?:rest|graphql)\s+api\b[\s\S]{0,160}\b(?:implement|create|build|endpoint|authentication|jwt)\b/i,
+];
+
+/**
+ * Short-circuit scoring when the request text matches a code-generation pattern.
+ *
+ * @param fullText Text the patterns are tested against.
+ * @param input Scorer input, forwarded to the dimension computation.
+ * @param userMessages Weighted user messages, forwarded to the dimension computation.
+ * @returns A fixed `code_generation` result (score 0.62, confidence 0.86) with the
+ *   computed dimensions attached, or `null` when no pattern matches.
+ */
+function handleCodeGenerationOverride(
+  fullText: string,
+  input: ScorerInput,
+  userMessages: readonly WeightedMessage[],
+): ScoringResult | null {
+  let intentDetected = false;
+  for (const candidate of CODE_GENERATION_PATTERNS) {
+    if (candidate.test(fullText)) {
+      intentDetected = true;
+      break;
+    }
+  }
+  if (!intentDetected) return null;
+
+  // Dimensions are computed first; the session tier is recorded and the debug line logged afterwards.
+  const override: ScoringResult = {
+    tier: 'code_generation',
+    score: 0.62,
+    confidence: 0.86,
+    reason: 'code generation intent detected',
+    dimensions: computeAllDimensions(input, userMessages, fullText),
+  };
+  recordSessionTier('code_generation');
+  logger.debug(
+    { tier: 'code_generation', reason: 'code_generation_override' },
+    'Request scored via code generation override',
+  );
+  return override;
+}
+
// ── Helper: Apply Score Overrides ──────────────────────────────────────────
interface ScoreOverridesInput {
@@ -754,6 +784,7 @@ function applyScoreOverrides(
const codeGenDim = dimensions.find((d) => d.name === 'codeGeneration');
if (codeGenDim && codeGenDim.rawScore > 0.25) {
tier = 'code_generation';
+ confidence = Math.max(confidence, 0.78);
reason = 'code generation keywords detected';
}
@@ -771,7 +802,7 @@ function applyScoreOverrides(
}
// Ambiguity check
- if (confidence < 0.45) {
+ if (confidence < 0.45 && tier !== 'code_generation' && tier !== 'reasoning') {
tier = 'medium';
reason = 'ambiguous (confidence < 0.45, defaulting to medium)';
}
@@ -795,6 +826,9 @@ export function scoreRequest(
const formalLogicResult = handleFormalLogicOverride(fullText, input, userMessages);
if (formalLogicResult) return formalLogicResult;
+ const codeGenerationResult = handleCodeGenerationOverride(fullText, input, userMessages);
+ if (codeGenerationResult) return codeGenerationResult;
+
const dimensions = computeAllDimensions(input, userMessages, fullText);
let rawScore = 0;
for (const dim of dimensions) {
diff --git a/packages/gateway/src/pipeline/router.ts b/packages/gateway/src/pipeline/router.ts
index 6b03d6e..0f76eee 100644
--- a/packages/gateway/src/pipeline/router.ts
+++ b/packages/gateway/src/pipeline/router.ts
@@ -184,14 +184,14 @@ export function getOllamaBaseUrl(): string {
/**
* Maps a scorer tier to the best primary model and its fallback chain.
* The 'reasoning' tier uses llama3.3:70b (complex_reasoning strength) from the large tier.
- * The 'code_generation' tier uses OpenAI Codex (gpt-4-turbo) as primary via external provider.
+ * The 'code_generation' tier uses OpenAI Codex as primary via external provider.
*/
const TIER_MODEL_MAP: Record
= {
fast: { primary: 'qwen2.5:3b', configTier: 'fast' },
medium: { primary: 'qwen2.5:14b', configTier: 'medium' },
large: { primary: 'qwen2.5:32b', configTier: 'large' },
reasoning: { primary: 'llama3.3:70b', configTier: 'large' },
- code_generation: { primary: 'gpt-4-turbo', configTier: 'large', provider: 'openai-codex' },
+ code_generation: { primary: 'gpt-5.1-codex-mini', configTier: 'large', provider: 'openai-codex' },
};
function buildMediumTierFallback(
@@ -223,7 +223,8 @@ function buildScoredFallbackChain(
models: ModelsYaml,
): string[] {
if (tier === 'reasoning' || tier === 'code_generation') {
- return [selectedModel, ...buildFallbackChain(selectedModel, configTier, models).filter((m) => m !== selectedModel)];
+ const fallbackTier = tier === 'code_generation' ? 'code_generation' : configTier;
+ return [selectedModel, ...buildFallbackChain(selectedModel, fallbackTier, models).filter((m) => m !== selectedModel)];
}
return buildFallbackChain(selectedModel, configTier, models);
}
@@ -302,7 +303,7 @@ export function routeByScore(
const mapping = TIER_MODEL_MAP[scoringResult.tier];
const selectedModel = mapping.primary;
const configTier = mapping.configTier;
- const tierConfig = models.tiers[configTier];
+ const tierConfig = models.tiers[scoringResult.tier] ?? models.tiers[configTier];
if (!tierConfig) {
logger.error({ tier: configTier }, 'Tier config not found in models.yaml, falling back to medium');
diff --git a/packages/gateway/src/routes/completion.ts b/packages/gateway/src/routes/completion.ts
index f52a966..ad1d2ec 100644
--- a/packages/gateway/src/routes/completion.ts
+++ b/packages/gateway/src/routes/completion.ts
@@ -127,6 +127,27 @@ function shouldBypassResponseCache(caller: string): boolean {
|| normalized.includes('copilot');
}
+/**
+ * Cheap textual triggers that decide whether an input is worth sending to the
+ * Prompt-Guard classifier. Built once at module load instead of on every
+ * request; none of the expressions carries the `g`/`y` flag, so sharing them
+ * across calls is stateless.
+ */
+const PROMPT_GUARD_TRIGGER_PATTERNS: readonly RegExp[] = [
+  // instruction-override verbs followed (within 120 chars) by instructions/rules/prompt/policy/safety
+  /\b(?:ignore|disregard|forget|override|bypass|jailbreak)\b[\s\S]{0,120}\b(?:instructions?|rules?|prompt|policy|safety)\b/i,
+  // role reassignment phrasing
+  /\b(?:you\s+are\s+now|act\s+as|pretend\s+to\s+be|developer\s+mode|root\s+administrator|runtime\s+controller|security\s+auditor)\b/i,
+  // requests to show/dump prompts, hidden state, tools, filters or reasoning
+  /\b(?:show|print|dump|reveal|output)\b[\s\S]{0,160}\b(?:system\s+prompt|developer\s+prompt|hidden|runtime|memory|tools?|filters?|policy|classifier|chain-of-thought|reasoning)\b/i,
+  // credential words followed (within 160 chars) by a disclosure/login verb
+  /\b(?:passwords?|passw(?:o|ö)rter|credentials?|api\s*keys?|tokens?|secrets?)\b[\s\S]{0,160}\b(?:print|show|write|paste|send|share|reveal|chat|anmelden|log\s*in)\b/i,
+  // encoding / execution keywords
+  /\b(?:base64|rot13|hex\s+encoded|decode|execute|run\s+this)\b/i,
+  // zero-width and bidirectional control characters
+  /[\u200B-\u200F\u202A-\u202E\u2060-\u2064\uFEFF]/,
+  // a run of 40+ base64-alphabet characters
+  /\b[A-Za-z0-9+/]{40,}={0,2}\b/,
+  // a run of 16+ hex byte pairs
+  /\b(?:[0-9a-fA-F]{2}){16,}\b/,
+];
+
+/**
+ * Remove `user:` / `assistant:` / `system:` / `developer:` labels at line starts
+ * and trim the result.
+ *
+ * @param input Raw request input.
+ * @returns The cleaned text, or the original `input` when cleaning leaves nothing.
+ */
+function inputForPromptGuard(input: string): string {
+  const cleaned = input.replace(/^(user|assistant|system|developer):\s*/gim, '').trim();
+  return cleaned || input;
+}
+
+/**
+ * Decide whether the Prompt-Guard classifier should be called for `input`.
+ *
+ * @param input Raw request input.
+ * @param scan Result of the earlier injection scan.
+ * @returns `true` when the scan recorded any match, or when the role-stripped,
+ *   NFKC-normalized input hits one of the trigger patterns.
+ */
+function shouldRunPromptGuard(input: string, scan: InjectionScanResult): boolean {
+  if (scan.matches.length > 0) return true;
+
+  const cleaned = inputForPromptGuard(input).normalize('NFKC');
+  return PROMPT_GUARD_TRIGGER_PATTERNS.some((pattern) => pattern.test(cleaned));
+}
+
const ChatMessageSchema = z.object({
role: z.string().min(1),
content: z.union([z.string(), z.array(z.unknown()), z.null()]).optional(),
@@ -428,8 +449,8 @@ async function executeCompletion(body: CompletionRequest, startMs: number, callI
}
// ─── Layer 2: ML classifier (Prompt-Guard sidecar) ────────────────────
- if (!injectionScan.detected && isPromptGuardConfigured() && body.input.length >= getPromptGuardMinLen()) {
- const pg = await callPromptGuard(body.input);
+ if (!injectionScan.detected && isPromptGuardConfigured() && body.input.length >= getPromptGuardMinLen() && shouldRunPromptGuard(body.input, injectionScan)) {
+ const pg = await callPromptGuard(inputForPromptGuard(body.input));
if (pg.available && pg.label === 'INJECTION' && pg.score >= getPromptGuardThreshold()) {
logger.warn(
{ caller, callId, pg_score: pg.score, pg_latency_ms: pg.latencyMs },
diff --git a/packages/gateway/src/routes/dashboard.ts b/packages/gateway/src/routes/dashboard.ts
index 2ce21ab..5caf176 100644
--- a/packages/gateway/src/routes/dashboard.ts
+++ b/packages/gateway/src/routes/dashboard.ts
@@ -9,6 +9,7 @@ import { createRequestLogger } from '../modules/request-logger.js';
import { globalRequestStream } from '../modules/request-stream.js';
import { getAvailableProviders, getAllProviders } from '../pipeline/external-providers.js';
import { discoverSubscriptions } from '../modules/subscription-discovery.js';
+import { runDiscovery, runDiscoveryAndSpawn } from '../modules/auto-discovery.js';
import { getRunningBridges, spawnDetectedBridges } from '../modules/bridge-spawner.js';
import { getPublicSettings, saveSettings, SettingsPatchSchema } from '../modules/settings-store.js';
import {
@@ -18,6 +19,10 @@ import {
pruneStaleCacheEntries,
} from '../modules/response-cache.js';
import { getComprehensiveSavings } from '../modules/savings-calculator.js';
+
+// Captured once at module load — represents the gateway-process start time
+// for the 'compressed since last restart' tile in the dashboard.
+const SERVER_STARTED_AT_ISO = new Date().toISOString();
import {
getBuddyState,
getAchievements,
@@ -106,6 +111,22 @@ type ProviderRuntime = {
};
const CLIENT_CATALOG = [
+ {
+ id: 'macbook-claude-code',
+ label: 'MacBook (Claude Code)',
+ patterns: ['claude-code-laptop'],
+ commands: [],
+ paths: [],
+ processPatterns: [],
+ },
+ {
+ id: 'macstudio-claude-code',
+ label: 'Mac Studio (Claude Code)',
+ patterns: ['claude-code-macstudio', 'claude-code-studio'],
+ commands: [],
+ paths: [],
+ processPatterns: [],
+ },
{
id: 'codex-desktop',
label: 'Codex Desktop / CLI',
@@ -158,6 +179,17 @@ const CLIENT_CATALOG = [
type ClientStatus = 'live' | 'running' | 'installed' | 'not-connected';
+const CLIENT_BRIDGE_PROVIDERS: Record<(typeof CLIENT_CATALOG)[number]['id'], string | undefined> = { // client catalog id -> provider name handed to providerRuntime() when building client coverage
+ 'macbook-claude-code': undefined, // undefined: no provider runtime is looked up for this client
+ 'macstudio-claude-code': undefined,
+ 'codex-desktop': 'codex',
+ 'claude-desktop': 'claude-code',
+ 'microsoft-copilot': 'm365-copilot-bridge',
+ 'github-copilot': 'copilot-bridge',
+ 'openai-compatible': undefined,
+ 'chatgpt': 'codex-bridge',
+};
+
function expandUserPath(path: string): string {
return path.startsWith('~/') ? `${homedir()}/${path.slice(2)}` : path;
}
@@ -217,8 +249,22 @@ async function getGatewayClientCoverage(hoursBack: number = 24): Promise> {
const detections = await getLocalDesktopDetections();
+ const bridgeRuntimes = Object.fromEntries(await Promise.all(CLIENT_CATALOG.map(async (client) => {
+ const providerName = CLIENT_BRIDGE_PROVIDERS[client.id];
+ return [
+ client.id,
+ {
+ providerName,
+ ...(providerName ? await providerRuntime(providerName) : {}),
+ },
+ ] as const;
+ })));
let callers: Array<{ caller: string; requestCount: number; lastSeen?: string; tokensIn: number; tokensSaved: number }> = [];
try {
@@ -257,12 +303,19 @@ async function getGatewayClientCoverage(hoursBack: number = 24): Promise();
return CLIENT_CATALOG.map((client) => {
const detection = detections[client.id];
+ const bridgeRuntime = bridgeRuntimes[client.id];
const matched = callers.filter((row) => {
+ if (assignedCallers.has(row.caller)) return false;
const caller = row.caller.toLowerCase();
return client.patterns.some((pattern) => caller.includes(pattern));
});
+ matched.forEach((row) => assignedCallers.add(row.caller));
const requestCount = matched.reduce((sum, row) => sum + row.requestCount, 0);
const tokensIn = matched.reduce((sum, row) => sum + row.tokensIn, 0);
const tokensSaved = matched.reduce((sum, row) => sum + row.tokensSaved, 0);
@@ -283,6 +336,10 @@ async function getGatewayClientCoverage(hoursBack: number = 24): Promise 0 ? 'gateway' : detection?.installed ? 'local-detection' : 'none',
detectionSignals: detection?.signals ?? [],
+ bridgeProvider: bridgeRuntime?.providerName,
+ bridgeStatus: bridgeRuntime?.runtimeStatus,
+ bridgeHealthy: bridgeRuntime?.runtimeHealthy,
+ bridgeDetail: bridgeRuntime?.runtimeDetail,
};
});
}
@@ -291,8 +348,6 @@ function bridgeHealthUrl(providerName: string): string | undefined {
const bridgeUrls: Record = {
'claude-bridge': process.env['CLAUDE_BRIDGE_URL'],
'claude-code': process.env['CLAUDE_CODE_URL'] || process.env['CLAUDE_BRIDGE_URL'],
- 'openai-bridge': process.env['OPENAI_BRIDGE_URL'],
- 'chatgpt-bridge': process.env['CHATGPT_BRIDGE_URL'] || process.env['OPENAI_BRIDGE_URL'],
'copilot-bridge': process.env['COPILOT_BRIDGE_URL'],
'm365-copilot-bridge': process.env['M365_COPILOT_BRIDGE_URL'],
'openai-codex': process.env['OPENAI_CODEX_URL'] || process.env['CODEX_BRIDGE_URL'],
@@ -575,7 +630,7 @@ export async function dashboardRoute(fastify: FastifyInstance): Promise {
const configuredProviders = providers.filter((provider) => provider.enabled && !!process.env[provider.envKey]);
const localProviders = providers.filter((provider) => provider.name.toLowerCase().includes('ollama'));
const subscriptionProviders = providers.filter((provider) =>
- ['claude-bridge', 'claude-code', 'openai-bridge', 'chatgpt-bridge', 'copilot-bridge', 'm365-copilot-bridge', 'codex', 'openai-codex']
+ ['claude-bridge', 'copilot-bridge', 'm365-copilot-bridge', 'openai-codex']
.includes(provider.name)
);
@@ -883,12 +938,10 @@ export async function dashboardRoute(fastify: FastifyInstance): Promise {
const displayLabels: Record = {
'claude-bridge': 'Claude Code Subscription (Bridge)',
'claude-code': 'Claude Code Direct',
- 'openai-bridge': 'OpenAI ChatGPT Subscription (Bridge)',
- 'chatgpt-bridge': 'ChatGPT Plus Subscription (Bridge)',
'copilot-bridge': 'GitHub Copilot Subscription',
'm365-copilot-bridge': 'Microsoft 365 Copilot Subscription',
- 'codex': 'GitHub Copilot Codex (Inner API)',
- 'openai-codex': 'OpenAI API (Codex / GPT)',
+ 'copilot-codex': 'GitHub Copilot (Codex Inner API)',
+ 'openai-codex': 'OpenAI (ChatGPT + Codex)',
'cerebras': 'Cerebras (Free Tier)',
'groq': 'Groq (Free Tier)',
'mistral': 'Mistral AI (Free Tier)',
@@ -898,9 +951,8 @@ export async function dashboardRoute(fastify: FastifyInstance): Promise {
// Subscription providers (paid via login/subscription, NOT free-tier API)
const subscriptionNames = new Set([
- 'claude-bridge', 'claude-code',
- 'openai-bridge', 'chatgpt-bridge',
- 'copilot-bridge', 'm365-copilot-bridge', 'codex', 'openai-codex'
+ 'claude-bridge',
+ 'copilot-bridge', 'm365-copilot-bridge', 'openai-codex'
]);
// Categorize all providers (independent of API-key presence)
@@ -1073,6 +1125,36 @@ export async function dashboardRoute(fastify: FastifyInstance): Promise {
}
});
+ // ─── Full-System Auto-Discovery ─────────────────────────────────────────
+ // GET /api/dashboard/discover → unified report (read-only)
+ // POST /api/dashboard/discover → discover + spawn bridges
+ fastify.get('/api/dashboard/discover', dashboardAuth, async (_request: FastifyRequest, reply: FastifyReply) => {
+ try {
+ const report = await runDiscovery();
+ return reply.send({ success: true, data: report });
+ } catch (error) {
+ logger.error({ error }, 'Discovery scan failed');
+ return reply.status(500).send({ success: false, error: 'Discovery scan failed' });
+ }
+ });
+
+ fastify.post('/api/dashboard/discover', dashboardAuth, async (_request: FastifyRequest, reply: FastifyReply) => {
+ try {
+ const result = await runDiscoveryAndSpawn();
+ return reply.send({
+ success: true,
+ data: {
+ report: result.report,
+ spawned: result.spawned,
+ spawnedCount: result.spawned.length,
+ },
+ });
+ } catch (error) {
+ logger.error({ error }, 'Discovery + spawn failed');
+ return reply.status(500).send({ success: false, error: 'Discovery + spawn failed' });
+ }
+ });
+
// POST /api/dashboard/subscriptions/spawn — trigger auto-spawn of detected bridges.
// Returns the list of bridges that were spawned (or already running).
fastify.post('/api/dashboard/subscriptions/spawn', dashboardAuth, async (_request: FastifyRequest, reply: FastifyReply) => {
@@ -1180,7 +1262,8 @@ export async function dashboardRoute(fastify: FastifyInstance): Promise {
effectiveSavingsPercent,
totals: comprehensive.totals,
},
- },
+ // Compression since this gateway process started — resets at each restart.
+ },
series,
},
meta: { hours, bucket_minutes: bucketMin, timestamp: new Date().toISOString() },
@@ -1638,4 +1721,52 @@ export async function dashboardRoute(fastify: FastifyInstance): Promise {
return reply.status(500).send({ error: 'Failed to serve dashboard UI' });
}
});
+
+ // Passive usage import: lets clients that talk DIRECTLY to a provider (e.g. the
+ // laptop's Claude Code -> api.anthropic.com) report their usage so they appear in
+ // clients/costs WITHOUT routing traffic through the gateway. A caller containing
+ // 'claude-code' matches the CLIENT_CATALOG 'claude-desktop' entry.
+ // Body fields: caller, model, tokens_in, tokens_out, cost_usd, day (YYYY-MM-DD; defaults to today in UTC).
+ // Responds 400 when no tokens are reported or `day` is not a parseable date, 500 on storage failure.
+ fastify.post('/api/dashboard/usage/report', dashboardAuth, async (request: FastifyRequest, reply: FastifyReply) => {
+ try {
+ const body = (request.body ?? {}) as Record<string, unknown>;
+ const caller = String(body.caller ?? 'claude-code-laptop').slice(0, 120);
+ const model = String(body.model ?? 'claude-code').slice(0, 120);
+ const tokensIn = Math.max(0, Math.floor(Number(body.tokens_in) || 0));
+ const tokensOut = Math.max(0, Math.floor(Number(body.tokens_out) || 0));
+ const costUsd = Math.max(0, Number(body.cost_usd) || 0);
+ const day = String(body.day ?? new Date().toISOString().slice(0, 10)).slice(0, 32);
+ if (tokensIn === 0 && tokensOut === 0) {
+ return reply.status(400).send({ success: false, error: 'tokens_in or tokens_out required' });
+ }
+ // Reject a `day` that is not a parseable YYYY-MM-DD value: it would otherwise become an
+ // Invalid Date, slip past the "cap at now" comparison below, and be sent to the database.
+ if (!/^\d{4}-\d{2}-\d{2}$/.test(day) || Number.isNaN(Date.parse(`${day}T23:59:59Z`))) {
+ return reply.status(400).send({ success: false, error: 'day must be a valid YYYY-MM-DD date' });
+ }
+ // Stamp the row with the ACTUAL usage day so lastSeen = when tokens were
+ // used, not when the export ran. Cap at "now" so today's still-growing day
+ // reads as current/live.
+ const dayEnd = new Date(`${day}T23:59:59Z`);
+ const usedAt = dayEnd.getTime() > Date.now() ? new Date() : dayEnd;
+ const db = getPool();
+ const requestId = `usage-import:${caller}:${model}:${day}`;
+ // Upsert by request_id (one row per caller/model/day): re-reporting an
+ // in-progress day updates its totals instead of creating duplicates.
+ const updated = await db.query(
+ `UPDATE request_tracking SET tokens_in=$1, tokens_out=$2, cost_usd=$3, created_at=$4 WHERE request_id=$5`,
+ [tokensIn, tokensOut, costUsd, usedAt, requestId]
+ );
+ if (updated.rowCount === 0) {
+ const requestLogger = createRequestLogger(db);
+ await requestLogger.logRequest(requestId, caller, 'usage_import', model, 'approved', tokensIn, tokensOut, costUsd, 0);
+ await db.query(`UPDATE request_tracking SET created_at=$1 WHERE request_id=$2`, [usedAt, requestId]);
+ }
+ return reply.status(200).send({ success: true, imported: { caller, model, day, tokensIn, tokensOut, costUsd, usedAt } });
+ } catch (error) {
+ logger.error({ error }, 'Failed to import usage report');
+ return reply.status(500).send({ success: false, error: 'Failed to import usage report' });
+ }
+ });
}
diff --git a/packages/gateway/src/routes/health.ts b/packages/gateway/src/routes/health.ts
index 9aa93b0..e7c546e 100644
--- a/packages/gateway/src/routes/health.ts
+++ b/packages/gateway/src/routes/health.ts
@@ -38,22 +38,40 @@ async function checkOllama(baseUrl: string): Promise<{ status: 'ok' | 'down'; la
async function checkDatabase(): Promise<{ status: 'ok' | 'down'; error?: string }> {
try {
- await query('SELECT 1');
+ await withTimeout(query('SELECT 1'), 2500, 'database check timed out');
return { status: 'ok' };
} catch (err) {
return { status: 'down', error: err instanceof Error ? err.message : 'Unknown error' };
}
}
+async function withTimeout<T>(promise: Promise<T>, timeoutMs: number, message: string): Promise<T> { // settles like `promise`, or rejects with Error(message) once timeoutMs elapses; the underlying work is not cancelled
+ let timer: NodeJS.Timeout | undefined;
+ try {
+ return await Promise.race([
+ promise,
+ new Promise<never>((_resolve, reject) => { // never resolves; this arm exists only to reject at the deadline
+ timer = setTimeout(() => reject(new Error(message)), timeoutMs);
+ }),
+ ]);
+ } finally {
+ if (timer) clearTimeout(timer); // drop the pending timer once the race has settled either way
+ }
+}
+
async function checkQueue(): Promise<{ status: 'ok' | 'down' | 'unknown'; depth?: number; error?: string }> {
const boss = getPgBoss();
if (!boss) return { status: 'unknown' };
try {
- const [queued, active] = await Promise.all([
- boss.getQueueSize('llm-batch', { before: 'completed' }),
- boss.getQueueSize('llm-batch', { before: 'active' }),
- ]);
+ const [queued, active] = await withTimeout(
+ Promise.all([
+ boss.getQueueSize('llm-batch', { before: 'completed' }),
+ boss.getQueueSize('llm-batch', { before: 'active' }),
+ ]),
+ 2500,
+ 'queue check timed out',
+ );
return { status: 'ok', depth: (queued ?? 0) + (active ?? 0) };
} catch (err) {
return { status: 'down', error: err instanceof Error ? err.message : 'Unknown error' };
@@ -62,8 +80,10 @@ async function checkQueue(): Promise<{ status: 'ok' | 'down' | 'unknown'; depth?
async function getReviewQueueCount(): Promise {
try {
- const result = await query<{ count: string }>(
- 'SELECT COUNT(*) as count FROM review_queue WHERE decision IS NULL',
+ const result = await withTimeout(
+ query<{ count: string }>('SELECT COUNT(*) as count FROM review_queue WHERE decision IS NULL'),
+ 2500,
+ 'review queue check timed out',
);
return parseInt(result.rows[0]?.count ?? '0', 10);
} catch {
@@ -109,8 +129,8 @@ export async function healthRoute(fastify: FastifyInstance): Promise {
const breakerStates = getAllBreakerStates();
- const isDown = ollamaCheck.status === 'down' || dbCheck.status === 'down';
- const isDegraded = queueCheck.status === 'down' || Object.values(breakerStates).some((s) => s === 'open');
+ const isDown = dbCheck.status === 'down';
+ const isDegraded = ollamaCheck.status === 'down' || queueCheck.status === 'down' || Object.values(breakerStates).some((s) => s === 'open');
const status: HealthStatus['status'] = isDown ? 'down' : isDegraded ? 'degraded' : 'ok';
diff --git a/packages/gateway/src/security/tls-config.ts b/packages/gateway/src/security/tls-config.ts
index 354102c..74d253a 100644
--- a/packages/gateway/src/security/tls-config.ts
+++ b/packages/gateway/src/security/tls-config.ts
@@ -107,9 +107,22 @@ export async function registerHTTPSRedirectMiddleware(server: FastifyInstance) {
return;
}
- // Skip for localhost/loopback callers (infra-health, fix-engine, internal services)
- const reqHost = String(request.headers['host'] ?? '');
- if (reqHost.startsWith('localhost') || reqHost.startsWith('127.0.0.1')) {
+ const hostHeader = String(request.headers['host'] ?? '');
+ const forwardedHost = String(request.headers['x-forwarded-host'] ?? '');
+ const remoteAddress = request.ip ?? '';
+ const host = forwardedHost || hostHeader;
+ const isLoopbackHost =
+ /^localhost(?::\d+)?$/i.test(host) ||
+ /^127\.0\.0\.1(?::\d+)?$/.test(host) ||
+ /^\[::1\](?::\d+)?$/.test(host);
+ const isLoopbackRemote =
+ remoteAddress === '127.0.0.1' ||
+ remoteAddress === '::1' ||
+ remoteAddress === '::ffff:127.0.0.1';
+
+ // Internal loopback callers such as Magatama Core run behind the same host
+ // and must not be redirected to HTTPS unless the Gateway actually serves TLS.
+ if (isLoopbackHost || isLoopbackRemote) {
return;
}
@@ -120,7 +133,6 @@ export async function registerHTTPSRedirectMiddleware(server: FastifyInstance) {
(request.headers['x-forwarded-proto'] === 'https');
if (!isSecure && process.env['NODE_ENV'] === 'production') {
- const host = request.headers['x-forwarded-host'] || request.headers['host'];
return reply.redirect(`https://${host}${request.url}`);
}
});
@@ -132,14 +144,11 @@ export async function registerHTTPSRedirectMiddleware(server: FastifyInstance) {
*/
export async function registerSecurityHeadersMiddleware(server: FastifyInstance) {
server.addHook('onSend', async (request, reply) => {
- // Content Security Policy — route handlers may set a narrower CSP before this hook.
- // Default allows 'unsafe-inline' for the dashboard UI.
- if (!reply.getHeader('Content-Security-Policy')) {
- reply.header(
- 'Content-Security-Policy',
- "default-src 'self'; script-src 'self' 'unsafe-inline'; style-src 'self' 'unsafe-inline'; object-src 'none'; frame-ancestors 'none'; base-uri 'self'; form-action 'self'"
- );
- }
+ // Content Security Policy for the self-contained dashboard UI.
+ reply.header(
+ 'Content-Security-Policy',
+ "default-src 'self'; script-src 'self' 'unsafe-inline'; style-src 'self' 'unsafe-inline'; object-src 'none'; frame-ancestors 'none'; base-uri 'self'; form-action 'self'"
+ );
// Prevent clickjacking
reply.header('X-Frame-Options', 'DENY');
diff --git a/packages/gateway/src/server.ts b/packages/gateway/src/server.ts
index 33ac14b..0d5626e 100644
--- a/packages/gateway/src/server.ts
+++ b/packages/gateway/src/server.ts
@@ -12,11 +12,23 @@ import { dashboardRoute } from './routes/dashboard.js';
import { streamRoute } from './routes/stream.js';
import { learningInsightsRoute } from './routes/learning-insights.js';
import { staticRoute } from './routes/static.js';
+import tenantAuth from './security/tenant-auth.js';
+import { internalRoute } from './routes/internal.js';
import { getPool } from './db/client.js';
import { runMigrations } from './db/migrate.js';
import { initPgBoss } from './queue/pg-boss-client.js';
import { logger } from './observability/logger.js';
import { scheduleLearningCycles } from './learning/learning-engine.js';
+import { autoSpawnOnBoot } from './modules/auto-discovery.js';
+import { embeddingsRoute } from './routes/embeddings.js';
+import { replayRoute } from './routes/replay.js';
+import { audioRoute } from './routes/audio.js';
+import { mcpRoute } from './modules/mcp-server.js';
+import { loadWorkspacePreset, applyWorkspaceDefaults } from './modules/workspace-presets.js';
+import { loadPlugins } from './modules/plugin-system.js';
+import { ingestPeerStats, scheduleFederationPublisher, buildStats } from './modules/federated-stats.js';
+import { scheduleAdaptiveLearner, getAllRecommendations } from './modules/adaptive-routing.js';
+import { startBridgeWatchdog } from './modules/bridge-watchdog.js';
import { fileURLToPath } from 'url';
import { dirname, join } from 'path';
import { readFileSync, existsSync } from 'fs';
@@ -93,9 +105,11 @@ async function buildServer() {
'http://192.168.178.196:3000',
/^http:\/\/192\.168\.178\.\d+/,
/^https:\/\/.*\.context-x\.org$/,
+ /^https:\/\/(www\.)?runwerk\.app$/,
+ /^https:\/\/.*\.runwerk\.app$/,
],
methods: ['GET', 'POST', 'PUT', 'DELETE', 'OPTIONS'],
- allowedHeaders: ['Content-Type', 'Authorization', 'X-Caller-ID'],
+ allowedHeaders: ['Content-Type', 'Authorization', 'X-Caller-ID', 'X-Runwerk-Caller', 'X-Runwerk-Privacy', 'X-Runwerk-Tier', 'X-Runwerk-Purpose'],
credentials: true,
});
@@ -114,7 +128,17 @@ async function buildServer() {
}),
});
+ await server.register(tenantAuth);
+ await server.register(internalRoute);
await server.register(completionRoute, { prefix: '/v1' });
+ await server.register(embeddingsRoute, { prefix: '/v1' });
+ await server.register(replayRoute, { prefix: '/v1' });
+ await server.register(audioRoute, { prefix: '/v1' });
+ await server.register(mcpRoute);
+ server.post('/v1/federation/ingest', async (request, reply) => {
+ const result = ingestPeerStats(request.body as never);
+ return reply.send({ success: true, ...result });
+ });
await server.register(batchRoute, { prefix: '/v1' });
await server.register(classifyRoute, { prefix: '/v1' });
await server.register(reviewRoute, { prefix: '/v1' });
@@ -193,9 +217,54 @@ async function main() {
} catch (pgErr) {
logger.warn({ pgErr }, 'PgBoss init failed - continuing without queue');
}
+ // Workspace preset (apply env defaults from workspace.yaml if present)
+ try {
+ const preset = await loadWorkspacePreset();
+ if (preset) applyWorkspaceDefaults(preset);
+ } catch (err) {
+ logger.warn({ err }, 'Workspace preset load failed (non-fatal)');
+ }
+
+ // Plugin system (load pre/post hooks from PLUGINS_DIR)
+ try {
+ await loadPlugins();
+ } catch (err) {
+ logger.warn({ err }, 'Plugin loading failed (non-fatal)');
+ }
+
scheduleLearningCycles();
await server.listen({ port, host });
logger.info({ port, host }, 'LLM Gateway started');
+
+ // Auto-spawn detected subscription bridges if AUTO_SPAWN_BRIDGES=1
+ void autoSpawnOnBoot();
+
+ // Bridge watchdog (opt-in via WATCHDOG_ENABLED=1)
+ try {
+ startBridgeWatchdog();
+ } catch (err) {
+ logger.warn({ err }, 'Bridge watchdog start failed');
+ }
+
+ // Adaptive routing learner (opt-in via ADAPTIVE_ROUTING_ENABLED=1)
+ try {
+ const pool = getPool();
+ scheduleAdaptiveLearner(pool as never);
+ } catch (err) {
+ logger.warn({ err }, 'Adaptive learner scheduling failed');
+ }
+
+ // Federation publisher (opt-in via FEDERATION_ENABLED=1)
+ scheduleFederationPublisher(async () => {
+ const recos = getAllRecommendations();
+ return buildStats(recos.map((r) => ({
+ task_type: r.taskType,
+ model_used: r.preferredModel,
+ samples: r.rationale.samples,
+ success_rate: r.rationale.successRate,
+ avg_latency_ms: r.rationale.avgLatencyMs,
+ })));
+ });
} catch (err) {
logger.error({ err }, 'Failed to start server');
process.exit(1);
diff --git a/packages/gateway/src/utils/tokenvault-hooks.ts b/packages/gateway/src/utils/tokenvault-hooks.ts
index 688e2d0..56460fc 100644
--- a/packages/gateway/src/utils/tokenvault-hooks.ts
+++ b/packages/gateway/src/utils/tokenvault-hooks.ts
@@ -1,5 +1,5 @@
// Tokenvault Integration Hooks
-// Instruments LeanCTX and RTK compression tracking
+// Instruments LLM Gateway compression tracking (legacy hook names retained for backward compat)
// Updated: 2026-04-19
import { Pool, QueryResult } from 'pg';
@@ -62,13 +62,13 @@ export function estimateTokens(text: string | object): number {
}
/**
- * Log compression ratio for RTK output
+ * Log compression ratio for token-trim output
*/
-export async function logRTKCompression(
+export async function logGatewayTrimCompression(
db: Pool,
rawOutput: string,
compressedOutput: string,
- toolUsed: string = 'rtk'
+ toolUsed: string = 'llm-gateway-trim'
): Promise {
const tokensBefore = estimateTokens(rawOutput);
const tokensAfter = estimateTokens(compressedOutput);
@@ -93,9 +93,9 @@ export async function logRTKCompression(
}
/**
- * Track LeanCTX file read operations
+ * Track gateway file-read operations
*/
-export async function logLeanCTXRead(
+export async function logGatewayFileRead(
db: Pool,
filePath: string,
mode: string,
@@ -115,7 +115,7 @@ export async function logLeanCTXRead(
tokensBefore: rawTokens,
tokensAfter: compressedTokens,
savingsPct,
- toolUsed: 'lean-ctx'
+ toolUsed: 'llm-gateway'
};
await logCompressionMetric(db, metric);
@@ -207,7 +207,7 @@ export async function getCompressionStats(
tool_used,
COUNT(*) as count
FROM tokenvault_metrics
- WHERE created_at > NOW() - INTERVAL $1 HOUR
+ WHERE created_at > NOW() - ($1 * INTERVAL '1 hour')
GROUP BY tool_used`,
[hoursBack]
);
@@ -270,7 +270,7 @@ export async function getCostSummary(
project,
SUM(CASE WHEN cost_usd > 0 THEN 1 ELSE 0 END) as paid_tasks
FROM cost_analytics
- WHERE created_at > NOW() - INTERVAL $1 HOUR
+ WHERE created_at > NOW() - ($1 * INTERVAL '1 hour')
GROUP BY project`,
[hoursBack]
);