diff --git a/deploy/ecosystem.config.cjs b/deploy/ecosystem.config.cjs
index 8e39301..e71fcbc 100644
--- a/deploy/ecosystem.config.cjs
+++ b/deploy/ecosystem.config.cjs
@@ -17,8 +17,8 @@ module.exports = {
       env: {
         NODE_ENV: 'production',
         PORT: 3103,
-        DATABASE_URL: 'postgresql://llm:llm_secure_2026@localhost:5432/llm_gateway',
-        TIP_DATABASE_URL: 'postgresql://tip:tip_prod_2026@localhost:5432/transceiver_db',
+        DATABASE_URL: process.env.DATABASE_URL || '',
+        TIP_DATABASE_URL: process.env.TIP_DATABASE_URL || '',
         OLLAMA_URL: 'http://192.168.178.213:11434',
         LOG_LEVEL: 'info',
         GITEA_URL: 'http://192.168.178.196:3000',
@@ -100,7 +100,7 @@ module.exports = {
       exec_mode: 'fork',
       env: {
         NODE_ENV: 'production',
-        DATABASE_URL: 'postgresql://llm:llm_secure_2026@localhost:5432/llm_gateway',
+        DATABASE_URL: process.env.DATABASE_URL || '',
         GATEWAY_URL: 'http://localhost:3103',
       },
       autorestart: true,
diff --git a/docker-compose.yaml b/docker-compose.yaml
index 255d42b..68f5c9b 100644
--- a/docker-compose.yaml
+++ b/docker-compose.yaml
@@ -8,7 +8,7 @@ services:
       NODE_ENV: production
       PORT: "3100"
       DATABASE_URL: "${DATABASE_URL}"
-      TIP_DATABASE_URL: "postgresql://tip:tip_prod_2026@82.165.222.127:5433/transceiver_db"
+      TIP_DATABASE_URL: "${TIP_DATABASE_URL}"
       OLLAMA_URL: "http://192.168.178.169:11434"
       SHIELDX_URL: "${SHIELDX_URL:-}"
       GITEA_URL: "http://gitea.context-x.org"
diff --git a/packages/ctx-health/src/db/client.ts b/packages/ctx-health/src/db/client.ts
index f6010dd..587d4ad 100644
--- a/packages/ctx-health/src/db/client.ts
+++ b/packages/ctx-health/src/db/client.ts
@@ -5,10 +5,11 @@ const { Pool } = pg;
 
 let pool: pg.Pool | null = null;
 
-const DEFAULT_DB_URL = 'postgresql://llm:llm_secure_2026@localhost:5432/llm_gateway';
+
 
 function buildPoolConfig(): pg.PoolConfig {
-  const databaseUrl = process.env['CTX_HEALTH_DB_URL'] ?? process.env['DATABASE_URL'] ?? DEFAULT_DB_URL;
+  const databaseUrl = process.env['CTX_HEALTH_DB_URL'] ?? process.env['DATABASE_URL'];
+  if (!databaseUrl) throw new Error('CTX_HEALTH_DB_URL or DATABASE_URL env var is required');
   return {
     connectionString: databaseUrl,
     max: 3,
diff --git a/packages/fine-tuner/config/fine_tuner.yaml b/packages/fine-tuner/config/fine_tuner.yaml
index a42585e..e3104ff 100644
--- a/packages/fine-tuner/config/fine_tuner.yaml
+++ b/packages/fine-tuner/config/fine_tuner.yaml
@@ -1,4 +1,4 @@
-database_url: "postgresql://llm:llm_secure_2026@127.0.0.1:15432/llm_gateway"
+database_url: "${DATABASE_URL}"  # Set via environment variable at runtime
 gateway_url: "https://llm-gateway.context-x.org"
 ollama_url: "http://localhost:11434"
 
diff --git a/packages/fine-tuner/config/fo-blog-v6.yaml b/packages/fine-tuner/config/fo-blog-v6.yaml
index c7d9edf..546c09a 100644
--- a/packages/fine-tuner/config/fo-blog-v6.yaml
+++ b/packages/fine-tuner/config/fo-blog-v6.yaml
@@ -1,4 +1,4 @@
-database_url: "postgresql://llm:llm_secure_2026@127.0.0.1:5432/llm_gateway"
+database_url: "${DATABASE_URL}"  # Set via environment variable at runtime
 gateway_url: "https://llm-gateway.context-x.org"
 ollama_url: "http://localhost:11434"
 
diff --git a/packages/gateway/src/integrations/tip-db.ts b/packages/gateway/src/integrations/tip-db.ts
index 12f5011..f1350ea 100644
--- a/packages/gateway/src/integrations/tip-db.ts
+++ b/packages/gateway/src/integrations/tip-db.ts
@@ -9,7 +9,7 @@ const TIP_DB_CONFIG = {
   port: parseInt(process.env['TIP_DB_PORT'] ?? '5433', 10),
   database: process.env['TIP_DB_NAME'] ?? 'transceiver_db',
   user: process.env['TIP_DB_USER'] ?? 'tip',
-  password: process.env['TIP_DB_PASSWORD'] ?? 'tip_prod_2026',
+  password: process.env['TIP_DB_PASSWORD']!,
   max: 5,
   idleTimeoutMillis: 60_000,
   connectionTimeoutMillis: 10_000,
diff --git a/packages/gateway/src/pipeline/external-providers.ts b/packages/gateway/src/pipeline/external-providers.ts
index f9649bc..fa5f280 100644
--- a/packages/gateway/src/pipeline/external-providers.ts
+++ b/packages/gateway/src/pipeline/external-providers.ts
@@ -257,6 +257,41 @@ function findBestModel(
 
 // ─── OpenAI-Compatible Client ───────────────────────────────────────
 
+// Bridge providers handle auth internally, so only non-bridge providers get a Bearer Authorization header.
+function buildRequestHeaders(provider: ExternalProvider, apiKey: string): Record<string, string> {
+  const bridgeProviders = ['claude-bridge', 'openai-bridge', 'chatgpt-bridge', 'copilot-bridge'];
+  const baseHeaders: Record<string, string> = { 'Content-Type': 'application/json' };
+  if (bridgeProviders.includes(provider.name)) return baseHeaders;
+  return { ...baseHeaders, Authorization: `Bearer ${apiKey}` };
+}
+
+// Build the JSON request body; temperature falls back to 0.3 and max_tokens to 2048 when null or undefined.
+function buildRequestPayload(model: ExternalModel, request: ExternalCompletionRequest): Record<string, unknown> {
+  const { messages, temperature, max_tokens: maxTokens } = request;
+  const payload: Record<string, unknown> = { model: model.id, messages };
+  payload['temperature'] = temperature ?? 0.3;
+  payload['max_tokens'] = maxTokens ?? 2048;
+  return payload;
+}
+
+// Map an OpenAI-compatible completion body onto ExternalCompletionResponse and record the provider call.
+// Missing fields fall back to '' (content), 0 (token counts) and the requested model id (model).
+// `start` is a Date.now() timestamp; latencyMs is the time elapsed since it.
+function parseExternalResponse(data: unknown, model: ExternalModel, provider: ExternalProvider, start: number): ExternalCompletionResponse {
+  // View the untyped JSON through the expected shape rather than `any`, so misspelled fields fail to compile.
+  const body = data as { choices?: { message?: { content?: string } }[]; usage?: { prompt_tokens?: number; completion_tokens?: number }; model?: string };
+  const content = body.choices?.[0]?.message?.content ?? '';
+  recordRequest(provider.name);
+  return {
+    response: content,
+    model: body.model ?? model.id,
+    provider: provider.name,
+    inputTokens: body.usage?.prompt_tokens ?? 0,
+    outputTokens: body.usage?.completion_tokens ?? 0,
+    latencyMs: Date.now() - start,
+  };
+}
+
 async function callProvider(
   provider: ExternalProvider,
   model: ExternalModel,
@@ -275,25 +310,13 @@ async function callProvider(
   const start = Date.now();
 
   try {
-    const headers: Record<string, string> = {
-      'Content-Type': 'application/json',
-    };
-
-    // Only add Authorization header for non-bridge providers
-    // Bridge services (claude-bridge, openai-bridge, chatgpt-bridge, copilot-bridge) handle auth internally
-    if (!['claude-bridge', 'openai-bridge', 'chatgpt-bridge', 'copilot-bridge'].includes(provider.name)) {
-      headers['Authorization'] = `Bearer ${apiKey}`;
-    }
+    const headers = buildRequestHeaders(provider, apiKey);
+    const payload = buildRequestPayload(model, request);
 
     const response = await fetch(url, {
       method: 'POST',
       headers,
-      body: JSON.stringify({
-        model: model.id,
-        messages: request.messages,
-        temperature: request.temperature ?? 0.3,
-        max_tokens: request.max_tokens ?? 2048,
-      }),
+      body: JSON.stringify(payload),
       signal: controller.signal,
     });
 
@@ -302,23 +325,8 @@ async function callProvider(
       throw new Error(`${provider.name} HTTP ${response.status}: ${body.slice(0, 200)}`);
     }
 
-    const data = (await response.json()) as {
-      choices: { message: { content: string } }[];
-      usage?: { prompt_tokens: number; completion_tokens: number };
-      model?: string;
-    };
-
-    const content = data.choices?.[0]?.message?.content ?? '';
-    recordRequest(provider.name);
-
-    return {
-      response: content,
-      model: data.model ?? model.id,
-      provider: provider.name,
-      inputTokens: data.usage?.prompt_tokens ?? 0,
-      outputTokens: data.usage?.completion_tokens ?? 0,
-      latencyMs: Date.now() - start,
-    };
+    const data = await response.json();
+    return parseExternalResponse(data, model, provider, start);
   } finally {
     clearTimeout(timer);
   }
diff --git a/packages/gateway/src/pipeline/llm-client.ts b/packages/gateway/src/pipeline/llm-client.ts
index d51755f..c991e73 100644
--- a/packages/gateway/src/pipeline/llm-client.ts
+++ b/packages/gateway/src/pipeline/llm-client.ts
@@ -69,6 +69,75 @@ function isTimeoutError(err: unknown): boolean {
   return false;
 }
 
+// Call one Ollama model through the breaker returned by getBreaker, making at most MAX_RETRIES attempts.
+// Only timeout errors are retried; any other error is logged and ends the attempts immediately.
+// Resolves to the response on success, or null when the model is given up on.
+async function tryModelWithRetries(
+  modelReq: OllamaRequest,
+  tier: ModelTier,
+  timeoutMs: number,
+): Promise<OllamaResponse | null> {
+  const breaker = getBreaker(
+    modelReq.model,
+    tier,
+    (r: OllamaRequest) => fetchOllama(r, timeoutMs),
+  );
+  const MAX_RETRIES = 2;
+
+  for (let attempt = 0; attempt < MAX_RETRIES; attempt++) {
+    try {
+      if (attempt > 0) {
+        logger.info({ model: modelReq.model, attempt }, 'Retrying Ollama call after timeout');
+      }
+      const result = await breaker.fire(modelReq);
+      if (attempt > 0) {
+        logger.info({ model: modelReq.model, attempt }, 'Ollama retry succeeded');
+      }
+      return result;
+    } catch (err) {
+      if (!isTimeoutError(err)) {
+        logger.error({ err, model: modelReq.model }, 'Ollama non-timeout error, skipping retry');
+        break;
+      }
+      if (attempt < MAX_RETRIES - 1) {
+        logger.warn({ model: modelReq.model, attempt }, 'Ollama timeout, retrying');
+      }
+    }
+  }
+  return null;
+}
+
+// Replay the Ollama request against the external providers and reshape the result as an OllamaResponse.
+async function tryExternalFallback(req: OllamaRequest, tier: ModelTier): Promise<OllamaResponse> {
+  const tierMap: Record<ModelTier, 'fast' | 'medium' | 'large' | 'reasoning'> = {
+    fast: 'fast',
+    medium: 'medium',
+    large: 'large',
+  };
+  const externalTier = tierMap[tier] ?? 'medium';
+  const { response, latencyMs, outputTokens, inputTokens, provider, model } = await callExternalFallback(
+    {
+      model: req.model,
+      messages: [
+        ...(req.system ? [{ role: 'system', content: req.system }] : []),
+        { role: 'user', content: req.prompt },
+      ],
+      temperature: req.options?.temperature,
+      max_tokens: req.options?.num_predict,
+    },
+    externalTier,
+  );
+  // total_duration is latencyMs (milliseconds) scaled by 1e6; model is reported as "<provider>/<model>".
+  return {
+    response,
+    done: true,
+    total_duration: latencyMs * 1_000_000,
+    eval_count: outputTokens,
+    prompt_eval_count: inputTokens,
+    model: `${provider}/${model}`,
+  };
+}
+
 export async function callOllama(
   req: OllamaRequest,
   tier: ModelTier = 'medium',
@@ -76,81 +145,19 @@ export async function callOllama(
 ): Promise<OllamaResponse> {
   const timeoutMs = TIMEOUT_BY_TIER[tier];
   const allModels = [req.model, ...fallbackModels.filter((m) => m !== req.model)];
-  const MAX_RETRIES = 2;
 
   for (const model of allModels) {
     const modelReq = { ...req, model };
-
-    const breaker = getBreaker(
-      model,
-      tier,
-      (r: OllamaRequest) => fetchOllama(r, timeoutMs),
-    );
-
-    let lastErr: unknown;
-
-    for (let attempt = 0; attempt < MAX_RETRIES; attempt++) {
-      try {
-        if (attempt > 0) {
-          logger.info({ model, attempt }, 'Retrying Ollama call after timeout');
-        }
-
-        const result = await breaker.fire(modelReq);
-        if (attempt > 0) {
-          logger.info({ model, attempt }, 'Ollama retry succeeded');
-        }
-        return result;
-      } catch (err) {
-        lastErr = err;
-
-        // Only retry on timeout errors
-        if (!isTimeoutError(err)) {
-          logger.error({ err, model }, 'Ollama non-timeout error, skipping retry');
-          break;
-        }
-
-        if (attempt < MAX_RETRIES - 1) {
-          logger.warn({ model, attempt }, 'Ollama timeout, retrying');
-        }
-      }
-    }
-
-    // Try next fallback model
-    logger.warn({ model, fallback: allModels[allModels.indexOf(model) + 1] }, 'Ollama model failed, trying fallback');
-    void lastErr; // captured for logging above
+    const result = await tryModelWithRetries(modelReq, tier, timeoutMs);
+    if (result) return result;
+    const nextModel = allModels[allModels.indexOf(model) + 1];
+    logger.warn({ model, fallback: nextModel }, 'Ollama model failed, trying fallback');
   }
 
-  // All Ollama models failed — try external providers as last resort
   if (getAvailableProviders().length > 0) {
     logger.warn({ models: allModels }, 'All Ollama models failed, trying external providers');
     try {
-      const tierMap: Record<ModelTier, 'fast' | 'medium' | 'large' | 'reasoning'> = {
-        fast: 'fast',
-        medium: 'medium',
-        large: 'large',
-      };
-      const externalResult = await callExternalFallback(
-        {
-          model: req.model,
-          messages: [
-            ...(req.system ? [{ role: 'system', content: req.system }] : []),
-            { role: 'user', content: req.prompt },
-          ],
-          temperature: req.options?.temperature,
-          max_tokens: req.options?.num_predict,
-        },
-        tierMap[tier] ?? 'medium',
-      );
-
-      // Convert external response to OllamaResponse shape
-      return {
-        response: externalResult.response,
-        done: true,
-        total_duration: externalResult.latencyMs * 1_000_000,
-        eval_count: externalResult.outputTokens,
-        prompt_eval_count: externalResult.inputTokens,
-        model: `${externalResult.provider}/${externalResult.model}`,
-      };
+      return await tryExternalFallback(req, tier);
     } catch (extErr) {
       logger.error({ err: extErr }, 'External provider fallback also failed');
     }
diff --git a/packages/gateway/src/pipeline/post-validator.ts b/packages/gateway/src/pipeline/post-validator.ts
index 542e8d2..b5bfaab 100644
--- a/packages/gateway/src/pipeline/post-validator.ts
+++ b/packages/gateway/src/pipeline/post-validator.ts
@@ -95,38 +95,29 @@ function checkQuestionCloser(text: string): ValidationResult {
   };
 }
 
-export async function runPostValidation(
+async function validateWithSchema( // Runs validateSchema and wraps its outcome as { result, retry }.
   output: string,
-  config: ValidatorConfig,
-): Promise<PostValidationOutput> {
-  const results: ValidationResult[] = [];
-  const validatorSet = new Set(config.validators ?? []);
-  let banViolations: BanViolation[] = [];
-  let retryRequested = false;
-
-  // 1. Schema validator
-  if (validatorSet.has('schema')) {
-    const schemaResult: SchemaValidatorResult = validateSchema(
-      output,
-      config.schema,
-    );
-    results.push({
+  schema?: Record<string, unknown>,
+): Promise<{ result: ValidationResult; retry: boolean }> {
+  const schemaResult: SchemaValidatorResult = validateSchema(output, schema); // plain call; this function contains no await
+  return {
+    result: {
       validator: 'schema',
       passed: schemaResult.passed,
       score_impact: schemaResult.score_impact,
       details: { errors: schemaResult.errors },
-    });
-    if (schemaResult.retry) retryRequested = true;
-  }
+    },
+    retry: schemaResult.retry, // forwarded unchanged; runPostValidation ORs it into retryRequested
+  };
+}
 
-  // 2. Ban list checker
-  if (validatorSet.has('banlist')) {
-    const banResult: BanlistResult = checkBanlist(
-      output,
-      config.language ?? 'auto',
-    );
-    banViolations = banResult.violations;
-    results.push({
+async function validateWithBanlist(
+  output: string,
+  language?: 'de' | 'en',
+): Promise<{ result: ValidationResult; violations: BanViolation[] }> {
+  const banResult: BanlistResult = checkBanlist(output, language ?? 'auto');
+  return {
+    result: {
       validator: 'banlist',
       passed: banResult.passed,
       score_impact: banResult.score_penalty,
@@ -138,68 +129,96 @@ export async function runPostValidation(
         })),
         count: banResult.violations.length,
       },
-    });
+    },
+    violations: banResult.violations,
+  };
+}
+
+// Run checkLanguage on the output and repackage its outcome as a 'language' ValidationResult.
+// The body contains no await; `async` only wraps the synchronous result in a Promise.
+async function validateWithLanguage(
+  output: string,
+  language?: 'de' | 'en',
+  formality?: 'du' | 'Sie',
+): Promise<ValidationResult> {
+  const check: LanguageCheckResult = checkLanguage(output, language, formality);
+  const { passed, score_impact, detected_language, required_language, formality_issue } = check;
+
+  const details = {
+    detected: detected_language,
+    required: required_language,
+    formality_issue,
+    details: check.details,
+  };
+  return { validator: 'language', passed, score_impact, details };
+}
+
+// Run validateTipContent, passing `outputFormat === 'json'` as its second argument,
+// and repackage the outcome as a 'tip_validator' ValidationResult.
+async function validateWithTip(
+  output: string,
+  outputFormat?: string,
+): Promise<ValidationResult> {
+  const isJsonFormat = outputFormat === 'json';
+  const { passed, score_impact, errors, immediate_reject }: TipValidationResult = validateTipContent(output, isJsonFormat);
+  return {
+    validator: 'tip_validator',
+    passed,
+    score_impact,
+    details: { errors, immediate_reject },
+  };
+}
+
+// Await checkFacts(output, 5000) and repackage its outcome as a 'fact_checker' ValidationResult.
+// NOTE(review): 5000 is presumably a timeout in milliseconds — confirm against checkFacts.
+async function validateWithFacts(output: string): Promise<ValidationResult> {
+  const facts: FactCheckResult = await checkFacts(output, 5000);
+  const { passed, score_impact, checks_performed, failures } = facts;
+  return {
+    validator: 'fact_checker',
+    passed,
+    score_impact,
+    details: { checks_performed, failures },
+  };
+}
+
+export async function runPostValidation(
+  output: string,
+  config: ValidatorConfig,
+): Promise<PostValidationOutput> {
+  const results: ValidationResult[] = [];
+  const validatorSet = new Set(config.validators ?? []);
+  let banViolations: BanViolation[] = [];
+  let retryRequested = false;
+
+  if (validatorSet.has('schema')) {
+    const { result, retry } = await validateWithSchema(output, config.schema);
+    results.push(result);
+    retryRequested = retryRequested || retry;
+  }
+
+  if (validatorSet.has('banlist')) {
+    const { result, violations } = await validateWithBanlist(output, config.language);
+    results.push(result);
+    banViolations = violations;
   }
 
-  // 3. Language checker
   if (validatorSet.has('language')) {
-    const langResult: LanguageCheckResult = checkLanguage(
-      output,
-      config.language,
-      config.formality,
-    );
-    results.push({
-      validator: 'language',
-      passed: langResult.passed,
-      score_impact: langResult.score_impact,
-      details: {
-        detected: langResult.detected_language,
-        required: langResult.required_language,
-        formality_issue: langResult.formality_issue,
-        details: langResult.details,
-      },
-    });
+    results.push(await validateWithLanguage(output, config.language, config.formality));
   }
 
-  // 4. TIP validator
   if (validatorSet.has('tip_validator')) {
-    const tipResult: TipValidationResult = validateTipContent(
-      output,
-      config.output_format === 'json',
-    );
-    results.push({
-      validator: 'tip_validator',
-      passed: tipResult.passed,
-      score_impact: tipResult.score_impact,
-      details: {
-        errors: tipResult.errors,
-        immediate_reject: tipResult.immediate_reject,
-      },
-    });
+    results.push(await validateWithTip(output, config.output_format));
   }
 
-  // 5. Fact checker (async, with timeout)
   if (validatorSet.has('fact_checker') && config.requires_fact_check) {
-    const factResult: FactCheckResult = await checkFacts(output, 5000);
-    results.push({
-      validator: 'fact_checker',
-      passed: factResult.passed,
-      score_impact: factResult.score_impact,
-      details: {
-        checks_performed: factResult.checks_performed,
-        failures: factResult.failures,
-      },
-    });
+    results.push(await validateWithFacts(output));
   }
 
-  // 6. Length checker
   if (validatorSet.has('length')) {
-    results.push(
-      checkLength(output, config.min_length ?? 50, config.max_length ?? 20000),
-    );
+    results.push(checkLength(output, config.min_length ?? 50, config.max_length ?? 20000));
   }
 
-  // 7. Question-closer detector
   if (validatorSet.has('question_closer')) {
     results.push(checkQuestionCloser(output));
   }
diff --git a/packages/gateway/src/pipeline/request-scorer.ts b/packages/gateway/src/pipeline/request-scorer.ts
index 33dc6e3..6f81d25 100644
--- a/packages/gateway/src/pipeline/request-scorer.ts
+++ b/packages/gateway/src/pipeline/request-scorer.ts
@@ -672,6 +672,113 @@ function assignTier(score: number): Tier {
   return 'code_generation';
 }
 
+// ── Helper: Short Message Fast Path ────────────────────────────────────────
+
+// Fast path for short messages: when the last user message is under 50 characters, the request has no
+// tools, the text has no formal-logic keyword, and no aggregated trie match with effectiveCount > 0 lies
+// outside the 'simpleIndicators'/'relay' dimensions, return a fixed 'medium' result. Otherwise return null.
+function handleShortMessageFastPath(
+  lastUserText: string,
+  input: ScorerInput,
+): ScoringResult | null {
+  const hasTools = Boolean(input.tools && input.tools.length > 0);
+  if (lastUserText.length >= 50 || hasTools || hasFormalLogicKeyword(lastUserText)) {
+    return null;
+  }
+
+  const matches = getTrie().scan(lastUserText);
+  const byDimension = getTrie().aggregate(matches);
+  const hasComplex = Array.from(byDimension.values()).some(
+    (d) => d.dimension !== 'simpleIndicators' && d.dimension !== 'relay' && d.effectiveCount > 0,
+  );
+  if (hasComplex) return null;
+
+  // Call recordSessionTier and log before handing back the canned result.
+  recordSessionTier('medium');
+  logger.debug({ tier: 'medium', reason: 'short_simple_path' }, 'Request scored via short simple path');
+  return {
+    tier: 'medium',
+    score: 0.05,
+    confidence: 0.8,
+    reason: 'short message - simple request',
+    dimensions: [],
+  };
+}
+
+// ── Helper: Formal Logic Override ──────────────────────────────────────────
+
+// When the combined user text contains a formal-logic keyword, short-circuit scoring to the 'reasoning'
+// tier (score 0.5, confidence 0.95) with freshly computed dimensions; otherwise return null.
+function handleFormalLogicOverride(
+  fullText: string,
+  input: ScorerInput,
+  userMessages: readonly WeightedMessage[],
+): ScoringResult | null {
+  if (!hasFormalLogicKeyword(fullText)) return null;
+
+  const override: ScoringResult = {
+    tier: 'reasoning',
+    score: 0.5,
+    confidence: 0.95,
+    reason: 'formal logic keyword detected',
+    dimensions: computeAllDimensions(input, userMessages, fullText),
+  };
+  recordSessionTier('reasoning');
+  logger.debug({ tier: 'reasoning', reason: 'formal_logic_override' }, 'Request scored via formal logic override');
+  return override;
+}
+
+// ── Helper: Apply Score Overrides ──────────────────────────────────────────
+
+// Tier decision state that the override rules read and may replace.
+interface ScoreOverridesInput {
+  tier: Tier;
+  confidence: number;
+  reason: string;
+}
+
+// Same shape as the input; `confidence` is passed through unchanged.
+type ScoreOverridesOutput = ScoreOverridesInput;
+
+// Applies the post-scoring overrides in a fixed order, where a later rule can overwrite an earlier one:
+// code-generation override, tool floor, context floor, then the low-confidence fallback to 'medium'.
+function applyScoreOverrides(
+  state: ScoreOverridesInput,
+  dimensions: readonly DimensionScore[],
+  input: ScorerInput,
+  totalChars: number,
+): ScoreOverridesOutput {
+  const { confidence } = state;
+  let { tier, reason } = state;
+
+  // A 'codeGeneration' dimension with rawScore above 0.25 forces the code_generation tier.
+  const codeGenRaw = dimensions.find((d) => d.name === 'codeGeneration')?.rawScore;
+  if (codeGenRaw !== undefined && codeGenRaw > 0.25) {
+    tier = 'code_generation';
+    reason = 'code generation keywords detected';
+  }
+
+  // Requests that carry tools are lifted from 'fast' to 'medium'.
+  const hasTools = (input.tools?.length ?? 0) > 0;
+  if (hasTools && tier === 'fast') {
+    tier = 'medium';
+    reason = 'tool floor applied (minimum medium with tools)';
+  }
+
+  // More than 50k estimated tokens (totalChars / 4) lifts 'fast'/'medium' to 'large'.
+  if (totalChars / 4 > 50_000 && (tier === 'fast' || tier === 'medium')) {
+    tier = 'large';
+    reason = 'context floor applied (>50k estimated tokens)';
+  }
+
+  // Confidence below 0.45 overrides whatever the rules above decided.
+  if (confidence < 0.45) {
+    tier = 'medium';
+    reason = 'ambiguous (confidence < 0.45, defaulting to medium)';
+  }
+  return { tier, confidence, reason };
+}
+
 // ── Main Scoring Function ──────────────────────────────────────────────────
 
 export function scoreRequest(
@@ -682,57 +789,18 @@ export function scoreRequest(
   const fullText = userMessages.map((m) => m.text).join('\n');
   const lastUserText = userMessages.length > 0 ? userMessages[userMessages.length - 1]!.text : '';
 
-  // ── Short message fast path ──
-  if (
-    lastUserText.length < 50 &&
-    (!input.tools || input.tools.length === 0) &&
-    !hasFormalLogicKeyword(lastUserText)
-  ) {
-    // Quick check: no complex keywords in the short message
-    const quickMatches = getTrie().scan(lastUserText);
-    const quickAgg = getTrie().aggregate(quickMatches);
-    const hasComplex = Array.from(quickAgg.values()).some(
-      (d) => d.dimension !== 'simpleIndicators' && d.dimension !== 'relay' && d.effectiveCount > 0,
-    );
+  const shortPathResult = handleShortMessageFastPath(lastUserText, input);
+  if (shortPathResult) return shortPathResult;
 
-    if (!hasComplex) {
-      const shortResult: ScoringResult = {
-        tier: 'medium',
-        score: 0.05,
-        confidence: 0.8,
-        reason: 'short message - simple request',
-        dimensions: [],
-      };
-      recordSessionTier('medium');
-      logger.debug({ tier: 'medium', reason: 'short_simple_path' }, 'Request scored via short simple path');
-      return shortResult;
-    }
-  }
+  const formalLogicResult = handleFormalLogicOverride(fullText, input, userMessages);
+  if (formalLogicResult) return formalLogicResult;
 
-  // ── Formal logic override ──
-  if (hasFormalLogicKeyword(fullText)) {
-    const dimensions = computeAllDimensions(input, userMessages, fullText);
-    const result: ScoringResult = {
-      tier: 'reasoning',
-      score: 0.5,
-      confidence: 0.95,
-      reason: 'formal logic keyword detected',
-      dimensions,
-    };
-    recordSessionTier('reasoning');
-    logger.debug({ tier: 'reasoning', reason: 'formal_logic_override' }, 'Request scored via formal logic override');
-    return result;
-  }
-
-  // ── Full scoring ──
   const dimensions = computeAllDimensions(input, userMessages, fullText);
-
   let rawScore = 0;
   for (const dim of dimensions) {
     rawScore += dim.weighted;
   }
 
-  // Apply session momentum
   const momentum = computeSessionMomentum(lastUserText.length);
   const score = rawScore + momentum;
 
@@ -740,32 +808,9 @@ export function scoreRequest(
   let confidence = computeConfidence(score);
   let reason = `scored ${score.toFixed(4)} across 23 dimensions`;
 
-  // ── Code generation override: code keywords -> code_generation ──
-  const codeGenDim = dimensions.find((d) => d.name === 'codeGeneration');
-  if (codeGenDim && codeGenDim.rawScore > 0.25) {
-    tier = 'code_generation';
-    reason = 'code generation keywords detected';
-  }
-
-  // ── Tool floor: tools present -> minimum medium ──
-  if (input.tools && input.tools.length > 0 && tier === 'fast') {
-    tier = 'medium';
-    reason = 'tool floor applied (minimum medium with tools)';
-  }
-
-  // ── Context floor: >50k total tokens -> minimum large ──
   const totalChars = input.messages.reduce((sum, m) => sum + (m.content?.length ?? 0), 0);
-  const estimatedTotalTokens = totalChars / 4;
-  if (estimatedTotalTokens > 50_000 && (tier === 'fast' || tier === 'medium')) {
-    tier = 'large';
-    reason = 'context floor applied (>50k estimated tokens)';
-  }
-
-  // ── Ambiguity check: low confidence -> force medium ──
-  if (confidence < 0.45) {
-    tier = 'medium';
-    reason = 'ambiguous (confidence < 0.45, defaulting to medium)';
-  }
+  const overrides = applyScoreOverrides({ tier, confidence, reason }, dimensions, input, totalChars);
+  ({ tier, confidence, reason } = overrides);
 
   recordSessionTier(tier);
 
diff --git a/packages/gateway/src/pipeline/router.ts b/packages/gateway/src/pipeline/router.ts
index 83ed321..6b03d6e 100644
--- a/packages/gateway/src/pipeline/router.ts
+++ b/packages/gateway/src/pipeline/router.ts
@@ -194,6 +194,82 @@ const TIER_MODEL_MAP: Record<Tier, { primary: string; configTier: 'fast' | 'medi
   code_generation: { primary: 'gpt-4-turbo', configTier: 'large', provider: 'openai-codex' },
 };
 
+// Fixed fallback decision: model 'qwen2.5:14b' on the 'medium' tier, using the medium tier's timeout.
+// max_tokens comes from options when provided, otherwise 2048.
+function buildMediumTierFallback(models: ModelsYaml, options?: { max_tokens?: number }, scoringResult?: ScoringResult): RouterDecision {
+  const fallbackModel = 'qwen2.5:14b';
+  const mediumTier = models.tiers['medium']!;
+
+  return {
+    model: fallbackModel,
+    fallback_chain: buildFallbackChain(fallbackModel, 'medium', models),
+    tier: 'medium',
+    prompt_template: 'default',
+    temperature: 0.7,
+    max_tokens: options?.max_tokens ?? 2048,
+    output_format: 'text',
+    requires_fact_check: false,
+    validators: [],
+    ollama_base_url: models.ollama_base_url,
+    timeout_ms: mediumTier.timeout_ms,
+    scoringResult,
+  };
+}
+
+// For 'reasoning'/'code_generation' the selected model goes first, then the remaining fallbacks; other tiers use the chain as-is.
+function buildScoredFallbackChain(
+  tier: Tier,
+  selectedModel: string,
+  configTier: 'fast' | 'medium' | 'large',
+  models: ModelsYaml,
+): string[] {
+  const chain = buildFallbackChain(selectedModel, configTier, models);
+  const pinPrimary = tier === 'reasoning' || tier === 'code_generation';
+  return pinPrimary ? [selectedModel, ...chain.filter((m) => m !== selectedModel)] : chain;
+}
+
+// Logs the routing decision and assembles the RouterDecision for a scored request.
+// max_tokens precedence: options.max_tokens, then the selected model's max_tokens_default, then 2048.
+function buildScoredDecision(
+  models: ModelsYaml,
+  mapping: { primary: string; configTier: 'fast' | 'medium' | 'large'; provider?: string },
+  selectedModel: string,
+  configTier: 'fast' | 'medium' | 'large',
+  fallbackChain: string[],
+  tierConfig: ModelsYaml['tiers']['fast'],
+  scoringResult: ScoringResult,
+  options?: { max_tokens?: number },
+): RouterDecision {
+  const { provider } = mapping;
+  const { tier: scoredTier, score, confidence, reason } = scoringResult;
+  const modelConfig = models.models[selectedModel];
+  const logFields = {
+    tier: scoredTier,
+    model: selectedModel,
+    provider: provider || 'ollama',
+    score: score.toFixed(4),
+    confidence: confidence.toFixed(3),
+    reason,
+  };
+  logger.info(logFields, 'Dynamic routing decision via request scorer');
+
+  return {
+    model: selectedModel,
+    provider,
+    fallback_chain: fallbackChain,
+    tier: configTier,
+    prompt_template: 'default',
+    temperature: 0.7,
+    max_tokens: options?.max_tokens ?? modelConfig?.max_tokens_default ?? 2048,
+    output_format: 'text',
+    requires_fact_check: false,
+    validators: [],
+    ollama_base_url: models.ollama_base_url,
+    timeout_ms: tierConfig.timeout_ms,
+    scoringResult,
+  };
+}
+
 /**
  * Dynamic routing based on the 23-dimension request scorer.
  * Use this alongside the static `route()` function — both coexist.
@@ -226,60 +302,13 @@ export function routeByScore(
   const mapping = TIER_MODEL_MAP[scoringResult.tier];
   const selectedModel = mapping.primary;
   const configTier = mapping.configTier;
-  const provider = mapping.provider;
   const tierConfig = models.tiers[configTier];
 
   if (!tierConfig) {
     logger.error({ tier: configTier }, 'Tier config not found in models.yaml, falling back to medium');
-    const fallbackTierConfig = models.tiers['medium']!;
-    return {
-      model: 'qwen2.5:14b',
-      fallback_chain: buildFallbackChain('qwen2.5:14b', 'medium', models),
-      tier: 'medium',
-      prompt_template: 'default',
-      temperature: 0.7,
-      max_tokens: options?.max_tokens ?? 2048,
-      output_format: 'text',
-      requires_fact_check: false,
-      validators: [],
-      ollama_base_url: models.ollama_base_url,
-      timeout_ms: fallbackTierConfig.timeout_ms,
-      scoringResult,
-    };
+    return buildMediumTierFallback(models, options, scoringResult);
   }
 
-  // For reasoning/code_generation tiers, put the primary model first, then fallbacks
-  const fallbackChain = (scoringResult.tier === 'reasoning' || scoringResult.tier === 'code_generation')
-    ? [selectedModel, ...buildFallbackChain(selectedModel, configTier, models).filter((m) => m !== selectedModel)]
-    : buildFallbackChain(selectedModel, configTier, models);
-
-  const modelConfig = models.models[selectedModel];
-
-  logger.info(
-    {
-      tier: scoringResult.tier,
-      model: selectedModel,
-      provider: provider || 'ollama',
-      score: scoringResult.score.toFixed(4),
-      confidence: scoringResult.confidence.toFixed(3),
-      reason: scoringResult.reason,
-    },
-    'Dynamic routing decision via request scorer',
-  );
-
-  return {
-    model: selectedModel,
-    provider,
-    fallback_chain: fallbackChain,
-    tier: configTier,
-    prompt_template: 'default',
-    temperature: 0.7,
-    max_tokens: options?.max_tokens ?? modelConfig?.max_tokens_default ?? 2048,
-    output_format: 'text',
-    requires_fact_check: false,
-    validators: [],
-    ollama_base_url: models.ollama_base_url,
-    timeout_ms: tierConfig.timeout_ms,
-    scoringResult,
-  };
+  const fallbackChain = buildScoredFallbackChain(scoringResult.tier, selectedModel, configTier, models);
+  return buildScoredDecision(models, mapping, selectedModel, configTier, fallbackChain, tierConfig, scoringResult, options);
 }
diff --git a/packages/gateway/src/routes/completion.ts b/packages/gateway/src/routes/completion.ts
index 4c53cd7..95f1946 100644
--- a/packages/gateway/src/routes/completion.ts
+++ b/packages/gateway/src/routes/completion.ts
@@ -111,377 +111,183 @@ type CompletionRequest = z.infer<typeof CompletionRequestSchema>;
 //   }
 // }
 
-export async function completionRoute(fastify: FastifyInstance): Promise<void> {
-  fastify.post(
-    '/completion',
-    {
-      config: { rateLimit: false }, // Custom rate limiting via caller
-    },
-    async (request: FastifyRequest, reply: FastifyReply) => {
-      const startMs = Date.now();
+async function classifyAndRoute(taskType: string | undefined, caller: string, input: string, options: CompletionRequest['options']): Promise<{ taskType: string; decision: ReturnType<typeof route>; classificationResult?: unknown }> {
+  let resolved = taskType;
+  let classificationResult;
+  if (!resolved) {
+    try {
+      classificationResult = await classifyInput(input);
+      resolved = classificationResult.task_type;
+    } catch (err) {
+      logger.warn({ err }, 'Pre-classifier failed');
+      resolved = 'generic_qa';
+    }
+  }
 
-      let body: CompletionRequest;
-      try {
-        body = CompletionRequestSchema.parse(request.body);
-      } catch (err) {
-        return reply.status(400).send({
-          statusCode: 400,
-          error: 'Bad Request',
-          message: err instanceof z.ZodError ? err.errors[0]?.message ?? 'Invalid request' : 'Invalid request body',
-        });
-      }
+  let decision;
+  try {
+    decision = route(resolved, caller, { model: options?.model, temperature: options?.temperature, max_tokens: options?.max_tokens });
+  } catch (err) {
+    throw new Error(err instanceof Error ? err.message : 'Failed to route request');
+  }
 
-      const { caller, input, language, context, options } = body;
-      const returnValidationDetails = options?.return_validation_details ?? false;
-
-      // Stage 2: ShieldX scan (real library, 547+ rules, sub-millisecond)
-      // TODO: Enable ShieldX when dependency is properly linked
-      // if (!SKIP_SHIELDX_CALLERS.has(caller)) {
-      //   const shieldResult = await runShieldXScan(input, caller);
-      //   if (!shieldResult.passed) {
-      //     requestsTotal.labels({ caller, task_type: 'unknown', status: 'rejected' }).inc();
-      //     return reply.status(400).send({
-      //       statusCode: 400,
-      //       error: 'Rejected',
-      //       message: shieldResult.reason ?? 'Input rejected by security scan',
-      //       threat_level: shieldResult.threatLevel,
-      //       kill_chain_phase: shieldResult.phase,
-      //       shieldx_latency_ms: shieldResult.latencyMs,
-      //     });
-      //   }
-      // }
-
-      // Generate call ID early for tracking (used by instrumented LLM client)
-      const callId = `call-${Date.now()}-${Math.random().toString(36).slice(2, 9)}`;
-
-      // Stage 3: Pre-classifier
-      let taskType = body.task_type;
-      let classificationResult;
-      if (!taskType) {
-        try {
-          classificationResult = await classifyInput(input);
-          taskType = classificationResult.task_type;
-        } catch (err) {
-          logger.warn({ err }, 'Pre-classifier failed');
-          taskType = 'generic_qa';
-        }
-      }
-
-      // Stage 4: Router
-      let decision;
-      try {
-        decision = route(taskType, caller, {
-          model: options?.model,
-          temperature: options?.temperature,
-          max_tokens: options?.max_tokens,
-        });
-      } catch (err) {
-        return reply.status(400).send({
-          statusCode: 400,
-          error: 'Routing Error',
-          message: err instanceof Error ? err.message : 'Failed to route request',
-        });
-      }
-
-      // Stage 5: Prompt assembly
-      // Use taskType directly for template lookup (so tip_transceiver_enrich.yaml is used,
-      // not the generic_qa fallback from routing). The router only selects the model.
-      //
-      // Variable resolution strategy:
-      // 1. Explicit context fields take priority (callers can pass structured data)
-      // 2. `input` is used as fallback for ALL common content variables so simple
-      //    one-field callers work without knowing each template's specific var name.
-      const contextVars = context
-        ? Object.fromEntries(Object.entries(context).map(([k, v]) => [k, v as string]))
-        : {};
-
-      // Common content variable names across all 59 templates — all default to `input`
-      const inputAliases: Record<string, string> = {
-        source_data: input, ocr_text: input, transcription: input,
-        ticket_content: input, alert_data: input, incident_data: input,
-        lldp_data: input, cve_data: input, inventory: input,
-        anomaly_data: input, flagged_input: input, attack_description: input,
-        bgp_data: input, health_checks: input, market_data: input,
-        manuscript_text: input, raw_content: input, content: input,
-        // Additional structured vars with sensible fallbacks
-        peeringdb_data: input, bgp_routes: input, network_context: input,
-        alert_context: input, affected_inventory: input,
-      };
-
-      const resolved = resolvePrompt(
-        taskType ?? decision.prompt_template,
-        {
-          ...inputAliases,    // low priority: input as fallback for all content vars
-          ...contextVars,     // medium priority: explicit context fields override aliases
-          input,              // always available as {{input}}
-          user_context: context,
-        },
-        language ?? 'en',
-      );
-
-      // Stage 6: LLM call (external provider or Ollama with circuit breaker + retry)
-      let ollamaResponse;
-      try {
-        const format: '' | 'json' | undefined = decision.output_format === 'json' ? 'json' : '';
-
-        const baseReq = {
-          model: decision.model,
-          prompt: resolved.prompt,
-          system: resolved.system,
-          options: {
-            temperature: decision.temperature,
-            num_predict: decision.max_tokens,
-          },
-          format,
-          stream: false,
-          callId,
-          taskType,
-        };
-
-        if (decision.provider) {
-          // Route to external provider as primary (e.g. OpenAI Codex)
-          ollamaResponse = await callExternalProviderPrimaryInstrumented(
-            baseReq,
-            decision.provider,
-            decision.tier,
-            decision.fallback_chain,
-            callId,
-            taskType,
-          );
-        } else {
-          // Route to Ollama with fallback chain
-          ollamaResponse = await callOllamaWithFallbackChainInstrumented(
-            baseReq,
-            decision.fallback_chain,
-            decision.tier,
-            callId,
-            taskType,
-          );
-        }
-      } catch (err) {
-        const latency = Date.now() - startMs;
-        logger.error({ err, caller, taskType }, 'Ollama call failed');
-        requestsTotal.labels({ caller, task_type: taskType, status: 'rejected' }).inc();
-        latencySeconds.labels({ caller, task_type: taskType, model: decision.model }).observe(latency / 1000);
-
-        // Log error to dashboard
-        const db = getPool();
-        const requestLogger = createRequestLogger(db);
-        const errorMessage = err instanceof Error ? err.message : 'LLM service unavailable';
-        void requestLogger.logRequest(
-          callId,
-          caller,
-          taskType,
-          decision.model,
-          'error',
-          0,
-          0,
-          0,
-          latency,
-          0,
-          false,
-          errorMessage
-        );
-
-        return reply.status(503).send({
-          statusCode: 503,
-          error: 'Service Unavailable',
-          message: 'LLM service unavailable, please retry',
-        });
-      }
-
-      const outputText = ollamaResponse.response;
-      const latencyMs = Date.now() - startMs;
-
-      // Stage 7: Post-validation chain
-      const validationOutput = await runPostValidation(outputText, {
-        validators: decision.validators,
-        language,
-        output_format: decision.output_format,
-        requires_fact_check: decision.requires_fact_check,
-        schema: resolved.schema,
-      });
-
-      // Stage 8: Confidence gate
-      const confidenceResult = evaluateConfidence(validationOutput);
-
-      // Record metrics
-      requestsTotal.labels({ caller, task_type: taskType, status: confidenceResult.status }).inc();
-      latencySeconds.labels({ caller, task_type: taskType, model: ollamaResponse.model ?? decision.model }).observe(latencyMs / 1000);
-      tokensTotal.labels({ direction: 'in', model: decision.model }).inc(ollamaResponse.prompt_eval_count ?? 0);
-      tokensTotal.labels({ direction: 'out', model: decision.model }).inc(ollamaResponse.eval_count ?? 0);
-      confidenceScore.labels({ task_type: taskType, model: decision.model }).observe(confidenceResult.score);
-
-      // Record ban hits in metrics
-      for (const violation of validationOutput.ban_violations) {
-        banlistHitsTotal.labels({ term: violation.term, language: violation.language, category: violation.category }).inc();
-      }
-
-      // Record validation failures
-      for (const result of validationOutput.results) {
-        if (!result.passed) {
-          validationFailuresTotal.labels({ validator: result.validator, task_type: taskType }).inc();
-        }
-      }
-
-      // Stage 9: Audit log
-      const inputHash = hashText(input);
-      const outputHash = hashText(outputText);
-
-      await writeAuditLog({
-        caller,
-        task_type: taskType,
-        model_used: decision.model,
-        prompt_id: resolved.prompt_id,
-        prompt_version: resolved.prompt_version,
-        input_hash: inputHash,
-        output_text: confidenceResult.status !== 'pending_review' ? outputText : undefined,
-        output_hash: outputHash,
-        token_count_in: ollamaResponse.prompt_eval_count ?? 0,
-        token_count_out: ollamaResponse.eval_count ?? 0,
-        latency_ms: latencyMs,
-        confidence: confidenceResult.score,
-        status: confidenceResult.status,
-        validation_log: validationOutput.results,
-        ban_hits: validationOutput.ban_violations,
-        metadata: {
-          classification: classificationResult,
-          model_tier: decision.tier,
-          fallback_used: ollamaResponse.model !== decision.model,
-        },
-      });
-
-      // Write ban analytics
-      if (validationOutput.ban_violations.length > 0 && callId) {
-        void writeBanAnalytics(callId, validationOutput.ban_violations, caller, taskType);
-      }
-
-      // Add to review queue if pending_review
-      if (confidenceResult.status === 'pending_review' && callId) {
-        void addToReviewQueue({
-          callId,
-          caller,
-          taskType,
-          inputText: input,
-          outputText,
-          confidence: confidenceResult.score,
-          validationLog: validationOutput.results,
-        });
-      }
-
-      // Track cost and compression metrics
-      let costUsd = 0;
-      let costSavedUsd = 0;
-      if (callId) {
-        const db = getPool();
-        const tokensIn = ollamaResponse.prompt_eval_count ?? 0;
-        const tokensOut = ollamaResponse.eval_count ?? 0;
-        const tokensCompressed = tokensIn + tokensOut; // TODO: actual compression from RTK layer
-        costUsd = calculateCost(decision.model, tokensIn, tokensOut);
-        costSavedUsd = calculateSavings(decision.model, tokensCompressed, tokensCompressed); // 0 until RTK compression data available
-
-        void logCostImpact(
-          db,
-          callId,
-          {
-            callId,
-            agent: 'gateway',
-            model: decision.model,
-            project: 'llm-gateway',
-            taskType: taskType ?? 'generic',
-          },
-          tokensIn,
-          tokensOut,
-          tokensCompressed,
-          costUsd,
-          costSavedUsd,
-          confidenceResult.score,
-        );
-
-        // Record routing decision for learning engine
-        void recordRoutingDecision({
-          callId,
-          taskType: taskType ?? 'generic',
-          caller,
-          routingModel: decision.model,
-          routingTier: decision.tier,
-          actualModelUsed: ollamaResponse.model ?? decision.model,
-          wasFallback: ollamaResponse.model !== decision.model,
-          success: confidenceResult.status === 'approved',
-          confidenceFinal: confidenceResult.score,
-          tokensIn,
-          tokensOut,
-          latencyMs,
-          costUsd,
-        });
-
-        // Broadcast real-time update to connected SSE clients
-        costStream.broadcast({
-          callId,
-          project: 'llm-gateway',
-          taskType: taskType ?? 'generic',
-          model: decision.model,
-          costUsd,
-          costSavedUsd,
-          tokensIn,
-          tokensOut,
-          confidence: confidenceResult.score,
-          timestamp: new Date().toISOString(),
-        });
-
-        // Log request to dashboard
-        const requestLogger = createRequestLogger(db);
-        void requestLogger.logRequest(
-          callId,
-          caller,
-          taskType,
-          decision.model,
-          confidenceResult.status as 'approved' | 'warning' | 'pending_review' | 'rejected' | 'error',
-          tokensIn,
-          tokensOut,
-          costUsd,
-          latencyMs,
-          confidenceResult.score,
-          ollamaResponse.model !== decision.model,
-          undefined // No error message for successful requests
-        );
-      }
-
-      // Stage 10: Response
-      const responseBody: Record<string, unknown> = {
-        id: callId,
-        status: confidenceResult.status,
-        confidence: Math.round(confidenceResult.score * 100) / 100,
-        model: decision.model,
-        task_type: taskType,
-        latency_ms: latencyMs,
-        tokens: {
-          in: ollamaResponse.prompt_eval_count ?? 0,
-          out: ollamaResponse.eval_count ?? 0,
-        },
-        cost: {
-          usd: costUsd,
-          saved_usd: costSavedUsd,
-        },
-      };
-
-      if (confidenceResult.status !== 'pending_review') {
-        responseBody['output'] = outputText;
-      } else {
-        responseBody['output'] = null;
-        responseBody['message'] = 'Output is pending human review due to low confidence';
-      }
-
-      if (returnValidationDetails) {
-        responseBody['validation'] = validationOutput.results;
-        responseBody['confidence_detail'] = {
-          base_score: confidenceResult.base_score,
-          total_impact: confidenceResult.total_impact,
-          final_score: confidenceResult.score,
-        };
-      }
-
-      return reply.status(200).send(responseBody);
-    },
-  );
+  return { taskType: resolved, decision, classificationResult };
+}
+
+/** Builds the prompt variable bag: known content aliases default to `input`, context fields override them, then `input` and `user_context` are set last. */
+function buildPromptVariables(input: string, context: Record<string, unknown> | undefined): Record<string, unknown> & { input: string } {
+  const aliasNames = [
+    'source_data', 'ocr_text', 'transcription', 'ticket_content', 'alert_data', 'incident_data', 'lldp_data', 'cve_data',
+    'inventory', 'anomaly_data', 'flagged_input', 'attack_description', 'bgp_data', 'health_checks', 'market_data', 'manuscript_text',
+    'raw_content', 'content', 'peeringdb_data', 'bgp_routes', 'network_context', 'alert_context', 'affected_inventory',
+  ];
+  const aliasDefaults: Record<string, string> = Object.fromEntries(aliasNames.map((name) => [name, input]));
+  const explicitVars = !context ? {} : Object.fromEntries(Object.entries(context).map(([key, value]) => [key, value as string]));
+  return { ...aliasDefaults, ...explicitVars, input, user_context: context };
+}
+
+/** Sends the request to the external provider when the routing decision names one, otherwise to the Ollama fallback chain. */
+async function callLLMWithFallback(baseReq: any, decision: ReturnType<typeof route>, callId: string, taskType: string): Promise<any> {
+  const { provider, tier, fallback_chain: chain } = decision;
+  if (!provider) return await callOllamaWithFallbackChainInstrumented(baseReq, chain, tier, callId, taskType);
+  return await callExternalProviderPrimaryInstrumented(baseReq, provider, tier, chain, callId, taskType);
+}
+
+/**
+ * Records the request, token, confidence, ban-list-hit and validator-failure metrics for one completed request.
+ * Latency is intentionally NOT observed here: no latency value is passed in, and a placeholder `observe(0)` would add a
+ * bogus zero-valued sample on top of the real observation the route handler makes right after calling this function.
+ */
+function recordAllMetrics(caller: string, taskType: string, confidenceResult: any, ollamaResponse: any, decision: ReturnType<typeof route>, validationOutput: any): void {
+  requestsTotal.labels({ caller, task_type: taskType, status: confidenceResult.status }).inc();
+  tokensTotal.labels({ direction: 'in', model: decision.model }).inc(ollamaResponse.prompt_eval_count ?? 0);
+  tokensTotal.labels({ direction: 'out', model: decision.model }).inc(ollamaResponse.eval_count ?? 0);
+  confidenceScore.labels({ task_type: taskType, model: decision.model }).observe(confidenceResult.score);
+  for (const violation of validationOutput.ban_violations) banlistHitsTotal.labels({ term: violation.term, language: violation.language, category: violation.category }).inc();
+  for (const result of validationOutput.results) {
+    if (!result.passed) validationFailuresTotal.labels({ validator: result.validator, task_type: taskType }).inc();
+  }
+}
+
+async function auditAndTrackCosts(caller: string, taskType: string, input: string, outputText: string, latencyMs: number, ollamaResponse: any, resolved: any, decision: ReturnType<typeof route>, confidenceResult: any, validationOutput: any, classificationResult: any, callId: string): Promise<{ costUsd: number; costSavedUsd: number }> { // Writes the audit record, then kicks off analytics/cost/dashboard logging; returns the computed cost figures.
+  const inputHash = hashText(input);
+  const outputHash = hashText(outputText);
+
+  await writeAuditLog({ // the only awaited step: the audit write completes before any of the fire-and-forget calls below start
+    caller, task_type: taskType, model_used: decision.model, prompt_id: resolved.prompt_id, prompt_version: resolved.prompt_version,
+    input_hash: inputHash, output_text: confidenceResult.status !== 'pending_review' ? outputText : undefined, output_hash: outputHash, // output text is left out of the audit row while the result is pending review
+    token_count_in: ollamaResponse.prompt_eval_count ?? 0, token_count_out: ollamaResponse.eval_count ?? 0, latency_ms: latencyMs,
+    confidence: confidenceResult.score, status: confidenceResult.status, validation_log: validationOutput.results, ban_hits: validationOutput.ban_violations,
+    metadata: { classification: classificationResult, model_tier: decision.tier, fallback_used: ollamaResponse.model !== decision.model }, // fallback_used: the responding model differs from the routed one
+  });
+
+  if (validationOutput.ban_violations.length > 0) {
+    void writeBanAnalytics(callId, validationOutput.ban_violations, caller, taskType); // `void`: not awaited
+  }
+
+  if (confidenceResult.status === 'pending_review') {
+    void addToReviewQueue({ callId, caller, taskType, inputText: input, outputText, confidence: confidenceResult.score, validationLog: validationOutput.results }); // `void`: not awaited
+  }
+
+  const db = getPool();
+  const tokensIn = ollamaResponse.prompt_eval_count ?? 0;
+  const tokensOut = ollamaResponse.eval_count ?? 0;
+  const tokensCompressed = tokensIn + tokensOut; // TODO: actual compression from RTK layer (note carried over from the pre-refactor handler)
+  const costUsd = calculateCost(decision.model, tokensIn, tokensOut);
+  const costSavedUsd = calculateSavings(decision.model, tokensCompressed, tokensCompressed); // pre-refactor note: 0 until RTK compression data available — TODO confirm against calculateSavings
+
+  void logCostImpact(db, callId, { callId, agent: 'gateway', model: decision.model, project: 'llm-gateway', taskType: taskType ?? 'generic' }, tokensIn, tokensOut, tokensCompressed, costUsd, costSavedUsd, confidenceResult.score);
+
+  void recordRoutingDecision({ callId, taskType: taskType ?? 'generic', caller, routingModel: decision.model, routingTier: decision.tier, actualModelUsed: ollamaResponse.model ?? decision.model, wasFallback: ollamaResponse.model !== decision.model, success: confidenceResult.status === 'approved', confidenceFinal: confidenceResult.score, tokensIn, tokensOut, latencyMs, costUsd }); // feeds the learning engine, per the pre-refactor comment
+
+  costStream.broadcast({ callId, project: 'llm-gateway', taskType: taskType ?? 'generic', model: decision.model, costUsd, costSavedUsd, tokensIn, tokensOut, confidence: confidenceResult.score, timestamp: new Date().toISOString() }); // synchronous call; the pre-refactor comment describes it as a real-time update to SSE clients
+
+  const requestLogger = createRequestLogger(db);
+  void requestLogger.logRequest(callId, caller, taskType, decision.model, confidenceResult.status as 'approved' | 'warning' | 'pending_review' | 'rejected' | 'error', tokensIn, tokensOut, costUsd, latencyMs, confidenceResult.score, ollamaResponse.model !== decision.model, undefined); // trailing `undefined`: no error message for a successful request
+
+  return { costUsd, costSavedUsd };
+}
+
+/**
+ * Assembles the JSON payload for a completed request; `output` is null (with an explanatory message) while the result awaits review.
+ */
+function buildResponseBody(callId: string, decision: ReturnType<typeof route>, taskType: string, confidenceResult: any, outputText: string, latencyMs: number, ollamaResponse: any, costUsd: number, costSavedUsd: number, returnValidationDetails: boolean, validationOutput: any): Record<string, unknown> {
+  const { status, score } = confidenceResult;
+  const awaitingReview = status === 'pending_review';
+  const payload: Record<string, unknown> = {
+    id: callId, status, confidence: Math.round(score * 100) / 100,
+    model: decision.model, task_type: taskType, latency_ms: latencyMs,
+    tokens: { in: ollamaResponse.prompt_eval_count ?? 0, out: ollamaResponse.eval_count ?? 0 },
+    cost: { usd: costUsd, saved_usd: costSavedUsd },
+    output: awaitingReview ? null : outputText,
+    ...(awaitingReview ? { message: 'Output is pending human review due to low confidence' } : {}),
+  };
+  if (!returnValidationDetails) return payload;
+  payload['validation'] = validationOutput.results;
+  payload['confidence_detail'] = { base_score: confidenceResult.base_score, total_impact: confidenceResult.total_impact, final_score: score };
+  return payload;
+}
+
+export async function completionRoute(fastify: FastifyInstance): Promise<void> { // Registers POST /completion: parse body → classify/route → build prompt → LLM call → post-validation + confidence → metrics/audit → response
+  fastify.post('/completion', { config: { rateLimit: false } }, async (request: FastifyRequest, reply: FastifyReply) => { // rateLimit disabled for this route; the pre-refactor comment says rate limiting is handled per caller
+    const startMs = Date.now();
+
+    let body: CompletionRequest;
+    try {
+      body = CompletionRequestSchema.parse(request.body);
+    } catch (err) {
+      return reply.status(400).send({
+        statusCode: 400, error: 'Bad Request',
+        message: err instanceof z.ZodError ? err.errors[0]?.message ?? 'Invalid request' : 'Invalid request body',
+      });
+    }
+
+    const { caller, input, language, context, options } = body;
+    const callId = `call-${Date.now()}-${Math.random().toString(36).slice(2, 9)}`; // created before the LLM call so it can be passed to the instrumented client and every log below
+
+    let classifAndRoute;
+    try {
+      classifAndRoute = await classifyAndRoute(body.task_type, caller, input, options); // throws only when routing fails; a classifier failure falls back to 'generic_qa' inside the helper
+    } catch (err) {
+      return reply.status(400).send({
+        statusCode: 400, error: 'Routing Error',
+        message: err instanceof Error ? err.message : 'Failed to route request',
+      });
+    }
+
+    const { taskType, decision, classificationResult } = classifAndRoute;
+    const promptVars = buildPromptVariables(input, context);
+    const resolved = resolvePrompt(taskType ?? decision.prompt_template, promptVars, language ?? 'en'); // template is looked up by task type; per the pre-refactor note the router only selects the model
+
+    const format: '' | 'json' | undefined = decision.output_format === 'json' ? 'json' : '';
+    const baseReq = { model: decision.model, prompt: resolved.prompt, system: resolved.system, options: { temperature: decision.temperature, num_predict: decision.max_tokens }, format, stream: false, callId, taskType };
+
+    let ollamaResponse;
+    try {
+      ollamaResponse = await callLLMWithFallback(baseReq, decision, callId, taskType);
+    } catch (err) {
+      const latency = Date.now() - startMs;
+      logger.error({ err, caller, taskType }, 'Ollama call failed'); // NOTE(review): this message is also logged when the failing call went to an external provider
+      requestsTotal.labels({ caller, task_type: taskType, status: 'rejected' }).inc();
+      latencySeconds.labels({ caller, task_type: taskType, model: decision.model }).observe(latency / 1000);
+      const db = getPool();
+      const requestLogger = createRequestLogger(db);
+      void requestLogger.logRequest(callId, caller, taskType, decision.model, 'error', 0, 0, 0, latency, 0, false, err instanceof Error ? err.message : 'LLM service unavailable'); // not awaited; the client gets the generic 503 below, the real error text goes only to the request log
+      return reply.status(503).send({ statusCode: 503, error: 'Service Unavailable', message: 'LLM service unavailable, please retry' });
+    }
+
+    const latencyMs = Date.now() - startMs; // measured right after the LLM call returns, i.e. before post-validation and audit logging
+    const outputText = ollamaResponse.response;
+    const validationOutput = await runPostValidation(outputText, { validators: decision.validators, language, output_format: decision.output_format, requires_fact_check: decision.requires_fact_check, schema: resolved.schema });
+    const confidenceResult = evaluateConfidence(validationOutput);
+
+    recordAllMetrics(caller, taskType, confidenceResult, ollamaResponse, decision, validationOutput);
+    const { costUsd, costSavedUsd } = await auditAndTrackCosts(caller, taskType, input, outputText, latencyMs, ollamaResponse, resolved, decision, confidenceResult, validationOutput, classificationResult, callId);
+
+    // Observe the measured latency (in seconds) here: recordAllMetrics() is not given latencyMs, so this is where the real value is recorded.
+    latencySeconds.labels({ caller, task_type: taskType, model: ollamaResponse.model ?? decision.model }).observe(latencyMs / 1000);
+
+    const responseBody = buildResponseBody(callId, decision, taskType, confidenceResult, outputText, latencyMs, ollamaResponse, costUsd, costSavedUsd, options?.return_validation_details ?? false, validationOutput);
+    return reply.status(200).send(responseBody);
+  });
 }
diff --git a/packages/gateway/src/routes/dashboard.ts b/packages/gateway/src/routes/dashboard.ts
index 194338a..1874821 100644
--- a/packages/gateway/src/routes/dashboard.ts
+++ b/packages/gateway/src/routes/dashboard.ts
@@ -3,6 +3,7 @@ import { getPool } from '../db/client.js';
 import { logger } from '../observability/logger.js';
 import { createRequestLogger } from '../modules/request-logger.js';
 import { globalRequestStream } from '../modules/request-stream.js';
+import { getAvailableProviders } from '../pipeline/external-providers.js';
 
 interface DashboardSummary {
   totalCost: number;
@@ -494,6 +495,78 @@ export async function dashboardRoute(fastify: FastifyInstance): Promise<void> {
     return reply.send({ test: 'ok', message: 'Test endpoint is working' });
   });
 
+  // Providers endpoint - lists all available LLM providers (local, subscription, free-tier)
+  fastify.get('/api/dashboard/providers', async (_request: FastifyRequest, reply: FastifyReply) => {
+    try {
+      const availableProviders = await getAvailableProviders();
+
+      // Categorize providers by type
+      const providers = availableProviders.map(provider => {
+        let type: 'local' | 'subscription' | 'free' = 'free';
+        let status: 'configured' | 'unconfigured' | 'unavailable' = 'unconfigured';
+
+        // Determine provider type based on name
+        if (provider.name.toLowerCase().includes('ollama')) {
+          type = 'local';
+          status = provider.enabled ? 'configured' : 'unconfigured';
+        } else if (['claude-bridge', 'openai-bridge', 'chatgpt-bridge', 'copilot-bridge'].includes(provider.name)) {
+          type = 'subscription';
+          status = provider.enabled && process.env[provider.envKey] ? 'configured' : 'unconfigured';
+        } else {
+          type = 'free';
+          status = provider.enabled && process.env[provider.envKey] ? 'configured' : 'unconfigured';
+        }
+
+        return {
+          name: provider.name,
+          type,
+          status,
+          enabled: provider.enabled,
+          models: provider.models.map(m => ({
+            id: m.id,
+            tier: m.tier,
+            contextLength: m.contextLength
+          })),
+          rateLimitRpm: provider.rateLimitRpm,
+          baseUrl: provider.baseUrl
+        };
+      });
+
+      // Group by type for easy UI rendering
+      const grouped = {
+        local: providers.filter(p => p.type === 'local'),
+        subscription: providers.filter(p => p.type === 'subscription'),
+        free: providers.filter(p => p.type === 'free')
+      };
+
+      return reply.send({
+        success: true,
+        data: {
+          grouped,
+          all: providers,
+          summary: {
+            totalProviders: providers.length,
+            configuredCount: providers.filter(p => p.status === 'configured').length,
+            byType: {
+              local: grouped.local.length,
+              subscription: grouped.subscription.length,
+              free: grouped.free.length
+            }
+          }
+        },
+        meta: {
+          timestamp: new Date().toISOString()
+        }
+      });
+    } catch (error) {
+      logger.error({ error }, 'Failed to fetch providers');
+      return reply.status(500).send({
+        success: false,
+        error: 'Failed to fetch provider information'
+      });
+    }
+  });
+
   // Dashboard UI endpoint (served at /api/dashboard/index for Cloudflare tunnel compatibility)
   fastify.get('/api/dashboard/index', async (_request: FastifyRequest, reply: FastifyReply) => {
     try {