Rene Fichtmueller 773fd368e0 fix: parse DATABASE_URL in pool clients + extend Ollama health timeout to 15s
Gateway and learning DB clients now prefer DATABASE_URL connection string
over individual DB_* env vars — matches ecosystem.config.cjs convention.
Ollama health check timeout increased 5→15s for Cloudflare tunnel latency.
2026-04-02 23:03:31 +02:00

132 lines
4.3 KiB
TypeScript

import type { FastifyInstance, FastifyRequest, FastifyReply } from 'fastify';
import { getOllamaBaseUrl } from '../pipeline/router.js';
import { getAllBreakerStates } from '../circuit-breaker/ollama-breaker.js';
import { query } from '../db/client.js';
import { getPgBoss } from '../queue/pg-boss-client.js';
import { logger } from '../observability/logger.js';
/**
 * Shape of the JSON document sent by the `/health` endpoint.
 */
interface HealthStatus {
// Overall verdict: `down` when the Ollama or database check is down,
// `degraded` when the queue check is down or any circuit breaker is `open`,
// otherwise `ok`.
status: 'ok' | 'degraded' | 'down';
// Time the report was assembled, produced by `Date.prototype.toISOString()`.
timestamp: string;
// Individual results that the overall verdict is derived from.
checks: {
// Result of requesting `/api/tags` from the Ollama base URL.
ollama: { status: 'ok' | 'down'; latency_ms?: number; error?: string };
// Result of running `SELECT 1` through the shared DB client.
database: { status: 'ok' | 'down'; error?: string };
// Result of the pg-boss probe for the `llm-batch` queue; `unknown` is reported
// when `getPgBoss()` returns no instance.
queue: { status: 'ok' | 'down' | 'unknown'; depth?: number; error?: string };
// Number of `review_queue` rows whose `decision` is NULL (0 if the count query fails).
review_queue: { unreviewed_count: number };
// Circuit-breaker states as returned by `getAllBreakerStates()`, keyed by breaker name.
circuit_breakers: Record<string, 'closed' | 'open' | 'half-open'>;
};
}
/**
 * Probes the Ollama HTTP API by requesting `/api/tags` and timing the round trip.
 *
 * Never throws: network failures, timeouts and non-2xx responses are all
 * reported as `status: 'down'` together with an `error` description.
 *
 * @param baseUrl - Base URL of the Ollama server; `/api/tags` is appended verbatim.
 * @param timeoutMs - Abort the request after this many milliseconds (default 15000).
 * @returns `ok` with the measured latency, or `down` with an error message and
 *   the time that elapsed before the failure was observed.
 */
async function checkOllama(
  baseUrl: string,
  timeoutMs = 15000,
): Promise<{ status: 'ok' | 'down'; latency_ms?: number; error?: string }> {
  const start = Date.now();
  try {
    const response = await fetch(`${baseUrl}/api/tags`, {
      signal: AbortSignal.timeout(timeoutMs),
    });
    // Latency is taken as soon as the headers arrive, before any cleanup work.
    const latency_ms = Date.now() - start;

    // Only the status code matters for this probe. Cancel the unread body so
    // the underlying connection is released right away instead of lingering
    // until the response object is garbage collected.
    try {
      await response.body?.cancel();
    } catch {
      // Best-effort cleanup: a failed cancel must not change the probe result.
    }

    if (!response.ok) {
      return { status: 'down', error: `HTTP ${response.status}`, latency_ms };
    }
    return { status: 'ok', latency_ms };
  } catch (err) {
    // Report elapsed time on failures too, so a timeout is distinguishable
    // from an immediate connection refusal.
    return {
      status: 'down',
      error: err instanceof Error ? err.message : 'Unknown error',
      latency_ms: Date.now() - start,
    };
  }
}
/**
 * Verifies database connectivity by issuing a trivial `SELECT 1`.
 *
 * Never throws; a failing query is turned into a `down` result.
 *
 * @returns `{ status: 'ok' }` when the query succeeds, otherwise
 *   `{ status: 'down', error }` carrying the failure message
 *   (`'Unknown error'` when the thrown value is not an `Error`).
 */
async function checkDatabase(): Promise<{ status: 'ok' | 'down'; error?: string }> {
  let failure: string | undefined;

  try {
    await query('SELECT 1');
  } catch (cause) {
    failure = cause instanceof Error ? cause.message : 'Unknown error';
  }

  if (failure !== undefined) {
    return { status: 'down', error: failure };
  }
  return { status: 'ok' };
}
/**
 * Reports whether the pg-boss queue is reachable and how many `llm-batch`
 * jobs are still outstanding.
 *
 * Never throws; a failing size query is turned into a `down` result.
 *
 * @returns
 *   - `{ status: 'unknown' }` when `getPgBoss()` yields no instance,
 *   - `{ status: 'ok', depth }` where `depth` is the number of jobs that have
 *     not completed yet (waiting plus currently running),
 *   - `{ status: 'down', error }` when the size query fails.
 */
async function checkQueue(): Promise<{ status: 'ok' | 'down' | 'unknown'; depth?: number; error?: string }> {
  const boss = getPgBoss();
  if (!boss) return { status: 'unknown' };

  try {
    // `before: 'completed'` counts every job whose state sorts before
    // `completed` (created, retry and active), i.e. waiting + running jobs.
    // A separate `before: 'active'` count (created + retry) is a subset of
    // that number, so adding the two together would count waiting jobs twice.
    const outstanding = await boss.getQueueSize('llm-batch', { before: 'completed' });
    return { status: 'ok', depth: outstanding ?? 0 };
  } catch (err) {
    return { status: 'down', error: err instanceof Error ? err.message : 'Unknown error' };
  }
}
/**
 * Counts the `review_queue` rows that have no decision yet.
 *
 * Best-effort: any failure while querying or reading the result is swallowed
 * and reported as `0`, so this never throws.
 *
 * @returns The count parsed as a base-10 integer, or `0` when the query fails
 *   or returns no row.
 */
async function getReviewQueueCount(): Promise<number> {
  const pendingSql = 'SELECT COUNT(*) as count FROM review_queue WHERE decision IS NULL';

  try {
    const { rows } = await query<{ count: string }>(pendingSql);
    // COUNT(*) comes back as text here, hence the explicit integer parse.
    const rawCount = rows[0]?.count ?? '0';
    return Number.parseInt(rawCount, 10);
  } catch {
    return 0;
  }
}
/**
 * Registers the health endpoints on the given Fastify instance.
 *
 * - `GET /health`: runs the Ollama, database, queue and review-queue checks
 *   concurrently, adds the circuit-breaker states and replies with a
 *   `HealthStatus` document. Responds 503 when Ollama or the database is down,
 *   200 otherwise (including the `degraded` case).
 * - `GET /health/live`: liveness probe that performs no dependency checks.
 * - `GET /health/ready`: readiness probe that only requires the database.
 *
 * @param fastify - Fastify instance the routes are attached to.
 */
export async function healthRoute(fastify: FastifyInstance): Promise<void> {
  // Aggregated health report.
  fastify.get('/health', async (_request: FastifyRequest, reply: FastifyReply) => {
    const [ollama, database, queue, unreviewedCount] = await Promise.all([
      checkOllama(getOllamaBaseUrl()),
      checkDatabase(),
      checkQueue(),
      getReviewQueueCount(),
    ]);
    const circuitBreakers = getAllBreakerStates();

    // Ollama and the database are hard dependencies; the queue and the
    // circuit breakers can only downgrade the verdict to `degraded`.
    const coreHealthy = ollama.status !== 'down' && database.status !== 'down';
    const impaired = queue.status === 'down' || Object.values(circuitBreakers).includes('open');

    let status: HealthStatus['status'];
    if (!coreHealthy) {
      status = 'down';
    } else if (impaired) {
      status = 'degraded';
    } else {
      status = 'ok';
    }

    const report: HealthStatus = {
      status,
      timestamp: new Date().toISOString(),
      checks: {
        ollama,
        database,
        queue,
        review_queue: { unreviewed_count: unreviewedCount },
        circuit_breakers: circuitBreakers,
      },
    };

    if (status !== 'ok') {
      logger.warn({ status, checks: report.checks }, 'Health check degraded');
    }

    // Only a `down` verdict is surfaced as an HTTP failure.
    return reply.status(coreHealthy ? 200 : 503).send(report);
  });

  // Liveness probe in the Kubernetes style: answers without touching any dependency.
  fastify.get('/health/live', async (_request: FastifyRequest, reply: FastifyReply) =>
    reply.send({ status: 'alive', ts: Date.now() }),
  );

  // Readiness probe in the Kubernetes style: ready once the database answers.
  fastify.get('/health/ready', async (_request: FastifyRequest, reply: FastifyReply) => {
    const { status: dbStatus } = await checkDatabase();
    if (dbStatus !== 'down') {
      return reply.send({ status: 'ready' });
    }
    return reply.status(503).send({ status: 'not ready', reason: 'database unavailable' });
  });
}