/** * Multi-Model Race Mode * * Sends the same prompt to N models in parallel and returns according to * the chosen strategy: * * • 'first' — first non-error response wins. Cancels in-flight losers. * • 'best' — wait for all (or timeout), pick highest confidence score. * • 'consensus' — wait for all, return majority answer + agreement score. * * All candidate runs are audited to `race_mode_results` for analysis — * which model is actually fastest, which gives the highest confidence, etc. */ import type { Pool } from 'pg'; import { logger } from '../observability/logger.js'; export type RaceStrategy = 'first' | 'best' | 'consensus'; export interface RaceCandidateResult { model: string; status: 'ok' | 'error'; output?: string; confidence?: number; cost?: number; latencyMs: number; errorMessage?: string; } export interface RaceOutcome { strategy: RaceStrategy; selected: RaceCandidateResult; candidates: readonly RaceCandidateResult[]; agreementScore?: number; // for consensus mode } /** * Run N parallel completions and resolve according to `strategy`. * The `runner` callback is responsible for actually invoking the gateway * pipeline — this module is strategy-only and stays decoupled. */ export async function runRace( models: readonly string[], runner: (model: string, signal: AbortSignal) => Promise, strategy: RaceStrategy, opts: { timeoutMs?: number } = {} ): Promise<{ outcome: RaceOutcome; results: R[] }> { if (models.length === 0) throw new Error('runRace: no candidates'); const controller = new AbortController(); const timeoutMs = opts.timeoutMs ?? 60_000; const timeout = setTimeout(() => controller.abort(), timeoutMs); const promises: Array> = models.map((model) => runner(model, controller.signal).catch( (err): R => ({ model, status: 'error', errorMessage: err instanceof Error ? err.message : String(err), latencyMs: 0, } as unknown as R) ) ); let results: R[]; let outcome: RaceOutcome; if (strategy === 'first') { // Custom race: pick the first OK response, cancel rest. const firstOk = await new Promise((resolve, reject) => { let pending = promises.length; let firstError: R | null = null; promises.forEach((p) => { p.then((r) => { if (r.status === 'ok') { resolve(r); } else { if (!firstError) firstError = r; pending -= 1; if (pending === 0) reject(new Error('all candidates errored')); } }); }); // Backstop on overall timeout setTimeout(() => { if (firstError) resolve(firstError); else reject(new Error('race timeout')); }, timeoutMs); }); results = await Promise.all(promises); controller.abort(); outcome = { strategy, selected: firstOk, candidates: results }; } else if (strategy === 'best') { results = await Promise.all(promises); const ok = results.filter((r) => r.status === 'ok'); const winner = ok.length > 0 ? ok.sort((a, b) => (b.confidence ?? 0) - (a.confidence ?? 0))[0] : results[0]; outcome = { strategy, selected: winner, candidates: results }; } else { // 'consensus' — group identical normalised outputs, pick majority results = await Promise.all(promises); const ok = results.filter((r) => r.status === 'ok'); const buckets = new Map(); for (const r of ok) { const key = (r.output ?? '').trim().toLowerCase().replace(/\s+/g, ' ').slice(0, 256); const arr = buckets.get(key); if (arr) arr.push(r); else buckets.set(key, [r]); } const sorted = [...buckets.entries()].sort((a, b) => b[1].length - a[1].length); const winnerBucket = sorted[0]?.[1]; const winner = winnerBucket && winnerBucket.length > 0 ? winnerBucket.sort((a, b) => (b.confidence ?? 0) - (a.confidence ?? 0))[0] : results[0]; const agreementScore = ok.length > 0 ? (winnerBucket?.length ?? 0) / ok.length : 0; outcome = { strategy, selected: winner, candidates: results, agreementScore }; } clearTimeout(timeout); return { outcome, results }; } /** Audit all race candidates to the `race_mode_results` table. */ export async function auditRaceResults( db: Pool, callId: string, callerId: string, taskType: string, outcome: RaceOutcome ): Promise { const firstFinishedModel = outcome.strategy === 'first' ? outcome.selected.model : outcome.candidates.reduce( (best: RaceCandidateResult, c: RaceCandidateResult) => c.status === 'ok' && c.latencyMs < (best.latencyMs || Infinity) ? c : best, outcome.candidates[0] ).model; for (const c of outcome.candidates) { try { await db.query( ` INSERT INTO race_mode_results ( call_id, caller_id, task_type, strategy, candidate_model, finished_first, selected, latency_ms, confidence, cost_usd, error_message, output_preview ) VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12) `, [ callId, callerId.toLowerCase(), taskType, outcome.strategy, c.model, c.model === firstFinishedModel, c.model === outcome.selected.model, c.latencyMs, c.confidence ?? null, c.cost ?? null, c.errorMessage ?? null, c.output?.slice(0, 512) ?? null, ] ); } catch (err) { logger.warn({ err, model: c.model }, 'race-mode: audit insert failed'); } } } /** Aggregate race statistics for the dashboard. */ export async function getRaceStats( db: Pool, hoursBack: number = 24 ): Promise<{ totalRaces: number; byStrategy: Record; fastestModel: { model: string; wins: number } | null; highestConfidenceModel: { model: string; avg: number } | null; }> { try { const [total, byStrategy, fastest, byConfidence] = await Promise.all([ db.query( `SELECT COUNT(DISTINCT call_id)::INT AS n FROM race_mode_results WHERE created_at > NOW() - MAKE_INTERVAL(hours => $1)`, [hoursBack] ), db.query( `SELECT strategy, COUNT(DISTINCT call_id)::INT AS n FROM race_mode_results WHERE created_at > NOW() - MAKE_INTERVAL(hours => $1) GROUP BY strategy`, [hoursBack] ), db.query( `SELECT candidate_model AS model, COUNT(*)::INT AS wins FROM race_mode_results WHERE finished_first = true AND created_at > NOW() - MAKE_INTERVAL(hours => $1) GROUP BY candidate_model ORDER BY wins DESC LIMIT 1`, [hoursBack] ), db.query( `SELECT candidate_model AS model, AVG(confidence)::NUMERIC(4,2) AS avg FROM race_mode_results WHERE confidence IS NOT NULL AND created_at > NOW() - MAKE_INTERVAL(hours => $1) GROUP BY candidate_model ORDER BY avg DESC LIMIT 1`, [hoursBack] ), ]); const byStrategyMap: Record = {}; for (const row of byStrategy.rows) byStrategyMap[row.strategy] = parseInt(row.n, 10) || 0; return { totalRaces: parseInt(total.rows[0]?.n ?? '0', 10), byStrategy: byStrategyMap, fastestModel: fastest.rows[0] ? { model: fastest.rows[0].model, wins: parseInt(fastest.rows[0].wins, 10) } : null, highestConfidenceModel: byConfidence.rows[0] ? { model: byConfidence.rows[0].model, avg: parseFloat(byConfidence.rows[0].avg) } : null, }; } catch (err) { logger.warn({ err }, 'race-mode: stats failed (table missing?)'); return { totalRaces: 0, byStrategy: {}, fastestModel: null, highestConfidenceModel: null }; } }