fix: serialize Claude API calls via queue to prevent 429 rate-limit spam

Tier-1 Anthropic API has 40K TPM — with ~20K tokens per pipeline step,
concurrent calls immediately hit the limit. enqueueClaude() serializes
all generateClaude() calls so only one runs at a time, eliminating
the flood of 429-retry-429-retry loops.
This commit is contained in:
Rene Fichtmueller 2026-04-06 00:57:03 +02:00
parent cf04549b1b
commit b6928265bf

View File

@ -31,6 +31,16 @@ function sleep(ms: number): Promise<void> {
// ANTHROPIC CLAUDE PROVIDER
// ═══════════════════════════════════════════════════════
// Claude requests are funnelled through a single promise chain so that at most
// one of them is in flight at any moment.
// Rationale: Tier-1 allows 40,000 TPM and a pipeline step uses roughly 20K
// tokens, so running calls concurrently is not safe.

/** Tail of the chain: settles once the most recently enqueued task has finished. */
let claudeQueue: Promise<unknown> = Promise.resolve();

/**
 * Runs `fn` after every previously enqueued task has settled.
 *
 * @param fn - Task to run; it is not invoked until the current tail settles.
 * @returns A promise that mirrors the outcome of `fn` (its value, its
 *   rejection, or a rejection carrying whatever `fn` threw synchronously).
 */
function enqueueClaude<T>(fn: () => Promise<T>): Promise<T> {
  const ignoreOutcome = (): void => {};
  const pending: Promise<T> = claudeQueue.then(() => fn());
  // The tail must never reject, otherwise one failed task would prevent every
  // later task from starting. The caller still sees the failure via `pending`.
  claudeQueue = pending.then(ignoreOutcome, ignoreOutcome);
  return pending;
}
async function generateClaude(
systemPrompt: string,
userPrompt: string,
@ -40,6 +50,7 @@ async function generateClaude(
throw new Error("ANTHROPIC_API_KEY not set — cannot use Claude provider");
}
return enqueueClaude(async () => {
const startTime = Date.now();
const resp = await fetch("https://api.anthropic.com/v1/messages", {
@ -90,6 +101,7 @@ async function generateClaude(
totalDuration: duration * 1_000_000, // ns for compat
evalCount: data.usage.output_tokens,
};
}); // end enqueueClaude
}
// ═══════════════════════════════════════════════════════