fix: serialize Claude API calls via queue to prevent 429 rate-limit spam

Tier-1 Anthropic API has 40K TPM — with ~20K tokens per pipeline step,
concurrent calls immediately hit the limit. enqueueClaude() serializes
all generateClaude() calls so only one runs at a time, eliminating
the flood of 429-retry-429-retry loops.
This commit is contained in:
Rene Fichtmueller 2026-04-06 00:57:03 +02:00
parent b2f3a4c450
commit 4e813024f1

View File

@ -31,6 +31,16 @@ function sleep(ms: number): Promise<void> {
// ANTHROPIC CLAUDE PROVIDER // ANTHROPIC CLAUDE PROVIDER
// ═══════════════════════════════════════════════════════ // ═══════════════════════════════════════════════════════
// Claude API calls are funneled through a single promise chain so that they
// run one at a time and stay within TPM limits.
// Tier-1 has 40,000 TPM — with ~20K tokens/step, only 1 concurrent call is safe.
let claudeQueue: Promise<unknown> = Promise.resolve();

/**
 * Schedules `fn` to run after every previously enqueued task has settled.
 *
 * @param fn - Task to invoke once it reaches the head of the queue.
 * @returns A promise that settles with the outcome of `fn`; a rejection from
 *   `fn` is delivered to the caller.
 */
function enqueueClaude<T>(fn: () => Promise<T>): Promise<T> {
  const previous = claudeQueue;
  const task: Promise<T> = previous.then(() => fn());
  // The stored tail swallows rejections so a failed task does not prevent
  // later tasks from running; the caller still sees the failure via `task`.
  claudeQueue = task.then(undefined, () => {});
  return task;
}
async function generateClaude( async function generateClaude(
systemPrompt: string, systemPrompt: string,
userPrompt: string, userPrompt: string,
@ -40,6 +50,7 @@ async function generateClaude(
throw new Error("ANTHROPIC_API_KEY not set — cannot use Claude provider"); throw new Error("ANTHROPIC_API_KEY not set — cannot use Claude provider");
} }
return enqueueClaude(async () => {
const startTime = Date.now(); const startTime = Date.now();
const resp = await fetch("https://api.anthropic.com/v1/messages", { const resp = await fetch("https://api.anthropic.com/v1/messages", {
@ -90,6 +101,7 @@ async function generateClaude(
totalDuration: duration * 1_000_000, // ns for compat totalDuration: duration * 1_000_000, // ns for compat
evalCount: data.usage.output_tokens, evalCount: data.usage.output_tokens,
}; };
}); // end enqueueClaude
} }
// ═══════════════════════════════════════════════════════ // ═══════════════════════════════════════════════════════