fix: serialize Claude API calls via queue to prevent 429 rate-limit spam
The Tier-1 Anthropic API limit is 40K TPM (tokens per minute). With ~20K tokens per pipeline step, concurrent calls hit that limit immediately. enqueueClaude() serializes all generateClaude() calls so that only one runs at a time, eliminating the flood of 429 → retry → 429 → retry loops.
This commit is contained in:
parent
cf04549b1b
commit
b6928265bf
@ -31,6 +31,16 @@ function sleep(ms: number): Promise<void> {
|
||||
// ANTHROPIC CLAUDE PROVIDER
|
||||
// ═══════════════════════════════════════════════════════
|
||||
|
||||
// Serialize Claude API calls to stay within TPM limits
|
||||
// Tier-1 allows 40,000 TPM (tokens per minute); at ~20K tokens per step, only one call at a time is safe
|
||||
// Tail of the promise chain that enqueueClaude() appends to; starts out already resolved.
let claudeQueue: Promise<unknown> = Promise.resolve();
|
||||
|
||||
/**
 * Runs `fn` only after every previously enqueued task has settled, so that
 * at most one enqueued task is in flight at a time.
 *
 * @param fn - Task to start once the queue reaches it.
 * @returns A promise that settles with whatever `fn` resolves or rejects with.
 */
function enqueueClaude<T>(fn: () => Promise<T>): Promise<T> {
  const runTask = (): Promise<T> => fn();
  const pending: Promise<T> = claudeQueue.then(runTask);

  // The new tail absorbs a rejection so that one failed task does not block
  // the tasks queued behind it; the caller still observes the failure
  // through `pending`.
  claudeQueue = pending.then(undefined, () => {});

  return pending;
}
|
||||
|
||||
async function generateClaude(
|
||||
systemPrompt: string,
|
||||
userPrompt: string,
|
||||
@ -40,6 +50,7 @@ async function generateClaude(
|
||||
throw new Error("ANTHROPIC_API_KEY not set — cannot use Claude provider");
|
||||
}
|
||||
|
||||
return enqueueClaude(async () => {
|
||||
const startTime = Date.now();
|
||||
|
||||
const resp = await fetch("https://api.anthropic.com/v1/messages", {
|
||||
@ -90,6 +101,7 @@ async function generateClaude(
|
||||
totalDuration: duration * 1_000_000, // ns for compat
|
||||
evalCount: data.usage.output_tokens,
|
||||
};
|
||||
}); // end enqueueClaude
|
||||
}
|
||||
|
||||
// ═══════════════════════════════════════════════════════
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user