fix: serialize Claude API calls via queue to prevent 429 rate-limit spam
The Tier-1 Anthropic API allows 40K tokens per minute (TPM). With ~20K tokens per pipeline step, concurrent calls hit that limit immediately. enqueueClaude() serializes all generateClaude() calls so that only one runs at a time, eliminating the flood of 429-retry-429-retry loops.
This commit is contained in:
parent
b2f3a4c450
commit
4e813024f1
@ -31,6 +31,16 @@ function sleep(ms: number): Promise<void> {
|
|||||||
// ANTHROPIC CLAUDE PROVIDER
|
// ANTHROPIC CLAUDE PROVIDER
|
||||||
// ═══════════════════════════════════════════════════════
|
// ═══════════════════════════════════════════════════════
|
||||||
|
|
||||||
|
// Claude requests run strictly one after another to stay under the TPM limit.
// Tier-1 has 40,000 TPM — at ~20K tokens/step, only a single in-flight call is safe.
// `claudeQueue` is the tail of the chain: it settles once the most recently
// enqueued task has finished, whether that task succeeded or failed.
let claudeQueue: Promise<unknown> = Promise.resolve();

/**
 * Appends `fn` to the Claude call queue and runs it after every previously
 * enqueued task has settled.
 *
 * @param fn - Task to run once the queue reaches it.
 * @returns A promise that settles with the outcome of `fn` (value or rejection).
 */
function enqueueClaude<T>(fn: () => Promise<T>): Promise<T> {
  const tail = claudeQueue;
  const outcome: Promise<T> = tail.then(() => {
    return fn();
  });

  // The stored tail swallows rejections so a failed task does not block the
  // tasks queued behind it; the caller still sees the failure via `outcome`.
  claudeQueue = outcome.catch(() => {
    /* intentionally ignored for queue bookkeeping */
  });

  return outcome;
}
|
||||||
|
|
||||||
async function generateClaude(
|
async function generateClaude(
|
||||||
systemPrompt: string,
|
systemPrompt: string,
|
||||||
userPrompt: string,
|
userPrompt: string,
|
||||||
@ -40,6 +50,7 @@ async function generateClaude(
|
|||||||
throw new Error("ANTHROPIC_API_KEY not set — cannot use Claude provider");
|
throw new Error("ANTHROPIC_API_KEY not set — cannot use Claude provider");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
return enqueueClaude(async () => {
|
||||||
const startTime = Date.now();
|
const startTime = Date.now();
|
||||||
|
|
||||||
const resp = await fetch("https://api.anthropic.com/v1/messages", {
|
const resp = await fetch("https://api.anthropic.com/v1/messages", {
|
||||||
@ -90,6 +101,7 @@ async function generateClaude(
|
|||||||
totalDuration: duration * 1_000_000, // ns for compat
|
totalDuration: duration * 1_000_000, // ns for compat
|
||||||
evalCount: data.usage.output_tokens,
|
evalCount: data.usage.output_tokens,
|
||||||
};
|
};
|
||||||
|
}); // end enqueueClaude
|
||||||
}
|
}
|
||||||
|
|
||||||
// ═══════════════════════════════════════════════════════
|
// ═══════════════════════════════════════════════════════
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user