New OEM transceiver seed scrapers (94 cron-scheduled, 24/7): - Media/Broadcast: Evertz, Grass Valley, Haivision, Viasat - Asian Optical: FiberHome, Oplink, Accelink, Hisense Broadband - Optical Mfrs: Lumentum, II-VI/Coherent, Source Photonics, O-Net, InnoLight, AOI, Sumitomo Electric, NeoPhotonics - Industrial: GE Grid, Schweitzer, Moxa Industrial, Cisco IE, Phoenix Contact, Beckhoff, Omron, ABB, Siemens, Schneider, Rockwell, Belden - Enterprise/DC: Arista, Pica8, Pluribus, DriveNets, Cisco (Meraki/Catalyst/Nexus/ASR) - Cloud: AWS, Azure, Google Cloud, Meta - Storage: NetApp, Pure Storage, HPE Storage, IBM Storage, Dell Storage, Hitachi Vantara - 5G/RAN: Samsung Networks, Nokia AirScale, Ericsson RAN, Mavenir - Security: Check Point, Barracuda, Fortinet, Palo Alto - Telecom Optical: ADVA, PacketLight, FiberHome, Accelink, Hisense API: tip-llm-guided inference layer (strict schema + repair-retry + safe fallback) - POST /api/tip-llm/infer|research-plan|extract|finding|health - Hard JSON schema enforcement, create_finding=false on empty evidence - Confidence gate (>= 0.4), validation with consistency check Build: added incremental=true to scraper tsconfig (OOM prevention) Scheduler: 87 → 94 registered workers
359 lines
13 KiB
TypeScript
359 lines
13 KiB
TypeScript
/**
 * LLM client for blog generation — supports Ollama (local), Anthropic Claude (API),
 * and Claude-Code (flat-rate via claude-bridge on Erik).
 *
 * Provider selection:
 *   BLOG_LLM_PROVIDER=claude-code → Claude via claude-bridge (flat-rate, recommended)
 *   BLOG_LLM_PROVIDER=anthropic   → Claude Sonnet/Haiku via Anthropic API
 *   BLOG_LLM_PROVIDER=ollama      → local adapter bridge / Ollama-compatible endpoint (default)
 *
 * Claude-code is preferred: uses Claude Code subscription (flat-rate), no API costs.
 * The default local blog model is the latest RunPod-trained FO_BlogLLM adapter.
 */
|
|
|
|
// ── Environment-driven configuration ────────────────────────────────
// Each value falls back to its default when the variable is unset OR empty
// (`||` is deliberate: an empty string counts as "not configured").

/** Base URL of the Ollama-compatible endpoint. */
const OLLAMA_URL = process.env.OLLAMA_URL || "http://localhost:11434";

/** Model name sent to the Ollama endpoint. */
const LLM_MODEL = process.env.OLLAMA_LLM_MODEL || "fo-blog-v7";

/** Anthropic API key; an empty string means the Anthropic provider is not usable. */
const ANTHROPIC_API_KEY = process.env.ANTHROPIC_API_KEY || "";

/** Model name sent to the Anthropic Messages API. */
const ANTHROPIC_MODEL = process.env.ANTHROPIC_MODEL || "claude-sonnet-4-20250514";

/** Selected provider: "claude-code", "anthropic", or "ollama" (default). */
const BLOG_LLM_PROVIDER = process.env.BLOG_LLM_PROVIDER || "ollama";

/** Base URL of the claude-bridge service used by the "claude-code" provider. */
const CLAUDE_BRIDGE_URL = process.env.CLAUDE_BRIDGE_URL || "http://localhost:3250";
|
|
|
|
/** Provider-independent result returned by every generation call in this module. */
interface LlmResponse {
  /** Generated text. */
  text: string;
  /** Model identifier as reported by the provider ("claude-code" for the bridge). */
  model: string;
  /** Duration in nanoseconds (Ollama's unit; the Claude paths convert from milliseconds). */
  totalDuration: number;
  /** Output token count; the claude-bridge path approximates it from the text length. */
  evalCount: number;
}
|
|
|
|
/**
 * Resolve after roughly `ms` milliseconds.
 *
 * @param ms Delay handed to `setTimeout`.
 * @returns A promise that resolves with no value once the timer fires.
 */
function sleep(ms: number): Promise<void> {
  return new Promise<void>((done) => {
    setTimeout(done, ms);
  });
}
|
|
|
|
// ═══════════════════════════════════════════════════════
// ANTHROPIC CLAUDE PROVIDER
// ═══════════════════════════════════════════════════════
|
|
|
|
// Serialize Claude API calls to stay within TPM limits.
// Tier-1 has 40,000 TPM — with ~20K tokens/step, only 1 concurrent call is safe.

/** Tail of the serialization chain; each new request runs after this promise settles. */
let claudeQueue: Promise<unknown> = Promise.resolve();

/** Longest a request may wait behind earlier requests before it is rejected (15 minutes). */
const CLAUDE_QUEUE_MAX_WAIT_MS = 900000;

/**
 * Detach from the current chain so that new requests start immediately.
 *
 * Requests already chained onto the old tail are not cancelled: they still run
 * (or are rejected by the stall check) once their predecessor settles.
 */
export function resetClaudeQueue(): void {
  claudeQueue = Promise.resolve();
  console.log("[LLM] Claude queue reset — previous stuck requests cleared");
}

/**
 * Run `fn` after every previously enqueued Claude request has settled.
 *
 * @param fn Work to execute once this request reaches the head of the queue.
 * @returns The promise produced by `fn`, or a rejection if this request waited
 *          longer than 15 minutes before its turn came.
 */
function enqueueClaude<T>(fn: () => Promise<T>): Promise<T> {
  // Captured per request. A shared "last enqueue" timestamp would be overwritten by
  // every later enqueue, hiding a long wait whenever something newer was queued.
  const enqueuedAt = Date.now();

  const result = claudeQueue.then(() => {
    // Prevents a backlog from piling up behind a stuck request.
    if (Date.now() - enqueuedAt > CLAUDE_QUEUE_MAX_WAIT_MS) {
      console.warn("[LLM] Claude queue auto-reset after 15min stall");
      throw new Error("Claude queue auto-reset: previous request timed out");
    }
    return fn();
  });

  // The chain itself must never reject, otherwise one failure would skip every later request.
  claudeQueue = result.catch(() => {});
  return result;
}
|
|
|
|
/**
 * Call the Anthropic Messages API directly, without going through the serialization queue.
 *
 * 429 responses are retried from inside this function (rather than by re-enqueueing)
 * to avoid the circular-promise deadlock that a recursive enqueueClaude would create.
 *
 * @param systemPrompt Sent as the `system` field.
 * @param userPrompt   Sent as the single user message.
 * @param options      Optional temperature (default 0.7), maxTokens (default 4096) and
 *                     per-request timeoutMs (default 300000).
 * @param retryCount   Number of 429 retries already consumed; at most 3 are allowed.
 * @returns Concatenated text blocks, the reported model, duration in ns and output token count.
 * @throws Error on any non-OK response that is not a retryable 429.
 */
async function claudeApiCall(
  systemPrompt: string,
  userPrompt: string,
  options?: { temperature?: number; maxTokens?: number; timeoutMs?: number },
  retryCount = 0,
): Promise<LlmResponse> {
  const maxRetries = 3;
  const backoffMs = [10000, 30000, 60000];

  for (let attempt = retryCount; ; attempt++) {
    // The timer restarts on every attempt, so the logged duration excludes back-off waits.
    const startedAt = Date.now();

    const requestBody = JSON.stringify({
      model: ANTHROPIC_MODEL,
      max_tokens: options?.maxTokens ?? 4096,
      temperature: options?.temperature ?? 0.7,
      system: systemPrompt,
      messages: [{ role: "user", content: userPrompt }],
    });

    const resp = await fetch("https://api.anthropic.com/v1/messages", {
      method: "POST",
      headers: {
        "Content-Type": "application/json",
        "x-api-key": ANTHROPIC_API_KEY,
        "anthropic-version": "2023-06-01",
      },
      body: requestBody,
      signal: AbortSignal.timeout(options?.timeoutMs ?? 300000),
    });

    if (resp.ok) {
      const data = await resp.json() as {
        content: Array<{ type: string; text: string }>;
        model: string;
        usage: { input_tokens: number; output_tokens: number };
      };

      // Only text blocks contribute to the result; other block types are dropped.
      let text = "";
      for (const part of data.content) {
        if (part.type === "text") text += part.text;
      }

      const duration = Date.now() - startedAt;
      console.log(`[LLM] Claude ${data.model}: ${data.usage.input_tokens}+${data.usage.output_tokens} tokens, ${duration}ms`);

      return {
        text,
        model: data.model,
        totalDuration: duration * 1_000_000, // ns for compat
        evalCount: data.usage.output_tokens,
      };
    }

    const errText = await resp.text();
    const canRetry = resp.status === 429 && attempt < maxRetries;
    if (!canRetry) {
      throw new Error(`Claude API failed: ${resp.status} ${errText.slice(0, 200)}`);
    }

    const delay = backoffMs[attempt] ?? 60000;
    console.log(`[LLM] Claude 429 — retrying in ${delay / 1000}s (attempt ${attempt + 1}/${maxRetries})...`);
    await sleep(delay);
  }
}
|
|
|
|
/**
 * Generate text with the Anthropic provider, one request at a time.
 *
 * @param systemPrompt System prompt forwarded to the API call.
 * @param userPrompt   User prompt forwarded to the API call.
 * @param options      Optional temperature, maxTokens and timeoutMs, passed through unchanged.
 * @throws Error when ANTHROPIC_API_KEY is not configured.
 */
async function generateClaude(
  systemPrompt: string,
  userPrompt: string,
  options?: { temperature?: number; maxTokens?: number; timeoutMs?: number },
): Promise<LlmResponse> {
  const hasKey = Boolean(ANTHROPIC_API_KEY);
  if (!hasKey) {
    throw new Error("ANTHROPIC_API_KEY not set — cannot use Claude provider");
  }

  // The queue serializes requests; 429 retries happen inside claudeApiCall itself
  // (never by re-entering generateClaude) to avoid a circular-promise deadlock.
  const runRequest = (): Promise<LlmResponse> => claudeApiCall(systemPrompt, userPrompt, options);
  return enqueueClaude(runRequest);
}
|
|
|
|
// ═══════════════════════════════════════════════════════
// OLLAMA PROVIDER (existing)
// ═══════════════════════════════════════════════════════
|
|
|
|
/** Tail of the Ollama serialization chain; each new request runs after this promise settles. */
let ollamaQueue: Promise<unknown> = Promise.resolve();

/** Number of requests enqueued but not yet settled (running or waiting). */
let queueDepth = 0;

/** Longest a request may wait behind earlier requests before it is rejected (15 minutes). */
const OLLAMA_QUEUE_MAX_WAIT_MS = 900000;

/**
 * Detach from the current chain and zero the depth counter so new requests start immediately.
 *
 * Requests already chained onto the old tail are not cancelled; they still run
 * (or are rejected by the stall check) once their predecessor settles.
 */
export function resetOllamaQueue(): void {
  ollamaQueue = Promise.resolve();
  queueDepth = 0;
  console.log("[LLM] Queue reset — previous stuck requests cleared");
}

/** Current number of Ollama requests that are running or waiting. */
export function getQueueDepth(): number {
  return queueDepth;
}

/**
 * Run `fn` after every previously enqueued Ollama request has settled.
 *
 * @param fn Work to execute once this request reaches the head of the queue.
 * @returns The promise produced by `fn`, or a rejection if this request waited
 *          longer than 15 minutes before its turn came.
 */
function enqueueOllama<T>(fn: () => Promise<T>): Promise<T> {
  queueDepth++;
  // Captured per request. A shared "last enqueue" timestamp would be overwritten by
  // every later enqueue, hiding a long wait whenever something newer was queued.
  const enqueuedAt = Date.now();

  const result = ollamaQueue.then(() => {
    if (Date.now() - enqueuedAt > OLLAMA_QUEUE_MAX_WAIT_MS) {
      console.warn("[LLM] Queue auto-reset after 15min stall");
      // No decrement here: the settle hook below is the single place that releases
      // this request's slot, so a stalled request is not counted down twice.
      throw new Error("Queue auto-reset: previous request timed out");
    }
    return fn();
  });

  // Keep the chain alive after failures and release exactly one slot per request.
  ollamaQueue = result
    .catch(() => {})
    .then(() => {
      queueDepth = Math.max(0, queueDepth - 1);
    });

  return result;
}
|
|
|
|
/**
 * Generate text through the Ollama-compatible `/api/generate` endpoint.
 *
 * Requests are serialized through the Ollama queue. A 429 response is retried up to
 * three times with 15s / 30s / 60s back-off; any other non-OK status fails immediately.
 *
 * @param systemPrompt Sent as the `system` field.
 * @param userPrompt   Sent as the `prompt` field.
 * @param options      Optional temperature (default 0.7), maxTokens (default 4096, sent as
 *                     `num_predict`) and per-request timeoutMs (default 300000).
 * @returns The response text, model, total duration and eval count reported by the endpoint.
 * @throws Error when retries are exhausted or the endpoint returns a non-OK status.
 */
async function generateOllama(
  systemPrompt: string,
  userPrompt: string,
  options?: { temperature?: number; maxTokens?: number; timeoutMs?: number },
): Promise<LlmResponse> {
  return enqueueOllama(async () => {
    const RETRY_DELAYS = [15000, 30000, 60000];

    for (let attempt = 0; attempt <= RETRY_DELAYS.length; attempt++) {
      if (attempt > 0) {
        // The index is always in range here; the fallback only satisfies strict index checks.
        const delay = RETRY_DELAYS[attempt - 1] ?? 60000;
        console.log(`Blog LLM: 429 rate-limit — retrying in ${delay / 1000}s (attempt ${attempt}/${RETRY_DELAYS.length})`);
        await sleep(delay);
      }

      const resp = await fetch(`${OLLAMA_URL}/api/generate`, {
        method: "POST",
        headers: { "Content-Type": "application/json" },
        body: JSON.stringify({
          model: LLM_MODEL,
          prompt: userPrompt,
          system: systemPrompt,
          stream: false,
          options: {
            temperature: options?.temperature ?? 0.7,
            num_predict: options?.maxTokens ?? 4096,
          },
        }),
        signal: AbortSignal.timeout(options?.timeoutMs ?? 300000),
      });

      if (resp.status === 429) {
        // Discard the unread body so the connection is released before backing off;
        // an unconsumed body can keep the socket pinned until garbage collection.
        // Best-effort: a failure to cancel must not mask the rate-limit handling.
        await resp.body?.cancel().catch(() => {});
        if (attempt < RETRY_DELAYS.length) continue;
        throw new Error(`Ollama generate failed: 429 Too Many Requests (all retries exhausted)`);
      }

      if (!resp.ok) {
        const errText = await resp.text();
        throw new Error(`Ollama generate failed: ${resp.status} ${errText}`);
      }

      const data = await resp.json() as {
        response: string;
        model: string;
        total_duration: number;
        eval_count: number;
      };

      return {
        text: data.response,
        model: data.model,
        totalDuration: data.total_duration,
        evalCount: data.eval_count,
      };
    }

    // Every loop iteration returns, continues or throws; this only satisfies the type checker.
    throw new Error("Ollama generate: unreachable");
  });
}
|
|
|
|
// ═══════════════════════════════════════════════════════
// CLAUDE-CODE PROVIDER (claude-bridge — flat-rate via Claude Code subscription)
// ═══════════════════════════════════════════════════════
|
|
|
|
/**
 * Generate text through the claude-bridge `/api/generate` endpoint.
 *
 * The bridge takes a single prompt, so the system and user prompts are joined with a
 * blank line. Only `options.timeoutMs` is used here (default 300000); temperature and
 * maxTokens are not forwarded.
 *
 * @returns The bridge content with model "claude-code", duration in ns and an
 *          approximate token count (characters / 4, rounded up).
 * @throws Error on a non-OK status, or when the payload reports failure or has no content.
 */
async function generateClaudeBridge(
  systemPrompt: string,
  userPrompt: string,
  options?: { temperature?: number; maxTokens?: number; timeoutMs?: number },
): Promise<LlmResponse> {
  const startedAt = Date.now();
  const timeoutMs = options?.timeoutMs ?? 300000;

  const resp = await fetch(`${CLAUDE_BRIDGE_URL}/api/generate`, {
    method: "POST",
    headers: { "Content-Type": "application/json" },
    // claude-bridge expects a combined prompt — system + user joined with a double newline
    body: JSON.stringify({ prompt: `${systemPrompt}\n\n${userPrompt}` }),
    signal: AbortSignal.timeout(timeoutMs),
  });

  if (!resp.ok) {
    const detail = (await resp.text()).slice(0, 200);
    throw new Error(`Claude bridge failed: ${resp.status} ${detail}`);
  }

  const payload = await resp.json() as { success: boolean; content?: string; error?: string };

  // An unsuccessful payload and an empty/missing content string are both failures.
  const content = payload.success ? payload.content : undefined;
  if (!content) {
    throw new Error(`Claude bridge returned empty response: ${JSON.stringify(payload)}`);
  }

  const elapsedMs = Date.now() - startedAt;
  console.log(`[LLM] Claude-bridge: ${content.length} chars, ${elapsedMs}ms`);

  const approxTokens = Math.ceil(content.length / 4); // approx tokens
  return {
    text: content,
    model: "claude-code",
    totalDuration: elapsedMs * 1_000_000, // ns for compat with Ollama callers
    evalCount: approxTokens,
  };
}
|
|
|
|
// ═══════════════════════════════════════════════════════
// PUBLIC API — auto-routes to configured provider
// ═══════════════════════════════════════════════════════
|
|
|
|
/**
 * Generate text with the provider selected by BLOG_LLM_PROVIDER.
 *
 * - "claude-code" → claude-bridge
 * - "anthropic"   → Anthropic API, but only when an API key is configured
 * - anything else, including "anthropic" without a key → Ollama
 *
 * @param systemPrompt System prompt.
 * @param userPrompt   User prompt.
 * @param options      Optional temperature, maxTokens and timeoutMs, passed to the provider.
 */
export async function generate(
  systemPrompt: string,
  userPrompt: string,
  options?: { temperature?: number; maxTokens?: number; timeoutMs?: number },
): Promise<LlmResponse> {
  switch (BLOG_LLM_PROVIDER) {
    case "claude-code":
      return generateClaudeBridge(systemPrompt, userPrompt, options);
    case "anthropic":
      if (ANTHROPIC_API_KEY) {
        return generateClaude(systemPrompt, userPrompt, options);
      }
      break;
    default:
      break;
  }
  return generateOllama(systemPrompt, userPrompt, options);
}
|
|
|
|
/**
 * Chat-style generation with message history (Ollama only for now).
 *
 * The request goes to the Ollama-compatible `/api/chat` endpoint and is serialized
 * through the Ollama queue. Unlike `generate`, this path does not retry on 429.
 *
 * @param messages Conversation so far, forwarded unchanged.
 * @param options  Optional temperature (default 0.7), maxTokens (default 4096, sent as
 *                 `num_predict`) and per-request timeoutMs (default 300000).
 * @returns The assistant message content plus model, duration and eval count as reported.
 * @throws Error when the endpoint returns a non-OK status.
 */
export async function chat(
  messages: ReadonlyArray<{ role: "system" | "user" | "assistant"; content: string }>,
  options?: { temperature?: number; maxTokens?: number; timeoutMs?: number },
): Promise<LlmResponse> {
  return enqueueOllama(async () => {
    const resp = await fetch(`${OLLAMA_URL}/api/chat`, {
      method: "POST",
      headers: { "Content-Type": "application/json" },
      body: JSON.stringify({
        model: LLM_MODEL,
        messages,
        stream: false,
        options: {
          temperature: options?.temperature ?? 0.7,
          num_predict: options?.maxTokens ?? 4096,
        },
      }),
      // Same default as the generate paths, now overridable per call.
      signal: AbortSignal.timeout(options?.timeoutMs ?? 300000),
    });

    if (!resp.ok) {
      const errText = await resp.text();
      throw new Error(`Ollama chat failed: ${resp.status} ${errText}`);
    }

    const data = await resp.json() as {
      message: { content: string };
      model: string;
      total_duration: number;
      eval_count: number;
    };

    return {
      text: data.message.content,
      model: data.model,
      totalDuration: data.total_duration,
      evalCount: data.eval_count,
    };
  });
}
|
|
|
|
/**
 * Check if the configured LLM provider is available.
 *
 * - "claude-code": GET `<bridge>/health` with a 5s timeout.
 * - "anthropic" with an API key: key presence only, no network call.
 * - otherwise: GET `<ollama>/api/tags` with a 5s timeout and look for the configured model.
 *
 * @returns `ok`, the model and provider that were checked, and `error` describing a failure.
 *          Never throws; network and parsing failures are reported through `error`.
 */
export async function checkHealth(): Promise<{ ok: boolean; model: string; provider: string; error?: string }> {
  // Thrown values are not guaranteed to be Error instances; always produce a string.
  const describe = (err: unknown): string => (err instanceof Error ? err.message : String(err));

  if (BLOG_LLM_PROVIDER === "claude-code") {
    try {
      const resp = await fetch(`${CLAUDE_BRIDGE_URL}/health`, { signal: AbortSignal.timeout(5000) });
      if (!resp.ok) return { ok: false, model: "claude-code", provider: "claude-code", error: `HTTP ${resp.status}` };
      return { ok: true, model: "claude-code", provider: "claude-code" };
    } catch (err) {
      return { ok: false, model: "claude-code", provider: "claude-code", error: describe(err) };
    }
  }

  if (BLOG_LLM_PROVIDER === "anthropic" && ANTHROPIC_API_KEY) {
    // Key presence check only — live API call causes 429 when pipeline is running
    return { ok: true, model: ANTHROPIC_MODEL, provider: "anthropic" };
  }

  try {
    const resp = await fetch(`${OLLAMA_URL}/api/tags`, { signal: AbortSignal.timeout(5000) });
    if (!resp.ok) return { ok: false, model: LLM_MODEL, provider: "ollama", error: `HTTP ${resp.status}` };

    const data = await resp.json() as { models?: Array<{ name: string }> } | null;
    // A payload without a model list is treated as "no models installed" rather than a crash.
    const models = Array.isArray(data?.models) ? data.models : [];

    // Compare on the name without its tag, so "model:latest" matches "model".
    const baseName = LLM_MODEL.split(":")[0] ?? LLM_MODEL;
    const hasModel = models.some((m) => typeof m?.name === "string" && m.name.includes(baseName));

    return { ok: hasModel, model: LLM_MODEL, provider: "ollama", error: hasModel ? undefined : `Model ${LLM_MODEL} not found` };
  } catch (err) {
    return { ok: false, model: LLM_MODEL, provider: "ollama", error: describe(err) };
  }
}
|