Rene Fichtmueller 10d13633fb feat: dynamic hype cycle + market signal engine + eBay/CapEx panels
6-source composite Market Signal Score (0-100) per transceiver technology.
New GET /api/hype-cycle/market-signals blends: Norton-Bass hype_score,
hyperscaler CapEx YoY (MSFT +68.8%, GOOG +107%, META +46.8%), price
observation activity ratio 30d vs prior 30d, AI cluster transceiver demand,
eBay secondary market sell-through velocity, internal fast-mover trend.
All 6 queries run in parallel via Promise.all().

Recommendation engine maps hype phase × capex boom × speed class →
Buy/Hold/Watch labels with tooltips. Dashboard Hype Cycle table now shows
Market Signal ● LIVE column + Recommendation column. Hyperscaler CapEx
panel + eBay panel added to hype tab. Procurement: new eBay Market section.
Sourcing Hype Cycle replaced hardcoded seed with live price observation data.
2026-05-14 16:17:52 +02:00

494 lines
19 KiB
TypeScript

/**
* LLM client for blog generation — supports Ollama (local), Anthropic Claude (API),
* and Claude-Code (flat-rate via claude-bridge on Erik).
*
* Provider selection:
* BLOG_LLM_PROVIDER=claude-code → Claude via claude-bridge (flat-rate, recommended)
* BLOG_LLM_PROVIDER=anthropic → Claude Sonnet/Haiku via Anthropic API
* BLOG_LLM_PROVIDER=ollama → local adapter bridge / Ollama-compatible endpoint (default)
*
* Claude-code is preferred: uses Claude Code subscription (flat-rate), no API costs.
* The default local blog model is the latest RunPod-trained FO_BlogLLM adapter.
*/
import { existsSync, readFileSync, writeFileSync } from "fs";
import { join } from "path";
// Base URL of the Ollama-compatible endpoint; overridable via OLLAMA_URL.
const OLLAMA_URL = process.env.OLLAMA_URL || "http://localhost:11434";
// Anthropic API key; an empty string means no key is configured.
const ANTHROPIC_API_KEY = process.env.ANTHROPIC_API_KEY || "";
// Model id sent to the Anthropic Messages API; overridable via ANTHROPIC_MODEL.
const ANTHROPIC_MODEL = process.env.ANTHROPIC_MODEL || "claude-sonnet-4-20250514";
// Base URL of the claude-bridge service; overridable via CLAUDE_BRIDGE_URL.
const CLAUDE_BRIDGE_URL = process.env.CLAUDE_BRIDGE_URL || "http://localhost:3250";
// ── Runtime-switchable provider state ──────────────────────────────────────
// Reads from <TIP_ROOT>/blog-llm-settings.json if present (TIP_ROOT defaults to /opt/tip;
// the file is written by /api/blog/llm/switch).
// Falls back to process.env, then to defaults. No restart required for switches.
//
// AUTO-DISCOVERY: At startup and on a periodic refresh, the active fo-blog-v* model
// is validated against Ollama's actual model list. If the configured model no longer
// exists (e.g. Magatama trained a new version and Ollama removed older tags), the
// highest available fo-blog-v* version is picked automatically — no manual env or
// settings-file update needed after each training cycle.
// Path of the persisted settings file.
const SETTINGS_FILE = join(process.env.TIP_ROOT || "/opt/tip", "blog-llm-settings.json");
// Model name used when neither the settings file nor OLLAMA_LLM_MODEL supplies one.
const STATIC_FALLBACK_MODEL = "fo-blog-v10";
// Auto-discovery refresh interval in ms. BLOG_LLM_DISCOVERY_REFRESH_MS overrides it;
// an unset, non-numeric or zero value falls back to 10 minutes.
const DISCOVERY_REFRESH_MS = Number.parseInt(process.env.BLOG_LLM_DISCOVERY_REFRESH_MS || "", 10) || 10 * 60_000;
// Active provider selection: provider id plus the Ollama model name.
interface LlmSettings { provider: string; ollamaModel: string }
/**
 * Load the provider settings from the settings file, falling back to the
 * environment and then to built-in defaults.
 *
 * Each field is resolved independently: a non-empty string in the file wins,
 * otherwise the env variable (BLOG_LLM_PROVIDER / OLLAMA_LLM_MODEL), otherwise
 * "ollama" / STATIC_FALLBACK_MODEL. File values that are not strings are
 * ignored so that a hand-edited or corrupt file can never put a non-string
 * into the settings (callers use string methods on these fields).
 *
 * @returns Fully populated settings; never throws.
 */
function loadSettingsRaw(): LlmSettings {
  const defaults: LlmSettings = {
    provider: process.env.BLOG_LLM_PROVIDER || "ollama",
    ollamaModel: process.env.OLLAMA_LLM_MODEL || STATIC_FALLBACK_MODEL,
  };
  // Accept a file value only when it is a non-empty string.
  const pick = (value: unknown, fallback: string): string =>
    typeof value === "string" && value !== "" ? value : fallback;
  try {
    if (!existsSync(SETTINGS_FILE)) return defaults;
    const parsed: unknown = JSON.parse(readFileSync(SETTINGS_FILE, "utf8"));
    if (typeof parsed !== "object" || parsed === null) return defaults;
    const raw = parsed as Record<string, unknown>;
    return {
      provider: pick(raw.provider, defaults.provider),
      ollamaModel: pick(raw.ollamaModel, defaults.ollamaModel),
    };
  } catch {
    // Unreadable or corrupt file: behave as if it did not exist.
    return defaults;
  }
}
/**
 * Comparator that orders `fo-blog-v{N}[-r{M}]` tags newest-first.
 *
 * Ordering rules:
 *   1. A higher major number N sorts before a lower one.
 *   2. For equal N, the base tag (`fo-blog-vN`, no `-rM` suffix) sorts before
 *      every `-rM` revision — per the Magatama convention the base tag is the
 *      adopted production model and `-rM` tags are intermediate adapter saves.
 *   3. For equal N where both are revisions, the higher M sorts first.
 *
 * If either name does not match the pattern, the two names are compared with
 * `localeCompare` instead.
 *
 * @returns Negative when `a` sorts first, positive when `b` sorts first, 0 when equal.
 */
function compareFoBlogVersionsDesc(a: string, b: string): number {
  const tagPattern = /^fo-blog-v(\d+)(?:-r(\d+))?$/;
  const left = tagPattern.exec(a);
  const right = tagPattern.exec(b);
  if (left === null || right === null) {
    return a.localeCompare(b);
  }

  const leftMajor = Number.parseInt(left[1], 10);
  const rightMajor = Number.parseInt(right[1], 10);
  if (leftMajor !== rightMajor) {
    return rightMajor - leftMajor;
  }

  // Equal major: a missing revision group marks the base tag, which wins.
  const leftRevision = left[2];
  const rightRevision = right[2];
  const leftIsBase = leftRevision === undefined;
  const rightIsBase = rightRevision === undefined;
  if (leftIsBase && !rightIsBase) return -1;
  if (!leftIsBase && rightIsBase) return 1;

  // Both base (0 vs 0) or both revisions (higher revision first).
  const leftRevNum = leftRevision ? Number.parseInt(leftRevision, 10) : 0;
  const rightRevNum = rightRevision ? Number.parseInt(rightRevision, 10) : 0;
  return rightRevNum - leftRevNum;
}
/** One entry of the Ollama `/api/tags` model list (only the field used here). */
interface OllamaTag { name: string }
/** Response body shape of Ollama `/api/tags` as consumed by this module. */
interface OllamaTagsResponse { models: OllamaTag[] }
/**
 * Ask Ollama which `fo-blog-v*` models it currently lists.
 *
 * A trailing `:latest` is stripped from each name; names that then match
 * `fo-blog-v{N}` or `fo-blog-v{N}-r{M}` are returned. The probe is best-effort:
 * a non-OK response, a timeout (5 s) or any other error yields an empty array.
 */
async function fetchOllamaFoBlogTags(): Promise<string[]> {
  const lanePattern = /^fo-blog-v\d+(?:-r\d+)?$/;
  try {
    const response = await fetch(`${OLLAMA_URL}/api/tags`, { signal: AbortSignal.timeout(5000) });
    if (!response.ok) {
      return [];
    }
    const payload = await response.json() as OllamaTagsResponse;
    const matches: string[] = [];
    for (const tag of payload.models || []) {
      const bareName = tag.name.replace(/:latest$/, "");
      if (lanePattern.test(bareName)) {
        matches.push(bareName);
      }
    }
    return matches;
  } catch {
    return [];
  }
}
/**
 * Reconcile the configured model against what Ollama actually serves.
 *
 * Only the `fo-blog-v*` lane is managed; any other configured model is left
 * alone. Outcome:
 *   1. Configured model is listed by Ollama — nothing changes.
 *   2. Configured model is missing — switch to the newest listed `fo-blog-v*`
 *      tag (see compareFoBlogVersionsDesc), log the switch and persist it to
 *      the settings file (persistence failures are ignored).
 *
 * Non-blocking: if the probe fails or returns no `fo-blog-v*` tags, the
 * settings stay untouched. The settings are also left untouched when the
 * configured model changed while the probe was in flight, so an explicit
 * setLlmProvider() call is never overwritten by a stale probe result.
 */
async function reconcileWithOllama(): Promise<void> {
  const configured = _settings.ollamaModel;
  // Defensive typeof: a non-string here must not reject this fire-and-forget task.
  if (typeof configured !== "string" || !configured.startsWith("fo-blog-v")) return; // only manage fo-blog-* lane
  const available = await fetchOllamaFoBlogTags();
  // The model may have been switched during the await; the probe answered a
  // question about the old model, so its result no longer applies.
  if (_settings.ollamaModel !== configured) return;
  if (available.length === 0) return;
  if (available.includes(configured)) return; // configured model still exists
  const sorted = [...available].sort(compareFoBlogVersionsDesc);
  const winner = sorted[0];
  if (!winner || winner === configured) return;
  console.log(`[LLM] auto-discovery: configured "${configured}" not in Ollama; switching to latest available "${winner}" (candidates: ${sorted.join(", ")})`);
  _settings = { ..._settings, ollamaModel: winner };
  try {
    writeFileSync(SETTINGS_FILE, JSON.stringify(_settings, null, 2), "utf8");
  } catch {
    /* non-fatal: the in-memory switch still applies */
  }
}
// Active provider settings. Reassigned (never mutated in place) by setLlmProvider()
// and by auto-discovery in reconcileWithOllama().
let _settings = loadSettingsRaw();
// Fire-and-forget initial reconciliation. Subsequent refresh runs every DISCOVERY_REFRESH_MS.
void reconcileWithOllama();
// unref() keeps this refresh timer from holding the Node.js process open on its own.
setInterval(() => { void reconcileWithOllama(); }, DISCOVERY_REFRESH_MS).unref();
/**
 * Switch the active LLM provider at runtime and persist the choice.
 *
 * @param provider    Provider id to activate.
 * @param ollamaModel Optional Ollama model name; when omitted (or empty) the
 *                    currently configured model is kept.
 *
 * Writing the settings file is best-effort: a write failure is ignored and the
 * in-memory switch still takes effect.
 */
export function setLlmProvider(provider: string, ollamaModel?: string): void {
  const nextModel = ollamaModel || _settings.ollamaModel;
  _settings = { provider, ollamaModel: nextModel };
  try {
    writeFileSync(SETTINGS_FILE, JSON.stringify(_settings, null, 2), "utf8");
  } catch {
    /* non-fatal */
  }
  const modelSuffix = ollamaModel ? ` (${ollamaModel})` : "";
  console.log(`[LLM] Provider switched → ${provider}${modelSuffix}`);
}
/**
 * Current provider configuration.
 *
 * @returns A shallow copy, so mutating the result does not affect module state.
 */
export function getLlmProvider(): LlmSettings {
  const snapshot: LlmSettings = { ..._settings };
  return snapshot;
}
/**
 * Run auto-discovery against Ollama right now instead of waiting for the
 * periodic refresh.
 *
 * @returns A copy of the settings that are active once reconciliation finished.
 */
export async function refreshLlmAutoDiscovery(): Promise<LlmSettings> {
  await reconcileWithOllama();
  const snapshot: LlmSettings = { ..._settings };
  return snapshot;
}
// Accessors for the active settings. They read _settings on every call, so a
// runtime switch is visible to the very next request.
/** Active provider id. */
function provider(): string {
  return _settings.provider;
}
/** Active Ollama model name. */
function llmModel(): string {
  return _settings.ollamaModel;
}
/** Provider-independent result of a single generation call. */
interface LlmResponse {
// Generated text.
text: string;
// Model identifier as reported by the provider ("claude-code" for the bridge).
model: string;
// Duration in nanoseconds: Ollama's total_duration is passed through as-is, and the
// Claude providers convert their measured milliseconds to ns for compatibility.
totalDuration: number;
// Output token count; for claude-bridge this is an estimate (content length / 4).
evalCount: number;
}
/**
 * Promise-based delay.
 *
 * @param ms Delay in milliseconds, passed straight to `setTimeout`.
 * @returns A promise that resolves (with no value) once the timer fires.
 */
function sleep(ms: number): Promise<void> {
  return new Promise<void>((done) => {
    setTimeout(done, ms);
  });
}
// ═══════════════════════════════════════════════════════
// ANTHROPIC CLAUDE PROVIDER
// ═══════════════════════════════════════════════════════
// Serialize Claude API calls to stay within TPM limits
// Tier-1 has 40,000 TPM — with ~20K tokens/step, only 1 concurrent call safe
// Tail of the serialization chain: each new call is chained onto this promise.
let claudeQueue: Promise<unknown> = Promise.resolve();
/**
 * Drop the current serialization chain so new Claude calls no longer wait for
 * requests that are already queued or in flight. Those earlier requests are
 * not cancelled; they simply stop blocking newcomers.
 */
export function resetClaudeQueue(): void {
  claudeQueue = Promise.resolve();
  console.log("[LLM] Claude queue reset — previous stuck requests cleared");
}
/**
 * Run `fn` after every previously enqueued Claude call has settled.
 *
 * If this request had to wait more than 15 minutes for its turn, it is
 * rejected instead of being started — the caller has most likely given up by
 * then. The wait is measured from this request's own enqueue time, so requests
 * that arrive later cannot mask a stale one.
 *
 * A failure of one task never blocks the tasks behind it.
 *
 * @param fn Task to run when its turn comes.
 * @returns Promise settling with `fn`'s outcome, or rejecting with the
 *          "Claude queue auto-reset" error after a >15 min wait.
 */
function enqueueClaude<T>(fn: () => Promise<T>): Promise<T> {
  const enqueuedAt = Date.now();
  const result = claudeQueue.then(() => {
    // Stale-request guard, evaluated when this request finally reaches the head of the queue.
    if (Date.now() - enqueuedAt > 900000) {
      console.warn("[LLM] Claude queue auto-reset after 15min stall");
      return Promise.reject(new Error("Claude queue auto-reset: previous request timed out"));
    }
    return fn();
  });
  // The chain itself must never reject, otherwise one failure would poison all later calls.
  claudeQueue = result.catch(() => {});
  return result;
}
/**
 * Perform one Anthropic Messages API request, retrying in place on HTTP 429.
 *
 * This function does NOT go through the serialization queue; retries happen
 * inside the caller's queue slot rather than by re-enqueueing, which avoids a
 * task waiting on the queue it is itself occupying.
 *
 * @param systemPrompt System prompt.
 * @param userPrompt   Sent as the single user message.
 * @param options      temperature (default 0.7), maxTokens (default 4096),
 *                     timeoutMs per attempt (default 300000).
 * @param retryCount   Number of 429 retries already used; at most 3 in total,
 *                     with delays of 10 s, 30 s and 60 s.
 * @returns Concatenated text blocks plus model and usage data. `totalDuration`
 *          covers the final attempt only, expressed in nanoseconds.
 * @throws Error "Claude API failed: <status> <body…>" for any non-OK response
 *         that is not retried; fetch/timeout errors propagate unchanged.
 */
async function claudeApiCall(
  systemPrompt: string,
  userPrompt: string,
  options?: { temperature?: number; maxTokens?: number; timeoutMs?: number },
  retryCount = 0,
): Promise<LlmResponse> {
  const maxRetries = 3;
  const backoffMs = [10000, 30000, 60000];

  for (let attempt = retryCount; ; attempt += 1) {
    const startedAt = Date.now();
    const response = await fetch("https://api.anthropic.com/v1/messages", {
      method: "POST",
      headers: {
        "Content-Type": "application/json",
        "x-api-key": ANTHROPIC_API_KEY,
        "anthropic-version": "2023-06-01",
      },
      body: JSON.stringify({
        model: ANTHROPIC_MODEL,
        max_tokens: options?.maxTokens ?? 4096,
        temperature: options?.temperature ?? 0.7,
        system: systemPrompt,
        messages: [{ role: "user", content: userPrompt }],
      }),
      signal: AbortSignal.timeout(options?.timeoutMs ?? 300000),
    });

    if (response.ok) {
      const payload = await response.json() as {
        content: Array<{ type: string; text: string }>;
        model: string;
        usage: { input_tokens: number; output_tokens: number };
      };
      const textBlocks = payload.content.filter((part) => part.type === "text");
      const text = textBlocks.map((part) => part.text).join("");
      const elapsedMs = Date.now() - startedAt;
      console.log(`[LLM] Claude ${payload.model}: ${payload.usage.input_tokens}+${payload.usage.output_tokens} tokens, ${elapsedMs}ms`);
      return {
        text,
        model: payload.model,
        totalDuration: elapsedMs * 1_000_000, // ns for compat
        evalCount: payload.usage.output_tokens,
      };
    }

    const errorBody = await response.text();
    const canRetry = response.status === 429 && attempt < maxRetries;
    if (!canRetry) {
      throw new Error(`Claude API failed: ${response.status} ${errorBody.slice(0, 200)}`);
    }
    const waitMs = backoffMs[attempt] ?? 60000;
    console.log(`[LLM] Claude 429 — retrying in ${waitMs / 1000}s (attempt ${attempt + 1}/${maxRetries})...`);
    await sleep(waitMs);
  }
}
/**
 * Generate text with the Anthropic API, one request at a time.
 *
 * The call is serialized through enqueueClaude; 429 retries are handled inside
 * claudeApiCall within the same queue slot, so a retry never re-enters the queue.
 *
 * @throws Error when ANTHROPIC_API_KEY is not set.
 */
async function generateClaude(
  systemPrompt: string,
  userPrompt: string,
  options?: { temperature?: number; maxTokens?: number; timeoutMs?: number },
): Promise<LlmResponse> {
  if (ANTHROPIC_API_KEY) {
    const task = (): Promise<LlmResponse> => claudeApiCall(systemPrompt, userPrompt, options);
    return enqueueClaude(task);
  }
  throw new Error("ANTHROPIC_API_KEY not set — cannot use Claude provider");
}
// ═══════════════════════════════════════════════════════
// OLLAMA PROVIDER (existing)
// ═══════════════════════════════════════════════════════
// Tail of the serialization chain: each new call is chained onto this promise.
let ollamaQueue: Promise<unknown> = Promise.resolve();
// Number of requests that are queued or running.
let queueDepth = 0;
/**
 * Drop the current serialization chain and zero the depth counter so new
 * Ollama calls no longer wait for requests that are already queued or in
 * flight. Those earlier requests are not cancelled.
 */
export function resetOllamaQueue(): void {
  ollamaQueue = Promise.resolve();
  queueDepth = 0;
  console.log("[LLM] Queue reset — previous stuck requests cleared");
}
/** Number of Ollama requests currently queued or running. */
export function getQueueDepth(): number { return queueDepth; }
/**
 * Run `fn` after every previously enqueued Ollama call has settled.
 *
 * If this request had to wait more than 15 minutes for its turn, it is
 * rejected instead of being started. The wait is measured from this request's
 * own enqueue time, so requests that arrive later cannot mask a stale one.
 *
 * `queueDepth` is incremented on enqueue and decremented exactly once when the
 * request leaves the queue — whether it succeeded, failed or was rejected as
 * stale.
 *
 * @param fn Task to run when its turn comes.
 * @returns Promise settling with `fn`'s outcome, or rejecting with the
 *          "Queue auto-reset" error after a >15 min wait.
 */
function enqueueOllama<T>(fn: () => Promise<T>): Promise<T> {
  queueDepth++;
  const enqueuedAt = Date.now();
  const result = ollamaQueue.then(() => {
    if (Date.now() - enqueuedAt > 900000) {
      console.warn("[LLM] Queue auto-reset after 15min stall");
      // No decrement here: the finalizer below does it for every outcome.
      return Promise.reject(new Error("Queue auto-reset: previous request timed out"));
    }
    return fn();
  });
  // Swallow the outcome so the chain never rejects, then release this request's slot.
  ollamaQueue = result.catch(() => {}).then(() => { queueDepth = Math.max(0, queueDepth - 1); });
  return result;
}
/**
 * Generate text via the Ollama-compatible `/api/generate` endpoint.
 *
 * The call runs inside the Ollama serialization queue. An HTTP 429 is retried
 * up to three times (after 15 s, 30 s and 60 s) within the same queue slot; the
 * active model is re-read for every attempt.
 *
 * @param systemPrompt System prompt.
 * @param userPrompt   User prompt.
 * @param options      temperature (default 0.7), maxTokens → num_predict
 *                     (default 4096), timeoutMs per attempt (default 300000).
 * @throws Error when all 429 retries are exhausted or on any other non-OK
 *         status; fetch/timeout errors propagate unchanged.
 */
async function generateOllama(
  systemPrompt: string,
  userPrompt: string,
  options?: { temperature?: number; maxTokens?: number; timeoutMs?: number },
): Promise<LlmResponse> {
  return enqueueOllama(async () => {
    const backoffMs = [15000, 30000, 60000];
    let retriesUsed = 0;

    while (true) {
      const response = await fetch(`${OLLAMA_URL}/api/generate`, {
        method: "POST",
        headers: { "Content-Type": "application/json" },
        body: JSON.stringify({
          model: llmModel(),
          prompt: userPrompt,
          system: systemPrompt,
          stream: false,
          options: {
            temperature: options?.temperature ?? 0.7,
            num_predict: options?.maxTokens ?? 4096,
          },
        }),
        signal: AbortSignal.timeout(options?.timeoutMs ?? 300000),
      });

      if (response.status === 429) {
        if (retriesUsed >= backoffMs.length) {
          throw new Error(`Ollama generate failed: 429 Too Many Requests (all retries exhausted)`);
        }
        const waitMs = backoffMs[retriesUsed];
        retriesUsed += 1;
        console.log(`Blog LLM: 429 rate-limit — retrying in ${waitMs / 1000}s (attempt ${retriesUsed}/${backoffMs.length})`);
        await sleep(waitMs);
        continue;
      }

      if (!response.ok) {
        const errorBody = await response.text();
        throw new Error(`Ollama generate failed: ${response.status} ${errorBody}`);
      }

      const payload = await response.json() as {
        response: string;
        model: string;
        total_duration: number;
        eval_count: number;
      };
      return {
        text: payload.response,
        model: payload.model,
        totalDuration: payload.total_duration,
        evalCount: payload.eval_count,
      };
    }
  });
}
// ═══════════════════════════════════════════════════════
// CLAUDE-CODE PROVIDER (claude-bridge — flat-rate via Claude Code subscription)
// ═══════════════════════════════════════════════════════
/**
 * Generate text through the claude-bridge service.
 *
 * The bridge takes a single prompt, so the system and user prompts are joined
 * with a blank line. Of `options`, only `timeoutMs` (default 300000) is used;
 * temperature and maxTokens are not forwarded.
 *
 * @returns The bridge's content with model "claude-code"; `totalDuration` is the
 *          measured wall time in nanoseconds and `evalCount` is an estimate
 *          (content length / 4, rounded up).
 * @throws Error on a non-OK HTTP status, or when the bridge reports failure or
 *         returns no content.
 */
async function generateClaudeBridge(
  systemPrompt: string,
  userPrompt: string,
  options?: { temperature?: number; maxTokens?: number; timeoutMs?: number },
): Promise<LlmResponse> {
  const startedAt = Date.now();
  const combinedPrompt = [systemPrompt, userPrompt].join("\n\n");

  const response = await fetch(`${CLAUDE_BRIDGE_URL}/api/generate`, {
    method: "POST",
    headers: { "Content-Type": "application/json" },
    body: JSON.stringify({ prompt: combinedPrompt }),
    signal: AbortSignal.timeout(options?.timeoutMs ?? 300000),
  });
  if (!response.ok) {
    const errorBody = await response.text();
    throw new Error(`Claude bridge failed: ${response.status} ${errorBody.slice(0, 200)}`);
  }

  const payload = await response.json() as { success: boolean; content?: string; error?: string };
  const { success, content } = payload;
  if (!success || !content) {
    throw new Error(`Claude bridge returned empty response: ${JSON.stringify(payload)}`);
  }

  const elapsedMs = Date.now() - startedAt;
  console.log(`[LLM] Claude-bridge: ${content.length} chars, ${elapsedMs}ms`);
  return {
    text: content,
    model: "claude-code",
    totalDuration: elapsedMs * 1_000_000, // ns for compat with Ollama callers
    evalCount: Math.ceil(content.length / 4), // approx tokens
  };
}
// ═══════════════════════════════════════════════════════
// PUBLIC API — auto-routes to configured provider
// ═══════════════════════════════════════════════════════
/**
 * Generate text with whichever provider is currently active.
 *
 * Routing:
 *   - "claude-code"                       → claude-bridge
 *   - "anthropic" with an API key present → Anthropic API
 *   - anything else (including "anthropic" without a key) → Ollama
 */
export async function generate(
  systemPrompt: string,
  userPrompt: string,
  options?: { temperature?: number; maxTokens?: number; timeoutMs?: number },
): Promise<LlmResponse> {
  const active = provider();
  if (active === "claude-code") {
    return generateClaudeBridge(systemPrompt, userPrompt, options);
  }
  const anthropicUsable = active === "anthropic" && Boolean(ANTHROPIC_API_KEY);
  return anthropicUsable
    ? generateClaude(systemPrompt, userPrompt, options)
    : generateOllama(systemPrompt, userPrompt, options);
}
/**
 * Chat-style generation with message history via Ollama `/api/chat`.
 *
 * Always uses Ollama regardless of the active provider, and runs inside the
 * Ollama serialization queue with a fixed 300 s timeout.
 *
 * @param messages Conversation so far, in order.
 * @param options  temperature (default 0.7), maxTokens → num_predict (default 4096).
 * @throws Error "Ollama chat failed: <status> <body>" on a non-OK response.
 */
export async function chat(
  messages: ReadonlyArray<{ role: "system" | "user" | "assistant"; content: string }>,
  options?: { temperature?: number; maxTokens?: number },
): Promise<LlmResponse> {
  const runChat = async (): Promise<LlmResponse> => {
    const requestBody = JSON.stringify({
      model: llmModel(),
      messages,
      stream: false,
      options: {
        temperature: options?.temperature ?? 0.7,
        num_predict: options?.maxTokens ?? 4096,
      },
    });
    const response = await fetch(`${OLLAMA_URL}/api/chat`, {
      method: "POST",
      headers: { "Content-Type": "application/json" },
      body: requestBody,
      signal: AbortSignal.timeout(300000),
    });
    if (!response.ok) {
      const errorBody = await response.text();
      throw new Error(`Ollama chat failed: ${response.status} ${errorBody}`);
    }
    const payload = await response.json() as {
      message: { content: string };
      model: string;
      total_duration: number;
      eval_count: number;
    };
    return {
      text: payload.message.content,
      model: payload.model,
      totalDuration: payload.total_duration,
      evalCount: payload.eval_count,
    };
  };
  return enqueueOllama(runChat);
}
/**
 * Check whether the configured LLM provider is available.
 *
 *   - "claude-code": probes the bridge's `/health` endpoint (5 s timeout).
 *   - "anthropic" with an API key: reported healthy without a network call.
 *   - otherwise: asks Ollama for its model list and requires the configured
 *     model to be present. Names are compared exactly after normalising an
 *     untagged name to `:latest`, so "fo-blog-v1" is not satisfied by
 *     "fo-blog-v10" and "model:8b" is not satisfied by "model:70b".
 *
 * Never throws; failures are reported through `ok: false` and `error`.
 */
export async function checkHealth(): Promise<{ ok: boolean; model: string; provider: string; error?: string }> {
  const p = provider();
  const m = llmModel();
  const describe = (err: unknown): string => (err instanceof Error ? err.message : String(err));
  if (p === "claude-code") {
    try {
      const resp = await fetch(`${CLAUDE_BRIDGE_URL}/health`, { signal: AbortSignal.timeout(5000) });
      if (!resp.ok) return { ok: false, model: "claude-code", provider: "claude-code", error: `HTTP ${resp.status}` };
      return { ok: true, model: "claude-code", provider: "claude-code" };
    } catch (err) {
      return { ok: false, model: "claude-code", provider: "claude-code", error: describe(err) };
    }
  }
  if (p === "anthropic" && ANTHROPIC_API_KEY) {
    return { ok: true, model: ANTHROPIC_MODEL, provider: "anthropic" };
  }
  try {
    const resp = await fetch(`${OLLAMA_URL}/api/tags`, { signal: AbortSignal.timeout(5000) });
    if (!resp.ok) return { ok: false, model: m, provider: "ollama", error: `HTTP ${resp.status}` };
    const data = await resp.json() as { models?: Array<{ name: string }> };
    // A name without a trailing ":tag" refers to the ":latest" tag. The tag must follow
    // the last "/", so a registry "host:port/" prefix is not mistaken for a tag.
    const withTag = (name: string): string => (/:[^/]+$/.test(name) ? name : `${name}:latest`);
    const wanted = withTag(m);
    const hasModel = (data.models ?? []).some(
      (tag) => typeof tag?.name === "string" && withTag(tag.name) === wanted,
    );
    return { ok: hasModel, model: m, provider: "ollama", error: hasModel ? undefined : `Model ${m} not found` };
  } catch (err) {
    return { ok: false, model: m, provider: "ollama", error: describe(err) };
  }
}