diff --git a/packages/api/src/llm/blog-prompts.ts b/packages/api/src/llm/blog-prompts.ts
index d55e00f..6044325 100644
--- a/packages/api/src/llm/blog-prompts.ts
+++ b/packages/api/src/llm/blog-prompts.ts
@@ -447,6 +447,122 @@ Another example:
 Do NOT turn this into marketing content. Keep the engineer voice.
 Return the complete article with the notes added.`;
 
+// ═══════════════════════════════════════════════════════
+// VIRAL & SIGNAL PASS — Flexoptix Social Masterfile v1.0
+// ═══════════════════════════════════════════════════════
+
+/**
+ * Applied AFTER quality control. Transforms technically correct content
+ * into content that engineers share. Based on field-tested patterns
+ * from LinkedIn posts with highest engagement. The prompt asks the model for
+ * the article, a "---LINKEDIN---" separator line, then a LinkedIn post.
+ * Core principle: observation > explanation, clarity > completeness
+ */
+export const VIRAL_SIGNAL_PROMPT = `Transform this article for maximum engineer engagement.
+
+You are applying the FLEXOPTIX SOCIAL MASTERFILE — a content framework built from analyzing
+which technical posts engineers actually save, share, and comment on.
+
+CORE DNA (non-negotiable):
+- observation > explanation
+- clarity > completeness
+- truth > marketing
+- One core truth per article. Everything else supports it or gets cut.
+
+═══ STEP 1: EXTRACT CORE TRUTH ═══
+Identify the ONE sentence that captures the article's core insight.
+This must be observational, not explanatory.
+GOOD: "nothing broke. you just lost the margin."
+BAD: "proper validation is essential for successful deployments."
+
+═══ STEP 2: FIX THE HEADLINE ═══
+The headline must stop someone mid-scroll.
+PATTERNS THAT WORK:
+- "X isn't the problem"
+- "everything works. until it doesn't"
+- "same X. different result"
+- "the part nobody tells you"
+KILL: guide, overview, deep dive, analysis, comprehensive, understanding
+
+═══ STEP 3: FIX THE HOOK (first 2-3 sentences) ═══
+Must create immediate recognition or tension. Max 2-3 short sentences.
+HOOK TYPES:
+- Contradiction: "everything looks fine. until it doesn't."
+- Blame shift: "everyone blames the optics. they're wrong."
+- Experience trigger: "you've seen this before."
+- Hidden truth: "this isn't in the datasheet."
+AUTO-KILL openers: "In today's...", "As technology...", "This article...", "With increasing..."
+
+═══ STEP 4: KILL REPETITION ═══
+If an idea appears more than once — cut every repetition.
+One statement per idea. Trust the reader.
+
+═══ STEP 5: KILL EXPLANATION BLOAT ═══
+Senior engineers don't over-explain. They observe.
+- Remove "this means that..."
+- Remove "it is important to understand..."
+- Remove "proper validation is essential"
+- Show, don't tell. If you need to explain why something matters, the writing is too weak.
+
+═══ STEP 6: ADD STORY MOMENT ═══
+The article needs at least one moment that feels like "I've been there."
+- Something worked, then drifted
+- Wrong blame happened
+- Realization came late
+DO NOT announce it ("imagine a scenario", "let's say", "here is an example").
+Just drop into it. If you have to announce a story, it's already weak.
+
+═══ STEP 7: NUMBERS AS PUNCHLINES ONLY ═══
+Remove ALL numbers that don't change understanding.
+No wattage. No budgets. No specs. UNLESS the number IS the punchline.
+GOOD: "829,518 ROAs. 1,554 ASPAs. Do the math."
+BAD: "A typical DR4 consumes approximately 12W of power per port."
+
+═══ STEP 8: CHECK SIGNAL SCORE ═══
+Does this sound like a senior engineer? Check:
+- Calm authority (no drama, no "recipe for disaster", no "harsh truth")
+- Brevity (fewer words, more certain)
+- Observational truth (show behaviors, not theory)
+- No need to impress (simple words, clear statements)
+- No over-explaining (leave gaps, imply, trust the reader)
+
+═══ STEP 9: CARRY LINE ═══
+The article MUST have one line people remember and quote.
+Examples that work:
+- "everything looks fine. until it doesn't."
+- "same optics. same setup. different result."
+- "nothing broke. you just lost the margin."
+- "at 100g, you get away with it. at 400g, you don't."
+If no carry line exists — create one. Build the article around it.
+
+═══ STEP 10: FINAL AUTO-KILL ═══
+DELETE any sentence containing:
+- "in today's world", "this article explains", "best practices"
+- "it is important to note", "proper validation", "in conclusion"
+- "significant impact", "increasing demand", "recipe for disaster"
+- "let me tell you", "this is critically important"
+- "what do you think?", "let me know", "thoughts?"
+
+═══ LINKEDIN POST GENERATION ═══
+Also generate a standalone LinkedIn post (separate from the blog).
+Structure:
+  hook (1-2 lines, stop the scroll)
+  situation (2-3 lines)
+  problem (2-3 lines)
+  wrong blame (1-2 lines)
+  shift (1-2 lines)
+  carry line (1 line)
+Rules:
+- No paragraphs longer than 2-3 lines
+- No emojis
+- Max 3 hashtags at the end
+- Must stand alone without the blog
+
+Return:
+1. The improved article (complete markdown)
+2. A separator line "---LINKEDIN---"
+3. The LinkedIn post`;
+
 // ═══════════════════════════════════════════════════════
 // TOPIC PROMPT BUILDER — Injects context data
 // ═══════════════════════════════════════════════════════
diff --git a/packages/api/src/llm/client.ts b/packages/api/src/llm/client.ts
index 34261b0..794771d 100644
--- a/packages/api/src/llm/client.ts
+++ b/packages/api/src/llm/client.ts
@@ -1,14 +1,21 @@
 /**
- * Ollama LLM client for blog generation and content enhancement.
+ * LLM client for blog generation — supports Ollama (local) and Anthropic Claude (API).
  *
- * Uses qwen2.5:14b on Mac Studio (.213) for text generation.
- * Supports streaming and non-streaming modes.
- * Includes 429 retry with exponential backoff + server-side concurrency guard.
+ * Provider selection:
+ *   BLOG_LLM_PROVIDER=anthropic → Claude Sonnet/Haiku via Anthropic API
+ *   BLOG_LLM_PROVIDER=ollama → qwen2.5 on local Ollama (default)
+ *
+ * Claude is strongly recommended for blog generation — qwen2.5:14b cannot
+ * follow complex multi-constraint prompts (mode collapse).
  */
 
 const OLLAMA_URL = process.env.OLLAMA_URL || "http://localhost:11434";
 const LLM_MODEL = process.env.OLLAMA_LLM_MODEL || "qwen2.5:14b";
 
+const ANTHROPIC_API_KEY = process.env.ANTHROPIC_API_KEY || "";
+const ANTHROPIC_MODEL = process.env.ANTHROPIC_MODEL || "claude-sonnet-4-20250514";
+const BLOG_LLM_PROVIDER = process.env.BLOG_LLM_PROVIDER || "ollama";
+
 interface LlmResponse {
   text: string;
   model: string;
@@ -16,20 +23,108 @@ interface LlmResponse {
   evalCount: number;
 }
 
 /** Sleep helper */
 function sleep(ms: number): Promise<void> {
   return new Promise((resolve) => setTimeout(resolve, ms));
 }
 
+// ═══════════════════════════════════════════════════════
+// ANTHROPIC CLAUDE PROVIDER
+// ═══════════════════════════════════════════════════════
+
+/**
+ * Generate text through the Anthropic Messages API.
+ *
+ * HTTP 429 is retried a bounded number of times with growing delays
+ * (a RETRY_DELAYS loop, as in the Ollama provider); any other non-OK
+ * status, or a 429 after the last delay has been used, throws.
+ *
+ * @param systemPrompt - Sent as the request's `system` field.
+ * @param userPrompt - Sent as the single `user` message.
+ * @param options - temperature (default 0.7), maxTokens (default 4096),
+ *   timeoutMs applied to each HTTP attempt (default 300000).
+ * @returns Joined text blocks, the model name reported by the API, the duration of the successful attempt in ns, and the output token count.
+ * @throws Error when ANTHROPIC_API_KEY is unset or the API call fails.
+ */
+async function generateClaude(
+  systemPrompt: string,
+  userPrompt: string,
+  options?: { temperature?: number; maxTokens?: number; timeoutMs?: number },
+): Promise<LlmResponse> {
+  if (!ANTHROPIC_API_KEY) {
+    throw new Error("ANTHROPIC_API_KEY not set — cannot use Claude provider");
+  }
+
+  // Delay before retry N; once the list is exhausted a 429 becomes an error.
+  const RETRY_DELAYS = [10000, 20000, 40000];
+
+  for (let attempt = 0; ; attempt++) {
+    const startTime = Date.now();
+
+    const resp = await fetch("https://api.anthropic.com/v1/messages", {
+      method: "POST",
+      headers: {
+        "Content-Type": "application/json",
+        "x-api-key": ANTHROPIC_API_KEY,
+        "anthropic-version": "2023-06-01",
+      },
+      body: JSON.stringify({
+        model: ANTHROPIC_MODEL,
+        max_tokens: options?.maxTokens ?? 4096,
+        temperature: options?.temperature ?? 0.7,
+        system: systemPrompt,
+        messages: [{ role: "user", content: userPrompt }],
+      }),
+      signal: AbortSignal.timeout(options?.timeoutMs ?? 300000),
+    });
+
+    if (!resp.ok) {
+      const errText = await resp.text();
+      const retryDelayMs = RETRY_DELAYS[attempt];
+      if (resp.status === 429 && retryDelayMs !== undefined) {
+        console.log(`[LLM] Claude 429 — retrying in ${retryDelayMs / 1000}s...`);
+        await sleep(retryDelayMs);
+        continue;
+      }
+      throw new Error(`Claude API failed: ${resp.status} ${errText.slice(0, 200)}`);
+    }
+
+    const data = await resp.json() as {
+      content: Array<{ type: string; text: string }>;
+      model: string;
+      usage: { input_tokens: number; output_tokens: number };
+    };
+
+    const text = data.content
+      .filter((c) => c.type === "text")
+      .map((c) => c.text)
+      .join("");
+
+    const duration = Date.now() - startTime;
+    console.log(`[LLM] Claude ${data.model}: ${data.usage.input_tokens}+${data.usage.output_tokens} tokens, ${duration}ms`);
+
+    return {
+      text,
+      model: data.model,
+      totalDuration: duration * 1_000_000, // ns for compat
+      evalCount: data.usage.output_tokens,
+    };
+  }
+}
+
+// ═══════════════════════════════════════════════════════
+// OLLAMA PROVIDER (existing)
+// ═══════════════════════════════════════════════════════
+
 /**
  * Server-side concurrency guard — Ollama processes one generation at a time.
  * Queue ensures sequential execution even with multiple concurrent API requests.
  */
 let ollamaQueue: Promise<void> = Promise.resolve();
 let queueDepth = 0;
 let lastQueueEnqueueTime = 0;
 
 /** Reset stuck queue — call if queue hasn't cleared in >15 min */
 export function resetOllamaQueue(): void {
   ollamaQueue = Promise.resolve();
   queueDepth = 0;
@@ -50,19 +145,19 @@ function enqueueOllama<T>(fn: () => Promise<T>): Promise<T> {
     }
     return fn();
   });
   // Keep queue alive even if fn throws (attach no-op error handler on chain)
   ollamaQueue = result.catch(() => {}).then(() => { queueDepth = Math.max(0, queueDepth - 1); });
   return result;
 }
 
 /** Generate text from a system prompt + user prompt — with 429 retry/backoff + queue */
-export async function generate(
+async function generateOllama(
   systemPrompt: string,
   userPrompt: string,
   options?: { temperature?: number; maxTokens?: number; timeoutMs?: number },
 ): Promise<LlmResponse> {
   return enqueueOllama(async () => {
     const RETRY_DELAYS = [15000, 30000, 60000]; // 15s, 30s, 60s
 
     for (let attempt = 0; attempt <= RETRY_DELAYS.length; attempt++) {
       if (attempt > 0) {
@@ -116,7 +211,32 @@ export async function generate(
   });
 }
 
-/** Chat-style generation with message history */
+// ═══════════════════════════════════════════════════════
+// PUBLIC API — auto-routes to configured provider
+// ═══════════════════════════════════════════════════════
+
+/**
+ * Generate text with the provider selected by BLOG_LLM_PROVIDER.
+ *
+ * "anthropic" routes to Claude when ANTHROPIC_API_KEY is set. When the key is
+ * missing the call falls back to Ollama and logs a warning, so the
+ * misconfiguration is visible rather than silent.
+ */
+export async function generate(
+  systemPrompt: string,
+  userPrompt: string,
+  options?: { temperature?: number; maxTokens?: number; timeoutMs?: number },
+): Promise<LlmResponse> {
+  if (BLOG_LLM_PROVIDER === "anthropic") {
+    if (ANTHROPIC_API_KEY) {
+      return generateClaude(systemPrompt, userPrompt, options);
+    }
+    console.warn("[LLM] BLOG_LLM_PROVIDER=anthropic but ANTHROPIC_API_KEY is not set — falling back to Ollama");
+  }
+  return generateOllama(systemPrompt, userPrompt, options);
+}
+
+/** Chat-style generation with message history (Ollama only for now) */
 export async function chat(
   messages: ReadonlyArray<{ role: "system" | "user" | "assistant"; content: string }>,
   options?: { temperature?: number; maxTokens?: number },
@@ -158,17 +278,50 @@ export async function chat(
   });
 }
 
-/** Check if Ollama is available and model is loaded */
-export async function checkHealth(): Promise<{ ok: boolean; model: string; error?: string }> {
+/**
+ * Check if configured LLM provider is available.
+ *
+ * Anthropic: sends a 5-token Messages request (a real generation call) and
+ * reports the HTTP status when it is rejected. Ollama: lists tags and looks
+ * for the configured model's base name.
+ */
+export async function checkHealth(): Promise<{ ok: boolean; model: string; provider: string; error?: string }> {
+  if (BLOG_LLM_PROVIDER === "anthropic" && ANTHROPIC_API_KEY) {
+    try {
+      // Quick validation — just check API key works
+      const resp = await fetch("https://api.anthropic.com/v1/messages", {
+        method: "POST",
+        headers: {
+          "Content-Type": "application/json",
+          "x-api-key": ANTHROPIC_API_KEY,
+          "anthropic-version": "2023-06-01",
+        },
+        body: JSON.stringify({
+          model: ANTHROPIC_MODEL,
+          max_tokens: 5,
+          messages: [{ role: "user", content: "hi" }],
+        }),
+        signal: AbortSignal.timeout(10000),
+      });
+      if (!resp.ok) {
+        // Surface the HTTP status, as the Ollama branch does
+        return { ok: false, model: ANTHROPIC_MODEL, provider: "anthropic", error: `HTTP ${resp.status}` };
+      }
+      return { ok: true, model: ANTHROPIC_MODEL, provider: "anthropic" };
+    } catch (err) {
+      return { ok: false, model: ANTHROPIC_MODEL, provider: "anthropic", error: (err as Error).message };
+    }
+  }
+
   try {
     const resp = await fetch(`${OLLAMA_URL}/api/tags`, {
       signal: AbortSignal.timeout(5000)
     });
-    if (!resp.ok) return { ok: false, model: LLM_MODEL, error: `HTTP ${resp.status}` };
+    if (!resp.ok) return { ok: false, model: LLM_MODEL, provider: "ollama", error: `HTTP ${resp.status}` };
     const data = await resp.json() as { models: Array<{ name: string }> };
     const hasModel = data.models.some((m) => m.name.includes(LLM_MODEL.split(":")[0]));
-    return { ok: hasModel, model: LLM_MODEL, error: hasModel ? undefined : `Model ${LLM_MODEL} not found` };
+    return { ok: hasModel, model: LLM_MODEL, provider: "ollama", error: hasModel ? undefined : `Model ${LLM_MODEL} not found` };
   } catch (err) {
-    return { ok: false, model: LLM_MODEL, error: (err as Error).message };
+    return { ok: false, model: LLM_MODEL, provider: "ollama", error: (err as Error).message };
   }
 }