From 8e0eda6c4145c515002bde785d3183c02c657111 Mon Sep 17 00:00:00 2001 From: Rene Fichtmueller Date: Sun, 5 Apr 2026 22:47:15 +0200 Subject: [PATCH] =?UTF-8?q?fix(blog):=20anti-repetition=20engine=20?= =?UTF-8?q?=E2=80=94=206=20angle=20types,=20forbidden=20structures,=20exis?= =?UTF-8?q?ting=20article=20context=20injection?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- packages/api/src/llm/fo-blog-pipeline.ts | 89 ++++++++++++---- packages/api/src/routes/blog.ts | 129 +++++++++++++++++------ 2 files changed, 166 insertions(+), 52 deletions(-) diff --git a/packages/api/src/llm/fo-blog-pipeline.ts b/packages/api/src/llm/fo-blog-pipeline.ts index 945a907..baf53c8 100644 --- a/packages/api/src/llm/fo-blog-pipeline.ts +++ b/packages/api/src/llm/fo-blog-pipeline.ts @@ -12,6 +12,8 @@ * 8. Kill AI Tone (remove all AI fingerprints) * 9. QA Check (technical accuracy + weak section fixes) * 10. Quality Score (1-10 ratings + improvement suggestions) + * + APM (Auto-Precision Mode — final word-level cut) + * + Viral Signal (Social Masterfile — AVC, carry line, auto-kill, LinkedIn) * * Dedicated FO_Blog_LLM: * - Model: qwen2.5:14b on .213 (or override via FO_BLOG_MODEL env) @@ -303,14 +305,23 @@ CONTENT MODULES (use 2-3 per article): export const STEP1_TOPIC_EXPANSION = `You are a senior network engineer. -Given the topic below, expand it into: -- 5 real-world scenarios where this topic becomes a problem -- 5 common wrong assumptions engineers make about this -- 5 operational risks nobody talks about +Given the topic below, expand it into scenarios and angles from SIX DIFFERENT PERSPECTIVES. +Do NOT default to "physical layer failure" or "lab-to-production gap" — those are overused. + +For each perspective, generate 2-3 concrete, specific observations: + +1. ECONOMIC PERSPECTIVE — TCO, hidden costs, budget allocation, vendor pricing, ROI reality +2. OPERATIONAL PERSPECTIVE — procurement workflows, validation processes, team skills, SLAs +3. MARKET/TIMING PERSPECTIVE — when to buy, when to wait, what's mature vs. hype +4. TECHNICAL DEPTH PERSPECTIVE — specific failure modes at the protocol level, not just physical layer +5. POLITICAL PERSPECTIVE — vendor lock-in, procurement decisions, OEM vs. compatible debates +6. MIGRATION PERSPECTIVE — step-by-step realities when upgrading from previous generation Topic: {{TOPIC}} -Keep it practical, not theoretical. Think about what actually goes wrong in production.`; +{{EXISTING_ANGLES}} + +Be concrete, not generic. Think: what would a senior engineer with budget responsibility know that a junior engineer wouldn't?`; // ═══════════════════════════════════════════════════════ // STEP 2: ANGLE SELECTION @@ -318,16 +329,46 @@ Keep it practical, not theoretical. Think about what actually goes wrong in prod export const STEP2_ANGLE_SELECTION = `Based on the expanded scenarios below, select ONE strong angle for a technical blog post. -The angle must be: -- Practical and decision-driven (helps the reader DO something) -- Involves real trade-offs (not a clear-cut answer) -- Relevant for real deployments (not academic) -- Controversial enough to generate discussion +════════════════════════════════════════════════════════ +ANGLE DIVERSITY — MANDATORY +════════════════════════════════════════════════════════ -Then define: -- Target audience (e.g., DC leaf-spine engineer, ISP architect, enterprise campus) -- Core decision question the article answers -- The one thing the reader should DO after reading +{{FORBIDDEN_ANGLES}} + +ANGLE TYPES TO CHOOSE FROM (pick the one that fits the topic best — rotate through these): + +TYPE A — ECONOMIC: "What this actually costs" (TCO, hidden spend, budget reality) + Example: "Your $350 optic just cost you $18,000 in engineering time — here's the math" + +TYPE B — DECISION FRAMEWORK: "How to decide" (buy now vs wait, OEM vs compatible, which spec) + Example: "The 3 questions that determine whether 400G ZR is right for your deployment" + +TYPE C — MARKET REALITY: "What's hype vs production-ready right now" (timing, maturity, supply chain) + Example: "800G: which parts of the standard are actually shippable today" + +TYPE D — OPERATIONAL PLAYBOOK: "Step-by-step process" (how to do something, not what goes wrong) + Example: "The 6-step validation checklist before you deploy any 400G transceiver" + +TYPE E — VENDOR POLITICS: "The uncomfortable truth about vendor dynamics" + Example: "Why OEM compatibility lists exist — and why they're not what you think" + +TYPE F — MIGRATION REALITY: "What the upgrade path actually looks like, not what the datasheet says" + Example: "12 months into our 100G→400G migration: what we got wrong in month 1" + +════════════════════════════════════════════════════════ +FORBIDDEN ANGLE STRUCTURES (these are overused — auto-reject if you start here): +- "Lab worked fine → production failed → physical layer was the cause" → BANNED +- "Compatible optics get blamed → investigation → connector was dirty" → BANNED +- "400G exposes assumptions that 100G hid" → BANNED (used too many times) +- Any structure where the resolution is "clean your connectors" → BANNED +════════════════════════════════════════════════════════ + +Select the angle type, then define: +- ANGLE TYPE: (A/B/C/D/E/F) +- ANGLE SUMMARY: one sentence describing the specific angle +- TARGET AUDIENCE: (e.g., DC leaf-spine engineer with budget, ISP procurement lead, enterprise campus architect) +- CORE QUESTION: the specific decision or insight the article answers +- READER ACTION: the one thing the reader does differently after reading Expanded scenarios: {{SCENARIOS}}`; @@ -342,13 +383,21 @@ NOT a section list. NOT a structure. A flow plan — the sequence of ideas as th FORMAT: Write the outline as 3-4 narrative beats. Each beat = one core idea and how it connects to the next. No bullet points. No section headers. -The outline should describe: -- Opening situation: what moment the reader is in -- Core tension: what assumption they have that is wrong -- Production reality: 1-2 specific things that fail (described as moments, not scenarios) -- Consequence/resolution: what actually matters at the end +════════════════════════════════════════════════════════ +BANNED OUTLINE STRUCTURES — DO NOT USE THESE: +- Beat 1: "Lab works fine" → Beat 2: "Production fails" → Beat 3: "Physical layer was the cause" +- Any arc where the climax is "dirty connector" or "polarity mismatch" +- Any arc where the resolution is "validate your setup" as a generic close +- Opening with an engineer working late in a DC finding a failing link +════════════════════════════════════════════════════════ -Keep the outline focused on 3-4 ideas MAX. If you can't write it in 3-4 beats, it's too broad. +INSTEAD — match the outline structure to the angle type: +- ECONOMIC angle → open with a cost moment, close with a calculation the reader can use +- DECISION angle → open with the choice the reader is about to make, close with clear criteria +- MARKET angle → open with what the market says vs. what the data shows, close with timing advice +- OPERATIONAL angle → open with a process gap, close with a concrete improved process +- POLITICAL angle → open with the vendor dynamic, close with what independence actually costs/saves +- MIGRATION angle → open with the planning assumption, close with what month 6 actually looked like Angle: {{ANGLE}} Target audience: {{AUDIENCE}} diff --git a/packages/api/src/routes/blog.ts b/packages/api/src/routes/blog.ts index 1fe7253..2999044 100644 --- a/packages/api/src/routes/blog.ts +++ b/packages/api/src/routes/blog.ts @@ -16,7 +16,7 @@ import { pool } from "../db/client"; const pipelineProgress = new Map(); function setProgress(draftId: string, step: number, label: string): void { - const pct = Math.round((step / 17) * 92) + 2; // 2%..94% during run, 100% on complete + const pct = Math.round((step / 18) * 92) + 2; // 2%..94% during run, 100% on complete pipelineProgress.set(draftId, { step, total: 17, label, pct }); } @@ -31,6 +31,7 @@ import { ANTI_GENERIC_INTRO_PROMPT, QUALITY_CONTROL_PROMPT, PROCUREMENT_LAYER_PROMPT, + VIRAL_SIGNAL_PROMPT, buildTopicPrompt, } from "../llm/blog-prompts"; @@ -1019,7 +1020,7 @@ async function runLlmPipeline( const LLM_OPTS = { temperature: 0.7, maxTokens: 8192, timeoutMs: 480000 }; const LLM_REFINE = { temperature: 0.4, maxTokens: 6144, timeoutMs: 480000 }; - const TOTAL_STEPS = 17; // 16-step pipeline + APM final cut + const TOTAL_STEPS = 18; // 16-step pipeline + APM + Viral Signal + LinkedIn let stepsCompleted = 0; try { @@ -1097,11 +1098,35 @@ async function runLlmPipeline( // Get blog type config const blogType = BLOG_TYPES[selectedTopic as keyof typeof BLOG_TYPES] || BLOG_TYPES.tutorial; + // Load existing articles to prevent angle repetition + let existingAnglesContext = ""; + let forbiddenAnglesContext = ""; + try { + const existingResult = await pool.query( + `SELECT title, draft_content FROM blog_drafts + WHERE status IN ('published', 'review', 'ready') AND draft_content IS NOT NULL + ORDER BY created_at DESC LIMIT 10` + ); + if (existingResult.rows.length > 0) { + const summaries = existingResult.rows.map((r: { title: string; draft_content: string }) => { + // Extract first 150 chars of content as summary + const preview = (r.draft_content || "").replace(/^#[^\n]*\n/, "").trim().slice(0, 150); + return `- "${r.title}": ${preview}...`; + }).join("\n"); + + existingAnglesContext = `\n\nALREADY PUBLISHED ARTICLES (do NOT repeat their angles or structure):\n${summaries}\n\nFor this new article, choose a COMPLETELY DIFFERENT perspective and angle than any of the above.`; + + forbiddenAnglesContext = `ALREADY WRITTEN ANGLES (forbidden — do not repeat these):\n${existingResult.rows.map((r: { title: string }) => `- "${r.title}"`).join("\n")}\n\nThe new article MUST have a structurally different angle — different story type, different reader takeaway, different perspective lens.\n`; + } + } catch { /* fine if no articles yet */ } + // ═══ STEP 1: Topic Expansion ═══ console.log(" Step 1/10: Topic Expansion..."); setProgress(draftId, 1, "Step 1/10: Topic Expansion"); const step1 = await generate(systemPrompt, - STEP1_TOPIC_EXPANSION.replace("{{TOPIC}}", title), + STEP1_TOPIC_EXPANSION + .replace("{{TOPIC}}", title) + .replace("{{EXISTING_ANGLES}}", existingAnglesContext), LLM_OPTS ); stepsCompleted = 1; @@ -1110,7 +1135,9 @@ async function runLlmPipeline( console.log(" Step 2/10: Angle Selection..."); setProgress(draftId, 2, "Step 2/10: Angle Selection"); const step2 = await generate(systemPrompt, - STEP2_ANGLE_SELECTION.replace("{{SCENARIOS}}", step1.text), + STEP2_ANGLE_SELECTION + .replace("{{FORBIDDEN_ANGLES}}", forbiddenAnglesContext) + .replace("{{SCENARIOS}}", step1.text), LLM_REFINE ); stepsCompleted = 2; @@ -1259,8 +1286,8 @@ async function runLlmPipeline( stepsCompleted = 15; // ═══ STEP APM: Auto-Precision Mode (Final Cut — last filter before publish) ═══ - console.log(" Step 16/17: Auto-Precision Mode (final cut — if a word can go, it must go)..."); - setProgress(draftId, 16, "Step 16/17: Auto-Precision Mode"); + console.log(" Step 16/18: Auto-Precision Mode (final cut — if a word can go, it must go)..."); + setProgress(draftId, 16, "Step 16/18: Auto-Precision Mode"); const stepAPM = await generate(systemPrompt, STEP_APM.replace("{{ARTICLE}}", step9.text), LLM_REFINE @@ -1271,34 +1298,72 @@ async function runLlmPipeline( const pctAPM = Math.round((1 - wordsAPM / wordsBeforeAPM) * 100); console.log(` APM: ${wordsBeforeAPM} → ${wordsAPM} words (−${pctAPM}%) — precision cut done`); - // ═══ LinkedIn Post ═══ - console.log(" Step 17/17: LinkedIn Post (max 2,800 chars)..."); - setProgress(draftId, 17, "Step 17/17: LinkedIn Post"); - let linkedinPost: string | null = null; - let linkedinCharCount: number | null = null; + // ═══ STEP 17: Viral Signal — FLEXOPTIX Social Masterfile transformation ═══ + // Applies AVC (Auto-Viral-Check), ASS (Auto-Signal-Score), carry line enforcement, + // auto-kill phrase filter, and generates LinkedIn post in one pass. + console.log(" Step 17/18: Viral Signal (Social Masterfile transformation)..."); + setProgress(draftId, 17, "Step 17/18: Viral Signal"); + let viralArticle = stepAPM.text; + let viralLinkedinPost: string | null = null; try { - const stepLinkedIn = await generate(systemPrompt, - STEP_LINKEDIN_POST.replace("{{ARTICLE}}", stepAPM.text), - { temperature: 0.6, maxTokens: 1024, timeoutMs: 120000 } + const stepViral = await generate(systemPrompt, + VIRAL_SIGNAL_PROMPT + "\n\nArticle:\n" + stepAPM.text, + { temperature: 0.5, maxTokens: 8192, timeoutMs: 480000 } ); - linkedinPost = stepLinkedIn.text.trim(); - linkedinCharCount = linkedinPost.length; - // Enforce hard limit — truncate at last sentence before 2800 if too long - if (linkedinCharCount > 2800) { - linkedinPost = linkedinPost.slice(0, 2800).replace(/[^.!?]*$/, "").trim(); - linkedinCharCount = linkedinPost.length; - console.log(` LinkedIn post truncated to ${linkedinCharCount} chars`); + const viralOutput = stepViral.text.trim(); + // Parse output: article + ---LINKEDIN--- + linkedin post + const linkedinSep = viralOutput.indexOf("---LINKEDIN---"); + if (linkedinSep !== -1) { + viralArticle = viralOutput.slice(0, linkedinSep).trim(); + viralLinkedinPost = viralOutput.slice(linkedinSep + "---LINKEDIN---".length).trim(); + console.log(` Viral Signal: article ${viralArticle.split(/\s+/).length} words + LinkedIn ${viralLinkedinPost.length} chars`); } else { - console.log(` LinkedIn post: ${linkedinCharCount} chars`); + // No separator — treat entire output as article + viralArticle = viralOutput; + console.log(` Viral Signal: article ${viralArticle.split(/\s+/).length} words (no LinkedIn section)`); + } + // Validate viral output isn't too short (LLM may have over-cut) + if (viralArticle.split(/\s+/).length < 400) { + console.log(" ⚠ Viral Signal output too short — falling back to APM output"); + viralArticle = stepAPM.text; } } catch { - console.log(" LinkedIn post generation skipped"); + console.log(" Viral Signal skipped (error) — using APM output"); } stepsCompleted = 17; - // Extract only the article from APM output (APM returns clean article only) - // Fall back to step9.text if APM output looks too short or empty - let finalArticleText = stepAPM.text.trim().length > 200 ? stepAPM.text : step9.text; + // ═══ STEP 18: LinkedIn Post ═══ + // Use Viral Signal LinkedIn if available, otherwise generate via STEP_LINKEDIN_POST + console.log(" Step 18/18: LinkedIn Post (max 2,800 chars)..."); + setProgress(draftId, 18, "Step 18/18: LinkedIn Post"); + let linkedinPost: string | null = viralLinkedinPost; + let linkedinCharCount: number | null = viralLinkedinPost ? viralLinkedinPost.length : null; + if (!linkedinPost) { + // Fallback: dedicated LinkedIn post generator + try { + const stepLinkedIn = await generate(systemPrompt, + STEP_LINKEDIN_POST.replace("{{ARTICLE}}", viralArticle), + { temperature: 0.6, maxTokens: 1024, timeoutMs: 120000 } + ); + linkedinPost = stepLinkedIn.text.trim(); + linkedinCharCount = linkedinPost.length; + } catch { + console.log(" LinkedIn post generation skipped"); + } + } + // Enforce hard limit — truncate at last sentence before 2800 if too long + if (linkedinPost && linkedinPost.length > 2800) { + linkedinPost = linkedinPost.slice(0, 2800).replace(/[^.!?]*$/, "").trim(); + linkedinCharCount = linkedinPost.length; + console.log(` LinkedIn post truncated to ${linkedinCharCount} chars`); + } else if (linkedinPost) { + console.log(` LinkedIn post: ${linkedinCharCount} chars`); + } + stepsCompleted = 18; + + // Extract article from Viral Signal output (or APM fallback) + // Fall back to step9.text if output looks too short or empty + let finalArticleText = viralArticle.trim().length > 200 ? viralArticle : step9.text; const articleMarkers = [ "### COMPLETE FIXED ARTICLE", "## COMPLETE FIXED ARTICLE", @@ -1345,8 +1410,8 @@ async function runLlmPipeline( await pool.query( `UPDATE blog_drafts SET draft_content = $1, word_count = $2, - generated_by = 'fo-blog-engine-v6', - pipeline_version = 'v6-precision-mode', + generated_by = 'fo-blog-engine-v7', + pipeline_version = 'v7-viral-signal', pipeline_steps_completed = $3, auto_qa_score = $4, outline = $5, @@ -1390,13 +1455,13 @@ async function runLlmPipeline( } clearProgress(draftId); - console.log(`Blog FO Pipeline: ${draftId} complete — ${wordCount} words, ${stepsCompleted}/17 steps, QA: ${(autoQaScore as any)?.overall || "N/A"}/10, LinkedIn: ${linkedinCharCount ?? "n/a"} chars`); + console.log(`Blog FO Pipeline: ${draftId} complete — ${wordCount} words, ${stepsCompleted}/18 steps, QA: ${(autoQaScore as any)?.overall || "N/A"}/10, LinkedIn: ${linkedinCharCount ?? "n/a"} chars`); } catch (llmErr) { clearProgress(draftId); - console.warn(`Blog FO Pipeline failed at step ${stepsCompleted + 1}/16 for ${draftId}: ${(llmErr as Error).message}`); + console.warn(`Blog FO Pipeline failed at step ${stepsCompleted + 1}/18 for ${draftId}: ${(llmErr as Error).message}`); // Update with partial progress await pool.query( - `UPDATE blog_drafts SET pipeline_steps_completed = $1, pipeline_version = 'v5-narrative-control', + `UPDATE blog_drafts SET pipeline_steps_completed = $1, pipeline_version = 'v7-viral-signal', outline = $2, updated_at = NOW() WHERE id = $3::uuid`, [stepsCompleted, JSON.stringify({ error: (llmErr as Error).message, steps_completed: stepsCompleted }), draftId] ).catch(() => {}); @@ -1562,7 +1627,7 @@ blogRouter.post("/llm/reset-queue", (_req: Request, res: Response) => { blogRouter.get("/:id/progress", (req: Request, res: Response) => { const p = pipelineProgress.get(String(req.params.id)); if (!p) { - res.json({ success: true, running: false, step: 0, total: 10, label: "Idle", pct: 0 }); + res.json({ success: true, running: false, step: 0, total: 18, label: "Idle", pct: 0 }); return; } res.json({ success: true, running: true, ...p });