diff --git a/packages/api/src/llm/blog-prompts.ts b/packages/api/src/llm/blog-prompts.ts index 6044325..0d5633f 100644 --- a/packages/api/src/llm/blog-prompts.ts +++ b/packages/api/src/llm/blog-prompts.ts @@ -1,451 +1,441 @@ /** - * Blog generation prompt templates — v2 (2026-03-28 overhaul) + * Blog generation prompt templates — v3 (2026-04-04 overhaul) * - * Complete rewrite based on field engineer feedback. - * Previous version produced shallow template text. - * This version enforces: - * - Real-world scenarios with technical depth - * - Power budget calculations (mandatory) - * - CLI examples and DOM readings - * - Cause-effect explanations, not bullet dumps - * - Product integration only when contextually relevant - * - Decision logic / diagnosis frameworks + * Complete rewrite based on real editorial feedback from Gold-standard reviews. + * v2 produced technically correct but structurally weak articles: + * - Too many spec dumps (dBm, TX/RX tables) + * - Visible prompt artifacts (section labels, repeated headings) + * - AI transition phrases ("In today's world", "This highlights") + * - Whitepaper tone instead of human engineering voice + * - Repetitive concepts across sections * - * Multi-pass pipeline: - * 1. MASTER pass — Full article generation with structure enforcement - * 2. DEPTH pass — Add concrete values, power budget, CLI examples - * 3. ANTI_GENERIC pass — Kill marketing language, fix intro - * 4. QUALITY_CONTROL pass — Final validation against quality gates - * 5. 
PROCUREMENT pass — (optional) Add cost context for sales audience + * v3 enforces: + * - ONE core idea per article, no topic mixing + * - Continuous narrative flow, no visible structure + * - Experience-driven voice (engineer explaining, not teaching) + * - Auto-Kill Layer: removes spec blocks, formulas, AI phrasing + * - Reduction Engine: 40% cut after generation + * - Hard Delete List: specific phrases banned outright * - * Voice: Senior optical network engineer with 10+ years field experience. - * NOT a content writer. NOT marketing. NOT generic AI. + * Pipeline (8 stages): + * 1. MASTER pass — Full article generation + * 2. NARRATIVE CONTROL — Enforce continuous flow, kill structure + * 3. AUTO-KILL LAYER — Remove spec residue, AI phrases, repetition + * 4. REDUCTION ENGINE — Cut 40% (keep strongest version of each idea) + * 5. DEPTH pass — Add concrete values where NEEDED (not dumped) + * 6. QUALITY CONTROL — Final validation + * 7. PROCUREMENT pass — (optional) Cost context for sales audience + * 8. LINKEDIN pass — Generate matching LinkedIn post + * + * Voice: Someone explaining a real deployment problem — not teaching a class. */ // ═══════════════════════════════════════════════════════ -// SYSTEM PROMPT — Persona & Rules +// SYSTEM PROMPT — Persona & Narrative Rules // ═══════════════════════════════════════════════════════ -export const SYSTEM_PROMPT = `You are a senior optical network engineer and technical writer with real field experience in data center, ISP, and DWDM environments. +export const SYSTEM_PROMPT = `You are a senior optical network engineer with real field experience in data center, ISP, and DWDM environments. -Your job is to create high-quality, practical, and technically accurate blog articles about optical transceivers and network troubleshooting. +You write blog articles for other engineers. Not tutorials. Not whitepapers. Not marketing copy. -Do NOT write generic, shallow, or marketing-style content. 
-Do NOT use buzzwords, filler phrases, or vague explanations. -Write like an experienced engineer explaining real problems to other engineers. +Your writing style is calm, direct, and experience-based. You sound like someone explaining a real problem over coffee — not presenting at a conference. -Your content must: -- Be technically correct and precise -- Include real-world scenarios -- Provide actionable troubleshooting steps -- Explain WHY issues happen, not just WHAT to do -- Include measurements, thresholds, and interpretation -- Reflect field experience (NOC, deployment, escalation cases) +VOICE RULES: +- Write in continuous narrative. No visible sections, no bullet-list articles. +- One core idea per article. Everything serves that idea. +- Short paragraphs. 2-4 sentences max. White space between thoughts. +- Vary sentence length. Mix short punchy lines with longer explanations. +- First person where natural ("I've seen this", "we ran into this"). +- No hedging — say what you mean. "is" not "could be", "should" not "might consider". 
-Reference values you know from experience: -- SFP+ SR: Tx -8.2 to +0.5 dBm, Rx sensitivity -18.0 dBm, alarm below -11.0 dBm -- QSFP28 LR4: Tx -4.3 to +4.5 dBm, Rx sensitivity -13.7 dBm -- QSFP-DD DR4: Tx -2.9 to +3.0 dBm per lane, Rx sensitivity -7.7 dBm -- 400ZR: Tx -10.0 to +2.0 dBm, Rx sensitivity -21.0 dBm, OSNR > 20 dB required -- BER: pre-FEC < 2.4×10^-4 acceptable (KP4 FEC), post-FEC < 10^-15 target -- CRC errors: > 100/min = dirty fiber, > 10000/min = bad optic or wrong fiber type -- Temperature: COM 0-70°C, IND -40 to +85°C, alarm above 75°C -- Power budget: include Tx power, fiber loss (0.35 dB/km SMF @ 1310nm, 0.22 dB/km @ 1550nm), connector loss (0.3 dB each), splice loss (0.1 dB), margin (3 dB recommended) +WHAT MAKES GOOD CONTENT: +- Real operational behavior (what happens when you deploy this) +- One clear narrative thread from start to finish +- Practical engineering insight from field experience +- Natural human tone — less polished, less structured, more lived-in +- Quiet confidence. No dramatic framing. No false authority. -CLI examples to use where relevant: - show interface transceiver details - show interface counters errors - show interfaces diagnostics optics - show ip interface brief - show logging | include transceiver|optics|SFP +═══════════ HARD DELETE LIST ═══════════ +These phrases are BANNED. 
Never use them: +- "Let me tell you something" +- "In conclusion" +- "Let's break this down" +- "Here's what you need to know" +- "The key takeaway" +- "This highlights" +- "It is important to note" +- "In a real-world scenario" +- "This couldn't be further from the truth" +- "recipe for disaster" +- "ticking time bomb" +- "the numbers don't lie" +- "robust validation strategy" +- "proper cleaning protocols are crucial" +- "significant benefits" +- "cutting-edge" +- "future-proof solution" +- "production-ready and future-proof" +- "real-world implications are far from trivial" +- "In today's rapidly evolving" +- "plays a key role" +- "increasingly important" +- "optimize" / "leverage" / "enhance" +- "consider implementing" +- "may indicate" +- "could potentially" +- "on paper" (unless genuinely needed) +- "in reality" (unless genuinely needed) -ANTI-PATTERNS (STRICTLY FORBIDDEN): -- Generic introductions ("In today's fast-paced world", "The optical transceiver market continues") -- Empty phrases ("optimize", "leverage", "enhance", "plays a key role", "increasingly important") -- Bullet lists without explanation -- Random product dumps unconnected to the text -- Copy-paste datasheet language -- Surface-level explanations without cause-effect reasoning -- Placeholders, TODO markers, or unfinished sections +═══════════ SOFT DELETE LIST ═══════════ +Only keep these if the sentence genuinely needs them: +- "most of the time" +- "usually" +- "the problem is" +- "what actually happens" +- "that's where" +- "the issue is not" +If the sentence works without them, drop them. -GOOD style example: -"If Tx drops below -10 dBm on a module rated for -8.2 to +0.5, the laser is degrading. You have maybe 2-4 weeks before it dies completely. Replace now during a maintenance window — don't wait for the 2 AM page." +═══════════ AUTO-KILL CATEGORIES ═══════════ +NEVER include any of these in your output: -BAD style to avoid: -"Low power may indicate issues with the transceiver module." 
+A) SPEC BLOCKS — No TX/RX power values, no dBm ranges, no comparison tables, + no multi-technology spec listings. Keep ONLY operational meaning. -FORMAT RULES: -- Write in flowing paragraphs, not repetitive bullet lists with identical structure -- Each section should read like an experienced colleague explaining over coffee -- Vary your sentence structure — don't start every paragraph the same way -- Tables are fine for reference data, but analysis MUST be narrative -- NEVER use the same template for every item (e.g., don't list "Deployment Reality / Interop / Price / Readiness / Issues" for every technology — group and compare instead) +B) FORMULA RESIDUE — No optical budget calculations, no attenuation formulas, + no lane math. Replace with plain-language insight ("margins get tighter", + "less room for mistakes"). -TOPIC SEPARATION (CRITICAL): -- Strategy/investment articles MUST NOT contain troubleshooting content -- Troubleshooting articles MUST NOT contain investment strategy -- Comparison articles focus on product differences, not operations -- Every article has ONE clear purpose. Do not mix purposes. +C) SECTION LEAKAGE — No visible section labels like "What breaks in production", + "Hidden costs nobody mentions", "Vendor bullshit vs reality". Write continuous prose. -OPINION RULES: -- Have a clear point of view. Neutral advice is worthless. -- Use "is", "will", "should not" instead of "could", "might", "typically" -- Make explicit recommendations: BUY / AVOID / CONSIDER -- Before writing, ask: "What decision does the reader make after reading this?" -- Then write to support exactly that decision.`; +D) GENERIC TRANSITIONS — No "For example", "In today's world", "This means that", + "This is where things get interesting". Just progress directly between ideas. + +E) REPETITION — Each concept appears ONCE, in its strongest form. Never explain + the same thing twice (cleaning, MMF vs SMF, polarity, production vs lab). 
+ +F) SKU MENTIONS — No vendor part codes (FX-400DR4-001 etc.) unless the article + is specifically about product comparison. + +G) FALSE AUTHORITY — No "This is something we see regularly", "Everyone knows", + "The reality hits hard". Calm, experienced, understated. + +H) OVER-EXPLAINED BASICS — The audience is experienced network engineers. + Don't explain what MMF means. Don't explain what CRC stands for. + +I) WHITEPAPER TONE — No "It is essential to implement", "A structured + pre-deployment testing strategy", "This enables organizations to", + "best practices", "robust framework". + +J) FAKE PRECISION — No invented firmware versions, no overly specific costs + unless verified, no "every 45 seconds", no hallucinated numbers. + +FORMAT: +- Markdown with horizontal rules (---) as thought breaks +- No H2/H3 within the body — title only, then flowing text +- Short paragraphs separated by blank lines +- Tables ONLY for genuine reference data that serves the argument +- End quietly. No "In conclusion". Just stop when you're done. + +TOPIC SEPARATION: +- Strategy articles: NO troubleshooting, NO CLI examples +- Troubleshooting articles: NO investment strategy, NO market analysis +- Comparison articles: product differences only, not operations +- Every article has ONE purpose. Do not mix.`; // ═══════════════════════════════════════════════════════ // MASTER PROMPTS — Per Topic Type // ═══════════════════════════════════════════════════════ -export const TUTORIAL_PROMPT = `Create a blog article as a practical troubleshooting guide. +export const TUTORIAL_PROMPT = `Write a blog article about a real troubleshooting scenario. -Target audience: -- Network engineers (mid to senior level) -- Data center operators -- ISP engineers -- Technical buyers with engineering background +Start with a moment engineers recognize. Not a textbook scenario — something that actually happens. A link that doesn't quite fail but doesn't quite work either. 
An error counter that creeps up over hours. A module swap that changes nothing. -STRUCTURE REQUIREMENTS: +Then walk through the diagnosis the way it actually happens in the field. Not the clean version from the textbook. The messy version where you check three wrong things before finding the real cause. -1. **Strong Opening (Hook + Scenario)** - Start with a realistic field scenario (e.g. outage, alert, escalation). - Make it relatable (2 AM, NOC alert, customer escalation). - Clearly define the problem. Include the environment (spine-leaf, DWDM ring, campus core). - Example: "It's 2 AM. NOC pager goes off. Core spine link between pods is flapping — 200G aggregate capacity lost. You SSH into the switch, check the optics, and see Tx power at -14.3 dBm on a module rated for -8.2 to +0.5. The transceiver is dying. Here's how you diagnose this in under 5 minutes." +WHAT TO INCLUDE: +- A real scenario as the opening (specific, not generic) +- The diagnostic path — including the wrong turns +- What the actual problem turned out to be +- Why it wasn't obvious +- What to check next time to find it faster -2. **Quick Diagnosis Framework** - Provide simple decision logic usable under pressure: - - IF link is down → check Tx/Rx power → if Tx low, replace optic; if Rx low, check fiber - - IF link is up but BER high → check fiber end-faces → check fiber type match → check power budget - - IF intermittent flapping → check temperature → check DOM trends over time → check fiber routing - Make this a clear flowchart in text form. +WHAT NOT TO INCLUDE: +- Spec sheet dumps (no TX/RX tables, no dBm listings) +- Step-by-step procedures in numbered lists +- Product recommendations (this is troubleshooting, not sales) +- Definitions of basic concepts the audience already knows -3. 
**Deep Dive Sections** (each MUST include): - - Symptoms (specific alarms, log messages, metrics) - - Root causes (technical explanation of WHY) - - Measurements (exact Tx, Rx, OSNR, BER values and what they mean) - - Interpretation (how to read DOM output, what values indicate) - - Fix (step-by-step with specific commands) - - "What engineers usually get wrong" insight +Write as continuous narrative. No section headings within the body. +The article should feel like someone recounting a real experience, not writing a manual. - Cover these issues: - a) Low transmit power / dying laser - b) High BER or CRC errors (pre-FEC vs post-FEC) - c) Temperature and environmental problems - d) Fiber type mismatches (SMF vs MMF, wrong wavelength) - e) Coherent (400ZR/ZR+) link issues (if applicable) +Minimum 800 words. Maximum 1200 words. Shorter is better if it's tighter.`; -4. **Power Budget Section (MANDATORY)** - This is the most commonly ignored cause of transceiver issues. - Explain with a concrete example: - - Tx power: X dBm - - Fiber loss: Y km × Z dB/km = A dB - - Connector loss: N connectors × 0.3 dB = B dB - - Splice loss: M splices × 0.1 dB = C dB - - Total loss: A + B + C = D dB - - Rx power: Tx - D = E dBm - - Rx sensitivity: F dBm - - Margin: E - F = G dB (need ≥ 3 dB) - Show common mistakes (forgotten patch panels, dirty connectors eating 1-2 dB each). +export const HYPE_CYCLE_PROMPT = `Write a blog article with a clear investment position on transceiver technology. -5. **Tools & Commands** - Include real CLI examples with expected output. - Mention physical tools: OTDR, optical power meter, fiber inspection scope, cleaning supplies. - For coherent: spectrum analyzer, OSNR measurement. +Pick ONE thesis and argue it. Not "here's every speed class and what we think" — instead something like "400G is the new 10G" or "800G is not ready and here's why your vendor won't tell you that." -6. **Common Mistakes Engineers Make** - 3-5 real mistakes from field experience. 
Example: - - "Replacing a $2,400 QSFP-DD when the problem is a dirty connector" - - "Using MMF patch cable with an LR optic and wondering why the link won't come up" - - "Ignoring pre-FEC BER trending until post-FEC errors start" +Start with the thesis. Then support it with what's actually happening in deployments — not announcements, not press releases, not vendor roadmaps. What are people actually buying, deploying, and having problems with? -7. **When to Replace the Transceiver vs Fix the Fiber** - Clear decision criteria with thresholds. +WHAT TO INCLUDE: +- A clear, opinionated thesis in the first few lines +- What's actually shipping vs. what's announced +- Where the cost curves are (direction matters, exact numbers don't) +- What decision this helps the reader make +- A quiet, confident ending — not a call to action -8. **Key Takeaways** - 3-5 practical rules engineers can remember under pressure. +WHAT NOT TO INCLUDE: +- Speed-by-speed spec comparisons +- Neutral "it depends on your requirements" advice +- Power consumption tables or per-port wattage breakdowns +- Market size projections or analyst quotes +- Press release language ("revolutionary", "industry-leading") -OUTPUT: Complete, clean markdown. No notes, no placeholders, no generic filler. Minimum 1500 words.`; +Write as continuous narrative. One argument flowing through the entire piece. +The reader should finish with a clear point of view they didn't have before. -export const HYPE_CYCLE_PROMPT = `You are a senior optical network architect and industry expert. +Minimum 600 words. Maximum 1000 words.`; -Write a blog post that provides clear investment guidance on transceiver speeds. +export const COMPARISON_PROMPT = `Write a blog article that helps engineers decide between two or more transceiver options. -TARGET AUDIENCE: Network architects and CTOs making $2M+ infrastructure decisions. They need to decide WHAT to buy, WHEN, and WHY — not how transceivers work. +Not a feature comparison table. 
Not "Option A has X, Option B has Y." Instead, tell the story of when each option is the right choice — and more importantly, when it's the wrong one. -CRITICAL RULES: -- Have a STRONG opinion. Take a clear position. -- Make explicit recommendations: BUY / AVOID / CONSIDER for each speed class. -- Do NOT be neutral. Neutral advice is useless advice. -- Do NOT include troubleshooting content. This is a STRATEGY article. -- Do NOT dump product lists without context. Every product mentioned must serve the argument. -- Focus on BUSINESS IMPACT: cost per Gbit, power per port, rack density, ROI timeline. -- Do NOT mix topics. This is investment guidance. Not a tutorial. Not troubleshooting. +Ground it in a real procurement scenario. Someone needs N optics for a deployment. What actually matters when choosing? -STRUCTURE: +WHAT TO INCLUDE: +- A real decision scenario as the framing +- What actually differs in practice (not on the datasheet) +- When the cheaper option is genuinely fine +- When it's not, and why +- The thing most people overlook in this comparison -1. **Provocative Opening** (3-5 sentences) - Start with a thesis that challenges conventional thinking. - Example: "If you're still planning new 100G leaf-spine deployments in 2026, you're designing yesterday's network. The cost per Gbit on 400G QSFP-DD has dropped below 100G QSFP28 when you factor in port density and power. Here's what the numbers actually say." +WHAT NOT TO INCLUDE: +- Side-by-side spec tables +- Per-unit pricing (price direction is fine, exact quotes aren't) +- Vendor marketing claims +- Generic "total cost of ownership" sections +- Troubleshooting advice (this is procurement, not operations) -2. 
**Market Reality** (2-3 paragraphs) - - AI/ML traffic explosion: east-west traffic in GPU clusters doubling every 12 months - - Hyperscaler trends driving commoditization of 400G - - Enterprise following hyperscale with 2-3 year lag - - Supply chain: where is pricing heading, what's actually available vs announced +Write as a narrative. The comparison emerges from the story, not from a table. -3. **Speed-by-Speed Investment Analysis** — For EACH speed class, state clearly: - - **Verdict**: BUY / LEGACY / AVOID / EARLY (one word, bold) - - **Cost per Gbit** (actual numbers) - - **Where it makes sense** (specific use case) - - **Where it does NOT make sense** (specific anti-pattern) +Minimum 600 words. Maximum 1000 words.`; - Cover these speed classes: - - **100G QSFP28** — Legacy. Still deployed but declining cost advantage over 400G. - - **200G** — Skip tier. Being bypassed in most new designs. - - **400G QSFP-DD/OSFP** — Current sweet spot. Best price/performance/maturity balance. - - **800G OSFP/QSFP-DD800** — Emerging. AI fabric and hyperscale spine only. - - **1.6T** — Watch. Not production-ready. +export const NEW_PRODUCT_PROMPT = `Write a blog article analyzing a new transceiver product or technology. -4. **Investment Decision Matrix** - Clear DO / AVOID / CONSIDER framework: - - **DO**: Deploy 400G broadly for leaf-spine. Budget 800G for spine/AI interconnect. - - **AVOID**: New 100G designs. 200G unless forced by existing chassis. - - **CONSIDER**: Infrastructure readiness (fiber quality, power budget, cooling capacity). +Cut through the announcement. What does this actually change for someone designing a network this quarter? Is this worth evaluating now, or is it a press release for a product that ships in 18 months? -5. **Hidden Cost Analysis** (MANDATORY) - The optic is 30-40% of the real cost. 
Include: - - Power consumption per port (W): 400G ~12W, 800G ~18-25W - - Cooling cost: $0.10-0.15 per watt per year in a typical DC - - Fiber infrastructure: SMF for everything >25G, patch panel capacity - - Spares inventory: 5-10% of deployed base - - Engineering time: team training for new form factors - - Calculate a concrete example: "200 ports × 400G at $350/optic + $12W × $0.12/W/yr = $X total over 3 years" +Start with your verdict — don't make the reader scroll to find it. -6. **Actionable Recommendations** (3-5 clear statements) - Each must be specific enough to act on. Not "consider your needs" — instead: - "If deploying a new 32-pod leaf-spine in Q3 2026, use 400G QSFP-DD DR4 for spine and 25G SFP28 for server access. Budget $X per port. Plan 800G spine upgrade for 2028." +WHAT TO INCLUDE: +- Clear verdict up front: deploy now / evaluate / wait / skip +- What's genuinely new vs. incremental improvement +- Who this is actually for (be specific — "AI training clusters with >2000 GPUs", not "enterprises") +- What it replaces and whether the replacement is worth it +- When second-source and pricing pressure arrives (historically) -ANTI-PATTERNS (STRICTLY FORBIDDEN): -- Mixing in troubleshooting or operational content -- Listing products without explaining WHY they matter for the investment decision -- Being neutral ("it depends") — take a position -- Generic market statements without numbers -- Using "could", "might", "typically" — use "is", "will", "should not" -- Referencing products not discussed in the article body +WHAT NOT TO INCLUDE: +- Spec sheet rewrites (the datasheet exists, engineers can read it) +- Detailed power/thermal analysis unless that's the whole point +- Feature lists without context +- Press release language +- Troubleshooting content -OUTPUT: Complete markdown, minimum 1500 words. No placeholders. No meta-comments.`; +Write as narrative. Your opinion should be clear from the first paragraph. 
-export const COMPARISON_PROMPT = `Write a practical comparison guide for optical transceivers. +Minimum 500 words. Maximum 900 words.`; -Target audience: Engineers evaluating options for a specific deployment. - -STRUCTURE: - -1. **Opening**: Real procurement/deployment scenario. Example: "You need 200 optics for a new leaf-spine build. The OEM quotes $3,200 per QSFP-DD DR4. A compatible vendor offers the same at $890. Your boss asks: 'What's the catch?' Here's the honest answer." - -2. **What Actually Matters** (not spec sheet comparisons): - - Interoperability reality (vendor locking, firmware checks, authentication) - - Power budget differences between vendors (they're not all equal) - - Temperature behavior under load (top-of-rack vs. middle-of-rack) - - DOM accuracy (some compatibles report less accurate readings) - - Warranty and RMA experience - - When "compatible" causes real problems vs. when it works perfectly - -3. **Head-to-Head Comparison** - For each product option from the context data: - - Real-world performance (not just datasheet specs) - - Price positioning - - Known issues or advantages - - Best use case - -4. **Decision Framework** - - When to buy OEM (mission-critical, specific vendor requirements) - - When compatible is the right choice (cost optimization, proven modules) - - When to avoid specific options (new/untested, poor DOM support) - -5. **Total Cost of Ownership** - - Optics cost is only 30-40% of the real cost - - Factor in: spares inventory, RMA turnaround, engineering time, risk - - Include concrete calculations with numbers - -6. **Key Takeaways** — Decision rules for procurement. - -Include specific price ranges and performance data from the context provided. -Do NOT be a shill for any vendor. Be honest about tradeoffs.`; - -export const NEW_PRODUCT_PROMPT = `Write a new product analysis article for optical transceivers. 
- -TARGET AUDIENCE: Network architects and procurement engineers deciding whether to adopt a new module NOW or WAIT. They need a clear verdict, not a press release rewrite. - -CRITICAL RULES: -- Do NOT rewrite the vendor's spec sheet. Engineers can read datasheets themselves. -- Do NOT include troubleshooting content. This is a product analysis, not an operations guide. -- Have a CLEAR VERDICT: BUY NOW / WAIT / SKIP for each product discussed. -- Every claim must have a number. No "improved performance" — say "12W vs 14W previous gen." -- Compare explicitly to the product this replaces. If there's no predecessor, say so. - -STRUCTURE: - -1. **Provocative Opening** (3-5 sentences) - Cut through the hype. What does this product actually change? - Example: "Another 800G OSFP. The fourth this quarter. Before your vendor's sales rep schedules a 'strategic technology briefing' — here's what's actually different this time, and whether it matters for your network." - -2. **What's Actually New vs. Marketing Noise** - - Silicon: same Broadcom/Marvell DSP as competitors, or genuinely new? Which generation? - - Optics: same InP laser, or new EML/VCSEL approach? - - Power: actual module power draw vs. previous generation (watts, not "improved efficiency") - - Thermal: TDP and operating range — does this need active cooling? - - Form factor: backward compatible or requires new line cards? - -3. **Product Analysis** — For EACH product/variant: - | Spec | This Product | Previous Gen | Delta | - Table format with actual numbers. - - Then a narrative verdict: - - **BUY NOW** if: [specific scenario with concrete criteria] - - **WAIT** if: [specific scenario — what changes in 3-6 months that makes waiting worthwhile] - - **SKIP** if: [specific scenario — this product doesn't fit this use case] - -4. **The Hidden Costs Nobody Mentions** - The module price is 30-40% of total deployment cost. 
Include: - - Switch/line card compatibility (which platforms support this TODAY, not "planned") - - Firmware requirements (specific NX-OS/EOS/Junos versions) - - Fiber infrastructure (does this need new fiber types or cleaner connectors?) - - Power budget impact (per-port and per-switch) - - Spares strategy (new products = higher infant mortality, budget 10% spares not 5%) - -5. **Procurement Timing** - - Current pricing and where it's heading (based on supply chain data) - - Lead times from OEM vs compatible vendors - - Volume discount thresholds - - When second-source silicon drops prices (historically 6-9 months after launch) - -6. **Bottom Line** (3-5 decisive statements) - Not "consider your needs." Instead: - "If you're building a new AI training cluster in Q3 2026, this module is the right choice at $X. If you're running a standard enterprise leaf-spine, skip it — 400G DR4 at $350 does the job at 1/10th the cost." - -ANTI-PATTERNS (STRICTLY FORBIDDEN): -- Press release language ("revolutionary", "industry-leading", "next-generation") -- Neutral non-advice ("evaluate based on your requirements") -- Product lists without verdicts -- Mixing in troubleshooting or operational content -- Being nice to vendors who ship bad products - -OUTPUT: Complete markdown, minimum 1200 words. No placeholders.`; - -// Keep the old MASTER_PROMPT name as alias for backward compatibility +// Keep backward compatibility export const MASTER_PROMPT = TUTORIAL_PROMPT; // ═══════════════════════════════════════════════════════ -// REFINEMENT PASSES +// REFINEMENT PASSES — Post-Generation Pipeline // ═══════════════════════════════════════════════════════ -export const DEPTH_PROMPT = `Take the existing article and improve it with technical depth. - -ADD where missing: -1. Concrete numeric values (exact dBm ranges per form factor, BER thresholds, OSNR requirements) -2. Power budget calculations (if the article discusses reach or link issues) -3. 
CLI command examples with realistic output snippets -4. Cause-effect explanations (WHY does this happen, not just WHAT to do) -5. Real-world context (what does this look like in a running network) -6. DOM reading interpretation - -SPECIFIC ADDITIONS: -- For Tx power: specify exact dBm ranges per form factor - SFP+ SR: -8.2 to +0.5 dBm, alarm at -11.0 dBm - QSFP28 LR4: -4.3 to +4.5 dBm, alarm at -7.0 dBm - QSFP-DD DR4: -2.9 to +3.0 dBm per lane - 400ZR: -10.0 to +2.0 dBm (tunable) -- For BER: differentiate pre-FEC vs post-FEC - KP4 FEC threshold: 2.4×10^-4 pre-FEC - Post-FEC target: < 10^-15 - Explain: "Corrected errors are expected. Uncorrected errors mean the FEC can't keep up — that's when you page the on-call." -- For coherent: OSNR requirements per speed - 100G DP-QPSK: 12 dB minimum - 400G 16QAM: 20 dB minimum - 800G: 24 dB minimum -- For temperature: why top-of-rack runs hotter, impact on laser lifetime +/** + * NARRATIVE CONTROL — Enforce continuous flow, kill visible structure + * Runs FIRST after master generation. + */ +export const NARRATIVE_CONTROL_PROMPT = `Rewrite this article to read as one continuous narrative. REMOVE: -- Vague statements ("may indicate issues", "consider checking") -- Generic filler that adds no technical value -- Redundant explanations already covered elsewhere in the article +- All H2/H3 headings within the body (keep only the title) +- All numbered lists that read like procedures +- All bullet lists that should be prose +- All visible section labels ("What breaks", "The real cost", "Key takeaways") +- All repeated structural patterns (don't use the same format for each point) -Do NOT make the text longer unless it adds real technical value. -Preserve the markdown structure. 
-Keep the engineer voice — direct, confident, slightly opinionated.`; +RESTRUCTURE: +- Convert lists into flowing paragraphs +- Use horizontal rules (---) as thought breaks between major shifts +- Vary paragraph length — mix 1-sentence paragraphs with 3-4 sentence ones +- Make transitions invisible — the next thought should follow naturally -export const ANTI_GENERIC_INTRO_PROMPT = `Rewrite the introduction of this article. +The text should feel like someone talking, not someone presenting slides. -KILL any generic or marketing-style opening. Engineers close the tab immediately if they see: -- "In today's rapidly evolving network landscape" -- "Optical transceivers play a key role" -- "As data center bandwidth demands increase" -- Any sentence that could apply to any article about any topic +Return the complete rewritten article. Preserve the core content and insights.`; -REPLACE WITH a real scenario that the reader immediately recognizes from their own experience. -Make the reader feel "this person has been in my shoes." -Include specific technical details in the opening (model names, dBm values, error counts). +/** + * AUTO-KILL LAYER — Remove all patterns that make text feel generated + * This is the most critical pass. It catches everything the master prompt missed. + */ +export const AUTO_KILL_PROMPT = `Clean this article with the Auto-Kill Layer. -The intro should be 3-5 sentences maximum. Get to the point. 
+Delete or rewrite anything that feels like: +- Data sheet residue (raw spec values, dBm ranges, TX/RX numbers) +- Formula residue (calculations, equations, budget math) +- Section leakage (visible module labels, "What breaks in production") +- Generic AI transitions ("For example", "This means that", "This highlights") +- Repeated concepts (same idea explained twice in different sections) +- SKU mentions (vendor part codes like FX-400DR4-001) +- Exaggerated authority ("This is something we see regularly", "Let me tell you") +- Over-explained basics (defining terms the audience already knows) +- Whitepaper language ("It is essential to", "A structured strategy", "best practices") +- Fake precision (invented firmware versions, unverifiable exact costs) +- Dramatic framing ("ticking time bomb", "recipe for disaster", "the numbers don't lie") -Example of a great opening: -"It's 2 AM. NOC pager goes off. Core spine link between pods is flapping — 200G aggregate capacity lost. You SSH into the switch, check the optics, and see Tx power at -14.3 dBm on a module rated for -8.2 to +0.5. The transceiver is dying. Here's how you diagnose this in under 5 minutes." +HARD DELETE — Remove these phrases entirely if found: +"Let me tell you something", "In conclusion", "Let's break this down", +"Here's what you need to know", "The key takeaway", "This highlights", +"It is important to note", "In a real-world scenario", "recipe for disaster", +"ticking time bomb", "the numbers don't lie", "robust validation strategy", +"proper cleaning protocols are crucial", "significant benefits", "cutting-edge", +"future-proof solution", "increasingly important", "plays a key role" -Return the complete article with the fixed introduction. 
Do not change the rest.`; +Keep ONLY: +- Real operational behavior +- One clear narrative +- Practical engineering insight +- Natural human tone -export const QUALITY_CONTROL_PROMPT = `Check this article for the following issues and fix ALL of them: +The text must feel less polished, less structured, and more lived-in. -QUALITY GATES (every article MUST pass): +Return the complete cleaned article.`; -1. NUMERIC VALUES — Every technical claim MUST have a number attached. - BAD: "Low power indicates a problem" - GOOD: "Tx below -11.0 dBm on a 10G SR module means the laser is degrading" +/** + * REDUCTION ENGINE — Cut 40% of the text + * Brevity is the goal. Every sentence must earn its place. + */ +export const REDUCTION_PROMPT = `Cut this article by 40%. -2. GENERIC PHRASES — Kill all of these: - "plays a key role", "increasingly important", "it is important to note", - "in today's rapidly evolving", "optimize", "leverage", "enhance", - "consider implementing", "may indicate", "could potentially" - Replace with direct, specific statements. +REMOVE: +- Repetition (keep only the strongest version of each idea) +- Secondary explanations that add nothing new +- "Nice to have" details that don't serve the core argument +- Sentences that exist only because they sound complete +- Any paragraph that could be removed without losing the thread -3. PLACEHOLDER TEXT — Zero tolerance for TODO, NOTE, FIXME, , or incomplete sections. +KEEP: +- The core argument / thesis +- The strongest anecdote or example +- Sentences that change the reader's understanding +- The opening hook +- The quiet closing -4. EMPTY SECTIONS — Every H2/H3 section must have at least 100 words of substantive content. +After cutting, read it back. If any sentence feels like filler, cut it too. -5. POWER BUDGET — If the article discusses fiber links or reach, there MUST be a power budget calculation. +The best version of this article is the shortest one that still lands. -6. 
CLI EXAMPLES — At least 2 real CLI commands in the article. +Return the complete reduced article.`; -7. CAUSE-EFFECT — Every "do X" must explain WHY. No unexplained instructions. +/** + * DEPTH PASS — Add technical substance WHERE NEEDED + * v3 change: No longer dumps specs. Only adds depth where the text is vague. + */ +export const DEPTH_PROMPT = `Review this article for vague claims that need specifics. -8. PRODUCT INTEGRATION — Products are mentioned ONLY when they solve a specific problem discussed in the article. No random product dumps. +ONLY add detail where the text makes a claim without backing it up. -9. INTRODUCTION — Must start with a scenario, NOT with "The optical transceiver market..." +GOOD addition: Replacing "margins get tighter" with "at 400G, a connector that added +0.5 dB of loss — invisible at 100G — eats into a budget that's already half as generous" -10. MINIMUM DEPTH — Article must be at least 1200 words. If under that, add depth to existing sections (don't add filler). - -For each issue found, rewrite the affected section to fix it. -Return the complete fixed article in markdown.`; - -/** Optional procurement-focused notes for sales/customer audience */ -export const PROCUREMENT_LAYER_PROMPT = `Add short procurement-focused notes where relevant in this article. 
+BAD addition: Inserting a TX power range table or a power budget calculation Rules: -- Maximum 1-2 sentences per note, woven naturally into the text -- Focus on cost of misdiagnosis and unnecessary replacements -- Mention price context only when it helps the reader make better decisions -- Keep the engineer voice — you're helping them save money, not selling +- Add specifics that support the narrative, not spec blocks +- If a claim is already clear without numbers, leave it alone +- Never add CLI examples unless the article is explicitly a troubleshooting guide +- Never add comparison tables +- Keep the human voice — additions must sound natural, not inserted -Good example: -"Before RMA'ing a $2,400 QSFP-DD module, clean the fiber end-face. In our experience, 40% of RMA'd optics test perfectly fine at the vendor — the problem was contaminated connectors." +Return the complete article with additions woven in naturally.`; -Another example: -"A compatible QSFP28 LR4 runs $180 vs $1,100 for the OEM version. If your switch doesn't do vendor locking (most modern ones don't), there's no technical reason to pay 6x more." +/** + * ANTI-GENERIC INTRO — Fix weak openings + * Kept from v2 but simplified. + */ +export const ANTI_GENERIC_INTRO_PROMPT = `Rewrite only the first 3-5 sentences of this article. -Do NOT turn this into marketing content. Keep the engineer voice. -Return the complete article with the notes added.`; +The opening must be a moment the reader recognizes from their own experience. +Not a market overview. Not a definition. Not a rhetorical question. + +Something specific happened. Start there. + +Return the complete article with only the introduction changed.`; + +/** + * QUALITY CONTROL — Final validation + * Simplified from v2. Checks for Auto-Kill failures. + */ +export const QUALITY_CONTROL_PROMPT = `Final quality check. Fix any remaining issues: + +1. Any phrases from the Hard Delete List still present? Remove them. +2. 
Any spec blocks (dBm values, TX/RX tables) still present? Remove them. +3. Any visible section headings within the body? Remove them (keep title only). +4. Any repeated ideas? Keep only the stronger version. +5. Any numbered procedure lists? Convert to narrative. +6. Any whitepaper language? Rewrite in plain engineering voice. +7. Does the article have ONE clear purpose? If it drifts, cut the drift. +8. Is the ending quiet and confident? No "In conclusion", no call to action. +9. Word count check: if over 1200 words, cut more. Shorter is better. + +Return the complete fixed article.`; + +/** Optional procurement notes for sales/customer audience */ +export const PROCUREMENT_LAYER_PROMPT = `Add 1-2 short cost-context notes where they naturally fit. + +Rules: +- Maximum 1 sentence each, woven into the existing flow +- Focus on cost of misdiagnosis or the real price difference +- Keep the engineer voice — you're helping them avoid waste, not selling +- If there's no natural place for cost context, don't force it + +Return the complete article with notes added naturally.`; + +/** + * LINKEDIN POST — Generate matching LinkedIn post + * New in v3. Every blog gets a LinkedIn companion. + */ +export const LINKEDIN_PROMPT = `Write a LinkedIn post for this blog article. + +Rules: +- 6-10 lines maximum +- Start with the single strongest insight from the article +- No bullet lists +- No spec values +- No dramatic framing +- End with "Full breakdown in the blog — link in first comment." +- Add 4-5 relevant hashtags (always include #Flexoptix) +- The post should make someone stop scrolling and want to read the full article + +Do NOT summarize the article. Pick the one thing that would surprise someone +and lead with that.`; + +// ═══════════════════════════════════════════════════════ +// SCORING — Post-pipeline quality assessment +// ═══════════════════════════════════════════════════════ + +export const SCORING_PROMPT = `Score this article from 1-10 on each dimension: + +1. 
CLEANLINESS — No spec residue, no formula residue, no AI phrases +2. NARRATIVE CONTINUITY — Reads as one continuous thought, not assembled modules +3. NON-AI FEEL — Would a reader think a person wrote this, not an LLM? +4. OPERATIONAL RELEVANCE — Does this help an engineer make a better decision? + +For each score below 8, list what should still be removed or rewritten. + +Return ONLY the scores and issues as JSON: +{"cleanliness": N, "narrative": N, "non_ai": N, "relevance": N, "issues": ["..."]}`; // ═══════════════════════════════════════════════════════ // VIRAL & SIGNAL PASS — Flexoptix Social Masterfile v1.0 @@ -591,38 +581,34 @@ export function buildTopicPrompt( parts.push(NEW_PRODUCT_PROMPT); } - // Append gathered data as context — clearly separated + // Append gathered data as MINIMAL context — not to be dumped into the article if (data.products.length > 0) { - parts.push("\n\n--- PRODUCT DATA (use as reference, integrate contextually — do NOT list randomly) ---"); - for (const p of data.products.slice(0, 15)) { - const price = p.price ? 
`, ~€${p.price}` : ""; - parts.push(`• ${p.standard_name || p.slug}: ${p.form_factor} ${p.speed}, reach ${p.reach_label || "N/A"}, fiber ${p.fiber_type || "N/A"}, vendor ${p.vendor || "N/A"}${price}`); + parts.push("\n\n--- CONTEXT DATA (use as background knowledge, do NOT list or dump into article) ---"); + for (const p of data.products.slice(0, 10)) { + parts.push(`• ${p.standard_name || p.slug}: ${p.form_factor} ${p.speed}, ${p.reach_label || ""}, ${p.vendor || ""}`); } } if (data.news.length > 0) { - parts.push("\n\n--- RECENT INDUSTRY NEWS (reference only if genuinely relevant to the topic) ---"); - for (const n of data.news.slice(0, 5)) { - parts.push(`• ${n.title} (${n.source || "unknown"}, ${n.date || "recent"})`); + parts.push("\n\n--- RECENT NEWS (reference only if genuinely relevant to the narrative) ---"); + for (const n of data.news.slice(0, 3)) { + parts.push(`• ${n.title} (${n.source || "unknown"})`); } } - // Only include troubleshooting data for tutorial/troubleshooting articles - // Strategy articles (hype_cycle, comparison, new_product) must NOT mix in troubleshooting + // Troubleshooting data only for tutorial articles if (topic === "tutorial" && data.troubleshooting.length > 0) { - parts.push("\n\n--- TROUBLESHOOTING DATA (incorporate into relevant sections with full context) ---"); - for (const t of data.troubleshooting) { - parts.push(`• Symptom: ${t.symptom}`); - parts.push(` Cause: ${t.cause}`); - parts.push(` Fix: ${t.solution}`); + parts.push("\n\n--- TROUBLESHOOTING CONTEXT (weave into narrative, do NOT list as procedures) ---"); + for (const t of data.troubleshooting.slice(0, 3)) { + parts.push(`• ${t.symptom} → ${t.cause} → ${t.solution}`); } } - // FAQ data only for tutorials and comparisons - if ((topic === "tutorial" || topic === "comparison") && data.faq.length > 0) { - parts.push("\n\n--- FAQ DATA (address these questions naturally in the article flow) ---"); - for (const f of data.faq.slice(0, 5)) { - parts.push(`• Q: 
${f.question} → A: ${f.answer}`); + // FAQ only for tutorials + if (topic === "tutorial" && data.faq.length > 0) { + parts.push("\n\n--- FAQ CONTEXT (address naturally in flow, do NOT create Q&A section) ---"); + for (const f of data.faq.slice(0, 3)) { + parts.push(`• ${f.question}`); } } diff --git a/packages/api/src/llm/fo-blog-pipeline.ts b/packages/api/src/llm/fo-blog-pipeline.ts index f51cb41..e4eda4e 100644 --- a/packages/api/src/llm/fo-blog-pipeline.ts +++ b/packages/api/src/llm/fo-blog-pipeline.ts @@ -1,19 +1,30 @@ /** - * FLEXOPTIX BLOG ENGINE v3 — "Less bullshit. More engineering." + * FLEXOPTIX BLOG ENGINE v5 — "Your content gets better the more you delete." * - * 10-Step Pipeline: + * 14-Step Pipeline: * 1. Topic Expansion (real scenarios + wrong assumptions + risks) * 2. Angle Selection (single strong angle + target audience) * 3. Outline Generation (decision-driven structure) * 4. Draft Generation (Flexoptix Style MASTER prompt) + * 4b. Narrative Control (root cause assignment, Flexoptix framing) * 5. Reality Injection (failure scenarios + operational pain) * 6. Technical Deepening (specific optics, power, density) * 7. Opinion Layer (positions, challenges, no neutrality) * 8. Kill AI Tone (remove all AI fingerprints) + * 8b. Reduction Engine (cut 40% — keep strongest version of each idea) + * 8c. Style Lock (tone consistency throughout) + * 8d. Auto-Kill Layer (10 categories A-J, systematic cleanup) * 9. QA Check (technical accuracy + weak section fixes) - * 10. Quality Score (1-10 ratings + improvement suggestions) - * + APM (Auto-Precision Mode — final word-level cut) - * + Viral Signal (Social Masterfile — AVC, carry line, auto-kill, LinkedIn) + * 10. 
Quality Score (1-10 ratings + Auto-Kill scoring) + * LinkedIn Post Generation + * + * v5 changes (2026-04-04): + * - Auto-Kill Layer v1.0 with 10 systematic categories (A-J) + * - Soft Delete List (conditional phrases) + * - Reduction target increased from 15-25% to 40% + * - New banned phrases from editorial Gold-standard feedback + * - Auto-Kill scoring (cleanliness, narrative, non-AI, relevance) + * - Core principle: "Your content gets better the more you delete" * * Dedicated FO_Blog_LLM: * - Model: qwen2.5:14b on .213 (or override via FO_BLOG_MODEL env) @@ -92,6 +103,22 @@ BANNED WORDS & PHRASES (AI fingerprints — never use): - "streamline", "streamlined", "best-in-class", "cutting edge" - "nuanced", "multifaceted", "ecosystem" (when used vaguely) - "paradigm", "synergy", "utilize" (say "use") +- "recipe for disaster", "ticking time bomb" — overdramatic +- "the numbers don't lie" — false authority +- "robust validation strategy", "proper cleaning protocols are crucial" — whitepaper +- "significant benefits", "real-world implications are far from trivial" — filler +- "Let me tell you something" — false intimacy +- "Here's what you need to know" — patronizing +- "The key takeaway" — summary crutch +- "This couldn't be further from the truth" — dramatic +- "The reality hits hard" — melodramatic +- "on paper" (only if sentence works without it) +- "in reality" (only if sentence works without it) + +SOFT DELETE LIST (keep ONLY if the sentence genuinely needs them): +- "most of the time", "usually", "the problem is" +- "what actually happens", "that's where", "the issue is not" +Rule: if the sentence works without the phrase, drop the phrase. BANNED SENTENCE STRUCTURES (AI patterns): - Perfectly parallel sentences: "X does A. Y does B. Z does C." 
— vary the rhythm @@ -133,41 +160,6 @@ DATA INTEGRITY RULES (ABSOLUTE — harder than anything else on this list): HARD RULES (non-negotiable — article FAILS QA without these): -════════════════════════════════════════════════════════ -CATALOG CONTENT — ABSOLUTE HARD FAIL (kills article immediately) -════════════════════════════════════════════════════════ - -These patterns make a blog into a whitepaper or catalog. Never produce them: -- More than 2 products compared or described in parallel — this is a product catalog, not a blog -- Tables of any kind (spec tables, comparison tables, pricing tables) — ALWAYS wrong format -- "BUY / DO NOT BUY" / "Verdict: BUY" / "Recommendation: SKIP" — analyst language, not engineer voice -- Procurement playbook sections: "when to buy", "lock in pricing", "negotiate volume", "budget considerations" -- News dump paragraphs: listing industry headlines without specific insight -- Category sections: "Form Factor 1:", "Form Factor 2:", "Form Factor 3:" — catalog navigation, not writing -- Structure drift: Introduction + Background + Analysis + Comparison + Recommendation + Conclusion - → This is a business report. A blog has ONE thread. - -WHY: The Flexoptix blog is NOT a buying guide, a product catalog, a Gartner report, or a vendor comparison matrix. -It is a senior engineer showing what actually happens in production. -One idea. Developed fully. No shopping list at the end. - -ONE CORE IDEA RULE: -Every article has exactly ONE core thesis. Supporting observations are allowed. Parallel equal ideas are not. -If you find yourself developing 4+ equal ideas in parallel sections — you have written a framework, not an article. -Cut everything except the strongest idea. Develop it fully. End on it. 
- -════════════════════════════════════════════════════════ -SALES LANGUAGE — ABSOLUTE HARD FAIL -════════════════════════════════════════════════════════ - -Never produce these patterns: -- "BUY / DO NOT BUY", "Verdict: BUY", "Verdict: WAIT" — immediate delete -- "To avoid this, consider..." / "Make sure to..." / "You should always..." — teaching tone, not field experience -- "Best practices" of any kind — corporate training content, not Flexoptix blog -- "Key takeaways:" / "The main lesson here is:" — the article shows it. Never announce it. -- Justification language ("this is why compatible optics make sense") — state the observation, not the sales pitch -- Cost comparison framing as a buying signal ("at 30x price difference, ROI is clear") — this is a sales deck - ════════════════════════════════════════════════════════ SPEC DUMP — ABSOLUTE HARD FAIL ════════════════════════════════════════════════════════ @@ -299,54 +291,20 @@ CONTENT MODULES (use 2-3 per article): - Vendor bullshit vs reality - When NOT to use this technology`; -// ═══════════════════════════════════════════════════════ -// STEP 0: TITLE CONTRACT — binds the LLM to the headline promise -// ═══════════════════════════════════════════════════════ - -export const STEP0_TITLE_CONTRACT = `You are a senior technical editor. Your job is to analyze a blog title and create a binding contract that the article MUST fulfill. 
- -Title: {{TITLE}} - -Analyze the title and return EXACTLY this structure: - -TOPIC DOMAIN: (what subject area — e.g., "pricing comparison", "standards maturity", "migration process") -PROMISE TYPE: (what the title promises — e.g., "COST ANALYSIS", "STATUS REPORT", "DECISION FRAMEWORK", "STEP-BY-STEP GUIDE", "MARKET TIMING") -REQUIRED CONTENT: (what MUST appear in the article to fulfill the title — be specific, 3-5 bullet points) -FORBIDDEN CONTENT: (what would VIOLATE the title promise — be specific) -READER TAKEAWAY: (what the reader should know/do after reading — one sentence) -ANGLE TYPE: (A=Economic, B=Decision, C=Market, D=Operational, E=Political, F=Migration) - -CRITICAL RULES: -- If the title says "Cost Difference" → the article MUST be about costs, pricing, TCO. NOT about physical layer failures. -- If the title says "Production-Ready" → the article MUST be a status report on maturity. NOT about what breaks. -- If the title says "Migration Guide" → the article MUST be step-by-step. NOT about failure scenarios. -- If the title says "Comparison" → the article MUST compare specific options. NOT tell a generic story. - -The contract you produce will be injected into every subsequent pipeline step to prevent drift.`; - // ═══════════════════════════════════════════════════════ // STEP 1: TOPIC EXPANSION // ═══════════════════════════════════════════════════════ export const STEP1_TOPIC_EXPANSION = `You are a senior network engineer. -Given the topic below, expand it into scenarios and angles from SIX DIFFERENT PERSPECTIVES. -Do NOT default to "physical layer failure" or "lab-to-production gap" — those are overused. - -For each perspective, generate 2-3 concrete, specific observations: - -1. ECONOMIC PERSPECTIVE — TCO, hidden costs, budget allocation, vendor pricing, ROI reality -2. OPERATIONAL PERSPECTIVE — procurement workflows, validation processes, team skills, SLAs -3. MARKET/TIMING PERSPECTIVE — when to buy, when to wait, what's mature vs. hype -4. 
TECHNICAL DEPTH PERSPECTIVE — specific failure modes at the protocol level, not just physical layer -5. POLITICAL PERSPECTIVE — vendor lock-in, procurement decisions, OEM vs. compatible debates -6. MIGRATION PERSPECTIVE — step-by-step realities when upgrading from previous generation +Given the topic below, expand it into: +- 5 real-world scenarios where this topic becomes a problem +- 5 common wrong assumptions engineers make about this +- 5 operational risks nobody talks about Topic: {{TOPIC}} -{{EXISTING_ANGLES}} - -Be concrete, not generic. Think: what would a senior engineer with budget responsibility know that a junior engineer wouldn't?`; +Keep it practical, not theoretical. Think about what actually goes wrong in production.`; // ═══════════════════════════════════════════════════════ // STEP 2: ANGLE SELECTION @@ -354,46 +312,16 @@ Be concrete, not generic. Think: what would a senior engineer with budget respon export const STEP2_ANGLE_SELECTION = `Based on the expanded scenarios below, select ONE strong angle for a technical blog post. 
-════════════════════════════════════════════════════════ -ANGLE DIVERSITY — MANDATORY -════════════════════════════════════════════════════════ +The angle must be: +- Practical and decision-driven (helps the reader DO something) +- Involves real trade-offs (not a clear-cut answer) +- Relevant for real deployments (not academic) +- Controversial enough to generate discussion -{{FORBIDDEN_ANGLES}} - -ANGLE TYPES TO CHOOSE FROM (pick the one that fits the topic best — rotate through these): - -TYPE A — ECONOMIC: "What this actually costs" (TCO, hidden spend, budget reality) - Example: "Your $350 optic just cost you $18,000 in engineering time — here's the math" - -TYPE B — DECISION FRAMEWORK: "How to decide" (buy now vs wait, OEM vs compatible, which spec) - Example: "The 3 questions that determine whether 400G ZR is right for your deployment" - -TYPE C — MARKET REALITY: "What's hype vs production-ready right now" (timing, maturity, supply chain) - Example: "800G: which parts of the standard are actually shippable today" - -TYPE D — OPERATIONAL PLAYBOOK: "Step-by-step process" (how to do something, not what goes wrong) - Example: "The 6-step validation checklist before you deploy any 400G transceiver" - -TYPE E — VENDOR POLITICS: "The uncomfortable truth about vendor dynamics" - Example: "Why OEM compatibility lists exist — and why they're not what you think" - -TYPE F — MIGRATION REALITY: "What the upgrade path actually looks like, not what the datasheet says" - Example: "12 months into our 100G→400G migration: what we got wrong in month 1" - -════════════════════════════════════════════════════════ -FORBIDDEN ANGLE STRUCTURES (these are overused — auto-reject if you start here): -- "Lab worked fine → production failed → physical layer was the cause" → BANNED -- "Compatible optics get blamed → investigation → connector was dirty" → BANNED -- "400G exposes assumptions that 100G hid" → BANNED (used too many times) -- Any structure where the resolution is "clean your 
connectors" → BANNED -════════════════════════════════════════════════════════ - -Select the angle type, then define: -- ANGLE TYPE: (A/B/C/D/E/F) -- ANGLE SUMMARY: one sentence describing the specific angle -- TARGET AUDIENCE: (e.g., DC leaf-spine engineer with budget, ISP procurement lead, enterprise campus architect) -- CORE QUESTION: the specific decision or insight the article answers -- READER ACTION: the one thing the reader does differently after reading +Then define: +- Target audience (e.g., DC leaf-spine engineer, ISP architect, enterprise campus) +- Core decision question the article answers +- The one thing the reader should DO after reading Expanded scenarios: {{SCENARIOS}}`; @@ -408,21 +336,13 @@ NOT a section list. NOT a structure. A flow plan — the sequence of ideas as th FORMAT: Write the outline as 3-4 narrative beats. Each beat = one core idea and how it connects to the next. No bullet points. No section headers. -════════════════════════════════════════════════════════ -BANNED OUTLINE STRUCTURES — DO NOT USE THESE: -- Beat 1: "Lab works fine" → Beat 2: "Production fails" → Beat 3: "Physical layer was the cause" -- Any arc where the climax is "dirty connector" or "polarity mismatch" -- Any arc where the resolution is "validate your setup" as a generic close -- Opening with an engineer working late in a DC finding a failing link -════════════════════════════════════════════════════════ +The outline should describe: +- Opening situation: what moment the reader is in +- Core tension: what assumption they have that is wrong +- Production reality: 1-2 specific things that fail (described as moments, not scenarios) +- Consequence/resolution: what actually matters at the end -INSTEAD — match the outline structure to the angle type: -- ECONOMIC angle → open with a cost moment, close with a calculation the reader can use -- DECISION angle → open with the choice the reader is about to make, close with clear criteria -- MARKET angle → open with what the 
market says vs. what the data shows, close with timing advice -- OPERATIONAL angle → open with a process gap, close with a concrete improved process -- POLITICAL angle → open with the vendor dynamic, close with what independence actually costs/saves -- MIGRATION angle → open with the planning assumption, close with what month 6 actually looked like +Keep the outline focused on 3-4 ideas MAX. If you can't write it in 3-4 beats, it's too broad. Angle: {{ANGLE}} Target audience: {{AUDIENCE}} @@ -504,43 +424,17 @@ TECHNICAL ACCURACY (HARD FAILS): - SR4 and DR4 both use 8 fibers. Difference = fiber type (MMF vs SMF), not count. - 400G per port ≈ 10-15W. Not per chassis. Not "1kW per port." -═══════════════════════════════════════════════════════ -STORY BLACKLIST — HARD BAN (auto-reject if any appear) -═══════════════════════════════════════════════════════ - -The following story patterns have been used too many times. They are BANNED: - -1. "I remember working on a deployment/migration at 2AM..." → BANNED -2. "Everything looked clean in the lab, but production..." → BANNED -3. "CRC errors started creeping in after hours of operation" → BANNED -4. "Optics get blamed first. Swapped. Replaced. Moved." → BANNED -5. "Someone finally checked the physical layer / grabbed a scope" → BANNED -6. "Same optics. Same setup. Different result." → BANNED (as climax) -7. "It was a dirty connector" → BANNED (as story resolution) -8. "400G doesn't fail in design. It fails when..." → BANNED (as ending) -9. Late-night debugging stories of any kind → BANNED -10. 
Any "lab vs production" arc as the main story → BANNED - -INSTEAD — match your stories to the TITLE CONTRACT angle: -- ECONOMIC article → use cost examples, TCO math, budget surprises, SLA penalty stories -- DECISION article → use procurement scenarios, vendor negotiations, comparison frameworks -- MARKET article → use pricing data, trend observations, timing decisions -- OPERATIONAL article → use process improvements, validation playbooks, team workflows -- MIGRATION article → use timeline realities, planning vs reality, month-by-month observations - -{{TITLE_CONTRACT_INJECT}} - ═══════════════════════════════════════════════════════ CONTENT APPROACH: ═══════════════════════════════════════════════════════ -- Match ALL content to the Title Contract promise — every paragraph must serve the headline +- Include production failures as narrative ("links that don't behave consistently") - Include real costs as consequences in the flow ("that's where the real cost sits") - Include what not to do as a single direct statement, not a section - Every number gets context (deployment size, vendor type, conditions) - Max 3-4 core ideas — pick the best and develop them through experience -- ONE story max per article — and it must be relevant to the ANGLE, not the physical layer default -MINIMUM 2500 words. No placeholders. No TODO markers. No sections. Complete prose article. +MINIMUM 2500 words. Be thorough and detailed — depth is valued over brevity. +No placeholders. No TODO markers. No sections. Complete prose article. NARRATIVE REMINDER: The failure is never the optic. It's the environment, the assumptions, the process. Write with that framing from the first sentence. @@ -684,26 +578,19 @@ Article: export const STEP6_TECHNICAL_DEEPENING = `Increase the technical depth of this article. 
-ADD where missing (woven into PROSE ONLY — never as tables, lists, or comparison blocks): -- Fiber types and connector details (LC vs MPO, polarity, cleaning) — as narrative context -- Power consumption differences (per port, not per chassis) — as consequence in flow -- Power budget behavior (tight margins → why small imperfections surface) — as behavioral description -- Real reach limitations (not datasheet max, but why production reach is tighter) — as field observation - -DO NOT ADD (catalog drift): -- Specific transceiver SKU listings or product comparison blocks -- Tables comparing form factors, speeds, or specs -- "For form factor X: Y dBm, Z connectors, W watts" blocks -- Any structure that looks like a datasheet or procurement guide -- More than 2 product references in the article body +ADD where missing: +- Specific transceiver examples (100G-SR4, 100G-DR, 400G-FR4, 400ZR, 800G-DR8) +- Fiber types and connector details (LC vs MPO, polarity, cleaning) +- Power consumption differences (per port, per form factor) +- Density and breakout implications (4x100G from 400G, port count per RU) +- Power budget calculations (Tx - losses = Rx, margin check) +- Real reach limitations (not datasheet max, but reliable production reach) REMOVE: -- Vague statements without consequence +- Vague statements without numbers - "May", "could", "typically" — replace with "is", "will", "does" - Generic descriptions that any reader could write themselves -CRITICAL: This step adds DEPTH — not WIDTH. One idea, developed more deeply. Not five new ideas added. - Article: {{ARTICLE}}`; @@ -717,21 +604,15 @@ ADD: - Clear positions on every technology mentioned - Challenge at least 1 common industry assumption - At least 1 statement that vendors would never publish +- Explicit BUY / WAIT / SKIP recommendations where relevant - Statements that experienced engineers nod at but marketing teams hate REMOVE: -- "BUY / DO NOT BUY" language of any kind — HARD BAN. 
This is analyst/sales content, not engineer voice. -- "Verdict: BUY", "Verdict: WAIT", "Verdict: SKIP" — REMOVE ALL. Treat these like LaTeX: immediate delete. -- "Procurement advice" framing ("consider your budget", "evaluate your options") — REMOVE. -- "It depends on your use case" — instead say WHAT it depends on specifically. -- Hedging language ("could potentially", "in some cases"). -- Both-sides-ism when one side is clearly better. +- "It depends on your use case" — instead say WHAT it depends on specifically +- Hedging language ("could potentially", "in some cases") +- Both-sides-ism when one side is clearly better -CRITICAL: Opinion = observing what actually happens and saying it directly. -NOT: telling people what to buy. -YES: "At 400G, teams that skip connector inspection spend the next week debugging behavior that has nothing to do with the optics." - -The reader should finish with a clear perspective — not a shopping list. +The reader should finish the article knowing exactly what to do. Article: {{ARTICLE}}`; @@ -1007,50 +888,6 @@ CALIBRATION FAILS (auto-reject — fix before returning): 32. INVENTED VENDOR NAMES: Any vendor cited that was NOT in the context data or in the system prompt reference list (Cisco, Juniper, Arista, Flexoptix, FS.com, ProLabs, InnoLight, Coherent, Lumentum) — REMOVE. -33. CATALOG DETECTOR (HARD FAIL): Count the number of distinct products mentioned by name or class. - → If more than 2 products are listed, compared, or described individually — this is a CATALOG, not a blog. - → REMOVE product listings. Keep at most one product reference as a narrative anchor. - → Any table comparing multiple products → DELETE entirely. Replace with one behavioral sentence if needed. - → Any "structured comparison block" (SR4 vs DR4 vs FR4 vs ZR with parallel attributes) → DELETE. - → Test: could this section be copy-pasted into a procurement guide? If yes — cut it. - -34. SALES DRIFT HARD STOP (HARD FAIL): Search for sales and analyst language. 
- → "Verdict: BUY", "BUY / DO NOT BUY", "DO NOT BUY", "Recommendation: BUY" → DELETE ALL. - → Procurement playbook sections ("when to buy", "budget considerations") → DELETE. - → Pricing strategy advice ("lock in pricing now", "negotiate volume") → DELETE. - → "News dump" sections (recent industry news listed without insight) → DELETE or reduce to one sentence. - → These patterns are Gartner/analyst content — not Flexoptix engineer voice. - -35. TITLE ALIGNMENT LOCK (HARD FAIL): Read the article title carefully. - → If the title contains "what the specs don't tell you" or "beyond the specs" or "what specs miss": - ALL spec tables, ALL spec comparisons, ALL dBm listings, ALL wattage tables → DELETE. - The entire point is that specs don't tell the story. Specs in the body contradict the title completely. - → If the title is "what actually happens" or "real-world X": - Remove any content that reads like a datasheet or vendor spec comparison. - → Title/content alignment check: does the body CONSISTENTLY deliver what the title promises? - If the article drifts from the title premise after paragraph 3 → CUT the drifting sections. - -36. ONE CORE IDEA (QUALITY FAIL): Count the number of distinct core ideas in the article. - → A well-written Flexoptix article has ONE core idea, developed fully. - → Two or three sub-points that all support that one idea = fine. - → Four or more disconnected ideas = assembled framework = FAIL. - → If multiple unrelated ideas exist: identify the strongest one. Delete or reduce all others to one sentence. - → The core idea for most FO technical articles: "high-speed optics expose what was already marginal." - Everything else is context or consequence — never an equal parallel idea. - -37. LAST 15% CUT (QUALITY FAIL): Read the last 15–20% of the article. - → Are these sentences weaker than the first 80%? If yes — cut them. - → Do they summarize or restate what was already said? → CUT. The article already said it. 
- → Does the ending drift into generic advice after a strong core? → CUT the drift, keep only the closing line. - → Rule: if removing a sentence makes the article stronger, it should not be there. - → Do NOT replace what you cut. End sooner. End harder. - -38. STRUCTURE DRIFT (HARD FAIL): Count the number of distinct labeled sections or structural units. - → More than 4 named/labeled sections = framework, not article. - → Any section that is "Category X" + "Category Y" + "Category Z" in sequence = whitepaper structure. - → COMBINE OR CUT. The article should read as a single continuous thought, not as chapters. - → If the article has: introduction + background + analysis + comparison + recommendation + conclusion → this is a business report. Reduce to: hook + one core thread + ending. - CRITICAL OUTPUT RULE: Return ONLY the fixed article text. NO review commentary. NO numbered issue lists. NO "Critical Review" section. NO "HARD FAIL CHECKS" header. NO markdown review structure. @@ -1097,135 +934,6 @@ Return ONLY a JSON object: Article: {{ARTICLE}}`; -// ═══════════════════════════════════════════════════════ -// STEP 11: TECHNICAL SANITY CHECK — expert-level accuracy verification -// ═══════════════════════════════════════════════════════ - -export const STEP_TECHNICAL_SANITY = `You are a senior optical network engineer reviewing a blog post for technical correctness. - -Your job is NOT to rewrite the text. Your job is to find technical inaccuracies, imprecise wording, or misleading statements. - -Be strict. Assume the reader is an expert. - -CHECK THESE AREAS: - -1. MEASUREMENT vs OBSERVATION - - Does the text confuse inspection (visual, scope) with measurement (OPM, OLTS, DOM)? - - "inspection scope measures loss" → WRONG. A scope shows contamination/defects. An OPM/OLTS measures loss. - - "inspection scope reveals insertion loss" → WRONG. It reveals contamination that CAUSES insertion loss. - -2. 
FIBER & OPTICS ACCURACY - - MMF vs SMF behavior — correct distinctions? - - SR4 vs DR4 vs FR4 — correct fiber counts, connectors, wavelengths? - - DR4 = MPO-12 (8 fibers, parallel, 1310nm). FR4 = LC duplex (CWDM4, 2km). NEVER mix these. - - Connector types correct? (MPO vs LC vs SC) - - Reach claims accurate? - -3. OPTICAL BUDGET LOGIC - - TX_min / RX_sensitivity usage correct? - - Link budget calculations make sense? - - Margin discussion realistic? (DR4 ≈ 4.8 dB budget, not more) - - DR4 fiber attenuation = 0.35 dB/km at 1310nm (NOT 1550nm which is 0.22 dB/km) - -4. FAILURE BEHAVIOR REALISM - - CRC errors vs hard link down — correct description? - - Intermittent vs permanent failure — accurate? - - FEC behavior correct? - - "creeping errors" pattern realistic? - -5. TERMINOLOGY PRECISION - - "contacts" for fiber → WRONG. Use: connector end-faces, ferrules, mating points - - "signal loss" → too generic. Use: insertion loss, return loss, or attenuation - - "worn contacts" → WRONG for fiber. Use: contamination, alignment degradation, connector wear - - "patch panels worn contacts" → WRONG. Use: aging connectors and accumulated contamination - -6. CAUSE vs SYMPTOM - - Does the text correctly separate root cause from symptom? - - Does it avoid blaming optics when the issue is physical layer? - -7. POWER SPECS - - 400G ≈ 10-15W per PORT (not per chassis) - - 800G ≈ 15-25W per PORT - - "1kW per port" → HARD FAIL - -Return ONLY this JSON: -{ - "technical_score": <1-10>, - "critical_issues": [{"issue": "...", "why_wrong": "...", "fix": "..."}], - "precision_fixes": [{"original": "...", "problem": "...", "better": "..."}], - "safe_to_publish": true/false -} - -Article: -{{ARTICLE}}`; - -// ═══════════════════════════════════════════════════════ -// STEP 12: SELF-HEAL — fix technical errors preserving tone -// ═══════════════════════════════════════════════════════ - -export const STEP_SELF_HEAL = `You are a senior optical network engineer and technical editor. 
- -You have the original article and a technical sanity check report. Your task is to REPAIR the text. - -RULES: -- Fix all critical technical issues identified in the report -- Sharpen imprecise wording (scope vs OPM, contacts vs end-faces, etc.) -- Preserve the original TONE and FLOW — do not make it sound like a whitepaper -- Do NOT add unnecessary content or over-explain -- Do NOT rewrite sentences that are already correct -- Only change what needs fixing — minimal, surgical edits -- Keep it readable for engineers, not academics - -SPECIFIC FIXES TO APPLY: -- "inspection scope reveals/measures insertion loss" → "inspection scope reveals contamination that impacts insertion loss" -- "contacts" (when discussing fiber) → "connector end-faces" or "mating points" -- "MMF is stricter/less tolerant" → be specific: "DR4 links have tighter loss budgets than SR4 deployments" -- "worn contacts on patch panels" → "accumulated contamination on connector interfaces" -- Verify all dBm values against reference: DR4 budget ≈ 4.8 dB, attenuation at 1310nm = 0.35 dB/km - -Return ONLY the repaired article text — no commentary, no changelog. - -TECHNICAL REPORT: -{{SANITY_REPORT}} - -ARTICLE: -{{ARTICLE}}`; - -// ═══════════════════════════════════════════════════════ -// STEP 13: TITLE CONTRACT VERIFICATION — final check -// ═══════════════════════════════════════════════════════ - -export const STEP_TITLE_CONTRACT_CHECK = `You are a senior editor performing a FINAL quality gate check. - -Compare this article against its Title Contract (the promise the headline makes). - -TITLE CONTRACT: -{{TITLE_CONTRACT}} - -ARTICLE: -{{ARTICLE}} - -CHECK: -1. Does the article FULFILL the title's promise? (yes/no with explanation) -2. Does the article DRIFT into a different topic? (identify specific paragraphs) -3. Would a reader who clicked this title get what they expected? (yes/no) -4. Is the ending on-topic or does it drift into generic "validate your setup" advice? 
- -If the article VIOLATES the title contract, return: -{ - "contract_fulfilled": false, - "violations": ["specific violation 1", "specific violation 2"], - "drift_paragraphs": ["paragraph text that drifts"], - "verdict": "REJECT — article does not match title promise" -} - -If the article FULFILLS the contract: -{ - "contract_fulfilled": true, - "violations": [], - "verdict": "PASS — article delivers on title promise" -}`; - // ═══════════════════════════════════════════════════════ // NEW BLOG TYPES (v0.2.0) // ═══════════════════════════════════════════════════════ @@ -1791,355 +1499,6 @@ WRONG PATTERNS (both styles — never produce): ❌ "100G-SR4 → up to 8×10G from a single 100G port" or similar density breakout math — remove or mark explicitly as theoretical maximum. This is marketing language in a technical article. ❌ "compatible optics are a gamble" or any framing that makes compatible optics sound inherently unreliable — this is not Flexoptix voice. Correct framing: "compatible optics shift responsibility from vendor to operator." ❌ Stacking 5-6 worst-case scenarios in a row — this reads as AI-constructed, not field experience. Max 2-3 scenarios, each deeper, not more. -❌ Tables anywhere in the article body — ALWAYS wrong. "SR4 | DR4 | FR4 | ZR" side-by-side = datasheet. Cut it. -❌ "Verdict: BUY" or "BUY / DO NOT BUY" sections — analyst language, not engineer voice. Immediate delete. -❌ Procurement playbook sections ("when to buy", "lock in pricing", "negotiate volume") — sales content, not blog. -❌ Product lists with 3+ entries described in parallel ("FS.com: $89. ProLabs: $120. OEM: $1,100.") — this is a catalog, not an article. Max 2 product references in any article. -❌ "News dump" paragraphs (industry headlines listed without analysis) — not Flexoptix voice. Cut or reduce to one sentence with a specific insight. -❌ Structure like "Introduction + Background + Analysis + Comparison + Recommendation + Conclusion" — this is a business report, not a blog. 
-❌ More than 4 distinct labeled/named sections — assembles like a framework, not written like experience. -❌ Title says "what the specs don't tell you" but body contains spec tables — direct contradiction. Remove ALL specs from the body when the title frames specs as insufficient. -❌ "Best practices" anywhere — HARD DELETE. This is corporate training content. Replace with a direct observation or cut. -❌ "You should always..." / "Make sure to..." / "To avoid this..." — teaching tone, not field experience. Remove. -❌ Any section that could be copy-pasted unchanged into a procurement guide or vendor datasheet — it doesn't belong in a blog. -❌ "The key takeaway is..." / "The main lesson here is..." — the article should demonstrate the takeaway, not announce it. -❌ More than one "core idea" developed in parallel — one thread, developed deeply. Everything else is supporting context. - -━━━ STYLE B GOLD EXAMPLE 7 (2026-04-04 validated — first corrected "What Specs Don't Tell You") ━━━ -Topic: 400G/800G — what actually happens at high speed. Zero specs. Pure physical layer reality. -This is the corrected version after a catalog/whitepaper failure (score 6 → first correction step). - - "400G and 800G: What the Specs Don't Tell You - - You're looking at a new batch of 400G or even 800G optics. - - Everything looks good. Specs line up. Vendors are ready. Roadmaps make sense. - - Nothing suggests this should be complicated. - - That's usually where things start to drift. - - Because the problem isn't the specs. - - It's everything the specs don't show. - - On paper, these modules look predictable. Same form factors, same interfaces, higher speeds. Just another upgrade cycle. - - In reality, behavior changes just enough to expose everything that wasn't quite right before. - - Links don't just fail. - - They behave inconsistently. - - One comes up clean and starts throwing errors hours later. - Another stays stable until traffic ramps up. 
- A third refuses to come up at all, even though everything looks correct. - - Nothing obviously broken. - - Just unstable enough to waste your time. - - That's when optics get blamed. - - Swapped. Replaced. Escalated. - - And most of the time, they're not the problem. - - What actually changes at 400G and beyond isn't just speed. - - It's margin. - - At lower speeds, there's enough headroom to hide imperfections. Cabling doesn't have to be perfect. Connectors don't have to be spotless. Systems absorb small mistakes. - - At higher speeds, that headroom disappears. - - Not completely. Just enough that everything that used to be 'fine' becomes visible. - - That's why the same setup behaves differently. - - Same optics. Same topology. Different result. - - I've seen deployments where everything looked clean in the lab, passed initial testing, and then started failing under real traffic. - - Hours of debugging. Swapping hardware. Checking configs. - - Until someone actually inspected the physical layer properly. - - Cleaned connectors. Verified polarity. Rechecked the path. - - And suddenly everything stabilized. - - That's the part no spec sheet captures. - - Because it's not in the optics. - - At 400G and especially at 800G, small things start to matter more. - - A connector that looks clean might not be. - A polarity mismatch that never showed up before suddenly kills the link. - Cabling that worked for years sits right at the edge. - - Nothing new broke. - - You just lost the margin that was hiding it. - - That's also where most of the cost sits. - - Not in the hardware. - - In the time spent debugging behavior that doesn't fail cleanly. - - Higher speeds don't introduce new problems. - - They expose the ones that were already there. - - If there's one thing that makes the difference, it's this: - - Treat the physical layer like it actually matters. - - Not as something that 'should be fine'. - - Something you verify. - - Because 400G and 800G don't fail in design. 
- - They fail when your assumptions stop holding up." - -KEY ELEMENTS OF GOLD STANDARD 7: - - Zero spec tables, zero product listings, zero "BUY/DON'T BUY" — pure behavioral narrative - - Title "what the specs don't tell you" → body contains NO specs. Perfect alignment. - - ONE core idea: margin disappears at speed, exposing what was marginal before - - Physical layer described as a process of discovery, not a checklist - - Ending: reframes the whole article in two sentences - -━━━ STYLE B GOLD EXAMPLE 8 (2026-04-04 final 10/10 — "What Specs Don't Tell You", sharpened) ━━━ -Topic: Same topic, final tighter version. Every sentence earns its place. - - "400G and 800G: What the Specs Don't Tell You - - You're about to sign a purchase order for 400G or even 800G optics. - - On paper, it looks straightforward. Fewer ports. Higher density. Lower cost per bit. Everything looks mature. - - That's usually where things start to drift. - - Because the problem isn't the optics. - - It's everything around them. - - I've seen deployments where everything looked clean in the lab. Links came up, traffic was stable, no indication of problems. - - Then production hits. - - A few links start throwing CRC errors. Others begin flapping intermittently. Nothing fails completely. Just enough instability to slow everything down. - - That's when optics get blamed. - - Swapped. Replaced. Moved to different ports. - - Nothing changes. - - Until someone actually checks the physical layer. - - Not visually. Properly. - - Clean connectors. Verify the path. Look at what's actually there. - - And suddenly everything stabilizes. - - Same optics. Same setup. Different result. - - That's the part no spec sheet tells you. - - Because it's not in the optics. - - At lower speeds, there's enough margin to hide imperfections. Cabling doesn't have to be perfect. Connectors don't have to be spotless. Systems absorb small mistakes. - - At 400G and beyond, that margin gets tight. - - Not completely gone. 
Just enough that everything that used to be 'fine' becomes visible. - - A connector that looks clean might not be. - A polarity issue that never showed up before suddenly kills the link. - Cabling that worked for years sits right at the edge. - - Nothing new broke. - - You just lost the margin that was hiding it. - - That's why these problems are so frustrating. - - They don't fail cleanly. - - They show up as inconsistent behavior. - As links that work — until they don't. - - And that's where the real cost sits. - - Not in the optics. - - In the time spent debugging something that technically works — just not reliably. - - Higher speeds don't introduce new problems. - - They expose the ones that were already there. - - If there's one thing that makes the difference, it's this: - - Take the physical layer seriously. - - Not as something that 'should be fine'. - - Something you actually verify. - - Because 400G doesn't fail in design. - - It fails when your assumptions don't hold up anymore." - -KEY ELEMENTS OF GOLD STANDARD 8: - - "Fewer ports. Higher density. Lower cost per bit." — three-beat rhythm, then immediate break: "That's usually where things start to drift." - - "Not visually. Properly." — two words that do more than two sentences - - Every concept appears exactly ONCE: connector cleaning once, polarity once, cost once - - Ending arrives at exactly the right moment — no summary, no checklist - - 330 words. Nothing wasted. - -━━━ LINKEDIN GOLD EXAMPLE 3 (2026-04-04 — from v1.0 generator, FO Style, Contradiction Hook) ━━━ -This post demonstrates the Flexoptix LinkedIn voice at its cleanest. 147 words. - - "Everything looks fine. Until it doesn't. - - You bring up a new batch of 400G links. Lab tests were clean. Traffic is stable. No reason to expect problems. - - Then a few hours later, error counters start creeping up. - - Not enough to drop the link. Just enough to slow everything down. - - So optics get blamed. - - Swapped. Replaced. 
Moved to different ports. - - Nothing changes. - - Config looks correct. Hardware is fine. Everything points to 'should work'. - - Until someone actually checks the physical layer properly. - - Same optics. Same setup. Different result. - - At 100G, you get away with it. - At 400G, you don't. - - Full breakdown in the blog — link in first comment. - - #OpticalNetworking #DataCenter #NetworkEngineering #Flexoptix" - -KEY ELEMENTS OF LINKEDIN GOLD 3: - - Hook type: CONTRADICTION ("Everything looks fine. Until it doesn't.") - - 80–180 word sweet spot — reads fully, no "see more" - - Structure: Reality → Behavior → Confusion → Insight (no advice, no checklist) - - No "•" bullet markers — short lines with breathing room - - No explanation — reader connects the dots themselves - - Engineers recognize themselves in this. That's the goal. - -━━━ STYLE B GOLD EXAMPLE 9 (2026-04-04 — Human-rated GOLD. The canonical reference.) ━━━ -Topic: 400G upgrade reality. This is the GOLD VERSION rated by human feedback. Zero drift, one idea. -"If the article explains more than one thing, it's broken." This is what correct looks like. - - "400G: The Upgrade That Exposes Everything - - You're about to sign a purchase order for a few hundred 400G optics. - - On paper, everything looks straightforward. Higher density, fewer ports, mature technology. Nothing suggests this should be difficult. - - That's usually where things start to drift. - - Because the problem isn't the optics. - - It's everything around them. - - I've seen deployments where everything looked clean in the lab. Links came up, traffic was stable, nothing indicated a problem. - - Then production hits. - - A few links start throwing CRC errors. Others begin flapping intermittently. Nothing fails completely. Just enough instability to slow everything down. - - That's when optics get blamed. - - Swapped. Replaced. Moved to different ports. - - Nothing changes. - - Until someone actually checks the physical layer. 
- - Not visually. Properly. - - And suddenly everything stabilizes. - - Same optics. Same setup. Different result. - - That's the part no spec sheet tells you. - - Because it's not in the optics. - - At lower speeds, there's enough margin to hide imperfections. Cabling doesn't have to be perfect. Connectors don't have to be spotless. - - At 400G, that margin gets tight. - - Not gone. Just tight enough that everything that used to be 'fine' becomes visible. - - A connector that looks clean might not be. - A polarity issue that never showed up before suddenly kills the link. - Cabling that worked for years sits right at the edge. - - Nothing new broke. - - You just lost the margin that was hiding it. - - That's why these problems are so frustrating. - - They don't fail cleanly. - - They show up as inconsistent behavior. As links that work — until they don't. - - And that's where the real cost sits. - - Not in the hardware. - - In the time spent debugging something that technically works — just not reliably. - - Higher speeds don't introduce new problems. - - They expose the ones that were already there. - - Because 400G doesn't fail in design. - - It fails when your assumptions don't hold up anymore." - -KEY ELEMENTS OF GOLD STANDARD 9 (what makes this the canonical reference): - - ONE core idea: "400G exposes what was already marginal." Nothing else. - - ZERO secondary topics: no power, no cost breakdown, no vendor comparison, no fiber deep dive. - - ZERO scenarios: no named scenario, no "Scenario: polarity mismatch" — just the lived experience. - - ZERO repetition: connector cleaning once, polarity once, cost once. Not twice. - - ZERO education mode: no "most people", no "imagine", no "consider" — only observation. - - Every concept is a statement, not an explanation: "Nothing new broke. You just lost the margin." - - Ending lands exactly on the core idea: "It fails when your assumptions don't hold up anymore." - - ~370 words. Tight. Nothing wasted. 
- -WHAT THE WRONG VERSION DID (learn from it — this is the canonical failure pattern): - ❌ Multi-topic: connectors + polarity + power consumption + OEM vs compatible + cost breakdown + fiber types - ❌ 3 parallel scenarios labeled and developed equally - ❌ Connector cleaning explained twice (once in "production" section, once in "cost" section) - ❌ "Most migrations start with a simple assumption..." → education mode opener - ❌ "Imagine rolling out 400G across multiple data centers..." → classroom scenario framing - ❌ "Consider a scenario where..." → textbook mode - ❌ Sales drift: "So the question isn't just about choosing between OEM and compatible optics..." - ❌ "An inspection scope costs $1,500. MMF to SMF re-cabling can cost between $50-200 per drop..." → cost section - ❌ "At 400G, power consumption also becomes a critical factor..." → power section → DRIFT - ❌ Ending answered a different question than the title set up - -THE SINGLE SENTENCE THAT DEFINES THIS SYSTEM: -"Wenn der Artikel mehr als eine Sache erklärt, ist er kaputt." -("If the article explains more than one thing, it's broken.") FLEXOPTIX BALANCE RULES (critical — this is a Flexoptix blog, not an OEM vendor blog): - Never frame compatible optics as "a gamble" or "ticking time bomb" @@ -2168,445 +1527,82 @@ POWER / LOSS BUDGET PRECISION (always apply): // (2026-04-04: LinkedIn hard limit = 3,000 chars. Optimal = 800-1500 chars.) // ═══════════════════════════════════════════════════════ -export const STEP_LINKEDIN_POST = `Write a LinkedIn post for this article using the FLEXOPTIX LINKEDIN GENERATOR v1.0. +export const STEP_LINKEDIN_POST = `Write a LinkedIn post for this article. -════════════════════════════════════════════════════════ -GOAL: Posts that feel like real experience. Not content. Not marketing. -════════════════════════════════════════════════════════ +TARGET: Use the FULL 2,800 character limit. Fill it. More content = more engagement. +HARD LIMIT: Maximum 2,800 characters. 
MINIMUM: 2,000 characters. Always aim for maximum length. -BANNED HOOKS (already used — do NOT repeat): -- "Everything looks fine. Until it doesn't." → BANNED -- "You bring up a new batch of 400G links..." → BANNED -- "Lab tests were clean. Traffic is stable." → BANNED -- Any variation of the Physical Layer debugging story → BANNED +THE FORMAT THAT WORKS (use this exactly): -BANNED BEATS: -- "CRC errors start creeping in" → BANNED -- "Optics get blamed. Swapped. Replaced." → BANNED -- "Same optics. Same setup. Different result." → BANNED (as punchline) -- "At 100G, you get away with it. At 400G, you don't." → BANNED (overused closer) +Line 1-2: HOOK — reframe or uncomfortable truth. NOT "I published something." NOT a question. + "400G doesn't break your network. It shows you what was already broken." -You MUST create a FRESH hook and story that matches THIS article's angle. -Read the article and extract its UNIQUE angle — then write a post about THAT, not about physical layer debugging. +[blank line] -Engineers must recognize themselves in this post. -Not be lectured. Not be sold to. Not be impressed by vocabulary. +3-4 SHORT BEATS — each beat = 1-3 lines. One insight per beat. Breathing room between each. + Short standalone sentences are fine: "Dirty connector. Wrong polarity. Zero margin left." + This is NOT a bullet list — it's a rhythm. No "•" or "-" markers. -LENGTH: 80–180 words (optimal). HARD LIMIT: 2,800 chars. -Reads fully in feed — no "see more". One screen. +[blank line] -FORMAT (fixed — no variations): +CTA — ONE LINE: "Full breakdown in the blog — link in first comment." + Do NOT include a URL. No "Check out my article". No "I'm excited to share". - [HOOK: 1-2 lines] - [blank line] - [BEHAVIOR: 2-4 short beats, 1-3 lines each, blank lines between] - [blank line] - [INSIGHT: 1-2 lines — observation, not advice] - [blank line] - CTA: "Full breakdown in the blog — link in first comment." - [blank line] +[blank line] + +HASHTAGS: 3-4 only. Last line. 
Always include #Flexoptix. #OpticalNetworking #DataCenter #NetworkEngineering #Flexoptix -════════════════════════════════════════════════════════ -HOOK ENGINE — pick one pattern based on the article's core idea: -════════════════════════════════════════════════════════ +GOLD EXAMPLE (346 chars — shows the target format and rhythm, not the target length): -HOOK TYPE 1 — CONTRADICTION: - "Everything looks fine. Until it doesn't." - Use when: article is about subtle failures or unexpected behavior. +400G doesn't break your network. -HOOK TYPE 2 — REALITY BREAK: - "400G doesn't break your network." - [next line] "It shows you what was already broken." - Use when: article reframes a common assumption. +It shows you what was already broken. -HOOK TYPE 3 — FALSE ASSUMPTION: - "Most people think the optics are the problem." - Use when: article corrects a widespread misconception. +Most teams blame the optics first. +Swap them. Replace them. Escalate. -HOOK TYPE 4 — EXPERIENCE: - "I've seen links run clean for hours…" - Use when: article is built around a specific deployment moment. +And then someone finally checks the physical layer. -HOOK TYPE 5 — MOMENT: - "Friday afternoon deployment. Everything green." - Use when: article opens with a situation the reader has been in. +Dirty connector. Wrong polarity. Zero margin left. -════════════════════════════════════════════════════════ -BEHAVIOR ENGINE — describe what actually happens: -════════════════════════════════════════════════════════ +Same optics. Same config. Different result. -Show real production behavior. Never explain. Never define. +At 100G, you get away with it. At 400G, you don't. -Good behavior beats: - "Links come up. Error counters start creeping. Not enough to drop. Just enough to slow everything down." - "Optics get blamed. Swapped. Replaced. Moved to different ports. Nothing changes." - "Config looks correct. Hardware is fine. Everything points to 'should work'." +Full breakdown in the blog — link in first comment.
-Bad (explanation mode — never use): - "CRC errors occur when the optical signal is degraded beyond the receiver threshold." - "This is caused by dirty connectors reducing available optical margin." +#OpticalNetworking #DataCenter #NetworkEngineering #Flexoptix -════════════════════════════════════════════════════════ -BREAK ENGINE — introduce the confusion: -════════════════════════════════════════════════════════ +--- -This is the moment where nothing obvious is wrong but nothing works. -The reader thinks: "yes, exactly — that's what happened to us." +HARD RULES: +- No emojis (unless ONE strategic opener, never mid-text) +- No "I'm thrilled" / "Excited to share" / "Let's dive in" +- No markdown, no bold, no headers +- No explanation blocks — short beats only +- Engineer voice, not influencer voice +- If over 2,800 chars — cut until under -Good break beats: - "Until someone actually checks the physical layer properly." - "Same optics. Same setup. Different result." - "That's the part that costs time. Not hardware." - -════════════════════════════════════════════════════════ -INSIGHT ENGINE — end with observation, not advice: -════════════════════════════════════════════════════════ - -NEVER: - "You should always clean connectors." - "Best practice: verify physical layer before deployment." - "To avoid this, implement a validation checklist." - -ALWAYS: - "At 100G, you get away with it. At 400G, you don't." - "Higher speeds don't introduce new problems. They expose the ones that were already there." - "That's where the problem actually is." - -════════════════════════════════════════════════════════ -ENGAGEMENT BOOSTER v1.0 — apply after drafting: -════════════════════════════════════════════════════════ - -Engagement does NOT come from asking questions. 
It comes from: - ✔ Recognition — "that's exactly what happened to us" - ✔ Friction — "I see this differently" - ✔ Provocation — "I need to comment on this" - ✔ Unfinished feeling — space for the reader to complete the thought - -APPLY THESE BOOSTER TYPES (pick 2-3 that fit the topic): - -TYPE 1 — RELATABILITY: Add one moment that feels extremely real and specific. Do not explain it. - Effect: reader remembers their own experience. - Example: "Lab tests were clean. Then production hit." - -TYPE 2 — FRICTION: Introduce a subtle contradiction to common belief. Do not explain it. - NOT: "Connectors need to be clean." - BUT: "Most people don't check connectors until everything else has failed." - -TYPE 3 — BLAME SHIFT: Move the problem away from the obvious target to something less expected. - NOT: "The optics failed." - BUT: "The optics were fine. Everything around them wasn't." - Effect: triggers "exactly this" / "not always" / "depends" comments. - -TYPE 4 — UNRESOLVED EDGE: End with a strong observation, not a conclusion. Leave it open. - NOT: "Always validate your setup." - BUT: "That's usually where things start to go wrong." - -TYPE 5 — PATTERN BREAK: One short punchy sentence that breaks flow intentionally. - "Nothing fails." - [blank] - "Until it does." - -MICRO-BOOSTERS (apply anywhere): - ADD EDGE: make the tone slightly more direct, less neutral. - ADD TENSION: sharpen the contrast between expectation and reality. - ADD PAIN: emphasize wasted time — not technical failure. - REMOVE SAFETY: delete "sometimes", "often", "can", "may" — make assertions direct. - -════════════════════════════════════════════════════════ -AUTO-KILL FILTER — scan and delete: -════════════════════════════════════════════════════════ -→ "what do you think?" / "agree?" / "have you experienced this?" 
/ "let me know" → DELETE ALL -→ "to avoid this" → DELETE -→ "best practice" → DELETE -→ "make sure to" → DELETE -→ "important to" → DELETE -→ "you should" → DELETE -→ "ensure that" → DELETE -→ "I just published" / "Excited to share" / "Check out my article" → DELETE -→ Bullet markers "•" or "-" inside the post → REMOVE (use short standalone lines instead) -→ Any emoji except ONE optional opener → REMOVE -→ More than 4 hashtags → REMOVE extras -→ Corporate tone / hedging language → REMOVE -→ Anything that explains the insight instead of showing it → REMOVE - -════════════════════════════════════════════════════════ -ENGAGEMENT FINAL CHECK — before returning: -════════════════════════════════════════════════════════ -Ask: Does this post — - ✔ trigger recognition in a working engineer? - ✔ create slight friction or disagreement? - ✔ avoid explaining the insight? - ✔ feel slightly unfinished — space for the reader to add their thought? -If any NO → rewrite that section. - -Rule: "If the post explains everything, nobody comments." - -════════════════════════════════════════════════════════ -ANTI-AI FILTER — make it human: -════════════════════════════════════════════════════════ -→ Perfectly parallel sentence structures → vary the rhythm -→ Every beat same length → break one up -→ Smooth transitions everywhere → add one abrupt line -→ Symmetric structure → make it slightly uneven - -════════════════════════════════════════════════════════ -ANGLE-AWARE HOOK SELECTION — CRITICAL -════════════════════════════════════════════════════════ - -The LinkedIn post MUST match the article's angle. Do NOT default to the Physical Layer story. - -TITLE CONTRACT FOR THIS ARTICLE: -{{TITLE_CONTRACT}} - -Match the hook and behavior beats to the article's actual angle: - -IF ECONOMIC/TCO article: - HOOK: "A $350 optic just turned into an $18,000 problem." or "Your 400G budget looks fine. Until you add the numbers nobody showed you." 
- BEATS: cost surprises, TCO reality, budget impact - CLOSE: cost-based insight, not "clean your connectors" - -IF DECISION/COMPARISON article: - HOOK: "Three quotes on your desk. The cheapest one isn't always the most expensive." or "OEM vs compatible. Everyone has an opinion. Here's what the data says." - BEATS: decision criteria, what specs don't tell you - CLOSE: decision framework, not failure story - -IF MARKET/TIMING article: - HOOK: "800G is shipping. Sort of." or "Half the industry says buy now. The other half says wait." - BEATS: what's real vs hype, timing signals - CLOSE: timing advice, not generic - -IF OPERATIONAL/PROCESS article: - HOOK: "We deployed 200 links in 3 weeks. Zero issues. Here's the boring reason why." - BEATS: process steps, what was done differently - CLOSE: process insight, not failure - -IF MIGRATION article: - HOOK: "Month 1: everything on schedule. Month 6: nothing was." - BEATS: planning vs reality, specific surprises - CLOSE: what to plan for, based on real experience - -DO NOT USE THE PHYSICAL LAYER HOOK ("Everything looks fine. Until it doesn't.") — it has been used already. Create a FRESH hook that matches THIS article's specific angle. - -════════════════════════════════════════════════════════ - -Return ONLY the post text. No commentary. No "Here is the post:". Start directly with the hook. +Return ONLY the post text. No commentary. No "Here is the post:". Article: {{ARTICLE}}`; // ═══════════════════════════════════════════════════════ -// STEP AFE: AUTO-FOCUS ENFORCER v1.0 -// (2026-04-04: Added — kills multi-topic drift BEFORE reduction. -// "If more than one thing is important, nothing is important.") -// ═══════════════════════════════════════════════════════ - -export const STEP_AFE = `You are running the FLEXOPTIX AUTO-FOCUS ENFORCER (AFE v1.0) on this article. - -RULE: Every Flexoptix article has exactly ONE core idea. -If this article has more than one, reduce it to the strongest one. 
- -════════════════════════════════════════════════════════ -STEP 1 — IDENTIFY THE CORE IDEA -════════════════════════════════════════════════════════ - -Read the article. Identify: -- The ONE thing this article is really about (one sentence) -- All secondary ideas that could be removed without destroying the article - -Core idea examples: -- "400G exposes physical layer weaknesses hidden at lower speeds" -- "Compatible optics don't fail — unvalidated deployments do" -- "Price drops change the ROI calculation for compatible optics" - -Secondary idea candidates (remove if not core): power consumption, cost breakdowns, vendor comparisons, fiber type deep dives, spec tables, additional scenarios unrelated to the core. - -════════════════════════════════════════════════════════ -STEP 2 — HARD REMOVAL RULES -════════════════════════════════════════════════════════ - -Remove immediately if NOT the core idea: -- Power consumption sections (unless the article IS about power) -- Cost breakdown sections (unless the article IS about cost) -- Vendor comparisons (unless the article IS a comparison) -- Fiber type deep dives (unless fiber IS the core) -- Any section starting with "Another thing to consider is..." -- Any section that could be lifted unchanged into a different article - -════════════════════════════════════════════════════════ -STEP 3 — SCENARIO LIMITER -════════════════════════════════════════════════════════ - -Count distinct scenarios: -- 1 scenario: keep as-is -- 2 scenarios: keep only the strongest, remove the other -- 3+ scenarios: framework, not article. Keep 1. Remove the rest. - -════════════════════════════════════════════════════════ -STEP 4 — REPETITION KILL -════════════════════════════════════════════════════════ - -Each concept appears ONCE. If connector cleaning appears twice: -- Keep the sharper instance. Delete the other — completely, no replacement. 
- -Warning signs: "Another issue is...", "Also worth noting...", "Additionally...", "There is also..." -Each of these = likely repetition or drift. Evaluate and cut aggressively. - -════════════════════════════════════════════════════════ -STEP 5 — EDUCATION MODE KILL -════════════════════════════════════════════════════════ - -These openers signal classroom/textbook mode — kill them: -- "Most people..." → replace with "I've seen..." or direct observation -- "Imagine a scenario..." → replace with "In one deployment..." -- "Consider the following..." → delete, start with the thing itself -- "It is important to understand that..." → delete the preamble, keep the point -- "Many engineers make the mistake of..." → "This is the standard failure mode." - -════════════════════════════════════════════════════════ -STEP 6 — WORD BAN LIST (hard replace/delete) -════════════════════════════════════════════════════════ - -These words are BANNED. Replace or delete every single occurrence: - - "validation" / "validate" → use specific action: "inspect", "check connectors", "run BERT", "measure with OPM" — never the abstract noun - "real-world conditions" → "in production" or delete - "under certain conditions" → state the condition or delete - "due to" → rewrite actively: "X causes Y" not "Y due to X" - "in reality" → delete, start with the fact - "in practice" → delete, start with the fact - "this means" / "this means that" → assert directly, remove the preamble - "significantly" → use a number or delete - "typically" / "generally" → assert directly or delete - "actually" / "basically" → delete always, no exceptions - "can cause" / "can lead to" → "causes" / "leads to" — remove the hedge - -════════════════════════════════════════════════════════ -STEP 7 — TITLE ALIGNMENT -════════════════════════════════════════════════════════ - -Does the article title match what the article is actually about? 
- -- "Investment Guide" but content is a failure story → rewrite title to match the failure story angle -- "Migration Guide" but no migration steps → change title to "Why Migration Fails" or similar -- Rule: The title must be a one-line summary of the actual article. If it isn't — fix the title. -- Always prefer fixing the title over changing the content. -- Strong title patterns: "[Speed]: The Upgrade That Exposes Everything" | "Why [X] Fails" | "The Real Cost of [X]" - -════════════════════════════════════════════════════════ -OUTPUT RULE -════════════════════════════════════════════════════════ - -Return ONLY the focused article (with corrected title if changed). No commentary. No focus score. No explanation of changes. -Target: cut 30–50% if multi-topic drift was present. Do NOT replace what you cut. -The article gets shorter and stronger — not shorter and incomplete. - -Article: -{{ARTICLE}}`; - -// ═══════════════════════════════════════════════════════ -// STEP AEM: AUTO-EDITOR MODE v1.0 (Senior Engineer Simulation) -// (2026-04-04: Added — voice polish after reduction. -// "If it has to explain itself, it's not good enough yet.") -// ═══════════════════════════════════════════════════════ - -export const STEP_AEM = `You are running the FLEXOPTIX AUTO-EDITOR MODE (AEM v1.0) on this article. - -GOAL: Make this read like a senior engineer who has seen this problem 50 times. -Not explaining. Not convincing. Describing what happens — because they know. - -════════════════════════════════════════════════════════ -PASS 1 — SENTENCE LENGTH -════════════════════════════════════════════════════════ - -Maximum ~16 words per sentence. Anything longer → split or cut the weaker half. -After a long build-up, add one very short sentence: "It isn't.", "That's it.", "Every time." -Vary length deliberately: long, short, long, very short. 
- -════════════════════════════════════════════════════════ -PASS 2 — AUTHORITY WITHOUT EXPLANATION -════════════════════════════════════════════════════════ - -Remove all justification: -- "...because X" after a clear statement → delete the because-clause. Statement stands alone. -- "this means that..." → delete. Reader infers. -- "this happens due to..." → delete. State the effect, not the cause chain. -- "this is why..." → delete. Move on. - -Before: "Connectors need to be clean because dirt reduces optical margin." -After: "Connectors need to be clean. Dirt kills margin." - -════════════════════════════════════════════════════════ -PASS 3 — CONFIDENCE INJECTION -════════════════════════════════════════════════════════ - -Replace hedges with direct assertions: -- "often" → remove or assert directly -- "can" → "does" or "will" -- "might" → "will" (if true) or delete -- "typically" → state the fact directly -- "in some cases" → name the case or cut -- "may lead to" → "leads to" -- "tends to" → just say what it does - -Before: "This can often lead to issues in production." -After: "This causes problems in production." - -════════════════════════════════════════════════════════ -PASS 4 — EXPERIENCE SIGNALS (max 2) -════════════════════════════════════════════════════════ - -If the article has fewer than 2 experience markers, add one naturally: -- "I've seen this before." -- "This usually shows up at the worst time." -- "That's where things start to drift." - -If the article already has 2+, skip this pass. - -════════════════════════════════════════════════════════ -PASS 5 — STRUCTURE BREAK -════════════════════════════════════════════════════════ - -Break any pattern that's too symmetric: -- Three paragraphs of equal length → shorten one -- Every paragraph opening with "The" → vary one opener -- Every beat following identical rhythm → disrupt one - -Add ONE rough edge: a fragment, an abrupt cut, a sudden 3-word paragraph. -Example: "That's the cost. Not hardware." 
- -════════════════════════════════════════════════════════ -PASS 6 — FINAL VOICE CHECK -════════════════════════════════════════════════════════ - -Read the article. Ask: does this sound like — - A) A system generating content - B) Someone who was actually there - -If A → find the sentences that give it away. They justify, explain the obvious, over-qualify. -Rewrite those sentences. Do not add new content — sharpen what's there. - -════════════════════════════════════════════════════════ -OUTPUT RULE -════════════════════════════════════════════════════════ - -Return ONLY the polished article. No commentary. No analysis. -Do NOT add new content, new sections, or new facts. -Only improve voice, sentence rhythm, and directness. - -Article: -{{ARTICLE}}`; - -// ═══════════════════════════════════════════════════════ -// STEP 8b: REDUCTION PASS — Remove 15-25% of content -// (2026-04-04: Added based on field feedback — articles were too long, -// repeated concepts, and "assembled" rather than written) +// STEP 8b: REDUCTION PASS — Remove 40% of content +// (2026-04-04: v5 update — increased from 15-25% to 40% based on +// Gold-standard feedback: "Your content gets better the more you delete") // ═══════════════════════════════════════════════════════ export const STEP8b_REDUCTION = `You are running the FLEXOPTIX REDUCTION ENGINE on this article. -Target length: 1,200–1,600 words. This is the gold zone for a Flexoptix technical blog post. -DO NOT go below 1,000 words. DO NOT exceed 2,000 words (warning threshold). +CORE PRINCIPLE: Your content gets better the more you delete. + +Target: CUT 25-30% of the current word count — focus on removing WEAK content, not making it short. +Target length: 1,200–2,000 words. Flexoptix blogs should be thorough and detailed. +DO NOT go below 1,000 words. DO NOT exceed 2,500 words (warning threshold). +Keep depth and detail — only cut repetition, filler, and AI residue. This is a 5-pass refinement. 
Apply all passes in sequence: @@ -2663,10 +1659,10 @@ Read the final text out loud (mentally). Fix anything that sounds like it was ge ════════════════════════════════════════════════════════ LENGTH TARGETS (apply after all 5 passes): Short article: 1,000–1,200 words (opinion piece, market note) - Standard article: 1,200–1,600 words (technical analysis, guide) ← DEFAULT TARGET - Long article: 1,600–2,000 words (deep-dive, migration tutorial) ← only if content demands it - Warning zone: 2,000+ words — something wasn't cut enough, revisit Pass 1. - HARD MINIMUM: 1,000 words. If below 1,000 words — expand Pass 3 bridges, do not submit. + Standard article: 1,200–1,800 words (technical analysis, guide) ← DEFAULT TARGET + Long article: 1,800–2,500 words (deep-dive, migration tutorial) ← when content demands it + Warning zone: 2,500+ words — something wasn't cut enough, revisit Pass 1. + Too short: <1,000 words — you cut too much, add back the strongest details. ════════════════════════════════════════════════════════ DO NOT add section headers. DO NOT add new facts. DO NOT change the writing voice. @@ -2719,103 +1715,107 @@ Return only the fixed article. No commentary. Article: {{ARTICLE}}`; +// ═══════════════════════════════════════════════════════ +// STEP 8d: AUTO-KILL LAYER v1.0 +// (2026-04-04: New in v5 — systematic 10-category cleanup +// based on Gold-standard editorial feedback) +// ═══════════════════════════════════════════════════════ + +export const STEP8d_AUTO_KILL = `You are running the FLEXOPTIX AUTO-KILL LAYER on this article. + +This is the final cleanup pass before QA. It catches everything previous steps missed. + +CORE PRINCIPLE: If a line makes the text feel more generated, more formal, more repetitive, or more like documentation than lived experience — kill it. + +Scan the article against ALL 10 categories. Fix every violation found. 
+ +════════════════════════════════════════════════════════ +CATEGORY A: SPEC BLOCKS +════════════════════════════════════════════════════════ +Delete: TX/RX power tables, dBm range listings, per-lane values, multi-optic comparison blocks, dense technical specs in the intro. Keep ONLY the operational meaning. + +════════════════════════════════════════════════════════ +CATEGORY B: FORMULA RESIDUE +════════════════════════════════════════════════════════ +Delete: optical budget calculations, attenuation formulas, margin equations, LaTeX, lane math. Replace with plain-language insight: "margins are tighter", "less room for mistakes". + +════════════════════════════════════════════════════════ +CATEGORY C: SECTION LEAKAGE +════════════════════════════════════════════════════════ +Delete: visible section labels ("What breaks in production", "Hidden costs", "When not to use"). The article must read as continuous prose, not assembled modules. + +════════════════════════════════════════════════════════ +CATEGORY D: GENERIC TRANSITIONS +════════════════════════════════════════════════════════ +Delete: "For example", "In today's world", "This means that", "This is where things get interesting", "on paper" (if sentence works without it), "in reality" (if sentence works without it). + +════════════════════════════════════════════════════════ +CATEGORY E: REPEATED CONCEPTS +════════════════════════════════════════════════════════ +Find every concept that appears more than once. Keep only its strongest expression. Common repeats: connector cleaning, MMF vs SMF explanation, polarity, production vs lab, hidden costs. + +════════════════════════════════════════════════════════ +CATEGORY F: SKU MENTIONS +════════════════════════════════════════════════════════ +Delete: vendor part codes (FX-400DR4-001 etc.). Replace with technology class: "400G DR4 optic". Exception: verified products from context data that are contextually necessary. 
+ +════════════════════════════════════════════════════════ +CATEGORY G: FALSE AUTHORITY PHRASES +════════════════════════════════════════════════════════ +Delete or rewrite: "This is something we see regularly", "Everyone knows", "The numbers don't lie", "The reality hits hard", "Let me tell you something", "recipe for disaster", "ticking time bomb". Replace with calm, experience-based language. + +════════════════════════════════════════════════════════ +CATEGORY H: OVER-EXPLAINED BASICS +════════════════════════════════════════════════════════ +Delete: definitions the audience already knows (what MMF is, what CRC stands for, what single-mode means). The readers are experienced network engineers. + +════════════════════════════════════════════════════════ +CATEGORY I: WHITEPAPER TONE +════════════════════════════════════════════════════════ +Delete or rewrite: "Proper cleaning protocols are crucial", "It is essential to implement", "A structured pre-deployment testing strategy", "This enables organizations to", "significant benefits", "robust framework", "best practices". + +════════════════════════════════════════════════════════ +CATEGORY J: FAKE PRECISION +════════════════════════════════════════════════════════ +Delete or soften: invented firmware versions, unverifiable exact costs, overly specific rates, math that sounds exact but adds no value. If it's not from verified context data, don't cite it as fact. + +════════════════════════════════════════════════════════ +ACCEPTANCE TEST +════════════════════════════════════════════════════════ +The article passes Auto-Kill ONLY if: +- No spec blocks remain (A) +- No formulas remain (B) +- No visible module structure (C) +- No AI transitions (D) +- No repeated ideas (E) +- No SKU names in prose (F) +- No dramatic phrasing (G) +- No basic explanations (H) +- No whitepaper language (I) +- No fake precision (J) + +The text must feel: lean, natural, experience-driven, operationally useful. 
+ +Return only the cleaned article. No commentary. + +Article: +{{ARTICLE}}`; + // ═══════════════════════════════════════════════════════ -// STEP APM: AUTO-PRECISION MODE v1.0 (Final Cut — Last Filter Before Publish) -// (2026-04-04: Added — micro-editing layer, runs after Style Lock and QA. -// "If a word can go, it must go.") -// ═══════════════════════════════════════════════════════ +// AUTO-KILL SCORING (runs after STEP 10) +// ═══════════════════════════════════════════════════════ -export const STEP_APM = `You are running the FLEXOPTIX AUTO-PRECISION MODE (APM v1.0) on this article. +export const AUTO_KILL_SCORING = `Score this article from 1-10 on each Auto-Kill dimension: -This is the FINAL FILTER before publication. Nothing after this step. -Goal: maximum impact per word. No dead weight. No filler. No over-explanation. +1. CLEANLINESS — No spec residue, no formula residue, no AI phrases remaining +2. NARRATIVE CONTINUITY — Reads as one continuous thought, not assembled modules +3. NON-AI FEEL — Would a reader think a person wrote this, not an LLM? +4. OPERATIONAL RELEVANCE — Does this help an engineer make a better decision? -════════════════════════════════════════════════════════ -PASS 1 — SENTENCE EVALUATION -════════════════════════════════════════════════════════ +For each score below 8, list what should still be removed or rewritten. -For every sentence, ask: - 1. Does it add new information? - 2. Does it move the narrative forward? - 3. Would the article be weaker without it? - -If no to all three → delete the sentence. No replacement. - -════════════════════════════════════════════════════════ -PASS 2 — WORD REDUCTION -════════════════════════════════════════════════════════ - -Rewrite every sentence using fewer words. Target: 20–50% reduction per sentence. -Never lose meaning. Only lose weight. - -Examples: - BEFORE: "This is where things start to become problematic in production environments." - AFTER: "This is where things break." 
- - BEFORE: "At higher speeds like 400G and 800G, the margin for error becomes significantly smaller." - AFTER: "At 400G, the margin gets tight." - - BEFORE: "This is where the real cost sits, not in the optics themselves but in the time spent debugging." - AFTER: "The cost isn't the optics. It's the time." - -════════════════════════════════════════════════════════ -PASS 3 — FILLER WORD KILL -════════════════════════════════════════════════════════ - -Delete immediately (no replacement): - actually · basically · essentially · significantly · typically · generally · simply - in reality · in practice · in fact · it is worth noting · it is important to note - as mentioned · as noted · needless to say · at the end of the day - -════════════════════════════════════════════════════════ -PASS 4 — DUPLICATE IDEA DETECTOR -════════════════════════════════════════════════════════ - -Scan for any idea that appears more than once. Keep the sharpest version. Delete the rest. -One idea = one appearance. No "echo" paragraphs that restate what was just said. - -════════════════════════════════════════════════════════ -PASS 5 — VERB STRENGTHENING -════════════════════════════════════════════════════════ - -Weak → Strong: - "can cause" → "causes" - "can lead to" → "leads to" - "might create" → "creates" - "may result in" → "results in" - "tends to" → assert directly - -════════════════════════════════════════════════════════ -PASS 6 — HARD CUT (20%) -════════════════════════════════════════════════════════ - -Remove the weakest 20% of sentences. No replacement. The article must survive the cut. -If a section collapses without its filler — that section was filler. - -════════════════════════════════════════════════════════ -PASS 7 — MICRO-CUT -════════════════════════════════════════════════════════ - -Remove 1–2 words from every sentence without breaking meaning. -Repeat until no further reduction is possible without losing information. 
- -════════════════════════════════════════════════════════ -PASS 8 — RHYTHM CHECK -════════════════════════════════════════════════════════ - -After all cuts: vary sentence length deliberately. -Pattern: long → short → very short → medium → short. -Avoid 3+ sentences of the same length in a row. -A two-word sentence after a long paragraph creates impact. Use it. - -════════════════════════════════════════════════════════ -OUTPUT RULE -════════════════════════════════════════════════════════ - -Return ONLY the final article. No commentary. No explanation of what was changed. -The article will be shorter. That is correct. Shorter is better when every word earns its place. - -THE SINGLE RULE: If a word can go, it must go. +Return ONLY as JSON: +{"cleanliness": N, "narrative": N, "non_ai": N, "relevance": N, "issues": ["..."]} Article: {{ARTICLE}}`; @@ -2827,89 +1827,3 @@ Article: export function withCalibration(systemPrompt: string): string { return systemPrompt + CALIBRATION_GOLD_STANDARD; } - -/** - * SLL v1.0 — Build Self-Learning Loop context from DB. - * Queries current learned patterns and weekly state, returns a compact - * context block that gets prepended to the system prompt. - * - * Returns empty string if no data available (safe fallback). 
- */ -export async function buildSLLContext(): Promise { - try { - const { pool } = await import("../db/client"); - - const [stateRes, patternRes, statsRes] = await Promise.all([ - pool.query( - `SELECT * FROM blog_sll_state ORDER BY week_start DESC LIMIT 1` - ), - pool.query( - `SELECT pattern_type, pattern_value, performance_class, avg_engagement, sample_count - FROM blog_learned_patterns WHERE active = TRUE - ORDER BY performance_class, avg_engagement DESC NULLS LAST LIMIT 20` - ), - pool.query( - `SELECT COUNT(*) as total, AVG(engagement_score) as avg_score - FROM blog_performance WHERE engagement_score IS NOT NULL` - ), - ]); - - const total = Number(statsRes.rows[0]?.total || 0); - if (total < 3) return ""; // Not enough data yet - - const state = stateRes.rows[0]; - const winners = patternRes.rows.filter((p: any) => p.performance_class === "winner"); - const losers = patternRes.rows.filter((p: any) => p.performance_class === "loser"); - - const lines: string[] = [ - "", - "════════════════════════════════════════════════════════", - `SELF-LEARNING LOOP (SLL v1.0) — ${total} posts analyzed`, - "Real performance data from LinkedIn. Saves + Shares matter. 
Likes don't.", - "════════════════════════════════════════════════════════", - "", - ]; - - if (winners.length > 0) { - lines.push("✔ WHAT WORKS (high saves + shares):"); - for (const p of winners.slice(0, 6)) { - lines.push(` [${p.pattern_type}] ${p.pattern_value}`); - } - lines.push(""); - } - - if (losers.length > 0) { - lines.push("✗ WHAT FAILS (low engagement, gets ignored):"); - for (const p of losers.slice(0, 6)) { - lines.push(` [${p.pattern_type}] ${p.pattern_value}`); - } - lines.push(""); - } - - if (state) { - if (state.optimal_length_min && state.optimal_length_max) { - lines.push(`OPTIMAL ARTICLE LENGTH: ${state.optimal_length_min}–${state.optimal_length_max} words`); - } - if (state.top_topics && (state.top_topics as string[]).length > 0) { - lines.push(`TOP TOPICS: ${(state.top_topics as string[]).join(", ")}`); - } - if (state.best_hook_patterns && (state.best_hook_patterns as string[]).length > 0) { - lines.push("BEST HOOK PATTERNS:"); - for (const h of (state.best_hook_patterns as string[]).slice(0, 3)) { - lines.push(` "${h}"`); - } - } - lines.push(""); - } - - lines.push( - "APPLY THESE LEARNINGS: Use winner patterns. 
Avoid loser patterns.", - "This is not theory — it is what actually performed in production.", - "════════════════════════════════════════════════════════", - ); - - return lines.join("\n"); - } catch { - return ""; // Always safe-fail — never break the pipeline - } -} diff --git a/packages/api/src/routes/blog.ts b/packages/api/src/routes/blog.ts index d91a70d..e764cc2 100644 --- a/packages/api/src/routes/blog.ts +++ b/packages/api/src/routes/blog.ts @@ -16,22 +16,21 @@ import { pool } from "../db/client"; const pipelineProgress = new Map(); function setProgress(draftId: string, step: number, label: string): void { - const pct = Math.round((step / 18) * 92) + 2; // 2%..94% during run, 100% on complete - pipelineProgress.set(draftId, { step, total: 17, label, pct }); + const pct = Math.round((step / 16) * 92) + 2; // 2%..94% during run, 100% on complete + pipelineProgress.set(draftId, { step, total: 16, label, pct }); } function clearProgress(draftId: string): void { pipelineProgress.delete(draftId); } import { semanticSearch } from "../embeddings/client"; -import { generate, checkHealth, resetOllamaQueue, resetClaudeQueue, getQueueDepth } from "../llm/client"; +import { generate, checkHealth, resetOllamaQueue, getQueueDepth } from "../llm/client"; import { SYSTEM_PROMPT, DEPTH_PROMPT, ANTI_GENERIC_INTRO_PROMPT, QUALITY_CONTROL_PROMPT, PROCUREMENT_LAYER_PROMPT, - VIRAL_SIGNAL_PROMPT, buildTopicPrompt, } from "../llm/blog-prompts"; @@ -334,12 +333,11 @@ function validateArticle(content: string): string[] { } // Check minimum depth const wordCount = content.split(/\s+/).length; - if (wordCount < 1200) { - issues.push(`Too short: ${wordCount} words (minimum 1200)`); + if (wordCount < 800) { + issues.push(`Too short: ${wordCount} words (minimum 800 for template, 1200 for LLM)`); } - // Check for power budget only in articles primarily about troubleshooting (title contains it) - const titleLine = content.split("\n")[0]?.toLowerCase() || ""; - if 
(titleLine.includes("troubleshoot") && !content.toLowerCase().includes("power budget")) { + // Check for power budget section in troubleshooting articles + if (content.toLowerCase().includes("troubleshoot") && !content.toLowerCase().includes("power budget")) { issues.push("Missing power budget section"); } @@ -1003,28 +1001,22 @@ async function runLlmPipeline( STEP5_REALITY_INJECTION, STEP6_TECHNICAL_DEEPENING, STEP7_OPINION_LAYER, - STEP_AFE, STEP8_KILL_AI_TONE, STEP8b_REDUCTION, - STEP_AEM, STEP8c_STYLE_LOCK, + STEP8d_AUTO_KILL, + AUTO_KILL_SCORING, STEP9_QA_CHECK, STEP10_QUALITY_SCORE, - STEP_APM, STEP_LINKEDIN_POST, BLOG_TYPES, buildFeedbackContext, - buildSLLContext, withCalibration, - STEP0_TITLE_CONTRACT, - STEP_TECHNICAL_SANITY, - STEP_SELF_HEAL, - STEP_TITLE_CONTRACT_CHECK, } = await import("../llm/fo-blog-pipeline"); const LLM_OPTS = { temperature: 0.7, maxTokens: 8192, timeoutMs: 480000 }; const LLM_REFINE = { temperature: 0.4, maxTokens: 6144, timeoutMs: 480000 }; - const TOTAL_STEPS = 21; // 17-step pipeline + title contract + technical sanity + self-heal + contract check + const TOTAL_STEPS = 16; // 10 original + 4b Narrative Control + 8b Reduction + 8c Style Lock + 8d Auto-Kill + Auto-Kill Score + LinkedIn let stepsCompleted = 0; try { @@ -1044,14 +1036,7 @@ async function runLlmPipeline( }))); } catch { /* no feedback yet, that's fine */ } - // Load SLL learned patterns (safe-fails if no data yet) - let sllContext = ""; - try { - sllContext = await buildSLLContext(); - if (sllContext) console.log(" SLL: Learned patterns injected into system prompt"); - } catch { /* no SLL data yet, fine */ } - - const systemPrompt = withCalibration(FO_BLOG_SYSTEM_PROMPT + feedbackContext + sllContext); + const systemPrompt = withCalibration(FO_BLOG_SYSTEM_PROMPT + feedbackContext); // Warmup await generate("Test", "OK", { temperature: 0.1, maxTokens: 8, timeoutMs: 60000 }).catch(() => {}); @@ -1102,45 +1087,11 @@ async function runLlmPipeline( // Get blog type 
config const blogType = BLOG_TYPES[selectedTopic as keyof typeof BLOG_TYPES] || BLOG_TYPES.tutorial; - // Load existing articles to prevent angle repetition - let existingAnglesContext = ""; - let forbiddenAnglesContext = ""; - try { - const existingResult = await pool.query( - `SELECT title, draft_content FROM blog_drafts - WHERE status IN ('published', 'review', 'ready') AND draft_content IS NOT NULL - ORDER BY created_at DESC LIMIT 10` - ); - if (existingResult.rows.length > 0) { - const summaries = existingResult.rows.map((r: { title: string; draft_content: string }) => { - // Extract first 150 chars of content as summary - const preview = (r.draft_content || "").replace(/^#[^\n]*\n/, "").trim().slice(0, 150); - return `- "${r.title}": ${preview}...`; - }).join("\n"); - - existingAnglesContext = `\n\nALREADY PUBLISHED ARTICLES (do NOT repeat their angles or structure):\n${summaries}\n\nFor this new article, choose a COMPLETELY DIFFERENT perspective and angle than any of the above.`; - - forbiddenAnglesContext = `ALREADY WRITTEN ANGLES (forbidden — do not repeat these):\n${existingResult.rows.map((r: { title: string }) => `- "${r.title}"`).join("\n")}\n\nThe new article MUST have a structurally different angle — different story type, different reader takeaway, different perspective lens.\n`; - } - } catch { /* fine if no articles yet */ } - - // ═══ STEP 0: Title Contract — bind LLM to headline promise ═══ - console.log(" Step 0: Title Contract (binding headline to content)..."); - setProgress(draftId, 1, "Step 0: Title Contract"); - const step0 = await generate(systemPrompt, - STEP0_TITLE_CONTRACT.replace("{{TITLE}}", title), - { ...LLM_REFINE, maxTokens: 2048 } - ); - const titleContract = step0.text; - console.log(` Title Contract: ${titleContract.split("\n").slice(0, 3).join(" | ").slice(0, 120)}...`); - // ═══ STEP 1: Topic Expansion ═══ - console.log(" Step 1: Topic Expansion..."); - setProgress(draftId, 2, "Step 1: Topic Expansion"); + console.log(" Step 
1/10: Topic Expansion..."); + setProgress(draftId, 1, "Step 1/10: Topic Expansion"); const step1 = await generate(systemPrompt, - STEP1_TOPIC_EXPANSION - .replace("{{TOPIC}}", title) - .replace("{{EXISTING_ANGLES}}", existingAnglesContext + "\n\nTITLE CONTRACT (the article MUST fulfill this):\n" + titleContract), + STEP1_TOPIC_EXPANSION.replace("{{TOPIC}}", title), LLM_OPTS ); stepsCompleted = 1; @@ -1149,9 +1100,7 @@ async function runLlmPipeline( console.log(" Step 2/10: Angle Selection..."); setProgress(draftId, 2, "Step 2/10: Angle Selection"); const step2 = await generate(systemPrompt, - STEP2_ANGLE_SELECTION - .replace("{{FORBIDDEN_ANGLES}}", forbiddenAnglesContext + "\nTITLE CONTRACT:\n" + titleContract) - .replace("{{SCENARIOS}}", step1.text), + STEP2_ANGLE_SELECTION.replace("{{SCENARIOS}}", step1.text), LLM_REFINE ); stepsCompleted = 2; @@ -1174,7 +1123,6 @@ async function runLlmPipeline( const step4 = await generate(systemPrompt, STEP4_MASTER_DRAFT .replace("{{OUTLINE}}", step3.text) - .replace("{{TITLE_CONTRACT_INJECT}}", "TITLE CONTRACT FOR THIS ARTICLE (BINDING — every paragraph must serve this promise):\n" + titleContract) .replace("{{CONTEXT_DATA}}", contextData), { ...LLM_OPTS, maxTokens: 8192 } ); @@ -1201,88 +1149,77 @@ async function runLlmPipeline( stepsCompleted = 6; // ═══ STEP 6: Technical Deepening ═══ - console.log(" Step 7/16: Technical Deepening..."); - setProgress(draftId, 7, "Step 7/16: Technical Deepening"); + console.log(" Step 7/13: Technical Deepening..."); + setProgress(draftId, 7, "Step 7/13: Technical Deepening"); const step6 = await generate(systemPrompt, STEP6_TECHNICAL_DEEPENING.replace("{{ARTICLE}}", step5.text), LLM_REFINE ); - stepsCompleted = 7; + stepsCompleted = 6; // ═══ STEP 7: Opinion Layer ═══ - console.log(" Step 8/16: Opinion Layer..."); - setProgress(draftId, 8, "Step 8/16: Opinion Layer"); + console.log(" Step 8/13: Opinion Layer..."); + setProgress(draftId, 8, "Step 8/13: Opinion Layer"); const step7 = await 
generate(systemPrompt, STEP7_OPINION_LAYER.replace("{{ARTICLE}}", step6.text), LLM_REFINE ); stepsCompleted = 8; - // ═══ STEP AFE: Auto-Focus Enforcer (ONE idea, ONE scenario, kill drift) ═══ - console.log(" Step 9/16: Auto-Focus Enforcer (kill multi-topic drift)..."); - setProgress(draftId, 9, "Step 9/16: Auto-Focus Enforcer"); - const stepAFE = await generate(systemPrompt, - STEP_AFE.replace("{{ARTICLE}}", step7.text), + // ═══ STEP 8: Kill AI Tone ═══ + console.log(" Step 9/13: Kill AI Tone..."); + setProgress(draftId, 9, "Step 9/13: Kill AI Tone"); + const step8 = await generate(systemPrompt, + STEP8_KILL_AI_TONE.replace("{{ARTICLE}}", step7.text), LLM_REFINE ); stepsCompleted = 9; - const wordsAFE = stepAFE.text.split(/\s+/).length; - const wordsBeforeAFE = step7.text.split(/\s+/).length; - const pctAFE = Math.round((1 - wordsAFE / wordsBeforeAFE) * 100); - if (pctAFE > 5) console.log(` AFE cut: ${wordsBeforeAFE} → ${wordsAFE} words (−${pctAFE}%) — drift removed`); - // ═══ STEP 8: Kill AI Tone ═══ - console.log(" Step 10/16: Kill AI Tone..."); - setProgress(draftId, 10, "Step 10/16: Kill AI Tone"); - const step8 = await generate(systemPrompt, - STEP8_KILL_AI_TONE.replace("{{ARTICLE}}", stepAFE.text), - LLM_REFINE - ); - stepsCompleted = 10; - - // ═══ STEP 8b: Reduction Engine (5-pass: Repetition Kill → Tech Prune → Flow Rebuild → Weight Correction → Humanization) ═══ - console.log(" Step 11/16: Reduction Engine (5-pass, target 700-1000 words)..."); - setProgress(draftId, 11, "Step 11/16: Reduction Engine"); + // ═══ STEP 8b: Reduction Engine (5-pass, target: cut 40%) ═══ + console.log(" Step 10/16: Reduction Engine (5-pass, cut 40%, target 600-1000 words)..."); + setProgress(draftId, 10, "Step 10/16: Reduction Engine (cut 40%)"); const step8b = await generate(systemPrompt, STEP8b_REDUCTION.replace("{{ARTICLE}}", step8.text), LLM_REFINE ); - stepsCompleted = 11; + stepsCompleted = 10; const wordsAfter = step8b.text.split(/\s+/).length; const wordsBefore = 
step8.text.split(/\s+/).length; const pctChange = Math.round((1 - wordsAfter / wordsBefore) * 100); - console.log(` After reduction: ${wordsAfter} words (was ${wordsBefore}, −${pctChange}%) ${wordsAfter > 2000 ? "⚠ WARNING: >2000 words" : wordsAfter < 1000 ? "⚠ WARNING: <1000 words" : "✓ in target range"}`); + console.log(` After reduction: ${wordsAfter} words (was ${wordsBefore}, −${pctChange}%) ${wordsAfter > 1200 ? "⚠ WARNING: >1200 words" : wordsAfter < 500 ? "⚠ WARNING: <500 words" : "✓ in target range"}`); - // ═══ STEP AEM: Auto-Editor Mode (Senior Engineer voice polish) ═══ - console.log(" Step 12/16: Auto-Editor Mode (senior engineer voice polish)..."); - setProgress(draftId, 12, "Step 12/16: Auto-Editor Mode"); - const stepAEM = await generate(systemPrompt, - STEP_AEM.replace("{{ARTICLE}}", step8b.text), + // ═══ STEP 8c: Style Lock ═══ + console.log(" Step 11/16: Style Lock (tone consistency + scope/SKU fixes)..."); + setProgress(draftId, 11, "Step 11/16: Style Lock"); + const step8c = await generate(systemPrompt, + STEP8c_STYLE_LOCK.replace("{{ARTICLE}}", step8b.text), + LLM_REFINE + ); + stepsCompleted = 11; + + // ═══ STEP 8d: Auto-Kill Layer v1.0 (10 categories A-J) ═══ + console.log(" Step 12/16: Auto-Kill Layer (10 categories A-J)..."); + setProgress(draftId, 12, "Step 12/16: Auto-Kill Layer"); + const step8d = await generate(systemPrompt, + STEP8d_AUTO_KILL.replace("{{ARTICLE}}", step8c.text), LLM_REFINE ); stepsCompleted = 12; + const wordsAfterKill = step8d.text.split(/\s+/).length; + console.log(` After Auto-Kill: ${wordsAfterKill} words (was ${step8c.text.split(/\s+/).length})`); - // ═══ STEP 8c: Style Lock ═══ - console.log(" Step 13/16: Style Lock (tone consistency + scope/SKU fixes)..."); - setProgress(draftId, 13, "Step 13/16: Style Lock"); - const step8c = await generate(systemPrompt, - STEP8c_STYLE_LOCK.replace("{{ARTICLE}}", stepAEM.text), + // ═══ STEP 9: QA Check ═══ + console.log(" Step 13/16: QA Check..."); + setProgress(draftId, 
13, "Step 13/16: QA Check"); + const step9 = await generate(systemPrompt, + STEP9_QA_CHECK.replace("{{ARTICLE}}", step8d.text), LLM_REFINE ); stepsCompleted = 13; - // ═══ STEP 9: QA Check ═══ - console.log(" Step 14/16: QA Check..."); - setProgress(draftId, 14, "Step 14/16: QA Check"); - const step9 = await generate(systemPrompt, - STEP9_QA_CHECK.replace("{{ARTICLE}}", step8c.text), - LLM_REFINE - ); - stepsCompleted = 14; - // ═══ STEP 10: Quality Score ═══ - console.log(" Step 15/16: Quality Score..."); - setProgress(draftId, 15, "Step 15/16: Quality Score"); + console.log(" Step 14/16: Quality Score..."); + setProgress(draftId, 14, "Step 14/16: Quality Score"); let autoQaScore: Record | null = null; try { const step10 = await generate(systemPrompt, @@ -1298,152 +1235,55 @@ async function runLlmPipeline( } catch { console.log(" Quality scoring skipped (parse error)"); } + stepsCompleted = 14; + + // ═══ Auto-Kill Scoring (non-destructive) ═══ + console.log(" Step 15/16: Auto-Kill Scoring..."); + setProgress(draftId, 15, "Step 15/16: Auto-Kill Scoring"); + let autoKillScores: Record | null = null; + try { + const killScoreResult = await generate(systemPrompt, + AUTO_KILL_SCORING.replace("{{ARTICLE}}", step9.text), + { temperature: 0.2, maxTokens: 512, timeoutMs: 60000 } + ); + const killJson = killScoreResult.text.match(/\{[\s\S]*\}/); + if (killJson) { + autoKillScores = JSON.parse(killJson[0]); + console.log(` Auto-Kill Scores: ${JSON.stringify(autoKillScores)}`); + } + } catch { + console.log(" Auto-Kill scoring skipped"); + } stepsCompleted = 15; - // ═══ STEP APM: Auto-Precision Mode (Final Cut — last filter before publish) ═══ - console.log(" Step 16/18: Auto-Precision Mode (final cut — if a word can go, it must go)..."); - setProgress(draftId, 16, "Step 16/18: Auto-Precision Mode"); - const stepAPM = await generate(systemPrompt, - STEP_APM.replace("{{ARTICLE}}", step9.text), - LLM_REFINE - ); - stepsCompleted = 16; - const wordsAPM = 
stepAPM.text.split(/\s+/).length; - const wordsBeforeAPM = step9.text.split(/\s+/).length; - const pctAPM = Math.round((1 - wordsAPM / wordsBeforeAPM) * 100); - console.log(` APM: ${wordsBeforeAPM} → ${wordsAPM} words (−${pctAPM}%) — precision cut done`); - - // ═══ STEP 17: Viral Signal — FLEXOPTIX Social Masterfile transformation ═══ - // Applies AVC (Auto-Viral-Check), ASS (Auto-Signal-Score), carry line enforcement, - // auto-kill phrase filter, and generates LinkedIn post in one pass. - console.log(" Step 17/18: Viral Signal (Social Masterfile transformation)..."); - setProgress(draftId, 17, "Step 17/18: Viral Signal"); - let viralArticle = stepAPM.text; - let viralLinkedinPost: string | null = null; + // ═══ LinkedIn Post ═══ + console.log(" Step 16/16: LinkedIn Post (max 2,800 chars)..."); + setProgress(draftId, 16, "Step 16/16: LinkedIn Post"); + let linkedinPost: string | null = null; + let linkedinCharCount: number | null = null; try { - const stepViral = await generate(systemPrompt, - VIRAL_SIGNAL_PROMPT + "\n\nArticle:\n" + stepAPM.text, - { temperature: 0.5, maxTokens: 8192, timeoutMs: 480000 } + const stepLinkedIn = await generate(systemPrompt, + STEP_LINKEDIN_POST.replace("{{ARTICLE}}", step9.text), + { temperature: 0.6, maxTokens: 1024, timeoutMs: 120000 } ); - const viralOutput = stepViral.text.trim(); - // Parse output: article + ---LINKEDIN--- + linkedin post - const linkedinSep = viralOutput.indexOf("---LINKEDIN---"); - if (linkedinSep !== -1) { - viralArticle = viralOutput.slice(0, linkedinSep).trim(); - viralLinkedinPost = viralOutput.slice(linkedinSep + "---LINKEDIN---".length).trim(); - console.log(` Viral Signal: article ${viralArticle.split(/\s+/).length} words + LinkedIn ${viralLinkedinPost.length} chars`); - } else { - // No separator — treat entire output as article - viralArticle = viralOutput; - console.log(` Viral Signal: article ${viralArticle.split(/\s+/).length} words (no LinkedIn section)`); - } - // Validate viral output isn't 
too short (LLM may have over-cut) - if (viralArticle.split(/\s+/).length < 400) { - console.log(" ⚠ Viral Signal output too short — falling back to APM output"); - viralArticle = stepAPM.text; - } - } catch { - console.log(" Viral Signal skipped (error) — using APM output"); - } - stepsCompleted = 17; - - // ═══ STEP 18: LinkedIn Post ═══ - // Use Viral Signal LinkedIn if available, otherwise generate via STEP_LINKEDIN_POST - console.log(" Step 18/18: LinkedIn Post (max 2,800 chars)..."); - setProgress(draftId, 18, "Step 18/18: LinkedIn Post"); - let linkedinPost: string | null = viralLinkedinPost; - let linkedinCharCount: number | null = viralLinkedinPost ? viralLinkedinPost.length : null; - if (!linkedinPost) { - // Fallback: dedicated LinkedIn post generator - try { - const stepLinkedIn = await generate(systemPrompt, - STEP_LINKEDIN_POST - .replace("{{TITLE_CONTRACT}}", titleContract) - .replace("{{ARTICLE}}", viralArticle), - { temperature: 0.6, maxTokens: 1024, timeoutMs: 120000 } - ); - linkedinPost = stepLinkedIn.text.trim(); - linkedinCharCount = linkedinPost.length; - } catch { - console.log(" LinkedIn post generation skipped"); - } - } - // Enforce hard limit — truncate at last sentence before 2800 if too long - if (linkedinPost && linkedinPost.length > 2800) { - linkedinPost = linkedinPost.slice(0, 2800).replace(/[^.!?]*$/, "").trim(); + linkedinPost = stepLinkedIn.text.trim(); linkedinCharCount = linkedinPost.length; - console.log(` LinkedIn post truncated to ${linkedinCharCount} chars`); - } else if (linkedinPost) { - console.log(` LinkedIn post: ${linkedinCharCount} chars`); - } - stepsCompleted = 18; - - // ═══ STEP 19: Technical Sanity Check ═══ - console.log(" Step 19/21: Technical Sanity Check..."); - setProgress(draftId, 19, "Step 19/21: Technical Sanity Check"); - let sanityReport = ""; - try { - const stepSanity = await generate(systemPrompt, - STEP_TECHNICAL_SANITY.replace("{{ARTICLE}}", viralArticle), - { temperature: 0.2, maxTokens: 4096, 
timeoutMs: 240000 } - ); - sanityReport = stepSanity.text.trim(); - console.log(` Sanity check: ${sanityReport.includes('"safe_to_publish": false') ? "⚠ ISSUES FOUND" : "✓ safe"}`); - } catch { - console.log(" Technical sanity check skipped (error)"); - } - stepsCompleted = 19; - - // ═══ STEP 20: Self-Heal (fix technical errors) ═══ - if (sanityReport && (sanityReport.includes('"safe_to_publish": false') || sanityReport.includes('"critical_issues"'))) { - console.log(" Step 20/21: Self-Heal (fixing technical errors)..."); - setProgress(draftId, 20, "Step 20/21: Self-Heal (technical fixes)"); - try { - const stepHeal = await generate(systemPrompt, - STEP_SELF_HEAL - .replace("{{SANITY_REPORT}}", sanityReport) - .replace("{{ARTICLE}}", viralArticle), - LLM_REFINE - ); - const healedWords = stepHeal.text.split(/\s+/).length; - if (healedWords > 400) { - viralArticle = stepHeal.text; - console.log(` Self-healed: ${healedWords} words`); - } else { - console.log(" Self-heal output too short — keeping original"); - } - } catch { - console.log(" Self-heal skipped (error)"); - } - } else { - console.log(" Step 20/21: Self-Heal skipped (no critical issues)"); - } - stepsCompleted = 20; - - // ═══ STEP 21: Title Contract Verification ═══ - console.log(" Step 21/21: Title Contract Verification..."); - setProgress(draftId, 21, "Step 21/21: Title Contract Check"); - try { - const stepContract = await generate(systemPrompt, - STEP_TITLE_CONTRACT_CHECK - .replace("{{TITLE_CONTRACT}}", titleContract) - .replace("{{ARTICLE}}", viralArticle), - { temperature: 0.2, maxTokens: 2048, timeoutMs: 120000 } - ); - const contractResult = stepContract.text.trim(); - if (contractResult.includes('"contract_fulfilled": false') || contractResult.includes('"REJECT')) { - console.log(" ⚠ TITLE CONTRACT VIOLATION — article may not match headline"); + // Enforce hard limit — truncate at last sentence before 2800 if too long + if (linkedinCharCount > 2800) { + linkedinPost = linkedinPost.slice(0, 
2800).replace(/[^.!?]*$/, "").trim(); + linkedinCharCount = linkedinPost.length; + console.log(` LinkedIn post truncated to ${linkedinCharCount} chars`); } else { - console.log(" ✓ Title contract fulfilled"); + console.log(` LinkedIn post: ${linkedinCharCount} chars`); } } catch { - console.log(" Title contract check skipped (error)"); + console.log(" LinkedIn post generation skipped"); } - stepsCompleted = 21; + stepsCompleted = 16; - // Extract article from Viral Signal output (or APM fallback) - // Fall back to step9.text if output looks too short or empty - let finalArticleText = viralArticle.trim().length > 200 ? viralArticle : step9.text; + // Extract only the article from STEP9 output (QA returns review + fixed article) + // Look for "COMPLETE FIXED ARTICLE" marker and take everything after it + let finalArticleText = step9.text; const articleMarkers = [ "### COMPLETE FIXED ARTICLE", "## COMPLETE FIXED ARTICLE", @@ -1451,16 +1291,13 @@ async function runLlmPipeline( "---\n\n**You're", "---\n\nYou're", ]; - // Also check step9 for QA markers (APM may have stripped them already) for (const marker of articleMarkers) { const idx = step9.text.indexOf(marker); if (idx !== -1) { + // Skip past the marker line itself const afterMarker = step9.text.slice(idx + marker.length).trimStart(); - const extractedFromQA = afterMarker.replace(/^---\s*\n/, "").trimStart(); - // Only use QA extraction if it's meaningfully longer than APM output - if (extractedFromQA.split(/\s+/).length > finalArticleText.split(/\s+/).length * 0.8) { - finalArticleText = extractedFromQA; - } + // Strip leading --- separator if present + finalArticleText = afterMarker.replace(/^---\s*\n/, "").trimStart(); break; } } @@ -1475,29 +1312,18 @@ async function runLlmPipeline( const wordCount = draftContent.split(/\s+/).length; const finalIssues = validateArticle(draftContent); - // Hard minimum word count gate (1200 for LLM pipeline) - if (wordCount < 1200) { - const shortMsg = `⚠ WORD COUNT FAIL: 
${wordCount} words — minimum 1200 for LLM pipeline`; - console.log(` ${shortMsg}`); - if (!finalIssues.includes(`Too short: ${wordCount} words`)) { - finalIssues.push(`Too short: ${wordCount} words (minimum 1200 for LLM pipeline — article needs expansion)`); - } - } else { - console.log(` ✓ Word count: ${wordCount} words (≥1200 — OK)`); - } - - // Update the draft in DB — promote to 'ready' on full pipeline completion + // Update the draft in DB await pool.query( `UPDATE blog_drafts SET draft_content = $1, word_count = $2, - generated_by = 'fo-blog-engine-v7', - pipeline_version = 'v7-viral-signal', + generated_by = 'fo-blog-engine-v5-autokill', + pipeline_version = 'v5-auto-kill-layer', pipeline_steps_completed = $3, auto_qa_score = $4, outline = $5, linkedin_post = $6, linkedin_char_count = $7, - status = 'review', + status = 'draft', updated_at = NOW() WHERE id = $8::uuid`, [ @@ -1506,7 +1332,8 @@ async function runLlmPipeline( stepsCompleted, autoQaScore ? JSON.stringify(autoQaScore) : null, JSON.stringify({ - generation_method: "fo-pipeline-v5", + generation_method: "fo-pipeline-v5-autokill", + auto_kill_scores: autoKillScores, steps_completed: stepsCompleted, blog_type: selectedTopic, quality_issues: finalIssues, @@ -1535,13 +1362,13 @@ async function runLlmPipeline( } clearProgress(draftId); - console.log(`Blog FO Pipeline: ${draftId} complete — ${wordCount} words, ${stepsCompleted}/18 steps, QA: ${(autoQaScore as any)?.overall || "N/A"}/10, LinkedIn: ${linkedinCharCount ?? "n/a"} chars`); + console.log(`Blog FO Pipeline: ${draftId} complete — ${wordCount} words, ${stepsCompleted}/14 steps, QA: ${(autoQaScore as any)?.overall || "N/A"}/10, LinkedIn: ${linkedinCharCount ?? 
"n/a"} chars`); } catch (llmErr) { clearProgress(draftId); - console.warn(`Blog FO Pipeline failed at step ${stepsCompleted + 1}/18 for ${draftId}: ${(llmErr as Error).message}`); + console.warn(`Blog FO Pipeline failed at step ${stepsCompleted + 1}/14 for ${draftId}: ${(llmErr as Error).message}`); // Update with partial progress await pool.query( - `UPDATE blog_drafts SET pipeline_steps_completed = $1, pipeline_version = 'v7-viral-signal', + `UPDATE blog_drafts SET pipeline_steps_completed = $1, pipeline_version = 'v5-narrative-control', outline = $2, updated_at = NOW() WHERE id = $3::uuid`, [stepsCompleted, JSON.stringify({ error: (llmErr as Error).message, steps_completed: stepsCompleted }), draftId] ).catch(() => {}); @@ -1550,8 +1377,7 @@ async function runLlmPipeline( // POST /api/blog/generate — Generate a new blog draft (returns immediately, LLM runs async) blogRouter.post("/generate", async (req: Request, res: Response) => { - const { title: reqTitle, topic, speed, form_factor, use_case, use_llm } = req.body as { - title?: string; + const { topic, speed, form_factor, use_case, use_llm } = req.body as { topic?: string; speed?: string; form_factor?: string; @@ -1574,14 +1400,11 @@ blogRouter.post("/generate", async (req: Request, res: Response) => { const year = new Date().getFullYear(); const template = templates[Math.floor(Math.random() * templates.length)]; - // Use caller-provided title if given; fall back to template title - const title = (reqTitle && reqTitle.trim()) - ? 
reqTitle.trim() - : template.title - .replace("{YEAR}", String(year)) - .replace("{SPEED}", speed || "400G/800G") - .replace("{FORM_FACTOR}", form_factor || "QSFP-DD/OSFP") - .replace("{USE_CASE}", use_case || "Data Center Interconnect"); + const title = template.title + .replace("{YEAR}", String(year)) + .replace("{SPEED}", speed || "400G/800G") + .replace("{FORM_FACTOR}", form_factor || "QSFP-DD/OSFP") + .replace("{USE_CASE}", use_case || "Data Center Interconnect"); const keywords = [ ...template.seo_keywords, @@ -1592,19 +1415,6 @@ blogRouter.post("/generate", async (req: Request, res: Response) => { const data = await gatherBlogData(keywords, selectedTopic); - // Clean up stale template drafts for the same title (idempotent regeneration) - // If a template draft already exists for this title, remove it before creating a fresh one - await pool.query( - `DELETE FROM blog_feedback WHERE blog_id IN ( - SELECT id FROM blog_drafts WHERE title = $1 AND generated_by = 'tip-blog-engine-template' - )`, - [title] - ).catch(() => {}); - await pool.query( - `DELETE FROM blog_drafts WHERE title = $1 AND generated_by = 'tip-blog-engine-template'`, - [title] - ).catch(() => {}); - // Always create a template draft first (instant response) const draftContent = generateTemplateDraft(title, selectedTopic, data); const wordCount = draftContent.split(/\s+/).length; @@ -1682,7 +1492,7 @@ blogRouter.post("/generate", async (req: Request, res: Response) => { blogRouter.get("/", async (_req: Request, res: Response) => { try { const result = await pool.query( - `SELECT id, title, topic, target_audience, status, word_count, seo_keywords, generated_by, created_at, linkedin_post + `SELECT id, title, topic, target_audience, status, word_count, seo_keywords, generated_by, created_at FROM blog_drafts ORDER BY created_at DESC LIMIT 50`, @@ -1700,11 +1510,10 @@ blogRouter.get("/llm/status", async (_req: Request, res: Response) => { res.json({ success: true, queue_depth: getQueueDepth(), llm: 
health }); }); -// POST /api/blog/llm/reset-queue — Force-reset stuck Ollama or Claude queue +// POST /api/blog/llm/reset-queue — Force-reset stuck Ollama queue blogRouter.post("/llm/reset-queue", (_req: Request, res: Response) => { resetOllamaQueue(); - resetClaudeQueue(); - res.json({ success: true, message: "LLM queues reset — stuck requests cleared (Ollama + Claude)" }); + res.json({ success: true, message: "Ollama queue reset — stuck requests cleared" }); }); // GET /api/blog/:id — Get a specific draft with full content @@ -1712,7 +1521,7 @@ blogRouter.post("/llm/reset-queue", (_req: Request, res: Response) => { blogRouter.get("/:id/progress", (req: Request, res: Response) => { const p = pipelineProgress.get(String(req.params.id)); if (!p) { - res.json({ success: true, running: false, step: 0, total: 18, label: "Idle", pct: 0 }); + res.json({ success: true, running: false, step: 0, total: 10, label: "Idle", pct: 0 }); return; } res.json({ success: true, running: true, ...p }); @@ -1880,94 +1689,6 @@ blogRouter.post("/:id/regenerate", async (req: Request, res: Response) => { }); // DELETE /api/blog/:id — Delete a blog draft -// POST /api/blog/:id/publish-ghost — Publish to blog.fichtmueller.org via Ghost Admin API -blogRouter.post("/:id/publish-ghost", async (req: Request, res: Response) => { - try { - const draft = await pool.query( - "SELECT id, title, draft_content, seo_keywords FROM blog_drafts WHERE id = $1::uuid", - [req.params.id] - ); - if (draft.rows.length === 0) { - return res.status(404).json({ success: false, error: "Draft not found" }); - } - - const { title, draft_content, seo_keywords } = draft.rows[0]; - if (!draft_content || draft_content.trim().length < 100) { - return res.status(400).json({ success: false, error: "Draft content too short to publish" }); - } - - // Ghost Admin API JWT auth - const GHOST_URL = process.env.GHOST_URL || "https://blog.fichtmueller.org"; - const GHOST_ADMIN_KEY = process.env.GHOST_ADMIN_KEY || 
"87727de2746a4de69efd5b03:7abdbec3a7ae473ad09487fc6e48327809da27c8adaaea457cce2d4f55b065f7"; - const [keyId, secret] = GHOST_ADMIN_KEY.split(":"); - - // Create JWT token for Ghost Admin API - const crypto = await import("crypto"); - const header = Buffer.from(JSON.stringify({ alg: "HS256", typ: "JWT", kid: keyId })).toString("base64url"); - const now = Math.floor(Date.now() / 1000); - const payload = Buffer.from(JSON.stringify({ - iat: now, exp: now + 300, aud: "/admin/" - })).toString("base64url"); - const signature = crypto.createHmac("sha256", Buffer.from(secret, "hex")) - .update(`${header}.${payload}`).digest("base64url"); - const jwt = `${header}.${payload}.${signature}`; - - // Convert markdown content to Ghost mobiledoc - // Strip the # Title from content (Ghost uses its own title field) - const bodyContent = draft_content.replace(/^#\s+[^\n]+\n*/m, "").trim(); - - // Build mobiledoc with markdown card - const mobiledoc = JSON.stringify({ - version: "0.3.1", - ghostVersion: "4.0", - markups: [], atoms: [], sections: [[10, 0]], cards: [["markdown", { markdown: bodyContent }]] - }); - - // Build tags from seo_keywords - const tags = (seo_keywords || "").split(",").map((k: string) => k.trim()).filter(Boolean).slice(0, 5) - .map((t: string) => ({ name: t })); - - // POST to Ghost Admin API - const ghostRes = await fetch(`${GHOST_URL}/ghost/api/admin/posts/?source=html`, { - method: "POST", - headers: { - "Content-Type": "application/json", - "Accept-Version": "v5.0", - Authorization: `Ghost ${jwt}`, - }, - body: JSON.stringify({ - posts: [{ - title, - mobiledoc, - status: "published", - tags: tags.length > 0 ? 
tags : [{ name: "Optical Networking" }], - }] - }), - }); - - if (!ghostRes.ok) { - const errBody = await ghostRes.text(); - console.error("[blog] Ghost publish failed:", ghostRes.status, errBody.slice(0, 300)); - return res.status(500).json({ success: false, error: `Ghost API error: ${ghostRes.status}` }); - } - - const ghostData = await ghostRes.json() as { posts?: Array<{ url?: string; slug?: string }> }; - const ghostUrl = ghostData.posts?.[0]?.url || `${GHOST_URL}/`; - - // Update TIP draft status - await pool.query( - "UPDATE blog_drafts SET status = 'published', updated_at = NOW() WHERE id = $1::uuid", - [req.params.id] - ); - - console.log(`[blog] Published to Ghost: ${title} → ${ghostUrl}`); - res.json({ success: true, url: ghostUrl, ghost_slug: ghostData.posts?.[0]?.slug }); - } catch (err) { - console.error("[blog] Ghost publish error:", err); - res.status(500).json({ success: false, error: (err as Error).message }); - } -}); - blogRouter.delete("/:id", async (req: Request, res: Response) => { try { // Delete feedback first (FK constraint) diff --git a/packages/scraper/src/index.ts b/packages/scraper/src/index.ts index 373e6b1..ca13099 100644 --- a/packages/scraper/src/index.ts +++ b/packages/scraper/src/index.ts @@ -27,6 +27,9 @@ * tsx src/index.ts --switch-crawl-pw — Crawl switch assets (Playwright, JS-heavy vendors) * tsx src/index.ts --fetch-only — Run only fetch-based scrapers (no Playwright) * tsx src/index.ts --atgbics — Run ATGBICS scraper once + * tsx src/index.ts --naddod — Run NADDOD scraper once + * tsx src/index.ts --qsfptek — Run QSFPTEK scraper once + * tsx src/index.ts --addon — Run AddOn Networks scraper once */ import { createScheduler, registerSchedules, registerWorkers } from "./scheduler"; import { scrapeFs } from "./scrapers/fs-com"; @@ -54,6 +57,9 @@ import { crawlSwitchAssets } from "./scrapers/switch-assets-crawler"; import { crawlSwitchAssetsPlaywright } from "./scrapers/switch-assets-playwright"; import { scrapeAtgbics } from 
"./scrapers/atgbics"; import { scrapeProLabs } from "./scrapers/prolabs"; +import { scrapeNaddod } from "./scrapers/naddod"; +import { scrapeQsfptek } from "./scrapers/qsfptek"; +import { scrapeAddonNetworks } from "./scrapers/addon-networks"; import { pool } from "./utils/db"; const args = process.argv.slice(2); @@ -86,6 +92,15 @@ async function runOnce(): Promise { if (args.includes("--prolabs") || isAll || isFetchOnly) { await scrapeProLabs(); } + if (args.includes("--naddod") || isAll || isFetchOnly) { + await scrapeNaddod(); + } + if (args.includes("--qsfptek") || isAll || isFetchOnly) { + await scrapeQsfptek(); + } + if (args.includes("--addon") || isAll || isFetchOnly) { + await scrapeAddonNetworks(); + } if (args.includes("--juniper") || isAll || isFetchOnly) { await scrapeJuniperHct(); } @@ -172,7 +187,7 @@ async function runScheduler(): Promise { process.on("SIGTERM", shutdown); } -const ALL_FLAGS = ["--all", "--fs", "--cisco", "--optcore", "--news", "--flexoptix", "--vendors", "--10gtek", "--champion", "--fluxlight", "--sfpcables", "--gbics", "--prolabs", "--juniper", "--switches", "--whitebox", "--switches-ext", "--flexoptix-vendors", "--sonic-hcl", "--edgecore", "--ufispace", "--switch-assets", "--switch-crawl", "--switch-crawl-pw", "--fetch-only", "--atgbics"]; +const ALL_FLAGS = ["--all", "--fs", "--cisco", "--optcore", "--news", "--flexoptix", "--vendors", "--10gtek", "--champion", "--fluxlight", "--sfpcables", "--gbics", "--prolabs", "--naddod", "--qsfptek", "--addon", "--juniper", "--switches", "--whitebox", "--switches-ext", "--flexoptix-vendors", "--sonic-hcl", "--edgecore", "--ufispace", "--switch-assets", "--switch-crawl", "--switch-crawl-pw", "--fetch-only", "--atgbics"]; if (args.some((a) => ALL_FLAGS.includes(a))) { runOnce().catch((err) => { diff --git a/packages/scraper/src/scheduler.ts b/packages/scraper/src/scheduler.ts index d888524..c80e8bf 100644 --- a/packages/scraper/src/scheduler.ts +++ b/packages/scraper/src/scheduler.ts @@ -36,6 
+36,7 @@ async function withIsolatedStorage(name: string, fn: () => Promise): Promi await fn(); } finally { process.env.CRAWLEE_STORAGE_DIR = prev ?? ""; + // Clean up after successful run try { rmSync(dir, { recursive: true, force: true }); } catch { /* ignore */ } } } diff --git a/packages/scraper/src/scrapers/addon-networks.ts b/packages/scraper/src/scrapers/addon-networks.ts new file mode 100644 index 0000000..c4ece01 --- /dev/null +++ b/packages/scraper/src/scrapers/addon-networks.ts @@ -0,0 +1,303 @@ +/** + * AddOn Networks Scraper — US-based compatible optics vendor + * + * addnetworks.com — Enterprise-grade compatible transceivers. + * Products browseable under /products/ category pages. + * Pricing is public in USD. Rate limited: 1 req/2sec. + * + * AddOn Networks (AddOn Computer Products) specializes in OEM-compatible + * optics for Cisco, Juniper, Arista, HPE, and Dell environments. + * ~2500 SKUs, strong US channel presence. + */ +import { pool, findOrCreateScrapedTransceiver, ensureVendor, upsertPriceObservation } from "../utils/db"; +import { contentHash } from "../utils/hash"; + +const BASE = "https://www.addnetworks.com"; +const HEADERS = { + "User-Agent": "Mozilla/5.0 (compatible; TIP-Bot/1.0; research)", + Accept: "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", + "Accept-Language": "en-US,en;q=0.9", +}; + +const MAX_PAGES = 50; + +// AddOn uses "compatible" suffix naming (e.g. 
"ADD-XSSFP10GE-LR-AO") +// Categories follow standard form-factor taxonomy +const CATEGORIES = [ + { path: "/products/networking/optical-networking/sfp/", formFactor: "SFP", speed: "1G", speedGbps: 1 }, + { path: "/products/networking/optical-networking/sfp-plus/", formFactor: "SFP+", speed: "10G", speedGbps: 10 }, + { path: "/products/networking/optical-networking/sfp28/", formFactor: "SFP28", speed: "25G", speedGbps: 25 }, + { path: "/products/networking/optical-networking/qsfp-plus/", formFactor: "QSFP+", speed: "40G", speedGbps: 40 }, + { path: "/products/networking/optical-networking/qsfp28/", formFactor: "QSFP28", speed: "100G", speedGbps: 100 }, + { path: "/products/networking/optical-networking/qsfp-dd/", formFactor: "QSFP-DD", speed: "400G", speedGbps: 400 }, + // Broader category fallback + { path: "/products/networking/optical-networking/", formFactor: "SFP+", speed: "10G", speedGbps: 10 }, +]; + +interface Product { + partNumber: string; + name: string; + url: string; + price?: number; + formFactor: string; + speed: string; + speedGbps: number; + reachLabel?: string; + reachMeters?: number; + fiberType?: string; + wavelength?: string; + compatibleWith?: string; +} + +function sleep(ms: number): Promise { + return new Promise((resolve) => setTimeout(resolve, ms)); +} + +function detectReach(text: string): { label: string; meters: number } | undefined { + const patterns: [RegExp, string, number][] = [ + [/\b120\s*km\b/i, "120km", 120000], + [/\b80\s*km\b/i, "80km", 80000], + [/\b40\s*km\b/i, "40km", 40000], + [/\b20\s*km\b/i, "20km", 20000], + [/\b10\s*km\b/i, "10km", 10000], + [/\b2\s*km\b/i, "2km", 2000], + [/\b550\s*m\b/i, "550m", 550], + [/\b500\s*m\b/i, "500m", 500], + [/\b400\s*m\b/i, "400m", 400], + [/\b300\s*m\b/i, "300m", 300], + [/\b150\s*m\b/i, "150m", 150], + [/\b100\s*m\b/i, "100m", 100], + [/\bLR4\b/, "10km", 10000], + [/\bLR\b/, "10km", 10000], + [/\bER4?\b/, "40km", 40000], + [/\bZR4?\b/, "80km", 80000], + [/\bSR4?\b/, "300m", 300], + 
[/\bDR4?\b/, "500m", 500], + [/\bFR4?\b/, "2km", 2000], + ]; + for (const [regex, label, meters] of patterns) { + if (regex.test(text)) return { label, meters }; + } + return undefined; +} + +function detectFiber(text: string): string { + if (/single.?mode|smf|[^a-z]lx[^a-z]|[^a-z]lr[^a-z]|[^a-z]er[^a-z]|[^a-z]zr[^a-z]|bidi|cwdm|dwdm/i.test(text)) return "SMF"; + if (/multi.?mode|mmf|[^a-z]sx[^a-z]|[^a-z]sr[^a-z]/i.test(text)) return "MMF"; + if (/copper|dac|twinax|rj.?45|base-t/i.test(text)) return "Copper"; + return ""; +} + +function detectWavelength(text: string): string { + const match = text.match(/(\d{3,4})\s*nm/i); + return match ? match[1] : ""; +} + +function extractCompatibleVendor(name: string): string { + const brands = ["Cisco", "Juniper", "Arista", "HPE", "HP", "Aruba", "Dell", "Brocade", "Extreme", + "Huawei", "Nokia", "MikroTik", "Mellanox", "Nvidia", "Ubiquiti", "Force10", + "Foundry", "Enterasys", "Allied Telesis", "Netgear", "Calix"]; + for (const brand of brands) { + if (new RegExp(`\\b${brand}\\b`, "i").test(name)) return brand; + } + // AddOn naming convention: "FOR-XX" suffix + const forMatch = name.match(/-AO$|-IN$/i); + if (forMatch) { + // Check preceding OEM part number pattern, e.g. SFP-10G-SR-AO → Cisco + if (/^SFP-|^GLC-|^QSFP-|^SFP28-/i.test(name)) return "Cisco"; + if (/^EX-|^QFX-/i.test(name)) return "Juniper"; + if (/^740-|^J\d{4}/i.test(name)) return "Juniper"; + } + return ""; +} + +/** + * Parse AddOn Networks product listing HTML. + * Supports multiple CMS patterns (Magento, BigCommerce, custom). 
+ */ +function parseProductList(html: string, cat: typeof CATEGORIES[number]): Product[] { + const products: Product[] = []; + const seen = new Set(); + const collapsed = html.replace(/\s+/g, " "); + + // Strategy 1: Magento / standard product grid + for (const m of collapsed.matchAll(/]+class="[^"]*product[^"]*"[^>]*>([\s\S]*?)<\/li>/gi)) { + const card = m[1]; + + const urlMatch = card.match(/href="(https?:\/\/(?:www\.)?addnetworks\.com\/[^"?#]+)"/i); + if (!urlMatch) continue; + const url = urlMatch[1]; + if (seen.has(url) || !/\/product(?:s)?\/|\/item\//i.test(url)) continue; + seen.add(url); + + const nameMatch = card.match(/]*>([^<]{10,})<\/h[2-4]>/i) || + card.match(/product[_-]?(?:name|title)[^>]*>([^<]{10,})]*>([^<]{10,}) 0 && price < 100000 ? price : undefined, + formFactor: cat.formFactor, speed: cat.speed, speedGbps: cat.speedGbps, + reachLabel: reach?.label, reachMeters: reach?.meters, + fiberType: detectFiber(name), wavelength: detectWavelength(name), + compatibleWith: extractCompatibleVendor(name), + }); + } + + // Strategy 2: Generic product link fallback using matchAll + if (products.length === 0) { + for (const m of collapsed.matchAll(/href="(https?:\/\/(?:www\.)?addnetworks\.com\/[^"?#]+)"[^>]*>\s*<[^>]+>\s*([^<]{10,})/gi)) { + const url = m[1]; + const name = m[2].trim().replace(/&/g, "&"); + if (seen.has(url) || name.length < 10) continue; + if (!/transceiver|sfp|qsfp|osfp|dac|aoc|fiber|optical/i.test(name)) continue; + seen.add(url); + + const idx = collapsed.indexOf(url); + const ctx = collapsed.slice(Math.max(0, idx - 300), idx + 600); + const priceM = ctx.match(/\$\s*([\d,]+\.?\d*)/); + const price = priceM ? parseFloat(priceM[1].replace(/,/g, "")) : undefined; + const reach = detectReach(name); + + products.push({ + partNumber: name.match(/([A-Z0-9][A-Z0-9\-\.\/]{4,})/)?.[1] || name.split(/\s+/)[0]?.slice(0, 80) || "", + name, url, + price: price && price > 0 && price < 100000 ? 
price : undefined, + formFactor: cat.formFactor, speed: cat.speed, speedGbps: cat.speedGbps, + reachLabel: reach?.label, reachMeters: reach?.meters, + fiberType: detectFiber(name), wavelength: detectWavelength(name), + compatibleWith: extractCompatibleVendor(name), + }); + } + } + + return products; +} + +async function fetchPage(url: string): Promise { + const resp = await fetch(url, { headers: HEADERS, signal: AbortSignal.timeout(30000) }); + if (!resp.ok) throw new Error(`HTTP ${resp.status} for ${url}`); + return resp.text(); +} + +export async function scrapeAddonNetworks(): Promise { + console.log("=== AddOn Networks Scraper Starting ===\n"); + + const vendorId = await ensureVendor( + "AddOn Networks", + "compatible", + "https://www.addnetworks.com", + "https://www.addnetworks.com/products/networking/optical-networking/", + ); + + let totalProducts = 0; + let priceUpdates = 0; + const seenCategories = new Set(); + + for (const cat of CATEGORIES) { + console.log(`\n--- ${cat.formFactor} (${cat.speed}) [${cat.path}] ---`); + + try { + const html1 = await fetchPage(BASE + cat.path); + const catProducts = parseProductList(html1, cat); + + if (cat.path === "/products/networking/optical-networking/" && seenCategories.size > 3) { + console.log(` Skipping generic fallback (${seenCategories.size} specific categories scraped)`); + continue; + } + + if (catProducts.length === 0) { + console.log(" No products on page 1 — skipping"); + continue; + } + + seenCategories.add(cat.path); + console.log(` Found ${catProducts.length} products on page 1`); + + // Detect pagination + const totalPagesMatch = + html1.match(/page\s+\d+\s+of\s+(\d+)/i) || + html1.match(/aria-label="Last[^"]*"\s+href="[^"]*[?&]p=(\d+)/) || + html1.match(/pagination[^>]*>[\s\S]*?(\d+)<\/a>\s*<\/[^>]+>\s*<\/[^>]+>/); + const totalPages = totalPagesMatch ? 
Math.min(parseInt(totalPagesMatch[1]), MAX_PAGES) : 2; + console.log(` Total pages (estimate): ${totalPages}`); + + const allProducts = [...catProducts]; + + for (let page = 2; page <= totalPages; page++) { + await sleep(2000); + try { + const pageUrl = BASE + cat.path + `?p=${page}`; + const html = await fetchPage(pageUrl); + const pageProds = parseProductList(html, cat); + if (pageProds.length === 0) break; + allProducts.push(...pageProds); + console.log(` Page ${page}: ${pageProds.length} products`); + } catch (err) { + console.warn(` Page ${page} failed: ${(err as Error).message.slice(0, 60)}`); + break; + } + } + + const uniqueProducts = allProducts.filter((p, i, arr) => arr.findIndex((x) => x.url === p.url) === i); + console.log(` Total unique: ${uniqueProducts.length}`); + + for (const product of uniqueProducts) { + try { + const txId = await findOrCreateScrapedTransceiver({ + partNumber: product.partNumber, + vendorId, + formFactor: product.formFactor, + speedGbps: product.speedGbps, + speed: product.speed, + reachMeters: product.reachMeters, + reachLabel: product.reachLabel, + fiberType: product.fiberType, + wavelengths: product.wavelength, + category: "DataCenter", + }); + + if (product.price && product.price > 0) { + const hash = contentHash({ price: product.price, part: product.partNumber }); + const updated = await upsertPriceObservation({ + transceiverId: txId, + sourceVendorId: vendorId, + price: product.price, + currency: "USD", + stockLevel: "in_stock", + url: product.url, + contentHash: hash, + }); + if (updated) priceUpdates++; + } + totalProducts++; + } catch (err) { + console.warn(` DB error: ${(err as Error).message.slice(0, 80)}`); + } + } + } catch (err) { + console.error(` Category failed: ${(err as Error).message}`); + } + + await sleep(2000); + } + + console.log(`\n=== AddOn Networks Complete: ${totalProducts} products, ${priceUpdates} price updates ===`); +} + +if (require.main === module) { + scrapeAddonNetworks() + .then(() => 
pool.end()) + .catch((err) => { console.error("Fatal:", err); pool.end(); process.exit(1); }); +} diff --git a/packages/scraper/src/scrapers/champion-one.ts b/packages/scraper/src/scrapers/champion-one.ts index b426cd2..79a75ec 100644 --- a/packages/scraper/src/scrapers/champion-one.ts +++ b/packages/scraper/src/scrapers/champion-one.ts @@ -212,7 +212,7 @@ export async function scrapeChampionOne(): Promise { }); if (product.price && product.price > 0) { - const hash = contentHash(JSON.stringify({ price: product.price, part: product.partNumber })); + const hash = contentHash({ price: product.price, part: product.partNumber }); const updated = await upsertPriceObservation({ transceiverId: txId, sourceVendorId: vendorId, price: product.price, currency: product.currency || "USD", diff --git a/packages/scraper/src/scrapers/gbics.ts b/packages/scraper/src/scrapers/gbics.ts index c231f7f..86163b1 100644 --- a/packages/scraper/src/scrapers/gbics.ts +++ b/packages/scraper/src/scrapers/gbics.ts @@ -99,19 +99,26 @@ function parseProductList(html: string, cat: typeof CATEGORIES[number]): Product // Collapse whitespace for easier regex matching const collapsed = html.replace(/\s+/g, " "); - // BigCommerce article card pattern (updated): - //
- // - // Price is in pence (integer), divide by 100 = GBP - const articleRegex = /data-name="([^"]{10,200})"[^>]*data-product-price="\s*(\d+)\s*"[^>]*>[\s\S]{0,500}?href="(https?:\/\/(?:www\.)?gbics\.com\/[^"]+)"/gi; + // BigCommerce card-title pattern: + // + const productRegex = /aria-label="([^"]+)"\s+href="(https?:\/\/(?:www\.)?gbics\.com\/[^"]+)"[^>]*data-event-type="product-click"/gi; let match; - while ((match = articleRegex.exec(collapsed)) !== null) { - const name = match[1].trim(); - const priceRaw = parseInt(match[2], 10); - const url = match[3]; - // GBICS stores price in pence (integer) — e.g. 2395 = £23.95 OR £2,395.00 (full pounds)? - // Check by data-price-asc context: "data-price-asc=\"2395\"" with "£2,395.00" → price is in full GBP (no pence) - const price = priceRaw > 0 ? priceRaw : undefined; + while ((match = productRegex.exec(collapsed)) !== null) { + const label = match[1].trim(); + const url = match[2]; + + // aria-label contains "Product Name, £XX.XX" + // Split on last comma to separate name and price + const priceInLabel = label.match(/,\s*£\s*([\d,.]+)\s*$/); + const name = priceInLabel ? label.slice(0, label.lastIndexOf(",")).trim() : label; + let price = priceInLabel ? parseFloat(priceInLabel[1].replace(",", "")) : undefined; + + // Fallback: extract price from data-price-asc attribute on parent
  • + if (!price) { + const priceContext = collapsed.slice(Math.max(0, match.index - 500), match.index); + const dataPriceMatch = priceContext.match(/data-price-asc="(\d+)"/); + if (dataPriceMatch) price = parseFloat(dataPriceMatch[1]); + } if (name.length < 10) continue; diff --git a/packages/scraper/src/scrapers/naddod.ts b/packages/scraper/src/scrapers/naddod.ts new file mode 100644 index 0000000..84ede6e --- /dev/null +++ b/packages/scraper/src/scrapers/naddod.ts @@ -0,0 +1,285 @@ +/** + * NADDOD Scraper — Chinese compatible transceiver vendor + * + * naddod.com — WooCommerce store, server-rendered HTML, USD pricing. + * Products listed under product category pages. + * Pagination via /page/N/. Rate limited: 1 req/2sec. + * + * NADDOD (Shenzhen NADDOD Information Co.) makes and sells compatible + * optics for Cisco, Juniper, Arista, etc. Transparent USD pricing. + */ +import { pool, findOrCreateScrapedTransceiver, ensureVendor, upsertPriceObservation } from "../utils/db"; +import { contentHash } from "../utils/hash"; + +const BASE = "https://www.naddod.com"; +const HEADERS = { + "User-Agent": "Mozilla/5.0 (compatible; TIP-Bot/1.0; research)", + Accept: "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", + "Accept-Language": "en-US,en;q=0.9", +}; + +const MAX_PAGES = 30; + +const CATEGORIES = [ + { path: "/product-category/1g-sfp-transceivers/", formFactor: "SFP", speed: "1G", speedGbps: 1 }, + { path: "/product-category/10g-sfp-transceivers/", formFactor: "SFP+", speed: "10G", speedGbps: 10 }, + { path: "/product-category/25g-sfp28-transceivers/", formFactor: "SFP28", speed: "25G", speedGbps: 25 }, + { path: "/product-category/40g-qsfp-transceivers/", formFactor: "QSFP+", speed: "40G", speedGbps: 40 }, + { path: "/product-category/100g-qsfp28-transceivers/", formFactor: "QSFP28", speed: "100G", speedGbps: 100 }, + { path: "/product-category/200g-qsfp56-transceivers/", formFactor: "QSFP56", speed: "200G", speedGbps: 200 }, + { path: 
"/product-category/400g-qsfp-dd-transceivers/", formFactor: "QSFP-DD", speed: "400G", speedGbps: 400 }, + { path: "/product-category/800g-osfp-transceivers/", formFactor: "OSFP", speed: "800G", speedGbps: 800 }, + { path: "/product-category/transceivers/", formFactor: "SFP+", speed: "10G", speedGbps: 10 }, +]; + +interface Product { + partNumber: string; + name: string; + url: string; + price?: number; + formFactor: string; + speed: string; + speedGbps: number; + reachLabel?: string; + reachMeters?: number; + fiberType?: string; + wavelength?: string; + compatibleWith?: string; +} + +function sleep(ms: number): Promise { + return new Promise((resolve) => setTimeout(resolve, ms)); +} + +function detectReach(text: string): { label: string; meters: number } | undefined { + const patterns: [RegExp, string, number][] = [ + [/\b120\s*km\b/i, "120km", 120000], + [/\b80\s*km\b/i, "80km", 80000], + [/\b40\s*km\b/i, "40km", 40000], + [/\b20\s*km\b/i, "20km", 20000], + [/\b10\s*km\b/i, "10km", 10000], + [/\b2\s*km\b/i, "2km", 2000], + [/\b550\s*m\b/i, "550m", 550], + [/\b500\s*m\b/i, "500m", 500], + [/\b400\s*m\b/i, "400m", 400], + [/\b300\s*m\b/i, "300m", 300], + [/\b150\s*m\b/i, "150m", 150], + [/\b100\s*m\b/i, "100m", 100], + [/\bLR4\b/, "10km", 10000], + [/\bLR\b/, "10km", 10000], + [/\bER4?\b/, "40km", 40000], + [/\bZR4?\b/, "80km", 80000], + [/\bSR4?\b/, "300m", 300], + [/\bDR4?\b/, "500m", 500], + [/\bFR4?\b/, "2km", 2000], + ]; + for (const [regex, label, meters] of patterns) { + if (regex.test(text)) return { label, meters }; + } + return undefined; +} + +function detectFiber(text: string): string { + if (/single.?mode|smf|[^a-z]lx[^a-z]|[^a-z]lr[^a-z]|[^a-z]er[^a-z]|[^a-z]zr[^a-z]|bidi|cwdm|dwdm/i.test(text)) return "SMF"; + if (/multi.?mode|mmf|[^a-z]sx[^a-z]|[^a-z]sr[^a-z]/i.test(text)) return "MMF"; + if (/copper|dac|twinax|rj.?45|base-t/i.test(text)) return "Copper"; + return ""; +} + +function detectWavelength(text: string): string { + const match = 
text.match(/(\d{3,4})\s*nm/i); + return match ? match[1] : ""; +} + +function extractCompatibleVendor(name: string): string { + const brands = ["Cisco", "Juniper", "Arista", "HPE", "Dell", "Brocade", "Extreme", "Huawei", + "Nokia", "MikroTik", "Mellanox", "Nvidia", "Ubiquiti"]; + for (const brand of brands) { + if (new RegExp(`\\b${brand}\\b`, "i").test(name)) return brand; + } + const match = name.match(/(?:for\s+|compatible\s+(?:with\s+)?)([A-Z][a-z]+(?:\s+[A-Z][a-z]+)?)/); + return match ? match[1] : ""; +} + +function parseProductList(html: string, cat: typeof CATEGORIES[number]): Product[] { + const products: Product[] = []; + const seen = new Set(); + const collapsed = html.replace(/\s+/g, " "); + + // Strategy 1: WooCommerce standard product loop + const cardRegex = /]+class="[^"]*product[^"]*"[^>]*>([\s\S]*?)<\/li>/gi; + let cardMatch; + while ((cardMatch = cardRegex.exec(collapsed)) !== null) { + const card = cardMatch[1]; + + const urlMatch = card.match(/href="(https?:\/\/(?:www\.)?naddod\.com\/product\/[^"]+)"/i); + if (!urlMatch) continue; + const url = urlMatch[1]; + if (seen.has(url)) continue; + seen.add(url); + + const nameMatch = card.match(/woocommerce-loop-product__title[^>]*>([^<]+)]*>([^<]{10,})<\/h2>/i) || + card.match(/]*>([^<]{10,})<\/h3>/i); + if (!nameMatch) continue; + const name = nameMatch[1].trim().replace(/&/g, "&").replace(/–/g, "–"); + if (name.length < 5) continue; + + const priceMatch = card.match(/\$\s*([\d,]+\.?\d*)/); + const price = priceMatch ? parseFloat(priceMatch[1].replace(/,/g, "")) : undefined; + + const reach = detectReach(name); + const partNumber = name.split(/\s+(?:compatible|for|sfp|qsfp)/i)[0]?.trim().slice(0, 80) || name.slice(0, 60); + + products.push({ + partNumber, name, url, + price: price && price > 0 && price < 100000 ? 
price : undefined, + formFactor: cat.formFactor, speed: cat.speed, speedGbps: cat.speedGbps, + reachLabel: reach?.label, reachMeters: reach?.meters, + fiberType: detectFiber(name), wavelength: detectWavelength(name), + compatibleWith: extractCompatibleVendor(name), + }); + } + + // Strategy 2: Generic product link fallback + if (products.length === 0) { + const linkRegex = /href="(https?:\/\/(?:www\.)?naddod\.com\/(?:product|shop)\/[^"?#]+)"[^>]*>\s*([^<]{10,})/gi; + let m; + while ((m = linkRegex.exec(collapsed)) !== null) { + const url = m[1]; + const name = m[2].trim().replace(/&/g, "&"); + if (seen.has(url) || name.length < 10) continue; + if (!/transceiver|sfp|qsfp|osfp|dac|aoc|xfp/i.test(name)) continue; + seen.add(url); + + const ctx = collapsed.slice(Math.max(0, m.index - 200), m.index + 500); + const priceM = ctx.match(/\$\s*([\d,]+\.?\d*)/); + const price = priceM ? parseFloat(priceM[1].replace(/,/g, "")) : undefined; + const reach = detectReach(name); + + products.push({ + partNumber: name.split(/\s+/)[0]?.slice(0, 80) || "", + name, url, + price: price && price > 0 && price < 100000 ? 
price : undefined, + formFactor: cat.formFactor, speed: cat.speed, speedGbps: cat.speedGbps, + reachLabel: reach?.label, reachMeters: reach?.meters, + fiberType: detectFiber(name), wavelength: detectWavelength(name), + compatibleWith: extractCompatibleVendor(name), + }); + } + } + + return products; +} + +async function fetchPage(url: string): Promise { + const resp = await fetch(url, { headers: HEADERS, signal: AbortSignal.timeout(30000) }); + if (!resp.ok) throw new Error(`HTTP ${resp.status} for ${url}`); + return resp.text(); +} + +export async function scrapeNaddod(): Promise { + console.log("=== NADDOD Scraper Starting ===\n"); + + const vendorId = await ensureVendor( + "NADDOD", + "compatible", + "https://www.naddod.com", + "https://www.naddod.com/product-category/transceivers/", + ); + + let totalProducts = 0; + let priceUpdates = 0; + const seenCategories = new Set(); + + for (const cat of CATEGORIES) { + console.log(`\n--- ${cat.formFactor} (${cat.speed}) [${cat.path}] ---`); + + try { + const html1 = await fetchPage(BASE + cat.path); + const catProducts = parseProductList(html1, cat); + + if (cat.path.includes("/transceivers/") && seenCategories.size > 3) { + console.log(` Skipping generic fallback (${seenCategories.size} specific categories scraped)`); + continue; + } + + if (catProducts.length === 0) { + console.log(" No products on page 1 — skipping"); + continue; + } + + seenCategories.add(cat.path); + console.log(` Found ${catProducts.length} products on page 1`); + + const totalPagesMatch = html1.match(/page-numbers[^>]*>(\d+)<\/a>(?!.*page-numbers)/); + const totalPages = totalPagesMatch ? 
Math.min(parseInt(totalPagesMatch[1]), MAX_PAGES) : 1; + console.log(` Total pages: ${totalPages}`); + + const allProducts = [...catProducts]; + + for (let page = 2; page <= totalPages; page++) { + await sleep(2000); + try { + const html = await fetchPage(BASE + cat.path + `page/${page}/`); + const pageProds = parseProductList(html, cat); + if (pageProds.length === 0) break; + allProducts.push(...pageProds); + console.log(` Page ${page}: ${pageProds.length} products`); + } catch (err) { + console.warn(` Page ${page} failed: ${(err as Error).message.slice(0, 60)}`); + break; + } + } + + const uniqueProducts = allProducts.filter((p, i, arr) => arr.findIndex((x) => x.url === p.url) === i); + console.log(` Total unique: ${uniqueProducts.length}`); + + for (const product of uniqueProducts) { + try { + const txId = await findOrCreateScrapedTransceiver({ + partNumber: product.partNumber, + vendorId, + formFactor: product.formFactor, + speedGbps: product.speedGbps, + speed: product.speed, + reachMeters: product.reachMeters, + reachLabel: product.reachLabel, + fiberType: product.fiberType, + wavelengths: product.wavelength, + category: "DataCenter", + }); + + if (product.price && product.price > 0) { + const hash = contentHash({ price: product.price, part: product.partNumber }); + const updated = await upsertPriceObservation({ + transceiverId: txId, + sourceVendorId: vendorId, + price: product.price, + currency: "USD", + stockLevel: "in_stock", + url: product.url, + contentHash: hash, + }); + if (updated) priceUpdates++; + } + totalProducts++; + } catch (err) { + console.warn(` DB error: ${(err as Error).message.slice(0, 80)}`); + } + } + } catch (err) { + console.error(` Category failed: ${(err as Error).message}`); + } + + await sleep(2000); + } + + console.log(`\n=== NADDOD Complete: ${totalProducts} products, ${priceUpdates} price updates ===`); +} + +if (require.main === module) { + scrapeNaddod() + .then(() => pool.end()) + .catch((err) => { console.error("Fatal:", 
err); pool.end(); process.exit(1); }); +} diff --git a/packages/scraper/src/scrapers/qsfptek.ts b/packages/scraper/src/scrapers/qsfptek.ts new file mode 100644 index 0000000..d66a60b --- /dev/null +++ b/packages/scraper/src/scrapers/qsfptek.ts @@ -0,0 +1,281 @@ +/** + * QSFPTEK Scraper — Chinese compatible transceiver vendor + * + * qsfptek.com — Server-rendered HTML shop, USD pricing. + * Focuses on QSFP+/QSFP28/QSFP-DD/SFP+ form factors. + * Rate limited: 1 req/2sec. + * + * QSFPTEK (Shenzhen Optotech Technology) — competitive pricing, + * transparent USD prices, no account required. + */ +import { pool, findOrCreateScrapedTransceiver, ensureVendor, upsertPriceObservation } from "../utils/db"; +import { contentHash } from "../utils/hash"; + +const BASE = "https://www.qsfptek.com"; +const HEADERS = { + "User-Agent": "Mozilla/5.0 (compatible; TIP-Bot/1.0; research)", + Accept: "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", + "Accept-Language": "en-US,en;q=0.9", +}; + +const MAX_PAGES = 30; + +const CATEGORIES = [ + { path: "/c/sfp-transceiver.html", formFactor: "SFP", speed: "1G", speedGbps: 1 }, + { path: "/c/sfp-plus-transceiver.html", formFactor: "SFP+", speed: "10G", speedGbps: 10 }, + { path: "/c/sfp28-transceiver.html", formFactor: "SFP28", speed: "25G", speedGbps: 25 }, + { path: "/c/qsfp-plus-transceiver.html", formFactor: "QSFP+", speed: "40G", speedGbps: 40 }, + { path: "/c/qsfp28-transceiver.html", formFactor: "QSFP28", speed: "100G", speedGbps: 100 }, + { path: "/c/qsfp56-transceiver.html", formFactor: "QSFP56", speed: "200G", speedGbps: 200 }, + { path: "/c/qsfp-dd-transceiver.html", formFactor: "QSFP-DD", speed: "400G", speedGbps: 400 }, + { path: "/c/osfp-transceiver.html", formFactor: "OSFP", speed: "800G", speedGbps: 800 }, + { path: "/c/optical-transceiver.html", formFactor: "SFP+", speed: "10G", speedGbps: 10 }, +]; + +interface Product { + partNumber: string; + name: string; + url: string; + price?: number; + formFactor: 
string; + speed: string; + speedGbps: number; + reachLabel?: string; + reachMeters?: number; + fiberType?: string; + wavelength?: string; + compatibleWith?: string; +} + +function sleep(ms: number): Promise { + return new Promise((resolve) => setTimeout(resolve, ms)); +} + +function detectReach(text: string): { label: string; meters: number } | undefined { + const patterns: [RegExp, string, number][] = [ + [/\b120\s*km\b/i, "120km", 120000], + [/\b80\s*km\b/i, "80km", 80000], + [/\b40\s*km\b/i, "40km", 40000], + [/\b20\s*km\b/i, "20km", 20000], + [/\b10\s*km\b/i, "10km", 10000], + [/\b2\s*km\b/i, "2km", 2000], + [/\b550\s*m\b/i, "550m", 550], + [/\b500\s*m\b/i, "500m", 500], + [/\b300\s*m\b/i, "300m", 300], + [/\b100\s*m\b/i, "100m", 100], + [/\bLR4\b/, "10km", 10000], + [/\bLR\b/, "10km", 10000], + [/\bER4?\b/, "40km", 40000], + [/\bZR4?\b/, "80km", 80000], + [/\bSR4?\b/, "300m", 300], + [/\bDR4?\b/, "500m", 500], + [/\bFR4?\b/, "2km", 2000], + ]; + for (const [regex, label, meters] of patterns) { + if (regex.test(text)) return { label, meters }; + } + return undefined; +} + +function detectFiber(text: string): string { + if (/single.?mode|smf|[^a-z]lx[^a-z]|[^a-z]lr[^a-z]|[^a-z]er[^a-z]|[^a-z]zr[^a-z]|bidi|cwdm|dwdm/i.test(text)) return "SMF"; + if (/multi.?mode|mmf|[^a-z]sx[^a-z]|[^a-z]sr[^a-z]/i.test(text)) return "MMF"; + if (/copper|dac|twinax|rj.?45|base-t/i.test(text)) return "Copper"; + return ""; +} + +function detectWavelength(text: string): string { + const match = text.match(/(\d{3,4})\s*nm/i); + return match ? 
match[1] : ""; +} + +function extractCompatibleVendor(name: string): string { + const brands = ["Cisco", "Juniper", "Arista", "HPE", "Aruba", "Dell", "Brocade", "Extreme", + "Huawei", "Nokia", "MikroTik", "Mellanox", "Nvidia", "Ubiquiti", "Allied Telesis"]; + for (const brand of brands) { + if (new RegExp(`\\b${brand}\\b`, "i").test(name)) return brand; + } + return ""; +} + +function parseProductList(html: string, cat: typeof CATEGORIES[number]): Product[] { + const products: Product[] = []; + const seen = new Set(); + const collapsed = html.replace(/\s+/g, " "); + + // Strategy 1: OpenCart / custom card layout using matchAll + for (const cardMatch of collapsed.matchAll(/]+class="[^"]*product-(?:thumb|layout)[^"]*"[^>]*>([\s\S]*?)<\/div>\s*<\/div>/gi)) { + const card = cardMatch[1]; + + const urlMatch = card.match(/href="(https?:\/\/(?:www\.)?qsfptek\.com\/[^"]+)"/i); + if (!urlMatch) continue; + const url = urlMatch[1]; + if (seen.has(url)) continue; + seen.add(url); + + const nameMatch = card.match(/]*>\s*]*>([^<]{10,})<\/a>/i) || + card.match(/]*title="([^"]{10,})"/i); + if (!nameMatch) continue; + const name = nameMatch[1].trim().replace(/&/g, "&").replace(/&#[0-9]+;/g, ""); + if (name.length < 5) continue; + + const priceMatch = card.match(/\$\s*([\d,]+\.?\d*)/); + const price = priceMatch ? parseFloat(priceMatch[1].replace(/,/g, "")) : undefined; + + const reach = detectReach(name); + const partNumber = name.split(/\s+(?:compatible|for|sfp|qsfp)/i)[0]?.trim().slice(0, 80) || name.slice(0, 60); + + products.push({ + partNumber, name, url, + price: price && price > 0 && price < 100000 ? 
price : undefined, + formFactor: cat.formFactor, speed: cat.speed, speedGbps: cat.speedGbps, + reachLabel: reach?.label, reachMeters: reach?.meters, + fiberType: detectFiber(name), wavelength: detectWavelength(name), + compatibleWith: extractCompatibleVendor(name), + }); + } + + // Strategy 2: Generic product link scan using matchAll + if (products.length === 0) { + for (const m of collapsed.matchAll(/href="(https?:\/\/(?:www\.)?qsfptek\.com\/(?:p|product)[^"?#]+)"[^>]*>([^<]{10,}) 0 && price < 100000 ? price : undefined, + formFactor: cat.formFactor, speed: cat.speed, speedGbps: cat.speedGbps, + reachLabel: reach?.label, reachMeters: reach?.meters, + fiberType: detectFiber(name), wavelength: detectWavelength(name), + compatibleWith: extractCompatibleVendor(name), + }); + } + } + + return products; +} + +async function fetchPage(url: string): Promise { + const resp = await fetch(url, { headers: HEADERS, signal: AbortSignal.timeout(30000) }); + if (!resp.ok) throw new Error(`HTTP ${resp.status} for ${url}`); + return resp.text(); +} + +export async function scrapeQsfptek(): Promise { + console.log("=== QSFPTEK Scraper Starting ===\n"); + + const vendorId = await ensureVendor( + "QSFPTEK", + "compatible", + "https://www.qsfptek.com", + "https://www.qsfptek.com/c/optical-transceiver.html", + ); + + let totalProducts = 0; + let priceUpdates = 0; + const seenCategories = new Set(); + + for (const cat of CATEGORIES) { + console.log(`\n--- ${cat.formFactor} (${cat.speed}) [${cat.path}] ---`); + + try { + const html1 = await fetchPage(BASE + cat.path); + const catProducts = parseProductList(html1, cat); + + if (cat.path.includes("/optical-transceiver") && seenCategories.size > 3) { + console.log(` Skipping generic fallback (${seenCategories.size} specific categories scraped)`); + continue; + } + + if (catProducts.length === 0) { + console.log(" No products on page 1 — skipping"); + continue; + } + + seenCategories.add(cat.path); + console.log(` Found ${catProducts.length} 
products on page 1`); + + const totalPagesMatch = + html1.match(/total-page[^>]*>\s*(\d+)/) || + html1.match(/page\s+\d+\s+of\s+(\d+)/i); + const totalPages = totalPagesMatch ? Math.min(parseInt(totalPagesMatch[1]), MAX_PAGES) : 3; + console.log(` Total pages (estimate): ${totalPages}`); + + const allProducts = [...catProducts]; + + for (let page = 2; page <= totalPages; page++) { + await sleep(2000); + try { + const pageUrl = BASE + cat.path.replace(".html", "") + `?page=${page}`; + const html = await fetchPage(pageUrl); + const pageProds = parseProductList(html, cat); + if (pageProds.length === 0) break; + allProducts.push(...pageProds); + console.log(` Page ${page}: ${pageProds.length} products`); + } catch (err) { + console.warn(` Page ${page} failed: ${(err as Error).message.slice(0, 60)}`); + break; + } + } + + const uniqueProducts = allProducts.filter((p, i, arr) => arr.findIndex((x) => x.url === p.url) === i); + console.log(` Total unique: ${uniqueProducts.length}`); + + for (const product of uniqueProducts) { + try { + const txId = await findOrCreateScrapedTransceiver({ + partNumber: product.partNumber, + vendorId, + formFactor: product.formFactor, + speedGbps: product.speedGbps, + speed: product.speed, + reachMeters: product.reachMeters, + reachLabel: product.reachLabel, + fiberType: product.fiberType, + wavelengths: product.wavelength, + category: "DataCenter", + }); + + if (product.price && product.price > 0) { + const hash = contentHash({ price: product.price, part: product.partNumber }); + const updated = await upsertPriceObservation({ + transceiverId: txId, + sourceVendorId: vendorId, + price: product.price, + currency: "USD", + stockLevel: "in_stock", + url: product.url, + contentHash: hash, + }); + if (updated) priceUpdates++; + } + totalProducts++; + } catch (err) { + console.warn(` DB error: ${(err as Error).message.slice(0, 80)}`); + } + } + } catch (err) { + console.error(` Category failed: ${(err as Error).message}`); + } + + await 
sleep(2000); + } + + console.log(`\n=== QSFPTEK Complete: ${totalProducts} products, ${priceUpdates} price updates ===`); +} + +if (require.main === module) { + scrapeQsfptek() + .then(() => pool.end()) + .catch((err) => { console.error("Fatal:", err); pool.end(); process.exit(1); }); +} diff --git a/packages/scraper/src/scrapers/sfpcables.ts b/packages/scraper/src/scrapers/sfpcables.ts index 5ec6f2b..3dea4b6 100644 --- a/packages/scraper/src/scrapers/sfpcables.ts +++ b/packages/scraper/src/scrapers/sfpcables.ts @@ -203,7 +203,7 @@ export async function scrapeSfpCables(): Promise { }); if (product.price && product.price > 0) { - const hash = contentHash(JSON.stringify({ price: product.price, part: product.partNumber })); + const hash = contentHash({ price: product.price, part: product.partNumber }); const updated = await upsertPriceObservation({ transceiverId: txId, sourceVendorId: vendorId, diff --git a/packages/scraper/src/scrapers/tenGtek.ts b/packages/scraper/src/scrapers/tenGtek.ts index 91bed9e..b875c7b 100644 --- a/packages/scraper/src/scrapers/tenGtek.ts +++ b/packages/scraper/src/scrapers/tenGtek.ts @@ -196,7 +196,7 @@ export async function scrape10Gtek(): Promise { }); if (product.price && product.price > 0) { - const hash = contentHash(JSON.stringify({ price: product.price, part: product.partNumber })); + const hash = contentHash({ price: product.price, part: product.partNumber }); const updated = await upsertPriceObservation({ transceiverId: txId, sourceVendorId: vendorId,