diff --git a/packages/api/src/llm/fo-blog-pipeline.ts b/packages/api/src/llm/fo-blog-pipeline.ts new file mode 100644 index 0000000..07882a1 --- /dev/null +++ b/packages/api/src/llm/fo-blog-pipeline.ts @@ -0,0 +1,398 @@ +/** + * FLEXOPTIX BLOG ENGINE v3 — "Less bullshit. More engineering." + * + * 10-Step Pipeline: + * 1. Topic Expansion (real scenarios + wrong assumptions + risks) + * 2. Angle Selection (single strong angle + target audience) + * 3. Outline Generation (decision-driven structure) + * 4. Draft Generation (Flexoptix Style MASTER prompt) + * 5. Reality Injection (failure scenarios + operational pain) + * 6. Technical Deepening (specific optics, power, density) + * 7. Opinion Layer (positions, challenges, no neutrality) + * 8. Kill AI Tone (remove all AI fingerprints) + * 9. QA Check (technical accuracy + weak section fixes) + * 10. Quality Score (1-10 ratings + improvement suggestions) + * + * Dedicated FO_Blog_LLM: + * - Model: qwen2.5:14b on .213 (or override via FO_BLOG_MODEL env) + * - System prompt loaded with accumulated feedback + * - Feedback loop: every blog gets rated, feedback trains next generation + */ + +// ═══════════════════════════════════════════════════════ +// FO BLOG SYSTEM PROMPT — The Flexoptix Mindset +// ═══════════════════════════════════════════════════════ + +export const FO_BLOG_SYSTEM_PROMPT = `You are a senior network engineer with 20+ years of real-world experience in optical networking, data centers, and ISP infrastructure. + +You write for the FLEXOPTIX technical blog. Your readers are network engineers who build and operate real infrastructure. 
+ +YOUR MINDSET: +- You write like an engineer at 2:17 AM in the DC, not like a marketing department +- You base everything on real problems, not spec sheets +- You call things by their name +- You show trade-offs, not "best practices" +- You have a clear opinion, even when it's uncomfortable +- You've personally debugged every scenario you describe + +VOICE: +- Direct, opinionated, pragmatic +- No buzzwords, no corporate language +- Short, clear sentences +- Occasionally blunt or sarcastic +- Prioritize clarity over completeness + +STRICTLY FORBIDDEN: +- "In today's fast-paced world" or ANY generic intro +- "leverage", "optimize", "enhance", "plays a key role" +- Empty bullet lists without context +- Neutral non-advice ("it depends on your requirements") +- Textbook explanations of basic concepts +- Perfect summaries that add nothing +- Press release language ("revolutionary", "industry-leading") +- Repeating obvious facts + +EVERY ARTICLE MUST: +- Start with a real-world scenario or problem +- Help the reader make a specific decision +- Include at least 2 real-world failure scenarios +- Include at least 1 "this sounds good but is actually wrong" correction +- Have a strong, opinionated takeaway +- Reference specific optics (SR, DR, LR, ZR, etc.) 
+- Include real numbers (dBm, watts, price per port) + +REFERENCE VALUES: +- SFP+ SR: Tx -8.2 to +0.5 dBm, Rx sensitivity -18.0 dBm, 1.0W typical +- QSFP28 LR4: Tx -4.3 to +4.5 dBm, Rx -13.7 dBm, 3.5W typical +- QSFP-DD DR4: Tx -2.9 to +3.0 dBm/lane, Rx -7.7 dBm, 12W typical +- 400ZR: Tx -10 to +2 dBm, OSNR >20dB, 15-20W typical +- Fiber loss: 0.35 dB/km @ 1310nm, 0.22 dB/km @ 1550nm +- Connector loss: 0.3 dB (clean), 1-3 dB (dirty) +- Power budget margin: minimum 3 dB recommended +- BER: pre-FEC <2.4×10^-4 (KP4), post-FEC <10^-15 + +CONTENT MODULES (use 2-3 per article): +- What breaks in production +- Migration pain (old → new) +- Cost nobody calculates +- Cleaning / contamination reality +- Wrong assumptions engineers make +- Vendor bullshit vs reality +- When NOT to use this technology`; + +// ═══════════════════════════════════════════════════════ +// STEP 1: TOPIC EXPANSION +// ═══════════════════════════════════════════════════════ + +export const STEP1_TOPIC_EXPANSION = `You are a senior network engineer. + +Given the topic below, expand it into: +- 5 real-world scenarios where this topic becomes a problem +- 5 common wrong assumptions engineers make about this +- 5 operational risks nobody talks about + +Topic: {{TOPIC}} + +Keep it practical, not theoretical. Think about what actually goes wrong in production.`; + +// ═══════════════════════════════════════════════════════ +// STEP 2: ANGLE SELECTION +// ═══════════════════════════════════════════════════════ + +export const STEP2_ANGLE_SELECTION = `Based on the expanded scenarios below, select ONE strong angle for a technical blog post. 
+ +The angle must be: +- Practical and decision-driven (helps the reader DO something) +- Involves real trade-offs (not a clear-cut answer) +- Relevant for real deployments (not academic) +- Controversial enough to generate discussion + +Then define: +- Target audience (e.g., DC leaf-spine engineer, ISP architect, enterprise campus) +- Core decision question the article answers +- The one thing the reader should DO after reading + +Expanded scenarios: +{{SCENARIOS}}`; + +// ═══════════════════════════════════════════════════════ +// STEP 3: OUTLINE GENERATION +// ═══════════════════════════════════════════════════════ + +export const STEP3_OUTLINE = `Create a blog outline for a technical article. + +Requirements: +1. Start with a real-world scenario (NOT "Introduction" or "Overview") +2. Focus on decisions, not definitions +3. Must include: + - "What people think vs reality" section + - "What breaks in production" section + - Trade-offs with real numbers + - A clear, opinionated recommendation +4. No generic sections like "Conclusion" or "Summary" +5. Each section has a specific purpose that helps the reader decide + +Angle: {{ANGLE}} +Target audience: {{AUDIENCE}} +Decision question: {{DECISION}}`; + +// ═══════════════════════════════════════════════════════ +// STEP 4: DRAFT GENERATION (MASTER) +// ═══════════════════════════════════════════════════════ + +export const STEP4_MASTER_DRAFT = `Write the full technical blog article based on the outline below. + +Follow these rules EXACTLY: + +STRUCTURE: +1. Hook: real scenario (3-5 sentences, specific, relatable) +2. What people think vs reality +3. Technical breakdown (only what matters for the decision) +4. What breaks in production (2-3 real failure scenarios) +5. Cost and operational trade-offs (real numbers) +6. 
Clear recommendation (with specific conditions) + +STYLE: +- Direct, opinionated, pragmatic +- No buzzwords, no corporate language +- No generic intros +- Short sentences, clear paragraphs +- Slightly sarcastic where it fits +- Include specific transceiver types (SR4, DR4, LR4, FR4, ZR, etc.) +- Include real numbers (dBm, watts, $/port, €/Gbit) + +MINIMUM 2000 words. No placeholders. No TODO markers. Complete article. + +Outline: +{{OUTLINE}} + +Context data: +{{CONTEXT_DATA}}`; + +// ═══════════════════════════════════════════════════════ +// STEP 5: REALITY INJECTION +// ═══════════════════════════════════════════════════════ + +export const STEP5_REALITY_INJECTION = `Improve this article by adding real-world engineering experience. + +ADD: +- 2-3 realistic failure scenarios (specific, with model numbers and symptoms) +- Operational pain points (cleaning, cabling mistakes, wrong polarity, firmware issues) +- Things that go wrong during deployment (not just theory) +- At least 1 "I've seen this happen" story +- Specific CLI output examples where relevant + +Make it feel like it comes from someone who has spent nights in data centers. + +Do NOT add generic filler. Every addition must add real value. + +Article: +{{DRAFT}}`; + +// ═══════════════════════════════════════════════════════ +// STEP 6: TECHNICAL DEEPENING +// ═══════════════════════════════════════════════════════ + +export const STEP6_TECHNICAL_DEEPENING = `Increase the technical depth of this article. 
+ +ADD where missing: +- Specific transceiver examples (100G-SR4, 100G-DR, 400G-FR4, 400ZR, 800G-DR8) +- Fiber types and connector details (LC vs MPO, polarity, cleaning) +- Power consumption differences (per port, per form factor) +- Density and breakout implications (4x100G from 400G, port count per RU) +- Power budget calculations (Tx - losses = Rx, margin check) +- Real reach limitations (not datasheet max, but reliable production reach) + +REMOVE: +- Vague statements without numbers +- "May", "could", "typically" — replace with "is", "will", "does" +- Generic descriptions that any reader could write themselves + +Article: +{{ARTICLE}}`; + +// ═══════════════════════════════════════════════════════ +// STEP 7: OPINION LAYER +// ═══════════════════════════════════════════════════════ + +export const STEP7_OPINION_LAYER = `Make this article more opinionated. Remove all neutrality. + +ADD: +- Clear positions on every technology mentioned +- Challenge at least 1 common industry assumption +- At least 1 statement that vendors would never publish +- Explicit BUY / WAIT / SKIP recommendations where relevant +- Statements that experienced engineers nod at but marketing teams hate + +REMOVE: +- "It depends on your use case" — instead say WHAT it depends on specifically +- Hedging language ("could potentially", "in some cases") +- Both-sides-ism when one side is clearly better + +The reader should finish the article knowing exactly what to do. + +Article: +{{ARTICLE}}`; + +// ═══════════════════════════════════════════════════════ +// STEP 8: KILL AI TONE +// ═══════════════════════════════════════════════════════ + +export const STEP8_KILL_AI_TONE = `Rewrite this article to remove ALL signs of AI-generated text. 
+ +REMOVE: +- Overly perfect sentence structures +- Repetitive paragraph patterns (same opening, same length) +- Generic transition phrases ("Furthermore", "Additionally", "It's worth noting") +- Lists that all follow identical format +- Perfect grammar everywhere — add occasional conversational shortcuts +- Phrases like "it is important to note", "one should consider" + +REPLACE WITH: +- Natural, slightly imperfect flow +- Varied sentence lengths (some very short, some longer) +- Conversational asides ("Look, ...", "Here's the thing:", "Don't get me started on...") +- Direct address ("You know this is true if...") +- Specific instead of generic ("the Nexus 93180 in rack 14" not "your network switch") + +The article should read like a human engineer wrote it during a long flight. +Keep it clear and professional, but natural. + +Article: +{{ARTICLE}}`; + +// ═══════════════════════════════════════════════════════ +// STEP 9: QA CHECK +// ═══════════════════════════════════════════════════════ + +export const STEP9_QA_CHECK = `Review this article critically as a senior engineer. + +CHECK: +1. Any technical inaccuracies? (wrong dBm values, wrong reach, wrong form factor specs) +2. Any over-simplifications that could mislead? (missing important caveats) +3. Missing real-world considerations? (power, cooling, cleaning, density) +4. Any sections that feel generic or weak? +5. Does the article have a clear opinion throughout? +6. Are there enough specific numbers and examples? +7. Would an experienced engineer learn something or just nod along? + +For each issue found: +- Quote the problematic text +- Explain what's wrong +- Provide the corrected version + +Then return the complete fixed article. + +Article: +{{ARTICLE}}`; + +// ═══════════════════════════════════════════════════════ +// STEP 10: QUALITY SCORE +// ═══════════════════════════════════════════════════════ + +export const STEP10_QUALITY_SCORE = `Rate this article from 1 to 10 in each category: + +1. 
**Technical Depth** — Are specs, calculations, and details accurate and sufficient? +2. **Real-World Relevance** — Would this help someone in an actual deployment? +3. **Clarity** — Is it easy to follow and act on? +4. **Originality** — Does it say something you can't find in a vendor datasheet? +5. **Engineer Voice** — Does it sound like a real engineer or like AI/marketing? +6. **Decision Value** — Can the reader make a concrete decision after reading? +7. **Failure Scenarios** — Are the production failure examples realistic and useful? +8. **Opinion Strength** — Does the article take clear positions? + +Return ONLY a JSON object: +{ + "scores": { + "technical_depth": <1-10>, + "real_world_relevance": <1-10>, + "clarity": <1-10>, + "originality": <1-10>, + "engineer_voice": <1-10>, + "decision_value": <1-10>, + "failure_scenarios": <1-10>, + "opinion_strength": <1-10> + }, + "overall": <1-10>, + "improvements": ["", "", ""] +} + +Article: +{{ARTICLE}}`; + +// ═══════════════════════════════════════════════════════ +// NEW BLOG TYPES (v0.2.0) +// ═══════════════════════════════════════════════════════ + +export const BLOG_TYPES = { + market_alert: { + name: "Market Alert", + description: "Price drops, supply changes, market shifts — urgent, data-driven", + hook: "Open with the specific data point that triggered the alert. Example: 'FS.com dropped 400G DR4 pricing by 23% this week. Here's what that means for your Q3 procurement.'", + modules: ["vendor_bullshit_vs_reality", "cost_nobody_calculates", "what_breaks_in_production"], + }, + migration_guide: { + name: "Migration Guide", + description: "Step-by-step technology migration with real pain points", + hook: "Open with the migration trigger. Example: 'Your CTO just approved the 400G budget. You have 6 months to migrate 200 100G links. 
Here's the plan that actually works.'", + modules: ["migration_pain", "what_breaks_in_production", "wrong_assumptions"], + }, + competitor_analysis: { + name: "Competitor Analysis", + description: "Honest comparison of vendor options — not a shill piece", + hook: "Open with the procurement decision. Example: 'Three quotes on your desk. FS.com at $89, ProLabs at $120, OEM at $1,100. The spec sheets look identical. They're not.'", + modules: ["vendor_bullshit_vs_reality", "cost_nobody_calculates"], + }, + technology_deep_dive: { + name: "Technology Deep Dive", + description: "One technology explained through the lens of real deployment", + hook: "Open with what makes this technology different in practice (not in theory). Example: 'Silicon Photonics sounds like the future. In production, it's already the present — but not for the reasons vendors tell you.'", + modules: ["what_breaks_in_production", "when_not_to_use"], + }, + buying_guide: { + name: "Buying Guide", + description: "Procurement-focused decision framework with real costs", + hook: "Open with the budget reality. Example: 'You have €200K for optics this quarter. Here's how to spend it without regret in 12 months.'", + modules: ["cost_nobody_calculates", "wrong_assumptions", "vendor_bullshit_vs_reality"], + }, + tutorial: { + name: "Troubleshooting Tutorial", + description: "Diagnosis guide written by someone who's been paged at 2 AM", + hook: "Open with the alarm. Example: 'It's 2 AM. NOC pager goes off. Core spine link is flapping.'", + modules: ["what_breaks_in_production", "cleaning_contamination", "wrong_assumptions"], + }, + comparison: { + name: "Product Comparison", + description: "Head-to-head with real performance data, not spec sheets", + hook: "Open with the choice. Example: 'QSFP-DD or OSFP? 
/**
 * Build the "learned from previous feedback" block that is combined with the
 * system prompt.
 *
 * Entries without usable feedback text are discarded *before* the worst-first
 * cut-off is applied, so empty ratings can never crowd real corrections out of
 * the prompt. When nothing usable remains, an empty string is returned instead
 * of a header/footer pair with no content.
 *
 * @param feedback   Stored feedback entries in any order. The array is not mutated.
 * @param maxEntries Maximum number of entries to include (default 20).
 * @returns A newline-joined block that starts with a blank-line separator and
 *          ends with a trailing newline, or "" when there is nothing to add.
 */
export function buildFeedbackContext(
  feedback: Array<{ score: number; feedback_text: string; blog_type: string }>,
  maxEntries = 20,
): string {
  const limit = Math.max(0, Math.floor(maxEntries));

  // Rows come from a nullable DB column, so guard the runtime type as well.
  const usable = feedback.filter(
    (f) => typeof f.feedback_text === "string" && f.feedback_text.trim() !== "",
  );

  // Worst score first, so the most serious mistakes are top of mind.
  const selected = [...usable].sort((a, b) => a.score - b.score).slice(0, limit);
  if (selected.length === 0) return "";

  return [
    "\n\n--- LEARNED FROM PREVIOUS FEEDBACK (apply these corrections) ---",
    ...selected.map((f) => `[Score ${f.score}/10, Type: ${f.blog_type}]: ${f.feedback_text}`),
    "--- END FEEDBACK ---\n",
  ].join("\n");
}
/** Canonical textual UUID; lets a malformed :id be rejected before it reaches the `$1::uuid` cast. */
const FEEDBACK_BLOG_ID_RE = /^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$/i;

/**
 * POST /api/blog/:id/feedback — Submit rating + feedback. Fed back to LLM.
 *
 * Body:
 *  - `score_overall` (required) and the optional `score_*` sub-scores: integers 1-10,
 *    validated here so a bad value yields 400 instead of a CHECK-constraint 500.
 *  - `feedback_text`, `reviewer` (default "human"), `improvements` (stored as JSON).
 *  - `blog_type` (optional): stored in blog_feedback.blog_type.
 *
 * Responses: 400 on invalid input, 404 when the draft does not exist,
 * 500 on database failure, otherwise `{ success: true, feedback_id }`.
 */
blogRouter.post("/:id/feedback", async (req: Request, res: Response) => {
  const {
    score_overall, score_technical_depth, score_real_world, score_clarity,
    score_originality, score_engineer_voice, score_decision_value,
    score_failure_scenarios, score_opinion_strength,
    feedback_text, reviewer = "human", improvements, blog_type
  } = req.body ?? {};

  if (!score_overall) return res.status(400).json({ error: "score_overall required (1-10)" });
  if (!FEEDBACK_BLOG_ID_RE.test(req.params.id)) {
    return res.status(400).json({ error: "Invalid blog id" });
  }

  const scores: Record<string, unknown> = {
    score_overall, score_technical_depth, score_real_world, score_clarity,
    score_originality, score_engineer_voice, score_decision_value,
    score_failure_scenarios, score_opinion_strength,
  };
  for (const [field, value] of Object.entries(scores)) {
    if (value == null) continue; // sub-scores are optional
    const n = Number(value);
    if (!Number.isInteger(n) || n < 1 || n > 10) {
      return res.status(400).json({ error: `${field} must be an integer between 1 and 10` });
    }
  }

  try {
    const blog = await pool.query("SELECT topic FROM blog_drafts WHERE id = $1::uuid", [req.params.id]);
    const bd = blog.rows[0];
    // Without this guard the INSERT below fails on the blog_id foreign key and surfaces as a 500.
    if (!bd) return res.status(404).json({ error: "Blog draft not found" });

    // Column mapping: blog_topic holds the draft's topic. The previous code wrote
    // topic into blog_type and title into blog_topic, which made /feedback/stats
    // group by topic text instead of by type.
    // NOTE(review): if blog_drafts has its own type column, prefer that over the
    // client-supplied blog_type — the blog_drafts schema is not visible here.
    const result = await pool.query(
      `INSERT INTO blog_feedback (blog_id, score_overall, score_technical_depth, score_real_world,
        score_clarity, score_originality, score_engineer_voice, score_decision_value,
        score_failure_scenarios, score_opinion_strength, feedback_text, reviewer,
        blog_type, blog_topic, improvements)
       VALUES ($1::uuid,$2,$3,$4,$5,$6,$7,$8,$9,$10,$11,$12,$13,$14,$15) RETURNING id`,
      [req.params.id, score_overall, score_technical_depth ?? null, score_real_world ?? null,
       score_clarity ?? null, score_originality ?? null, score_engineer_voice ?? null,
       score_decision_value ?? null, score_failure_scenarios ?? null, score_opinion_strength ?? null,
       feedback_text ?? null, reviewer, blog_type ?? null, bd.topic ?? null,
       improvements ? JSON.stringify(improvements) : null]
    );
    res.json({ success: true, feedback_id: result.rows[0].id });
  } catch (err) {
    console.error("Feedback error:", err);
    res.status(500).json({ error: "Failed to save feedback" });
  }
});

/** GET /api/blog/feedback/stats — Aggregate feedback for LLM improvement tracking */
blogRouter.get("/feedback/stats", async (_req: Request, res: Response) => {
  try {
    const [overall, byType] = await Promise.all([
      pool.query(`SELECT COUNT(*) AS total, AVG(score_overall)::numeric(3,1) AS avg FROM blog_feedback`),
      pool.query(`SELECT blog_type, COUNT(*) AS cnt, AVG(score_overall)::numeric(3,1) AS avg
                  FROM blog_feedback WHERE blog_type IS NOT NULL GROUP BY blog_type ORDER BY avg ASC`),
    ]);
    res.json({
      total: parseInt(overall.rows[0]?.total ?? "0", 10),
      avg_score: overall.rows[0]?.avg,
      by_type: byType.rows,
    });
  } catch (err) {
    console.error("Feedback stats error:", err);
    res.status(500).json({ error: "Failed" });
  }
});

/**
 * GET /api/blog/feedback/training-data — Export for FO_Blog_LLM injection
 *
 * Returns up to 30 entries with feedback text, lowest score first, and marks
 * exactly those exported rows as fed. Previously every unfed row was flagged,
 * including rows beyond the 30 that were actually returned.
 * Note that this GET has a write side effect.
 */
blogRouter.get("/feedback/training-data", async (_req: Request, res: Response) => {
  try {
    const result = await pool.query(
      `SELECT id, score_overall, feedback_text, blog_type, improvements FROM blog_feedback
       WHERE feedback_text IS NOT NULL ORDER BY score_overall ASC LIMIT 30`);

    const exportedIds = result.rows.map((row) => row.id);
    if (exportedIds.length > 0) {
      await pool.query(
        `UPDATE blog_feedback SET fed_to_llm=true, fed_at=NOW()
         WHERE id = ANY($1::uuid[]) AND fed_to_llm=false`,
        [exportedIds]);
    }

    // Keep the response shape unchanged: id is only used for the UPDATE above.
    const entries = result.rows.map((row) => ({
      score_overall: row.score_overall,
      feedback_text: row.feedback_text,
      blog_type: row.blog_type,
      improvements: row.improvements,
    }));
    res.json({ entries, count: entries.length });
  } catch (err) {
    console.error("Feedback training-data error:", err);
    res.status(500).json({ error: "Failed" });
  }
});
/**
 * Spec Validation Script
 *
 * Cross-checks enriched data against optical physics rules:
 *   - 850nm MUST be MMF (not SMF)
 *   - 1310nm/1550nm MUST be SMF (not MMF; SWDM and LRM excepted)
 *   - Copper transceivers MUST NOT have optical wavelengths
 *   - DAC reach must be <= 30m, AOC reach must be <= 300m
 *   - Power consumption must be plausible for form factor
 *   - Connector must match fiber type (MPO for parallel, LC for duplex)
 *
 * Only the two fiber-type errors are auto-fixed; everything else is reported.
 */
import { config } from "dotenv";
import { join } from "path";
import { Pool } from "pg";

config({ path: join(__dirname, "..", ".env") });

const pool = new Pool({
  host: process.env.POSTGRES_HOST || "localhost",
  port: parseInt(process.env.POSTGRES_PORT || "5433"),
  database: process.env.POSTGRES_DB || "transceiver_db",
  user: process.env.POSTGRES_USER || "tip",
  password: process.env.POSTGRES_PASSWORD || "***REDACTED***",
  max: 3,
});

interface ValidationError {
  id: string;
  slug: string;
  field: string;
  value: string;
  rule: string;
  severity: "error" | "warning";
  /** Set only when the finding has an unambiguous correction for fiber_type. */
  fixFiberType?: "MMF" | "SMF";
}

/**
 * Columns read by the checks.
 * NOTE(review): types are assumed from how the checks use the values — confirm against the schema.
 */
interface TransceiverRow {
  id: string;
  slug: string;
  form_factor: string | null;
  speed_gbps: string | number | null;
  reach_label: string | null;
  reach_meters: number | null;
  fiber_type: string | null;
  connector: string | null;
  wavelengths: string | null;
  power_consumption_w: string | number | null;
}

/** Nominal max power per form factor (W); a row is flagged only above 1.5x this value. */
const MAX_POWER_W: Readonly<Record<string, number>> = {
  "SFP": 1.5, "SFP+": 2.0, "SFP28": 2.0, "SFP56": 2.5,
  "QSFP+": 4.0, "QSFP28": 5.0, "QSFP56": 7.0,
  "QSFP-DD": 18.0, "OSFP": 22.0, "QSFP-DD800": 25.0,
  "CFP2": 12.0, "CFP2-DCO": 25.0, "XFP": 5.0,
};

/** Run all plausibility rules against one row and return its findings (no I/O). */
function validateRow(row: TransceiverRow): ValidationError[] {
  const found: ValidationError[] = [];
  const ft = row.fiber_type || "";
  const conn = row.connector || "";
  const wl = row.wavelengths || "";
  const reach = row.reach_meters || 0;
  const speed = parseFloat(String(row.speed_gbps || 0));
  const power = row.power_consumption_w ? parseFloat(String(row.power_consumption_w)) : null;
  const ff = row.form_factor || "";
  const reachLabel = row.reach_label || "";

  const report = (
    field: string,
    value: string,
    rule: string,
    severity: ValidationError["severity"],
    fixFiberType?: ValidationError["fixFiberType"],
  ): void => {
    found.push({ id: row.id, slug: row.slug, field, value, rule, severity, fixFiberType });
  };

  // Rule 1: 850nm wavelength MUST be MMF (not SMF)
  if (wl.includes("850") && ft === "SMF") {
    report("fiber_type", ft, "850nm wavelength requires MMF, not SMF", "error", "MMF");
  }

  // Rule 2: 1310nm/1550nm wavelength MUST be SMF (not MMF, unless SWDM).
  // LRM optics legitimately run 1310nm over MMF, so they must not be flagged or auto-rewritten.
  const isLrm = /LRM/i.test(reachLabel);
  if ((wl.includes("1310") || wl.includes("1550")) && ft === "MMF" && !wl.includes("SWDM") && !isLrm) {
    report("fiber_type", ft, "1310/1550nm requires SMF, not MMF", "error", "SMF");
  }

  // Rule 3: Copper must not have optical wavelengths
  if (ft === "Copper" && wl !== "N/A" && wl !== "" && !wl.includes("N/A")) {
    report("wavelengths", wl, "Copper transceiver should have wavelengths=N/A", "warning");
  }

  // Rule 4: DAC reach must be <= 7m (passive) or <= 30m (active)
  if (conn === "DAC" && reach > 30) {
    report("reach_meters", String(reach), "DAC reach > 30m is implausible (max ~7m passive, ~30m active)", "warning");
  }

  // Rule 5: AOC reach should be <= 300m
  if ((conn === "AOC" || ft === "AOC") && reach > 300) {
    report("reach_meters", String(reach), "AOC reach > 300m is implausible", "warning");
  }

  // Rule 6: Power consumption plausibility (unknown form factors are skipped)
  if (power !== null) {
    const max = MAX_POWER_W[ff];
    if (max !== undefined && power > max * 1.5) {
      report("power_consumption_w", String(power), `Power ${power}W exceeds plausible max ${max}W for ${ff}`, "error");
    }
  }

  // Rule 7: MPO connector should be for parallel optics (40G+, SR4/DR4/PSM4)
  if (conn.startsWith("MPO") && speed < 40 && ft !== "SMF") {
    report("connector", conn, "MPO connector unusual for <40G non-SMF", "warning");
  }

  // Rule 8: LC connector for SMF is standard, but QSFP+ SR4 should be MPO
  if (conn === "LC" && ft === "MMF" && speed >= 40 && reachLabel.includes("SR")) {
    report("connector", conn, "SR4 on MMF at 40G+ typically uses MPO, not LC", "warning");
  }

  return found;
}

/**
 * Load every enriched/unknown transceiver, print a validation report and
 * auto-correct the findings that carry an unambiguous fiber_type fix.
 * The pool is always closed, even when a query fails.
 */
async function main(): Promise<void> {
  console.log("Running spec validation...\n");

  try {
    const result = await pool.query(`
      SELECT id, slug, form_factor, speed_gbps, reach_label, reach_meters,
             fiber_type, connector, wavelengths, power_consumption_w, category
      FROM transceivers
      WHERE data_confidence = 'enriched_estimated' OR data_confidence = 'unknown'
    `);

    console.log(`Validating ${result.rows.length} enriched/unknown products\n`);

    const errors: ValidationError[] = [];
    for (const row of result.rows) {
      errors.push(...validateRow(row));
    }

    // Print results
    const errs = errors.filter(e => e.severity === "error");
    const warns = errors.filter(e => e.severity === "warning");

    console.log(`\nValidation Results:`);
    console.log(`  Errors:   ${errs.length}`);
    console.log(`  Warnings: ${warns.length}`);
    console.log(`  Total:    ${errors.length}`);

    if (errs.length > 0) {
      console.log(`\n=== ERRORS (need fixing) ===`);
      for (const e of errs.slice(0, 30)) {
        console.log(`  ${e.slug}: ${e.field}="${e.value}" — ${e.rule}`);
      }
    }

    if (warns.length > 0) {
      console.log(`\n=== WARNINGS (review) ===`);
      for (const w of warns.slice(0, 20)) {
        console.log(`  ${w.slug}: ${w.field}="${w.value}" — ${w.rule}`);
      }
    }

    // Auto-fix clear errors. Dispatch on the structured fix field rather than
    // on substrings of the human-readable rule text, which breaks on rewording.
    let fixed = 0;
    for (const e of errs) {
      if (!e.fixFiberType) continue;
      const updated = await pool.query(
        `UPDATE transceivers SET fiber_type = $2 WHERE id = $1`,
        [e.id, e.fixFiberType],
      );
      fixed += updated.rowCount ?? 0; // count rows actually changed
    }

    if (fixed > 0) console.log(`\nAuto-fixed ${fixed} errors`);
  } finally {
    await pool.end();
  }
}

main().catch(err => { console.error(err); process.exit(1); });
-- ─────────────────────────────────────────────────────────────────────────
-- File: sql/015-blog-feedback.sql
-- Migration 015: Blog Feedback System for FO_Blog_LLM training
--
-- Creates blog_feedback (one row per submitted rating of a blog draft) and
-- adds LLM/pipeline bookkeeping columns to blog_drafts. Every statement uses
-- IF NOT EXISTS, so re-running the migration is a no-op.
-- ─────────────────────────────────────────────────────────────────────────

CREATE TABLE IF NOT EXISTS blog_feedback (
  id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
  -- Feedback rows are deleted together with their draft.
  blog_id UUID REFERENCES blog_drafts(id) ON DELETE CASCADE,

  -- Ratings (1-10). All score columns are nullable; the CHECK only rejects
  -- non-NULL values outside 1..10.
  score_overall INTEGER CHECK (score_overall BETWEEN 1 AND 10),
  score_technical_depth INTEGER CHECK (score_technical_depth BETWEEN 1 AND 10),
  score_real_world INTEGER CHECK (score_real_world BETWEEN 1 AND 10),
  score_clarity INTEGER CHECK (score_clarity BETWEEN 1 AND 10),
  score_originality INTEGER CHECK (score_originality BETWEEN 1 AND 10),
  score_engineer_voice INTEGER CHECK (score_engineer_voice BETWEEN 1 AND 10),
  score_decision_value INTEGER CHECK (score_decision_value BETWEEN 1 AND 10),
  score_failure_scenarios INTEGER CHECK (score_failure_scenarios BETWEEN 1 AND 10),
  score_opinion_strength INTEGER CHECK (score_opinion_strength BETWEEN 1 AND 10),

  -- Free text feedback (this gets fed back to the LLM)
  feedback_text TEXT,
  reviewer TEXT DEFAULT 'human',  -- human, auto_qa, llm_self

  -- Context for LLM training
  blog_type TEXT,       -- market_alert, migration_guide, etc.
  blog_topic TEXT,      -- original topic
  improvements JSONB,   -- array of specific improvement suggestions

  -- Was this feedback used for training?
  fed_to_llm BOOLEAN DEFAULT FALSE,
  fed_at TIMESTAMPTZ,

  created_at TIMESTAMPTZ DEFAULT NOW()
);

CREATE INDEX IF NOT EXISTS idx_blog_feedback_blog ON blog_feedback(blog_id);
CREATE INDEX IF NOT EXISTS idx_blog_feedback_score ON blog_feedback(score_overall);
-- Partial index: contains only rows that have not been fed to the LLM yet.
CREATE INDEX IF NOT EXISTS idx_blog_feedback_unfed ON blog_feedback(fed_to_llm) WHERE fed_to_llm = FALSE;

-- Track which LLM model/config was used for each blog
ALTER TABLE blog_drafts ADD COLUMN IF NOT EXISTS llm_model TEXT DEFAULT 'qwen2.5:14b';
ALTER TABLE blog_drafts ADD COLUMN IF NOT EXISTS llm_temperature NUMERIC DEFAULT 0.7;
ALTER TABLE blog_drafts ADD COLUMN IF NOT EXISTS pipeline_version TEXT DEFAULT 'v3-flexoptix-style';
ALTER TABLE blog_drafts ADD COLUMN IF NOT EXISTS pipeline_steps_completed INTEGER DEFAULT 0;
ALTER TABLE blog_drafts ADD COLUMN IF NOT EXISTS auto_qa_score JSONB;

-- ─────────────────────────────────────────────────────────────────────────
-- File: sql/016-data-confidence.sql
-- Migration 016: Data confidence tracking
--
-- Tags each transceiver ROW (not each individual spec field) with one of four
-- confidence levels: vendor_verified, enriched_estimated, scraped_unverified,
-- unknown. enriched_fields records which spec columns were filled on
-- enriched rows.
--
-- The three UPDATEs below are order-dependent: the first two only touch rows
-- still labelled 'unknown'; the last one has no such filter and therefore
-- overrides any earlier label.
-- ─────────────────────────────────────────────────────────────────────────

ALTER TABLE transceivers ADD COLUMN IF NOT EXISTS data_confidence TEXT DEFAULT 'unknown'
  CHECK (data_confidence IN ('vendor_verified', 'enriched_estimated', 'scraped_unverified', 'unknown'));
ALTER TABLE transceivers ADD COLUMN IF NOT EXISTS enriched_at TIMESTAMPTZ;
ALTER TABLE transceivers ADD COLUMN IF NOT EXISTS enriched_fields TEXT[];

-- Mark all products that were just enriched by our script.
-- "Just enriched" is approximated as updated_at within the last day, evaluated
-- at the moment this migration runs.
-- NOTE(review): the result depends on when the migration is applied relative
-- to the enrichment run — confirm timing before applying elsewhere.
-- enriched_fields lists whichever of the four spec columns are non-NULL;
-- ARRAY_REMOVE(..., NULL) drops the entries for columns that are NULL.
UPDATE transceivers SET
  data_confidence = 'enriched_estimated',
  enriched_at = NOW(),
  enriched_fields = ARRAY_REMOVE(ARRAY[
    CASE WHEN fiber_type IS NOT NULL AND updated_at > NOW() - INTERVAL '1 day' THEN 'fiber_type' END,
    CASE WHEN connector IS NOT NULL AND updated_at > NOW() - INTERVAL '1 day' THEN 'connector' END,
    CASE WHEN wavelengths IS NOT NULL AND updated_at > NOW() - INTERVAL '1 day' THEN 'wavelengths' END,
    CASE WHEN power_consumption_w IS NOT NULL AND updated_at > NOW() - INTERVAL '1 day' THEN 'power_consumption_w' END
  ], NULL)
WHERE updated_at > NOW() - INTERVAL '1 day'
  AND data_confidence = 'unknown';

-- Products with price observations from vendor shops = scraped (higher confidence).
-- Applies only to rows still 'unknown' after the enrichment step above.
UPDATE transceivers SET data_confidence = 'scraped_unverified'
WHERE data_confidence = 'unknown'
  AND EXISTS (SELECT 1 FROM price_observations po WHERE po.transceiver_id = transceivers.id);

-- Products from Cisco TMG matrix = vendor_verified.
-- No data_confidence filter: this wins over both labels assigned above.
UPDATE transceivers SET data_confidence = 'vendor_verified'
WHERE EXISTS (
  SELECT 1 FROM compatibility c WHERE c.transceiver_id = transceivers.id
    AND c.verified_by = 'vendor_matrix'
);

CREATE INDEX IF NOT EXISTS idx_transceivers_confidence ON transceivers(data_confidence);

-- View: data quality overview.
-- One row per confidence level: row count, share of all transceivers (pct),
-- and how many rows in that level have each spec column populated.
CREATE OR REPLACE VIEW v_data_quality AS
SELECT
  data_confidence,
  COUNT(*) AS count,
  ROUND(COUNT(*)::numeric / (SELECT COUNT(*) FROM transceivers) * 100, 1) AS pct,
  COUNT(*) FILTER (WHERE fiber_type IS NOT NULL AND fiber_type != '') AS has_fiber,
  COUNT(*) FILTER (WHERE connector IS NOT NULL AND connector != '' AND connector != '-') AS has_connector,
  COUNT(*) FILTER (WHERE wavelengths IS NOT NULL AND wavelengths != '') AS has_wavelength,
  COUNT(*) FILTER (WHERE power_consumption_w IS NOT NULL) AS has_power,
  COUNT(*) FILTER (WHERE reach_meters > 0) AS has_reach,
  COUNT(*) FILTER (WHERE image_url IS NOT NULL AND image_url != '') AS has_image
FROM transceivers
GROUP BY data_confidence
ORDER BY count DESC;