# ═══════════════════════════════════════════════════════════════════════════════
# blog-v7-training.yaml — fo-blog-v7 Training Configuration
#
# Key improvements over v6:
#   1. Anchored system prompt with STRICT length (700-1000w) and structure rules
#   2. 350+ training examples (250 generated + ~30 RIPE/APNIC/NOG ingested
#      + 24 real posts + up to 50 supplemental; itemised under `data` below)
#   3. Full 700-1000w articles as output_text (not keyword stubs)
#   4. DPO phase 2 with negative examples (v6 failure modes)
#   5. Diverse topics: transceivers + BGP + IPv6 + RIPE/NOG + data center
#
# v6 problem analysis:
#   - Training data avg 1152w but no word-count constraint in system prompt
#   - Model produces 5000w+ (inherits Qwen base behavior, no stopping signal)
#   - Topic drift: "400G request" → generic SFP+ content
#   - Missing intro paragraph
#   - Repeated sections verbatim
#
# v7 fix strategy:
#   - System prompt hard-encodes: 700-1000 words, hook+body+takeaways structure
#   - Input_text explicitly states word limit
#   - DPO training on (good, bad) pairs to reinforce constraint adherence
# ═══════════════════════════════════════════════════════════════════════════════
---
job_name: "fo-blog-v7"
description: "BlogLLM v7 — anchored constraints, 350+ diverse examples, DPO phase"

# ─── Data sources ──────────────────────────────────────────────────────────────
data:
  # Phase 1 SFT data (combine all v7 sources).
  # NOTE(review): every path starts with "~"; presumably the training script
  # expands it to the home directory — confirm, YAML itself does not.
  sft_files:
    # 250 Claude-generated
    - "~/transceiver-training-data/v7-generated-sft.jsonl"
    # ~30 RIPE/APNIC/NOG
    - "~/transceiver-training-data/v7-ripe-apnic-sft.jsonl"
    # 24 real posts (upgrade SP)
    - "~/transceiver-training-data/blog-fichtmueller-posts.jsonl"

  # Phase 2 DPO data: ~200 chosen/rejected pairs
  dpo_file: "~/transceiver-training-data/v7-dpo-pairs.jsonl"

  # Pre-existing high-quality data (optional inclusion)
  supplemental_files:
    - path: "~/transceiver-training-data/master-training-dataset.jsonl"
      filter: "word_count >= 700"  # Only include longer examples
      max_samples: 50  # Limit to best 50

  # Dataset stats (updated after generate_v7_data.py completes)
  estimated_total_sft: 350
  estimated_total_dpo: 200
  target_word_count_range: "700-1000"

# ─── Model ────────────────────────────────────────────────────────────────────
model:
  base: "Qwen/Qwen2.5-7B-Instruct"  # Same as v6 — proven on Mac Studio
  model_type: "qwen2.5"
  revision: "main"

# ─── Phase 1: SFT (Supervised Fine-Tuning) ────────────────────────────────────
sft:
  device: "mps"  # Apple Silicon MPS (Mac Studio M4 Max)
  max_seq_length: 2048  # 700-1000w target ≈ 900-1300 tokens + prompt

  # LoRA adapter shape
  lora_r: 32  # Doubled from v6 (16→32) for stronger signal
  lora_alpha: 64  # 2x r
  lora_dropout: 0.05
  target_modules:
    - "q_proj"
    - "k_proj"
    - "v_proj"
    - "o_proj"
    - "gate_proj"
    - "up_proj"
    - "down_proj"

  # Optimisation schedule
  num_epochs: 4  # +1 vs v6 (larger adapter capacity)
  batch_size: 1  # MPS limitation
  gradient_accumulation: 8  # Effective batch = batch_size * 8 = 8
  learning_rate: 1.5e-4  # Slightly lower than v6 (2e-4) for stability
  warmup_ratio: 0.1
  weight_decay: 0.01
  lr_scheduler: "cosine"

  # Logging / checkpoint / eval cadence (in optimizer steps)
  logging_steps: 10
  save_steps: 50
  eval_steps: 50
  eval_split: 0.1  # 10% for eval

  # MPS-specific
  gradient_checkpointing: true
  gradient_checkpointing_kwargs:
    use_reentrant: false
  bf16: false
  fp16: false
  use_cache: false

  # Output
  output_dir: "adapters/fo-blog-v7"

# ─── Phase 2: DPO (Direct Preference Optimization) ────────────────────────────
dpo:
  enabled: true
  # Use SFT adapter as starting point.
  # NOTE(review): this points at an "adapter" sub-directory of sft.output_dir;
  # confirm the SFT phase actually writes the adapter there.
  base_adapter: "adapters/fo-blog-v7/adapter"
  output_dir: "adapters/fo-blog-v7-dpo"

  num_epochs: 1
  batch_size: 1
  gradient_accumulation: 4  # Effective batch = 4
  # DPO beta: strength of the pull toward the reference model.
  # Lower = weaker pull, i.e. the policy may fit the preference pairs more
  # aggressively and drift further from the reference.
  beta: 0.1
  learning_rate: 5.0e-5
  max_seq_length: 2048
  max_prompt_length: 512

  # DPO-specific
  loss_type: "sigmoid"  # Standard DPO loss (vs ipo, spo)
  reference_free: false  # Use SFT adapter as reference model

# ─── Evaluation prompts ────────────────────────────────────────────────────────
evaluation:
  n_eval_samples: 35  # 10% of ~350

  # Each prompt is a literal block scalar (`|-`): line breaks are kept exactly
  # as written and no trailing newline is appended.
  eval_prompts:
    - input: |-
        Write a blog post on the following topic:

        **Topic:** QSFP-DD vs OSFP: Which 400G Form Factor Wins in 2026

        **Target audience:** IT managers and operators who evaluate and buy transceivers

        Remember: 700–1000 words, hook + technical sections + 3 takeaways. Stay strictly on-topic. Start writing now.
      check: "word_count_700_1000"
    - input: |-
        Write a blog post on the following topic:

        **Topic:** BGP Route Leaks: Detection, Impact, and Prevention in 2026

        **Target audience:** network engineers and NOC operators

        Remember: 700–1000 words, hook + technical sections + 3 takeaways. Start writing now.
      check: "word_count_700_1000"
    - input: |-
        Write a blog post on the following topic:

        **Topic:** RPKI Route Origin Validation: A Practical Deployment Guide

        **Target audience:** network engineers and architects who design and operate optical infrastructure

        Remember: 700–1000 words, hook + technical sections + 3 takeaways. Start writing now.
      check: "word_count_700_1000"

  quality_checks:
    - word_count_in_range: [700, 1000]
    - has_intro_paragraph: true  # Non-header first paragraph
    - has_headers: true  # At least 2x ##
    - has_takeaways: true  # Bullet points at end
    - no_topic_drift: true  # Topic mentioned in first 100 words
    - no_repeated_sections: true  # No paragraph appears twice

# ─── Post-training: GGUF conversion ──────────────────────────────────────────
gguf:
  quantization: "Q4_K_M"
  output_name: "fo-blog-v7.gguf"
  ollama_model: "fo-blog-v7"
  # Ollama Modelfile; "{gguf_path}" is a placeholder left for the conversion
  # step to fill in.
  # NOTE(review): confirm the SYSTEM text (including its line breaks) matches
  # the system prompt used in the SFT/DPO training data.
  modelfile_template: |
    FROM {gguf_path}
    SYSTEM """You are an expert technical writer specializing in optical networking and transceiver technology.
    STRICT CONSTRAINTS:
    - LENGTH: 700-1000 words ONLY. Stop at 1000 words maximum.
    - STRUCTURE: 1) Hook paragraph, 2) Technical sections (## headers), 3) Exactly 3 takeaways
    - TOPIC DISCIPLINE: Write ONLY about the exact topic requested.
    - VOICE: Confident and direct. No hedging.
    - AUDIENCE: Network engineers and IT professionals."""
    PARAMETER temperature 0.7
    PARAMETER top_p 0.9
    PARAMETER top_k 40
    PARAMETER repeat_penalty 1.15
    PARAMETER num_predict 1500

# ─── Training commands ────────────────────────────────────────────────────────
#
# Step 0: Generate training data (run BEFORE training)
#   cd packages/fine-tuner
#   python3 scripts/generate_v7_data.py &   # Background: 250 blogs
#   python3 scripts/ingest_ripe_apnic.py \
#     --nas-path /Volumes/KnowledgeLake/tashi-crawler/2026-03-06   # RIPE/NOG data
#   wait   # let the background generator finish before starting Step 1
#
# Step 1: SFT Phase
#   python3 scripts/train_blog_v7.py --phase sft
#
# Step 2: Wait for training, then generate DPO pairs
#   python3 scripts/generate_dpo_pairs.py
#
# Step 3: DPO Phase
#   python3 scripts/train_blog_v7.py --phase dpo
#
# Step 4: Convert to GGUF + register in Ollama
#   python3 scripts/merge_and_convert.py --version v7
#
# Step 5: Test + deploy to Erik
#   curl -X POST http://localhost:11434/api/generate \
#     -d '{"model":"fo-blog-v7","prompt":"Write 700-1000w blog about QSFP-DD..."}'
#
# Estimated training time on Mac Studio M4 Max 48GB:
#   SFT 7B, 350 examples, 4 epochs: ~25-35 min
#   DPO, 200 pairs, 1 epoch: ~10-15 min
# ═══════════════════════════════════════════════════════════════════════════════