---
# ══════════════════════════════════════════════════════════════════════════════
# blog-v8-training.yaml — fo-blog-v8 Training Configuration
#
# Base:   Qwen/Qwen2.5-14B-Instruct (2× the parameter count of v7's 7B)
# Target: 700-1000w blog posts, optical networking + BGP + infra
#
# Key improvements over v7:
#   - 14B params → better instruction following at higher complexity
#   - LoRA r=64 (was r=32) → more expressive adapter
#   - Weighted datasets: human posts × 3.0, external rewritten × 1.5
#   - More epochs (5 SFT, 2 DPO) → deeper style absorption
#   - max_seq_length=4096 → handles longer real posts
#   - DPO from real v7 quality labels (good/bad scored posts)
#
# Layout: one top-level key per pipeline stage
#   datasets / dpo_datasets → sft → dpo → gguf, plus a `hardware` stanza.
# ══════════════════════════════════════════════════════════════════════════════

base_model: "Qwen/Qwen2.5-14B-Instruct"

# ─── Dataset Sources (merged by consolidate_v8_dataset.py) ────────────────────
# SFT sources. `weight` is the per-source multiplier quoted in the header
# (human posts × 3.0, external rewritten × 1.5).
datasets:
  # Tier 1: Rene's actual blog posts — Gold Standard
  - path: "~/transceiver-training-data/v8-real-posts-sft.jsonl"
    weight: 3.0
    description: "19 real blog posts from blog.fichtmueller.org (human written)"

  # Tier 2: v7 generated blogs (Claude-written, 197 topics, validated)
  - path: "~/transceiver-training-data/v7-generated-sft.jsonl"
    weight: 1.0
    description: "Claude-generated optical networking blogs (v7, 197 topics)"

  # Tier 2: RIPE / APNIC NAS data
  - path: "~/transceiver-training-data/v7-ripe-apnic-sft.jsonl"
    weight: 1.0
    description: "RIPE/APNIC BGP and routing content (v7 ingested)"

  # Tier 3: External crawled + rewritten content
  - path: "~/transceiver-training-data/v8-external-sft.jsonl"
    weight: 1.5
    description: "APNIC Blog / RIPE Labs / potaroo.net / Cloudflare (Claude rewritten)"

# DPO preferences: chosen/rejected pairs for preference learning.
#
# NOTE: this list used to be keyed `dpo`, the same top-level key as the DPO
# phase mapping further down. Duplicate mapping keys are invalid YAML, and
# last-wins loaders silently discarded this list. It now lives under its own
# key; the phase mapping keeps the name `dpo`.
# TODO: confirm the consuming scripts read `dpo_datasets`.
dpo_datasets:
  - path: "~/transceiver-training-data/v7-dpo-pairs.jsonl"
    description: "v7 DPO pairs (5 rejection strategies)"

  - path: "~/transceiver-training-data/v8-quality-dpo.jsonl"
    description: "v8 real quality labels (good/bad from v7 generated posts)"
    optional: true  # generated by label_v7_quality.py if run

# ─── SFT Phase ────────────────────────────────────────────────────────────────
sft:
  output_dir: "adapters/fo-blog-v8/adapter"
  merged_dir: "models/fo-blog-v8/merged"

  # LoRA parameters
  lora:
    r: 64  # was 32 in v7 — more expressive
    alpha: 128  # 2× r for stable training
    dropout: 0.05
    target_modules:
      - "q_proj"
      - "k_proj"
      - "v_proj"
      - "o_proj"
      - "gate_proj"
      - "up_proj"
      - "down_proj"

  # Training hyperparameters
  training:
    num_train_epochs: 5  # was 4 — extra epoch for 14B
    per_device_train_batch_size: 1
    gradient_accumulation_steps: 8  # effective batch = 1 × 8 = 8
    learning_rate: 1.2e-4  # slightly lower than v7's 1.5e-4 for 14B stability
    warmup_ratio: 0.05
    lr_scheduler_type: "cosine"
    max_seq_length: 4096  # was 2048 — handles longer real posts
    fp16: false
    bf16: true  # M4 Max supports bf16
    optim: "adamw_torch"
    weight_decay: 0.01
    max_grad_norm: 1.0
    logging_steps: 10
    save_steps: 100
    # Quoted on purpose: a bare `no` is a boolean in YAML 1.1.
    evaluation_strategy: "no"
    dataloader_num_workers: 0  # MPS: no multiprocessing
    remove_unused_columns: false
    gradient_checkpointing: true  # save RAM on 14B

  # Chat template (Qwen2.5 uses ChatML)
  chat_template: "chatml"
  dataset_text_field: "text"

# ─── DPO Phase ────────────────────────────────────────────────────────────────
dpo:
  input_adapter: "adapters/fo-blog-v8/adapter"  # start from SFT
  output_dir: "adapters/fo-blog-v8-dpo/adapter"

  training:
    num_train_epochs: 2  # was 1 — more DPO for 14B
    per_device_train_batch_size: 1
    gradient_accumulation_steps: 8
    # Written with a decimal point: YAML 1.1 loaders (e.g. PyYAML) resolve a
    # bare `5e-5` as the string "5e-5", not a float.
    learning_rate: 5.0e-5
    warmup_ratio: 0.05
    lr_scheduler_type: "cosine"
    max_seq_length: 4096
    bf16: true
    optim: "adamw_torch"
    logging_steps: 5
    save_steps: 50
    dataloader_num_workers: 0
    gradient_checkpointing: true

  dpo_params:
    beta: 0.1  # KL penalty (standard)
    loss_type: "sigmoid"  # standard DPO loss
    max_prompt_length: 512
    max_length: 4096

# ─── GGUF Conversion ──────────────────────────────────────────────────────────
gguf:
  output_name: "fo-blog-v8.gguf"
  quantization: "Q4_K_M"
  ollama_model_name: "fo-blog-v8"
  # NOTE(review): this path pins one Homebrew Cellar build (8680) and will
  # presumably stop resolving after a llama.cpp upgrade — verify before a run.
  convert_script: "/opt/homebrew/Cellar/llama.cpp/8680/bin/convert_hf_to_gguf.py"
  quantize_bin: "/opt/homebrew/bin/llama-quantize"

  # Ollama Modelfile system prompt (literal block: line breaks are preserved)
  modelfile_system: |
    You are an expert technical writer specializing in optical networking and transceiver technology.

    STRICT CONSTRAINTS:
    - LENGTH: 700-1000 words ONLY. Stop at 1000 words maximum.
    - STRUCTURE: 1) Hook paragraph, 2) Technical sections (## headers), 3) Exactly 3 takeaways
    - TOPIC DISCIPLINE: Write ONLY about the exact topic requested. Zero drift.
    - NO REPETITION: Every sentence adds new information.
    - VOICE: Confident and direct. No hedging phrases.
    - AUDIENCE: Network engineers and IT professionals.

  modelfile_params:
    temperature: 0.7
    top_p: 0.9
    top_k: 40
    repeat_penalty: 1.15
    num_predict: 1500

# ─── Hardware ─────────────────────────────────────────────────────────────────
hardware:
  device: "mps"  # Apple Silicon M4 Max
  ram_gb: 48
  python: "/opt/homebrew/bin/python3.13"
  # 14B model at 16-bit precision (2 bytes/param) ≈ 28GB — fits in 48GB with
  # LoRA overhead (~4GB)
  # Training peak RAM estimate: ~36-40GB
  # Merge on CPU: device_map="cpu" to avoid MPS OOM during save_pretrained

# ─── Expected Timeline ────────────────────────────────────────────────────────
# SFT:   ~8-12 hours (5 epochs, 14B, MPS)
# DPO:   ~2-4 hours (2 epochs, 14B)
# Merge: ~30 min (CPU)
# GGUF:  ~15 min
# Total: ~11-17 hours (run overnight)