---
# ═══════════════════════════════════════════════════════════════
# fixes-training.yaml — Training Config for MAGATAMA Operations AI
# Dataset: 254 fixes from production (fixes.json → 4 JSONL formats)
# ═══════════════════════════════════════════════════════════════

# Data source (choose one)
data:
  sft_file: "data/fixes-chatml-sft.jsonl"  # ChatML format for PEFT/TRL
  conversational_file: "data/fixes-conversational-sft.jsonl"  # messages[] format
  alpaca_file: "data/fixes-alpaca.jsonl"  # instruction/input/output
  dpo_file: "data/fixes-dpo-pairs.jsonl"  # prompt/chosen/rejected

  # Dataset stats (auto-generated 2026-04-13)
  total_samples: 254
  avg_tokens_per_sample: 402
  coverage_at_512: "94%"
  coverage_at_1024: "100%"

# Model selection
models:
  # Primary: Qwen2.5-7B for ops knowledge (smaller, faster training)
  primary: "Qwen/Qwen2.5-7B-Instruct"
  # Secondary: Qwen2.5-14B for deeper reasoning (if VRAM allows)
  secondary: "Qwen/Qwen2.5-14B-Instruct"

# SFT Training (Phase 1)
sft:
  device: "mps"  # Apple Silicon MPS
  max_seq_length: 1024  # Covers 100% of samples (avg 402 tokens)

  # LoRA adapter settings
  lora_r: 16
  lora_alpha: 32
  lora_dropout: 0.05
  target_modules:
    - "q_proj"
    - "k_proj"
    - "v_proj"
    - "o_proj"
    - "gate_proj"
    - "up_proj"
    - "down_proj"

  # Optimization schedule
  num_epochs: 3
  batch_size: 1  # MPS: batch_size=1 required
  gradient_accumulation: 8  # Effective batch = 1 × 8 = 8
  learning_rate: 2.0e-4
  warmup_ratio: 0.1
  weight_decay: 0.01
  lr_scheduler: "cosine"

  # Logging / checkpointing / evaluation cadence
  logging_steps: 10
  # NOTE(review): if save/eval steps are counted in optimizer steps, a run of
  # ~86 steps checkpoints and evaluates only once (at step 50) — confirm the
  # unit the trainer uses before relying on intermediate checkpoints.
  save_steps: 50
  eval_steps: 50
  eval_split: 0.1  # 10% = ~25 samples for eval

  # MPS-specific
  gradient_checkpointing: false  # Not supported on MPS
  bf16: false  # MPS doesn't support bf16
  fp16: false  # Use fp32 on MPS
  use_cache: false  # Required: false during training

# DPO Training (Phase 2 — after SFT)
dpo:
  num_epochs: 1
  batch_size: 1
  gradient_accumulation: 4  # Effective batch = 1 × 4 = 4
  beta: 0.1  # DPO temperature
  learning_rate: 5.0e-5
  max_seq_length: 1024

# Evaluation
evaluation:
  n_eval_samples: 25  # 10% of 254
  min_improvement_to_deploy: 0.3  # Confidence delta required
  # Fixed prompts, one per system. Literal block scalars (|-) keep the line
  # breaks exactly and strip the trailing newline.
  eval_prompts:
    - |-
      System: proxmox | Component: pvestatd
      Symptome:
      - Proxmox GUI: alle Graphs leer
      - pvestatd restart schlaegt fehl
    - |-
      System: llm-gateway | Component: free-llm-fallback
      Symptome:
      - Ollama nicht erreichbar
      - Kein LLM-Fallback
    - |-
      System: shieldx | Component: cipher-bypass
      Symptome:
      - ROT13/Base64 Injections nicht erkannt
    - |-
      System: peercortex | Component: api-ratelimit
      Symptome:
      - HTTP 429 Too Many Requests
      - Hunderte parallele Requests
    - |-
      System: ctxevent | Component: database-auth
      Symptome:
      - SASL: SCRAM-SERVER-FIRST-MESSAGE error

# Output
output:
  adapters_dir: "adapters/fixes-ops-ai"
  gguf_quantization: "Q5_K_M"  # Best quality/size ratio for 7B
  ollama_model_name: "magatama-ops"
  ollama_tag: "fixes-v1"

# Training command reference:
# ═══════════════════════════════════════════════════════════════
# Phase 1 — SFT:
#   cd packages/fine-tuner
#   python3 scripts/manual_trigger.py --task-type fixes --config config/fixes-training.yaml
#
# Phase 2 — DPO (after SFT):
#   python3 scripts/manual_trigger.py --dpo --config config/fixes-training.yaml
#
# Phase 3 — Convert to GGUF + deploy to Ollama:
#   python3 -m src.converter --adapter adapters/fixes-ops-ai --quantize Q5_K_M
#   ollama create magatama-ops:fixes-v1 -f adapters/fixes-ops-ai/Modelfile
#
# Estimated training time on Mac Studio M4 Max 48GB:
#   SFT 7B:  ~15-20 min (254 samples × 3 epochs = ~762 micro-batch passes;
#            ~95 optimizer steps at gradient_accumulation 8, or ~86 once
#            eval_split holds out 10% of the samples)
#   SFT 14B: ~45-60 min
#   DPO:     ~5-10 min (1 epoch)
# ═══════════════════════════════════════════════════════════════