# llm-gateway/packages/fine-tuner/config/fixes-training.yaml

# ═══════════════════════════════════════════════════════════════
# fixes-training.yaml — Training Config for MAGATAMA Operations AI
# Dataset: 254 fixes from production (fixes.json → 4 JSONL formats)
# ═══════════════════════════════════════════════════════════════

# Data source: four exports of the same fix set (choose one per run)
data:
  # ChatML format for PEFT/TRL
  sft_file: 'data/fixes-chatml-sft.jsonl'
  # messages[] format
  conversational_file: 'data/fixes-conversational-sft.jsonl'
  # instruction/input/output records
  alpaca_file: 'data/fixes-alpaca.jsonl'
  # prompt/chosen/rejected preference pairs
  dpo_file: 'data/fixes-dpo-pairs.jsonl'

  # Dataset statistics (auto-generated 2026-04-13).
  # The coverage values are quoted so they stay strings.
  total_samples: 254
  avg_tokens_per_sample: 402
  coverage_at_512: '94%'
  coverage_at_1024: '100%'

# Base models to fine-tune
models:
  # Primary: the 7B model for ops knowledge (smaller, faster training)
  primary: 'Qwen/Qwen2.5-7B-Instruct'
  # Secondary: the 14B model for deeper reasoning, only if memory allows.
  # NOTE(review): sft.bf16 and sft.fp16 are both false; at 4 bytes per
  # parameter, 14B weights alone are roughly 56 GB. Confirm this fits on
  # the target machine before relying on it.
  secondary: 'Qwen/Qwen2.5-14B-Instruct'

# Phase 1: supervised fine-tuning (SFT) with LoRA adapters
sft:
  # Apple Silicon MPS backend
  device: 'mps'
  # 1024 tokens covers 100% of samples (average is 402 tokens)
  max_seq_length: 1024

  # LoRA adapter settings (alpha / r = 2)
  lora_r: 16
  lora_alpha: 32
  lora_dropout: 0.05
  target_modules:
    # attention projections
    - 'q_proj'
    - 'k_proj'
    - 'v_proj'
    - 'o_proj'
    # MLP projections
    - 'gate_proj'
    - 'up_proj'
    - 'down_proj'

  # Optimisation schedule
  num_epochs: 3
  # Per-device batch is 1 on MPS.
  # NOTE(review): originally documented as "required"; confirm whether
  # this is a hard limit or a memory-driven choice.
  batch_size: 1
  # Effective batch = batch_size x gradient_accumulation = 8
  gradient_accumulation: 8
  # Keep the decimal point and the signed exponent: YAML 1.1 loaders such
  # as PyYAML only recognise this exact shape as a float.
  learning_rate: 2.0e-4
  warmup_ratio: 0.1
  weight_decay: 0.01
  lr_scheduler: 'cosine'

  # Logging, checkpointing and evaluation cadence.
  # NOTE(review): with ~229 training samples and an effective batch of 8
  # there are only about 29 optimizer steps per epoch. If the trainer
  # counts optimizer steps, save/eval at 50 fires once per run; confirm
  # this is intended.
  logging_steps: 10
  save_steps: 50
  eval_steps: 50
  # 10% of the data (~25 samples) is held out for evaluation
  eval_split: 0.1

  # MPS-specific settings
  # Disabled for MPS runs.
  # NOTE(review): the stated reason was "not supported on MPS"; re-verify
  # against the PyTorch version in use.
  gradient_checkpointing: false
  # Both mixed-precision modes are off, so training uses fp32.
  # NOTE(review): bf16 availability on MPS depends on the PyTorch/macOS
  # version; confirm before changing.
  bf16: false
  fp16: false
  # Must remain false during training
  use_cache: false

# Phase 2: DPO preference training, run after SFT has finished
dpo:
  num_epochs: 1
  max_seq_length: 1024

  # Effective batch = batch_size x gradient_accumulation = 4
  batch_size: 1
  gradient_accumulation: 4

  # DPO temperature
  beta: 0.1
  # Keep the decimal point and the signed exponent so YAML 1.1 loaders
  # read this as a float.
  learning_rate: 5.0e-5

# Evaluation
evaluation:
  # 10% of the 254 samples
  n_eval_samples: 25
  # Confidence delta required before a model is deployed
  min_improvement_to_deploy: 0.3
  # Evaluation prompts. The prompt text is runtime data and is kept
  # verbatim; `|-` preserves the line breaks and adds no trailing newline.
  eval_prompts:
    - |-
      System: proxmox | Component: pvestatd
      Symptome:
      - Proxmox GUI: alle Graphs leer
      - pvestatd restart schlaegt fehl
    - |-
      System: llm-gateway | Component: free-llm-fallback
      Symptome:
      - Ollama nicht erreichbar
      - Kein LLM-Fallback
    - |-
      System: shieldx | Component: cipher-bypass
      Symptome:
      - ROT13/Base64 Injections nicht erkannt
    - |-
      System: peercortex | Component: api-ratelimit
      Symptome:
      - HTTP 429 Too Many Requests
      - Hunderte parallele Requests
    - |-
      System: ctxevent | Component: database-auth
      Symptome:
      - SASL: SCRAM-SERVER-FIRST-MESSAGE error
# Output artifacts.
# The command reference at the bottom of this file repeats the adapter
# path, quantization and Ollama name/tag; keep the two in sync.
output:
  adapters_dir: 'adapters/fixes-ops-ai'
  # Best quality/size ratio for 7B
  gguf_quantization: 'Q5_K_M'
  ollama_model_name: 'magatama-ops'
  ollama_tag: 'fixes-v1'

# Training command reference:
# ═══════════════════════════════════════════════════════════════
# Phase 1 — SFT:
#   cd packages/fine-tuner
#   python3 scripts/manual_trigger.py --task-type fixes --config config/fixes-training.yaml
#
# Phase 2 — DPO (after SFT):
#   python3 scripts/manual_trigger.py --dpo --config config/fixes-training.yaml
#
# Phase 3 — Convert to GGUF + deploy to Ollama:
#   python3 -m src.converter --adapter adapters/fixes-ops-ai --quantize Q5_K_M
#   ollama create magatama-ops:fixes-v1 -f adapters/fixes-ops-ai/Modelfile
#
# Estimated training time on Mac Studio M4 Max 48GB:
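#   SFT 7B:  ~15-20 min (254 samples × 3 epochs = 762 sample passes at
#            batch_size 1, i.e. ~95 optimizer steps with
#            gradient_accumulation 8, before the 10% eval split is held out)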
#   SFT 14B: ~45-60 min
#   DPO:     ~5-10 min (1 epoch)
# ═══════════════════════════════════════════════════════════════