llm-gateway/packages/fine-tuner/config/fixes-training.yaml
Rene Fichtmueller 2ca77d0aee feat: Phase 2F — Multi-Agent Integration (ADRs + Client Fallback + Tests)
- ADR-0001: Multi-Agent Coworking Architecture with LLM Gateway Orchestrator
- ADR-0002: Tier Assignment Strategy for Model Selection (cost-first escalation)
- ADR-0003: Confidence Gate Thresholds & Learning Cycle Intervals (6h/12h/24h cycles)
- ADR-0004: External Provider Fallback Chain Ordering (Cerebras → Groq → Mistral)
- Enhanced client SDK: Offline Ollama fallback, health checks, exponential backoff retry
- Integration tests: claude-code-integration.test.ts (14 test cases)
- PHASE_2F_DEPLOYMENT.md: Pre-deployment checklist, automated deploy, rollback plan
- Post-deployment verification procedures for health, client fallback, metrics
2026-04-19 21:39:44 +02:00

104 lines
4.3 KiB
YAML
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

# ═══════════════════════════════════════════════════════════════
# fixes-training.yaml — Training Config for MAGATAMA Operations AI
# Dataset: 254 fixes from production (fixes.json → 4 JSONL formats)
# ═══════════════════════════════════════════════════════════════
# Data source (choose one)
# Each entry is the same fixes dataset exported in a different JSONL format.
# NOTE(review): "choose one" presumably applies to the three SFT-style files;
# dpo_file holds prompt/chosen/rejected pairs for the DPO phase — confirm
# against the trainer.
data:
  sft_file: "data/fixes-chatml-sft.jsonl"  # ChatML format for PEFT/TRL
  conversational_file: "data/fixes-conversational-sft.jsonl"  # messages[] format
  alpaca_file: "data/fixes-alpaca.jsonl"  # instruction/input/output
  dpo_file: "data/fixes-dpo-pairs.jsonl"  # prompt/chosen/rejected

  # Dataset stats (auto-generated 2026-04-13)
  # NOTE(review): nested under `data` on the assumption that the stats
  # describe the files above — confirm against the config loader.
  total_samples: 254
  avg_tokens_per_sample: 402
  # Percentages are quoted so they stay strings.
  coverage_at_512: "94%"
  coverage_at_1024: "100%"
# Model selection
# Base checkpoints to fine-tune; the children must be indented under `models`
# so they load as one mapping rather than as stray top-level keys.
models:
  # Primary: Qwen2.5-7B for ops knowledge (smaller, faster training)
  primary: "Qwen/Qwen2.5-7B-Instruct"
  # Secondary: Qwen2.5-14B for deeper reasoning (if VRAM allows)
  secondary: "Qwen/Qwen2.5-14B-Instruct"
# SFT Training (Phase 1)
# All hyperparameters live under `sft` so that keys shared with the DPO
# section (num_epochs, batch_size, gradient_accumulation, learning_rate,
# max_seq_length) do not collide as duplicate top-level keys.
sft:
  device: "mps"  # Apple Silicon MPS
  max_seq_length: 1024  # Covers 100% of samples (avg 402 tokens)

  # LoRA adapter settings
  lora_r: 16
  lora_alpha: 32
  lora_dropout: 0.05
  target_modules:
    - "q_proj"
    - "k_proj"
    - "v_proj"
    - "o_proj"
    - "gate_proj"
    - "up_proj"
    - "down_proj"

  # Optimisation schedule
  num_epochs: 3
  batch_size: 1  # MPS: batch_size=1 required
  gradient_accumulation: 8  # Effective batch = batch_size × gradient_accumulation = 8
  # Written with a decimal point and a signed exponent so that YAML 1.1
  # loaders (e.g. PyYAML) read it as a float rather than a string.
  learning_rate: 2.0e-4
  warmup_ratio: 0.1
  weight_decay: 0.01
  lr_scheduler: "cosine"

  # Logging, checkpointing and evaluation cadence
  logging_steps: 10
  save_steps: 50
  eval_steps: 50
  eval_split: 0.1  # 10% = ~25 samples for eval

  # MPS-specific
  gradient_checkpointing: false  # Not supported on MPS
  bf16: false  # MPS doesn't support bf16
  fp16: false  # Use fp32 on MPS
  use_cache: false  # Required: false during training
# DPO Training (Phase 2 — after SFT)
# Nested under `dpo` so these values do not overwrite the identically named
# SFT hyperparameters when the file is loaded.
dpo:
  num_epochs: 1
  batch_size: 1
  gradient_accumulation: 4  # Effective batch = batch_size × gradient_accumulation = 4
  beta: 0.1  # DPO temperature
  # Decimal point and signed exponent keep this a float under YAML 1.1 loaders.
  learning_rate: 5.0e-5
  max_seq_length: 1024
# Evaluation
# Deployment gate plus a fixed list of probe prompts.
evaluation:
  n_eval_samples: 25  # 10% of 254
  min_improvement_to_deploy: 0.3  # Confidence delta required
  # Each prompt follows the "System: … | Component: …" header plus a
  # "Symptome:" bullet list. The text is data, so keep it verbatim; the
  # double quotes are required for the \n escapes to become newlines.
  eval_prompts:
    - "System: proxmox | Component: pvestatd\nSymptome:\n- Proxmox GUI: alle Graphs leer\n- pvestatd restart schlaegt fehl"
    - "System: llm-gateway | Component: free-llm-fallback\nSymptome:\n- Ollama nicht erreichbar\n- Kein LLM-Fallback"
    - "System: shieldx | Component: cipher-bypass\nSymptome:\n- ROT13/Base64 Injections nicht erkannt"
    - "System: peercortex | Component: api-ratelimit\nSymptome:\n- HTTP 429 Too Many Requests\n- Hunderte parallele Requests"
    - "System: ctxevent | Component: database-auth\nSymptome:\n- SASL: SCRAM-SERVER-FIRST-MESSAGE error"
# Output
# Where the trained adapter is written and how the exported model is named.
# The Phase 3 commands in the reference at the end of this file repeat these
# values (adapter path, quantization, model:tag) — keep them in sync.
output:
  adapters_dir: "adapters/fixes-ops-ai"
  gguf_quantization: "Q5_K_M"  # Best quality/size ratio for 7B
  ollama_model_name: "magatama-ops"
  ollama_tag: "fixes-v1"
# Training command reference:
# ═══════════════════════════════════════════════════════════════
# Phase 1 — SFT:
# cd packages/fine-tuner
# python3 scripts/manual_trigger.py --task-type fixes --config config/fixes-training.yaml
#
# Phase 2 — DPO (after SFT):
# python3 scripts/manual_trigger.py --dpo --config config/fixes-training.yaml
#
# Phase 3 — Convert to GGUF + deploy to Ollama:
# python3 -m src.converter --adapter adapters/fixes-ops-ai --quantize Q5_K_M
# ollama create magatama-ops:fixes-v1 -f adapters/fixes-ops-ai/Modelfile
#
# Estimated training time on Mac Studio M4 Max 48GB:
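# SFT 7B: ~15-20 min (254 samples × 3 epochs = ~762 micro-batches;
#   roughly 95 optimizer steps at gradient_accumulation 8, fewer if the 10% eval split is held out)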
# SFT 14B: ~45-60 min
# DPO: ~5-10 min (1 epoch)
# ═══════════════════════════════════════════════════════════════