# NOTE: the lines in this comment block are residue from the repository web
# view (commit summary and file-size banner); they are not part of the config.
#   - ADR-0001: Multi-Agent Coworking Architecture with LLM Gateway Orchestrator
#   - ADR-0002: Tier Assignment Strategy for Model Selection (cost-first escalation)
#   - ADR-0003: Confidence Gate Thresholds & Learning Cycle Intervals (6h/12h/24h cycles)
#   - ADR-0004: External Provider Fallback Chain Ordering (Cerebras → Groq → Mistral)
#   - Enhanced client SDK: Offline Ollama fallback, health checks, exponential backoff retry
#   - Integration tests: claude-code-integration.test.ts (14 test cases)
#   - PHASE_2F_DEPLOYMENT.md: Pre-deployment checklist, automated deploy, rollback plan
#   - Post-deployment verification procedures for health, client fallback, metrics
#   File view: 104 lines, 4.3 KiB, YAML
# ═══════════════════════════════════════════════════════════════
# fixes-training.yaml — Training Config for MAGATAMA Operations AI
# Dataset: 254 fixes from production (fixes.json → 4 JSONL formats)
# ═══════════════════════════════════════════════════════════════

# Data source (choose one)
# The same dataset is exported in four JSONL formats; each key below points at
# one of them.
# NOTE(review): indentation was lost in the captured copy and the nesting here
# is reconstructed — confirm the dataset stats belong under `data`.
data:
  sft_file: "data/fixes-chatml-sft.jsonl"  # ChatML format for PEFT/TRL
  conversational_file: "data/fixes-conversational-sft.jsonl"  # messages[] format
  alpaca_file: "data/fixes-alpaca.jsonl"  # instruction/input/output
  dpo_file: "data/fixes-dpo-pairs.jsonl"  # prompt/chosen/rejected

  # Dataset stats (auto-generated 2026-04-13)
  total_samples: 254
  avg_tokens_per_sample: 402
  # Share of samples that fit within 512 / 1024 tokens. Quoted so the values
  # stay strings.
  coverage_at_512: "94%"
  coverage_at_1024: "100%"

# Model selection
# Base models to fine-tune, given as model identifier strings.
models:
  # Primary: Qwen2.5-7B for ops knowledge (smaller, faster training)
  primary: "Qwen/Qwen2.5-7B-Instruct"
  # Secondary: Qwen2.5-14B for deeper reasoning (if VRAM allows)
  secondary: "Qwen/Qwen2.5-14B-Instruct"

# SFT Training (Phase 1)
# LoRA supervised fine-tuning settings, tuned for Apple Silicon (MPS).
sft:
  device: "mps"  # Apple Silicon MPS
  max_seq_length: 1024  # Covers 100% of samples (avg 402 tokens)

  # LoRA adapter shape; adapters are attached to the attention and MLP
  # projection modules listed in target_modules.
  lora_r: 16
  lora_alpha: 32
  lora_dropout: 0.05
  target_modules:
    - "q_proj"
    - "k_proj"
    - "v_proj"
    - "o_proj"
    - "gate_proj"
    - "up_proj"
    - "down_proj"

  # Optimisation schedule.
  num_epochs: 3
  batch_size: 1  # MPS: batch_size=1 required
  gradient_accumulation: 8  # Effective batch = batch_size × accumulation = 8
  learning_rate: 2.0e-4
  warmup_ratio: 0.1
  weight_decay: 0.01
  lr_scheduler: "cosine"

  # Logging / checkpoint / eval cadence.
  # NOTE(review): if these count optimizer steps, a full run is only about 86
  # steps (≈229 training samples × 3 epochs / 8), so save/eval at 50 fires
  # once — confirm this is intended.
  logging_steps: 10
  save_steps: 50
  eval_steps: 50
  eval_split: 0.1  # 10% = ~25 samples for eval

  # MPS-specific
  gradient_checkpointing: false  # Not supported on MPS
  bf16: false  # MPS doesn't support bf16
  fp16: false  # Use fp32 on MPS
  use_cache: false  # Required: false during training

# DPO Training (Phase 2 — after SFT)
# Preference-optimisation pass run after the SFT phase has finished.
dpo:
  num_epochs: 1
  batch_size: 1
  gradient_accumulation: 4  # Effective batch = batch_size × accumulation = 4
  beta: 0.1  # DPO temperature
  learning_rate: 5.0e-5
  max_seq_length: 1024

# Evaluation
# Hold-out size, deploy gate, and a fixed list of prompts used for evaluation.
evaluation:
  n_eval_samples: 25  # 10% of 254
  min_improvement_to_deploy: 0.3  # Confidence delta required
  # Each prompt is one double-quoted string; the \n escapes are newlines inside
  # the prompt, so the double quotes must stay.
  eval_prompts:
    - "System: proxmox | Component: pvestatd\nSymptome:\n- Proxmox GUI: alle Graphs leer\n- pvestatd restart schlaegt fehl"
    - "System: llm-gateway | Component: free-llm-fallback\nSymptome:\n- Ollama nicht erreichbar\n- Kein LLM-Fallback"
    - "System: shieldx | Component: cipher-bypass\nSymptome:\n- ROT13/Base64 Injections nicht erkannt"
    - "System: peercortex | Component: api-ratelimit\nSymptome:\n- HTTP 429 Too Many Requests\n- Hunderte parallele Requests"
    - "System: ctxevent | Component: database-auth\nSymptome:\n- SASL: SCRAM-SERVER-FIRST-MESSAGE error"

# Output
# Where trained adapters are written and how the exported model is named.
output:
  adapters_dir: "adapters/fixes-ops-ai"
  gguf_quantization: "Q5_K_M"  # Best quality/size ratio for 7B
  # Name and tag together form the Ollama model reference
  # (magatama-ops:fixes-v1).
  ollama_model_name: "magatama-ops"
  ollama_tag: "fixes-v1"

# Training command reference:
# ═══════════════════════════════════════════════════════════════
# Phase 1 — SFT:
#   cd packages/fine-tuner
#   python3 scripts/manual_trigger.py --task-type fixes --config config/fixes-training.yaml
#
# Phase 2 — DPO (after SFT):
#   python3 scripts/manual_trigger.py --dpo --config config/fixes-training.yaml
#
# Phase 3 — Convert to GGUF + deploy to Ollama:
#   python3 -m src.converter --adapter adapters/fixes-ops-ai --quantize Q5_K_M
#   ollama create magatama-ops:fixes-v1 -f adapters/fixes-ops-ai/Modelfile
#
# Estimated training time on Mac Studio M4 Max 48GB:
#   SFT 7B:  ~15-20 min (254 samples × 3 epochs = 762 sample passes; roughly
#            86 optimizer steps at gradient_accumulation 8 after the 10% eval split)
#   SFT 14B: ~45-60 min
#   DPO:     ~5-10 min (1 epoch)
# ═══════════════════════════════════════════════════════════════