Rene Fichtmueller 2ca77d0aee feat: Phase 2F — Multi-Agent Integration (ADRs + Client Fallback + Tests)
- ADR-0001: Multi-Agent Coworking Architecture with LLM Gateway Orchestrator
- ADR-0002: Tier Assignment Strategy for Model Selection (cost-first escalation)
- ADR-0003: Confidence Gate Thresholds & Learning Cycle Intervals (6h/12h/24h cycles)
- ADR-0004: External Provider Fallback Chain Ordering (Cerebras → Groq → Mistral)
- Enhanced client SDK: Offline Ollama fallback, health checks, exponential backoff retry
- Integration tests: claude-code-integration.test.ts (14 test cases)
- PHASE_2F_DEPLOYMENT.md: Pre-deployment checklist, automated deploy, rollback plan
- Post-deployment verification procedures for health, client fallback, metrics
2026-04-19 21:39:44 +02:00

51 lines
1.5 KiB
YAML

# Service endpoints used by this configuration.
# NOTE(review): database_url embeds the database password in plain text.
# Secrets should not be committed to version control — load this value from
# an environment variable or secret store and rotate the exposed password.
# PostgreSQL on the loopback interface, port 15432, database "llm_gateway".
database_url: "postgresql://llm:llm_secure_2026@127.0.0.1:15432/llm_gateway"
# HTTPS base URL of the LLM gateway.
gateway_url: "https://llm-gateway.context-x.org"
# Ollama endpoint on this machine (localhost, port 11434).
ollama_url: "http://localhost:11434"
# Hugging Face model IDs of the base models used for fine-tuning.
# The entries are nested under `models`; written flat, `models` would parse
# as null and the two IDs would become unrelated top-level keys.
models:
  # Used for general fine-tuning.
  qwen_14b_hf: "Qwen/Qwen2.5-14B-Instruct"
  # For task-specific runs (smaller, faster).
  qwen_7b_hf: "Qwen/Qwen2.5-7B-Instruct"
# Fine-tuning settings: device and LoRA options first, then the per-stage
# hyperparameters for SFT and DPO.
# Each stage has its own mapping so that num_epochs, batch_size,
# gradient_accumulation and learning_rate do not collide as duplicate keys.
# NOTE(review): the nesting was reconstructed — `sft` and `dpo` are placed
# under `training`; confirm this matches the keys the training code reads.
training:
  # Apple Silicon MPS — fall back to "cpu" if MPS is unavailable.
  device: "mps"
  # BlogLLM "ultra" profile: 2560 for full article coverage, used together
  # with the aggressive LoRA settings below.
  max_seq_length: 2560
  # Doubled from 16 for domain specialization.
  lora_r: 32
  # Doubled from 32 for stronger adaptation (alpha / r stays at 2).
  lora_alpha: 64
  lora_dropout: 0.05
  # Module names LoRA is applied to.
  target_modules:
    - "q_proj"
    - "k_proj"
    - "v_proj"
    - "o_proj"
    - "gate_proj"
    - "up_proj"
    - "down_proj"
  # Supervised fine-tuning stage.
  sft:
    # Ultra-aggressive: 15 epochs for blog domain specialization
    # (vs. the standard 3).
    num_epochs: 15
    batch_size: 1
    # Presumably yields an effective batch of 8 (batch_size x accumulation);
    # verify against the trainer.
    gradient_accumulation: 8
    # Keep the decimal point and signed exponent: YAML 1.1 loaders such as
    # PyYAML only read this form as a float.
    learning_rate: 2.0e-4
    warmup_ratio: 0.1
  # Direct preference optimization stage.
  dpo:
    num_epochs: 1
    batch_size: 1
    gradient_accumulation: 4
    # DPO temperature — higher = more conservative.
    beta: 0.1
    # Same float notation as the SFT learning rate, for the same reason.
    learning_rate: 5.0e-5
# Evaluation settings consulted before a newly trained model is deployed.
# The two keys are nested under `evaluation`; written flat, `evaluation`
# would parse as null.
evaluation:
  # Confidence delta required before deployment.
  min_improvement_to_deploy: 0.3
  # Number of samples evaluated per run.
  n_eval_samples: 20
# Output directories for training artifacts, nested under `output`.
# NOTE(review): both paths are relative — presumably resolved against the
# working directory of the training run; confirm with the consumer.
output:
  adapters_dir: "adapters"
  models_dir: "models"
# llama.cpp tooling (Homebrew install under /opt/homebrew) used for GGUF
# conversion and quantization.
# NOTE(review): reconstructed as a top-level section; confirm it is not
# meant to sit under `output`.
llama_cpp:
  # Uses Homebrew's stable opt/ symlink instead of the version-pinned
  # Cellar path (Cellar/llama.cpp/8680/...), which disappears once Homebrew
  # upgrades the formula and cleans up the old version.
  convert_script: "/opt/homebrew/opt/llama.cpp/bin/convert_hf_to_gguf.py"
  quantize_binary: "/opt/homebrew/bin/llama-quantize"
  # Interpreter path for the tooling; presumably used to run convert_script.
  python_bin: "/opt/homebrew/bin/python3.13"
  default_quantization: "Q5_K_M"