- ADR-0001: Multi-Agent Coworking Architecture with LLM Gateway Orchestrator
- ADR-0002: Tier Assignment Strategy for Model Selection (cost-first escalation)
- ADR-0003: Confidence Gate Thresholds & Learning Cycle Intervals (6h/12h/24h cycles)
- ADR-0004: External Provider Fallback Chain Ordering (Cerebras → Groq → Mistral)
- Enhanced client SDK: Offline Ollama fallback, health checks, exponential backoff retry
- Integration tests: claude-code-integration.test.ts (14 test cases)
- PHASE_2F_DEPLOYMENT.md: Pre-deployment checklist, automated deploy, rollback plan
- Post-deployment verification procedures for health, client fallback, metrics
40 lines
1.3 KiB
YAML
40 lines
1.3 KiB
YAML
# Connection URLs: PostgreSQL database, LLM gateway, and a local Ollama server.
# NOTE(review): database_url embeds a plaintext password. If this file is
# committed to version control, consider sourcing the credential from the
# environment or a secret store — confirm the consumer supports that before
# changing the value.
database_url: "postgresql://llm:llm_secure_2026@127.0.0.1:5432/llm_gateway"
gateway_url: "https://llm-gateway.context-x.org"
ollama_url: "http://localhost:11434"

# Base model identifiers, keyed by a short alias.
# NOTE(review): the `_hf` suffix and `org/name` form suggest a Hugging Face
# repo ID — confirm against the code that reads this key.
models:
  qwen_7b_hf: "Qwen/Qwen2.5-7B-Instruct"  # fo-blog uses 7B — fast inference on Mac Studio

# Device, sequence length, and LoRA adapter hyperparameters.
# NOTE(review): nesting was reconstructed from a flattened copy; every key up
# to the next blank-line-separated section is assumed to belong to `training`.
training:
  device: "mps"  # Apple Silicon MPS
  max_seq_length: 2048  # Reduced from 2560 — articles avg 7k chars, fits in 2k tokens
  lora_r: 16  # Halved from 32 — less aggressive, prevents memorization
  lora_alpha: 32  # Halved from 64 — proportional to r
  lora_dropout: 0.1  # Increased from 0.05 — adds regularization
  # Names of the projection layers that receive adapters.
  target_modules:
    - "q_proj"
    - "k_proj"
    - "v_proj"
    - "o_proj"
    - "gate_proj"
    - "up_proj"
    - "down_proj"

# Supervised fine-tuning (SFT) schedule.
# NOTE(review): placed at top level because the flattened copy shows it as its
# own blank-line-separated section; confirm it is not meant to nest under
# `training`.
sft:
  num_epochs: 3  # Down from 15 — 3 epochs is standard for SFT, prevents overfitting
  batch_size: 1
  # Presumably batch_size x gradient_accumulation = 8 samples per optimizer
  # step — confirm against the trainer.
  gradient_accumulation: 8
  # Keep the decimal point and the signed exponent: YAML 1.1 loaders such as
  # PyYAML resolve `5e-5` as a string, but `5.0e-5` as a float.
  learning_rate: 5.0e-5  # Down from 2e-4 — gentler updates preserve base model
  warmup_ratio: 0.05

# Output directories and the name of the produced model.
# NOTE(review): both directories are relative paths; what they are resolved
# against is not visible here — confirm with the consuming script.
output:
  adapters_dir: "adapters"
  models_dir: "models"
  model_name: "fo-blog-v6"

# Paths to the llama.cpp conversion/quantization tools and the Python
# interpreter, plus the default quantization type.
llama_cpp:
  # NOTE(review): this path pins the Homebrew Cellar version directory `8680`,
  # so it will likely stop resolving after llama.cpp is upgraded; a
  # version-independent path may be preferable — verify one exists first.
  convert_script: "/opt/homebrew/Cellar/llama.cpp/8680/bin/convert_hf_to_gguf.py"
  quantize_binary: "/opt/homebrew/bin/llama-quantize"
  python_bin: "/opt/homebrew/bin/python3.13"
  default_quantization: "Q4_K_M"  # Smaller than Q5_K_M, still high quality