---
# Pipeline configuration: service endpoints, base model, LoRA/SFT
# hyperparameters, output locations, and llama.cpp tool paths.
#
# NOTE(review): this file was recovered from a copy in which every line
# break had been collapsed, so the nesting below is reconstructed. In
# particular, `sft` is placed under `training` and `llama_cpp` at top
# level — confirm both against the code that loads this file.

# NOTE(review): the password is embedded in this URL in plaintext. Prefer
# supplying it from an environment variable or secret store if the loader
# supports that, and keep this file out of version control otherwise.
database_url: "postgresql://llm:llm_secure_2026@127.0.0.1:5432/llm_gateway"
gateway_url: "https://llm-gateway.context-x.org"
ollama_url: "http://localhost:11434"

models:
  # fo-blog uses 7B — fast inference on Mac Studio
  qwen_7b_hf: "Qwen/Qwen2.5-7B-Instruct"

training:
  # Apple Silicon MPS
  device: "mps"
  # Reduced from 2560 — articles avg 7k chars, fits in 2k tokens
  max_seq_length: 2048
  # Halved from 32 — less aggressive, prevents memorization
  lora_r: 16
  # Halved from 64 — proportional to r
  lora_alpha: 32
  # Increased from 0.05 — adds regularization
  lora_dropout: 0.1
  target_modules:
    - "q_proj"
    - "k_proj"
    - "v_proj"
    - "o_proj"
    - "gate_proj"
    - "up_proj"
    - "down_proj"
  sft:
    # Down from 15 — 3 epochs is standard for SFT, prevents overfitting
    num_epochs: 3
    batch_size: 1
    gradient_accumulation: 8
    # Down from 2e-4 — gentler updates preserve base model.
    # Keep the "5.0e-5" spelling (with the decimal point): YAML 1.1
    # loaders such as PyYAML read "5e-5" as a string, not a float.
    learning_rate: 5.0e-5
    warmup_ratio: 0.05

output:
  adapters_dir: "adapters"
  models_dir: "models"
  model_name: "fo-blog-v6"

llama_cpp:
  # NOTE(review): this path pins a specific Homebrew Cellar version
  # directory (8680) — presumably it must be updated when llama.cpp is
  # upgraded; verify.
  convert_script: "/opt/homebrew/Cellar/llama.cpp/8680/bin/convert_hf_to_gguf.py"
  quantize_binary: "/opt/homebrew/bin/llama-quantize"
  python_bin: "/opt/homebrew/bin/python3.13"
  # Smaller than Q5_K_M, still high quality
  default_quantization: "Q4_K_M"