---
# Settings for service endpoints, base models, LoRA training (SFT and DPO),
# evaluation, and output / llama.cpp conversion paths.
#
# NOTE(review): this file was restored from a single collapsed line. The
# nesting below is inferred from key order -- in particular `sft` / `dpo`
# under `training`, and `default_quantization` under `llama_cpp`. Confirm
# against the code that loads this file.

# NOTE(review): this URL embeds a plaintext password. Prefer supplying it via
# an environment variable or secret store rather than committing it.
database_url: "postgresql://llm:llm_secure_2026@217.154.82.179:5432/llm_gateway"
gateway_url: "https://llm-gateway.context-x.org"
ollama_url: "http://localhost:11434"

models:
  # HuggingFace model ID — used for general fine-tuning
  qwen_14b_hf: "Qwen/Qwen2.5-14B-Instruct"
  # For task-specific runs (smaller, faster)
  qwen_7b_hf: "Qwen/Qwen2.5-7B-Instruct"

training:
  # Apple Silicon MPS — fallback to "cpu" if MPS unavailable
  device: "mps"
  max_seq_length: 2048
  lora_r: 16
  lora_alpha: 32
  lora_dropout: 0.05
  target_modules:
    - "q_proj"
    - "k_proj"
    - "v_proj"
    - "o_proj"
    - "gate_proj"
    - "up_proj"
    - "down_proj"
  sft:
    num_epochs: 3
    batch_size: 1
    gradient_accumulation: 8
    learning_rate: 2.0e-4
    warmup_ratio: 0.1
  dpo:
    num_epochs: 1
    batch_size: 1
    gradient_accumulation: 4
    # DPO temperature — higher = more conservative
    beta: 0.1
    learning_rate: 5.0e-5

evaluation:
  # confidence delta required before deployment
  min_improvement_to_deploy: 0.3
  n_eval_samples: 20

output:
  adapters_dir: "adapters"
  models_dir: "models"

llama_cpp:
  convert_script: "/opt/homebrew/lib/python3.12/site-packages/llama_cpp/convert_hf_to_gguf.py"
  quantize_binary: "/opt/homebrew/bin/llama-quantize"
  default_quantization: "Q5_K_M"