# llm-gateway/packages/fine-tuner/config/fine_tuner.yaml

# Service endpoints: PostgreSQL connection string, gateway URL, Ollama URL.
# NOTE(review): database_url embeds the database password in plain text.
# Confirm no real credential is committed here; prefer injecting it from the
# environment if the config loader supports that.
database_url: 'postgresql://llm:llm_secure_password@localhost:5432/llm_gateway'
gateway_url: 'http://localhost:3100'
# NOTE(review): fixed private-network address; verify for each deployment.
ollama_url: 'http://192.168.178.169:11434'

# HuggingFace model IDs of the base models.
models:
  # Used for general fine-tuning.
  qwen_14b_hf: 'Qwen/Qwen2.5-14B-Instruct'
  # For task-specific runs (smaller, faster).
  qwen_7b_hf: 'Qwen/Qwen2.5-7B-Instruct'

# Training settings: device, sequence length, LoRA parameters, and the
# per-stage settings under `sft` and `dpo`.
training:
  # Apple Silicon MPS; fall back to 'cpu' if MPS is unavailable.
  device: 'mps'
  max_seq_length: 2048
  lora_r: 16
  lora_alpha: 32
  lora_dropout: 0.05
  # Names of the modules that LoRA is applied to.
  target_modules:
    - 'q_proj'
    - 'k_proj'
    - 'v_proj'
    - 'o_proj'
    - 'gate_proj'
    - 'up_proj'
    - 'down_proj'

  # Settings for the `sft` stage.
  sft:
    num_epochs: 3
    batch_size: 1
    gradient_accumulation: 8
    learning_rate: 2.0e-4
    warmup_ratio: 0.1

  # Settings for the `dpo` stage.
  dpo:
    num_epochs: 1
    batch_size: 1
    gradient_accumulation: 4
    # DPO temperature; higher = more conservative.
    beta: 0.1
    learning_rate: 5.0e-5

# Evaluation settings that gate deployment.
evaluation:
  # Confidence delta required before deployment.
  min_improvement_to_deploy: 0.3
  n_eval_samples: 20

# Output directories for adapters and models.
# NOTE(review): both paths are relative; the base directory they resolve
# against is not visible in this file. Confirm against the loader.
output:
  adapters_dir: 'adapters'
  models_dir: 'models'

# llama.cpp tool locations and the default quantization type.
# NOTE(review): both paths are absolute Homebrew locations and the script path
# is tied to Python 3.12; verify they exist on the target machine.
llama_cpp:
  convert_script: '/opt/homebrew/lib/python3.12/site-packages/llama_cpp/convert_hf_to_gguf.py'
  quantize_binary: '/opt/homebrew/bin/llama-quantize'
  default_quantization: 'Q5_K_M'