Rene Fichtmueller 3a00ff4d33 feat: initial llm-gateway implementation
- Complete Fastify gateway with 8-stage pipeline
- Circuit breaker (opossum) per model tier
- Rate limiting per caller
- Ban list validation (EN/DE/auto-detected)
- TIP validator (SFF-8024, part numbers, wavelengths)
- Prometheus metrics
- pg-boss async queue
- PostgreSQL audit log + review queue
- 9 prompt templates (TIP, LinkedIn, ShieldX)
- Learning engine scaffolding
- Auto-learning: ban-list, few-shot, routing, prompt optimizer
2026-04-02 22:48:55 +02:00

50 lines
1.3 KiB
YAML

# Service endpoints.
# NOTE(review): the connection string below embeds a plaintext password.
# Confirm whether the loader can read it from the environment or a secret
# store instead of a committed file.
database_url: 'postgresql://llm:llm_secure_password@localhost:5432/llm_gateway'
gateway_url: 'http://localhost:3100'
# NOTE(review): fixed private-network (192.168.x.x) address — machine-specific;
# confirm it is meant to be committed.
ollama_url: 'http://192.168.178.169:11434'
# Base model identifiers, looked up by key.
# The entries are indented so they are children of `models`; written flush
# with the parent, `models` parses as null and the entries become top-level keys.
models:
  qwen_14b_hf: 'Qwen/Qwen2.5-14B-Instruct'  # HuggingFace model ID — used for general fine-tuning
  qwen_7b_hf: 'Qwen/Qwen2.5-7B-Instruct'  # For task-specific runs (smaller, faster)
# Training settings: device, sequence length, and LoRA adapter parameters.
# Children are indented so they belong to `training`; written flush with the
# parent, `training` parses as null and every setting becomes a top-level key.
training:
  device: 'mps'  # Apple Silicon MPS — fall back to "cpu" if MPS is unavailable
  max_seq_length: 2048
  # LoRA hyperparameters.
  lora_r: 16
  lora_alpha: 32
  lora_dropout: 0.05
  # Presumably the projection layers the adapter is attached to — confirm
  # against the trainer that reads this list.
  target_modules:
    - 'q_proj'
    - 'k_proj'
    - 'v_proj'
    - 'o_proj'
    - 'gate_proj'
    - 'up_proj'
    - 'down_proj'
# Supervised fine-tuning (SFT) stage settings.
# Children are indented so they belong to `sft`. Written flush with the parent,
# `sft` parses as null and num_epochs / batch_size / gradient_accumulation /
# learning_rate become top-level keys that duplicate the same-named `dpo`
# keys, so most parsers silently keep only the last value.
# NOTE(review): kept at top level as written; confirm the loader does not
# expect this section under `training`.
sft:
  num_epochs: 3
  batch_size: 1
  gradient_accumulation: 8
  # Keep the decimal point and signed exponent: YAML 1.1 loaders such as
  # PyYAML read a bare `2e-4` as a string, not a float.
  learning_rate: 2.0e-4
  warmup_ratio: 0.1
# DPO stage settings.
# Children are indented so they belong to `dpo`. Written flush with the parent,
# `dpo` parses as null and num_epochs / batch_size / gradient_accumulation /
# learning_rate become top-level keys that duplicate the same-named `sft`
# keys, so most parsers silently keep only the last value.
# NOTE(review): kept at top level as written; confirm the loader does not
# expect this section under `training`.
dpo:
  num_epochs: 1
  batch_size: 1
  gradient_accumulation: 4
  beta: 0.1  # DPO temperature — higher = more conservative
  # Keep the decimal point and signed exponent: YAML 1.1 loaders such as
  # PyYAML read a bare `5e-5` as a string, not a float.
  learning_rate: 5.0e-5
# Evaluation settings used to decide on deployment.
# Children are indented so they belong to `evaluation`; written flush with the
# parent, `evaluation` parses as null and the settings become top-level keys.
evaluation:
  min_improvement_to_deploy: 0.3  # confidence delta required before deployment
  n_eval_samples: 20
# Output directory names (relative paths as written).
# Children are indented so they belong to `output`; written flush with the
# parent, `output` parses as null and both paths become top-level keys.
output:
  adapters_dir: 'adapters'
  models_dir: 'models'
# llama.cpp tooling: conversion script, quantize binary, default quantization.
# Children are indented so they belong to `llama_cpp`; written flush with the
# parent, `llama_cpp` parses as null and the settings become top-level keys.
# NOTE(review): both paths are absolute Homebrew locations (the script path is
# pinned to python3.12) — machine-specific; confirm they exist on the target host.
llama_cpp:
  convert_script: '/opt/homebrew/lib/python3.12/site-packages/llama_cpp/convert_hf_to_gguf.py'
  quantize_binary: '/opt/homebrew/bin/llama-quantize'
  # NOTE(review): assumed to belong to `llama_cpp` — confirm against the loader.
  default_quantization: 'Q5_K_M'