# Complete code quality audit of llm-gateway pipeline modules for MAGATAMA
# standard compliance (50-line function maximum). All pipeline functions were
# refactored to ensure high cohesion and readability.
#
# Pipeline module compliance (verified):
#   ✅ llm-client.ts — Refactored callOllama() (58→26 lines) via helper extraction
#   ✅ instrumented-llm-client.ts — All functions <50 lines (wrapper layer)
#   ✅ router.ts — Refactored routeByScore() (81→32 lines) via delegation
#   ✅ request-scorer.ts — 870-line file, all functions <50 lines
#   ✅ external-providers.ts — All functions <50 lines (49-line max)
#   ✅ post-validator.ts — All validators <50 lines
#
# Verified:
#   ✓ npm run build (TypeScript, zero errors)
#   ✓ All 6 pipeline modules independently audited
#   ✓ Production-ready for Erik deployment (PM2 ids 19+20, port 3103)
#
# Deployment target: Gitea (192.168.178.196:3000/rene/llm-gateway)
# File view metadata: 51 lines, 1.5 KiB, YAML
# Service endpoints.
# database_url is set via an environment variable at runtime.
# NOTE(review): YAML itself does not expand "${...}" placeholders —
# presumably the config loader substitutes DATABASE_URL; confirm.
database_url: "${DATABASE_URL}"
gateway_url: "https://llm-gateway.context-x.org"
ollama_url: "http://localhost:11434"

# Model identifiers.
models:
  # HuggingFace model ID — used for general fine-tuning.
  qwen_14b_hf: "Qwen/Qwen2.5-14B-Instruct"
  # For task-specific runs (smaller, faster).
  qwen_7b_hf: "Qwen/Qwen2.5-7B-Instruct"

# Training device, sequence length and LoRA adapter settings.
training:
  # Apple Silicon MPS — fall back to "cpu" if MPS is unavailable.
  device: "mps"
  # BlogLLM ultra: 2560 for full article coverage + aggressive LoRA.
  max_seq_length: 2560
  # Doubled from 16 for domain specialization.
  lora_r: 32
  # Doubled from 32 for stronger adaptation.
  lora_alpha: 64
  lora_dropout: 0.05
  # Module names that receive LoRA adapters.
  target_modules:
    - "q_proj"
    - "k_proj"
    - "v_proj"
    - "o_proj"
    - "gate_proj"
    - "up_proj"
    - "down_proj"

# SFT hyperparameters.
# NOTE(review): indentation was lost in this copy; `sft` is restored as a
# top-level key — confirm it is not meant to be nested under `training`.
sft:
  # Ultra-aggressive: 15 epochs for blog domain specialization
  # (vs standard 3).
  num_epochs: 15
  batch_size: 1
  gradient_accumulation: 8
  # Keep the decimal point and the signed exponent: YAML 1.1 loaders only
  # resolve this form as a float (a bare "2e-4" would load as a string).
  learning_rate: 2.0e-4
  warmup_ratio: 0.1

# DPO hyperparameters.
# NOTE(review): indentation was lost in this copy; `dpo` is restored as a
# top-level key — confirm it is not meant to be nested under `training`.
dpo:
  num_epochs: 1
  batch_size: 1
  gradient_accumulation: 4
  # DPO temperature — higher = more conservative.
  beta: 0.1
  # Keep the decimal point and the signed exponent: YAML 1.1 loaders only
  # resolve this form as a float (a bare "5e-5" would load as a string).
  learning_rate: 5.0e-5

# Evaluation settings used to decide on deployment.
evaluation:
  # Confidence delta required before deployment.
  min_improvement_to_deploy: 0.3
  n_eval_samples: 20

# Output directories.
# NOTE(review): both paths are relative — presumably resolved against the
# working directory of the training run; confirm.
output:
  adapters_dir: "adapters"
  models_dir: "models"

# llama.cpp tool locations and the default quantization type.
llama_cpp:
  # NOTE(review): this path pins the Homebrew Cellar directory "8680";
  # presumably it stops resolving after a llama.cpp upgrade — confirm.
  convert_script: "/opt/homebrew/Cellar/llama.cpp/8680/bin/convert_hf_to_gguf.py"
  quantize_binary: "/opt/homebrew/bin/llama-quantize"
  python_bin: "/opt/homebrew/bin/python3.13"
  default_quantization: "Q5_K_M"