llm-gateway/packages/fine-tuner/config/blog-v7-training.yaml
Rene Fichtmueller 2ca77d0aee feat: Phase 2F — Multi-Agent Integration (ADRs + Client Fallback + Tests)
- ADR-0001: Multi-Agent Coworking Architecture with LLM Gateway Orchestrator
- ADR-0002: Tier Assignment Strategy for Model Selection (cost-first escalation)
- ADR-0003: Confidence Gate Thresholds & Learning Cycle Intervals (6h/12h/24h cycles)
- ADR-0004: External Provider Fallback Chain Ordering (Cerebras → Groq → Mistral)
- Enhanced client SDK: Offline Ollama fallback, health checks, exponential backoff retry
- Integration tests: claude-code-integration.test.ts (14 test cases)
- PHASE_2F_DEPLOYMENT.md: Pre-deployment checklist, automated deploy, rollback plan
- Post-deployment verification procedures for health, client fallback, metrics
2026-04-19 21:39:44 +02:00

189 lines
8.7 KiB
YAML
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

# ═══════════════════════════════════════════════════════════════════════════════
# blog-v7-training.yaml — fo-blog-v7 Training Configuration
#
# Key improvements over v6:
# 1. Anchored system prompt with STRICT length (700-1000w) and structure rules
# 2. 350+ training examples (250 generated + ~30 RIPE/APNIC/NOG ingested + 24 real posts + up to 50 supplemental)
# 3. Full 700-1000w articles as output_text (not keyword stubs)
# 4. DPO phase 2 with negative examples (v6 failure modes)
# 5. Diverse topics: transceivers + BGP + IPv6 + RIPE/NOG + data center
#
# v6 problem analysis:
# - Training data avg 1152w but no word-count constraint in system prompt
# - Model produces 5000w+ (inherits Qwen base behavior, no stopping signal)
# - Topic drift: "400G request" → generic SFP+ content
# - Missing intro paragraph
# - Repeated sections verbatim
#
# v7 fix strategy:
# - System prompt hard-encodes: 700-1000 words, hook+body+takeaways structure
# - Input_text explicitly states word limit
# - DPO training on (good, bad) pairs to reinforce constraint adherence
# ═══════════════════════════════════════════════════════════════════════════════
# Identifier for this training run. The same "fo-blog-v7" string recurs below
# in the adapter output paths and in the Ollama model name.
job_name: "fo-blog-v7"
# One-line human-readable summary of what distinguishes this run.
description: "BlogLLM v7 — anchored constraints, 350+ diverse examples, DPO phase"
# ─── Data sources ──────────────────────────────────────────────────────────────
data:
  # Phase 1 SFT inputs — all v7 sources listed here are combined.
  sft_files:
    # 250 Claude-generated
    - "~/transceiver-training-data/v7-generated-sft.jsonl"
    # ~30 RIPE/APNIC/NOG
    - "~/transceiver-training-data/v7-ripe-apnic-sft.jsonl"
    # 24 real posts (upgrade SP)
    - "~/transceiver-training-data/blog-fichtmueller-posts.jsonl"

  # Phase 2 DPO input: ~200 chosen/rejected pairs.
  dpo_file: "~/transceiver-training-data/v7-dpo-pairs.jsonl"

  # Pre-existing high-quality data (optional inclusion).
  supplemental_files:
    - path: "~/transceiver-training-data/master-training-dataset.jsonl"
      # Only include longer examples.
      filter: "word_count >= 700"
      # Limit to best 50.
      max_samples: 50

  # Dataset stats (updated after generate_v7_data.py completes).
  estimated_total_sft: 350
  estimated_total_dpo: 200
  target_word_count_range: "700-1000"
# ─── Model ────────────────────────────────────────────────────────────────────
model:
  # Same base checkpoint as v6 — proven on Mac Studio.
  base: "Qwen/Qwen2.5-7B-Instruct"
  model_type: "qwen2.5"
  revision: "main"
# ─── Phase 1: SFT (Supervised Fine-Tuning) ────────────────────────────────────
sft:
  # Apple Silicon MPS (Mac Studio M4 Max).
  device: "mps"
  # A 700-1000 word target is roughly 900-1300 tokens, plus the prompt.
  max_seq_length: 2048

  # LoRA adapter shape: rank doubled from v6 (16 -> 32) for a stronger signal,
  # with alpha kept at 2x the rank.
  lora_r: 32
  lora_alpha: 64
  lora_dropout: 0.05
  target_modules:
    - "q_proj"
    - "k_proj"
    - "v_proj"
    - "o_proj"
    - "gate_proj"
    - "up_proj"
    - "down_proj"

  # Optimisation schedule.
  # One more epoch than v6 (larger adapter capacity).
  num_epochs: 4
  # batch_size is 1 because of an MPS limitation; with 8 accumulation steps the
  # effective batch is 8.
  batch_size: 1
  gradient_accumulation: 8
  # Slightly lower than v6 (2e-4) for stability.
  learning_rate: 1.5e-4
  warmup_ratio: 0.1
  weight_decay: 0.01
  lr_scheduler: "cosine"

  # Logging, checkpoint, and evaluation cadence.
  logging_steps: 10
  save_steps: 50
  eval_steps: 50
  # 10% of the data is held out for eval.
  eval_split: 0.1

  # MPS-specific settings.
  gradient_checkpointing: true
  gradient_checkpointing_kwargs:
    use_reentrant: false
  bf16: false
  fp16: false
  use_cache: false

  # Output location for the SFT adapter.
  output_dir: "adapters/fo-blog-v7"
# ─── Phase 2: DPO (Direct Preference Optimization) ────────────────────────────
dpo:
  enabled: true

  # The SFT adapter is the starting point for this phase.
  base_adapter: "adapters/fo-blog-v7/adapter"
  output_dir: "adapters/fo-blog-v7-dpo"

  # Optimisation schedule and sequence limits.
  num_epochs: 1
  batch_size: 1
  gradient_accumulation: 4
  learning_rate: 5.0e-5
  max_seq_length: 2048
  max_prompt_length: 512

  # DPO-specific settings.
  # beta controls how far the policy may deviate from the reference model:
  # higher values keep it closer to the reference, lower values allow more
  # deviation.
  beta: 0.1
  # Standard DPO loss (vs ipo, spo).
  loss_type: "sigmoid"
  # false: the SFT adapter serves as the reference model.
  reference_free: false
# ─── Evaluation prompts ────────────────────────────────────────────────────────
evaluation:
  # 10% of ~350.
  n_eval_samples: 35

  # Fixed prompts run after training. Each restates the 700-1000 word limit in
  # the prompt itself and is paired with the word-count check. Written as
  # literal block scalars (|-) so the embedded blank lines are visible and no
  # trailing newline is appended.
  eval_prompts:
    - input: |-
        Write a blog post on the following topic:

        **Topic:** QSFP-DD vs OSFP: Which 400G Form Factor Wins in 2026

        **Target audience:** IT managers and operators who evaluate and buy transceivers

        Remember: 700-1000 words, hook + technical sections + 3 takeaways. Stay strictly on-topic. Start writing now.
      check: "word_count_700_1000"
    - input: |-
        Write a blog post on the following topic:

        **Topic:** BGP Route Leaks: Detection, Impact, and Prevention in 2026

        **Target audience:** network engineers and NOC operators

        Remember: 700-1000 words, hook + technical sections + 3 takeaways. Start writing now.
      check: "word_count_700_1000"
    - input: |-
        Write a blog post on the following topic:

        **Topic:** RPKI Route Origin Validation: A Practical Deployment Guide

        **Target audience:** network engineers and architects who design and operate optical infrastructure

        Remember: 700-1000 words, hook + technical sections + 3 takeaways. Start writing now.
      check: "word_count_700_1000"

  # Structural checks applied to generated articles.
  quality_checks:
    - word_count_in_range: [700, 1000]
    # Non-header first paragraph.
    - has_intro_paragraph: true
    # At least 2x ##.
    - has_headers: true
    # Bullet points at end.
    - has_takeaways: true
    # Topic mentioned in first 100 words.
    - no_topic_drift: true
    # No paragraph appears twice.
    - no_repeated_sections: true
# ─── Post-training: GGUF conversion ──────────────────────────────────────────
gguf:
  quantization: "Q4_K_M"
  output_name: "fo-blog-v7.gguf"
  ollama_model: "fo-blog-v7"
  # Ollama Modelfile text, kept verbatim as a literal block scalar.
  # NOTE(review): {gguf_path} looks like a placeholder filled in by the
  # conversion script — confirm against merge_and_convert.py.
  modelfile_template: |
    FROM {gguf_path}
    SYSTEM """You are an expert technical writer specializing in optical networking and transceiver technology.
    STRICT CONSTRAINTS:
    - LENGTH: 700-1000 words ONLY. Stop at 1000 words maximum.
    - STRUCTURE: 1) Hook paragraph, 2) Technical sections (## headers), 3) Exactly 3 takeaways
    - TOPIC DISCIPLINE: Write ONLY about the exact topic requested.
    - VOICE: Confident and direct. No hedging.
    - AUDIENCE: Network engineers and IT professionals."""
    PARAMETER temperature 0.7
    PARAMETER top_p 0.9
    PARAMETER top_k 40
    PARAMETER repeat_penalty 1.15
    PARAMETER num_predict 1500
# ─── Training commands ────────────────────────────────────────────────────────
#
# Step 0: Generate training data (run BEFORE training)
# cd packages/fine-tuner
# python3 scripts/generate_v7_data.py & # Background: 250 blogs
# python3 scripts/ingest_ripe_apnic.py \
# --nas-path /Volumes/KnowledgeLake/tashi-crawler/2026-03-06 # RIPE/NOG data
#
# Step 1: SFT Phase
# python3 scripts/train_blog_v7.py --phase sft
#
# Step 2: Wait for the SFT phase to finish, then generate DPO pairs
# python3 scripts/generate_dpo_pairs.py
#
# Step 3: DPO Phase
# python3 scripts/train_blog_v7.py --phase dpo
#
# Step 4: Convert to GGUF + register in Ollama
# python3 scripts/merge_and_convert.py --version v7
#
# Step 5: Test + deploy to Erik
# curl -X POST http://localhost:11434/api/generate \
# -d '{"model":"fo-blog-v7","prompt":"Write 700-1000w blog about QSFP-DD..."}'
#
# Estimated training time on Mac Studio M4 Max 48GB:
# SFT 7B, 350 examples, 4 epochs: ~25-35 min
# DPO, 200 pairs, 1 epoch: ~10-15 min
# ═══════════════════════════════════════════════════════════════════════════════