Full v8 training pipeline for the optical networking blog model: - train_blog_v8.py: SFT (LoRA r=64, 5 epochs) + DPO (2 epochs) on Qwen2.5-14B-Instruct Fixed for trl 1.2.x: SFTConfig instead of TrainingArguments, processing_class= instead of tokenizer=, eval_strategy= instead of deprecated evaluation_strategy= - consolidate_v8_dataset.py: weighted merge of all data sources (820 effective SFT / 235 DPO) - crawl_v8_sources.py: APNIC/RIPE Labs/potaroo/Cloudflare crawler with balanced div extraction - process_v6_blogs.py: converts 101 real v6 TIP blog outputs into SFT + DPO pairs - label_v7_quality.py: Claude-judged quality labels → v8 quality DPO pairs - parse_real_posts.py: parses blog.fichtmueller.org Ghost CMS HTML → gold SFT records - run_v8_pipeline.sh: autopilot (consolidate → SFT → DPO → GGUF → Ollama) - blog-v8-training.yaml: training config reference Dataset breakdown: 19 real posts ×3 + 196 v7-gen + 28 v6blogs ×2 + 135 external ×1.5
160 lines
6.7 KiB
YAML
160 lines
6.7 KiB
YAML
# ═══════════════════════════════════════════════════════════════════════════════
# blog-v8-training.yaml — fo-blog-v8 Training Configuration
#
# Base:   Qwen/Qwen2.5-14B-Instruct (2× the parameters of v7's 7B)
# Target: 700-1000w blog posts, optical networking + BGP + infra
#
# Key improvements over v7:
#   - 14B params → better instruction following at higher complexity
#   - LoRA r=64 (was r=32) → more expressive adapter
#   - Weighted datasets: human posts × 3.0, external rewritten × 1.5
#   - More epochs (5 SFT, 2 DPO) → deeper style absorption
#   - max_seq_length=4096 → handles longer real posts
#   - DPO from real v7 quality labels (good/bad scored posts)
# ═══════════════════════════════════════════════════════════════════════════════

# Base checkpoint that the SFT and DPO adapters are trained on top of.
base_model: 'Qwen/Qwen2.5-14B-Instruct'

# ─── Dataset Sources (merged by consolidate_v8_dataset.py) ────────────────────
# SFT sources. Every entry carries the JSONL path, the weight applied to it
# during the merge, and a human-readable description of its contents.
datasets:
  # Tier 1 (gold standard): Rene's own, human-written blog posts
  - path: '~/transceiver-training-data/v8-real-posts-sft.jsonl'
    weight: 3.0
    description: '19 real blog posts from blog.fichtmueller.org (human written)'

  # Tier 2: blogs generated for v7 (Claude-written, 197 topics, validated)
  - path: '~/transceiver-training-data/v7-generated-sft.jsonl'
    weight: 1.0
    description: 'Claude-generated optical networking blogs (v7, 197 topics)'

  # Tier 2: RIPE / APNIC NAS data
  - path: '~/transceiver-training-data/v7-ripe-apnic-sft.jsonl'
    weight: 1.0
    description: 'RIPE/APNIC BGP and routing content (v7 ingested)'

  # Tier 3: externally crawled content, rewritten
  - path: '~/transceiver-training-data/v8-external-sft.jsonl'
    weight: 1.5
    description: 'APNIC Blog / RIPE Labs / potaroo.net / Cloudflare (Claude rewritten)'

# DPO preferences: chosen/rejected pairs for preference learning.
#
# This list is keyed `dpo_datasets` rather than `dpo` because the DPO phase
# section later in this file also uses the top-level key `dpo`. Duplicate
# mapping keys are invalid YAML; parsers that tolerate them keep only the last
# value, which would silently discard this list. Consumers that need the DPO
# pair files must read `dpo_datasets`.
dpo_datasets:
  - path: "~/transceiver-training-data/v7-dpo-pairs.jsonl"
    description: "v7 DPO pairs (5 rejection strategies)"
  - path: "~/transceiver-training-data/v8-quality-dpo.jsonl"
    description: "v8 real quality labels (good/bad from v7 generated posts)"
    optional: true  # generated by label_v7_quality.py if run

# ─── SFT Phase ────────────────────────────────────────────────────────────────
# Supervised fine-tuning of a LoRA adapter on top of `base_model`.
sft:
  output_dir: "adapters/fo-blog-v8/adapter"
  # NOTE(review): presumably where the adapter merged into the base model is
  # written — confirm against the training script.
  merged_dir: "models/fo-blog-v8/merged"

  # LoRA parameters
  lora:
    r: 64  # was 32 in v7 — more expressive
    alpha: 128  # 2× r for stable training
    dropout: 0.05
    target_modules:
      - "q_proj"
      - "k_proj"
      - "v_proj"
      - "o_proj"
      - "gate_proj"
      - "up_proj"
      - "down_proj"

  # Training hyperparameters
  training:
    num_train_epochs: 5  # was 4 — extra epoch for 14B
    per_device_train_batch_size: 1
    gradient_accumulation_steps: 8  # effective batch = 1 × 8 = 8
    learning_rate: 1.2e-4  # slightly lower than v7's 1.5e-4 for 14B stability
    warmup_ratio: 0.05
    lr_scheduler_type: "cosine"
    # NOTE(review): newer TRL releases name this SFTConfig field `max_length`;
    # confirm which name train_blog_v8.py expects before renaming.
    max_seq_length: 4096  # was 2048 — handles longer real posts
    fp16: false
    bf16: true  # M4 Max supports bf16
    optim: "adamw_torch"
    weight_decay: 0.01
    max_grad_norm: 1.0
    logging_steps: 10
    save_steps: 100
    # `eval_strategy` is the current argument name; the former
    # `evaluation_strategy` spelling is deprecated. "no" disables evaluation
    # during training.
    eval_strategy: "no"
    dataloader_num_workers: 0  # MPS: no multiprocessing
    remove_unused_columns: false
    gradient_checkpointing: true  # save RAM on 14B

  # Chat template (Qwen2.5 uses ChatML)
  chat_template: "chatml"
  dataset_text_field: "text"

# ─── DPO Phase ────────────────────────────────────────────────────────────────
# Preference optimisation that continues from the adapter produced by the SFT
# phase and writes a separate DPO adapter.
dpo:
  input_adapter: "adapters/fo-blog-v8/adapter"  # start from SFT
  output_dir: "adapters/fo-blog-v8-dpo/adapter"

  training:
    num_train_epochs: 2  # was 1 — more DPO for 14B
    per_device_train_batch_size: 1
    gradient_accumulation_steps: 8
    # Written with an explicit decimal point: YAML 1.1 loaders such as PyYAML
    # only recognise scientific notation as a float when the mantissa contains
    # a ".", so a bare `5e-5` would be loaded as the string "5e-5".
    learning_rate: 5.0e-5
    warmup_ratio: 0.05
    lr_scheduler_type: "cosine"
    max_seq_length: 4096
    bf16: true
    optim: "adamw_torch"
    logging_steps: 5
    save_steps: 50
    dataloader_num_workers: 0
    gradient_checkpointing: true

  dpo_params:
    beta: 0.1  # KL penalty (standard)
    loss_type: "sigmoid"  # standard DPO loss
    max_prompt_length: 512
    max_length: 4096

# ─── GGUF Conversion ──────────────────────────────────────────────────────────
# Settings for exporting the model to a quantized GGUF file and registering it
# with Ollama.
gguf:
  output_name: 'fo-blog-v8.gguf'
  quantization: 'Q4_K_M'
  ollama_model_name: 'fo-blog-v8'
  # Absolute paths to the llama.cpp tooling. The convert script path is pinned
  # to a specific Homebrew Cellar version directory.
  convert_script: '/opt/homebrew/Cellar/llama.cpp/8680/bin/convert_hf_to_gguf.py'
  quantize_bin: '/opt/homebrew/bin/llama-quantize'

  # System prompt for the Ollama Modelfile (literal block, newlines preserved)
  modelfile_system: |
    You are an expert technical writer specializing in optical networking and transceiver technology.

    STRICT CONSTRAINTS:
    - LENGTH: 700-1000 words ONLY. Stop at 1000 words maximum.
    - STRUCTURE: 1) Hook paragraph, 2) Technical sections (## headers), 3) Exactly 3 takeaways
    - TOPIC DISCIPLINE: Write ONLY about the exact topic requested. Zero drift.
    - NO REPETITION: Every sentence adds new information.
    - VOICE: Confident and direct. No hedging phrases.
    - AUDIENCE: Network engineers and IT professionals.

  # Generation parameters for the Ollama Modelfile
  modelfile_params:
    temperature: 0.7
    top_p: 0.9
    top_k: 40
    repeat_penalty: 1.15
    num_predict: 1500

# ─── Hardware ──────────────────────────────────────────────────────────────────
# Machine the pipeline is expected to run on.
hardware:
  device: 'mps'  # Apple Silicon M4 Max
  ram_gb: 48
  python: '/opt/homebrew/bin/python3.13'
  # Memory budget:
  #   - 14B model in fp16 ≈ 28GB — fits in 48GB with LoRA overhead (~4GB)
  #   - Training peak RAM estimate: ~36-40GB
  #   - Merge on CPU: device_map="cpu" to avoid MPS OOM during save_pretrained

# ─── Expected Timeline ─────────────────────────────────────────────────────────
# SFT:   ~8-12 hours (5 epochs, 14B, MPS)
# DPO:   ~2-4 hours (2 epochs, 14B)
# Merge: ~30 min (CPU)
# GGUF:  ~15 min
# Total: ~11-17 hours (run overnight)