# Extraction residue (commit summary and file-listing metadata), kept for reference:
#   - ADR-0001: Multi-Agent Coworking Architecture with LLM Gateway Orchestrator
#   - ADR-0002: Tier Assignment Strategy for Model Selection (cost-first escalation)
#   - ADR-0003: Confidence Gate Thresholds & Learning Cycle Intervals (6h/12h/24h cycles)
#   - ADR-0004: External Provider Fallback Chain Ordering (Cerebras → Groq → Mistral)
#   - Enhanced client SDK: Offline Ollama fallback, health checks, exponential backoff retry
#   - Integration tests: claude-code-integration.test.ts (14 test cases)
#   - PHASE_2F_DEPLOYMENT.md: Pre-deployment checklist, automated deploy, rollback plan
#   - Post-deployment verification procedures for health, client fallback, metrics
# File listing: 189 lines, 8.7 KiB, YAML
# ═══════════════════════════════════════════════════════════════════════════════
# blog-v7-training.yaml — fo-blog-v7 Training Configuration
#
# Key improvements over v6:
#   1. Anchored system prompt with STRICT length (700-1000w) and structure rules
#   2. 350+ training examples (250 generated + 150 RIPE/APNIC/NOG ingested)
#   3. Full 700-1000w articles as output_text (not keyword stubs)
#   4. DPO phase 2 with negative examples (v6 failure modes)
#   5. Diverse topics: transceivers + BGP + IPv6 + RIPE/NOG + data center
#
# v6 problem analysis:
#   - Training data avg 1152w but no word-count constraint in system prompt
#   - Model produces 5000w+ (inherits Qwen base behavior, no stopping signal)
#   - Topic drift: "400G request" → generic SFP+ content
#   - Missing intro paragraph
#   - Repeated sections verbatim
#
# v7 fix strategy:
#   - System prompt hard-encodes: 700-1000 words, hook+body+takeaways structure
#   - Input_text explicitly states word limit
#   - DPO training on (good, bad) pairs to reinforce constraint adherence
# ═══════════════════════════════════════════════════════════════════════════════

# Run identity. The name "fo-blog-v7" is reused by the adapter output
# directories and the Ollama model name further down in this file.
job_name: "fo-blog-v7"
description: "BlogLLM v7 — anchored constraints, 350+ diverse examples, DPO phase"

# ─── Data sources ──────────────────────────────────────────────────────────────

# Input datasets for both training phases, plus expected dataset sizes.
# NOTE(review): every path starts with "~". YAML does not expand it, so the
# consuming script presumably calls expanduser — confirm.
data:
  # Phase 1 SFT data (combine all v7 sources)
  sft_files:
    - "~/transceiver-training-data/v7-generated-sft.jsonl"  # 250 Claude-generated
    - "~/transceiver-training-data/v7-ripe-apnic-sft.jsonl"  # ~30 RIPE/APNIC/NOG
    - "~/transceiver-training-data/blog-fichtmueller-posts.jsonl"  # 24 real posts (upgrade SP)

  # Phase 2 DPO data
  dpo_file: "~/transceiver-training-data/v7-dpo-pairs.jsonl"  # ~200 chosen/rejected pairs

  # Pre-existing high-quality data (optional inclusion)
  supplemental_files:
    - path: "~/transceiver-training-data/master-training-dataset.jsonl"
      filter: "word_count >= 700"  # Only include longer examples
      max_samples: 50  # Limit to best 50

  # Dataset stats (updated after generate_v7_data.py completes)
  # NOTE(review): placed under `data:` based on the surrounding grouping — confirm
  # against the loader if it expects these keys elsewhere.
  estimated_total_sft: 350
  estimated_total_dpo: 200
  target_word_count_range: "700-1000"

# ─── Model ────────────────────────────────────────────────────────────────────

# Base model that both training phases adapt.
model:
  base: "Qwen/Qwen2.5-7B-Instruct"  # Same as v6 — proven on Mac Studio
  model_type: "qwen2.5"
  # NOTE(review): "main" is presumably a moving hub revision; pin a commit hash
  # if runs need to be reproducible — confirm how the trainer uses this value.
  revision: "main"

# ─── Phase 1: SFT (Supervised Fine-Tuning) ────────────────────────────────────

# LoRA fine-tuning settings for the first phase. The adapter written to
# `output_dir` is the starting point referenced by the DPO phase.
sft:
  device: "mps"  # Apple Silicon MPS (Mac Studio M4 Max)
  max_seq_length: 2048  # 700-1000w target ≈ 900-1300 tokens + prompt

  # LoRA adapter shape
  lora_r: 32  # Doubled from v6 (16→32) for stronger signal
  lora_alpha: 64  # 2x r
  lora_dropout: 0.05
  target_modules:
    - "q_proj"
    - "k_proj"
    - "v_proj"
    - "o_proj"
    - "gate_proj"
    - "up_proj"
    - "down_proj"

  # Optimisation schedule
  num_epochs: 4  # +1 vs v6 (larger adapter capacity)
  batch_size: 1  # MPS limitation
  gradient_accumulation: 8  # Effective batch = 8
  learning_rate: 1.5e-4  # Slightly lower than v6 (2e-4) for stability
  warmup_ratio: 0.1
  weight_decay: 0.01
  lr_scheduler: "cosine"

  # Logging, checkpointing and evaluation cadence (in optimiser steps)
  logging_steps: 10
  save_steps: 50
  eval_steps: 50
  eval_split: 0.1  # 10% for eval

  # MPS-specific
  gradient_checkpointing: true
  gradient_checkpointing_kwargs:
    use_reentrant: false
  bf16: false
  fp16: false
  use_cache: false

  # Output
  output_dir: "adapters/fo-blog-v7"

# ─── Phase 2: DPO (Direct Preference Optimization) ────────────────────────────

# Preference-tuning settings for the second phase, which continues from the
# SFT adapter and trains on the chosen/rejected pairs listed in `data.dpo_file`.
dpo:
  enabled: true
  base_adapter: "adapters/fo-blog-v7/adapter"  # Use SFT adapter as starting point
  output_dir: "adapters/fo-blog-v7-dpo"

  num_epochs: 1
  batch_size: 1
  gradient_accumulation: 4
  # DPO beta: how strongly the policy is held to the reference model.
  # Lower values let the policy move further from the reference to fit the
  # preference pairs; higher values keep it closer.
  beta: 0.1
  learning_rate: 5.0e-5
  max_seq_length: 2048
  max_prompt_length: 512

  # DPO-specific
  loss_type: "sigmoid"  # Standard DPO loss (vs ipo, spo)
  reference_free: false  # Use SFT adapter as reference model

# ─── Evaluation prompts ────────────────────────────────────────────────────────

# Fixed prompts and structural checks used to judge the trained model.
evaluation:
  n_eval_samples: 35  # 10% of ~350

  # Each prompt is a literal block scalar (`|-`): line breaks are kept and no
  # trailing newline is added.
  # NOTE(review): only the first prompt contains "Stay strictly on-topic.", and
  # the RPKI prompt targets an optical-infrastructure audience — confirm both
  # are intentional.
  eval_prompts:
    - input: |-
        Write a blog post on the following topic:

        **Topic:** QSFP-DD vs OSFP: Which 400G Form Factor Wins in 2026

        **Target audience:** IT managers and operators who evaluate and buy transceivers

        Remember: 700–1000 words, hook + technical sections + 3 takeaways. Stay strictly on-topic. Start writing now.
      check: "word_count_700_1000"

    - input: |-
        Write a blog post on the following topic:

        **Topic:** BGP Route Leaks: Detection, Impact, and Prevention in 2026

        **Target audience:** network engineers and NOC operators

        Remember: 700–1000 words, hook + technical sections + 3 takeaways. Start writing now.
      check: "word_count_700_1000"

    - input: |-
        Write a blog post on the following topic:

        **Topic:** RPKI Route Origin Validation: A Practical Deployment Guide

        **Target audience:** network engineers and architects who design and operate optical infrastructure

        Remember: 700–1000 words, hook + technical sections + 3 takeaways. Start writing now.
      check: "word_count_700_1000"

  # Structural checks; each item is a single-key mapping.
  quality_checks:
    - word_count_in_range: [700, 1000]
    - has_intro_paragraph: true  # Non-header first paragraph
    - has_headers: true  # At least 2x ##
    - has_takeaways: true  # Bullet points at end
    - no_topic_drift: true  # Topic mentioned in first 100 words
    - no_repeated_sections: true  # No paragraph appears twice

# ─── Post-training: GGUF conversion ──────────────────────────────────────────

# Export settings: quantised GGUF file plus the Ollama Modelfile text.
gguf:
  quantization: "Q4_K_M"
  output_name: "fo-blog-v7.gguf"
  ollama_model: "fo-blog-v7"
  # Literal block scalar: line breaks are preserved exactly. `{gguf_path}` is a
  # placeholder; presumably the conversion script substitutes it — confirm.
  modelfile_template: |
    FROM {gguf_path}

    SYSTEM """You are an expert technical writer specializing in optical networking and transceiver technology.

    STRICT CONSTRAINTS:
    - LENGTH: 700-1000 words ONLY. Stop at 1000 words maximum.
    - STRUCTURE: 1) Hook paragraph, 2) Technical sections (## headers), 3) Exactly 3 takeaways
    - TOPIC DISCIPLINE: Write ONLY about the exact topic requested.
    - VOICE: Confident and direct. No hedging.
    - AUDIENCE: Network engineers and IT professionals."""

    PARAMETER temperature 0.7
    PARAMETER top_p 0.9
    PARAMETER top_k 40
    PARAMETER repeat_penalty 1.15
    PARAMETER num_predict 1500

# ─── Training commands ────────────────────────────────────────────────────────
#
# Step 0: Generate training data (run BEFORE training; wait for the background
#         job to finish before starting Step 1)
#   cd packages/fine-tuner
#   python3 scripts/generate_v7_data.py &    # Background: 250 blogs
#   python3 scripts/ingest_ripe_apnic.py \
#     --nas-path /Volumes/KnowledgeLake/tashi-crawler/2026-03-06    # RIPE/NOG data
#
# Step 1: SFT Phase
#   python3 scripts/train_blog_v7.py --phase sft
#
# Step 2: Wait for training, then generate DPO pairs
#   python3 scripts/generate_dpo_pairs.py
#
# Step 3: DPO Phase
#   python3 scripts/train_blog_v7.py --phase dpo
#
# Step 4: Convert to GGUF + register in Ollama
#   python3 scripts/merge_and_convert.py --version v7
#
# Step 5: Test + deploy to Erik
#   curl -X POST http://localhost:11434/api/generate \
#     -d '{"model":"fo-blog-v7","prompt":"Write 700-1000w blog about QSFP-DD..."}'
#
# Estimated training time on Mac Studio M4 Max 48GB:
#   SFT 7B, 350 examples, 4 epochs: ~25-35 min
#   DPO, 200 pairs, 1 epoch: ~10-15 min
# ═══════════════════════════════════════════════════════════════════════════════