Rene Fichtmueller 2ca77d0aee feat: Phase 2F — Multi-Agent Integration (ADRs + Client Fallback + Tests)
- ADR-0001: Multi-Agent Coworking Architecture with LLM Gateway Orchestrator
- ADR-0002: Tier Assignment Strategy for Model Selection (cost-first escalation)
- ADR-0003: Confidence Gate Thresholds & Learning Cycle Intervals (6h/12h/24h cycles)
- ADR-0004: External Provider Fallback Chain Ordering (Cerebras → Groq → Mistral)
- Enhanced client SDK: Offline Ollama fallback, health checks, exponential backoff retry
- Integration tests: claude-code-integration.test.ts (14 test cases)
- PHASE_2F_DEPLOYMENT.md: Pre-deployment checklist, automated deploy, rollback plan
- Post-deployment verification procedures for health, client fallback, metrics
2026-04-19 21:39:44 +02:00

410 lines
14 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/usr/bin/env python3
"""
train_blog_v8.py — fo-blog-v8 Training (Qwen2.5-14B, MPS LoRA)
Phase 1: SFT (5 epochs, LoRA r=64, from merged v8 dataset)
Phase 2: DPO (2 epochs, from SFT adapter)
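Usage:
    python3 scripts/train_blog_v8.py --phase sft
    python3 scripts/train_blog_v8.py --phase dpo
    python3 scripts/train_blog_v8.py --phase both     # SFT then DPO sequentially
    python3 scripts/train_blog_v8.py --phase convert  # merge adapter, convert to GGUF, register in Ollama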
Hardware: Apple Silicon M4 Max (48GB), MPS backend
Estimated: SFT ~10-14h, DPO ~3-5h (run overnight)
"""
from __future__ import annotations
import argparse
import json
import os
import sys
from pathlib import Path
# ─── Paths ────────────────────────────────────────────────────────────────────
# Project root: two directory levels above this file.
FINE_TUNER_DIR = Path(__file__).parent.parent
# Training data lives outside the repository, in the user's home directory.
DATA_DIR = Path.home() / "transceiver-training-data"
SFT_DATA = DATA_DIR / "v8-sft-merged.jsonl"
DPO_DATA = DATA_DIR / "v8-dpo-merged.jsonl"
# Output locations for the LoRA adapters and the merged full-weight model.
SFT_ADAPTER = FINE_TUNER_DIR / "adapters" / "fo-blog-v8" / "adapter"
DPO_ADAPTER = FINE_TUNER_DIR / "adapters" / "fo-blog-v8-dpo" / "adapter"
MERGED_DIR = FINE_TUNER_DIR / "models" / "fo-blog-v8" / "merged"
BASE_MODEL = "Qwen/Qwen2.5-14B-Instruct"
# Default system prompt: used for SFT records that carry no "system_prompt"
# field and embedded in the Ollama Modelfile.
# The numeric ranges use en dashes (U+2013); without them the limits read as
# "7001000 words", "23 sentences" and "34 H2 sections", which contradicts the
# 1000-word maximum stated on the same line.
SYSTEM_PROMPT = """You are an expert technical writer specializing in optical networking, transceiver technology, and network infrastructure.
STRICT CONSTRAINTS — Follow exactly, no exceptions:
- LENGTH: 700–1000 words. Count carefully. Stop at 1000 words maximum.
- STRUCTURE (mandatory, in this order):
1. HOOK paragraph — 2–3 sentences stating the problem this post addresses
2. Technical sections — 3–4 H2 sections covering the topic in depth
3. PRACTICAL TAKEAWAYS — exactly 3 bullet points, actionable
- TOPIC DISCIPLINE: Write ONLY about the exact topic requested. Zero drift.
- NO REPETITION: Every sentence must add new information. No restating.
- VOICE: Confident, direct. No hedging phrases like "it's worth noting".
- AUDIENCE: Network engineers and IT professionals. Assume technical fluency.
- FORMAT: Markdown. Use ## for section headers. Use **bold** for key terms.
Do not summarize what you are about to write. Start with the hook directly."""
def build_chatml(system: str, user: str, assistant: str) -> str:
    """Render one system/user/assistant exchange as a single ChatML string.

    Every turn is emitted as ``<|im_start|>``, the role name, a newline, the
    turn's content and ``<|im_end|>``. The three turns are separated by a
    newline, and nothing follows the closing marker of the assistant turn.
    """
    turns = (
        ("system", system),
        ("user", user),
        ("assistant", assistant),
    )
    rendered = [
        f"<|im_start|>{role}\n{content}<|im_end|>" for role, content in turns
    ]
    return "\n".join(rendered)
def load_sft_dataset(tokenizer, max_seq_length: int = 4096):
    """Load the SFT examples from ``SFT_DATA`` as a one-column text dataset.

    Every non-blank line of the JSONL file is parsed as a JSON object. Its
    ``input_text`` and ``output_text`` fields become the user and assistant
    turns of a ChatML string built by ``build_chatml``; ``system_prompt``
    supplies the system turn and falls back to ``SYSTEM_PROMPT`` when the
    field is absent or ``null``.

    Lines that are not valid JSON objects, and objects with an empty user or
    assistant text, are skipped. Skipped lines are counted and reported so a
    damaged dataset does not go unnoticed.

    Args:
        tokenizer: Accepted for call-site compatibility; not used here. The
            function returns raw text, not token ids.
        max_seq_length: Accepted for call-site compatibility; not used here.

    Returns:
        A ``datasets.Dataset`` with a single ``text`` column.

    Raises:
        FileNotFoundError: If ``SFT_DATA`` does not exist.
    """
    from datasets import Dataset

    if not SFT_DATA.exists():
        raise FileNotFoundError(
            f"SFT data not found: {SFT_DATA}\n"
            "Run: python3 scripts/consolidate_v8_dataset.py"
        )

    records = []
    malformed = 0   # lines that are not a JSON object
    incomplete = 0  # objects without both a user and an assistant text
    with open(SFT_DATA, encoding="utf-8") as f:
        for raw_line in f:
            line = raw_line.strip()
            if not line:
                continue
            try:
                item = json.loads(line)
            except json.JSONDecodeError:
                malformed += 1
                continue
            # A valid JSON scalar or array has no .get(); treat it as malformed
            # instead of crashing the whole load.
            if not isinstance(item, dict):
                malformed += 1
                continue
            user = item.get("input_text", "")
            assistant = item.get("output_text", "")
            if not (user and assistant):
                incomplete += 1
                continue
            # An explicit JSON null would otherwise be rendered as the literal
            # text "None" in the system turn.
            system = item.get("system_prompt")
            if system is None:
                system = SYSTEM_PROMPT
            records.append({"text": build_chatml(system, user, assistant)})

    print(f"Loaded {len(records)} SFT examples from {SFT_DATA.name}")
    if malformed or incomplete:
        print(
            f"WARNING: skipped {malformed} malformed and {incomplete} "
            f"incomplete line(s) in {SFT_DATA.name}"
        )
    return Dataset.from_list(records)
def load_dpo_dataset():
    """Load the preference pairs from ``DPO_DATA``.

    Every non-blank line of the JSONL file must be a JSON object with the
    keys ``prompt``, ``chosen`` and ``rejected``; only those three keys are
    kept. Lines that are not valid JSON, are not objects, or lack one of the
    keys are skipped. Skipped lines are counted and reported so a damaged
    dataset does not go unnoticed.

    Returns:
        A ``datasets.Dataset`` with ``prompt``, ``chosen`` and ``rejected``
        columns.

    Raises:
        FileNotFoundError: If ``DPO_DATA`` does not exist.
    """
    from datasets import Dataset

    if not DPO_DATA.exists():
        raise FileNotFoundError(
            f"DPO data not found: {DPO_DATA}\n"
            "Run: python3 scripts/consolidate_v8_dataset.py"
        )

    required_keys = ("prompt", "chosen", "rejected")
    records = []
    skipped = 0
    with open(DPO_DATA, encoding="utf-8") as f:
        for raw_line in f:
            line = raw_line.strip()
            if not line:
                continue
            try:
                item = json.loads(line)
                # TypeError covers valid JSON that is not an object
                # (a list, string or number cannot be indexed by key).
                record = {key: item[key] for key in required_keys}
            except (json.JSONDecodeError, KeyError, TypeError):
                skipped += 1
                continue
            records.append(record)

    print(f"Loaded {len(records)} DPO pairs from {DPO_DATA.name}")
    if skipped:
        print(f"WARNING: skipped {skipped} malformed line(s) in {DPO_DATA.name}")
    return Dataset.from_list(records)
def run_sft() -> None:
    """Phase 1: supervised fine-tuning of ``BASE_MODEL`` with a LoRA adapter.

    Loads the tokenizer and the ChatML dataset, loads the base model in
    bfloat16 on MPS (or CPU when MPS is unavailable), trains a LoRA adapter
    with trl's ``SFTTrainer`` and writes the adapter plus tokenizer to
    ``SFT_ADAPTER``.

    Raises:
        FileNotFoundError: Propagated from ``load_sft_dataset`` when the SFT
            data file is missing.
    """
    import torch
    from peft import LoraConfig, TaskType
    from transformers import AutoModelForCausalLM, AutoTokenizer
    from trl import SFTTrainer, SFTConfig

    # Single source of truth for values that appear both in the configuration
    # and in the log messages.
    lora_rank = 64
    num_epochs = 5
    max_length = 4096
    # Probe the backend once and reuse the result for logging and placement.
    device = "mps" if torch.backends.mps.is_available() else "cpu"

    print(f"=== fo-blog-v8 SFT: {BASE_MODEL} → LoRA r={lora_rank} ===")
    print(f"Device: {device.upper()}")

    # ── Tokenizer ──
    print("Loading tokenizer...")
    tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL, trust_remote_code=True)
    # Fall back to EOS only when the tokenizer ships no pad token. Reusing EOS
    # as padding can lead collators that ignore pad positions to also drop the
    # real end-of-turn token from the loss.
    if tokenizer.pad_token is None:
        tokenizer.pad_token = tokenizer.eos_token
    tokenizer.padding_side = "right"

    # ── Dataset ──
    dataset = load_sft_dataset(tokenizer, max_seq_length=max_length)

    # ── Model ──
    print(f"Loading base model: {BASE_MODEL}")
    model = AutoModelForCausalLM.from_pretrained(
        BASE_MODEL,
        dtype=torch.bfloat16,  # bf16 for M4 Max (transformers 5.x: dtype= not torch_dtype=)
        device_map=device,
        trust_remote_code=True,
    )
    # The KV cache is only useful for generation; it is switched off for
    # training, which here runs with gradient checkpointing.
    model.config.use_cache = False

    # ── LoRA Config ──
    lora_config = LoraConfig(
        r=lora_rank,
        lora_alpha=128,
        lora_dropout=0.05,
        target_modules=["q_proj", "k_proj", "v_proj", "o_proj",
                        "gate_proj", "up_proj", "down_proj"],
        bias="none",
        task_type=TaskType.CAUSAL_LM,
    )

    # ── Training Config (trl 1.x: SFTConfig carries both TrainingArguments + SFT params) ──
    SFT_ADAPTER.mkdir(parents=True, exist_ok=True)
    training_args = SFTConfig(
        output_dir=str(SFT_ADAPTER),
        num_train_epochs=num_epochs,
        per_device_train_batch_size=1,
        gradient_accumulation_steps=8,  # 1 x 8 = 8 sequences per optimizer step
        learning_rate=1.2e-4,
        warmup_ratio=0.05,
        lr_scheduler_type="cosine",
        bf16=True,
        fp16=False,
        optim="adamw_torch",
        weight_decay=0.01,
        max_grad_norm=1.0,
        logging_steps=10,
        save_steps=100,
        save_total_limit=2,
        eval_strategy="no",
        dataloader_num_workers=0,
        remove_unused_columns=False,
        gradient_checkpointing=True,
        report_to="none",
        # SFT-specific (moved from SFTTrainer in trl 1.x; trl 1.2: max_length not max_seq_length)
        dataset_text_field="text",
        max_length=max_length,
        packing=False,
    )

    # ── Trainer ──
    trainer = SFTTrainer(
        model=model,
        train_dataset=dataset,
        peft_config=lora_config,
        processing_class=tokenizer,
        args=training_args,
    )
    print(f"Starting SFT training: {len(dataset)} examples, {num_epochs} epochs...")
    trainer.train()

    print(f"Saving SFT adapter → {SFT_ADAPTER}")
    trainer.save_model(str(SFT_ADAPTER))
    tokenizer.save_pretrained(str(SFT_ADAPTER))
    print("SFT Phase COMPLETE.")
def run_dpo() -> None:
    """Phase 2: Direct Preference Optimization on top of the SFT adapter.

    Loads ``BASE_MODEL`` in bfloat16, attaches the LoRA adapter stored in
    ``SFT_ADAPTER`` in trainable mode, continues training it with trl's
    ``DPOTrainer`` on the preference pairs and writes the resulting adapter
    plus tokenizer to ``DPO_ADAPTER``.

    Raises:
        FileNotFoundError: If no saved SFT adapter is present, or (propagated
            from ``load_dpo_dataset``) if the DPO data file is missing.
    """
    import torch
    from peft import PeftModel
    from transformers import AutoModelForCausalLM, AutoTokenizer
    from trl import DPOTrainer, DPOConfig

    num_epochs = 2

    print("=== fo-blog-v8 DPO: SFT adapter → DPO ===")

    # run_sft() creates the adapter directory before training starts, so the
    # directory alone does not prove that SFT finished. adapter_config.json is
    # the file PEFT needs in order to load the adapter at all.
    if not (SFT_ADAPTER / "adapter_config.json").exists():
        raise FileNotFoundError(
            f"SFT adapter not found at {SFT_ADAPTER}\n"
            "Run: python3 scripts/train_blog_v8.py --phase sft"
        )

    # ── Tokenizer ──
    tokenizer = AutoTokenizer.from_pretrained(str(SFT_ADAPTER), trust_remote_code=True)
    # Fall back to EOS only when the saved tokenizer has no pad token.
    if tokenizer.pad_token is None:
        tokenizer.pad_token = tokenizer.eos_token

    # ── Dataset ──
    dataset = load_dpo_dataset()

    # ── Model (base + SFT adapter) ──
    print("Loading model + SFT adapter...")
    device = "mps" if torch.backends.mps.is_available() else "cpu"
    base_model = AutoModelForCausalLM.from_pretrained(
        BASE_MODEL,
        dtype=torch.bfloat16,
        device_map=device,
        trust_remote_code=True,
    )
    # is_trainable=True is required: by default PEFT loads an adapter frozen
    # for inference, which would leave DPO without any trainable parameters.
    model = PeftModel.from_pretrained(
        base_model, str(SFT_ADAPTER), is_trainable=True
    )

    # ── DPO Config ──
    DPO_ADAPTER.mkdir(parents=True, exist_ok=True)
    dpo_config = DPOConfig(
        output_dir=str(DPO_ADAPTER),
        num_train_epochs=num_epochs,
        per_device_train_batch_size=1,
        gradient_accumulation_steps=8,  # 1 x 8 = 8 pairs per optimizer step
        learning_rate=5e-5,
        warmup_ratio=0.05,
        lr_scheduler_type="cosine",
        bf16=True,
        fp16=False,
        optim="adamw_torch",
        max_grad_norm=1.0,
        logging_steps=5,
        save_steps=50,
        save_total_limit=2,
        eval_strategy="no",
        dataloader_num_workers=0,
        gradient_checkpointing=True,
        report_to="none",
        # DPO-specific
        beta=0.1,
        loss_type="sigmoid",
        max_prompt_length=512,
        max_length=4096,
    )

    # ── Trainer ──
    trainer = DPOTrainer(
        model=model,
        ref_model=None,  # use implicit reference via peft
        args=dpo_config,
        train_dataset=dataset,
        processing_class=tokenizer,
    )
    print(f"Starting DPO training: {len(dataset)} pairs, {num_epochs} epochs...")
    trainer.train()

    print(f"Saving DPO adapter → {DPO_ADAPTER}")
    trainer.save_model(str(DPO_ADAPTER))
    tokenizer.save_pretrained(str(DPO_ADAPTER))
    print("DPO Phase COMPLETE.")
def run_merge_and_convert() -> None:
    """Merge adapter → full model, convert to GGUF, register in Ollama.

    Steps, each skipped when its output already exists:

    1. Merge the DPO adapter (or the SFT adapter when no DPO adapter has been
       saved) into ``BASE_MODEL`` on CPU and save the result to ``MERGED_DIR``.
    2. Convert the merged model to an f16 GGUF and quantize it to Q4_K_M; the
       f16 intermediate is deleted after quantization.
    3. Write an Ollama Modelfile and register the model as ``fo-blog-v8``.

    Prints a message and returns without doing anything when neither adapter
    has been saved.

    Raises:
        subprocess.CalledProcessError: If the GGUF conversion, the
            quantization or ``ollama create`` exits with a non-zero status.
    """
    import shutil
    import subprocess

    import torch
    from peft import PeftModel
    from transformers import AutoModelForCausalLM, AutoTokenizer

    def has_saved_adapter(path: Path) -> bool:
        # The training functions create the adapter directory before training
        # starts, so a crashed run leaves an empty directory behind.
        # adapter_config.json is the file PEFT needs to load the adapter.
        return (path / "adapter_config.json").exists()

    # Prefer DPO adapter, fall back to SFT
    adapter_path = next(
        (path for path in (DPO_ADAPTER, SFT_ADAPTER) if has_saved_adapter(path)),
        None,
    )
    if adapter_path is None:
        print("No adapter found. Run --phase sft first.")
        return

    print("=== fo-blog-v8 Merge + GGUF ===")
    print(f"Adapter: {adapter_path}")

    # ── Merge ──
    MERGED_DIR.mkdir(parents=True, exist_ok=True)
    # Sum all weight files: save_pretrained() may write either a single
    # model.safetensors or several model-XXXXX-of-YYYYY.safetensors shards.
    # NOTE(review): the skip is based on size only; a merged model produced
    # from a different adapter is reused as-is. Delete MERGED_DIR to force a
    # fresh merge.
    merged_bytes = sum(
        weights.stat().st_size for weights in MERGED_DIR.glob("*.safetensors")
    )
    if merged_bytes > 10_000_000_000:
        print(f" Already merged ({merged_bytes/1e9:.1f} GB) — skip merge")
    else:
        print(" Loading base model on CPU for merge (avoids MPS OOM)...")
        tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL, trust_remote_code=True)
        model = AutoModelForCausalLM.from_pretrained(
            BASE_MODEL, dtype=torch.float16,
            device_map="cpu", trust_remote_code=True,
        )
        print(" Loading adapter...")
        model = PeftModel.from_pretrained(model, str(adapter_path))
        print(" Merging...")
        model = model.merge_and_unload()
        print(f" Saving merged model → {MERGED_DIR}")
        model.save_pretrained(str(MERGED_DIR), safe_serialization=True)
        tokenizer.save_pretrained(str(MERGED_DIR))
        del model  # drop the reference before the conversion subprocess runs
        print(" Merge done.")

    # ── Copy tokenizer files from HF cache if needed ──
    hf_cache = Path.home() / ".cache/huggingface/hub"
    snaps = list(hf_cache.glob("models--Qwen--Qwen2.5-14B-Instruct/snapshots/*/tokenizer.json"))
    if snaps:
        snap_dir = snaps[0].parent
        for fname in ["tokenizer.json", "tokenizer_config.json", "vocab.json", "merges.txt"]:
            if (snap_dir / fname).exists() and not (MERGED_DIR / fname).exists():
                shutil.copy2(snap_dir / fname, MERGED_DIR / fname)

    # ── GGUF Conversion ──
    gguf_dir = FINE_TUNER_DIR / "models" / "fo-blog-v8"
    gguf_f16 = gguf_dir / "fo-blog-v8-f16.gguf"
    gguf_q4 = gguf_dir / "fo-blog-v8.gguf"
    # NOTE: machine-specific Homebrew paths; the convert script path is pinned
    # to one installed llama.cpp version.
    convert_script = "/opt/homebrew/Cellar/llama.cpp/8680/bin/convert_hf_to_gguf.py"
    quantize_bin = "/opt/homebrew/bin/llama-quantize"
    python_bin = "/opt/homebrew/bin/python3.13"

    if gguf_q4.exists():
        # The f16 file is only an intermediate that is deleted after
        # quantization, so it must not be rebuilt once the Q4 file exists.
        print(" Q4_K_M GGUF exists — skip conversion")
    else:
        if not gguf_f16.exists():
            print(" Converting to GGUF f16...")
            subprocess.run(
                [python_bin, convert_script, str(MERGED_DIR),
                 "--outfile", str(gguf_f16), "--outtype", "f16"],
                check=True,
            )
        else:
            print(f" F16 GGUF exists ({gguf_f16.stat().st_size/1e9:.1f} GB) — skip")
        print(" Quantizing to Q4_K_M...")
        subprocess.run(
            [quantize_bin, str(gguf_f16), str(gguf_q4), "Q4_K_M"],
            check=True,
        )
        gguf_f16.unlink(missing_ok=True)
    print(f" Q4_K_M GGUF: {gguf_q4} ({gguf_q4.stat().st_size/1e9:.1f} GB)")

    # ── Ollama Registration ──
    modelfile_path = gguf_dir / "Modelfile-v8"
    modelfile_lines = [
        f"FROM {gguf_q4.resolve()}",
        f'SYSTEM """{SYSTEM_PROMPT}"""',
        "PARAMETER temperature 0.7",
        "PARAMETER top_p 0.9",
        "PARAMETER top_k 40",
        "PARAMETER repeat_penalty 1.15",
        "PARAMETER num_predict 1500",
    ]
    # The system prompt contains non-ASCII punctuation, so the encoding is
    # fixed explicitly and does not depend on the locale.
    modelfile_path.write_text("\n".join(modelfile_lines) + "\n", encoding="utf-8")

    print(" Registering in Ollama as fo-blog-v8...")
    subprocess.run(["ollama", "create", "fo-blog-v8", "-f", str(modelfile_path)], check=True)

    # Compare the model name exactly (ignoring the ":tag" suffix) so that a
    # differently named model such as "fo-blog-v8-dpo" is not mistaken for it.
    result = subprocess.run(["ollama", "list"], capture_output=True, text=True)
    registered = any(
        fields and fields[0].split(":")[0] == "fo-blog-v8"
        for fields in (row.split() for row in result.stdout.splitlines())
    )
    print(f" Ollama registration: {'✓ SUCCESS' if registered else '✗ FAILED'}")
    print(f"\nDONE: {gguf_q4}")
def main() -> None:
    """Command-line entry point: run the stage(s) selected by ``--phase``.

    ``sft`` (the default) and ``dpo`` run one training phase each, ``both``
    runs SFT followed by DPO, and ``convert`` runs the merge/GGUF/Ollama step.
    """
    cli = argparse.ArgumentParser(description="Train fo-blog-v8 (Qwen2.5-14B LoRA)")
    cli.add_argument(
        "--phase",
        choices=["sft", "dpo", "both", "convert"],
        default="sft",
        help="Training phase to run (default: sft)",
    )
    selected = cli.parse_args().phase

    # Stages to execute, in order, for each accepted --phase value.
    plan = {
        "sft": (run_sft,),
        "dpo": (run_dpo,),
        "both": (run_sft, run_dpo),
        "convert": (run_merge_and_convert,),
    }
    for stage in plan[selected]:
        stage()


if __name__ == "__main__":
    main()