#!/usr/bin/env python3
"""
train_blog_v8.py — fo-blog-v8 Training (Qwen2.5-14B, MPS LoRA)

Phase 1: SFT (5 epochs, LoRA r=64, from merged v8 dataset)
Phase 2: DPO (2 epochs, from SFT adapter)

Usage:
    python3 scripts/train_blog_v8.py --phase sft
    python3 scripts/train_blog_v8.py --phase dpo
    python3 scripts/train_blog_v8.py --phase both     # SFT then DPO sequentially
    python3 scripts/train_blog_v8.py --phase convert  # merge + GGUF + Ollama

Hardware: Apple Silicon M4 Max (48GB), MPS backend
Estimated: SFT ~10-14h, DPO ~3-5h (run overnight)
"""

from __future__ import annotations

import argparse
import json
import os
import shutil
import subprocess
import sys
from pathlib import Path

# ─── Paths ────────────────────────────────────────────────────────────────────
FINE_TUNER_DIR = Path(__file__).parent.parent
DATA_DIR = Path.home() / "transceiver-training-data"
SFT_DATA = DATA_DIR / "v8-sft-merged.jsonl"
DPO_DATA = DATA_DIR / "v8-dpo-merged.jsonl"
SFT_ADAPTER = FINE_TUNER_DIR / "adapters" / "fo-blog-v8" / "adapter"
DPO_ADAPTER = FINE_TUNER_DIR / "adapters" / "fo-blog-v8-dpo" / "adapter"
MERGED_DIR = FINE_TUNER_DIR / "models" / "fo-blog-v8" / "merged"

BASE_MODEL = "Qwen/Qwen2.5-14B-Instruct"

# File that marks a directory as holding a *saved* adapter. The adapter
# directories themselves are created before training starts, so their mere
# existence says nothing about whether training finished.
_ADAPTER_CONFIG_NAME = "adapter_config.json"

# Merged weights smaller than this are treated as missing/incomplete.
_MIN_MERGED_BYTES = 10_000_000_000

SYSTEM_PROMPT = """You are an expert technical writer specializing in optical networking, transceiver technology, and network infrastructure.

STRICT CONSTRAINTS — Follow exactly, no exceptions:
- LENGTH: 700–1000 words. Count carefully. Stop at 1000 words maximum.
- STRUCTURE (mandatory, in this order):
  1. HOOK paragraph — 2–3 sentences stating the problem this post addresses
  2. Technical sections — 3–4 H2 sections covering the topic in depth
  3. PRACTICAL TAKEAWAYS — exactly 3 bullet points, actionable
- TOPIC DISCIPLINE: Write ONLY about the exact topic requested. Zero drift.
- NO REPETITION: Every sentence must add new information. No restating.
- VOICE: Confident, direct. No hedging phrases like "it's worth noting".
- AUDIENCE: Network engineers and IT professionals. Assume technical fluency.
- FORMAT: Markdown. Use ## for section headers. Use **bold** for key terms.

Do not summarize what you are about to write. Start with the hook directly."""


def build_chatml(system: str, user: str, assistant: str) -> str:
    """Build ChatML-formatted training string.

    Args:
        system: System prompt text.
        user: User turn text.
        assistant: Assistant turn text (the training target).

    Returns:
        The three turns wrapped in ``<|im_start|>``/``<|im_end|>`` markers,
        separated by newlines, with no trailing newline.
    """
    return (
        f"<|im_start|>system\n{system}<|im_end|>\n"
        f"<|im_start|>user\n{user}<|im_end|>\n"
        f"<|im_start|>assistant\n{assistant}<|im_end|>"
    )


def _read_jsonl(path: Path) -> tuple[list[dict], int]:
    """Parse a JSON Lines file into dicts.

    Blank lines are ignored. Lines that are not valid JSON, or that decode to
    something other than a JSON object, are skipped and counted.

    Returns:
        ``(objects, skipped)`` where ``skipped`` is the number of rejected lines.
    """
    objects: list[dict] = []
    skipped = 0
    with open(path, encoding="utf-8") as handle:
        for raw in handle:
            raw = raw.strip()
            if not raw:
                continue
            try:
                parsed = json.loads(raw)
            except json.JSONDecodeError:
                skipped += 1
                continue
            if isinstance(parsed, dict):
                objects.append(parsed)
            else:
                skipped += 1
    return objects, skipped


def _build_sft_records(items: list[dict]) -> tuple[list[dict], int]:
    """Turn raw SFT rows into ``{"text": chatml}`` records.

    Rows without a non-empty ``input_text`` and ``output_text`` are dropped.
    A missing or null ``system_prompt`` falls back to ``SYSTEM_PROMPT``.

    Returns:
        ``(records, dropped)`` where ``dropped`` counts the incomplete rows.
    """
    records: list[dict] = []
    dropped = 0
    for item in items:
        system = item.get("system_prompt")
        if system is None:
            system = SYSTEM_PROMPT
        user = item.get("input_text", "")
        assistant = item.get("output_text", "")
        if user and assistant:
            records.append({"text": build_chatml(system, user, assistant)})
        else:
            dropped += 1
    return records, dropped


def _build_dpo_records(items: list[dict]) -> tuple[list[dict], int]:
    """Keep only the ``prompt``/``chosen``/``rejected`` fields of each DPO row.

    Rows missing any of the three keys are dropped.

    Returns:
        ``(records, dropped)`` where ``dropped`` counts the incomplete rows.
    """
    wanted = ("prompt", "chosen", "rejected")
    records: list[dict] = []
    dropped = 0
    for item in items:
        if all(key in item for key in wanted):
            records.append({key: item[key] for key in wanted})
        else:
            dropped += 1
    return records, dropped


def _report_load(kind: str, path: Path, loaded: int, skipped: int) -> None:
    """Print the load summary and fail fast when nothing usable was read.

    Raises:
        ValueError: If ``loaded`` is zero, so a multi-hour run is never
            started on an empty dataset.
    """
    if skipped:
        print(f"WARNING: skipped {skipped} malformed/incomplete line(s) in {path.name}")
    if not loaded:
        raise ValueError(f"No usable {kind} found in {path}")


def load_sft_dataset(tokenizer, max_seq_length: int = 4096):
    """Load the SFT dataset from v8-sft-merged.jsonl.

    Args:
        tokenizer: Unused here; kept for call-site compatibility (tokenization
            is left to the trainer).
        max_seq_length: Unused here; kept for call-site compatibility.

    Returns:
        A ``datasets.Dataset`` with a single ``text`` column of ChatML strings.

    Raises:
        FileNotFoundError: If the SFT data file does not exist.
        ValueError: If the file yields no usable examples.
    """
    from datasets import Dataset

    if not SFT_DATA.exists():
        raise FileNotFoundError(
            f"SFT data not found: {SFT_DATA}\n"
            "Run: python3 scripts/consolidate_v8_dataset.py"
        )

    items, malformed = _read_jsonl(SFT_DATA)
    records, incomplete = _build_sft_records(items)

    print(f"Loaded {len(records)} SFT examples from {SFT_DATA.name}")
    _report_load("SFT examples", SFT_DATA, len(records), malformed + incomplete)
    return Dataset.from_list(records)


def load_dpo_dataset():
    """Load the DPO dataset from v8-dpo-merged.jsonl.

    Returns:
        A ``datasets.Dataset`` with ``prompt``, ``chosen`` and ``rejected``
        columns.

    Raises:
        FileNotFoundError: If the DPO data file does not exist.
        ValueError: If the file yields no usable pairs.
    """
    from datasets import Dataset

    if not DPO_DATA.exists():
        raise FileNotFoundError(
            f"DPO data not found: {DPO_DATA}\n"
            "Run: python3 scripts/consolidate_v8_dataset.py"
        )

    items, malformed = _read_jsonl(DPO_DATA)
    records, incomplete = _build_dpo_records(items)

    print(f"Loaded {len(records)} DPO pairs from {DPO_DATA.name}")
    _report_load("DPO pairs", DPO_DATA, len(records), malformed + incomplete)
    return Dataset.from_list(records)


def _adapter_ready(adapter_dir: Path) -> bool:
    """Return True when ``adapter_dir`` contains a saved adapter config."""
    return (adapter_dir / _ADAPTER_CONFIG_NAME).is_file()


def _release_memory() -> None:
    """Drop unreachable objects and return cached MPS memory between phases."""
    import gc

    import torch

    gc.collect()
    if torch.backends.mps.is_available():
        torch.mps.empty_cache()


def run_sft() -> None:
    """Phase 1: Supervised Fine-Tuning with LoRA.

    Trains a LoRA adapter on ``BASE_MODEL`` using the merged SFT dataset and
    saves the adapter and tokenizer into ``SFT_ADAPTER``.

    Raises:
        FileNotFoundError: If the SFT data file is missing.
        ValueError: If the SFT data file has no usable examples.
    """
    import torch
    from peft import LoraConfig, TaskType
    from transformers import AutoModelForCausalLM, AutoTokenizer
    from trl import SFTConfig, SFTTrainer

    num_epochs = 5
    device = "mps" if torch.backends.mps.is_available() else "cpu"

    print(f"=== fo-blog-v8 SFT: {BASE_MODEL} → LoRA r=64 ===")
    print(f"Device: {device.upper()}")

    # ── Tokenizer ──
    print("Loading tokenizer...")
    tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL, trust_remote_code=True)
    # NOTE(review): this overwrites any pad token the checkpoint already
    # defines; confirm that pad == eos is intended for this tokenizer.
    tokenizer.pad_token = tokenizer.eos_token
    tokenizer.padding_side = "right"

    # ── Dataset ── (loaded before the model so data problems surface early)
    dataset = load_sft_dataset(tokenizer, max_seq_length=4096)

    # ── Model ──
    print(f"Loading base model: {BASE_MODEL}")
    model = AutoModelForCausalLM.from_pretrained(
        BASE_MODEL,
        dtype=torch.bfloat16,  # bf16 for M4 Max (transformers 5.x: dtype= not torch_dtype=)
        device_map=device,
        trust_remote_code=True,
    )
    model.config.use_cache = False

    # ── LoRA Config ──
    lora_config = LoraConfig(
        r=64,
        lora_alpha=128,
        lora_dropout=0.05,
        target_modules=[
            "q_proj", "k_proj", "v_proj", "o_proj",
            "gate_proj", "up_proj", "down_proj",
        ],
        bias="none",
        task_type=TaskType.CAUSAL_LM,
    )

    # ── Training Config (trl 1.x: SFTConfig carries both TrainingArguments + SFT params) ──
    SFT_ADAPTER.mkdir(parents=True, exist_ok=True)
    training_args = SFTConfig(
        output_dir=str(SFT_ADAPTER),
        num_train_epochs=num_epochs,
        per_device_train_batch_size=1,
        gradient_accumulation_steps=8,
        learning_rate=1.2e-4,
        warmup_ratio=0.05,
        lr_scheduler_type="cosine",
        bf16=True,
        fp16=False,
        optim="adamw_torch",
        weight_decay=0.01,
        max_grad_norm=1.0,
        logging_steps=10,
        save_steps=100,
        save_total_limit=2,
        eval_strategy="no",
        dataloader_num_workers=0,
        remove_unused_columns=False,
        gradient_checkpointing=True,
        report_to="none",
        # SFT-specific (moved from SFTTrainer in trl 1.x; trl 1.2: max_length not max_seq_length)
        dataset_text_field="text",
        max_length=4096,
        packing=False,
    )

    # ── Trainer ──
    trainer = SFTTrainer(
        model=model,
        train_dataset=dataset,
        peft_config=lora_config,
        processing_class=tokenizer,
        args=training_args,
    )

    print(f"Starting SFT training: {len(dataset)} examples, {num_epochs} epochs...")
    trainer.train()

    print(f"Saving SFT adapter → {SFT_ADAPTER}")
    trainer.save_model(str(SFT_ADAPTER))
    tokenizer.save_pretrained(str(SFT_ADAPTER))
    print("SFT Phase COMPLETE.")


def run_dpo() -> None:
    """Phase 2: Direct Preference Optimization.

    Loads ``BASE_MODEL`` plus the saved SFT adapter, continues training that
    adapter with DPO, and saves the result into ``DPO_ADAPTER``.

    Raises:
        FileNotFoundError: If no saved SFT adapter or no DPO data file exists.
        ValueError: If the DPO data file has no usable pairs.
    """
    import torch
    from peft import PeftModel
    from transformers import AutoModelForCausalLM, AutoTokenizer
    from trl import DPOConfig, DPOTrainer

    num_epochs = 2

    print("=== fo-blog-v8 DPO: SFT adapter → DPO ===")

    # The directory is created before SFT training begins, so look for the
    # saved adapter config rather than the directory itself.
    if not _adapter_ready(SFT_ADAPTER):
        raise FileNotFoundError(
            f"SFT adapter not found at {SFT_ADAPTER}\n"
            "Run: python3 scripts/train_blog_v8.py --phase sft"
        )

    # ── Tokenizer ──
    tokenizer = AutoTokenizer.from_pretrained(str(SFT_ADAPTER), trust_remote_code=True)
    # NOTE(review): overwrites any existing pad token; confirm this is intended.
    tokenizer.pad_token = tokenizer.eos_token

    # ── Dataset ──
    dataset = load_dpo_dataset()

    # ── Model (base + SFT adapter) ──
    print("Loading model + SFT adapter...")
    device = "mps" if torch.backends.mps.is_available() else "cpu"
    base_model = AutoModelForCausalLM.from_pretrained(
        BASE_MODEL,
        dtype=torch.bfloat16,
        device_map=device,
        trust_remote_code=True,
    )
    # is_trainable=True is required: by default PEFT loads adapters frozen
    # (inference mode), which would leave DPO with nothing to optimise.
    model = PeftModel.from_pretrained(base_model, str(SFT_ADAPTER), is_trainable=True)

    # ── DPO Config ──
    DPO_ADAPTER.mkdir(parents=True, exist_ok=True)
    dpo_config = DPOConfig(
        output_dir=str(DPO_ADAPTER),
        num_train_epochs=num_epochs,
        per_device_train_batch_size=1,
        gradient_accumulation_steps=8,
        learning_rate=5e-5,
        warmup_ratio=0.05,
        lr_scheduler_type="cosine",
        bf16=True,
        fp16=False,
        optim="adamw_torch",
        max_grad_norm=1.0,
        logging_steps=5,
        save_steps=50,
        save_total_limit=2,
        eval_strategy="no",
        dataloader_num_workers=0,
        gradient_checkpointing=True,
        report_to="none",
        # DPO-specific
        beta=0.1,
        loss_type="sigmoid",
        max_prompt_length=512,
        max_length=4096,
    )

    # ── Trainer ──
    # NOTE(review): with ref_model=None and a PEFT model, the implicit
    # reference is presumably the model with adapters disabled (i.e. the base
    # model, not the SFT policy) — confirm against the TRL docs.
    trainer = DPOTrainer(
        model=model,
        ref_model=None,  # use implicit reference via peft
        args=dpo_config,
        train_dataset=dataset,
        processing_class=tokenizer,
    )

    print(f"Starting DPO training: {len(dataset)} pairs, {num_epochs} epochs...")
    trainer.train()

    print(f"Saving DPO adapter → {DPO_ADAPTER}")
    trainer.save_model(str(DPO_ADAPTER))
    tokenizer.save_pretrained(str(DPO_ADAPTER))
    print("DPO Phase COMPLETE.")


def _merged_weights_size(merged_dir: Path) -> int:
    """Return the byte size of merged safetensors weights in ``merged_dir``.

    Handles both a single ``model.safetensors`` and a sharded checkpoint
    (``model.safetensors.index.json`` plus ``model-*.safetensors``). Returns 0
    when neither layout is present.
    """
    single = merged_dir / "model.safetensors"
    if single.exists():
        return single.stat().st_size
    if (merged_dir / "model.safetensors.index.json").exists():
        return sum(shard.stat().st_size for shard in merged_dir.glob("model-*.safetensors"))
    return 0


def _find_convert_script() -> str:
    """Locate llama.cpp's ``convert_hf_to_gguf.py``.

    Tries the pinned Homebrew Cellar path first, then any other installed
    Cellar version (newest by mtime), then ``PATH``.

    Raises:
        FileNotFoundError: If the script cannot be found.
    """
    pinned = Path("/opt/homebrew/Cellar/llama.cpp/8680/bin/convert_hf_to_gguf.py")
    if pinned.exists():
        return str(pinned)
    others = list(Path("/opt/homebrew/Cellar/llama.cpp").glob("*/bin/convert_hf_to_gguf.py"))
    if others:
        return str(max(others, key=lambda p: p.stat().st_mtime))
    on_path = shutil.which("convert_hf_to_gguf.py")
    if on_path:
        return on_path
    raise FileNotFoundError("convert_hf_to_gguf.py not found; is llama.cpp installed?")


def _find_quantize_bin() -> str:
    """Locate ``llama-quantize`` (pinned Homebrew path, then ``PATH``).

    Raises:
        FileNotFoundError: If the binary cannot be found.
    """
    pinned = Path("/opt/homebrew/bin/llama-quantize")
    if pinned.exists():
        return str(pinned)
    on_path = shutil.which("llama-quantize")
    if on_path:
        return on_path
    raise FileNotFoundError("llama-quantize not found; is llama.cpp installed?")


def _find_python_bin() -> str:
    """Return the interpreter used to run the GGUF convert script.

    Prefers the pinned Homebrew Python; falls back to the interpreter running
    this script.
    """
    pinned = Path("/opt/homebrew/bin/python3.13")
    return str(pinned) if pinned.exists() else sys.executable


def run_merge_and_convert() -> None:
    """Merge adapter → full model, convert to GGUF, register in Ollama.

    Uses the DPO adapter when one has been saved, otherwise the SFT adapter.
    Prints a message and returns without error when neither exists. Each
    expensive step (merge, F16 conversion, quantisation) is skipped when its
    output is already present.

    Raises:
        FileNotFoundError: If a required llama.cpp tool cannot be located.
        subprocess.CalledProcessError: If conversion, quantisation or
            ``ollama create`` exits non-zero.
    """
    import torch
    from peft import PeftModel
    from transformers import AutoModelForCausalLM, AutoTokenizer

    # Prefer DPO adapter, fall back to SFT. Test for a saved adapter config:
    # the directories exist as soon as a training run *starts*.
    adapter_path = DPO_ADAPTER if _adapter_ready(DPO_ADAPTER) else SFT_ADAPTER
    if not _adapter_ready(adapter_path):
        print("No adapter found. Run --phase sft first.")
        return

    print("=== fo-blog-v8 Merge + GGUF ===")
    print(f"Adapter: {adapter_path}")

    # ── Merge ──
    MERGED_DIR.mkdir(parents=True, exist_ok=True)
    merged_bytes = _merged_weights_size(MERGED_DIR)

    if merged_bytes > _MIN_MERGED_BYTES:
        print(f" Already merged ({merged_bytes/1e9:.1f} GB) — skip merge")
    else:
        print(" Loading base model on CPU for merge (avoids MPS OOM)...")
        tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL, trust_remote_code=True)
        model = AutoModelForCausalLM.from_pretrained(
            BASE_MODEL,
            dtype=torch.float16,
            device_map="cpu",
            trust_remote_code=True,
        )
        print(" Loading adapter...")
        model = PeftModel.from_pretrained(model, str(adapter_path))
        print(" Merging...")
        model = model.merge_and_unload()
        print(f" Saving merged model → {MERGED_DIR}")
        model.save_pretrained(str(MERGED_DIR), safe_serialization=True)
        tokenizer.save_pretrained(str(MERGED_DIR))
        del model  # release the merged weights before spawning the converter
        print(" Merge done.")

    # ── Copy tokenizer files from HF cache if needed ──
    hf_cache = Path.home() / ".cache/huggingface/hub"
    snaps = list(hf_cache.glob("models--Qwen--Qwen2.5-14B-Instruct/snapshots/*/tokenizer.json"))
    if snaps:
        snap_dir = snaps[0].parent
        for fname in ["tokenizer.json", "tokenizer_config.json", "vocab.json", "merges.txt"]:
            src = snap_dir / fname
            dst = MERGED_DIR / fname
            if src.exists() and not dst.exists():
                shutil.copy2(src, dst)

    # ── GGUF Conversion ──
    gguf_dir = FINE_TUNER_DIR / "models" / "fo-blog-v8"
    gguf_f16 = gguf_dir / "fo-blog-v8-f16.gguf"
    gguf_q4 = gguf_dir / "fo-blog-v8.gguf"

    # NOTE(review): an existing Q4 file is reused as-is even if the adapter was
    # retrained since; delete it manually to force a fresh conversion.
    if not gguf_q4.exists():
        # The F16 file is only an intermediate (deleted after quantisation), so
        # it is produced solely when the Q4 output is missing.
        if not gguf_f16.exists():
            print(" Converting to GGUF f16...")
            subprocess.run(
                [
                    _find_python_bin(), _find_convert_script(), str(MERGED_DIR),
                    "--outfile", str(gguf_f16), "--outtype", "f16",
                ],
                check=True,
            )
        else:
            print(f" F16 GGUF exists ({gguf_f16.stat().st_size/1e9:.1f} GB) — skip")

        print(" Quantizing to Q4_K_M...")
        subprocess.run(
            [_find_quantize_bin(), str(gguf_f16), str(gguf_q4), "Q4_K_M"],
            check=True,
        )
        gguf_f16.unlink(missing_ok=True)

    print(f" Q4_K_M GGUF: {gguf_q4} ({gguf_q4.stat().st_size/1e9:.1f} GB)")

    # ── Ollama Registration ──
    modelfile_path = gguf_dir / "Modelfile-v8"
    modelfile_content = (
        f"FROM {gguf_q4.resolve()}\n"
        f'SYSTEM """{SYSTEM_PROMPT}"""\n'
        "PARAMETER temperature 0.7\n"
        "PARAMETER top_p 0.9\n"
        "PARAMETER top_k 40\n"
        "PARAMETER repeat_penalty 1.15\n"
        "PARAMETER num_predict 1500\n"
    )
    # Explicit encoding: the system prompt contains non-ASCII punctuation.
    modelfile_path.write_text(modelfile_content, encoding="utf-8")

    print(" Registering in Ollama as fo-blog-v8...")
    subprocess.run(["ollama", "create", "fo-blog-v8", "-f", str(modelfile_path)], check=True)

    listing = subprocess.run(["ollama", "list"], capture_output=True, text=True)
    registered = "fo-blog-v8" in listing.stdout
    print(f" Ollama registration: {'✓ SUCCESS' if registered else '✗ FAILED'}")
    print(f"\nDONE: {gguf_q4}")


def main() -> None:
    """Parse ``--phase`` and run the requested training/conversion phase(s)."""
    parser = argparse.ArgumentParser(description="Train fo-blog-v8 (Qwen2.5-14B LoRA)")
    parser.add_argument(
        "--phase",
        choices=["sft", "dpo", "both", "convert"],
        default="sft",
        help="Training phase to run (default: sft)",
    )
    args = parser.parse_args()

    if args.phase in ("sft", "both"):
        run_sft()
    if args.phase == "both":
        # Free the SFT model before DPO loads a second copy of the base model.
        _release_memory()
    if args.phase in ("dpo", "both"):
        run_dpo()
    if args.phase == "convert":
        run_merge_and_convert()


if __name__ == "__main__":
    main()