llm-gateway/packages/fine-tuner/scripts/run_v7_pipeline.sh
Rene Fichtmueller 2ca77d0aee feat: Phase 2F — Multi-Agent Integration (ADRs + Client Fallback + Tests)
- ADR-0001: Multi-Agent Coworking Architecture with LLM Gateway Orchestrator
- ADR-0002: Tier Assignment Strategy for Model Selection (cost-first escalation)
- ADR-0003: Confidence Gate Thresholds & Learning Cycle Intervals (6h/12h/24h cycles)
- ADR-0004: External Provider Fallback Chain Ordering (Cerebras → Groq → Mistral)
- Enhanced client SDK: Offline Ollama fallback, health checks, exponential backoff retry
- Integration tests: claude-code-integration.test.ts (14 test cases)
- PHASE_2F_DEPLOYMENT.md: Pre-deployment checklist, automated deploy, rollback plan
- Post-deployment verification procedures for health, client fallback, metrics
2026-04-19 21:39:44 +02:00

282 lines
12 KiB
Bash
Executable File

#!/usr/bin/env bash
# ═══════════════════════════════════════════════════════════════
# run_v7_pipeline.sh — fo-blog-v7 Autopilot Pipeline
#
# Wartet bis generate_v7_data.py + ingest_ripe_apnic.py fertig,
# dann vollautomatisch:
# 1. DPO Pairs generieren
# 2. SFT Training (Phase 1)
# 3. DPO Training (Phase 2)
# 4. Merge + GGUF konvertieren
# 5. Ollama registrieren
#
# Usage:
# bash scripts/run_v7_pipeline.sh # Full auto
# bash scripts/run_v7_pipeline.sh --skip-generation
# bash scripts/run_v7_pipeline.sh --phase-from=dpo # resume at: dpo | train-sft | train-dpo | convert
# ═══════════════════════════════════════════════════════════════
# Strict mode: stop on a failing command, on expansion of an unset variable,
# and when any stage of a pipeline fails.
set -e -u -o pipefail

# Package root = parent of the directory that contains this script.
FINE_TUNER_DIR="$(cd "$(dirname "$0")/.." && pwd)"
PYTHON="/opt/homebrew/bin/python3.13"
SCRIPTS="${FINE_TUNER_DIR}/scripts"
DATA_DIR="${HOME}/transceiver-training-data"
LOG_DIR="/tmp/v7-pipeline"
# Taken once so that every log file written by this run shares one suffix.
TIMESTAMP="$(date '+%Y%m%d-%H%M%S')"

mkdir -p "${LOG_DIR}"
# ─── Colors ───────────────────────────────────────────────────
# ANSI escape sequences for colored terminal output; NC resets all styling.
# They are stored with a literal backslash and expanded by `echo -e` below.
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
RED='\033[0;31m'
NC='\033[0m'
BOLD='\033[1m'

# Current wall-clock time as HH:MM:SS (prefix for every log line).
_log_ts() {
  date +%H:%M:%S
}

# log MESSAGE... — informational line with a green timestamp.
log() {
  echo -e "${GREEN}[$(_log_ts)]${NC} $*"
}

# warn MESSAGE... — warning line with a yellow timestamp and a ⚠ marker.
warn() {
  echo -e "${YELLOW}[$(_log_ts)] ⚠${NC} $*"
}

# err MESSAGE... — error line with a red timestamp and a ✗ marker.
# Only prints; the caller decides whether to exit.
err() {
  echo -e "${RED}[$(_log_ts)] ✗${NC} $*"
}

# step TITLE... — bold green section banner preceded by an empty line.
step() {
  echo -e "\n${BOLD}${GREEN}══ $* ══${NC}"
}
# ─── Args ─────────────────────────────────────────────────────
SKIP_GENERATION=false
PHASE_FROM="wait" # wait | dpo | train-sft | train-dpo | convert

#######################################
# Parse the command-line options of the pipeline.
#
# Accepts both `--phase-from=VALUE` and `--phase-from VALUE`. Previously only
# the `=` form was recognised, so the space-separated form was silently
# dropped and the full pipeline ran from the start.
#
# Globals:
#   SKIP_GENERATION (written) - "true" when --skip-generation is given
#   PHASE_FROM      (written) - phase to resume from
# Arguments:
#   the script's positional parameters ("$@")
# Returns:
#   exits with status 2 on a missing or invalid --phase-from value; an
#   invalid value would otherwise skip every phase and still report success.
#   Unknown arguments are reported with a warning and otherwise ignored, as
#   before.
#######################################
parse_args() {
  while (( $# > 0 )); do
    case "$1" in
      --skip-generation)
        SKIP_GENERATION=true
        ;;
      --phase-from=*)
        PHASE_FROM="${1#*=}"
        ;;
      --phase-from)
        if (( $# < 2 )); then
          err "--phase-from benötigt einen Wert (wait | dpo | train-sft | train-dpo | convert)"
          exit 2
        fi
        PHASE_FROM="$2"
        shift
        ;;
      *)
        warn "Unbekanntes Argument ignoriert: $1"
        ;;
    esac
    shift
  done

  case "$PHASE_FROM" in
    wait | dpo | train-sft | train-dpo | convert) ;;
    *)
      err "Ungültige Phase '$PHASE_FROM' — erlaubt: wait | dpo | train-sft | train-dpo | convert"
      exit 2
      ;;
  esac
}

parse_args "$@"
# ─── Step 0: Wait for generation processes ─────────────────────
#######################################
# Block until neither generate_v7_data.py nor ingest_ripe_apnic.py shows up
# in the process list any more, polling once per minute.
#
# A process that is not found on the first check is treated as finished;
# this function cannot tell "never started" from "already done".
#
# Line counts are read with stderr redirected BEFORE the input redirection,
# so a data file that does not exist yet yields 0 without an error message.
# Wrapping the count in $(( )) strips the padding that BSD/macOS `wc` emits.
#
# Globals:
#   DATA_DIR (read) - directory holding the generated *.jsonl files
# Outputs:
#   progress lines via step/log
#######################################
wait_for_generation() {
  local gen_running=true
  local ingest_running=true
  local generated ingested gen_done

  step "Warte auf laufende Generation (generate_v7_data.py + ingest_ripe_apnic.py)"
  log "Prüfe laufende Prozesse..."

  while [[ "$gen_running" == "true" || "$ingest_running" == "true" ]]; do
    # Once a process has been seen as finished it is not probed again.
    if [[ "$gen_running" == "true" ]] \
      && ! pgrep -f "generate_v7_data.py" > /dev/null 2>&1; then
      generated=$(( $(wc -l 2>/dev/null < "$DATA_DIR/v7-generated-sft.jsonl" || echo 0) ))
      log "✓ generate_v7_data.py fertig — $generated Blogs erzeugt"
      gen_running=false
    fi

    if [[ "$ingest_running" == "true" ]] \
      && ! pgrep -f "ingest_ripe_apnic.py" > /dev/null 2>&1; then
      ingested=$(( $(wc -l 2>/dev/null < "$DATA_DIR/v7-ripe-apnic-sft.jsonl" || echo 0) ))
      log "✓ ingest_ripe_apnic.py fertig — $ingested Items verarbeitet"
      ingest_running=false
    fi

    # Still waiting on at least one process: report progress, then sleep.
    if [[ "$gen_running" == "true" || "$ingest_running" == "true" ]]; then
      gen_done=$(( $(wc -l 2>/dev/null < "$DATA_DIR/v7-generated-sft.jsonl" || echo 0) ))
      log " Fortschritt: $gen_done/197 Blogs | Gen läuft: $gen_running | Ingest läuft: $ingest_running"
      sleep 60
    fi
  done

  log "Beide Prozesse abgeschlossen."
}

# Only wait when running the full pipeline and generation was not skipped.
if [[ "$SKIP_GENERATION" == "false" && "$PHASE_FROM" == "wait" ]]; then
  wait_for_generation
fi
# ─── Data summary ──────────────────────────────────────────────
# Sanity check of the available SFT training data before any phase starts.
step "Datenlage prüfen"

# stderr is redirected BEFORE the input redirection so that a missing file
# yields 0 without printing "No such file or directory" (redirections are
# applied left to right). $(( )) strips the padding BSD/macOS `wc` adds.
SFT_COUNT=$(( $(wc -l 2>/dev/null < "$DATA_DIR/v7-generated-sft.jsonl" || echo 0) ))
RIPE_COUNT=$(( $(wc -l 2>/dev/null < "$DATA_DIR/v7-ripe-apnic-sft.jsonl" || echo 0) ))
log " v7-generated-sft.jsonl: $SFT_COUNT Zeilen"
log " v7-ripe-apnic-sft.jsonl: $RIPE_COUNT Zeilen"

TOTAL_SFT=$(( SFT_COUNT + RIPE_COUNT ))
log " Gesamt SFT: $TOTAL_SFT Beispiele"

# Refuse to continue with fewer than 50 examples in total.
if (( TOTAL_SFT < 50 )); then
  err "Zu wenig Trainingsdaten ($TOTAL_SFT) — mindestens 50 nötig."
  err "Prüfe: tail -f /tmp/v7-generation.log"
  exit 1
fi
# ─── Step 1: DPO Pairs generieren ──────────────────────────────
#######################################
# Run generate_dpo_pairs.py and report the line count of the pairs file.
#
# The generator's combined stdout/stderr is shown and copied to DPO_LOG.
# The pairs file path is not passed to the generator; presumably the
# generator writes to that location itself (verify against the script).
#
# The line count is read with stderr redirected BEFORE the input
# redirection, so a missing pairs file yields 0 without an error message,
# and $(( )) strips the padding that BSD/macOS `wc` emits.
#
# Globals:
#   FINE_TUNER_DIR, SCRIPTS, DATA_DIR, LOG_DIR, TIMESTAMP, PYTHON (read)
#   DPO_FILE, DPO_LOG, DPO_COUNT (written)
#######################################
run_dpo_pair_generation() {
  step "Phase 0: DPO Pairs generieren"
  DPO_FILE="$DATA_DIR/v7-dpo-pairs.jsonl"
  DPO_LOG="$LOG_DIR/dpo-pairs-$TIMESTAMP.log"

  log "Starte generate_dpo_pairs.py..."
  cd "$FINE_TUNER_DIR"
  "$PYTHON" "$SCRIPTS/generate_dpo_pairs.py" 2>&1 | tee "$DPO_LOG"

  DPO_COUNT=$(( $(wc -l 2>/dev/null < "$DPO_FILE" || echo 0) ))
  log "✓ DPO Pairs: $DPO_COUNT Paare erzeugt → $DPO_FILE"
}

# Pair generation runs for a full run ("wait") or when resuming at "dpo".
if [[ "$PHASE_FROM" == "wait" || "$PHASE_FROM" == "dpo" ]]; then
  run_dpo_pair_generation
fi
# ─── Step 2: SFT Training ──────────────────────────────────────
# SFT phase: executed for a full run and when resuming at "dpo" or
# "train-sft"; skipped when resuming at a later phase.
case "$PHASE_FROM" in
  wait | dpo | train-sft)
    step "Phase 1: SFT Training (LoRA, 4 Epochs)"
    SFT_LOG="${LOG_DIR}/sft-training-${TIMESTAMP}.log"
    log "Starte train_blog_v7.py --phase sft ..."
    log "Log: ${SFT_LOG}"

    cd "${FINE_TUNER_DIR}"
    # Trainer output is shown live and copied into the log file.
    "${PYTHON}" "${SCRIPTS}/train_blog_v7.py" --phase sft 2>&1 | tee "${SFT_LOG}"

    # The adapter directory must exist afterwards, otherwise abort.
    ADAPTER_PATH="${FINE_TUNER_DIR}/adapters/fo-blog-v7/adapter"
    [[ -d "${ADAPTER_PATH}" ]] || {
      err "SFT Adapter nicht gefunden: ${ADAPTER_PATH}"
      exit 1
    }
    log "✓ SFT Adapter gespeichert: ${ADAPTER_PATH}"
    ;;
esac
# ─── Step 3: DPO Training ──────────────────────────────────────
# DPO phase: executed for every entry point except "convert".
case "$PHASE_FROM" in
  wait | dpo | train-sft | train-dpo)
    step "Phase 2: DPO Training (1 Epoch)"
    DPO_LOG="${LOG_DIR}/dpo-training-${TIMESTAMP}.log"
    DPO_FILE="${DATA_DIR}/v7-dpo-pairs.jsonl"

    if [[ -f "${DPO_FILE}" ]]; then
      log "Starte train_blog_v7.py --phase dpo ..."
      cd "${FINE_TUNER_DIR}"
      # Trainer output is shown live and copied into the log file.
      "${PYTHON}" "${SCRIPTS}/train_blog_v7.py" --phase dpo 2>&1 | tee "${DPO_LOG}"
      log "✓ DPO Training abgeschlossen"
    else
      # No pairs file: DPO is optional, so warn and carry on.
      warn "DPO File nicht vorhanden — überspringe DPO Phase"
    fi
    ;;
esac
# ─── Step 4: Merge + GGUF + Ollama ────────────────────────────
# Final phase: merge the trained adapter into the base model, convert the
# result to GGUF and register it with Ollama. The condition lists every value
# PHASE_FROM may take, so this phase runs for each valid entry point.
if [[ "$PHASE_FROM" == "wait" || "$PHASE_FROM" == "dpo" || "$PHASE_FROM" == "train-sft" || "$PHASE_FROM" == "train-dpo" || "$PHASE_FROM" == "convert" ]]; then
step "Phase 3: Merge + GGUF + Ollama Registrierung"
CONV_LOG="$LOG_DIR/convert-$TIMESTAMP.log"
# Pick the adapter to merge: the DPO adapter directory wins if it exists,
# otherwise fall back to the SFT adapter; abort if neither is present.
# NOTE(review): only directory existence is checked, so a DPO adapter left
# over from an earlier run is preferred even if this run skipped the DPO
# phase — confirm that this is intended.
DPO_ADAPTER="$FINE_TUNER_DIR/adapters/fo-blog-v7-dpo/adapter"
SFT_ADAPTER="$FINE_TUNER_DIR/adapters/fo-blog-v7/adapter"
if [[ -d "$DPO_ADAPTER" ]]; then
BEST_ADAPTER="$DPO_ADAPTER"
log "Verwende DPO Adapter: $BEST_ADAPTER"
elif [[ -d "$SFT_ADAPTER" ]]; then
BEST_ADAPTER="$SFT_ADAPTER"
warn "Kein DPO Adapter — verwende SFT Adapter: $BEST_ADAPTER"
else
err "Kein Adapter gefunden! Erst Training starten."
exit 1
fi
# (Re)write scripts/merge_and_convert_v7.py from the template below on every
# run, overwriting any existing file. The heredoc delimiter is quoted
# ('PYEOF'), so the shell expands nothing inside it; the chosen adapter is
# handed to the Python script through the V7_ADAPTER_PATH environment variable.
#
# What the generated script does:
#   merge()    - loads BASE_MODEL and the adapter, merges them and saves the
#                result under models/fo-blog-v7/merged; skipped when a
#                model.safetensors larger than 10 GB already exists there.
#   convert()  - copies tokenizer files from the local Hugging Face cache if
#                found, converts to an f16 GGUF, quantizes to Q4_K_M and then
#                deletes the f16 file; each step is skipped when its output
#                file already exists.
#   register() - writes Modelfile-v7 next to the GGUF and runs
#                `ollama create fo-blog-v7`.
# NOTE(review): because existing outputs are reused, a re-run after new
# training may register artifacts from a previous run unless
# models/fo-blog-v7 is cleaned first — confirm.
# NOTE(review): CONVERT_SCRIPT points into a version-specific Homebrew Cellar
# directory (llama.cpp/8680); presumably this breaks after a llama.cpp upgrade.
V7_CONVERT_SCRIPT="$SCRIPTS/merge_and_convert_v7.py"
cat > "$V7_CONVERT_SCRIPT" << 'PYEOF'
#!/usr/bin/env python3
"""Merge fo-blog-v7 adapter (SFT or DPO) into base model → GGUF → Ollama."""
import subprocess, sys, os
from pathlib import Path
FINE_TUNER = Path(__file__).parent.parent
ADAPTER_DIR = Path(os.environ.get("V7_ADAPTER_PATH",
str(FINE_TUNER / "adapters" / "fo-blog-v7-dpo" / "adapter")))
MERGED_DIR = FINE_TUNER / "models" / "fo-blog-v7" / "merged"
GGUF_DIR = FINE_TUNER / "models" / "fo-blog-v7"
GGUF_F16 = GGUF_DIR / "fo-blog-v7-f16.gguf"
GGUF_Q4 = GGUF_DIR / "fo-blog-v7.gguf"
BASE_MODEL = "Qwen/Qwen2.5-7B-Instruct"
CONVERT_SCRIPT = "/opt/homebrew/Cellar/llama.cpp/8680/bin/convert_hf_to_gguf.py"
QUANTIZE_BIN = "/opt/homebrew/bin/llama-quantize"
PYTHON_BIN = "/opt/homebrew/bin/python3.13"
MODELFILE = """FROM {gguf_path}
SYSTEM \"\"\"You are an expert technical writer specializing in optical networking and transceiver technology.
STRICT CONSTRAINTS:
- LENGTH: 700-1000 words ONLY. Stop at 1000 words maximum.
- STRUCTURE: 1) Hook paragraph, 2) Technical sections (## headers), 3) Exactly 3 takeaways
- TOPIC DISCIPLINE: Write ONLY about the exact topic requested. Zero drift.
- NO REPETITION: Every sentence adds new information.
- VOICE: Confident and direct. No hedging phrases.
- AUDIENCE: Network engineers and IT professionals.\"\"\"
PARAMETER temperature 0.7
PARAMETER top_p 0.9
PARAMETER top_k 40
PARAMETER repeat_penalty 1.15
PARAMETER num_predict 1500
"""
def merge():
import torch
from peft import PeftModel
from transformers import AutoModelForCausalLM, AutoTokenizer
MERGED_DIR.mkdir(parents=True, exist_ok=True)
if (MERGED_DIR / "model.safetensors").exists():
size = (MERGED_DIR / "model.safetensors").stat().st_size
if size > 10_000_000_000:
print(f" Already merged ({size/1e9:.1f} GB) — skip")
return
print(f" Loading base: {BASE_MODEL}")
tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL, trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained(BASE_MODEL, torch_dtype=torch.float16,
device_map="cpu", trust_remote_code=True)
print(f" Loading adapter: {ADAPTER_DIR}")
model = PeftModel.from_pretrained(model, str(ADAPTER_DIR))
print(" Merging...")
model = model.merge_and_unload()
print(f" Saving to: {MERGED_DIR}")
model.save_pretrained(str(MERGED_DIR), safe_serialization=True)
tokenizer.save_pretrained(str(MERGED_DIR))
def convert():
GGUF_DIR.mkdir(parents=True, exist_ok=True)
import shutil
hf_cache = Path.home() / ".cache/huggingface/hub"
snap = list(hf_cache.glob("models--Qwen--Qwen2.5-7B-Instruct/snapshots/*/tokenizer.json"))
if snap:
snap_dir = snap[0].parent
for f in ["tokenizer.json","tokenizer_config.json","vocab.json","merges.txt"]:
if (snap_dir / f).exists():
shutil.copy2(snap_dir / f, MERGED_DIR / f)
if not GGUF_F16.exists():
subprocess.run([PYTHON_BIN, CONVERT_SCRIPT, str(MERGED_DIR),
"--outfile", str(GGUF_F16), "--outtype", "f16"], check=True)
if not GGUF_Q4.exists():
subprocess.run([QUANTIZE_BIN, str(GGUF_F16), str(GGUF_Q4), "Q4_K_M"], check=True)
GGUF_F16.unlink(missing_ok=True)
return GGUF_Q4
def register(gguf_path):
mf = gguf_path.parent / "Modelfile-v7"
mf.write_text(MODELFILE.format(gguf_path=gguf_path.resolve()))
subprocess.run(["ollama", "create", "fo-blog-v7", "-f", str(mf)], check=True)
result = subprocess.run(["ollama", "list"], capture_output=True, text=True)
print("fo-blog-v7 in ollama:", "fo-blog-v7" in result.stdout)
if __name__ == "__main__":
print("=== fo-blog-v7 Merge + Convert ===")
print(f"Adapter: {ADAPTER_DIR}")
merge()
gguf = convert()
register(gguf)
print(f"\nDONE: {gguf}")
PYEOF
log "Starte Merge + GGUF Konvertierung..."
# V7_ADAPTER_PATH is set for this one command only; output is shown live and
# copied to CONV_LOG.
V7_ADAPTER_PATH="$BEST_ADAPTER" $PYTHON "$V7_CONVERT_SCRIPT" 2>&1 | tee "$CONV_LOG"
log "✓ GGUF fertig und in Ollama registriert als fo-blog-v7"
fi
# ─── Abschluss ─────────────────────────────────────────────────
# Closing banner. Everything below is printed only; nothing is executed.
step "Pipeline ABGESCHLOSSEN"
printf '\n'

# How to use the freshly registered model.
for summary_line in \
  "fo-blog-v7 ist bereit:" \
  " Ollama: ollama run fo-blog-v7" \
  " TIP API: OLLAMA_LLM_MODEL=fo-blog-v7 (schon gesetzt)"; do
  log "${summary_line}"
done
printf '\n'

# Manual follow-up checklist for rolling the model out to "Erik".
for summary_line in \
  "Erik updaten:" \
  " 1. GGUF auf Erik hochladen (rsync oder scp)" \
  " 2. Ollama auf Erik registrieren: ollama create fo-blog-v7 -f Modelfile-v7" \
  " 3. TIP ecosystem.config.js: OLLAMA_LLM_MODEL=fo-blog-v7" \
  " 4. pm2 restart ecosystem.config.js --update-env"; do
  log "${summary_line}"
done
printf '\n'

log "Logs: ${LOG_DIR}/"