- ADR-0001: Multi-Agent Coworking Architecture with LLM Gateway Orchestrator
- ADR-0002: Tier Assignment Strategy for Model Selection (cost-first escalation)
- ADR-0003: Confidence Gate Thresholds & Learning Cycle Intervals (6h/12h/24h cycles)
- ADR-0004: External Provider Fallback Chain Ordering (Cerebras → Groq → Mistral)
- Enhanced client SDK: Offline Ollama fallback, health checks, exponential backoff retry
- Integration tests: claude-code-integration.test.ts (14 test cases)
- PHASE_2F_DEPLOYMENT.md: Pre-deployment checklist, automated deploy, rollback plan
- Post-deployment verification procedures for health, client fallback, metrics
282 lines
12 KiB
Bash
Executable File
282 lines
12 KiB
Bash
Executable File
#!/usr/bin/env bash
|
|
# ═══════════════════════════════════════════════════════════════
# run_v7_pipeline.sh — fo-blog-v7 Autopilot Pipeline
#
# Waits until generate_v7_data.py and ingest_ripe_apnic.py have finished,
# then runs fully automatically:
#   1. Generate DPO pairs
#   2. SFT training (phase 1)
#   3. DPO training (phase 2)
#   4. Merge + convert to GGUF
#   5. Register with Ollama
#
# Usage:
#   bash scripts/run_v7_pipeline.sh                    # Full auto
#   bash scripts/run_v7_pipeline.sh --skip-generation
#   bash scripts/run_v7_pipeline.sh --phase-from=dpo
# ═══════════════════════════════════════════════════════════════
|
|
|
|
# Strict mode: stop on the first failing command, on use of an unset
# variable, and when any stage of a pipeline fails.
set -o errexit -o nounset -o pipefail

# Project root is the parent of the directory that holds this script.
FINE_TUNER_DIR="$(cd "$(dirname "$0")/.." && pwd)"
SCRIPTS="$FINE_TUNER_DIR/scripts"

# Interpreter used for every Python stage of the pipeline.
PYTHON="/opt/homebrew/bin/python3.13"

# Training data lives outside the repository; logs go to a fixed /tmp folder.
DATA_DIR="$HOME/transceiver-training-data"
LOG_DIR="/tmp/v7-pipeline"

# One timestamp per run, shared by all log file names of that run.
TIMESTAMP="$(date '+%Y%m%d-%H%M%S')"

mkdir -p "$LOG_DIR"
|
|
|
|
# ─── Colors ───────────────────────────────────────────────────
|
|
GREEN='\033[0;32m'; YELLOW='\033[1;33m'; RED='\033[0;31m'; NC='\033[0m'; BOLD='\033[1m'
|
|
|
|
log() { echo -e "${GREEN}[$(date +%H:%M:%S)]${NC} $*"; }
|
|
warn() { echo -e "${YELLOW}[$(date +%H:%M:%S)] ⚠${NC} $*"; }
|
|
err() { echo -e "${RED}[$(date +%H:%M:%S)] ✗${NC} $*"; }
|
|
step() { echo -e "\n${BOLD}${GREEN}══ $* ══${NC}"; }
|
|
|
|
# ─── Args ─────────────────────────────────────────────────────
SKIP_GENERATION=false
PHASE_FROM="wait"   # wait | dpo | train-sft | train-dpo | convert

#######################################
# Parse the command line into SKIP_GENERATION and PHASE_FROM.
# Accepts both "--phase-from=NAME" and "--phase-from NAME".
# Globals:   SKIP_GENERATION, PHASE_FROM (written)
# Arguments: the script's command-line arguments
# Outputs:   error description on stderr for bad input
# Returns:   0 on success, 2 on an unknown option, a missing value or an
#            invalid phase name
#######################################
parse_args() {
  while (( $# > 0 )); do
    case "$1" in
      --skip-generation)
        SKIP_GENERATION=true
        ;;
      --phase-from=*)
        PHASE_FROM="${1#*=}"
        ;;
      --phase-from)
        if (( $# < 2 )); then
          printf '%s: --phase-from requires a value\n' "${0##*/}" >&2
          return 2
        fi
        PHASE_FROM="$2"
        shift
        ;;
      *)
        printf '%s: unknown argument: %s\n' "${0##*/}" "$1" >&2
        return 2
        ;;
    esac
    shift
  done

  # An unrecognised phase would match none of the stage conditions further
  # down, so the script would do nothing and still report success.
  case "$PHASE_FROM" in
    wait | dpo | train-sft | train-dpo | convert) ;;
    *)
      printf '%s: invalid phase "%s" (expected: wait, dpo, train-sft, train-dpo, convert)\n' \
        "${0##*/}" "$PHASE_FROM" >&2
      return 2
      ;;
  esac
}

parse_args "$@" || exit $?
|
|
|
|
# ─── Step 0: Wait for generation processes ─────────────────────

#######################################
# Print the number of lines in a file, or 0 when it is missing/unreadable.
# The stderr redirect wraps the whole group so that a failing input
# redirection is silenced too, and the arithmetic expansion strips any
# whitespace padding wc may emit.
# Arguments: $1 - path to the file
# Outputs:   the line count on stdout
#######################################
count_lines() {
  local file=$1 n=0
  if [[ -f "$file" ]]; then
    n=$({ wc -l < "$file"; } 2>/dev/null) || n=0
  fi
  printf '%s\n' "$(( n ))"
}

# Only wait when running the full pipeline from the start and generation
# was not explicitly skipped.
if [[ "$SKIP_GENERATION" == "false" && "$PHASE_FROM" == "wait" ]]; then
  step "Warte auf laufende Generation (generate_v7_data.py + ingest_ripe_apnic.py)"

  log "Prüfe laufende Prozesse..."
  GEN_RUNNING=true
  INGEST_RUNNING=true

  # Poll once per minute until neither process shows up in pgrep any more.
  # A process that is not running at the first check counts as finished.
  while [[ "$GEN_RUNNING" == "true" || "$INGEST_RUNNING" == "true" ]]; do
    if [[ "$GEN_RUNNING" == "true" ]] \
      && ! pgrep -f 'generate_v7_data\.py' > /dev/null 2>&1; then
      GENERATED=$(count_lines "$DATA_DIR/v7-generated-sft.jsonl")
      log "✓ generate_v7_data.py fertig — $GENERATED Blogs erzeugt"
      GEN_RUNNING=false
    fi

    if [[ "$INGEST_RUNNING" == "true" ]] \
      && ! pgrep -f 'ingest_ripe_apnic\.py' > /dev/null 2>&1; then
      INGESTED=$(count_lines "$DATA_DIR/v7-ripe-apnic-sft.jsonl")
      log "✓ ingest_ripe_apnic.py fertig — $INGESTED Items verarbeitet"
      INGEST_RUNNING=false
    fi

    if [[ "$GEN_RUNNING" == "true" || "$INGEST_RUNNING" == "true" ]]; then
      GEN_DONE=$(count_lines "$DATA_DIR/v7-generated-sft.jsonl")
      log " Fortschritt: $GEN_DONE/197 Blogs | Gen läuft: $GEN_RUNNING | Ingest läuft: $INGEST_RUNNING"
      sleep 60
    fi
  done

  log "Beide Prozesse abgeschlossen."
fi
|
|
|
|
# ─── Data summary ──────────────────────────────────────────────
# Count the available SFT examples and abort when there are fewer than 50.
step "Datenlage prüfen"

SFT_FILE="$DATA_DIR/v7-generated-sft.jsonl"
RIPE_FILE="$DATA_DIR/v7-ripe-apnic-sft.jsonl"

# A missing file counts as 0 lines. The existence test avoids the shell's
# own "No such file" message, which a trailing 2>/dev/null does not hide
# because the failing input redirection is processed first. $(( … )) strips
# the whitespace padding some wc implementations print.
SFT_COUNT=0
RIPE_COUNT=0
if [[ -f "$SFT_FILE" ]]; then
  SFT_COUNT=$(( $(wc -l < "$SFT_FILE") ))
fi
if [[ -f "$RIPE_FILE" ]]; then
  RIPE_COUNT=$(( $(wc -l < "$RIPE_FILE") ))
fi

log " v7-generated-sft.jsonl: $SFT_COUNT Zeilen"
log " v7-ripe-apnic-sft.jsonl: $RIPE_COUNT Zeilen"
TOTAL_SFT=$((SFT_COUNT + RIPE_COUNT))
log " Gesamt SFT: $TOTAL_SFT Beispiele"

if (( TOTAL_SFT < 50 )); then
  err "Zu wenig Trainingsdaten ($TOTAL_SFT) — mindestens 50 nötig."
  err "Prüfe: tail -f /tmp/v7-generation.log"
  exit 1
fi
|
|
|
|
# ─── Step 1: Generate DPO pairs ────────────────────────────────
# Runs only for a full run ("wait") or when starting at "dpo".
if [[ "$PHASE_FROM" == "wait" || "$PHASE_FROM" == "dpo" ]]; then
  step "Phase 0: DPO Pairs generieren"
  DPO_FILE="$DATA_DIR/v7-dpo-pairs.jsonl"
  DPO_LOG="$LOG_DIR/dpo-pairs-$TIMESTAMP.log"

  log "Starte generate_dpo_pairs.py..."
  cd "$FINE_TUNER_DIR"
  # Output is shown live and mirrored into the log file. The script enables
  # errexit and pipefail at the top, so a failing generator aborts the run.
  "$PYTHON" "$SCRIPTS/generate_dpo_pairs.py" 2>&1 | tee "$DPO_LOG"

  # A missing output file counts as 0 pairs. Test for it explicitly: a
  # trailing 2>/dev/null does not silence a failing input redirection.
  DPO_COUNT=0
  if [[ -f "$DPO_FILE" ]]; then
    DPO_COUNT=$(( $(wc -l < "$DPO_FILE") ))
  fi
  log "✓ DPO Pairs: $DPO_COUNT Paare erzeugt → $DPO_FILE"
fi
|
|
|
|
# ─── Step 2: SFT Training ──────────────────────────────────────
# Runs for every entry point up to and including "train-sft".
case "$PHASE_FROM" in
  wait | dpo | train-sft)
    step "Phase 1: SFT Training (LoRA, 4 Epochs)"
    SFT_LOG="$LOG_DIR/sft-training-$TIMESTAMP.log"

    log "Starte train_blog_v7.py --phase sft ..."
    log "Log: $SFT_LOG"
    cd "$FINE_TUNER_DIR"
    # Training output is shown live and copied into the log file.
    "$PYTHON" "$SCRIPTS/train_blog_v7.py" --phase sft 2>&1 | tee "$SFT_LOG"

    # Sanity check: the adapter directory must exist after training.
    ADAPTER_PATH="$FINE_TUNER_DIR/adapters/fo-blog-v7/adapter"
    [[ -d "$ADAPTER_PATH" ]] || {
      err "SFT Adapter nicht gefunden: $ADAPTER_PATH"
      exit 1
    }
    log "✓ SFT Adapter gespeichert: $ADAPTER_PATH"
    ;;
esac
|
|
|
|
# ─── Step 3: DPO Training ──────────────────────────────────────
# Runs for every entry point except "convert".
case "$PHASE_FROM" in
  wait | dpo | train-sft | train-dpo)
    step "Phase 2: DPO Training (1 Epoch)"
    DPO_LOG="$LOG_DIR/dpo-training-$TIMESTAMP.log"
    DPO_FILE="$DATA_DIR/v7-dpo-pairs.jsonl"

    if [[ -f "$DPO_FILE" ]]; then
      log "Starte train_blog_v7.py --phase dpo ..."
      cd "$FINE_TUNER_DIR"
      # Training output is shown live and copied into the log file.
      "$PYTHON" "$SCRIPTS/train_blog_v7.py" --phase dpo 2>&1 | tee "$DPO_LOG"
      log "✓ DPO Training abgeschlossen"
    else
      # Without a pairs file the DPO phase is skipped, not treated as an error.
      warn "DPO File nicht vorhanden — überspringe DPO Phase"
    fi
    ;;
esac
|
|
|
|
# ─── Step 4: Merge + GGUF + Ollama ────────────────────────────
# Runs for every valid entry point, including "convert".
if [[ "$PHASE_FROM" == "wait" || "$PHASE_FROM" == "dpo" || "$PHASE_FROM" == "train-sft" || "$PHASE_FROM" == "train-dpo" || "$PHASE_FROM" == "convert" ]]; then
  step "Phase 3: Merge + GGUF + Ollama Registrierung"
  CONV_LOG="$LOG_DIR/convert-$TIMESTAMP.log"

  # Determine which adapter to use: the DPO adapter wins over the SFT one.
  DPO_ADAPTER="$FINE_TUNER_DIR/adapters/fo-blog-v7-dpo/adapter"
  SFT_ADAPTER="$FINE_TUNER_DIR/adapters/fo-blog-v7/adapter"

  if [[ -d "$DPO_ADAPTER" ]]; then
    BEST_ADAPTER="$DPO_ADAPTER"
    log "Verwende DPO Adapter: $BEST_ADAPTER"
  elif [[ -d "$SFT_ADAPTER" ]]; then
    BEST_ADAPTER="$SFT_ADAPTER"
    warn "Kein DPO Adapter — verwende SFT Adapter: $BEST_ADAPTER"
  else
    err "Kein Adapter gefunden! Erst Training starten."
    exit 1
  fi

  # (Re)generate the v7 merge/convert helper on every run. The heredoc
  # delimiter is quoted, so the Python source is written verbatim with no
  # shell expansion; the chosen adapter is passed via V7_ADAPTER_PATH.
  V7_CONVERT_SCRIPT="$SCRIPTS/merge_and_convert_v7.py"
  cat > "$V7_CONVERT_SCRIPT" << 'PYEOF'
#!/usr/bin/env python3
"""Merge fo-blog-v7 adapter (SFT or DPO) into base model → GGUF → Ollama."""
import os
import subprocess
from pathlib import Path

FINE_TUNER = Path(__file__).parent.parent
# Adapter to merge; the pipeline passes it via V7_ADAPTER_PATH, the DPO
# adapter location is the fallback when the variable is not set.
ADAPTER_DIR = Path(os.environ.get("V7_ADAPTER_PATH",
                                  str(FINE_TUNER / "adapters" / "fo-blog-v7-dpo" / "adapter")))
MERGED_DIR = FINE_TUNER / "models" / "fo-blog-v7" / "merged"
GGUF_DIR = FINE_TUNER / "models" / "fo-blog-v7"
GGUF_F16 = GGUF_DIR / "fo-blog-v7-f16.gguf"
GGUF_Q4 = GGUF_DIR / "fo-blog-v7.gguf"
BASE_MODEL = "Qwen/Qwen2.5-7B-Instruct"
# NOTE(review): this path is pinned to one specific Homebrew Cellar build
# (8680); it presumably stops resolving after a llama.cpp upgrade — confirm.
CONVERT_SCRIPT = "/opt/homebrew/Cellar/llama.cpp/8680/bin/convert_hf_to_gguf.py"
QUANTIZE_BIN = "/opt/homebrew/bin/llama-quantize"
PYTHON_BIN = "/opt/homebrew/bin/python3.13"

# Ollama Modelfile template; {gguf_path} is filled in by register().
MODELFILE = """FROM {gguf_path}

SYSTEM \"\"\"You are an expert technical writer specializing in optical networking and transceiver technology.

STRICT CONSTRAINTS:
- LENGTH: 700-1000 words ONLY. Stop at 1000 words maximum.
- STRUCTURE: 1) Hook paragraph, 2) Technical sections (## headers), 3) Exactly 3 takeaways
- TOPIC DISCIPLINE: Write ONLY about the exact topic requested. Zero drift.
- NO REPETITION: Every sentence adds new information.
- VOICE: Confident and direct. No hedging phrases.
- AUDIENCE: Network engineers and IT professionals.\"\"\"

PARAMETER temperature 0.7
PARAMETER top_p 0.9
PARAMETER top_k 40
PARAMETER repeat_penalty 1.15
PARAMETER num_predict 1500
"""


def merge():
    """Merge the adapter into the base model and save the result to MERGED_DIR.

    Skipped when MERGED_DIR already holds a model.safetensors larger than 10 GB.
    """
    import torch
    from peft import PeftModel
    from transformers import AutoModelForCausalLM, AutoTokenizer

    MERGED_DIR.mkdir(parents=True, exist_ok=True)
    merged_file = MERGED_DIR / "model.safetensors"
    if merged_file.exists():
        size = merged_file.stat().st_size
        if size > 10_000_000_000:
            print(f" Already merged ({size/1e9:.1f} GB) — skip")
            return
    print(f" Loading base: {BASE_MODEL}")
    tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL, trust_remote_code=True)
    model = AutoModelForCausalLM.from_pretrained(BASE_MODEL, torch_dtype=torch.float16,
                                                 device_map="cpu", trust_remote_code=True)
    print(f" Loading adapter: {ADAPTER_DIR}")
    model = PeftModel.from_pretrained(model, str(ADAPTER_DIR))
    print(" Merging...")
    model = model.merge_and_unload()
    print(f" Saving to: {MERGED_DIR}")
    model.save_pretrained(str(MERGED_DIR), safe_serialization=True)
    tokenizer.save_pretrained(str(MERGED_DIR))


def convert():
    """Convert the merged model to a Q4_K_M GGUF and return its path.

    NOTE(review): an existing fo-blog-v7.gguf is reused as-is, even if the
    adapter has changed since it was built; delete it to force a rebuild.
    """
    import shutil

    GGUF_DIR.mkdir(parents=True, exist_ok=True)
    # Copy the base model's tokenizer files from the local Hugging Face cache
    # over the merged directory, when a cached snapshot is available.
    hf_cache = Path.home() / ".cache/huggingface/hub"
    snap = list(hf_cache.glob("models--Qwen--Qwen2.5-7B-Instruct/snapshots/*/tokenizer.json"))
    if snap:
        snap_dir = snap[0].parent
        for f in ["tokenizer.json", "tokenizer_config.json", "vocab.json", "merges.txt"]:
            if (snap_dir / f).exists():
                shutil.copy2(snap_dir / f, MERGED_DIR / f)
    if not GGUF_Q4.exists():
        # The f16 file is only an intermediate for quantization: build it only
        # when the quantized file is missing, and remove it afterwards.
        if not GGUF_F16.exists():
            subprocess.run([PYTHON_BIN, CONVERT_SCRIPT, str(MERGED_DIR),
                            "--outfile", str(GGUF_F16), "--outtype", "f16"], check=True)
        subprocess.run([QUANTIZE_BIN, str(GGUF_F16), str(GGUF_Q4), "Q4_K_M"], check=True)
        GGUF_F16.unlink(missing_ok=True)
    return GGUF_Q4


def register(gguf_path):
    """Write Modelfile-v7 next to the GGUF and create the Ollama model fo-blog-v7."""
    mf = gguf_path.parent / "Modelfile-v7"
    mf.write_text(MODELFILE.format(gguf_path=gguf_path.resolve()))
    subprocess.run(["ollama", "create", "fo-blog-v7", "-f", str(mf)], check=True)
    result = subprocess.run(["ollama", "list"], capture_output=True, text=True)
    print("fo-blog-v7 in ollama:", "fo-blog-v7" in result.stdout)


if __name__ == "__main__":
    print("=== fo-blog-v7 Merge + Convert ===")
    print(f"Adapter: {ADAPTER_DIR}")
    merge()
    gguf = convert()
    register(gguf)
    print(f"\nDONE: {gguf}")
PYEOF

  log "Starte Merge + GGUF Konvertierung..."
  # Output is shown live and mirrored into the conversion log.
  V7_ADAPTER_PATH="$BEST_ADAPTER" "$PYTHON" "$V7_CONVERT_SCRIPT" 2>&1 | tee "$CONV_LOG"
  log "✓ GGUF fertig und in Ollama registriert als fo-blog-v7"
fi
|
|
|
|
# ─── Wrap-up ───────────────────────────────────────────────────
# Final banner plus the manual follow-up steps for the "Erik" host.
step "Pipeline ABGESCHLOSSEN"
printf '\n'

ready_notes=(
  "fo-blog-v7 ist bereit:"
  " Ollama: ollama run fo-blog-v7"
  " TIP API: OLLAMA_LLM_MODEL=fo-blog-v7 (schon gesetzt)"
)
for note in "${ready_notes[@]}"; do
  log "$note"
done
printf '\n'

erik_notes=(
  "Erik updaten:"
  " 1. GGUF auf Erik hochladen (rsync oder scp)"
  " 2. Ollama auf Erik registrieren: ollama create fo-blog-v7 -f Modelfile-v7"
  " 3. TIP ecosystem.config.js: OLLAMA_LLM_MODEL=fo-blog-v7"
  " 4. pm2 restart ecosystem.config.js --update-env"
)
for note in "${erik_notes[@]}"; do
  log "$note"
done
printf '\n'

log "Logs: $LOG_DIR/"
|