#!/bin/bash
#
# Load blog-training-alpaca.jsonl into the PostgreSQL learning_corpus table.
#
# Flow:
#   1. Split DB_URL into host / port / database / user / password.
#   2. Build a temporary SQL script: BEGIN, a "before" row count, one
#      multi-row INSERT generated from the JSONL file by Python, an "after"
#      row count, COMMIT.
#   3. Run the script with psql. Any SQL error aborts psql
#      (ON_ERROR_STOP), so the open transaction is never committed.
#
# Requires: python3 and psql on PATH.
# Exit status: 0 on success, non-zero if a precondition, the SQL generation,
# or psql fails.

set -euo pipefail

JSONL_FILE="/Users/renefichtmueller/Desktop/Claude Code/llm-gateway/packages/fine-tuner/data/blog-training-alpaca.jsonl"
# NOTE(review): this URL embeds a credential in the script; consider sourcing
# it from the environment or ~/.pgpass instead.
DB_URL="postgresql://llm:llm_secure_2026@127.0.0.1:15432/llm_gateway"
TASK_TYPE="tip_blog"

# Print a message to stderr and abort.
die() {
  printf 'error: %s\n' "$*" >&2
  exit 1
}

# Parse connection string: scheme://user:password@host:port/database
url_rest="${DB_URL#*://}"
url_userinfo="${url_rest%@*}"     # split on the LAST '@' so a password may contain '@'
url_hostpart="${url_rest##*@}"
url_hostport="${url_hostpart%%/*}"

DB_USER="${url_userinfo%%:*}"
DB_PASSWORD="${url_userinfo#*:}"
DB_HOST="${url_hostport%%:*}"
DB_PORT="${url_hostport##*:}"
DB_NAME="${url_hostpart#*/}"

# Fail early with a clear message instead of half-way through the load.
command -v python3 >/dev/null || die "python3 not found on PATH"
command -v psql >/dev/null || die "psql not found on PATH"
[[ -r "$JSONL_FILE" ]] || die "cannot read JSONL file: $JSONL_FILE"

echo "🔄 Loading blog training data into PostgreSQL..."
echo " Host: $DB_HOST:$DB_PORT"
echo " Database: $DB_NAME"
echo " JSONL: $JSONL_FILE"
echo ""

# Create temporary SQL file; the trap removes it on every exit path.
TMPFILE=$(mktemp) || die "mktemp failed"
trap 'rm -f -- "$TMPFILE"' EXIT

# Everything inside the group is written to the temp file. The heredoc
# delimiters are quoted so the shell expands nothing in the SQL or Python
# text; :'task_type' is substituted (and quoted) by psql via -v below.
{
  cat <<'SQL'
BEGIN;

-- Count before
SELECT COUNT(*) AS "Rows before" FROM learning_corpus WHERE task_type = :'task_type';

SQL

  # Use Python to parse JSONL and generate SQL. The path and task type are
  # passed as arguments rather than spliced into the Python source.
  python3 - "$JSONL_FILE" "$TASK_TYPE" <<'PYEOF'
import json
import math
import sys

jsonl_path, task_type = sys.argv[1], sys.argv[2]


def sql_literal(value):
    """Render value as a PostgreSQL escape-string literal (E'...').

    Backslashes and quotes are escaped and line breaks become \\n / \\r
    escapes, so each row stays on one line and the stored text keeps its
    real newlines regardless of standard_conforming_strings.
    """
    text = str(value)
    text = text.replace("\\", "\\\\").replace("'", "''")
    text = text.replace("\r", "\\r").replace("\n", "\\n")
    return "E'" + text + "'"


def text_field(sample, key, default=""):
    """Return sample[key] as a string, or default when missing or null."""
    value = sample.get(key)
    return default if value is None else str(value)


rows = []
with open(jsonl_path, encoding="utf-8") as handle:
    for lineno, line in enumerate(handle, start=1):
        if not line.strip():
            continue
        try:
            sample = json.loads(line)
        except json.JSONDecodeError as exc:
            sys.exit(f"{jsonl_path}:{lineno}: invalid JSON: {exc}")

        prompt = text_field(sample, "instruction")
        if sample.get("input"):
            prompt += "\n" + str(sample["input"])
        output = text_field(sample, "output")
        source = text_field(sample, "source", "unknown")

        # Coerce to a number so arbitrary JSON cannot reach the SQL text.
        try:
            quality = float(sample.get("quality_score", 8.0))
        except (TypeError, ValueError):
            sys.exit(f"{jsonl_path}:{lineno}: quality_score is not numeric")
        if not math.isfinite(quality):
            sys.exit(f"{jsonl_path}:{lineno}: quality_score is not finite")

        rows.append(
            "({}, {}, {}, {!r}, ARRAY[{}, {}])".format(
                sql_literal(task_type),
                sql_literal(prompt),
                sql_literal(output),
                quality,
                sql_literal(source),
                sql_literal(task_type),
            )
        )

print("-- Insert blog training samples")
if rows:
    print(
        "INSERT INTO learning_corpus "
        "(task_type, prompt_text, completion_text, quality_score, tags) VALUES"
    )
    print(",\n".join("  " + row for row in rows) + ";")
else:
    # An INSERT with an empty VALUES list is a syntax error; emit nothing.
    print("-- (no samples found; nothing to insert)")
PYEOF

  cat <<'SQL'

-- Count after
SELECT COUNT(*) AS "Rows after" FROM learning_corpus WHERE task_type = :'task_type';
COMMIT;
SQL
} > "$TMPFILE"

# Execute SQL. ON_ERROR_STOP makes psql exit non-zero on the first SQL error,
# which (with set -e) stops the script before the success message.
PGPASSWORD="$DB_PASSWORD" psql \
  -v ON_ERROR_STOP=1 \
  -v task_type="$TASK_TYPE" \
  -h "$DB_HOST" -p "$DB_PORT" -U "$DB_USER" -d "$DB_NAME" \
  -f "$TMPFILE"

# Cleanup of $TMPFILE is handled by the EXIT trap.
echo ""
echo "✅ Load complete!"