- ADR-0001: Multi-Agent Coworking Architecture with LLM Gateway Orchestrator
- ADR-0002: Tier Assignment Strategy for Model Selection (cost-first escalation)
- ADR-0003: Confidence Gate Thresholds & Learning Cycle Intervals (6h/12h/24h cycles)
- ADR-0004: External Provider Fallback Chain Ordering (Cerebras → Groq → Mistral)
- Enhanced client SDK: Offline Ollama fallback, health checks, exponential backoff retry
- Integration tests: claude-code-integration.test.ts (14 test cases)
- PHASE_2F_DEPLOYMENT.md: Pre-deployment checklist, automated deploy, rollback plan
- Post-deployment verification procedures for health, client fallback, metrics
76 lines
2.2 KiB
Bash
Executable File
76 lines
2.2 KiB
Bash
Executable File
#!/bin/bash
#
# Load blog-training-alpaca.jsonl into the PostgreSQL learning_corpus table.
#
# Steps:
#   1. Derive host/port/database from DB_URL.
#   2. Build a temporary SQL script: row count before, one multi-row INSERT
#      generated from the JSONL file by an embedded Python program, row count
#      after, all inside a single transaction.
#   3. Run the script with psql and remove the temporary file.
#
# Each JSONL line is expected to be a JSON object with the keys read below:
# "instruction", optional "input", "output", optional "quality_score"
# (defaults to 8.0) and optional "source" (defaults to "unknown").
#
# Exit status is non-zero if the JSONL file is missing, a line is not valid
# JSON, or psql reports an error (the transaction is then rolled back).

# Abort on the first failing command, on unset variables, and on failures
# inside pipelines, so a broken step can never reach the success message.
set -euo pipefail

JSONL_FILE="/Users/renefichtmueller/Desktop/Claude Code/llm-gateway/packages/fine-tuner/data/blog-training-alpaca.jsonl"
# NOTE(review): credentials are hard-coded here and in the psql call below;
# consider moving them to ~/.pgpass or the environment.
DB_URL="postgresql://llm:llm_secure_2026@127.0.0.1:15432/llm_gateway"
TASK_TYPE="tip_blog"

# Parse connection string
DB_HOST=$(echo "$DB_URL" | sed -n 's/.*@\([^:]*\).*/\1/p')
DB_PORT=$(echo "$DB_URL" | sed -n 's/.*:\([0-9]*\)\/.*/\1/p')
DB_NAME=$(echo "$DB_URL" | sed -n 's/.*\/\(.*\)$/\1/p')
DB_USER="llm"

echo "🔄 Loading blog training data into PostgreSQL..."
echo " Host: $DB_HOST:$DB_PORT"
echo " Database: $DB_NAME"
echo " JSONL: $JSONL_FILE"
echo ""

# Fail early with a clear message instead of a Python traceback.
if [[ ! -r "$JSONL_FILE" ]]; then
    echo "❌ JSONL file not found or not readable: $JSONL_FILE" >&2
    exit 1
fi

# Create temporary SQL file; the trap removes it on every exit path,
# including failures triggered by `set -e`.
TMPFILE=$(mktemp)
trap 'rm -f "$TMPFILE"' EXIT

# Quoted delimiter: the text is written verbatim. :'task_type' is expanded by
# psql (see -v below) into a correctly quoted SQL string literal.
cat > "$TMPFILE" << 'EOF'
-- The generated statements are written as UTF-8.
SET client_encoding = 'UTF8';

BEGIN;

-- Count before
SELECT COUNT(*) as "Rows before" FROM learning_corpus WHERE task_type = :'task_type';

EOF

# Use Python to parse JSONL and generate SQL.
# The heredoc delimiter is quoted so the shell does not touch backslashes or
# dollar signs in the Python source; the path and task type are passed as
# arguments instead of being interpolated into the code. The redirect must be
# on this line: a heredoc terminator has to stand alone on its own line.
python3 - "$JSONL_FILE" "$TASK_TYPE" >> "$TMPFILE" << 'PYEOF'
import json
import sys

jsonl_path, task_type = sys.argv[1], sys.argv[2]

# Emit UTF-8 regardless of the caller's locale so non-ASCII text survives
# the redirect into the SQL file.
sys.stdout.reconfigure(encoding="utf-8")


def sql_literal(value):
    """Return value as a PostgreSQL escape-string literal (E'...').

    Backslashes and single quotes are escaped, and CR/LF are written as
    \\r / \\n escapes, so each row stays on one line of the SQL file while
    the stored text keeps its real line breaks.
    """
    escaped = (
        str(value)
        .replace("\\", "\\\\")
        .replace("'", "''")
        .replace("\r", "\\r")
        .replace("\n", "\\n")
    )
    return f"E'{escaped}'"


def text_field(sample, key, default=""):
    """Return sample[key] as text, using default when missing or null."""
    value = sample.get(key, default)
    return default if value is None else str(value)


rows = []
with open(jsonl_path, encoding="utf-8") as handle:
    for line_no, raw in enumerate(handle, start=1):
        if not raw.strip():
            continue
        try:
            sample = json.loads(raw)
        except json.JSONDecodeError as err:
            sys.exit(f"{jsonl_path}:{line_no}: invalid JSON: {err}")

        # The prompt is the instruction, followed by the optional input on
        # its own line.
        prompt = text_field(sample, "instruction")
        if sample.get("input"):
            prompt = f"{prompt}\n{sample['input']}"

        output = text_field(sample, "output")
        source = text_field(sample, "source", "unknown")

        # Coerce to float so arbitrary JSON values can never be spliced into
        # the statement as raw SQL.
        quality = sample.get("quality_score")
        quality = 8.0 if quality is None else float(quality)

        rows.append(
            f"  ({sql_literal(task_type)}, {sql_literal(prompt)}, "
            f"{sql_literal(output)}, {quality!r}, "
            f"ARRAY[{sql_literal(source)}, {sql_literal(task_type)}])"
        )

print("-- Insert blog training samples")
if rows:
    print(
        "INSERT INTO learning_corpus "
        "(task_type, prompt_text, completion_text, quality_score, tags) VALUES"
    )
    print(",\n".join(rows) + ";")
else:
    # An INSERT with an empty VALUES list is a syntax error; skip it.
    print("-- (no samples found; nothing to insert)")
    print(f"warning: no samples found in {jsonl_path}", file=sys.stderr)
PYEOF

cat >> "$TMPFILE" << 'EOF'

-- Count after
SELECT COUNT(*) as "Rows after" FROM learning_corpus WHERE task_type = :'task_type';

COMMIT;
EOF

# Execute SQL.
# ON_ERROR_STOP makes psql stop at the first error and exit non-zero, so the
# open transaction is rolled back and the success message below is not
# printed after a failed load. task_type feeds the :'task_type' references.
PGPASSWORD="llm_secure_2026" psql \
    -h "$DB_HOST" -p "$DB_PORT" -U "$DB_USER" -d "$DB_NAME" \
    -v ON_ERROR_STOP=1 \
    -v task_type="$TASK_TYPE" \
    -f "$TMPFILE"

# Cleanup is handled by the EXIT trap installed after mktemp.

echo ""
echo "✅ Load complete!"