/**
 * Seed FAQ and troubleshooting embeddings in Qdrant from knowledge_base.
 *
 * Run: npx tsx packages/api/src/embeddings/seed-knowledge-base.ts
 */
import { pool } from "../db/client";
import { embed, upsertPoints, type CollectionName } from "./client";

/**
 * Render an optional list-valued column as a `"<label>: a, b, c"` fragment.
 *
 * @param label - Human-readable prefix for the fragment.
 * @param value - Raw column value; only non-empty arrays produce output.
 * @returns The fragment, or `undefined` when the value is not an array or is
 *   empty, so that no dangling `"<label>: "` ends up in the embedded text.
 */
function listPart(label: string, value: unknown): string | undefined {
  if (!Array.isArray(value) || value.length === 0) return undefined;
  return `${label}: ${value.join(", ")}`;
}

/**
 * Build the text that gets embedded for one knowledge_base row.
 *
 * The question and answer are always included; the subcategory, form factors
 * and speeds are appended only when present. Parts are joined with ". ".
 *
 * @param row - A knowledge_base row keyed by column name.
 * @returns The concatenated text for the row.
 */
function kbToText(row: Record<string, unknown>): string {
  const parts = [
    `Q: ${row.question}`,
    `A: ${row.answer}`,
    row.subcategory && `Topic: ${row.subcategory}`,
    listPart("Form factors", row.applies_to_form_factors),
    listPart("Speeds", row.applies_to_speeds),
  ].filter(Boolean); // drop the parts that were skipped above
  return parts.join(". ");
}

/**
 * Choose the target collection for a knowledge_base category.
 *
 * @param category - The row's `category` value.
 * @returns `"troubleshooting_embeddings"` for `"troubleshooting"` and
 *   `"known_issue"`; `"faq_embeddings"` for every other category.
 */
function collectionForCategory(category: string): CollectionName {
  if (category === "troubleshooting" || category === "known_issue") {
    return "troubleshooting_embeddings";
  }
  return "faq_embeddings";
}

/**
 * Read every knowledge_base row, embed it, and upsert the resulting points
 * into the collection selected by `collectionForCategory`.
 *
 * Rows are processed in batches of `BATCH_SIZE`; within a batch the `embed`
 * calls run concurrently. Progress is logged after each batch. The database
 * pool is closed when the function finishes, whether it succeeds or throws.
 *
 * @returns A promise that resolves once all rows are processed and the pool
 *   is closed; it rejects if the query, an embed call, an upsert, or the pool
 *   shutdown fails.
 */
async function main(): Promise<void> {
  try {
    console.log("=== Seeding knowledge_base embeddings ===\n");

    const result = await pool.query(
      `SELECT id, category, subcategory, question, answer,
              applies_to_form_factors, applies_to_speeds, severity, tags
       FROM knowledge_base
       ORDER BY category, created_at`
    );

    // Derive the row type from the query result so the grouping below stays
    // consistent with whatever row typing the pool provides.
    type KbRow = (typeof result.rows)[number];

    console.log(`Found ${result.rows.length} knowledge base entries\n`);

    const BATCH_SIZE = 5;
    let faqCount = 0;
    let troubleCount = 0;

    for (let i = 0; i < result.rows.length; i += BATCH_SIZE) {
      const batch = result.rows.slice(i, i + BATCH_SIZE);

      // Group by collection
      const byCollection = new Map<CollectionName, KbRow[]>();
      for (const row of batch) {
        const col = collectionForCategory(row.category as string);
        const group = byCollection.get(col);
        if (group) {
          group.push(row);
        } else {
          byCollection.set(col, [row]);
        }
      }

      for (const [collection, rows] of byCollection) {
        const points = await Promise.all(
          rows.map(async (row: KbRow) => {
            const text = kbToText(row);
            const vector = await embed(text);
            return {
              id: row.id,
              vector,
              // `||` (not `??`) is kept on purpose: empty strings also fall
              // back to the default, matching the previously seeded payloads.
              payload: {
                question: row.question || "",
                answer: row.answer || "",
                category: row.category || "",
                subcategory: row.subcategory || "",
                // The troubleshooting-style fields mirror the FAQ columns.
                symptom: row.question || "",
                cause: row.subcategory || "",
                solution: row.answer || "",
                severity: row.severity || "info",
                form_factors: row.applies_to_form_factors || [],
                speeds: row.applies_to_speeds || [],
                tags: row.tags || [],
                text,
              },
            };
          })
        );

        await upsertPoints(collection, points);

        if (collection === "faq_embeddings") faqCount += points.length;
        else troubleCount += points.length;
      }

      console.log(
        `  Embedded ${Math.min(i + BATCH_SIZE, result.rows.length)}/${result.rows.length} entries (FAQ: ${faqCount}, Troubleshooting: ${troubleCount})`
      );
    }

    console.log(`\n=== Done: ${faqCount} FAQ + ${troubleCount} troubleshooting embedded ===`);
  } finally {
    // Always release the pool, and wait for it, before the process exits.
    await pool.end();
  }
}

main().catch((err: unknown) => {
  console.error("Fatal:", err);
  // The pool has already been closed by main()'s finally block.
  process.exit(1);
});