/**
 * Seed product_embeddings collection in Qdrant from PostgreSQL transceivers.
 *
 * Creates a rich text representation of each transceiver, embeds it via
 * Ollama nomic-embed-text, and stores in Qdrant with payload filters.
 *
 * Run: npx tsx packages/api/src/embeddings/seed-products.ts
 */
import { pool } from "../db/client";
import { embed, upsertPoints } from "./client";

/** Name of the collection the embedded products are upserted into. */
const COLLECTION = "product_embeddings";

/** Number of rows embedded concurrently and sent in a single upsert call. */
const BATCH_SIZE = 10;

/**
 * Builds the text that gets embedded for one transceiver row.
 *
 * Each populated (truthy) column contributes one labelled fragment; empty,
 * null, zero or false columns are skipped. Fragments are joined with ". " in
 * a fixed order so the same row always yields the same text.
 *
 * @param row - A row shaped like the SELECT in {@link main}: transceiver
 *   columns plus `vendor_name` and `description` aliases.
 * @returns The joined description, or an empty string when no column is set.
 */
function transceiverToText(row: Record<string, unknown>): string {
  const parts = [
    row.standard_name && `${row.standard_name}`,
    row.form_factor && `Form factor: ${row.form_factor}`,
    row.speed && `Speed: ${row.speed}`,
    row.reach_label && `Reach: ${row.reach_label}`,
    row.fiber_type && `Fiber: ${row.fiber_type}`,
    row.connector && `Connector: ${row.connector}`,
    row.wavelengths && `Wavelengths: ${row.wavelengths}`,
    row.wdm_type && `WDM: ${row.wdm_type}`,
    row.category && `Category: ${row.category}`,
    // Boolean flag: contributes a fixed phrase rather than its value.
    row.coherent && `Coherent optics`,
    row.power_consumption_w && `Power: ${row.power_consumption_w}W`,
    row.temp_range && `Temperature: ${row.temp_range}`,
    row.vendor_name && `Vendor: ${row.vendor_name}`,
    row.description && `${row.description}`,
  ].filter(Boolean);

  return parts.join(". ");
}

/**
 * Reads every transceiver (with its vendor name), embeds each one and upserts
 * the resulting points into the product collection in batches.
 *
 * Rows inside a batch are embedded concurrently; batches run sequentially.
 * Any failure from the query, `embed` or `upsertPoints` rejects the returned
 * promise. The database pool is NOT closed here; the caller owns its
 * lifecycle so it is shut down exactly once on both success and failure.
 */
async function main(): Promise<void> {
  console.log("=== Seeding product_embeddings ===\n");

  const result = await pool.query(
    `SELECT t.id, t.slug, t.standard_name, t.form_factor, t.speed, t.speed_gbps,
            t.reach_label, t.reach_meters, t.fiber_type, t.connector,
            t.wavelengths, t.wdm_type, t.coherent, t.power_consumption_w,
            t.temp_range, t.category, t.notes as description,
            v.name as vendor_name
     FROM transceivers t
     LEFT JOIN vendors v ON v.id = t.vendor_id
     ORDER BY t.speed_gbps DESC`
  );

  console.log(`Found ${result.rows.length} transceivers to embed\n`);

  let total = 0;

  for (let i = 0; i < result.rows.length; i += BATCH_SIZE) {
    const batch = result.rows.slice(i, i + BATCH_SIZE);

    const points = await Promise.all(
      batch.map(async (row) => {
        const text = transceiverToText(row);
        const vector = await embed(text);

        return {
          id: row.id,
          vector,
          // Missing columns are normalised to "" / 0 / false so every point
          // carries the same payload keys.
          payload: {
            slug: row.slug,
            standard_name: row.standard_name || "",
            form_factor: row.form_factor || "",
            speed: row.speed || "",
            // parseFloat yields NaN for null/invalid input; `|| 0` maps that to 0.
            speed_gbps: parseFloat(row.speed_gbps) || 0,
            reach_label: row.reach_label || "",
            reach_meters: row.reach_meters || 0,
            fiber_type: row.fiber_type || "",
            connector: row.connector || "",
            wdm_type: row.wdm_type || "",
            category: row.category || "",
            coherent: row.coherent || false,
            vendor: row.vendor_name || "",
            text,
          },
        };
      })
    );

    await upsertPoints(COLLECTION, points);
    total += points.length;
    console.log(`  Embedded ${total}/${result.rows.length} transceivers`);
  }

  console.log(`\n=== Done: ${total} products embedded ===`);
}

/**
 * Script entry point: runs {@link main}, then closes the database pool
 * exactly once, and exits with status 1 if either step failed.
 *
 * The pool shutdown is awaited before `process.exit` so the exit cannot
 * pre-empt it, and a failure while closing is logged instead of becoming an
 * unobserved rejection.
 */
async function run(): Promise<void> {
  let failed = false;

  try {
    await main();
  } catch (err: unknown) {
    failed = true;
    console.error("Fatal:", err);
  }

  try {
    await pool.end();
  } catch (err: unknown) {
    failed = true;
    console.error("Failed to close database pool:", err);
  }

  if (failed) {
    // Hard exit keeps the original fail-fast behaviour (e.g. sibling embed
    // requests from a rejected Promise.all may still be in flight).
    process.exit(1);
  }
}

void run();