/**
 * Seed news_embeddings collection from PostgreSQL news_articles table.
 *
 * Run: npx tsx packages/api/src/embeddings/seed-news.ts
 */
import { pool } from "../db/client";
import { embed, upsertPoints } from "./client";

/**
 * Flatten a news article row into the single string that gets embedded.
 *
 * Truthy fields are emitted in a fixed order (title, "Source: …",
 * "Category: …", summary, then the leading 500 characters of full_text) and
 * joined with ". ". Falsy fields, and fields that render to an empty string,
 * are left out.
 *
 * @param row - Article row keyed by column name.
 * @returns The joined text, or "" when no field contributes.
 */
function articleToText(row: Record<string, unknown>): string {
  const segments: string[] = [];

  // Append a rendered field unless the raw value is falsy or renders to "".
  const append = (raw: unknown, render: (value: unknown) => string): void => {
    if (!raw) return;
    const rendered = render(raw);
    if (rendered) segments.push(rendered);
  };

  append(row.title, (value) => `${value}`);
  append(row.source, (value) => `Source: ${value}`);
  append(row.category, (value) => `Category: ${value}`);
  append(row.summary, (value) => `${value}`);
  // Only the start of the body is kept, to bound the size of the embedded text.
  append(row.full_text, (value) => String(value).slice(0, 500));

  return segments.join(". ");
}

/**
 * Embed the most recent news articles and upsert them into the
 * "news_embeddings" collection.
 *
 * Reads up to 500 rows from news_articles (newest published_at first, rows
 * without a publication date last), turns each row into text with
 * articleToText, embeds the rows in batches of 10, and upserts each batch
 * before moving on to the next. Progress is logged after every batch.
 *
 * The pool is closed on both normal exits (no rows, or all rows processed).
 * Errors from the query, embed, or upsertPoints propagate to the caller and
 * leave the pool open.
 */
async function main(): Promise<void> {
  console.log("=== Seeding news_embeddings ===\n");

  // PostgreSQL sorts NULLs first under DESC; NULLS LAST keeps undated rows
  // from crowding dated articles out of the LIMIT window.
  const result = await pool.query(
    `SELECT id, title, source_url, source, category, summary, full_text, published_at, relevance_score
     FROM news_articles
     ORDER BY published_at DESC NULLS LAST
     LIMIT 500`,
  );

  console.log(`Found ${result.rows.length} news articles to embed\n`);

  if (result.rows.length === 0) {
    console.log("No articles found. Run the news scraper first.");
    await pool.end();
    return;
  }

  // Number of rows embedded concurrently and upserted together.
  const BATCH_SIZE = 10;
  let total = 0;

  for (let i = 0; i < result.rows.length; i += BATCH_SIZE) {
    const batch = result.rows.slice(i, i + BATCH_SIZE);

    // Embed every row of the batch in parallel; one failure rejects the whole batch.
    const points = await Promise.all(
      batch.map(async (row) => {
        const text = articleToText(row);
        const vector = await embed(text);

        return {
          id: String(row.id),
          vector,
          // Missing columns are stored as "" (or 0 for the score) rather than null.
          payload: {
            title: row.title || "",
            url: row.source_url || "",
            source: row.source || "",
            category: row.category || "",
            summary: row.summary || "",
            published_at: row.published_at ? new Date(row.published_at).toISOString() : "",
            relevance_score: row.relevance_score || 0,
            text,
          },
        };
      }),
    );

    await upsertPoints("news_embeddings", points);
    total += points.length;
    console.log(`  Embedded ${total}/${result.rows.length} articles`);
  }

  console.log(`\n=== Done: ${total} articles embedded ===`);
  await pool.end();
}

// Script entry point: run the seeding job; on failure, log the error, shut
// the pool down, and exit with a non-zero status.
main().catch(async (err: unknown) => {
  console.error("Fatal:", err);
  try {
    // Await the shutdown so it can complete before the process is terminated;
    // process.exit() does not wait for pending promises.
    await pool.end();
  } catch (closeErr: unknown) {
    // A failed shutdown must not mask the original error or block the exit.
    console.error("Failed to close pool:", closeErr);
  } finally {
    process.exit(1);
  }
});