/**
 * Semantic search API routes (Qdrant vector search)
 *
 * GET /api/search?q=&collection=&limit=
 * GET /api/search/products?q=&form_factor=&speed_gbps=&fiber_type=&wdm_type=&limit=
 * GET /api/search/documents?q=&doc_type=&vendor=&collection=&limit=
 * GET /api/search/news?q=&source=&limit=
 * GET /api/search/stats
 */
import { Router, Request, Response } from "express";
import { semanticSearch, getCollectionInfo, CollectionName } from "../embeddings/client";
import { searchTransceivers } from "../db/queries";

export const searchRouter = Router();

/** Collections accepted by the generic search route and reported by `/stats`. */
const VALID_COLLECTIONS: CollectionName[] = [
  "product_embeddings",
  "datasheet_chunks",
  "faq_embeddings",
  "manual_chunks",
  "troubleshooting_embeddings",
  "news_embeddings",
];

/** Collections accepted by the `/documents` route. */
const DOCUMENT_COLLECTIONS: CollectionName[] = ["datasheet_chunks", "manual_chunks"];

/** Result count used when `limit` is absent or not a positive integer. */
const DEFAULT_LIMIT = 10;

/** Fixed score attached to rows returned by `searchTransceivers`, which carry no vector score. */
const FTS_SCORE = 0.5;

/** One entry of a payload filter's `must` list. */
type FilterCondition = Record<string, unknown>;

/** A row from `searchTransceivers` wrapped in the `{ id, score, payload }` shape the root route formats. */
type FtsHit = { id: unknown; score: number; payload: Record<string, unknown> };

/**
 * Reads a single query-string parameter as a string.
 *
 * Only string values are accepted: for a repeated parameter (`?q=a&q=b`) the
 * first element is used, and nested objects (`?q[x]=1`) are treated as absent
 * instead of being stringified to "[object Object]".
 *
 * @param p - Name of the query parameter.
 * @param req - Incoming request.
 * @returns The non-empty string value, or `undefined` when missing or empty.
 */
const q = (p: string, req: Request): string | undefined => {
  const raw = req.query[p];
  const first = Array.isArray(raw) ? raw[0] : raw;
  return typeof first === "string" && first.length > 0 ? first : undefined;
};

/**
 * Parses the `limit` query parameter.
 *
 * @param req - Incoming request.
 * @returns The parsed integer, or {@link DEFAULT_LIMIT} when the parameter is
 *   missing, non-numeric, zero or negative.
 */
function parseLimit(req: Request): number {
  const parsed = parseInt(q("limit", req) ?? "", 10);
  return Number.isFinite(parsed) && parsed > 0 ? parsed : DEFAULT_LIMIT;
}

/** Rounds a score to three decimal places for the response. */
function roundScore(score: number): number {
  return Math.round(score * 1000) / 1000;
}

/** Type guard: is `value` one of the `allowed` collection names? */
function isOneOf(allowed: readonly CollectionName[], value: string): value is CollectionName {
  return (allowed as readonly string[]).includes(value);
}

/** Wraps conditions in a `{ must }` filter, or returns `undefined` when there are none. */
function buildFilter(must: FilterCondition[]): { must: FilterCondition[] } | undefined {
  return must.length > 0 ? { must } : undefined;
}

/** Sends the 400 response used by every route when `q` is missing. */
function sendMissingQuery(res: Response): void {
  res.status(400).json({ success: false, error: "Missing 'q' parameter" });
}

/**
 * Sends a 503 response describing a backend failure.
 *
 * @param res - Response to write to.
 * @param err - The caught value; its message (or string form) becomes `detail`.
 * @param error - Short error label returned to the client.
 */
function sendUnavailable(res: Response, err: unknown, error = "Vector search unavailable"): void {
  res.status(503).json({
    success: false,
    error,
    detail: err instanceof Error ? err.message : String(err),
  });
}

/**
 * Runs the product search through `searchTransceivers` and wraps each row as a hit
 * with the fixed {@link FTS_SCORE}.
 *
 * @param query - Search text.
 * @param limit - Maximum number of rows requested.
 * @returns The rows found under the result's `data` property (empty when absent).
 */
async function ftsProductHits(query: string, limit: number): Promise<FtsHit[]> {
  const fts = await searchTransceivers({ q: query, limit });
  // eslint-disable-next-line @typescript-eslint/no-explicit-any -- return type of searchTransceivers is declared elsewhere
  const rows: Array<Record<string, unknown>> = (fts as any).data || [];
  return rows.map((row) => ({ id: row.id, score: FTS_SCORE, payload: row }));
}

// GET /api/search — Generic semantic search across any collection
searchRouter.get("/", async (req: Request, res: Response) => {
  const query = q("q", req);
  const requested = q("collection", req) ?? "product_embeddings";
  const limit = parseLimit(req);

  if (!query) {
    sendMissingQuery(res);
    return;
  }
  if (!isOneOf(VALID_COLLECTIONS, requested)) {
    res.status(400).json({
      success: false,
      error: `Invalid collection. Valid: ${VALID_COLLECTIONS.join(", ")}`,
    });
    return;
  }

  const collection = requested;
  // NOTE(review): product_embeddings is always served by searchTransceivers here and
  // reported as fallback "fts"; the vector index is never queried on this route.
  // Confirm this is intentional.
  const isProductSearch = collection === "product_embeddings";

  // Hits come from two backends with different payload types, hence the loose typing.
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
  const sendHits = (hits: any[]): void => {
    res.json({
      success: true,
      query,
      collection,
      fallback: isProductSearch ? "fts" : undefined,
      // Payload fields are flattened into each result next to id and score.
      results: hits.map((hit) => ({
        id: hit.id,
        score: roundScore(hit.score),
        ...hit.payload,
      })),
      count: hits.length,
    });
  };

  try {
    const hits = isProductSearch
      ? await ftsProductHits(query, limit)
      : await semanticSearch(collection, query, limit);
    sendHits(hits);
  } catch (err) {
    if (isProductSearch) {
      // One retry of the product search before giving up.
      try {
        sendHits(await ftsProductHits(query, limit));
        return;
      } catch {
        // Retry failed too: report the original error below.
      }
    }
    sendUnavailable(res, err);
  }
});

// GET /api/search/products — Product-specific semantic search with filters
searchRouter.get("/products", async (req: Request, res: Response) => {
  const query = q("q", req);
  const limit = parseLimit(req);
  const formFactor = q("form_factor", req);
  const speedGbps = q("speed_gbps", req);
  const fiberType = q("fiber_type", req);
  const wdmType = q("wdm_type", req);

  if (!query) {
    sendMissingQuery(res);
    return;
  }

  // Build Qdrant payload filter
  const mustConditions: Array<Record<string, unknown>> = [];
  if (formFactor) {
    mustConditions.push({ key: "form_factor", match: { value: formFactor.toUpperCase() } });
  }
  if (speedGbps) {
    const speed = parseFloat(speedGbps);
    // Reject non-numeric input instead of sending NaN (serialised as null) in the filter.
    if (!Number.isFinite(speed)) {
      res.status(400).json({ success: false, error: "Invalid 'speed_gbps' parameter: must be a number" });
      return;
    }
    mustConditions.push({ key: "speed_gbps", match: { value: speed } });
  }
  if (fiberType) {
    mustConditions.push({ key: "fiber_type", match: { value: fiberType.toUpperCase() } });
  }
  if (wdmType) {
    mustConditions.push({ key: "wdm_type", match: { value: wdmType.toUpperCase() } });
  }
  const filter = buildFilter(mustConditions);

  try {
    const results = await semanticSearch("product_embeddings", query, limit, filter);
    res.json({
      success: true,
      query,
      filters: { formFactor, speedGbps, fiberType, wdmType },
      results: results.map((r) => ({
        id: r.id,
        score: roundScore(r.score),
        slug: r.payload.slug,
        standard_name: r.payload.standard_name,
        form_factor: r.payload.form_factor,
        speed: r.payload.speed,
        reach: r.payload.reach_label,
        fiber_type: r.payload.fiber_type,
        connector: r.payload.connector,
        category: r.payload.category,
        vendor: r.payload.vendor,
      })),
      count: results.length,
    });
  } catch (err) {
    sendUnavailable(res, err);
  }
});

// GET /api/search/documents — Search datasheets and manuals
searchRouter.get("/documents", async (req: Request, res: Response) => {
  const query = q("q", req);
  const limit = parseLimit(req);
  const docType = q("doc_type", req);
  const vendor = q("vendor", req);
  const requested = q("collection", req) ?? "datasheet_chunks";

  if (!query) {
    sendMissingQuery(res);
    return;
  }
  if (!isOneOf(DOCUMENT_COLLECTIONS, requested)) {
    res.status(400).json({
      success: false,
      error: "collection must be 'datasheet_chunks' or 'manual_chunks'",
    });
    return;
  }
  const collection = requested;

  const mustConditions: Array<Record<string, unknown>> = [];
  if (docType) {
    mustConditions.push({ key: "document_type", match: { value: docType.toLowerCase() } });
  }
  if (vendor) {
    mustConditions.push({ key: "vendor", match: { value: vendor } });
  }
  const filter = buildFilter(mustConditions);

  try {
    const results = await semanticSearch(collection, query, limit, filter);

    // Group by document for cleaner output
    const byDocument = new Map<
      string,
      {
        title: string;
        vendor: string;
        source_url: string;
        chunks: Array<{ score: number; heading: string; text: string; chunk_index: number }>;
      }
    >();

    for (const r of results) {
      // Hits without a document_id are grouped under their own point id.
      const docId = String(r.payload.document_id || r.id);
      let doc = byDocument.get(docId);
      if (!doc) {
        doc = {
          title: String(r.payload.title || ""),
          vendor: String(r.payload.vendor || ""),
          source_url: String(r.payload.source_url || ""),
          chunks: [],
        };
        byDocument.set(docId, doc);
      }
      doc.chunks.push({
        score: roundScore(r.score),
        heading: String(r.payload.section_heading || ""),
        // Chunk text is truncated to 500 characters in the response.
        text: String(r.payload.text || "").slice(0, 500),
        chunk_index: Number(r.payload.chunk_index || 0),
      });
    }

    res.json({
      success: true,
      query,
      collection,
      filters: { docType, vendor },
      documents: Array.from(byDocument.values()),
      totalChunks: results.length,
    });
  } catch (err) {
    sendUnavailable(res, err);
  }
});

// GET /api/search/news — Search news articles
searchRouter.get("/news", async (req: Request, res: Response) => {
  const query = q("q", req);
  const limit = parseLimit(req);
  const source = q("source", req);

  if (!query) {
    sendMissingQuery(res);
    return;
  }

  const mustConditions: Array<Record<string, unknown>> = [];
  if (source) {
    mustConditions.push({ key: "source", match: { value: source } });
  }
  const filter = buildFilter(mustConditions);

  try {
    const results = await semanticSearch("news_embeddings", query, limit, filter);
    res.json({
      success: true,
      query,
      filters: { source },
      results: results.map((r) => ({
        id: r.id,
        score: roundScore(r.score),
        title: r.payload.title,
        url: r.payload.url,
        source: r.payload.source,
        summary: r.payload.summary,
        published_at: r.payload.published_at,
      })),
      count: results.length,
    });
  } catch (err) {
    sendUnavailable(res, err);
  }
});

// GET /api/search/stats — Collection statistics
searchRouter.get("/stats", async (_req: Request, res: Response) => {
  try {
    const stats = await Promise.all(
      VALID_COLLECTIONS.map(async (col) => {
        try {
          const info = await getCollectionInfo(col);
          return { collection: col, ...info };
        } catch {
          // A failing collection is reported with zero counts rather than failing the whole response.
          return { collection: col, pointsCount: 0, vectorsCount: 0, error: "unavailable" };
        }
      })
    );
    res.json({ success: true, collections: stats });
  } catch (err) {
    sendUnavailable(res, err, "Qdrant unavailable");
  }
});