285 lines
8.7 KiB
TypeScript

/**
* Semantic search API routes (Qdrant vector search)
*
* GET /api/search?q=<query>&collection=<col>&limit=<n>
* GET /api/search/products?q=<query>&form_factor=&speed_gbps=&fiber_type=&wdm_type=&limit=<n>
* GET /api/search/documents?q=<query>&collection=<col>&doc_type=&vendor=&limit=<n>
* GET /api/search/news?q=<query>&source=&limit=<n>
* GET /api/search/stats
*/
import { Router, Request, Response } from "express";
import { semanticSearch, getCollectionInfo, CollectionName } from "../embeddings/client";
import { searchTransceivers } from "../db/queries";
// Router on which every route in this file is registered.
export const searchRouter = Router();
// Collection names accepted by the generic search route's `collection`
// parameter; the stats route also iterates this list to report on each one.
const VALID_COLLECTIONS: CollectionName[] = [
"product_embeddings",
"datasheet_chunks",
"faq_embeddings",
"manual_chunks",
"troubleshooting_embeddings",
"news_embeddings",
];
/**
 * Reads one query-string parameter as a plain, non-empty string.
 *
 * Only string values are accepted. When the parameter is repeated
 * (`?q=a&q=b`) the first non-empty string is used. Nested/object values
 * (`?q[x]=1`) and empty values yield `undefined` rather than being
 * stringified into `"[object Object]"` or `""`.
 *
 * @param p   Name of the query parameter.
 * @param req Incoming request whose `query` object is inspected.
 * @returns The parameter value, or `undefined` when absent, empty, or not a string.
 */
const q = (p: string, req: Request): string | undefined => {
  const raw: unknown = req.query[p];
  const candidate: unknown = Array.isArray(raw)
    ? raw.find((v) => typeof v === "string" && v !== "")
    : raw;
  return typeof candidate === "string" && candidate !== "" ? candidate : undefined;
};
// GET /api/search — Generic semantic search across any collection
//
// Query parameters:
//   q          (required) search text
//   collection (optional) one of VALID_COLLECTIONS; defaults to "product_embeddings"
//   limit      (optional) maximum number of hits; defaults to 10
//
// "product_embeddings" is answered from the full-text index (searchTransceivers)
// and flagged with `fallback: "fts"`; every other collection goes through
// semanticSearch. Responds 400 on a missing query or an unknown collection and
// 503 when the search call throws.
searchRouter.get("/", async (req: Request, res: Response) => {
  const query = q("q", req);
  const collection = (q("collection", req) || "product_embeddings") as CollectionName;
  // parseInt returns NaN for non-numeric input; a missing, non-numeric, or
  // non-positive limit falls back to the default instead of reaching the backend.
  const parsedLimit = parseInt(q("limit", req) || "", 10);
  const limit = Number.isFinite(parsedLimit) && parsedLimit > 0 ? parsedLimit : 10;

  if (!query) {
    res.status(400).json({ success: false, error: "Missing 'q' parameter" });
    return;
  }
  if (!VALID_COLLECTIONS.includes(collection)) {
    res.status(400).json({
      success: false,
      error: `Invalid collection. Valid: ${VALID_COLLECTIONS.join(", ")}`,
    });
    return;
  }

  // Runs the full-text product search and shapes each row like a flattened
  // vector hit. FTS has no similarity score, so a constant 0.5 is reported;
  // the row is spread last, so its own fields win on a name clash.
  const ftsProductHits = async (text: string): Promise<any[]> => {
    const fts = await searchTransceivers({ q: text, limit });
    return (((fts as any).data) || []).map((t: any) => ({ id: t.id, score: 0.5, ...t }));
  };

  try {
    if (collection === "product_embeddings") {
      const results = await ftsProductHits(query);
      res.json({ success: true, query, collection, fallback: "fts", results, count: results.length });
      return;
    }

    const hits = await semanticSearch(collection, query, limit);
    res.json({
      success: true,
      query,
      collection,
      results: hits.map((r) => ({
        id: r.id,
        score: Math.round(r.score * 1000) / 1000,
        ...r.payload,
      })),
      count: hits.length,
    });
  } catch (err) {
    if (collection === "product_embeddings") {
      // One more attempt at the full-text search before giving up.
      try {
        const results = await ftsProductHits(query);
        res.json({ success: true, query, collection, fallback: "fts", results, count: results.length });
        return;
      } catch {
        // The retry failed as well; the original error is reported below.
      }
    }
    res.status(503).json({
      success: false,
      error: "Vector search unavailable",
      // A thrown value is not guaranteed to be an Error instance.
      detail: err instanceof Error ? err.message : String(err),
    });
  }
});
// GET /api/search/products — Product-specific semantic search with filters
//
// Query parameters:
//   q           (required) search text
//   limit       (optional) maximum number of hits; defaults to 10
//   form_factor, fiber_type, wdm_type (optional) exact-match filters, upper-cased before matching
//   speed_gbps  (optional) numeric exact-match filter
//
// Responds 400 on a missing query or a non-numeric speed_gbps, and 503 when
// the vector search throws.
searchRouter.get("/products", async (req: Request, res: Response) => {
  const query = q("q", req);
  // A missing, non-numeric, or non-positive limit falls back to the default of 10.
  const parsedLimit = parseInt(q("limit", req) || "", 10);
  const limit = Number.isFinite(parsedLimit) && parsedLimit > 0 ? parsedLimit : 10;
  const formFactor = q("form_factor", req);
  const speedGbps = q("speed_gbps", req);
  const fiberType = q("fiber_type", req);
  const wdmType = q("wdm_type", req);

  if (!query) {
    res.status(400).json({ success: false, error: "Missing 'q' parameter" });
    return;
  }

  // Reject an unparseable speed up front: NaN/Infinity serialise to null in
  // JSON, which would send a meaningless match condition to the search backend.
  const speedValue = speedGbps ? parseFloat(speedGbps) : undefined;
  if (speedValue !== undefined && !Number.isFinite(speedValue)) {
    res.status(400).json({ success: false, error: "'speed_gbps' must be a number" });
    return;
  }

  // Build Qdrant payload filter
  const mustConditions: Array<Record<string, unknown>> = [];
  if (formFactor) {
    mustConditions.push({ key: "form_factor", match: { value: formFactor.toUpperCase() } });
  }
  if (speedValue !== undefined) {
    mustConditions.push({ key: "speed_gbps", match: { value: speedValue } });
  }
  if (fiberType) {
    mustConditions.push({ key: "fiber_type", match: { value: fiberType.toUpperCase() } });
  }
  if (wdmType) {
    mustConditions.push({ key: "wdm_type", match: { value: wdmType.toUpperCase() } });
  }
  // No filter object at all when the caller supplied no filter parameters.
  const filter = mustConditions.length > 0 ? { must: mustConditions } : undefined;

  try {
    const results = await semanticSearch("product_embeddings", query, limit, filter);
    res.json({
      success: true,
      query,
      // Echo the filters exactly as received (before upper-casing/parsing).
      filters: { formFactor, speedGbps, fiberType, wdmType },
      results: results.map((r) => ({
        id: r.id,
        score: Math.round(r.score * 1000) / 1000,
        slug: r.payload.slug,
        standard_name: r.payload.standard_name,
        form_factor: r.payload.form_factor,
        speed: r.payload.speed,
        reach: r.payload.reach_label,
        fiber_type: r.payload.fiber_type,
        connector: r.payload.connector,
        category: r.payload.category,
        vendor: r.payload.vendor,
      })),
      count: results.length,
    });
  } catch (err) {
    res.status(503).json({
      success: false,
      error: "Vector search unavailable",
      // A thrown value is not guaranteed to be an Error instance.
      detail: err instanceof Error ? err.message : String(err),
    });
  }
});
// GET /api/search/documents — Search datasheets and manuals
//
// Query parameters:
//   q          (required) search text
//   collection (optional) "datasheet_chunks" (default) or "manual_chunks"
//   limit      (optional) maximum number of chunks; defaults to 10
//   doc_type   (optional) exact-match filter on document_type, lower-cased before matching
//   vendor     (optional) exact-match filter on vendor, matched as given
//
// Matching chunks are grouped under their parent document in the response.
// Responds 400 on a missing query or an unsupported collection, and 503 when
// the vector search throws.
searchRouter.get("/documents", async (req: Request, res: Response) => {
  const query = q("q", req);
  // A missing, non-numeric, or non-positive limit falls back to the default of 10.
  const parsedLimit = parseInt(q("limit", req) || "", 10);
  const limit = Number.isFinite(parsedLimit) && parsedLimit > 0 ? parsedLimit : 10;
  const docType = q("doc_type", req);
  const vendor = q("vendor", req);
  const collection = (q("collection", req) || "datasheet_chunks") as CollectionName;

  if (!query) {
    res.status(400).json({ success: false, error: "Missing 'q' parameter" });
    return;
  }
  if (!["datasheet_chunks", "manual_chunks"].includes(collection)) {
    res.status(400).json({
      success: false,
      error: "collection must be 'datasheet_chunks' or 'manual_chunks'",
    });
    return;
  }

  const mustConditions: Array<Record<string, unknown>> = [];
  if (docType) {
    mustConditions.push({ key: "document_type", match: { value: docType.toLowerCase() } });
  }
  if (vendor) {
    mustConditions.push({ key: "vendor", match: { value: vendor } });
  }
  const filter = mustConditions.length > 0 ? { must: mustConditions } : undefined;

  type DocumentGroup = {
    title: string;
    vendor: string;
    source_url: string;
    chunks: Array<{ score: number; heading: string; text: string; chunk_index: number }>;
  };

  try {
    const results = await semanticSearch(collection, query, limit, filter);

    // Group by document for cleaner output. Map iteration follows insertion
    // order, so documents appear in the order their first chunk was returned.
    const byDocument = new Map<string, DocumentGroup>();
    for (const r of results) {
      // Chunks without a document_id are keyed by their own point id.
      const docId = String(r.payload.document_id || r.id);
      let group = byDocument.get(docId);
      if (!group) {
        // Document-level fields are taken from the first chunk seen for it.
        group = {
          title: String(r.payload.title || ""),
          vendor: String(r.payload.vendor || ""),
          source_url: String(r.payload.source_url || ""),
          chunks: [],
        };
        byDocument.set(docId, group);
      }
      group.chunks.push({
        score: Math.round(r.score * 1000) / 1000,
        heading: String(r.payload.section_heading || ""),
        // Chunk text is truncated to 500 characters in the response.
        text: String(r.payload.text || "").slice(0, 500),
        chunk_index: Number(r.payload.chunk_index || 0),
      });
    }

    res.json({
      success: true,
      query,
      collection,
      filters: { docType, vendor },
      documents: Array.from(byDocument.values()),
      totalChunks: results.length,
    });
  } catch (err) {
    res.status(503).json({
      success: false,
      error: "Vector search unavailable",
      // A thrown value is not guaranteed to be an Error instance.
      detail: err instanceof Error ? err.message : String(err),
    });
  }
});
// GET /api/search/news — Search news articles
//
// Query parameters:
//   q      (required) search text
//   limit  (optional) maximum number of hits; defaults to 10
//   source (optional) exact-match filter on the article source, matched as given
//
// Responds 400 on a missing query and 503 when the vector search throws.
searchRouter.get("/news", async (req: Request, res: Response) => {
  const query = q("q", req);
  // A missing, non-numeric, or non-positive limit falls back to the default of 10.
  const parsedLimit = parseInt(q("limit", req) || "", 10);
  const limit = Number.isFinite(parsedLimit) && parsedLimit > 0 ? parsedLimit : 10;
  const source = q("source", req);

  if (!query) {
    res.status(400).json({ success: false, error: "Missing 'q' parameter" });
    return;
  }

  // Only send a filter when the caller asked for a specific source.
  const filter = source
    ? { must: [{ key: "source", match: { value: source } }] }
    : undefined;

  try {
    const results = await semanticSearch("news_embeddings", query, limit, filter);
    res.json({
      success: true,
      query,
      filters: { source },
      results: results.map((r) => ({
        id: r.id,
        score: Math.round(r.score * 1000) / 1000,
        title: r.payload.title,
        url: r.payload.url,
        source: r.payload.source,
        summary: r.payload.summary,
        published_at: r.payload.published_at,
      })),
      count: results.length,
    });
  } catch (err) {
    res.status(503).json({
      success: false,
      error: "Vector search unavailable",
      // A thrown value is not guaranteed to be an Error instance.
      detail: err instanceof Error ? err.message : String(err),
    });
  }
});
// GET /api/search/stats — Collection statistics
//
// Looks up every collection in VALID_COLLECTIONS concurrently. A collection
// whose lookup fails is still listed, with zeroed counts and
// `error: "unavailable"`, so one failing collection does not fail the request.
searchRouter.get("/stats", async (_req: Request, res: Response) => {
  // Resolves to the collection's info, or to a placeholder entry on failure.
  const summarize = async (name: CollectionName) => {
    try {
      const details = await getCollectionInfo(name);
      return { collection: name, ...details };
    } catch {
      return { collection: name, pointsCount: 0, vectorsCount: 0, error: "unavailable" };
    }
  };

  try {
    const pending = VALID_COLLECTIONS.map((name) => summarize(name));
    const collections = await Promise.all(pending);
    res.json({ success: true, collections });
  } catch (err) {
    const detail = (err as Error).message;
    res.status(503).json({ success: false, error: "Qdrant unavailable", detail });
  }
});