285 lines
8.7 KiB
TypeScript
285 lines
8.7 KiB
TypeScript
/**
 * Semantic search API routes (Qdrant vector search)
 *
 * GET /api/search?q=<query>&collection=<col>&limit=<n>
 * GET /api/search/products?q=<query>&form_factor=&speed_gbps=&fiber_type=&wdm_type=
 * GET /api/search/documents?q=<query>&doc_type=&vendor=
 * GET /api/search/news?q=<query>&source=
 * GET /api/search/stats
 */
|
|
import { Router, Request, Response } from "express";
|
|
import { semanticSearch, getCollectionInfo, CollectionName } from "../embeddings/client";
|
|
import { searchTransceivers } from "../db/queries";
|
|
|
|
export const searchRouter = Router();
|
|
|
|
/**
 * Collection names accepted by the generic search route and enumerated by
 * the stats route. The order is observable: it is the order used in the
 * "Invalid collection" error message and in the stats response.
 */
const VALID_COLLECTIONS: CollectionName[] = [
  "product_embeddings",
  "datasheet_chunks",
  "faq_embeddings",
  "manual_chunks",
  "troubleshooting_embeddings",
  "news_embeddings",
];
|
|
|
|
/**
 * Reads a single query-string parameter as a plain string.
 *
 * Express can hand back a string, an array (repeated key) or a nested object
 * (`?a[b]=c`) for a query parameter. Only string values are meaningful to the
 * search routes, so:
 *  - a non-empty string is returned as-is;
 *  - for an array, the first non-empty string element is returned;
 *  - anything else (missing, empty string, nested object) yields `undefined`
 *    rather than a stringified "[object Object]".
 *
 * @param p   Name of the query parameter.
 * @param req Incoming Express request.
 * @returns The parameter value, or `undefined` when absent or unusable.
 */
const q = (p: string, req: Request): string | undefined => {
  const raw: unknown = req.query[p];
  const candidates: unknown[] = Array.isArray(raw) ? raw : [raw];
  for (const candidate of candidates) {
    if (typeof candidate === "string" && candidate !== "") {
      return candidate;
    }
  }
  return undefined;
};
|
|
|
|
/**
 * GET /api/search — generic search across any collection.
 *
 * Query parameters:
 *  - `q` (required): search text.
 *  - `collection` (optional): one of VALID_COLLECTIONS; defaults to
 *    "product_embeddings".
 *  - `limit` (optional): maximum number of results; defaults to 10 when
 *    missing, non-numeric or less than 1.
 *
 * "product_embeddings" is answered by the `searchTransceivers` query: each
 * row gets a fixed score of 0.5 and the response carries `fallback: "fts"`.
 * Every other collection goes through `semanticSearch`.
 *
 * Responds 400 on a missing `q` or an unknown collection, and 503 when the
 * underlying search call throws.
 */
searchRouter.get("/", async (req: Request, res: Response) => {
  const query = q("q", req);
  const collection = (q("collection", req) || "product_embeddings") as CollectionName;

  // parseInt yields NaN for non-numeric input; fall back to the default
  // instead of forwarding NaN or a non-positive limit to the search backend.
  const parsedLimit = Number.parseInt(q("limit", req) || "10", 10);
  const limit = Number.isInteger(parsedLimit) && parsedLimit > 0 ? parsedLimit : 10;

  if (!query) {
    res.status(400).json({ success: false, error: "Missing 'q' parameter" });
    return;
  }

  if (!VALID_COLLECTIONS.includes(collection)) {
    res.status(400).json({
      success: false,
      error: `Invalid collection. Valid: ${VALID_COLLECTIONS.join(", ")}`,
    });
    return;
  }

  try {
    let results: Array<Record<string, unknown>>;
    let fallback: "fts" | undefined;

    if (collection === "product_embeddings") {
      const fts = await searchTransceivers({ q: query, limit });
      // The result type of searchTransceivers is not visible from this file,
      // so `data` is read defensively and treated as empty unless it is an array.
      // eslint-disable-next-line @typescript-eslint/no-explicit-any
      const data: unknown = (fts as any).data;
      // eslint-disable-next-line @typescript-eslint/no-explicit-any
      const rows: any[] = Array.isArray(data) ? data : [];
      // Row fields are spread last, so they win over the synthetic id/score keys.
      results = rows.map((row) => ({ id: row.id, score: 0.5, ...row }));
      fallback = "fts";
    } else {
      const hits = await semanticSearch(collection, query, limit);
      results = hits.map((hit) => ({
        id: hit.id,
        score: Math.round(hit.score * 1000) / 1000, // three decimal places
        // Payload fields are spread last, so a payload key named `id` or
        // `score` overrides the values above.
        ...hit.payload,
      }));
    }

    res.json({
      success: true,
      query,
      collection,
      fallback, // omitted from the JSON body when undefined
      results,
      count: results.length,
    });
  } catch (err: unknown) {
    // A failed search is reported once; the identical call is not retried.
    res.status(503).json({
      success: false,
      error: "Vector search unavailable",
      detail: err instanceof Error ? err.message : String(err),
    });
  }
});
|
|
|
|
/**
 * GET /api/search/products — product search via `semanticSearch` on the
 * "product_embeddings" collection, with optional payload filters.
 *
 * Query parameters:
 *  - `q` (required): search text.
 *  - `limit` (optional): maximum number of results; defaults to 10 when
 *    missing, non-numeric or less than 1.
 *  - `form_factor`, `fiber_type`, `wdm_type` (optional): upper-cased and
 *    matched exactly against the payload field of the same name.
 *  - `speed_gbps` (optional): parsed as a number and matched exactly.
 *
 * Responds 400 on a missing `q` or a non-numeric `speed_gbps`, and 503 when
 * the search call throws.
 */
searchRouter.get("/products", async (req: Request, res: Response) => {
  const query = q("q", req);

  // parseInt yields NaN for non-numeric input; fall back to the default
  // instead of forwarding NaN or a non-positive limit to the search backend.
  const parsedLimit = Number.parseInt(q("limit", req) || "10", 10);
  const limit = Number.isInteger(parsedLimit) && parsedLimit > 0 ? parsedLimit : 10;

  const formFactor = q("form_factor", req);
  const speedGbps = q("speed_gbps", req);
  const fiberType = q("fiber_type", req);
  const wdmType = q("wdm_type", req);

  if (!query) {
    res.status(400).json({ success: false, error: "Missing 'q' parameter" });
    return;
  }

  // Reject an unparsable speed up front: NaN is not a usable match value.
  let speedValue: number | undefined;
  if (speedGbps) {
    speedValue = Number.parseFloat(speedGbps);
    if (!Number.isFinite(speedValue)) {
      res.status(400).json({ success: false, error: "Invalid 'speed_gbps' parameter" });
      return;
    }
  }

  // Build Qdrant payload filter: every supplied condition must match.
  const mustConditions: Array<Record<string, unknown>> = [];
  if (formFactor) {
    mustConditions.push({ key: "form_factor", match: { value: formFactor.toUpperCase() } });
  }
  if (speedValue !== undefined) {
    mustConditions.push({ key: "speed_gbps", match: { value: speedValue } });
  }
  if (fiberType) {
    mustConditions.push({ key: "fiber_type", match: { value: fiberType.toUpperCase() } });
  }
  if (wdmType) {
    mustConditions.push({ key: "wdm_type", match: { value: wdmType.toUpperCase() } });
  }

  const filter = mustConditions.length > 0 ? { must: mustConditions } : undefined;

  try {
    const results = await semanticSearch("product_embeddings", query, limit, filter);
    res.json({
      success: true,
      query,
      // Filters are echoed back exactly as received (speed_gbps as a string).
      filters: { formFactor, speedGbps, fiberType, wdmType },
      results: results.map((r) => ({
        id: r.id,
        score: Math.round(r.score * 1000) / 1000, // three decimal places
        slug: r.payload.slug,
        standard_name: r.payload.standard_name,
        form_factor: r.payload.form_factor,
        speed: r.payload.speed,
        reach: r.payload.reach_label,
        fiber_type: r.payload.fiber_type,
        connector: r.payload.connector,
        category: r.payload.category,
        vendor: r.payload.vendor,
      })),
      count: results.length,
    });
  } catch (err: unknown) {
    res.status(503).json({
      success: false,
      error: "Vector search unavailable",
      detail: err instanceof Error ? err.message : String(err),
    });
  }
});
|
|
|
|
/**
 * GET /api/search/documents — search datasheet and manual chunks, grouped by
 * document.
 *
 * Query parameters:
 *  - `q` (required): search text.
 *  - `collection` (optional): "datasheet_chunks" (default) or "manual_chunks".
 *  - `limit` (optional): maximum number of chunks; defaults to 10 when
 *    missing, non-numeric or less than 1.
 *  - `doc_type` (optional): lower-cased and matched against `document_type`.
 *  - `vendor` (optional): matched as given against `vendor`.
 *
 * Matching chunks are grouped under their `document_id` payload field (or
 * the hit id when that field is empty); each chunk's text is truncated to
 * 500 characters.
 *
 * Responds 400 on a missing `q` or a disallowed collection, and 503 when the
 * search call throws.
 */
searchRouter.get("/documents", async (req: Request, res: Response) => {
  type DocumentChunk = { score: number; heading: string; text: string; chunk_index: number };
  type DocumentGroup = { title: string; vendor: string; source_url: string; chunks: DocumentChunk[] };

  const query = q("q", req);

  // parseInt yields NaN for non-numeric input; fall back to the default
  // instead of forwarding NaN or a non-positive limit to the search backend.
  const parsedLimit = Number.parseInt(q("limit", req) || "10", 10);
  const limit = Number.isInteger(parsedLimit) && parsedLimit > 0 ? parsedLimit : 10;

  const docType = q("doc_type", req);
  const vendor = q("vendor", req);
  const collection = (q("collection", req) || "datasheet_chunks") as CollectionName;

  if (!query) {
    res.status(400).json({ success: false, error: "Missing 'q' parameter" });
    return;
  }

  if (!["datasheet_chunks", "manual_chunks"].includes(collection)) {
    res.status(400).json({
      success: false,
      error: "collection must be 'datasheet_chunks' or 'manual_chunks'",
    });
    return;
  }

  // Payload filter: every supplied condition must match.
  const mustConditions: Array<Record<string, unknown>> = [];
  if (docType) {
    mustConditions.push({ key: "document_type", match: { value: docType.toLowerCase() } });
  }
  if (vendor) {
    mustConditions.push({ key: "vendor", match: { value: vendor } });
  }

  const filter = mustConditions.length > 0 ? { must: mustConditions } : undefined;

  try {
    const results = await semanticSearch(collection, query, limit, filter);

    // Group by document for cleaner output. A Map keeps first-seen order, so
    // documents appear in the order their first chunk was returned.
    const byDocument = new Map<string, DocumentGroup>();

    for (const r of results) {
      const docId = String(r.payload.document_id || r.id);

      let group = byDocument.get(docId);
      if (!group) {
        // Document-level fields are taken from the first chunk seen.
        group = {
          title: String(r.payload.title || ""),
          vendor: String(r.payload.vendor || ""),
          source_url: String(r.payload.source_url || ""),
          chunks: [],
        };
        byDocument.set(docId, group);
      }

      group.chunks.push({
        score: Math.round(r.score * 1000) / 1000, // three decimal places
        heading: String(r.payload.section_heading || ""),
        text: String(r.payload.text || "").slice(0, 500),
        chunk_index: Number(r.payload.chunk_index || 0),
      });
    }

    res.json({
      success: true,
      query,
      collection,
      filters: { docType, vendor },
      documents: Array.from(byDocument.values()),
      totalChunks: results.length,
    });
  } catch (err: unknown) {
    res.status(503).json({
      success: false,
      error: "Vector search unavailable",
      detail: err instanceof Error ? err.message : String(err),
    });
  }
});
|
|
|
|
/**
 * GET /api/search/news — search news articles via `semanticSearch` on the
 * "news_embeddings" collection.
 *
 * Query parameters:
 *  - `q` (required): search text.
 *  - `limit` (optional): maximum number of results; defaults to 10 when
 *    missing, non-numeric or less than 1.
 *  - `source` (optional): matched as given against the `source` payload field.
 *
 * Responds 400 on a missing `q` and 503 when the search call throws.
 */
searchRouter.get("/news", async (req: Request, res: Response) => {
  const query = q("q", req);

  // parseInt yields NaN for non-numeric input; fall back to the default
  // instead of forwarding NaN or a non-positive limit to the search backend.
  const parsedLimit = Number.parseInt(q("limit", req) || "10", 10);
  const limit = Number.isInteger(parsedLimit) && parsedLimit > 0 ? parsedLimit : 10;

  const source = q("source", req);

  if (!query) {
    res.status(400).json({ success: false, error: "Missing 'q' parameter" });
    return;
  }

  // Only filter when a source was supplied; otherwise search unfiltered.
  const filter = source
    ? { must: [{ key: "source", match: { value: source } }] }
    : undefined;

  try {
    const results = await semanticSearch("news_embeddings", query, limit, filter);
    res.json({
      success: true,
      query,
      filters: { source },
      results: results.map((r) => ({
        id: r.id,
        score: Math.round(r.score * 1000) / 1000, // three decimal places
        title: r.payload.title,
        url: r.payload.url,
        source: r.payload.source,
        summary: r.payload.summary,
        published_at: r.payload.published_at,
      })),
      count: results.length,
    });
  } catch (err: unknown) {
    res.status(503).json({
      success: false,
      error: "Vector search unavailable",
      detail: err instanceof Error ? err.message : String(err),
    });
  }
});
|
|
|
|
/**
 * GET /api/search/stats — statistics for every collection in
 * VALID_COLLECTIONS.
 *
 * Each collection is looked up with `getCollectionInfo`. A lookup that fails
 * does not fail the whole request: that collection is reported with zero
 * counts and `error: "unavailable"`. A 503 is sent only if something outside
 * the per-collection lookups throws.
 */
searchRouter.get("/stats", async (_req: Request, res: Response) => {
  // Resolve one collection's entry; never rejects.
  const describeCollection = async (name: CollectionName) => {
    try {
      const details = await getCollectionInfo(name);
      // Info fields are spread last, matching the response shape callers see.
      return { collection: name, ...details };
    } catch {
      return { collection: name, pointsCount: 0, vectorsCount: 0, error: "unavailable" };
    }
  };

  try {
    // Lookups run concurrently; the output keeps VALID_COLLECTIONS order.
    const collections = await Promise.all(
      VALID_COLLECTIONS.map((name) => describeCollection(name))
    );

    res.json({ success: true, collections });
  } catch (err) {
    res.status(503).json({
      success: false,
      error: "Qdrant unavailable",
      detail: (err as Error).message,
    });
  }
});
|