Scrapers:
- atgbics.ts: PlaywrightCrawler for UK vendor ATGBICS (Shopify store),
scrapes SFP/SFP+/SFP28/QSFP+/QSFP28/QSFP-DD in GBP, max 50 pages/run
- prolabs.ts: HttpCrawler for ProLabs (Legrand subsidiary), USD pricing,
category-driven crawl with reach/fiber/speed detection
- Both registered in scheduler (every 8h, staggered) and index.ts CLI
MCP HTTP Server:
- packages/mcp-server/src/http-server.ts: Express + SSEServerTransport
- Exposes all 12 TIP tools via GET /sse + POST /message
- Bearer token auth (MCP_SECRET env), CORS-configurable
- GET /health → { status: "ok", tools: 12 }
- Port: MCP_HTTP_PORT (default 3201)
SQL + tools:
- sql/006-009: seed scripts for whitebox switches, vendors, assets
- switch-docs.ts: MCP tool for switch documentation queries
100 lines
3.1 KiB
TypeScript
/**
 * Seed FAQ and troubleshooting embeddings in Qdrant from knowledge_base.
 *
 * Run: npx tsx packages/api/src/embeddings/seed-knowledge-base.ts
 */
|
|
import { pool } from "../db/client";
|
|
import { embed, upsertPoints, type CollectionName } from "./client";
|
|
|
|
/**
 * Render a knowledge_base row as the single text string that gets embedded.
 *
 * The result always starts with `Q: <question>` and `A: <answer>`, optionally
 * followed by `Topic: …`, `Form factors: …` and `Speeds: …`. Segments are
 * joined with ". ".
 *
 * @param row - Row carrying `question`, `answer`, `subcategory`,
 *   `applies_to_form_factors` and `applies_to_speeds`.
 * @returns The text to embed for this row.
 */
function kbToText(row: Record<string, unknown>): string {
  // Render null/undefined as "" so the literal words "null"/"undefined"
  // never end up in the embedded text.
  const scalar = (value: unknown): string =>
    value == null ? "" : String(value);

  // Only a non-empty array yields a list. An empty array would otherwise
  // produce a dangling "Form factors: " segment, and a non-array value
  // would throw on `.join`.
  const list = (value: unknown): string =>
    Array.isArray(value) && value.length > 0 ? value.join(", ") : "";

  const parts: string[] = [
    `Q: ${scalar(row.question)}`,
    `A: ${scalar(row.answer)}`,
  ];

  if (row.subcategory) {
    parts.push(`Topic: ${scalar(row.subcategory)}`);
  }

  const formFactors = list(row.applies_to_form_factors);
  if (formFactors !== "") {
    parts.push(`Form factors: ${formFactors}`);
  }

  const speeds = list(row.applies_to_speeds);
  if (speeds !== "") {
    parts.push(`Speeds: ${speeds}`);
  }

  return parts.join(". ");
}
|
|
|
|
/**
 * Pick the embedding collection for a knowledge_base category.
 *
 * @param category - The row's `category` value.
 * @returns `"troubleshooting_embeddings"` for the categories
 *   `"troubleshooting"` and `"known_issue"`; `"faq_embeddings"` for
 *   everything else.
 */
function collectionForCategory(category: string): CollectionName {
  switch (category) {
    case "troubleshooting":
    case "known_issue":
      return "troubleshooting_embeddings";
    default:
      return "faq_embeddings";
  }
}
|
|
|
|
/**
 * Seed the embedding collections from the `knowledge_base` table.
 *
 * Reads every row ordered by category and creation time, then processes the
 * rows in batches of `BATCH_SIZE`. Within a batch, rows are grouped by target
 * collection. Each group is embedded concurrently and upserted with one
 * `upsertPoints` call. Progress is logged after every batch, and the pool is
 * closed once all batches have succeeded.
 *
 * Any error from the query, `embed` or `upsertPoints` rejects the returned
 * promise; the pool is not closed here in that case.
 */
async function main(): Promise<void> {
  console.log("=== Seeding knowledge_base embeddings ===\n");

  const result = await pool.query(
    `SELECT id, category, subcategory, question, answer,
            applies_to_form_factors, applies_to_speeds, severity, tags
     FROM knowledge_base
     ORDER BY category, created_at`
  );

  console.log(`Found ${result.rows.length} knowledge base entries\n`);

  const BATCH_SIZE = 5;
  let faqCount = 0;
  let troubleCount = 0;

  for (let i = 0; i < result.rows.length; i += BATCH_SIZE) {
    const batch = result.rows.slice(i, i + BATCH_SIZE);

    // Group by collection so each collection gets a single upsert per batch.
    const byCollection = new Map<CollectionName, typeof batch>();
    for (const row of batch) {
      const col = collectionForCategory(row.category as string);
      const group = byCollection.get(col);
      if (group) {
        group.push(row);
      } else {
        byCollection.set(col, [row]);
      }
    }

    for (const [collection, rows] of byCollection) {
      // Embed all rows of this group concurrently.
      const points = await Promise.all(
        rows.map(async (row) => {
          const text = kbToText(row);
          const vector = await embed(text);

          return {
            id: row.id,
            vector,
            payload: {
              question: row.question || "",
              answer: row.answer || "",
              category: row.category || "",
              subcategory: row.subcategory || "",
              // symptom/cause/solution mirror question/subcategory/answer.
              // NOTE(review): presumably the troubleshooting collection's
              // field names — confirm against its consumers.
              symptom: row.question || "",
              cause: row.subcategory || "",
              solution: row.answer || "",
              severity: row.severity || "info",
              form_factors: row.applies_to_form_factors || [],
              speeds: row.applies_to_speeds || [],
              tags: row.tags || [],
              text,
            },
          };
        })
      );

      await upsertPoints(collection, points);

      if (collection === "faq_embeddings") faqCount += points.length;
      else troubleCount += points.length;
    }

    console.log(`  Embedded ${Math.min(i + BATCH_SIZE, result.rows.length)}/${result.rows.length} entries (FAQ: ${faqCount}, Troubleshooting: ${troubleCount})`);
  }

  console.log(`\n=== Done: ${faqCount} FAQ + ${troubleCount} troubleshooting embedded ===`);
  await pool.end();
}
|
|
|
|
// Run the seeder. On failure: log the error, close the database pool, and
// exit with a non-zero status.
main().catch(async (err: unknown) => {
  console.error("Fatal:", err);
  try {
    // Await the shutdown so connections are closed before the process exits
    // and a failing close cannot surface as an unhandled rejection.
    await pool.end();
  } catch (closeErr: unknown) {
    console.error("Failed to close database pool:", closeErr);
  }
  process.exit(1);
});
|