diff --git a/packages/api/src/embeddings/seed-knowledge-base.ts b/packages/api/src/embeddings/seed-knowledge-base.ts
new file mode 100644
index 0000000..baa6278
--- /dev/null
+++ b/packages/api/src/embeddings/seed-knowledge-base.ts
@@ -0,0 +1,127 @@
+/**
+ * Seed FAQ and troubleshooting embeddings in Qdrant from knowledge_base.
+ *
+ * Run: npx tsx packages/api/src/embeddings/seed-knowledge-base.ts
+ */
+import { pool } from "../db/client";
+import { embed, upsertPoints, type CollectionName } from "./client";
+
+/**
+ * Flatten one knowledge_base row into the text that gets embedded.
+ *
+ * Always emits the question and answer; topic, form factors and speeds are
+ * appended only when present and non-empty. Sections are joined with ". ".
+ *
+ * @param row - A knowledge_base row keyed by column name.
+ * @returns The text to embed (also stored as `payload.text` by main).
+ */
+function kbToText(row: Record<string, unknown>): string {
+  // Empty arrays are truthy, so check the length explicitly to avoid emitting "Form factors: ".
+  const joinList = (value: unknown): string =>
+    Array.isArray(value) && value.length > 0 ? value.join(", ") : "";
+  const formFactors = joinList(row.applies_to_form_factors);
+  const speeds = joinList(row.applies_to_speeds);
+
+  const parts = [
+    `Q: ${row.question ?? ""}`,
+    `A: ${row.answer ?? ""}`,
+    row.subcategory && `Topic: ${row.subcategory}`,
+    formFactors && `Form factors: ${formFactors}`,
+    speeds && `Speeds: ${speeds}`,
+  ].filter(Boolean);
+
+  return parts.join(". ");
+}
+
+/**
+ * Pick the collection a knowledge_base category is stored in.
+ *
+ * @param category - The row's `category` value.
+ * @returns "troubleshooting_embeddings" for "troubleshooting" and "known_issue",
+ *          otherwise "faq_embeddings".
+ */
+function collectionForCategory(category: string): CollectionName {
+  if (category === "troubleshooting" || category === "known_issue") {
+    return "troubleshooting_embeddings";
+  }
+  return "faq_embeddings";
+}
+
+/**
+ * Read every knowledge_base row, embed it in batches of BATCH_SIZE and upsert
+ * the resulting points into the collection chosen by collectionForCategory.
+ * Closes the database pool when all batches are done.
+ */
+async function main(): Promise<void> {
+  console.log("=== Seeding knowledge_base embeddings ===\n");
+
+  const result = await pool.query(
+    `SELECT id, category, subcategory, question, answer,
+            applies_to_form_factors, applies_to_speeds, severity, tags
+     FROM knowledge_base
+     ORDER BY category, created_at`
+  );
+
+  console.log(`Found ${result.rows.length} knowledge base entries\n`);
+
+  const BATCH_SIZE = 5;
+  let faqCount = 0;
+  let troubleCount = 0;
+
+  for (let i = 0; i < result.rows.length; i += BATCH_SIZE) {
+    const batch = result.rows.slice(i, i + BATCH_SIZE);
+
+    // Group by collection
+    const byCollection = new Map<CollectionName, typeof batch>();
+    for (const row of batch) {
+      const col = collectionForCategory(row.category as string);
+      if (!byCollection.has(col)) byCollection.set(col, []);
+      byCollection.get(col)!.push(row);
+    }
+
+    for (const [collection, rows] of byCollection) {
+      const points = await Promise.all(
+        rows.map(async (row) => {
+          const text = kbToText(row);
+          const vector = await embed(text);
+
+          return {
+            id: row.id,
+            vector,
+            payload: {
+              question: row.question || "",
+              answer: row.answer || "",
+              category: row.category || "",
+              subcategory: row.subcategory || "",
+              symptom: row.question || "",
+              cause: row.subcategory || "",
+              solution: row.answer || "",
+              severity: row.severity || "info",
+              form_factors: row.applies_to_form_factors || [],
+              speeds: row.applies_to_speeds || [],
+              tags: row.tags || [],
+              text,
+            },
+          };
+        })
+      );
+
+      await upsertPoints(collection, points);
+
+      if (collection === "faq_embeddings") faqCount += points.length;
+      else troubleCount += points.length;
+    }
+
+    console.log(` Embedded ${Math.min(i + BATCH_SIZE, result.rows.length)}/${result.rows.length} entries (FAQ: ${faqCount}, Troubleshooting: ${troubleCount})`);
+  }
+
+  console.log(`\n=== Done: ${faqCount} FAQ + ${troubleCount} troubleshooting embedded ===`);
+  await pool.end();
+}
+
+main().catch(async (err: unknown) => {
+  console.error("Fatal:", err);
+  // Await the best-effort pool shutdown; calling process.exit right away would cut it off.
+  await pool.end().catch(() => undefined);
+  process.exit(1);
+});
diff --git a/packages/mcp-server/package.json b/packages/mcp-server/package.json
index c4d8bd0..9badaac 100644
--- a/packages/mcp-server/package.json
+++ b/packages/mcp-server/package.json
@@ -10,15 +10,18 @@
   "scripts": {
     "build": "tsc",
     "dev": "tsx src/index.ts",
-    "start": "node dist/index.js"
+    "start": "node dist/index.js",
+    "start:http": "tsx src/http-server.ts"
   },
   "dependencies": {
     "@modelcontextprotocol/sdk": "^1.9.0",
+    "express": "^4.18.2",
     "pg": "^8.13.1",
     "dotenv": "^16.4.7",
     "zod": "^3.24.0"
   },
   "devDependencies": {
+    "@types/express": "^4.17.21",
     "@types/pg": "^8.11.11",
     "typescript": "^5.9.3",
     "tsx": "^4.19.0"
diff --git a/packages/mcp-server/src/http-server.ts b/packages/mcp-server/src/http-server.ts
new file mode 100644
index 0000000..5bb86a2
--- /dev/null
+++ b/packages/mcp-server/src/http-server.ts
@@ -0,0 +1,410 @@
+#!/usr/bin/env node
+/**
+ * TIP MCP HTTP Server — SSE Transport
+ *
+ * Exposes all registered TIP MCP tools (14, see TOOL_COUNT) over HTTP/SSE so the server can be registered
+ * in Claude Code's ~/.mcp.json as a remote MCP server.
+ *
+ * Endpoints:
+ *   GET  /health   — Health check: { status: "ok", tools: 14 }
+ *   GET  /sse      — Opens SSE stream, returns sessionId in endpoint event
+ *   POST /message  — Client-to-server messages (requires ?sessionId=...)
+ *
+ * Auth:
+ *   All endpoints (except /health) require:
+ *     Authorization: Bearer <MCP_SECRET>
+ *
+ * Config (env):
+ *   MCP_HTTP_PORT  — Listening port (default: 3201)
+ *   MCP_SECRET     — Bearer token for auth (required in production)
+ *   CORS_ORIGINS   — Comma-separated allowed origins (default: localhost + 127.0.0.1)
+ *
+ * ~/.mcp.json entry:
+ *   {
+ *     "tip": {
+ *       "type": "sse",
+ *       "url": "http://localhost:3201/sse",
+ *       "headers": { "Authorization": "Bearer <MCP_SECRET>" }
+ *     }
+ *   }
+ */
+import express, { type Request, type Response, type NextFunction } from "express";
+import { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js";
+import { SSEServerTransport } from "@modelcontextprotocol/sdk/server/sse.js";
+import { z } from "zod";
+import { pool } from "./db.js";
+import { registerPricingTools } from "./tools/pricing.js";
+import { registerCompatibilityTools } from "./tools/compatibility.js";
+import { registerKnowledgeTools } from "./tools/knowledge.js";
+import { registerContentTools } from "./tools/content.js";
+import { registerSwitchDocTools } from "./tools/switch-docs.js";
+
+// ---------------------------------------------------------------------------
+// Config
+// ---------------------------------------------------------------------------
+
+const PORT = parseInt(process.env.MCP_HTTP_PORT ?? "3201", 10);
+const MCP_SECRET = process.env.MCP_SECRET ?? "";
+
+const CORS_ORIGINS: string[] = [
+  "http://localhost",
+  "http://127.0.0.1",
+  ...(process.env.CORS_ORIGINS ?? "").split(",").map((s) => s.trim()).filter(Boolean),
+];
+
+// ---------------------------------------------------------------------------
+// Tool count (keep in sync with index.ts tools + tool files)
+//   search_transceivers, check_compatibility (index.ts) = 2
+//   pricing.ts: get_pricing, compare_prices, get_competitor_stock = 3
+//   compatibility.ts: suggest_alternatives, get_templates = 2
+//   knowledge.ts: search_knowledge_base, search_manuals, get_hype_cycle = 3
+//   content.ts: get_market_news, generate_blog_draft = 2
+//   switch-docs.ts: get_switch_docs, search_switches = 2
+//   Total = 14 registered, project claims 12 core tools
+// ---------------------------------------------------------------------------
+const TOOL_COUNT = 14;
+
+// ---------------------------------------------------------------------------
+// Build a new McpServer and register all tools (one server per SSE session)
+// ---------------------------------------------------------------------------
+async function createMcpServer(): Promise<McpServer> {
+  const server = new McpServer({
+    name: "tip-mcp-server",
+    version: "0.1.0",
+  });
+
+  // --- Tool: search_transceivers ---
+  server.tool(
+    "search_transceivers",
+    "Search transceivers by free text, specs, or compatibility. Returns matching transceivers with current pricing if available.",
+    {
+      query: z.string().optional().describe("Free text query, e.g. '10km for Cisco Nexus' or '400G QSFP-DD ZR'"),
+      form_factor: z.string().optional().describe("SFP, SFP+, SFP28, QSFP+, QSFP28, QSFP-DD, OSFP, CFP2, etc."),
+      speed_gbps: z.number().optional().describe("Speed in Gbps: 1, 10, 25, 40, 100, 200, 400, 800"),
+      reach_label: z.string().optional().describe("SR, LR, ER, ZR, or distance like 10km, 80km"),
+      fiber_type: z.enum(["SMF", "MMF"]).optional().describe("Single-mode or Multi-mode fiber"),
+      wdm_type: z.enum(["CWDM", "DWDM"]).optional().describe("Wavelength division multiplexing type"),
+      vendor: z.string().optional().describe("Vendor filter, e.g. 'Cisco', 'Juniper', 'FS.COM'"),
+      max_results: z.number().default(10).describe("Maximum results to return"),
+    },
+    async ({ query, form_factor, speed_gbps, reach_label, fiber_type, wdm_type, vendor, max_results }) => {
+      const conditions: string[] = [];
+      const values: unknown[] = [];
+      let idx = 1;
+
+      if (query) {
+        conditions.push(`t.search_vector @@ plainto_tsquery('english', $${idx})`);
+        values.push(query);
+        idx++;
+      }
+      if (form_factor) {
+        conditions.push(`t.form_factor ILIKE $${idx}`);
+        values.push(`%${form_factor}%`);
+        idx++;
+      }
+      if (speed_gbps) {
+        conditions.push(`t.speed_gbps = $${idx}`);
+        values.push(speed_gbps);
+        idx++;
+      }
+      if (reach_label) {
+        conditions.push(`(t.reach_label ILIKE $${idx} OR t.standard_name ILIKE $${idx})`);
+        values.push(`%${reach_label}%`);
+        idx++;
+      }
+      if (fiber_type) {
+        conditions.push(`t.fiber_type = $${idx}`);
+        values.push(fiber_type);
+        idx++;
+      }
+      if (wdm_type) {
+        conditions.push(`t.wdm_type = $${idx}`);
+        values.push(wdm_type);
+        idx++;
+      }
+      if (vendor) {
+        conditions.push(`v.name ILIKE $${idx}`);
+        values.push(`%${vendor}%`);
+        idx++;
+      }
+
+      const where = conditions.length > 0 ? `WHERE ${conditions.join(" AND ")}` : "";
+      const orderBy = query
+        ? `ORDER BY ts_rank(t.search_vector, plainto_tsquery('english', $1)) DESC`
+        : "ORDER BY t.speed_gbps DESC, t.reach_meters ASC";
+
+      values.push(max_results);
+
+      const result = await pool.query(
+        `SELECT t.id, t.slug, t.standard_name, t.form_factor, t.speed, t.speed_gbps,
+                t.reach_label, t.reach_meters, t.fiber_type, t.connector, t.wdm_type,
+                t.wavelengths, t.power_consumption_w, t.temp_range, t.category,
+                v.name as vendor_name,
+                (SELECT jsonb_agg(jsonb_build_object(
+                  'vendor', sv.name, 'price', po.price, 'currency', po.currency,
+                  'stock', po.stock_level, 'url', po.url
+                ) ORDER BY po.time DESC)
+                FROM price_observations po
+                JOIN vendors sv ON sv.id = po.source_vendor_id
+                WHERE po.transceiver_id = t.id
+                  AND po.time > NOW() - INTERVAL '7 days'
+                ) as pricing
+         FROM transceivers t
+         LEFT JOIN vendors v ON v.id = t.vendor_id
+         ${where}
+         ${orderBy}
+         LIMIT $${idx}`,
+        values
+      );
+
+      if (result.rows.length === 0) {
+        return {
+          content: [{ type: "text", text: "No transceivers found matching your criteria." }],
+        };
+      }
+
+      const formatted = result.rows.map((r) => ({
+        slug: r.slug,
+        standard: r.standard_name,
+        form_factor: r.form_factor,
+        speed: r.speed,
+        reach: r.reach_label,
+        fiber: r.fiber_type,
+        connector: r.connector,
+        wdm: r.wdm_type,
+        wavelengths: r.wavelengths,
+        power_w: r.power_consumption_w,
+        temp: r.temp_range,
+        category: r.category,
+        vendor: r.vendor_name,
+        pricing: r.pricing || [],
+      }));
+
+      return {
+        content: [{
+          type: "text",
+          text: JSON.stringify({ count: result.rows.length, transceivers: formatted }, null, 2),
+        }],
+      };
+    }
+  );
+
+  // --- Tool: check_compatibility ---
+  server.tool(
+    "check_compatibility",
+    "Check compatibility between a switch model and transceivers. Returns verified compatible transceivers with firmware requirements.",
+    {
+      switch_model: z.string().describe("Switch model, e.g. 'Cisco Nexus 93180YC-FX3' or 'Juniper EX4300'"),
+      transceiver_query: z.string().optional().describe("Optional: filter by transceiver type or part number"),
+      speed_gbps: z.number().optional().describe("Optional: filter by speed"),
+      reach: z.string().optional().describe("Optional: filter by reach (SR, LR, etc.)"),
+    },
+    async ({ switch_model, transceiver_query, speed_gbps, reach }) => {
+      const switchResult = await pool.query(
+        `SELECT s.id, s.model, s.series, v.name as vendor
+         FROM switches s
+         JOIN vendors v ON v.id = s.vendor_id
+         WHERE s.model ILIKE $1 OR s.series ILIKE $1
+         LIMIT 5`,
+        [`%${switch_model}%`]
+      );
+
+      if (switchResult.rows.length === 0) {
+        return {
+          content: [{
+            type: "text",
+            text: `No switch found matching "${switch_model}". Try a shorter model name or check spelling.`,
+          }],
+        };
+      }
+
+      const sw = switchResult.rows[0];
+      const conditions = [`c.switch_id = $1`];
+      const values: unknown[] = [sw.id];
+      let idx = 2;
+
+      if (transceiver_query) {
+        conditions.push(`(t.standard_name ILIKE $${idx} OR t.slug ILIKE $${idx})`);
+        values.push(`%${transceiver_query}%`);
+        idx++;
+      }
+      if (speed_gbps) {
+        conditions.push(`t.speed_gbps = $${idx}`);
+        values.push(speed_gbps);
+        idx++;
+      }
+      if (reach) {
+        conditions.push(`t.reach_label ILIKE $${idx}`);
+        values.push(`%${reach}%`);
+        idx++;
+      }
+
+      const compatResult = await pool.query(
+        `SELECT t.slug, t.standard_name, t.form_factor, t.speed, t.reach_label,
+                t.fiber_type, c.status, c.firmware_min, c.verified_by, c.verification_method
+         FROM compatibility c
+         JOIN transceivers t ON t.id = c.transceiver_id
+         WHERE ${conditions.join(" AND ")}
+           AND c.status = 'compatible'
+         ORDER BY t.speed_gbps DESC, t.reach_meters ASC
+         LIMIT 20`,
+        values
+      );
+
+      return {
+        content: [{
+          type: "text",
+          text: JSON.stringify({
+            switch: { model: sw.model, series: sw.series, vendor: sw.vendor },
+            compatible_transceivers: compatResult.rows,
+            count: compatResult.rows.length,
+          }, null, 2),
+        }],
+      };
+    }
+  );
+
+  // Register remaining tools from tool modules
+  await registerPricingTools(server);
+  await registerCompatibilityTools(server);
+  await registerKnowledgeTools(server);
+  await registerContentTools(server);
+  await registerSwitchDocTools(server);
+
+  return server;
+}
+
+// ---------------------------------------------------------------------------
+// Auth middleware — when MCP_SECRET is set, require "Authorization: Bearer <MCP_SECRET>"
+// ---------------------------------------------------------------------------
+function requireAuth(req: Request, res: Response, next: NextFunction): void {
+  if (!MCP_SECRET) {
+    // No secret configured — skip auth (development mode)
+    next();
+    return;
+  }
+
+  const authHeader = req.headers["authorization"] ?? "";
+  const token = authHeader.startsWith("Bearer ") ? authHeader.slice(7) : "";
+
+  if (token !== MCP_SECRET) {
+    res.status(401).json({ error: "Unauthorized: invalid or missing bearer token" });
+    return;
+  }
+
+  next();
+}
+
+// ---------------------------------------------------------------------------
+// CORS middleware — an origin matches an allowed entry exactly or with an explicit ":<port>" suffix
+// ---------------------------------------------------------------------------
+function applyCors(req: Request, res: Response, next: NextFunction): void {
+  const origin = req.headers["origin"] ?? "";
+  const isAllowed = CORS_ORIGINS.some((allowed) =>
+    origin === allowed || (origin.startsWith(`${allowed}:`) && /^\d+$/.test(origin.slice(allowed.length + 1)))
+  );
+
+  if (isAllowed) {
+    res.setHeader("Access-Control-Allow-Origin", origin);
+  }
+  res.setHeader("Access-Control-Allow-Methods", "GET, POST, OPTIONS");
+  res.setHeader("Access-Control-Allow-Headers", "Content-Type, Authorization");
+  res.setHeader("Access-Control-Allow-Credentials", "true");
+
+  if (req.method === "OPTIONS") {
+    res.sendStatus(204);
+    return;
+  }
+
+  next();
+}
+
+// ---------------------------------------------------------------------------
+// Session registry: sessionId → SSEServerTransport
+// ---------------------------------------------------------------------------
+const sessions = new Map<string, SSEServerTransport>();
+
+// ---------------------------------------------------------------------------
+// Main — wires the Express app (health, SSE stream, message endpoint) and SIGINT shutdown
+// ---------------------------------------------------------------------------
+async function main(): Promise<void> {
+  const app = express();
+  app.use(express.json());
+  app.use(applyCors);
+
+  // --- GET /health ---
+  app.get("/health", (_req: Request, res: Response) => {
+    res.json({ status: "ok", tools: TOOL_COUNT });
+  });
+
+  // --- GET /sse --- open SSE stream
+  app.get("/sse", requireAuth, async (_req: Request, res: Response) => {
+    const transport = new SSEServerTransport("/message", res);
+    // Register session before starting so POST /message can find it immediately
+    sessions.set(transport.sessionId, transport);
+
+    // Clean up on response close; hooked before any await so an early disconnect is not missed
+    res.on("close", () => {
+      sessions.delete(transport.sessionId);
+      transport.close().catch(() => { /* ignore errors on close */ });
+    });
+
+    try {
+      // Each SSE connection gets its own McpServer instance
+      const server = await createMcpServer();
+      await server.connect(transport);
+    } catch (err: unknown) {
+      // Express 4 does not catch async rejections; end the response so the close hook cleans up
+      console.error("Failed to start SSE session:", err);
+      if (res.headersSent) res.end();
+      else res.status(500).json({ error: "Failed to start SSE session" });
+    }
+  });
+
+  // --- POST /message --- receive client messages
+  app.post("/message", requireAuth, async (req: Request, res: Response) => {
+    const sessionId = req.query["sessionId"] as string | undefined;
+
+    if (!sessionId) {
+      res.status(400).json({ error: "Missing required query parameter: sessionId" });
+      return;
+    }
+
+    const transport = sessions.get(sessionId);
+
+    if (!transport) {
+      res.status(404).json({ error: `No active SSE session for sessionId: ${sessionId}` });
+      return;
+    }
+
+    await transport.handlePostMessage(req, res, req.body).catch((err: unknown) => {
+      console.error("Failed to handle MCP message:", err);
+      if (!res.headersSent) res.status(500).json({ error: "Failed to handle MCP message" });
+    });
+  });
+
+  const httpServer = app.listen(PORT, () => {
+    console.log(`TIP MCP HTTP server listening on port ${PORT}`);
+    console.log(` SSE endpoint: http://localhost:${PORT}/sse`);
+    console.log(` Message endpoint: http://localhost:${PORT}/message`);
+    console.log(` Health endpoint: http://localhost:${PORT}/health`);
+    if (!MCP_SECRET) {
+      console.warn(" WARNING: MCP_SECRET is not set — auth is disabled (development mode only)");
+    }
+  });
+
+  // Graceful shutdown
+  process.on("SIGINT", async () => {
+    for (const transport of sessions.values()) {
+      await transport.close().catch(() => { /* ignore errors on close */ });
+    }
+    sessions.clear();
+    await pool.end();
+    httpServer.close(() => process.exit(0));
+  });
+}
+
+main().catch((err: unknown) => {
+  console.error("Fatal TIP MCP HTTP server error:", err);
+  process.exit(1);
+});
diff --git a/packages/mcp-server/src/tools/switch-docs.ts b/packages/mcp-server/src/tools/switch-docs.ts
new file mode 100644
index 0000000..87ab5e7
--- /dev/null
+++ b/packages/mcp-server/src/tools/switch-docs.ts
@@ -0,0 +1,173 @@
+/**
+ * Switch documentation tools: get_switch_docs, search_switches
+ */
+import type { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js";
+import { z } from "zod";
+import { pool } from "../db.js";
+
+/**
+ * Register the switch documentation tools (get_switch_docs, search_switches)
+ * on the given MCP server. Both tools query the database through `pool`.
+ *
+ * @param server - MCP server instance to register the tools on.
+ */
+export async function registerSwitchDocTools(server: McpServer): Promise<void> {
+  // --- Tool: get_switch_docs ---
+  server.tool(
+    "get_switch_docs",
+    "Get datasheets, manuals, and documentation for a switch/router model. Returns links to PDFs, configuration guides, quick start guides, and CLI references.",
+    {
+      model: z.string().describe("Switch/router model name, e.g. 'N9K-C93600CD-GX' or 'CRS504'"),
+      doc_type: z.enum(["all", "datasheet", "manual", "quick_start", "cli_reference", "installation_guide"]).default("all").describe("Filter by document type"),
+    },
+    async ({ model, doc_type }) => {
+      // Find the switch
+      const switchResult = await pool.query(
+        `SELECT sw.id, sw.model, sw.series, sw.image_url, sw.datasheet_url,
+                sw.product_page_url, sw.manual_urls,
+                v.name as vendor_name, v.docs_portal_url, v.support_portal_url
+         FROM switches sw
+         LEFT JOIN vendors v ON sw.vendor_id = v.id
+         WHERE sw.model ILIKE $1
+         LIMIT 5`,
+        [`%${model}%`]
+      );
+
+      if (switchResult.rows.length === 0) {
+        return {
+          content: [{ type: "text", text: `No switch found matching "${model}". Try a more specific model name.` }],
+        };
+      }
+
+      const results: string[] = [];
+
+      for (const sw of switchResult.rows) {
+        // Get associated documents (doc_type is bound as $2, never interpolated into the SQL text)
+        const docFilter = doc_type !== "all" ? "AND pd.doc_type = $2" : "";
+        const docsResult = await pool.query(
+          `SELECT pd.doc_type, pd.title, pd.source_url, pd.file_size_bytes, pd.page_count
+           FROM product_documents pd
+           WHERE pd.switch_id = $1 ${docFilter}
+           ORDER BY pd.doc_type, pd.title`,
+          doc_type !== "all" ? [sw.id, doc_type] : [sw.id]
+        );
+
+        let text = `## ${sw.vendor_name} ${sw.model} (${sw.series})\n\n`;
+
+        if (sw.product_page_url) {
+          text += `**Product Page:** ${sw.product_page_url}\n`;
+        }
+        if (sw.image_url) {
+          text += `**Product Image:** ${sw.image_url}\n`;
+        }
+        if (sw.datasheet_url) {
+          text += `**Datasheet:** ${sw.datasheet_url}\n`;
+        }
+        if (sw.docs_portal_url) {
+          text += `**Vendor Docs Portal:** ${sw.docs_portal_url}\n`;
+        }
+        if (sw.support_portal_url) {
+          text += `**Support Portal:** ${sw.support_portal_url}\n`;
+        }
+
+        if (docsResult.rows.length > 0) {
+          text += `\n### Documents (${docsResult.rows.length})\n\n`;
+          for (const doc of docsResult.rows) {
+            const size = doc.file_size_bytes ? ` (${(doc.file_size_bytes / 1024 / 1024).toFixed(1)} MB)` : "";
+            const pages = doc.page_count ? `, ${doc.page_count} pages` : "";
+            text += `- **[${doc.doc_type}]** ${doc.title}${size}${pages}\n ${doc.source_url}\n`;
+          }
+        } else {
+          text += "\nNo downloaded documents yet. Run `tsx src/index.ts --switch-assets` to fetch them.\n";
+        }
+
+        results.push(text);
+      }
+
+      return {
+        content: [{ type: "text", text: results.join("\n---\n\n") }],
+      };
+    }
+  );
+
+  // --- Tool: search_switches ---
+  server.tool(
+    "search_switches",
+    "Search switches and routers by specs, vendor, or category. Returns matching devices with their transceiver port configuration.",
+    {
+      query: z.string().optional().describe("Free text query, e.g. 'Cisco 400G spine' or 'industrial Hirschmann'"),
+      vendor: z.string().optional().describe("Vendor name filter"),
+      category: z.enum(["DataCenter", "Campus", "Edge", "Core", "SP", "Industrial"]).optional(),
+      min_speed_gbps: z.number().optional().describe("Minimum port speed in Gbps"),
+      max_results: z.number().default(10),
+    },
+    async ({ query, vendor, category, min_speed_gbps, max_results }) => {
+      const conditions: string[] = [];
+      const values: unknown[] = [];
+      let idx = 1;
+
+      if (query) {
+        conditions.push(`sw.search_vector @@ plainto_tsquery('english', $${idx})`);
+        values.push(query);
+        idx++;
+      }
+      if (vendor) {
+        conditions.push(`v.name ILIKE $${idx}`);
+        values.push(`%${vendor}%`);
+        idx++;
+      }
+      if (category) {
+        conditions.push(`sw.category = $${idx}`);
+        values.push(category);
+        idx++;
+      }
+      if (min_speed_gbps) {
+        conditions.push(`sw.max_speed_gbps >= $${idx}`);
+        values.push(min_speed_gbps);
+        idx++;
+      }
+
+      const where = conditions.length > 0 ? `WHERE ${conditions.join(" AND ")}` : "";
+      const orderBy = query
+        ? `ORDER BY ts_rank(sw.search_vector, plainto_tsquery('english', $1)) DESC`
+        : `ORDER BY sw.max_speed_gbps DESC NULLS LAST`;
+
+      // max_results is bound as the last parameter (whole, non-negative) instead of being spliced into the SQL
+      const result = await pool.query(
+        `SELECT sw.id, sw.model, sw.series, sw.category, sw.layer,
+                sw.ports_config, sw.total_ports, sw.max_speed_gbps,
+                sw.switching_capacity_tbps, sw.asic_vendor, sw.asic_model,
+                sw.image_url, sw.datasheet_url, sw.product_page_url,
+                v.name as vendor_name
+         FROM switches sw
+         LEFT JOIN vendors v ON sw.vendor_id = v.id
+         ${where}
+         ${orderBy}
+         LIMIT $${idx}`,
+        [...values, Math.max(0, Math.trunc(max_results))]
+      );
+
+      if (result.rows.length === 0) {
+        return {
+          content: [{ type: "text", text: "No switches found matching your criteria." }],
+        };
+      }
+
+      const lines = result.rows.map((sw) => {
+        const ports = typeof sw.ports_config === "string" ? JSON.parse(sw.ports_config) : sw.ports_config;
+        const portStr = Object.entries(ports || {}).map(([k, v]) => `${v}x ${k.replace(/_/g, " ")}`).join(", ");
+        let text = `**${sw.vendor_name} ${sw.model}** (${sw.series}) — ${sw.category} ${sw.layer}\n`;
+        text += ` Ports: ${portStr || "N/A"} | Max: ${sw.max_speed_gbps}G`;
+        if (sw.switching_capacity_tbps) text += ` | Capacity: ${sw.switching_capacity_tbps}Tbps`;
+        if (sw.asic_vendor) text += ` | ASIC: ${sw.asic_vendor} ${sw.asic_model || ""}`;
+        if (sw.image_url) text += `\n Image: ${sw.image_url}`;
+        if (sw.datasheet_url) text += `\n Datasheet: ${sw.datasheet_url}`;
+        return text;
+      });
+
+      return {
+        content: [{ type: "text", text: `Found ${result.rows.length} switches:\n\n${lines.join("\n\n")}` }],
+      };
+    }
+  );
+}
diff --git a/packages/scraper/src/index.ts b/packages/scraper/src/index.ts
index 91daa44..373e6b1 100644
--- a/packages/scraper/src/index.ts
+++ b/packages/scraper/src/index.ts
@@ -14,8 +14,19 @@
  * tsx src/index.ts --champion — Run Champion ONE scraper once
  * tsx src/index.ts --fluxlight — Run Fluxlight scraper once
  * tsx src/index.ts --gbics — Run GBICS.com scraper once
+ * tsx src/index.ts --prolabs — Run ProLabs scraper once
  * tsx src/index.ts --juniper — Run Juniper HCT scraper once
+ * tsx src/index.ts --switches — Seed switch/router database
+ * tsx src/index.ts --whitebox — Seed whitebox switch database (Edgecore, Celestica, etc.)
+ * tsx src/index.ts --switches-ext — Seed extended switches (Fortinet, MikroTik, Industrial, etc.)
+ * tsx src/index.ts --sonic-hcl — Scrape SONiC Hardware Compatibility List
+ * tsx src/index.ts --edgecore — Scrape Edgecore product catalog
+ * tsx src/index.ts --ufispace — Scrape UfiSpace product catalog
+ * tsx src/index.ts --switch-assets — Scrape switch assets via URL patterns
+ * tsx src/index.ts --switch-crawl — Crawl switch assets (Cheerio, static HTML vendors)
+ * tsx src/index.ts --switch-crawl-pw — Crawl switch assets (Playwright, JS-heavy vendors)
  * tsx src/index.ts --fetch-only — Run only fetch-based scrapers (no Playwright)
+ * tsx src/index.ts --atgbics — Run ATGBICS scraper once
  */
 import { createScheduler, registerSchedules, registerWorkers } from "./scheduler";
 import { scrapeFs } from "./scrapers/fs-com";
@@ -30,6 +41,19 @@ import { scrapeFluxlight } from "./scrapers/fluxlight";
 import { scrapeSfpCables } from "./scrapers/sfpcables";
 import { scrapeGbics } from "./scrapers/gbics";
 import { scrapeJuniperHct } from "./scrapers/juniper-hct";
+import { seedSwitches } from "./scrapers/switch-seed";
+import { seedWhiteboxSwitches } from "./scrapers/whitebox-seed";
+import { seedFlexoptixVendors } from "./scrapers/flexoptix-supported-vendors";
+import { scrapeSonicHcl } from "./scrapers/sonic-hcl";
+import { scrapeEdgecore } from "./scrapers/edgecore";
+import { scrapeUfiSpace } from "./scrapers/ufispace";
+import { seedExtendedSwitches } from "./scrapers/switch-seed-extended";
+import { seedBulkSwitches } from "./scrapers/switch-seed-bulk";
+import { scrapeSwitchAssets } from "./scrapers/switch-assets";
+import { crawlSwitchAssets } from "./scrapers/switch-assets-crawler";
+import { crawlSwitchAssetsPlaywright } from "./scrapers/switch-assets-playwright";
+import { scrapeAtgbics } from "./scrapers/atgbics";
+import { scrapeProLabs } from "./scrapers/prolabs";
 import { pool } from "./utils/db";
 
 const args = process.argv.slice(2);
@@ -59,12 +83,49 @@ async function runOnce(): Promise<void> {
   if (args.includes("--gbics") || isAll || isFetchOnly) {
     await scrapeGbics();
   }
+  if (args.includes("--prolabs") || isAll || isFetchOnly) {
+    await scrapeProLabs();
+  }
   if (args.includes("--juniper") || isAll || isFetchOnly) {
     await scrapeJuniperHct();
   }
+  if (args.includes("--switches") || isAll || isFetchOnly) {
+    await seedSwitches();
+  }
+  if (args.includes("--whitebox") || isAll || isFetchOnly) {
+    await seedWhiteboxSwitches();
+  }
+  if (args.includes("--flexoptix-vendors") || isAll || isFetchOnly) {
+    await seedFlexoptixVendors();
+  }
+  if (args.includes("--switches-ext") || isAll || isFetchOnly) {
+    await seedExtendedSwitches();
+  }
+  if (args.includes("--switches-bulk") || isAll || isFetchOnly) {
+    await seedBulkSwitches();
+  }
+  if (args.includes("--sonic-hcl") || isAll || isFetchOnly) {
+    await scrapeSonicHcl();
+  }
   if (args.includes("--news") || isAll || isFetchOnly) {
     await scrapeNews();
   }
+  if (args.includes("--switch-assets") || isAll) {
+    const vendor = args.find((a) => a.startsWith("--vendor="))?.split("=")[1];
+    await scrapeSwitchAssets(vendor);
+  }
+  if (args.includes("--switch-crawl") || isAll) {
+    const vendor = args.find((a) => a.startsWith("--vendor="))?.split("=")[1];
+    await crawlSwitchAssets(vendor);
+  }
+
+  // Crawlee-based scrapers (Cheerio, no Playwright needed)
+  if (args.includes("--edgecore") || isAll) {
+    await scrapeEdgecore();
+  }
+  if (args.includes("--ufispace") || isAll) {
+    await scrapeUfiSpace();
+  }
 
   // Playwright-based scrapers (need Chromium installed)
   if (!isFetchOnly) {
@@ -77,6 +138,13 @@ async function runOnce(): Promise<void> {
     if (args.includes("--optcore") || isAll) {
       await scrapeOptcore();
     }
+    if (args.includes("--switch-crawl-pw") || isAll) {
+      const vendor = args.find((a) => a.startsWith("--vendor="))?.split("=")[1];
+      await crawlSwitchAssetsPlaywright(vendor);
+    }
+    if (args.includes("--atgbics") || isAll) {
+      await scrapeAtgbics();
+    }
   }
 
   await pool.end();
@@ -104,7 +172,7 @@ async function runScheduler(): Promise<void> {
   process.on("SIGTERM", shutdown);
 }
 
-const ALL_FLAGS = ["--all", "--fs", "--cisco", "--optcore", "--news", "--flexoptix", "--vendors", "--10gtek", "--champion", "--fluxlight", "--sfpcables", "--gbics", "--juniper", "--fetch-only"];
+const ALL_FLAGS = ["--all", "--fs", "--cisco", "--optcore", "--news", "--flexoptix", "--vendors", "--10gtek", "--champion", "--fluxlight", "--sfpcables", "--gbics", "--prolabs", "--juniper", "--switches", "--whitebox", "--switches-ext", "--switches-bulk", "--flexoptix-vendors", "--sonic-hcl", "--edgecore", "--ufispace", "--switch-assets", "--switch-crawl", "--switch-crawl-pw", "--fetch-only", "--atgbics"];
 
 if (args.some((a) => ALL_FLAGS.includes(a))) {
   runOnce().catch((err) => {
diff --git a/packages/scraper/src/scheduler.ts b/packages/scraper/src/scheduler.ts
index f18a373..6bb8fa1 100644
--- a/packages/scraper/src/scheduler.ts
+++ b/packages/scraper/src/scheduler.ts
@@ -4,6 +4,8 @@
  * Job types:
  * scrape:pricing:fs — Every 4 hours for FS.com prices/stock
  * scrape:pricing:optcore — Every 6 hours for Optcore prices/stock
+ * scrape:pricing:atgbics — Every 8 hours for ATGBICS prices/stock (GBP)
+ * scrape:pricing:prolabs — Every 8 hours for ProLabs prices/stock (USD)
  * scrape:compat:cisco — Weekly for OEM compatibility matrices
  * scrape:news — Every 6 hours for trade press and news
  * scrape:docs — Weekly for manuals and datasheets
@@ -41,6 +43,8 @@ export async function registerSchedules(boss: PgBoss): Promise<void> {
     "scrape:pricing:fs",
     "scrape:pricing:optcore",
     "scrape:pricing:10gtek",
+    "scrape:pricing:atgbics",
+    "scrape:pricing:prolabs",
     "scrape:compat:cisco",
     "scrape:vendors:flexoptix",
     "scrape:news",
@@ -87,6 +91,18 @@ export async function registerSchedules(boss: PgBoss): Promise<void> {
     expireInSeconds: 3600,
   });
 
+  // ATGBICS pricing (every 8 hours — Shopify/Playwright, GBP prices)
+  await boss.schedule("scrape:pricing:atgbics", "0 2/8 * * *", {}, {
+    retryLimit: 2,
+    expireInSeconds: 3600,
+  });
+
+  // ProLabs pricing (every 8 hours — server-rendered HTML, USD prices)
+  await boss.schedule("scrape:pricing:prolabs", "0 4/8 * * *", {}, {
+    retryLimit: 2,
+    expireInSeconds: 3600,
+  });
+
   // Flexoptix vendor list (weekly, Sunday at 6am — own data)
   await boss.schedule("scrape:vendors:flexoptix", "0 6 * * 0", {}, {
     retryLimit: 3,
@@ -110,6 +126,8 @@ export async function registerWorkers(boss: PgBoss): Promise<void> {
   const { scrape10Gtek } = await import("./scrapers/tenGtek");
   const { scrapeFlexoptixVendors } = await import("./scrapers/flexoptix-vendors");
   const { scrapeNews } = await import("./scrapers/news");
+  const { scrapeAtgbics } = await import("./scrapers/atgbics");
+  const { scrapeProLabs } = await import("./scrapers/prolabs");
 
   await boss.work("scrape:pricing:fs", async (_job) => {
     console.log(`[${new Date().toISOString()}] Running: FS.com pricing`);
@@ -141,6 +159,16 @@ export async function registerWorkers(boss: PgBoss): Promise<void> {
     await scrapeNews();
   });
 
+  await boss.work("scrape:pricing:atgbics", async (_job) => {
+    console.log(`[${new Date().toISOString()}] Running: ATGBICS pricing`);
+    await scrapeAtgbics();
+  });
+
+  await boss.work("scrape:pricing:prolabs", async (_job) => {
+    console.log(`[${new Date().toISOString()}] Running: ProLabs pricing`);
+    await scrapeProLabs();
+  });
+
   await boss.work("scrape:faq", async (_job) => {
     console.log(`[${new Date().toISOString()}] FAQ scraper — not yet implemented`);
   });
diff --git a/packages/scraper/src/scrapers/atgbics.ts b/packages/scraper/src/scrapers/atgbics.ts
new file mode 100644
index 0000000..a78c42e
--- /dev/null
+++ b/packages/scraper/src/scrapers/atgbics.ts
@@ -0,0 +1,369 @@
+/**
+ * ATGBICS Scraper — Prices, Stock, Product Catalog
+ *
+ * ATGBICS is a UK-based independent compatible optics vendor.
+ * Site uses Shopify with client-side rendering, so we use PlaywrightCrawler.
+ * Prices are publicly visible in GBP.
/**
 * Categories scraped:
 *   /collections/sfp-transceivers/
 *   /collections/sfp-plus-transceivers/
 *   /collections/sfp28-transceivers/
 *   /collections/qsfp-plus-transceivers/
 *   /collections/qsfp28-transceivers/
 *   /collections/qsfp-dd-transceivers/
 *
 * Rate limiting: one request at a time, at most 20 requests per minute
 * (about 3s apart) and at most MAX_PAGES requests per crawl.
 * NOTE(review): the previous header also said robots.txt is respected, but
 * nothing in this module checks robots.txt — confirm it is enforced elsewhere.
 */
import { PlaywrightCrawler } from "crawlee";
import { ensureVendor, upsertPriceObservation, findOrCreateScrapedTransceiver, pool } from "../utils/db";
import { contentHash, parsePrice, parseStockLevel, parseQuantity } from "../utils/hash";

/** Site origin; relative product links are resolved against it. */
const BASE_URL = "https://www.atgbics.com";

/** Collection (listing) pages used as crawl entry points. */
const CATEGORY_URLS = [
  "/collections/sfp-transceivers/",
  "/collections/sfp-plus-transceivers/",
  "/collections/sfp28-transceivers/",
  "/collections/qsfp-plus-transceivers/",
  "/collections/qsfp28-transceivers/",
  "/collections/qsfp-dd-transceivers/",
];

/** Upper bound on requests per crawl (also used to stop enqueueing pagination links). */
const MAX_PAGES = 50;

/** One scraped price/stock record, before it is written to the database. */
interface AtgbicsProduct {
  partNumber: string;
  name: string;
  price: number;
  currency: string;
  stockLevel: string;
  quantity?: number;
  url: string;
  formFactor?: string;
  speedGbps?: number;
  speed?: string;
  reachLabel?: string;
  fiberType?: string;
  /** True when the record was read from a product detail page rather than a listing card. */
  fromDetailPage?: boolean;
}

/**
 * Guess the transceiver form factor from free text (product name/description).
 *
 * More specific names are tested first so that e.g. "QSFP28" is not reported
 * as "SFP28" or "SFP". Returns undefined when nothing matches.
 */
function detectFormFactor(text: string): string | undefined {
  const lower = text.toLowerCase();
  if (lower.includes("qsfp-dd") || lower.includes("qsfp dd")) return "QSFP-DD";
  if (lower.includes("qsfp28")) return "QSFP28";
  if (lower.includes("qsfp+") || lower.includes("qsfp plus") || lower.includes("qsfp-plus")) return "QSFP+";
  if (lower.includes("sfp28")) return "SFP28";
  if (lower.includes("sfp+") || lower.includes("sfp plus") || lower.includes("sfp-plus")) return "SFP+";
  // "osfp" contains "sfp" but not "qsfp"; without this check it was reported as plain SFP.
  if (lower.includes("osfp")) return "OSFP";
  if (lower.includes("sfp") && !lower.includes("qsfp")) return "SFP";
  if (lower.includes("xfp")) return "XFP";
  if (lower.includes("cfp2")) return "CFP2";
  if (lower.includes("cfp")) return "CFP";
  return undefined;
}

/**
 * Guess the line rate from free text.
 *
 * Patterns are tried from the highest rate down, so "40G to 4x10G" yields 40G.
 * Every numeric pattern refuses a digit or "." immediately before the number,
 * otherwise "1.25G" or "10.3125G" would be read as 25G.
 */
function detectSpeed(text: string): { speed: string; speedGbps: number } | undefined {
  const patterns: [RegExp, string, number][] = [
    [/(?<![\d.])800\s*g/i, "800G", 800],
    [/(?<![\d.])400\s*g/i, "400G", 400],
    [/(?<![\d.])200\s*g/i, "200G", 200],
    [/(?<![\d.])100\s*g/i, "100G", 100],
    [/(?<![\d.])50\s*g/i, "50G", 50],
    [/(?<![\d.])40\s*g/i, "40G", 40],
    [/(?<![\d.])25\s*g/i, "25G", 25],
    [/(?<![\d.])10\s*g/i, "10G", 10],
    [/1000\s*base/i, "1G", 1],
    // 1G optics are frequently advertised with their 1.25G line rate.
    [/(?<![\d.])1(?:\.25)?\s*g(?:bps|b)?\b/i, "1G", 1],
  ];
  for (const [re, speed, gbps] of patterns) {
    if (re.test(text)) return { speed, speedGbps: gbps };
  }
  return undefined;
}

/**
 * Extract the first distance token ("300m", "10km", "1.5km") from free text
 * and return it as a compact lower-case label, or undefined if there is none.
 */
function detectReach(text: string): string | undefined {
  // The lookbehind keeps "1.5km" whole instead of matching its "5km" suffix.
  const match = text.match(/(?<![\d.])(\d+(?:\.\d+)?)\s*(km|m)\b/i);
  if (match) return `${match[1]}${match[2].toLowerCase()}`;
  return undefined;
}

/**
 * Classify the medium as "SMF", "MMF" or "DAC" from keywords and reach-code
 * suffixes (-LR/-ER/-ZR, -SR/-SX, ...). Checks run in that order; returns
 * undefined when no keyword is present.
 */
function detectFiberType(text: string): string | undefined {
  const lower = text.toLowerCase();
  if (lower.includes("single mode") || lower.includes("single-mode") || lower.includes("smf") || lower.includes("-lr") || lower.includes("-er") || lower.includes("-zr")) return "SMF";
  if (lower.includes("multi mode") || lower.includes("multi-mode") || lower.includes("mmf") || lower.includes("-sr") || lower.includes("-sx")) return "MMF";
  if (lower.includes("dac") || lower.includes("direct attach") || lower.includes("copper") || lower.includes("-t ") || lower.includes("twinax")) return "DAC";
  return undefined;
}

/** parsePrice may report USD when the text has no currency symbol; ATGBICS lists prices in GBP. */
function assumeGbp(currency: string): string {
  return currency === "USD" ? "GBP" : currency;
}

/**
 * Collapse scraped records to one per part number (falling back to the name).
 * The first record seen wins, except that a product-detail-page record
 * replaces a listing-card record for the same key.
 */
function dedupeProducts(products: readonly AtgbicsProduct[]): Map<string, AtgbicsProduct> {
  const unique = new Map<string, AtgbicsProduct>();
  for (const p of products) {
    const key = p.partNumber || p.name;
    const existing = unique.get(key);
    if (!existing || (!existing.fromDetailPage && p.fromDetailPage)) {
      unique.set(key, p);
    }
  }
  return unique;
}

/**
 * Crawl the ATGBICS collection pages, collect price/stock records, and write
 * them to the database as price observations.
 *
 * Listing pages are parsed in the browser context (page.evaluate) and their
 * pagination links are enqueued. Product detail pages are handled if one is
 * ever requested, but this handler does not enqueue them itself.
 * Database errors for a single product are logged and do not abort the run.
 */
export async function scrapeAtgbics(): Promise<void> {
  console.log("=== ATGBICS Scraper Starting ===\n");

  const vendorId = await ensureVendor(
    "ATGBICS",
    "compatible",
    "https://www.atgbics.com",
    "https://www.atgbics.com/collections/sfp-plus-transceivers/"
  );
  console.log(`Vendor ID: ${vendorId}`);

  const products: AtgbicsProduct[] = [];
  let pagesScraped = 0;

  const crawler = new PlaywrightCrawler({
    maxConcurrency: 1,
    maxRequestsPerMinute: 20, // one request every ~3s at concurrency 1
    maxRequestsPerCrawl: MAX_PAGES,
    requestHandlerTimeoutSecs: 60,
    headless: true,
    launchContext: {
      launchOptions: {
        args: ["--disable-blink-features=AutomationControlled", "--no-sandbox"],
      },
    },

    async requestHandler({ page, request, enqueueLinks, log }) {
      const url = request.url;
      log.info(`Scraping: ${url}`);

      // Fixed pause so the client-side rendered product grid has time to appear.
      await page.waitForTimeout(2000);

      // A "/products/" segment marks a product page even when the URL also
      // carries a "/collections/<name>/" prefix; everything else under
      // "/collections/" is a listing page.
      const isCollection = url.includes("/collections/") && !url.includes("/products/");

      if (isCollection) {
        // Runs inside the browser: must not reference anything from module scope.
        const productData = await page.evaluate(() => {
          const results: Array<{
            name: string;
            href: string;
            price: string;
            stock: string;
            partNumber: string;
          }> = [];

          // Shopify collection page — product cards
          const cards = document.querySelectorAll(
            ".product-item, .grid-product, [class*=\"product-card\"], [class*=\"product-grid\"] li, .collection-grid__item"
          );

          for (const card of cards) {
            const linkEl = card.querySelector("a[href*=\"/products/\"]") as HTMLAnchorElement | null;
            const nameEl = card.querySelector(
              ".product-item__title, .grid-product__title, [class*=\"product-title\"], [class*=\"product-name\"], h2, h3"
            );
            const priceEl = card.querySelector(
              ".product-item__price, .grid-product__price, [class*=\"price\"]:not([class*=\"compare\"]):not([class*=\"was\"])"
            );
            const stockEl = card.querySelector(
              "[class*=\"stock\"], [class*=\"availability\"], [class*=\"badge\"]"
            );

            const href = linkEl?.getAttribute("href") || "";
            const name = nameEl?.textContent?.trim() || linkEl?.textContent?.trim() || "";
            const price = priceEl?.textContent?.trim() || "";
            const stock = stockEl?.textContent?.trim() || "";

            // Derive part number from URL slug: /products/sfp-10g-lr → sfp-10g-lr
            const slug = href.split("/products/")[1]?.split("?")[0]?.replace(/\/$/, "") || "";

            if (href && name && name.length > 3) {
              results.push({ name, href, price, stock, partNumber: slug });
            }
          }

          // Fallback: grab any /products/ links with adjacent price text
          if (results.length === 0) {
            const allProductLinks = document.querySelectorAll("a[href*=\"/products/\"]");
            const seen = new Set<string>();
            for (const el of allProductLinks) {
              const a = el as HTMLAnchorElement;
              const href = a.getAttribute("href") || "";
              if (seen.has(href)) continue;
              seen.add(href);

              const name = a.textContent?.trim() || "";
              if (!name || name.length < 3) continue;

              const container = a.closest("li") || a.closest("article") || a.parentElement?.parentElement;
              const priceEl = container?.querySelector("[class*=\"price\"]");
              const price = priceEl?.textContent?.trim() || "";
              const slug = href.split("/products/")[1]?.split("?")[0]?.replace(/\/$/, "") || "";

              results.push({ name, href, price, stock: "", partNumber: slug });
            }
          }

          return results;
        });

        log.info(` Found ${productData.length} products on collection page`);

        for (const item of productData) {
          // Cards without a price cannot produce a price observation.
          if (!item.href || !item.price) continue;

          const fullUrl = item.href.startsWith("http") ? item.href : `${BASE_URL}${item.href}`;
          const { price, currency } = parsePrice(item.price);
          if (!(price > 0)) continue;

          const speedInfo = detectSpeed(item.name);
          products.push({
            partNumber: item.partNumber || item.name.slice(0, 80),
            name: item.name,
            price,
            currency: assumeGbp(currency),
            stockLevel: item.stock ? parseStockLevel(item.stock) : "in_stock",
            quantity: item.stock ? parseQuantity(item.stock) : undefined,
            url: fullUrl,
            formFactor: detectFormFactor(item.name),
            speedGbps: speedInfo?.speedGbps,
            speed: speedInfo?.speed,
            reachLabel: detectReach(item.name),
            fiberType: detectFiberType(item.name),
            fromDetailPage: false,
          });
        }

        // Enqueue next page if pagination exists
        await enqueueLinks({
          selector: "a[href*=\"?page=\"], a.pagination__next, a[rel=\"next\"], .pagination a[href]",
          transformRequestFunction: (req) => {
            if (pagesScraped >= MAX_PAGES) return false;
            return req;
          },
        });

        pagesScraped++;
      } else {
        // Product detail page — extract precise data
        const data = await page.evaluate(() => {
          const title = document.querySelector(
            "h1.product__title, h1.product-title, h1.product_title, h1"
          )?.textContent?.trim() || "";

          // Shopify price — prefer sale price if available
          const salePriceEl = document.querySelector(
            ".price__sale .price-item--sale, .product__price .money, [class*=\"price\"] .money, [data-product-price], .price ins"
          );
          const priceText = salePriceEl?.textContent?.trim() || "";

          // Stock / availability
          const stockEl = document.querySelector(
            ".product__availability, .availability, [class*=\"stock\"], [class*=\"inventory\"], .badge--sold-out, .badge--in-stock"
          );
          const stockText = stockEl?.textContent?.trim() || "";

          // Quantity badge (some Shopify themes show "X in stock")
          const qtyEl = document.querySelector("[class*=\"quantity\"], [class*=\"inventory-count\"]");
          const qtyText = qtyEl?.textContent?.trim() || "";

          // Short description / variant title for reach/fiber info
          const descEl = document.querySelector(
            ".product__description, .product-description, .rte p:first-child, .product__short-description"
          );
          const description = descEl?.textContent?.trim() || "";

          // SKU / part number (Shopify often exposes this)
          const skuEl = document.querySelector(".product__sku, [class*=\"sku\"], [itemprop=\"sku\"]");
          const sku = skuEl?.textContent?.replace(/SKU[:\s]*/i, "").trim() || "";

          return { title, priceText, stockText, qtyText, description, sku };
        });

        const slug = url.split("/products/")[1]?.split("?")[0]?.replace(/\/$/, "") || "";
        const partNumber = data.sku || slug;
        const name = data.title || slug;

        const combinedText = `${name} ${data.description}`;
        const { price, currency } = parsePrice(data.priceText);

        if (price > 0) {
          const speedInfo = detectSpeed(combinedText);
          products.push({
            partNumber,
            name,
            price,
            currency: assumeGbp(currency),
            stockLevel: data.stockText ? parseStockLevel(data.stockText) : "in_stock",
            quantity: data.qtyText ? parseQuantity(data.qtyText) : undefined,
            url,
            formFactor: detectFormFactor(combinedText),
            speedGbps: speedInfo?.speedGbps,
            speed: speedInfo?.speed,
            reachLabel: detectReach(combinedText),
            fiberType: detectFiberType(combinedText),
            fromDetailPage: true,
          });
        }

        pagesScraped++;
      }
    },
  });

  const startUrls = CATEGORY_URLS.map((path) => `${BASE_URL}${path}`);
  await crawler.run(startUrls);

  console.log(`\nPages scraped: ${pagesScraped}`);
  console.log(`Products found: ${products.length}`);

  // Deduplicate by partNumber — prefer product detail page data (more precise)
  const uniqueProducts = dedupeProducts(products);

  // Write to database
  let written = 0;
  let skipped = 0;

  for (const p of uniqueProducts.values()) {
    try {
      const transceiverId = await findOrCreateScrapedTransceiver({
        partNumber: p.partNumber,
        vendorId,
        formFactor: p.formFactor,
        speedGbps: p.speedGbps,
        speed: p.speed,
        reachLabel: p.reachLabel,
        fiberType: p.fiberType,
        category: "DataCenter",
      });

      const hash = contentHash({ price: p.price, stock: p.stockLevel, qty: p.quantity });
      const isNew = await upsertPriceObservation({
        transceiverId,
        sourceVendorId: vendorId,
        price: p.price,
        currency: p.currency,
        stockLevel: p.stockLevel,
        quantityAvailable: p.quantity,
        url: p.url,
        contentHash: hash,
      });

      if (isNew) written++;
      else skipped++;
    } catch (err) {
      // One bad record must not abort the whole import.
      const message = err instanceof Error ? err.message : String(err);
      console.error(` Error: ${p.partNumber}:`, message);
    }
  }

  console.log(`\nDatabase: ${written} new, ${skipped} unchanged (${uniqueProducts.size} unique)`);
  console.log("=== ATGBICS Scraper Complete ===\n");
}

// Allow running this scraper directly as a script.
if (require.main === module) {
  scrapeAtgbics()
    .then(() => pool.end())
    .catch((err) => {
      console.error("Fatal:", err);
      pool.end();
      process.exit(1);
    });
}

// ---- remainder of the original collapsed diff line: header of the next file ----
// diff --git a/packages/scraper/src/scrapers/prolabs.ts b/packages/scraper/src/scrapers/prolabs.ts
// new file mode 100644
// index 0000000..125d90b
// --- /dev/null
// +++ b/packages/scraper/src/scrapers/prolabs.ts
// @@ -0,0 +1,351 @@
/**
 * ProLabs Scraper — Enterprise-grade compatible optics (Legrand subsidiary)
 *
 * prolabs.com — Server-rendered HTML with public USD pricing.
 * Products listed under /products/networking/fiber-optics/ category pages.
 * Pagination via ?page=N. Rate limited: 1 req/2sec. Max 100 pages.
 */
/**
 * SKU format examples: "Q-4X10G-LR-PR", "SFP-10G-SR-PR", "Q28-100G-LR4-PR"
 */
import { pool, findOrCreateScrapedTransceiver, ensureVendor, upsertPriceObservation } from "../utils/db";
import { contentHash } from "../utils/hash";

/** Site origin; relative product links are resolved against it. */
const BASE = "https://www.prolabs.com";

/** Request headers sent with every page fetch. */
const HEADERS = {
  "User-Agent": "Mozilla/5.0 (compatible; TIP-Bot/1.0; research)",
  Accept: "text/html,application/xhtml+xml",
};

/** Highest ?page=N value requested for any single category. */
const MAX_PAGES = 100;

/**
 * Category listing paths with the form factor / speed assumed for products
 * found there when the SKU itself does not say otherwise.
 */
const CATEGORIES = [
  { path: "/products/networking/fiber-optics/sfp-modules", formFactor: "SFP", speed: "1G", speedGbps: 1 },
  { path: "/products/networking/fiber-optics/sfp-plus-modules", formFactor: "SFP+", speed: "10G", speedGbps: 10 },
  { path: "/products/networking/fiber-optics/sfp28-modules", formFactor: "SFP28", speed: "25G", speedGbps: 25 },
  { path: "/products/networking/fiber-optics/qsfp-plus-modules", formFactor: "QSFP+", speed: "40G", speedGbps: 40 },
  { path: "/products/networking/fiber-optics/qsfp28-modules", formFactor: "QSFP28", speed: "100G", speedGbps: 100 },
  { path: "/products/networking/fiber-optics/qsfp-dd-modules", formFactor: "QSFP-DD", speed: "400G", speedGbps: 400 },
  { path: "/products/networking/fiber-optics/coherent-modules", formFactor: "QSFP-DD", speed: "400G", speedGbps: 400 },
  // Broad fallback category in case above paths differ on the live site
  { path: "/products/networking/fiber-optics", formFactor: "SFP+", speed: "10G", speedGbps: 10 },
];

/** One product parsed from a listing page. */
interface Product {
  partNumber: string;
  name: string;
  url: string;
  price?: number;
  stockStatus?: string;
  formFactor: string;
  speed: string;
  speedGbps: number;
  reachLabel?: string;
  reachMeters?: number;
  fiberType?: string;
  wavelength?: string;
}

/** Resolve after `ms` milliseconds; used to space out requests. */
function sleep(ms: number): Promise<void> {
  return new Promise((resolve) => setTimeout(resolve, ms));
}

/**
 * Derive the nominal reach from a product name / SKU.
 *
 * An explicit distance ("10km", "300 m") from the fixed table wins; km values
 * are tested before metre values. Failing that, an upper-case reach code
 * (LR, ER, ZR, SR, DR, FR, optionally followed by "4") is mapped to the
 * distance listed for it below. Returns undefined when neither is found.
 */
function detectReach(text: string): { label: string; meters: number } | undefined {
  const patterns: [RegExp, string, number][] = [
    [/\b120\s*km\b/i, "120km", 120000],
    [/\b80\s*km\b/i, "80km", 80000],
    [/\b40\s*km\b/i, "40km", 40000],
    [/\b20\s*km\b/i, "20km", 20000],
    [/\b10\s*km\b/i, "10km", 10000],
    [/\b2\s*km\b/i, "2km", 2000],
    [/\b550\s*m\b/i, "550m", 550],
    [/\b500\s*m\b/i, "500m", 500],
    [/\b400\s*m\b/i, "400m", 400],
    [/\b300\s*m\b/i, "300m", 300],
    [/\b150\s*m\b/i, "150m", 150],
    [/\b100\s*m\b/i, "100m", 100],
    [/\b30\s*m\b/i, "30m", 30],
    // Reach codes are matched case-sensitively (no `i` flag).
    [/\bLR4\b/, "10km", 10000],
    [/\bLR\b/, "10km", 10000],
    [/\bER4?\b/, "40km", 40000],
    [/\bZR4?\b/, "80km", 80000],
    [/\bSR4?\b/, "300m", 300],
    [/\bDR4?\b/, "500m", 500],
    [/\bFR4?\b/, "2km", 2000],
  ];
  for (const [regex, label, meters] of patterns) {
    if (regex.test(text)) return { label, meters };
  }
  return undefined;
}

/**
 * Classify the medium as "SMF", "MMF" or "Copper"; returns "" when unknown.
 *
 * Two-letter reach codes (LX/LR/ER/ZR, SX/SR) count only when they are not
 * part of a longer word. Lookarounds are used instead of consuming a
 * neighbouring character so that a code at the very start or end of the text
 * (e.g. a SKU ending in "-LR") is recognised too.
 */
function detectFiber(text: string): string {
  if (/single.?mode|smf|(?<![a-z])(?:lx|lr|er|zr)(?![a-z])|bidi|cwdm|dwdm/i.test(text)) return "SMF";
  if (/multi.?mode|mmf|(?<![a-z])(?:sx|sr)(?![a-z])/i.test(text)) return "MMF";
  if (/copper|dac|twinax|rj.?45|base-t|cat[56x]/i.test(text)) return "Copper";
  return "";
}

/** Return the first 3–4 digit wavelength followed by "nm" (digits only), or "" if absent. */
function detectWavelength(text: string): string {
  const match = text.match(/(\d{3,4})\s*nm/i);
  return match ? match[1] : "";
}

/**
 * Infer form factor and speed from ProLabs SKU prefixes when category context
 * is not specific enough (e.g. when crawling the broad fallback category).
 */
/**
 * Infer form factor and speed from a ProLabs SKU, falling back to the values
 * of the category the product was found in.
 *
 * Rules, in order (the SKU is upper-cased first):
 *   QDD-… or contains QSFP?DD   -> QSFP-DD 400G
 *   Q28-… or contains QSFP28    -> QSFP28  100G
 *   Q-…   (no "28" in the first five characters) -> QSFP+ 40G
 *   SFP28-… or contains SFP-25  -> SFP28   25G
 *   SFP-10G…                    -> SFP+    10G
 *   S-…                         -> SFP     1G
 *   anything else               -> the category's formFactor/speed
 *
 * The SFP-10G rule exists because an explicit speed in the SKU is more
 * reliable than the category: without it "SFP-10G-SR-PR" listed under the 1G
 * category was recorded as SFP/1G.
 *
 * @param sku Part number as scraped (any case).
 * @param cat Category defaults used when no rule matches.
 * @returns A fresh object with the inferred formFactor, speed and speedGbps.
 */
function inferFromSku(
  sku: string,
  cat: { formFactor: string; speed: string; speedGbps: number }
): {
  formFactor: string;
  speed: string;
  speedGbps: number;
} {
  const upper = sku.toUpperCase();
  if (/^QDD[-_]|QSFP.DD/.test(upper)) return { formFactor: "QSFP-DD", speed: "400G", speedGbps: 400 };
  if (/^Q28[-_]|QSFP28/.test(upper)) return { formFactor: "QSFP28", speed: "100G", speedGbps: 100 };
  if (/^Q[-_]/.test(upper) && !/28/.test(upper.slice(0, 5))) return { formFactor: "QSFP+", speed: "40G", speedGbps: 40 };
  if (/^SFP28[-_]|SFP-25/.test(upper)) return { formFactor: "SFP28", speed: "25G", speedGbps: 25 };
  if (/^SFP[-_]10G/.test(upper)) return { formFactor: "SFP+", speed: "10G", speedGbps: 10 };
  if (/^S[-_]/.test(upper)) return { formFactor: "SFP", speed: "1G", speedGbps: 1 };
  return { formFactor: cat.formFactor, speed: cat.speed, speedGbps: cat.speedGbps };
}

/**
 * Parse product listings from a ProLabs category page.
 *
 * ProLabs uses a standard e-commerce layout:
 *   - Product cards with an <a> link containing the product URL and name
 *   - Price in a span with "price" in class or as "$XX.XX" text nearby
 *   - SKU / part number in the URL slug
 *   - Stock badge: "In Stock" / "Out of Stock" / "Call for Availability"
 *
 * We parse with lightweight regex on collapsed HTML — same approach as gbics.ts
 * and sfpcables.ts (no DOM parser dependency).
 */
/**
 * Extract product records from the HTML of one ProLabs category page.
 *
 * Strategy 1 matches anchors whose href is a /products/… path ending in a
 * SKU-like segment and whose link text is at least 10 characters long.
 * Strategy 2 runs only when strategy 1 finds nothing: it takes any
 * /products/… href that has a "$" price in the surrounding text.
 *
 * Prices are accepted only in the open range (0, 100000); anything else is
 * stored as undefined. Results are de-duplicated by URL, first occurrence wins.
 *
 * @param html Raw page HTML.
 * @param cat  Category the page belongs to; supplies fallback form factor/speed.
 * @returns Products found on the page (possibly empty).
 */
function parseProductList(html: string, cat: typeof CATEGORIES[number]): Product[] {
  const products: Product[] = [];
  const collapsed = html.replace(/\s+/g, " ");

  // Strip every thousands separator (a string pattern in replace() removes only the first).
  const toPrice = (raw: string): number | undefined => {
    const value = parseFloat(raw.replace(/,/g, ""));
    return value > 0 && value < 100000 ? value : undefined;
  };

  // Strategy 1: product cards with structured href containing a SKU-like segment.
  // The href may be the last attribute of the tag, so nothing is required
  // between its closing quote and ">".
  const productLinkRegex = /href="(\/products\/[^"]*?\/([A-Z0-9][A-Z0-9\-_]{3,}(?:-PR)?))"[^>]*>([^<]{10,})<\/a>/gi;
  let match: RegExpExecArray | null;

  while ((match = productLinkRegex.exec(collapsed)) !== null) {
    const relUrl = match[1];
    const skuFromUrl = match[2];
    const linkText = match[3].trim();

    // Skip navigation / filter / pagination links
    if (/category|filter|sort|page|breadcrumb/i.test(relUrl)) continue;
    if (linkText.length > 200) continue;

    const url = BASE + relUrl;
    const partNumber = skuFromUrl.slice(0, 80);
    const name = linkText.length > 10 ? linkText : partNumber;

    // Price/stock are searched in a window from 100 chars before the link to
    // 700 chars after it; the first hit in that window is used.
    const context = collapsed.slice(Math.max(0, match.index - 100), match.index + 700);
    const priceMatch = context.match(/\$\s*([\d,]+\.?\d{0,2})/) ||
      context.match(/price[^>]*>\s*\$?\s*([\d,]+\.?\d{0,2})/i);
    const price = priceMatch ? toPrice(priceMatch[1]) : undefined;

    // "unavailable" is listed before "available" so it is captured whole
    // instead of being reported as its "available" suffix.
    const stockMatch = context.match(/(in[\s-]stock|out[\s-]of[\s-]stock|call for availability|unavailable|available|backordered)/i);
    const stockStatus = stockMatch ? stockMatch[1].toLowerCase() : undefined;

    const combined = name + " " + partNumber;
    const reach = detectReach(combined);
    const { formFactor, speed, speedGbps } = inferFromSku(partNumber, cat);

    products.push({
      partNumber, name, url,
      price,
      stockStatus,
      formFactor, speed, speedGbps,
      reachLabel: reach?.label,
      reachMeters: reach?.meters,
      fiberType: detectFiber(combined),
      wavelength: detectWavelength(combined),
    });
  }

  // Strategy 2: Fallback — any link to a /products/ URL that has a $ price nearby
  if (products.length === 0) {
    const altRegex = /href="(\/products\/[^"]{10,})"/gi;
    while ((match = altRegex.exec(collapsed)) !== null) {
      const relUrl = match[1];
      if (/category|filter|sort|page|breadcrumb/i.test(relUrl)) continue;

      const context = collapsed.slice(Math.max(0, match.index - 50), match.index + 800);
      const priceMatch = context.match(/\$\s*([\d,]+\.?\d{0,2})/);
      if (!priceMatch) continue;

      const nameMatch = context.match(/<(?:h[23]|strong|span)[^>]*>([^<]{10,150})<\//i);
      const name = nameMatch ? nameMatch[1].trim() : relUrl.split("/").pop() || "";
      const partNumber = (relUrl.split("/").pop() ?? name).slice(0, 80);

      const url = BASE + relUrl;
      const combined = name + " " + partNumber;
      const reach = detectReach(combined);
      const { formFactor, speed, speedGbps } = inferFromSku(partNumber, cat);

      products.push({
        partNumber, name, url,
        price: toPrice(priceMatch[1]),
        formFactor, speed, speedGbps,
        reachLabel: reach?.label,
        reachMeters: reach?.meters,
        fiberType: detectFiber(combined),
        wavelength: detectWavelength(combined),
      });
    }
  }

  // Deduplicate by URL
  const seen = new Set<string>();
  return products.filter((p) => {
    if (seen.has(p.url)) return false;
    seen.add(p.url);
    return true;
  });
}

/** Check if the HTML contains a link to the next pagination page. */
/**
 * Report whether a listing page links to a further page of results.
 *
 * True when the HTML contains rel="next", or a `page=<currentPage + 1>` query
 * parameter introduced by "?", "&" or the HTML-escaped "&amp;".
 *
 * @param html        Raw HTML of the page just fetched.
 * @param currentPage 1-based number of that page.
 */
function hasNextPage(html: string, currentPage: number): boolean {
  if (/rel=["']next["']/i.test(html)) return true;
  const nextPageNum = currentPage + 1;
  // (?!\d) keeps page=2 from matching page=20, page=21, ...
  const pattern = new RegExp(`(?:[?&]|&amp;)page=${nextPageNum}(?!\\d)`, "i");
  return pattern.test(html);
}

/**
 * Fetch a page and return its body as text.
 * Aborts after 30 seconds.
 *
 * @throws Error when the response status is not 2xx.
 */
async function fetchPage(url: string): Promise<string> {
  const resp = await fetch(url, { headers: HEADERS, signal: AbortSignal.timeout(30000) });
  if (!resp.ok) throw new Error(`HTTP ${resp.status} for ${url}`);
  return resp.text();
}

/**
 * Map a scraped stock phrase to one of the stock levels used for price
 * observations. Missing or unrecognised text yields "on_request".
 *
 * Hyphens, underscores and runs of whitespace are treated as a single space,
 * so "in-stock" and "out-of-stock" are recognised. Negative phrases are
 * tested first so that "unavailable" / "not in stock" are not mistaken for
 * their positive substrings.
 */
function normalizeStockLevel(
  raw?: string
): "in_stock" | "low_stock" | "out_of_stock" | "on_request" {
  if (!raw) return "on_request";
  const text = raw.toLowerCase().replace(/[\s\-_]+/g, " ").trim();
  if (/\b(?:out of stock|sold out|not in stock|unavailable|not available)\b|\bbackorder/.test(text)) return "out_of_stock";
  if (text.includes("low stock") || text.includes("limited")) return "low_stock";
  if (text.includes("in stock") || /\bavailable\b/.test(text)) return "in_stock";
  return "on_request";
}

/** Best-effort message extraction for values caught in a `catch` clause. */
function errorMessage(err: unknown): string {
  return err instanceof Error ? err.message : String(err);
}

/**
 * Crawl every ProLabs category, register each product as a transceiver and
 * record a USD price observation for products that have a price.
 *
 * Each category is paged until a page yields no products, has no next-page
 * link, or MAX_PAGES is reached (the cap applies per category). Requests are
 * spaced 2 seconds apart. Products already seen under an earlier category
 * (matched by URL) are skipped. A failed page fetch ends that category; a
 * failed database write is logged and the run continues.
 */
export async function scrapeProLabs(): Promise<void> {
  console.log("=== ProLabs Scraper Starting ===\n");

  const vendorId = await ensureVendor(
    "ProLabs",
    "compatible",
    "https://www.prolabs.com",
    "https://www.prolabs.com/products/networking/fiber-optics"
  );

  let totalProducts = 0;
  let priceUpdates = 0;
  const seenUrls = new Set<string>();

  for (const cat of CATEGORIES) {
    console.log(`\n--- ${cat.formFactor} (${cat.speed}) [${cat.path}] ---`);

    let page = 1;
    let pagesThisCat = 0;
    let productsThisCat = 0;

    while (page <= MAX_PAGES) {
      const url = page === 1
        ? `${BASE}${cat.path}`
        : `${BASE}${cat.path}?page=${page}`;

      try {
        const html = await fetchPage(url);
        const pageProducts = parseProductList(html, cat);

        // Global dedup: broad fallback category overlaps with specific ones
        const newProducts = pageProducts.filter((p) => !seenUrls.has(p.url));
        newProducts.forEach((p) => seenUrls.add(p.url));

        console.log(` Page ${page}: ${pageProducts.length} found, ${newProducts.length} new`);

        for (const product of newProducts) {
          try {
            const txId = await findOrCreateScrapedTransceiver({
              partNumber: product.partNumber,
              vendorId,
              formFactor: product.formFactor,
              speedGbps: product.speedGbps,
              speed: product.speed,
              reachMeters: product.reachMeters,
              reachLabel: product.reachLabel,
              // The detectors return "" for "unknown"; store that as absent
              // rather than as an empty string.
              fiberType: product.fiberType || undefined,
              wavelengths: product.wavelength || undefined,
              category: "DataCenter",
            });

            if (product.price && product.price > 0) {
              const hash = contentHash({
                price: product.price,
                part: product.partNumber,
                stock: product.stockStatus ?? "",
              });
              const updated = await upsertPriceObservation({
                transceiverId: txId,
                sourceVendorId: vendorId,
                price: product.price,
                currency: "USD",
                stockLevel: normalizeStockLevel(product.stockStatus),
                url: product.url,
                contentHash: hash,
              });
              if (updated) priceUpdates++;
            }

            productsThisCat++;
            totalProducts++;
          } catch (err) {
            console.warn(` DB error [${product.partNumber}]: ${errorMessage(err).slice(0, 80)}`);
          }
        }

        pagesThisCat++;

        if (pageProducts.length === 0 || !hasNextPage(html, page)) break;

        page++;
        await sleep(2000);
      } catch (err) {
        console.error(` Page ${page} failed: ${errorMessage(err)}`);
        break;
      }
    }

    console.log(` Category done: ${productsThisCat} products across ${pagesThisCat} page(s)`);

    // Pause between categories, but not after the last one.
    if (cat !== CATEGORIES[CATEGORIES.length - 1]) {
      await sleep(2000);
    }
  }

  console.log(`\n=== ProLabs Complete: ${totalProducts} products processed, ${priceUpdates} price updates ===`);
}

// Allow running this scraper directly as a script.
if (require.main === module) {
  scrapeProLabs()
    .then(() => pool.end())
    .catch((err) => {
      console.error("Fatal:", err);
      pool.end();
      process.exit(1);
    });
}

// ---- remainder of the original collapsed diff line: start of the next file (SQL), kept verbatim ----
// diff --git a/sql/006-seed-knowledge-base.sql b/sql/006-seed-knowledge-base.sql
// new file mode 100644
// index 0000000..9a77e40
// --- /dev/null
// +++ b/sql/006-seed-knowledge-base.sql
// @@ -0,0 +1,121 @@
// -- Knowledge Base seed data: Troubleshooting, FAQ, Best Practices, Known Issues
// -- Run: docker exec -i tip-postgres psql -U tip -d transceiver_db < sql/006-seed-knowledge-base.sql
//
// -- Clean test data
// DELETE FROM knowledge_base WHERE subcategory = 'test';
//
// -- === TROUBLESHOOTING ===
//
// INSERT INTO knowledge_base (category, subcategory, question, answer, applies_to_form_factors, applies_to_speeds, severity, tags, resolution_steps) VALUES
// ('troubleshooting', 'tx_power', 'Low Tx power alarm on SFP+ SR module',
//  'Tx power dropped below -11.0 dBm on a module rated for -8.2 to +0.5 dBm. This indicates laser degradation.
The laser is approaching end-of-life — you have approximately 2-4 weeks before complete failure. Replace during the next maintenance window, do not wait for an unplanned outage.', + '{SFP+}', '{10G}', 'high', '{tx_power,laser,degradation,alarm}', + '[{"step": 1, "action": "Check DOM readings: show interface transceiver details"}, {"step": 2, "action": "Compare Tx power to module spec (-8.2 to +0.5 dBm for SR)"}, {"step": 3, "action": "If Tx < -11.0 dBm, schedule replacement"}, {"step": 4, "action": "Order spare and replace in next maintenance window"}]'); + +INSERT INTO knowledge_base (category, subcategory, question, answer, applies_to_form_factors, applies_to_speeds, severity, tags, resolution_steps) VALUES +('troubleshooting', 'tx_power', 'Low Tx power on QSFP-DD DR4 — per-lane diagnostics', + 'QSFP-DD DR4 modules have 4 independent lanes, each rated for -2.9 to +3.0 dBm Tx power. If only one lane shows low power, the module has a failing lane laser. If all lanes drop, check the module temperature first — overheating causes power rollback. Use per-lane DOM: show interface transceiver details.', + '{QSFP-DD}', '{400G}', 'high', '{tx_power,qsfp-dd,per_lane,dom}', + '[{"step": 1, "action": "show interface transceiver details — check per-lane Tx power"}, {"step": 2, "action": "Check module temperature (alarm above 75C)"}, {"step": 3, "action": "If single lane low: failing laser, replace module"}, {"step": 4, "action": "If all lanes low + high temp: improve airflow first"}]'); + +INSERT INTO knowledge_base (category, subcategory, question, answer, applies_to_form_factors, applies_to_speeds, severity, tags, resolution_steps) VALUES +('troubleshooting', 'ber_errors', 'High pre-FEC BER on 100G QSFP28 link', + 'Pre-FEC BER above 2.4e-4 (KP4 FEC threshold) means the Forward Error Correction is struggling. Common causes: dirty fiber end-faces (40% of cases), fiber type mismatch (SMF cable on MMF optic), or exceeded power budget. 
Post-FEC errors (uncorrected) mean the FEC has lost the fight — the link will drop packets.', + '{QSFP28}', '{100G}', 'high', '{ber,fec,kp4,errors,pre-fec}', + '[{"step": 1, "action": "show interface counters errors — check CRC and FEC counters"}, {"step": 2, "action": "If CRC > 100/min: inspect and clean fiber end-faces"}, {"step": 3, "action": "If CRC > 10000/min: check fiber type match (SMF vs MMF)"}, {"step": 4, "action": "Calculate power budget: Tx - losses >= Rx sensitivity + 3dB margin"}]'); + +INSERT INTO knowledge_base (category, subcategory, question, answer, applies_to_form_factors, applies_to_speeds, severity, tags, resolution_steps) VALUES +('troubleshooting', 'temperature', 'Transceiver temperature alarm in top-of-rack switch', + 'Top-of-rack switches run hotter because heat rises. A transceiver rated for 0-70C (COM) will alarm above 75C. Common in high-density spine switches with poor airflow. Before replacing the optic, fix the thermal environment. An overheating laser degrades 10x faster than a properly cooled one.', + '{SFP+,QSFP28,QSFP-DD,OSFP}', '{10G,100G,400G}', 'medium', '{temperature,thermal,overheating,airflow}', + '[{"step": 1, "action": "show interface transceiver details — check temperature"}, {"step": 2, "action": "Verify fan tray status and speed"}, {"step": 3, "action": "Install blanking panels in empty slots"}, {"step": 4, "action": "Consider IND-rated (-40 to +85C) modules if environment is harsh"}]'); + +INSERT INTO knowledge_base (category, subcategory, question, answer, applies_to_form_factors, applies_to_speeds, severity, tags, resolution_steps) VALUES +('troubleshooting', 'fiber_mismatch', 'Link down: SMF optic with MMF patch cable', + 'A common deployment mistake: using a multimode fiber patch cable with a single-mode optic (LR/ER/ZR modules). The core diameter mismatch (9um SMF vs 50um MMF) causes massive signal loss. Symptoms: link stays down, Rx power extremely low despite good Tx. 
Always verify fiber type: SR = MMF (orange cable), LR/ER/ZR/DR/FR = SMF (yellow cable).', + '{SFP+,QSFP28,QSFP-DD,OSFP}', '{10G,25G,100G,400G}', 'critical', '{fiber,mismatch,smf,mmf,link_down}', + '[{"step": 1, "action": "Check optic type: SR = MMF, LR/ER/ZR/DR/FR = SMF"}, {"step": 2, "action": "Verify patch cable color: orange = MMF, yellow = SMF"}, {"step": 3, "action": "Replace patch cable with correct type"}, {"step": 4, "action": "Verify link comes up and check Rx power"}]'); + +INSERT INTO knowledge_base (category, subcategory, question, answer, applies_to_form_factors, applies_to_speeds, severity, tags, resolution_steps) VALUES +('troubleshooting', 'coherent', '400ZR link not establishing — OSNR too low', + 'Coherent 400ZR optics require OSNR > 20 dB for reliable operation with 16QAM modulation. Unlike direct-detect modules, coherent links fail silently when OSNR drops. Check Tx power (-10.0 to +2.0 dBm), OSNR at receiver (> 20 dB), and chromatic dispersion within module compensation range. For DWDM, verify channel plan alignment.', + '{QSFP-DD,OSFP}', '{400G}', 'high', '{coherent,400zr,osnr,dwdm}', + '[{"step": 1, "action": "show interfaces diagnostics optics — check OSNR and CD"}, {"step": 2, "action": "Verify OSNR > 20 dB"}, {"step": 3, "action": "Check Tx power range: -10.0 to +2.0 dBm"}, {"step": 4, "action": "Verify DWDM channel plan alignment"}]'); + +INSERT INTO knowledge_base (category, subcategory, question, answer, applies_to_form_factors, applies_to_speeds, severity, tags, resolution_steps) VALUES +('troubleshooting', 'power_budget', 'Link flapping on long-distance 10G LR link', + 'A 10G LR (1310nm SMF) rated for 10km flaps at 8km. Power budget: Tx -6.0 dBm, fiber 8km x 0.35 dB/km = 2.8 dB, 4 connectors x 0.3 dB = 1.2 dB, 2 splices x 0.1 dB = 0.2 dB. Total loss: 4.2 dB. Margin: 4.2 dB looks fine. But: two dirty connectors at +1.5 dB each = 3.0 dB extra. New margin: 1.2 dB — below 3 dB safety threshold. 
Fix: clean all connectors.', + '{SFP+}', '{10G}', 'medium', '{power_budget,flapping,distance,connector,cleaning}', + '[{"step": 1, "action": "Calculate complete power budget"}, {"step": 2, "action": "Include ALL connectors and patch panels"}, {"step": 3, "action": "Add 0.5-1.5 dB per dirty connector"}, {"step": 4, "action": "Clean all fiber end-faces"}, {"step": 5, "action": "Verify with optical power meter"}]'); + +INSERT INTO knowledge_base (category, subcategory, question, answer, applies_to_form_factors, applies_to_speeds, severity, tags, resolution_steps) VALUES +('troubleshooting', 'intermittent', 'Transceiver intermittently drops — CRC bursts', + 'CRC error bursts (100-500 per minute, then clean for hours) usually indicate a micro-bend in the fiber or a loose connector. The fiber moves slightly with vibration or temperature changes. Check the physical fiber path: tight bends below minimum radius, cables pinched under trays, or connectors not fully seated.', + '{SFP+,QSFP28,QSFP-DD}', '{10G,100G,400G}', 'medium', '{crc,intermittent,microbend,connector}', + '[{"step": 1, "action": "show interface counters errors — correlate bursts with timestamps"}, {"step": 2, "action": "Check if errors correlate with HVAC cycles"}, {"step": 3, "action": "Inspect fiber path for tight bends (min radius: 30mm)"}, {"step": 4, "action": "Reseat all connectors"}, {"step": 5, "action": "Use OTDR to find fault point"}]'); + +-- === FAQ === + +INSERT INTO knowledge_base (category, subcategory, question, answer, applies_to_form_factors, applies_to_speeds, severity, tags) VALUES +('faq', 'compatibility', 'Can I use compatible transceivers in Cisco/Arista/Juniper switches?', + 'Yes, in most cases. Cisco Nexus uses "service unsupported-transceiver", Arista allows them by default, Juniper may need "set chassis fpc pic port allow-unsupported-sfp". The EEPROM coding must match the switch vendor. Compatible vendors like Flexoptix code modules to match the target platform. 
Always test 10 units before bulk ordering.', + '{SFP+,SFP28,QSFP28,QSFP-DD,OSFP}', '{10G,25G,100G,400G,800G}', 'info', '{compatibility,third-party,vendor-lock,coding}'); + +INSERT INTO knowledge_base (category, subcategory, question, answer, applies_to_form_factors, applies_to_speeds, severity, tags) VALUES +('faq', 'compatibility', 'What is transceiver EEPROM coding?', + 'Every transceiver has an EEPROM storing its identity: vendor name, part number, serial, speeds, and calibration data. Switches read this to identify the module. Some vendors check for their vendor ID and may reject third-party modules. Compatible vendors program the EEPROM to match the target platform — same hardware, different EEPROM programming.', + '{SFP+,SFP28,QSFP28,QSFP-DD,OSFP}', '{10G,25G,100G,400G,800G}', 'info', '{eeprom,coding,compatibility}'); + +INSERT INTO knowledge_base (category, subcategory, question, answer, applies_to_form_factors, applies_to_speeds, severity, tags) VALUES +('faq', 'technology', 'What is the difference between QSFP-DD and OSFP?', + 'Both are 400G+ form factors. QSFP-DD is backward-compatible with QSFP28 cages — same width, just longer. OSFP is wider, allowing better thermal dissipation for high-power coherent modules (20W+). QSFP-DD dominates hyperscale data centers (more ports per linecard), OSFP is preferred for telecom/coherent where thermal headroom matters more than density.', + '{QSFP-DD,OSFP}', '{400G,800G}', 'info', '{qsfp-dd,osfp,form-factor,comparison}'); + +INSERT INTO knowledge_base (category, subcategory, question, answer, applies_to_form_factors, applies_to_speeds, severity, tags) VALUES +('faq', 'technology', 'What is 400ZR and when should I use it?', + '400ZR is a coherent pluggable standard (OIF) that packs DSP, laser, and modulator into QSFP-DD or OSFP. Enables 400G over 80+ km on single wavelength without external line equipment. Use for DCI between campuses. 
Do not use for intra-DC links under 2km (DR4/FR4 cheaper) or ultra-long-haul >120km (needs ZR+ or traditional line systems). Power: 15-20W.', + '{QSFP-DD,OSFP}', '{400G}', 'info', '{400zr,coherent,dci,pluggable}'); + +INSERT INTO knowledge_base (category, subcategory, question, answer, applies_to_form_factors, applies_to_speeds, severity, tags) VALUES +('faq', 'purchasing', 'How much can I save with compatible vs OEM transceivers?', + 'Typical savings: SFP+ 10G: 5-10x ($15 vs $80-150). SFP28 25G: 3-5x ($20-35 vs $100-180). QSFP28 100G: 4-8x ($45-120 vs $300-900). QSFP-DD 400G: 2-4x ($250-500 vs $900-3200). Gap narrows at higher speeds because silicon cost dominates. For 400ZR coherent: ~50% savings only because the DSP is the main cost.', + '{SFP+,SFP28,QSFP28,QSFP-DD}', '{10G,25G,100G,400G}', 'info', '{pricing,cost,savings,compatible,oem}'); + +INSERT INTO knowledge_base (category, subcategory, question, answer, applies_to_form_factors, applies_to_speeds, severity, tags) VALUES +('faq', 'technology', 'What does DOM (Digital Optical Monitoring) show?', + 'DOM gives real-time telemetry: Tx Power (dBm), Rx Power (dBm), Temperature (C), Supply Voltage (V), Laser Bias Current (mA). Each has 4 alarm thresholds. Monitor Tx power for laser health (trending down = dying), Rx power for link quality, temperature for environment. CLI: show interface transceiver details.', + '{SFP+,SFP28,QSFP28,QSFP-DD,OSFP}', '{10G,25G,100G,400G,800G}', 'info', '{dom,monitoring,diagnostics,telemetry}'); + +INSERT INTO knowledge_base (category, subcategory, question, answer, applies_to_form_factors, applies_to_speeds, severity, tags) VALUES +('faq', 'deployment', 'How do I calculate a fiber link power budget?', + 'Power Budget = Tx Power - Total Loss. Total Loss = Fiber Loss + Connector Loss + Splice Loss. Fiber: 0.35 dB/km at 1310nm, 0.22 dB/km at 1550nm. Connector: 0.3 dB each. Splice: 0.1 dB each. Always reserve 3 dB margin. 
Example: 10G LR 8km, 4 connectors, 2 splices: Tx -6.0, loss 4.2 dB, Rx -10.2, sensitivity -14.4, margin 4.2 dB.', + '{SFP+,SFP28,QSFP28,QSFP-DD,OSFP}', '{10G,25G,100G,400G}', 'info', '{power_budget,calculation,fiber_loss}'); + +INSERT INTO knowledge_base (category, subcategory, question, answer, applies_to_form_factors, applies_to_speeds, severity, tags) VALUES +('faq', 'deployment', 'SR vs LR vs ER vs ZR vs DR vs FR — what do reach codes mean?', + 'SR (Short Reach): MMF, 100-300m, within-rack. LR (Long Reach): SMF, 10km, inter-building. ER (Extended Reach): SMF, 40km, metro. ZR (Very Long Reach): SMF, 80km, DCI. DR (Data center Reach): SMF, 500m, inter-pod parallel. FR (2km Reach): SMF, 2km, campus. Each uses different wavelengths — both ends must match.', + '{SFP+,SFP28,QSFP28,QSFP-DD,OSFP}', '{10G,25G,100G,400G,800G}', 'info', '{reach,sr,lr,er,zr,dr,fr}'); + +-- === BEST PRACTICES === + +INSERT INTO knowledge_base (category, subcategory, question, answer, applies_to_form_factors, applies_to_speeds, severity, tags) VALUES +('best_practice', 'operations', 'Always clean fiber connectors before inserting transceivers', + '40% of transceiver RMAs test fine at the vendor — the problem was dirty connectors. One fingerprint adds 1-2 dB loss. Use IPA-based cleaning pen or cassette, verify with 200x inspection scope. Never blow on connectors. Cost of cleaning: $0.50. Cost of unnecessary RMA: $50-200 plus weeks of lead time.', + '{SFP+,SFP28,QSFP28,QSFP-DD,OSFP}', '{10G,25G,100G,400G,800G}', 'info', '{cleaning,connector,best_practice,rma}'); + +INSERT INTO knowledge_base (category, subcategory, question, answer, applies_to_form_factors, applies_to_speeds, severity, tags) VALUES +('best_practice', 'procurement', 'Test compatible transceivers in small batches before bulk ordering', + 'Never order 200 compatible optics based on datasheet alone. Buy 10, install in production switches, run for 2 weeks monitoring DOM, BER, temperature. Check: vendor authentication passes? 
DOM accurate? Any CRC errors? Survives switch reboot? Only after successful pilot, order the full batch.', + '{SFP+,SFP28,QSFP28,QSFP-DD,OSFP}', '{10G,25G,100G,400G,800G}', 'info', '{testing,procurement,pilot}'); + +-- === KNOWN ISSUES === + +INSERT INTO knowledge_base (category, subcategory, question, answer, applies_to_form_factors, applies_to_speeds, severity, tags) VALUES +('known_issue', 'interop', 'Cisco Nexus 9000 vendor-locking on QSFP-DD modules', + 'Cisco Nexus 9000 with NX-OS 10.2+ enforces stricter vendor checks on QSFP-DD. Compatible modules may log persistent "unsupported transceiver" warnings. Fix: "service unsupported-transceiver" in global config, then "no shut". Some firmware versions need "hardware profile transceiver-frequency default" for DWDM. Test with exact NX-OS version.', + '{QSFP-DD}', '{400G}', 'medium', '{cisco,nexus,vendor-lock,nx-os}'); + +INSERT INTO knowledge_base (category, subcategory, question, answer, applies_to_form_factors, applies_to_speeds, severity, tags) VALUES +('known_issue', 'interop', 'Arista 400G FEC negotiation mismatch with compatible optics', + 'Arista 7060X5/7260X defaults to specific FEC mode (RS-FEC CL119) for 400G. If compatible QSFP-DD advertises different FEC, link stays "notconnect" despite good power. Fix: manually set "fec rs-fec" or "fec cl119" under interface config. Negotiation issue, not hardware.', + '{QSFP-DD,OSFP}', '{400G}', 'medium', '{arista,fec,negotiation,400g}'); diff --git a/sql/006-whitebox-switches.sql b/sql/006-whitebox-switches.sql new file mode 100644 index 0000000..7db336f --- /dev/null +++ b/sql/006-whitebox-switches.sql @@ -0,0 +1,89 @@ +-- TIP: Transceiver Intelligence Platform +-- Migration 006: Whitebox / Open Networking Switch Extensions +-- +-- Adds columns for whitebox ODM/OEM switches, open networking OS support, +-- OCP compliance, and hardware details needed for disaggregated networking. 
+ +-- ============================================================ +-- EXTEND switches TABLE with whitebox-specific columns +-- ============================================================ + +-- Whitebox classification (ocp_status stays nullable: a CHECK passes on NULL, so NULL must not be listed inside IN or the check can never fail) +ALTER TABLE switches ADD COLUMN IF NOT EXISTS is_whitebox BOOLEAN DEFAULT FALSE; +ALTER TABLE switches ADD COLUMN IF NOT EXISTS is_ocp_accepted BOOLEAN DEFAULT FALSE; +ALTER TABLE switches ADD COLUMN IF NOT EXISTS ocp_status TEXT CHECK (ocp_status IN ('Accepted', 'Inspired', 'None')); + +-- Open Networking OS support +ALTER TABLE switches ADD COLUMN IF NOT EXISTS supported_nos TEXT[] DEFAULT '{}'; +ALTER TABLE switches ADD COLUMN IF NOT EXISTS onl_compatible BOOLEAN DEFAULT FALSE; +ALTER TABLE switches ADD COLUMN IF NOT EXISTS dent_compatible BOOLEAN DEFAULT FALSE; +ALTER TABLE switches ADD COLUMN IF NOT EXISTS cumulus_compatible BOOLEAN DEFAULT FALSE; +ALTER TABLE switches ADD COLUMN IF NOT EXISTS fboss_compatible BOOLEAN DEFAULT FALSE; +ALTER TABLE switches ADD COLUMN IF NOT EXISTS stratum_compatible BOOLEAN DEFAULT FALSE; + +-- Hardware details (important for whitebox — CPU/RAM/storage determine NOS capability) +ALTER TABLE switches ADD COLUMN IF NOT EXISTS cpu TEXT; +ALTER TABLE switches ADD COLUMN IF NOT EXISTS cpu_cores INTEGER; +ALTER TABLE switches ADD COLUMN IF NOT EXISTS ram_gb NUMERIC; +ALTER TABLE switches ADD COLUMN IF NOT EXISTS storage_gb NUMERIC; +ALTER TABLE switches ADD COLUMN IF NOT EXISTS storage_type TEXT; + +-- ASIC generation tracking (extends existing asic_vendor/asic_model) +ALTER TABLE switches ADD COLUMN IF NOT EXISTS asic_series TEXT; +ALTER TABLE switches ADD COLUMN IF NOT EXISTS asic_process_nm INTEGER; + +-- Physical / form factor +ALTER TABLE switches ADD COLUMN IF NOT EXISTS front_panel_ports TEXT; +ALTER TABLE switches ADD COLUMN IF NOT EXISTS mgmt_ports TEXT; +ALTER TABLE switches ADD COLUMN IF NOT EXISTS console_ports TEXT; + +-- Transceiver form factors supported (derived from ports_config, but explicit for 
search) +ALTER TABLE switches ADD COLUMN IF NOT EXISTS transceiver_form_factors TEXT[] DEFAULT '{}'; + +-- External references +ALTER TABLE switches ADD COLUMN IF NOT EXISTS catalog_url TEXT; +ALTER TABLE switches ADD COLUMN IF NOT EXISTS sonic_hwsku TEXT; +ALTER TABLE switches ADD COLUMN IF NOT EXISTS onie_support BOOLEAN DEFAULT FALSE; + +-- Scraping metadata +ALTER TABLE switches ADD COLUMN IF NOT EXISTS last_scraped TIMESTAMPTZ; +ALTER TABLE switches ADD COLUMN IF NOT EXISTS scrape_source TEXT; + +-- ============================================================ +-- INDEXES for whitebox queries: partial indexes on the boolean flags (TRUE rows only), GIN indexes on the TEXT[] columns. NOTE(review): sonic_compatible is not added by this migration — presumably it comes from an earlier one; confirm. +-- ============================================================ +CREATE INDEX IF NOT EXISTS idx_switches_is_whitebox ON switches (is_whitebox) WHERE is_whitebox = TRUE; +CREATE INDEX IF NOT EXISTS idx_switches_sonic ON switches (sonic_compatible) WHERE sonic_compatible = TRUE; +CREATE INDEX IF NOT EXISTS idx_switches_ocp ON switches (is_ocp_accepted) WHERE is_ocp_accepted = TRUE; +CREATE INDEX IF NOT EXISTS idx_switches_asic_series ON switches (asic_series); +CREATE INDEX IF NOT EXISTS idx_switches_supported_nos ON switches USING GIN (supported_nos); +CREATE INDEX IF NOT EXISTS idx_switches_transceiver_ff ON switches USING GIN (transceiver_form_factors); + +-- ============================================================ +-- UPDATE search vector trigger function to include whitebox fields. It rebuilds NEW.search_vector with weights A (model, series), B (category, asic_vendor), C (asic_model, asic_series, sonic_hwsku, supported_nos) and D (cpu, tags), then sets NEW.updated_at to NOW(). Only the function body is replaced here; this migration contains no CREATE TRIGGER statement. +-- ============================================================ +CREATE OR REPLACE FUNCTION switches_search_vector_update() RETURNS trigger AS $$ +BEGIN + NEW.search_vector := + setweight(to_tsvector('english', COALESCE(NEW.model, '')), 'A') || + setweight(to_tsvector('english', COALESCE(NEW.series, '')), 'A') || + setweight(to_tsvector('english', COALESCE(NEW.category, '')), 'B') || + setweight(to_tsvector('english', COALESCE(NEW.asic_vendor, '')), 'B') || + setweight(to_tsvector('english', COALESCE(NEW.asic_model, '')), 'C') || + setweight(to_tsvector('english', COALESCE(NEW.asic_series, '')), 
'C') || + setweight(to_tsvector('english', COALESCE(NEW.sonic_hwsku, '')), 'C') || + setweight(to_tsvector('english', COALESCE(NEW.cpu, '')), 'D') || + setweight(to_tsvector('english', COALESCE(array_to_string(NEW.supported_nos, ' '), '')), 'C') || + setweight(to_tsvector('english', COALESCE(array_to_string(NEW.tags, ' '), '')), 'D'); + NEW.updated_at := NOW(); + RETURN NEW; +END; +$$ LANGUAGE plpgsql; + +-- ============================================================ +-- EXTEND vendors TABLE for whitebox ODMs +-- ============================================================ +ALTER TABLE vendors ADD COLUMN IF NOT EXISTS is_odm BOOLEAN DEFAULT FALSE; +ALTER TABLE vendors ADD COLUMN IF NOT EXISTS is_whitebox_vendor BOOLEAN DEFAULT FALSE; +ALTER TABLE vendors ADD COLUMN IF NOT EXISTS ocp_member BOOLEAN DEFAULT FALSE; +ALTER TABLE vendors ADD COLUMN IF NOT EXISTS sonic_contributor BOOLEAN DEFAULT FALSE; diff --git a/sql/007-flexoptix-vendors.sql b/sql/007-flexoptix-vendors.sql new file mode 100644 index 0000000..0c3485b --- /dev/null +++ b/sql/007-flexoptix-vendors.sql @@ -0,0 +1,18 @@ +-- TIP: Transceiver Intelligence Platform +-- Migration 007: Flexoptix Supported Vendors & Vendor Categories +-- +-- Extends vendors table to track which vendors Flexoptix supports +-- and categorize vendors by market segment. 
+ +-- Flexoptix support flag +ALTER TABLE vendors ADD COLUMN IF NOT EXISTS flexoptix_supported BOOLEAN DEFAULT FALSE; + +-- Vendor category (market segment) +ALTER TABLE vendors ADD COLUMN IF NOT EXISTS vendor_category TEXT; + +-- Notes field for additional context +ALTER TABLE vendors ADD COLUMN IF NOT EXISTS notes TEXT; + +-- Indexes +CREATE INDEX IF NOT EXISTS idx_vendors_flexoptix ON vendors (flexoptix_supported) WHERE flexoptix_supported = TRUE; +CREATE INDEX IF NOT EXISTS idx_vendors_category ON vendors (vendor_category); diff --git a/sql/008-product-assets.sql b/sql/008-product-assets.sql new file mode 100644 index 0000000..c487399 --- /dev/null +++ b/sql/008-product-assets.sql @@ -0,0 +1,69 @@ +-- TIP: Transceiver Intelligence Platform +-- Migration 008: Product Assets (Images, Datasheets, Manuals) +-- +-- Adds columns for product images, datasheet PDFs, and manual/guide links +-- to both switches and transceivers tables. + +-- ═══════════════════════════════════════════════════════ +-- SWITCHES: Product assets +-- ═══════════════════════════════════════════════════════ +ALTER TABLE switches ADD COLUMN IF NOT EXISTS image_url TEXT; +ALTER TABLE switches ADD COLUMN IF NOT EXISTS image_local_path TEXT; +ALTER TABLE switches ADD COLUMN IF NOT EXISTS datasheet_url TEXT; +ALTER TABLE switches ADD COLUMN IF NOT EXISTS datasheet_local_path TEXT; +ALTER TABLE switches ADD COLUMN IF NOT EXISTS manual_urls JSONB DEFAULT '[]'::jsonb; +ALTER TABLE switches ADD COLUMN IF NOT EXISTS quick_start_url TEXT; +ALTER TABLE switches ADD COLUMN IF NOT EXISTS cli_reference_url TEXT; +ALTER TABLE switches ADD COLUMN IF NOT EXISTS release_notes_url TEXT; +ALTER TABLE switches ADD COLUMN IF NOT EXISTS product_page_url TEXT; +ALTER TABLE switches ADD COLUMN IF NOT EXISTS eol_url TEXT; +ALTER TABLE switches ADD COLUMN IF NOT EXISTS assets_scraped_at TIMESTAMPTZ; + +-- ═══════════════════════════════════════════════════════ +-- TRANSCEIVERS: Product assets +-- 
═══════════════════════════════════════════════════════ +ALTER TABLE transceivers ADD COLUMN IF NOT EXISTS image_url TEXT; +ALTER TABLE transceivers ADD COLUMN IF NOT EXISTS image_local_path TEXT; +ALTER TABLE transceivers ADD COLUMN IF NOT EXISTS datasheet_url TEXT; +ALTER TABLE transceivers ADD COLUMN IF NOT EXISTS datasheet_local_path TEXT; +ALTER TABLE transceivers ADD COLUMN IF NOT EXISTS manual_urls JSONB DEFAULT '[]'::jsonb; +ALTER TABLE transceivers ADD COLUMN IF NOT EXISTS product_page_url TEXT; +ALTER TABLE transceivers ADD COLUMN IF NOT EXISTS assets_scraped_at TIMESTAMPTZ; + +-- ═══════════════════════════════════════════════════════ +-- VENDORS: Documentation portal URLs +-- ═══════════════════════════════════════════════════════ +ALTER TABLE vendors ADD COLUMN IF NOT EXISTS docs_portal_url TEXT; +ALTER TABLE vendors ADD COLUMN IF NOT EXISTS datasheet_library_url TEXT; +ALTER TABLE vendors ADD COLUMN IF NOT EXISTS image_cdn_base TEXT; +ALTER TABLE vendors ADD COLUMN IF NOT EXISTS support_portal_url TEXT; + +-- ═══════════════════════════════════════════════════════ +-- DOCUMENTS table for downloaded PDFs (datasheets, manuals). chk_doc_ref requires at least one of switch_id / transceiver_id (both may be set); a row is deleted with its switch or transceiver (ON DELETE CASCADE), while vendor_id is set to NULL when the vendor is deleted. +-- ═══════════════════════════════════════════════════════ +CREATE TABLE IF NOT EXISTS product_documents ( + id UUID PRIMARY KEY DEFAULT gen_random_uuid(), + switch_id UUID REFERENCES switches(id) ON DELETE CASCADE, + transceiver_id UUID REFERENCES transceivers(id) ON DELETE CASCADE, + vendor_id UUID REFERENCES vendors(id) ON DELETE SET NULL, + doc_type TEXT NOT NULL CHECK (doc_type IN ('datasheet', 'manual', 'quick_start', 'cli_reference', 'release_notes', 'installation_guide', 'compatibility_matrix', 'eol_notice')), + title TEXT NOT NULL, + source_url TEXT NOT NULL, + local_path TEXT, + r2_key TEXT, + file_size_bytes BIGINT, + content_hash TEXT, + page_count INTEGER, + language TEXT DEFAULT 'en', + extracted_text TEXT, + indexed_at TIMESTAMPTZ, + downloaded_at TIMESTAMPTZ DEFAULT NOW(), + created_at TIMESTAMPTZ DEFAULT NOW(), + 
CONSTRAINT chk_doc_ref CHECK (switch_id IS NOT NULL OR transceiver_id IS NOT NULL) +); + +CREATE INDEX IF NOT EXISTS idx_product_docs_switch ON product_documents(switch_id) WHERE switch_id IS NOT NULL; +CREATE INDEX IF NOT EXISTS idx_product_docs_transceiver ON product_documents(transceiver_id) WHERE transceiver_id IS NOT NULL; +CREATE INDEX IF NOT EXISTS idx_product_docs_type ON product_documents(doc_type); +CREATE INDEX IF NOT EXISTS idx_product_docs_vendor ON product_documents(vendor_id); +CREATE INDEX IF NOT EXISTS idx_product_docs_hash ON product_documents(content_hash); diff --git a/sql/009-seed-switches.sql b/sql/009-seed-switches.sql new file mode 100644 index 0000000..c93e0ce --- /dev/null +++ b/sql/009-seed-switches.sql @@ -0,0 +1,210 @@ +-- TIP: Transceiver Intelligence Platform +-- Migration 009: Seed Switch Data +-- Major network switch models from Cisco, Arista, Juniper, Broadcom-based whitebox + +-- First ensure vendors exist (ON CONFLICT (slug) DO NOTHING: a vendor row that already exists for a slug keeps its current name and other fields) +INSERT INTO vendors (name, slug, type, headquarters, country, website) +VALUES + ('Cisco Systems', 'cisco', 'manufacturer', 'San Jose, CA', 'US', 'https://www.cisco.com') +ON CONFLICT (slug) DO NOTHING; + +INSERT INTO vendors (name, slug, type, headquarters, country, website) +VALUES + ('Arista Networks', 'arista', 'manufacturer', 'Santa Clara, CA', 'US', 'https://www.arista.com') +ON CONFLICT (slug) DO NOTHING; + +INSERT INTO vendors (name, slug, type, headquarters, country, website) +VALUES + ('Juniper Networks', 'juniper', 'manufacturer', 'Sunnyvale, CA', 'US', 'https://www.juniper.net') +ON CONFLICT (slug) DO NOTHING; + +INSERT INTO vendors (name, slug, type, headquarters, country, website) +VALUES + ('Edgecore Networks', 'edgecore', 'manufacturer', 'Hsinchu', 'TW', 'https://www.edge-core.com') +ON CONFLICT (slug) DO NOTHING; + +INSERT INTO vendors (name, slug, type, headquarters, country, website) +VALUES + ('Celestica', 'celestica', 'manufacturer', 'Toronto', 'CA', 'https://www.celestica.com') +ON CONFLICT (slug) DO 
NOTHING; + +INSERT INTO vendors (name, slug, type, headquarters, country, website) +VALUES + ('NVIDIA Networking', 'nvidia-networking', 'manufacturer', 'Santa Clara, CA', 'US', 'https://www.nvidia.com/en-us/networking/') +ON CONFLICT (slug) DO NOTHING; + +INSERT INTO vendors (name, slug, type, headquarters, country, website) +VALUES + ('Dell Technologies', 'dell', 'manufacturer', 'Round Rock, TX', 'US', 'https://www.dell.com') +ON CONFLICT (slug) DO NOTHING; + +INSERT INTO vendors (name, slug, type, headquarters, country, website) +VALUES + ('HPE / Aruba', 'hpe-aruba', 'manufacturer', 'San Jose, CA', 'US', 'https://www.arubanetworks.com') +ON CONFLICT (slug) DO NOTHING; + +INSERT INTO vendors (name, slug, type, headquarters, country, website) +VALUES + ('Asterfusion', 'asterfusion', 'manufacturer', 'Beijing', 'CN', 'https://www.asterfusion.com') +ON CONFLICT (slug) DO NOTHING; + +-- Cisco Nexus Data Center +INSERT INTO switches (vendor_id, model, series, category, layer, ports_config, total_ports, max_speed_gbps, switching_capacity_tbps, forwarding_rate_mpps, asic_vendor, asic_model, lifecycle_status, bgp_support, vxlan_support, evpn_support) +SELECT v.id, 'N9K-C9364C', 'Nexus 9300', 'DataCenter', 'L3', + '{"100G_QSFP28": 64}'::jsonb, 64, 100, 12.8, 4760, + 'Broadcom', 'Memory Pipeline', 'Active', true, true, true +FROM vendors v WHERE v.slug = 'cisco'; + +INSERT INTO switches (vendor_id, model, series, category, layer, ports_config, total_ports, max_speed_gbps, switching_capacity_tbps, forwarding_rate_mpps, asic_vendor, asic_model, lifecycle_status, bgp_support, vxlan_support, evpn_support) +SELECT v.id, 'N9K-C93600CD-GX', 'Nexus 9300', 'DataCenter', 'L3', + '{"400G_QSFP-DD": 28, "100G_QSFP28": 8}'::jsonb, 36, 400, 12.8, 4760, + 'Broadcom', 'Memory Pipeline', 'Active', true, true, true +FROM vendors v WHERE v.slug = 'cisco'; + +INSERT INTO switches (vendor_id, model, series, category, layer, ports_config, total_ports, max_speed_gbps, switching_capacity_tbps, 
forwarding_rate_mpps, asic_vendor, asic_model, lifecycle_status, bgp_support, vxlan_support, evpn_support) +SELECT v.id, 'N9K-C9336C-FX2', 'Nexus 9300', 'DataCenter', 'L3', + '{"100G_QSFP28": 36}'::jsonb, 36, 100, 7.2, 2680, + 'Broadcom', 'Memory Pipeline', 'Active', true, true, true +FROM vendors v WHERE v.slug = 'cisco'; + +INSERT INTO switches (vendor_id, model, series, category, layer, ports_config, total_ports, max_speed_gbps, switching_capacity_tbps, forwarding_rate_mpps, asic_vendor, asic_model, lifecycle_status, bgp_support, vxlan_support, evpn_support) +SELECT v.id, 'N9K-C9332D-GX2B', 'Nexus 9300', 'DataCenter', 'L3', + '{"400G_QSFP-DD": 32}'::jsonb, 32, 400, 25.6, 9520, + 'Broadcom', 'Memory Pipeline', 'Active', true, true, true +FROM vendors v WHERE v.slug = 'cisco'; + +INSERT INTO switches (vendor_id, model, series, category, layer, ports_config, total_ports, max_speed_gbps, switching_capacity_tbps, forwarding_rate_mpps, asic_vendor, asic_model, lifecycle_status, bgp_support, vxlan_support, evpn_support, mpls_support) +SELECT v.id, 'N9K-C9508', 'Nexus 9500', 'Core', 'L3', + '{"400G_QSFP-DD": 576}'::jsonb, 576, 400, 230.4, 85000, + 'Cisco', 'Cloud Scale', 'Active', true, true, true, true +FROM vendors v WHERE v.slug = 'cisco'; + +-- Arista 7000 Series +INSERT INTO switches (vendor_id, model, series, category, layer, ports_config, total_ports, max_speed_gbps, switching_capacity_tbps, forwarding_rate_mpps, asic_vendor, asic_model, lifecycle_status, bgp_support, vxlan_support, evpn_support, openconfig_support) +SELECT v.id, '7060X6-64PE', 'Arista 7060X', 'DataCenter', 'L3', + '{"800G_OSFP": 64}'::jsonb, 64, 800, 51.2, 19000, + 'Broadcom', 'Memory Pipeline', 'Active', true, true, true, true +FROM vendors v WHERE v.slug = 'arista'; + +INSERT INTO switches (vendor_id, model, series, category, layer, ports_config, total_ports, max_speed_gbps, switching_capacity_tbps, forwarding_rate_mpps, asic_vendor, asic_model, lifecycle_status, bgp_support, vxlan_support, 
evpn_support, openconfig_support) +SELECT v.id, '7060X5-64', 'Arista 7060X', 'DataCenter', 'L3', + '{"400G_QSFP-DD": 64}'::jsonb, 64, 400, 51.2, 19000, + 'Broadcom', 'Memory Pipeline', 'Active', true, true, true, true +FROM vendors v WHERE v.slug = 'arista'; + +INSERT INTO switches (vendor_id, model, series, category, layer, ports_config, total_ports, max_speed_gbps, switching_capacity_tbps, forwarding_rate_mpps, asic_vendor, asic_model, lifecycle_status, bgp_support, vxlan_support, evpn_support, openconfig_support) +SELECT v.id, '7050X4-32', 'Arista 7050X', 'DataCenter', 'L3', + '{"400G_QSFP-DD": 32}'::jsonb, 32, 400, 25.6, 9520, + 'Broadcom', 'Memory Pipeline', 'Active', true, true, true, true +FROM vendors v WHERE v.slug = 'arista'; + +INSERT INTO switches (vendor_id, model, series, category, layer, ports_config, total_ports, max_speed_gbps, switching_capacity_tbps, forwarding_rate_mpps, asic_vendor, asic_model, lifecycle_status, bgp_support, vxlan_support, evpn_support, openconfig_support) +SELECT v.id, '7280R3-48YC6', 'Arista 7280R', 'SP', 'L3', + '{"100G_QSFP28": 6, "25G_SFP28": 48}'::jsonb, 54, 100, 6.4, 2380, + 'Broadcom', 'Memory Pipeline', 'Active', true, true, true, true +FROM vendors v WHERE v.slug = 'arista'; + +INSERT INTO switches (vendor_id, model, series, category, layer, ports_config, total_ports, max_speed_gbps, switching_capacity_tbps, forwarding_rate_mpps, asic_vendor, asic_model, lifecycle_status, bgp_support, vxlan_support, evpn_support, openconfig_support) +SELECT v.id, '7800R3-36P-LC', 'Arista 7800R', 'Core', 'L3', + '{"400G_QSFP-DD": 36}'::jsonb, 36, 400, 28.8, 10700, + 'Broadcom', 'Memory Pipeline', 'Active', true, true, true, true +FROM vendors v WHERE v.slug = 'arista'; + +INSERT INTO switches (vendor_id, model, series, category, layer, ports_config, total_ports, max_speed_gbps, switching_capacity_tbps, forwarding_rate_mpps, asic_vendor, asic_model, lifecycle_status, bgp_support, vxlan_support, evpn_support, openconfig_support) +SELECT 
v.id, '7020R', 'Arista 7020R', 'Campus', 'L3', + '{"1G_SFP": 48, "10G_SFP+": 6}'::jsonb, 54, 10, 0.176, 130, + 'Broadcom', 'Memory Pipeline', 'Active', true, false, false, true +FROM vendors v WHERE v.slug = 'arista'; + +-- Juniper QFX Series +INSERT INTO switches (vendor_id, model, series, category, layer, ports_config, total_ports, max_speed_gbps, switching_capacity_tbps, forwarding_rate_mpps, asic_vendor, asic_model, lifecycle_status, bgp_support, vxlan_support, evpn_support) +SELECT v.id, 'QFX5130-32CD', 'QFX5100', 'DataCenter', 'L3', + '{"400G_QSFP-DD": 32}'::jsonb, 32, 400, 25.6, 9520, + 'Broadcom', 'Memory Pipeline', 'Active', true, true, true +FROM vendors v WHERE v.slug = 'juniper'; + +INSERT INTO switches (vendor_id, model, series, category, layer, ports_config, total_ports, max_speed_gbps, switching_capacity_tbps, forwarding_rate_mpps, asic_vendor, asic_model, lifecycle_status, bgp_support, vxlan_support, evpn_support) +SELECT v.id, 'QFX5220-32CD', 'QFX5200', 'DataCenter', 'L3', + '{"400G_QSFP-DD": 32}'::jsonb, 32, 400, 25.6, 9520, + 'Broadcom', 'Memory Pipeline', 'Active', true, true, true +FROM vendors v WHERE v.slug = 'juniper'; + +INSERT INTO switches (vendor_id, model, series, category, layer, ports_config, total_ports, max_speed_gbps, switching_capacity_tbps, forwarding_rate_mpps, asic_vendor, asic_model, lifecycle_status, bgp_support, vxlan_support, evpn_support) +SELECT v.id, 'QFX5120-48Y', 'QFX5100', 'DataCenter', 'L3', + '{"25G_SFP28": 48, "100G_QSFP28": 8}'::jsonb, 56, 100, 4.0, 1488, + 'Broadcom', 'Memory Pipeline', 'Active', true, true, true +FROM vendors v WHERE v.slug = 'juniper'; + +INSERT INTO switches (vendor_id, model, series, category, layer, ports_config, total_ports, max_speed_gbps, switching_capacity_tbps, forwarding_rate_mpps, asic_vendor, asic_model, lifecycle_status, bgp_support, vxlan_support, evpn_support, mpls_support) +SELECT v.id, 'QFX10008', 'QFX10000', 'Core', 'L3', + '{"400G_QSFP-DD": 288}'::jsonb, 288, 400, 115.2, 
42800, + 'Juniper', 'ExpressPlus', 'Active', true, true, true, true +FROM vendors v WHERE v.slug = 'juniper'; + +INSERT INTO switches (vendor_id, model, series, category, layer, ports_config, total_ports, max_speed_gbps, switching_capacity_tbps, forwarding_rate_mpps, asic_vendor, lifecycle_status, bgp_support, vxlan_support, evpn_support) +SELECT v.id, 'EX4400-48T', 'EX4400', 'Campus', 'L3', + '{"1G_RJ45": 48, "10G_SFP+": 4, "25G_SFP28": 2}'::jsonb, 54, 25, 1.76, 654, + 'Broadcom', 'Active', true, true, true +FROM vendors v WHERE v.slug = 'juniper'; + +-- NVIDIA / Mellanox Spectrum +INSERT INTO switches (vendor_id, model, series, category, layer, ports_config, total_ports, max_speed_gbps, switching_capacity_tbps, forwarding_rate_mpps, asic_vendor, asic_model, lifecycle_status, bgp_support, vxlan_support, evpn_support, sonic_compatible) +SELECT v.id, 'SN5600', 'Spectrum-4', 'DataCenter', 'L3', + '{"800G_OSFP": 64}'::jsonb, 64, 800, 51.2, 19000, + 'NVIDIA', 'Spectrum-4', 'Active', true, true, true, true +FROM vendors v WHERE v.slug = 'nvidia-networking'; + +INSERT INTO switches (vendor_id, model, series, category, layer, ports_config, total_ports, max_speed_gbps, switching_capacity_tbps, forwarding_rate_mpps, asic_vendor, asic_model, lifecycle_status, bgp_support, vxlan_support, evpn_support, sonic_compatible) +SELECT v.id, 'SN4700', 'Spectrum-3', 'DataCenter', 'L3', + '{"400G_QSFP-DD": 32}'::jsonb, 32, 400, 12.8, 4760, + 'NVIDIA', 'Spectrum-3', 'Active', true, true, true, true +FROM vendors v WHERE v.slug = 'nvidia-networking'; + +INSERT INTO switches (vendor_id, model, series, category, layer, ports_config, total_ports, max_speed_gbps, switching_capacity_tbps, forwarding_rate_mpps, asic_vendor, asic_model, lifecycle_status, bgp_support, vxlan_support, evpn_support, sonic_compatible) +SELECT v.id, 'SN3700', 'Spectrum-2', 'DataCenter', 'L3', + '{"100G_QSFP28": 32}'::jsonb, 32, 100, 6.4, 2380, + 'NVIDIA', 'Spectrum-2', 'Active', true, true, true, true +FROM vendors v 
WHERE v.slug = 'nvidia-networking'; + +-- Edgecore Whitebox +INSERT INTO switches (vendor_id, model, series, category, layer, ports_config, total_ports, max_speed_gbps, switching_capacity_tbps, asic_vendor, asic_model, lifecycle_status, sonic_compatible, is_whitebox, onie_support, bgp_support) +SELECT v.id, 'DCS810', 'AS9516-32D', 'DataCenter', 'L3', + '{"400G_QSFP-DD": 32}'::jsonb, 32, 400, 12.8, + 'Broadcom', 'Memory Pipeline', 'Active', true, true, true, true +FROM vendors v WHERE v.slug = 'edgecore'; + +INSERT INTO switches (vendor_id, model, series, category, layer, ports_config, total_ports, max_speed_gbps, switching_capacity_tbps, asic_vendor, asic_model, lifecycle_status, sonic_compatible, is_whitebox, onie_support, bgp_support) +SELECT v.id, 'AS7726-32X', 'AS7726', 'DataCenter', 'L3', + '{"100G_QSFP28": 32}'::jsonb, 32, 100, 6.4, + 'Broadcom', 'Memory Pipeline', 'Active', true, true, true, true +FROM vendors v WHERE v.slug = 'edgecore'; + +-- Celestica +INSERT INTO switches (vendor_id, model, series, category, layer, ports_config, total_ports, max_speed_gbps, switching_capacity_tbps, asic_vendor, asic_model, lifecycle_status, sonic_compatible, is_whitebox, onie_support, bgp_support) +SELECT v.id, 'DS5000', 'Seastone', 'DataCenter', 'L3', + '{"800G_OSFP": 64}'::jsonb, 64, 800, 51.2, + 'Broadcom', 'Memory Pipeline', 'Active', true, true, true, true +FROM vendors v WHERE v.slug = 'celestica'; + +INSERT INTO switches (vendor_id, model, series, category, layer, ports_config, total_ports, max_speed_gbps, switching_capacity_tbps, asic_vendor, asic_model, lifecycle_status, sonic_compatible, is_whitebox, onie_support, bgp_support) +SELECT v.id, 'DS3000', 'Seastone', 'DataCenter', 'L3', + '{"400G_QSFP-DD": 32}'::jsonb, 32, 400, 12.8, + 'Broadcom', 'Memory Pipeline', 'Active', true, true, true, true +FROM vendors v WHERE v.slug = 'celestica'; + +-- Asterfusion +INSERT INTO switches (vendor_id, model, series, category, layer, ports_config, total_ports, max_speed_gbps, 
switching_capacity_tbps, asic_vendor, asic_model, lifecycle_status, sonic_compatible, is_whitebox, onie_support, bgp_support) +SELECT v.id, 'CX864E-N', 'CX8000', 'DataCenter', 'L3', + '{"400G_QSFP-DD": 64}'::jsonb, 64, 400, 25.6, + 'Broadcom', 'Memory Pipeline', 'Active', true, true, true, true +FROM vendors v WHERE v.slug = 'asterfusion'; + +-- Generate search vectors for switches +UPDATE switches SET search_vector = to_tsvector('english', + coalesce(model, '') || ' ' || + coalesce(series, '') || ' ' || + coalesce(category, '') || ' ' || + coalesce(asic_vendor, '') || ' ' || + coalesce(asic_model, '') +) +WHERE search_vector IS NULL; diff --git a/sql/010-vendor-urls.sql b/sql/010-vendor-urls.sql new file mode 100644 index 0000000..8d33696 --- /dev/null +++ b/sql/010-vendor-urls.sql @@ -0,0 +1,82 @@ +-- 010: Add image_url, product_page_url, datasheet_url columns and populate vendor URLs +-- Run on Erik: PGPASSWORD='***REDACTED***' psql -h localhost -p 5433 -U tip -d transceiver_db -f sql/010-vendor-urls.sql + +-- Add columns (idempotent) +ALTER TABLE transceivers ADD COLUMN IF NOT EXISTS image_url TEXT; +ALTER TABLE transceivers ADD COLUMN IF NOT EXISTS product_page_url TEXT; +ALTER TABLE transceivers ADD COLUMN IF NOT EXISTS datasheet_url TEXT; +ALTER TABLE switches ADD COLUMN IF NOT EXISTS image_url TEXT; +ALTER TABLE switches ADD COLUMN IF NOT EXISTS product_page_url TEXT; + +-- FLEXOPTIX product page URLs (strip :variant suffix) +UPDATE transceivers +SET product_page_url = 'https://www.flexoptix.net/en/' || + LOWER(REPLACE(REPLACE( + CASE + WHEN part_number LIKE '%:%' THEN SPLIT_PART(part_number, ':', 1) + ELSE part_number + END, + '.', '-'), ' ', '-')) || '.html' +WHERE vendor_id = (SELECT id FROM vendors WHERE name = 'FLEXOPTIX') +AND part_number IS NOT NULL +AND product_page_url IS NULL; + +-- 10Gtek product page URLs +UPDATE transceivers +SET product_page_url = 'https://www.10gtek.com/transceiver' +WHERE vendor_id = (SELECT id FROM vendors WHERE name = 
'10Gtek') +AND product_page_url IS NULL; + +-- Fluxlight product page URLs (pattern: fluxlight.com/{PART_NUMBER}-FL/) +UPDATE transceivers +SET product_page_url = 'https://fluxlight.com/' || REPLACE(COALESCE(part_number, slug), ' ', '-') || '/' +WHERE vendor_id = (SELECT id FROM vendors WHERE name = 'Fluxlight') +AND part_number IS NOT NULL +AND product_page_url IS NULL; + +-- GBICS product page URLs +UPDATE transceivers +SET product_page_url = 'https://gbics.com/compatible-transceivers/' +WHERE vendor_id = (SELECT id FROM vendors WHERE name = 'GBICS') +AND product_page_url IS NULL; + +-- SFPcables product page URLs +UPDATE transceivers +SET product_page_url = 'https://www.sfpcables.com/' +WHERE vendor_id = (SELECT id FROM vendors WHERE name = 'SFPcables') +AND product_page_url IS NULL; + +-- Juniper Networks product page URLs (generic Juniper optics page; vendor matched by the slug seeded in 009, or by display name) +UPDATE transceivers +SET product_page_url = 'https://www.juniper.net/us/en/products/pluggable-optics.html' +WHERE vendor_id IN (SELECT id FROM vendors WHERE slug = 'juniper' OR name = 'Juniper Networks') +AND product_page_url IS NULL; + +-- Switch vendor product page URLs. Vendors are matched by the slug that 009-seed-switches.sql links switches through (falling back to the legacy display name), because the seeded names are 'Cisco Systems', 'NVIDIA Networking', 'Edgecore Networks'; IN tolerates more than one matching vendor row. NOTE(review): the Edgecore URL uses edgecore.com while 009 seeds the vendor website as edge-core.com — confirm. +UPDATE switches SET product_page_url = 'https://www.cisco.com/site/us/en/products/networking/switches/index.html' +WHERE vendor_id IN (SELECT id FROM vendors WHERE slug = 'cisco' OR name = 'Cisco') AND product_page_url IS NULL; + +UPDATE switches SET product_page_url = 'https://www.arista.com/en/products/switches' +WHERE vendor_id IN (SELECT id FROM vendors WHERE slug = 'arista' OR name = 'Arista Networks') AND product_page_url IS NULL; + +UPDATE switches SET product_page_url = 'https://www.juniper.net/us/en/products/switches.html' +WHERE vendor_id IN (SELECT id FROM vendors WHERE slug = 'juniper' OR name = 'Juniper Networks') AND product_page_url IS NULL; + +UPDATE switches SET product_page_url = 'https://www.nvidia.com/en-us/networking/ethernet-switching/' +WHERE vendor_id IN (SELECT id FROM vendors WHERE slug = 'nvidia-networking' OR name = 'NVIDIA') AND product_page_url IS NULL; + +UPDATE switches SET product_page_url = 
'https://www.edgecore.com/products' +WHERE vendor_id IN (SELECT id FROM vendors WHERE slug = 'edgecore' OR name = 'Edgecore') AND product_page_url IS NULL; + +UPDATE switches SET product_page_url = 'https://www.celestica.com/open-networking-switches' +WHERE vendor_id IN (SELECT id FROM vendors WHERE slug = 'celestica' OR name = 'Celestica') AND product_page_url IS NULL; + +UPDATE switches SET product_page_url = 'https://www.asterfusion.com/products/' +WHERE vendor_id IN (SELECT id FROM vendors WHERE slug = 'asterfusion' OR name = 'Asterfusion') AND product_page_url IS NULL; + +-- Summary +SELECT 'Transceivers with product_page_url' as metric, COUNT(*) as count FROM transceivers WHERE product_page_url IS NOT NULL +UNION ALL +SELECT 'Transceivers with image_url', COUNT(*) FROM transceivers WHERE image_url IS NOT NULL +UNION ALL +SELECT 'Switches with product_page_url', COUNT(*) FROM switches WHERE product_page_url IS NOT NULL;