feat(scrapers+mcp): ATGBICS + ProLabs scrapers, MCP HTTP/SSE server
Scrapers:
- atgbics.ts: PlaywrightCrawler for UK vendor ATGBICS (Shopify store),
scrapes SFP/SFP+/SFP28/QSFP+/QSFP28/QSFP-DD in GBP, max 50 pages/run
- prolabs.ts: HttpCrawler for ProLabs (Legrand subsidiary), USD pricing,
category-driven crawl with reach/fiber/speed detection
- Both registered in scheduler (every 8h, staggered) and index.ts CLI
MCP HTTP Server:
- packages/mcp-server/src/http-server.ts: Express + SSEServerTransport
- Exposes all 12 TIP tools via GET /sse + POST /message
- Bearer token auth (MCP_SECRET env), CORS-configurable
- GET /health → { status: "ok", tools: 12 }
- Port: MCP_HTTP_PORT (default 3201)
SQL + tools:
- sql/006-009: seed scripts for whitebox switches, vendors, assets
- switch-docs.ts: MCP tool for switch documentation queries
This commit is contained in:
parent
83f4acc976
commit
4b452ab49e
99
packages/api/src/embeddings/seed-knowledge-base.ts
Normal file
99
packages/api/src/embeddings/seed-knowledge-base.ts
Normal file
@ -0,0 +1,99 @@
|
||||
/**
|
||||
* Seed FAQ and troubleshooting embeddings in Qdrant from knowledge_base.
|
||||
*
|
||||
* Run: npx tsx packages/api/src/embeddings/seed-knowledge-base.ts
|
||||
*/
|
||||
import { pool } from "../db/client";
|
||||
import { embed, upsertPoints, type CollectionName } from "./client";
|
||||
|
||||
/**
 * Flatten one knowledge_base row into the text that is sent to `embed`.
 *
 * The result is the question and answer followed by the optional topic,
 * form-factor list and speed list, joined with ". ". Optional parts are
 * omitted entirely when they are missing or empty, so the text never carries
 * a dangling label such as "Form factors: ".
 *
 * @param row - A row selected from knowledge_base (see the query in `main`).
 * @returns The text representation used for embedding.
 */
function kbToText(row: Record<string, unknown>): string {
  // Render a labelled, comma-separated list; anything that is not a non-empty
  // array is skipped instead of crashing on `.join` or emitting an empty label.
  const listPart = (label: string, value: unknown): string | undefined => {
    if (!Array.isArray(value) || value.length === 0) return undefined;
    return `${label}: ${value.join(", ")}`;
  };

  const parts = [
    // `?? ""` keeps a NULL column from being embedded as the word "null".
    `Q: ${row.question ?? ""}`,
    `A: ${row.answer ?? ""}`,
    row.subcategory ? `Topic: ${row.subcategory}` : undefined,
    listPart("Form factors", row.applies_to_form_factors),
    listPart("Speeds", row.applies_to_speeds),
  ].filter((part): part is string => Boolean(part));

  return parts.join(". ");
}
|
||||
|
||||
/**
 * Pick the embedding collection for a knowledge_base category.
 *
 * "troubleshooting" and "known_issue" go to the troubleshooting collection;
 * every other category is stored with the FAQs.
 */
function collectionForCategory(category: string): CollectionName {
  const isTroubleshooting = ["troubleshooting", "known_issue"].includes(category);
  return isTroubleshooting ? "troubleshooting_embeddings" : "faq_embeddings";
}
|
||||
|
||||
/**
 * Seed the embedding collections from the knowledge_base table.
 *
 * Rows are read in one query, processed in batches of five, grouped by the
 * collection returned from `collectionForCategory`, embedded, and upserted.
 * Progress is logged after each batch and the pool is closed at the end.
 */
async function main(): Promise<void> {
  console.log("=== Seeding knowledge_base embeddings ===\n");

  const { rows: entries } = await pool.query(
    `SELECT id, category, subcategory, question, answer,
            applies_to_form_factors, applies_to_speeds, severity, tags
     FROM knowledge_base
     ORDER BY category, created_at`
  );
  const total = entries.length;

  console.log(`Found ${total} knowledge base entries\n`);

  const BATCH_SIZE = 5;
  let faqCount = 0;
  let troubleCount = 0;

  // Turn one row into an upsertable point: embedding vector plus payload.
  const toPoint = async (entry: (typeof entries)[number]) => {
    const text = kbToText(entry);
    const vector = await embed(text);

    return {
      id: entry.id,
      vector,
      payload: {
        question: entry.question || "",
        answer: entry.answer || "",
        category: entry.category || "",
        subcategory: entry.subcategory || "",
        // The same columns are repeated under symptom/cause/solution keys.
        symptom: entry.question || "",
        cause: entry.subcategory || "",
        solution: entry.answer || "",
        severity: entry.severity || "info",
        form_factors: entry.applies_to_form_factors || [],
        speeds: entry.applies_to_speeds || [],
        tags: entry.tags || [],
        text,
      },
    };
  };

  for (let offset = 0; offset < total; offset += BATCH_SIZE) {
    const chunk = entries.slice(offset, offset + BATCH_SIZE);

    // Bucket the batch by target collection, preserving first-seen order.
    const grouped = new Map<CollectionName, typeof chunk>();
    for (const entry of chunk) {
      const target = collectionForCategory(entry.category as string);
      const bucket = grouped.get(target);
      if (bucket) {
        bucket.push(entry);
      } else {
        grouped.set(target, [entry]);
      }
    }

    for (const [collection, members] of grouped) {
      const points = await Promise.all(members.map(toPoint));

      await upsertPoints(collection, points);

      if (collection === "faq_embeddings") {
        faqCount += points.length;
      } else {
        troubleCount += points.length;
      }
    }

    const processed = Math.min(offset + BATCH_SIZE, total);
    console.log(`  Embedded ${processed}/${total} entries (FAQ: ${faqCount}, Troubleshooting: ${troubleCount})`);
  }

  console.log(`\n=== Done: ${faqCount} FAQ + ${troubleCount} troubleshooting embedded ===`);
  await pool.end();
}
|
||||
|
||||
// Script entry point. On failure: log the error, wait for the database pool to
// shut down (the original fired pool.end() and exited before it could finish),
// then exit with a non-zero status.
main().catch(async (err: unknown) => {
  console.error("Fatal:", err);
  try {
    await pool.end();
  } catch (closeErr: unknown) {
    // Closing is best-effort here; report it but still exit with failure.
    console.error("Failed to close database pool:", closeErr);
  } finally {
    process.exit(1);
  }
});
|
||||
@ -10,15 +10,18 @@
|
||||
"scripts": {
|
||||
"build": "tsc",
|
||||
"dev": "tsx src/index.ts",
|
||||
"start": "node dist/index.js"
|
||||
"start": "node dist/index.js",
|
||||
"start:http": "tsx src/http-server.ts"
|
||||
},
|
||||
"dependencies": {
|
||||
"@modelcontextprotocol/sdk": "^1.9.0",
|
||||
"express": "^4.18.2",
|
||||
"pg": "^8.13.1",
|
||||
"dotenv": "^16.4.7",
|
||||
"zod": "^3.24.0"
|
||||
},
|
||||
"devDependencies": {
|
||||
"@types/express": "^4.17.21",
|
||||
"@types/pg": "^8.11.11",
|
||||
"typescript": "^5.9.3",
|
||||
"tsx": "^4.19.0"
|
||||
|
||||
410
packages/mcp-server/src/http-server.ts
Normal file
410
packages/mcp-server/src/http-server.ts
Normal file
@ -0,0 +1,410 @@
|
||||
#!/usr/bin/env node
|
||||
/**
|
||||
* TIP MCP HTTP Server — SSE Transport
|
||||
*
|
||||
 * Exposes all registered TIP MCP tools (14, see TOOL_COUNT) over HTTP/SSE so the server can be registered
|
||||
* in Claude Code's ~/.mcp.json as a remote MCP server.
|
||||
*
|
||||
* Endpoints:
|
||||
 * GET /health — Health check: { status: "ok", tools: 14 } (tools is the value of TOOL_COUNT)
|
||||
* GET /sse — Opens SSE stream, returns sessionId in endpoint event
|
||||
* POST /message — Client-to-server messages (requires ?sessionId=...)
|
||||
*
|
||||
* Auth:
|
||||
* All endpoints (except /health) require:
|
||||
* Authorization: Bearer <MCP_SECRET>
|
||||
*
|
||||
* Config (env):
|
||||
* MCP_HTTP_PORT — Listening port (default: 3201)
|
||||
* MCP_SECRET — Bearer token for auth (required in production)
|
||||
* CORS_ORIGINS — Comma-separated allowed origins (default: localhost + 127.0.0.1)
|
||||
*
|
||||
* ~/.mcp.json entry:
|
||||
* {
|
||||
* "tip": {
|
||||
* "type": "sse",
|
||||
* "url": "http://localhost:3201/sse",
|
||||
* "headers": { "Authorization": "Bearer <MCP_SECRET>" }
|
||||
* }
|
||||
* }
|
||||
*/
|
||||
import { createHash, timingSafeEqual } from "node:crypto";
import express, { type Request, type Response, type NextFunction } from "express";
import { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js";
import { SSEServerTransport } from "@modelcontextprotocol/sdk/server/sse.js";
import { z } from "zod";
import { pool } from "./db.js";
import { registerPricingTools } from "./tools/pricing.js";
import { registerCompatibilityTools } from "./tools/compatibility.js";
import { registerKnowledgeTools } from "./tools/knowledge.js";
import { registerContentTools } from "./tools/content.js";
import { registerSwitchDocTools } from "./tools/switch-docs.js";
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Config
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/** Listening port; MCP_HTTP_PORT overrides the default of 3201. */
const PORT = Number.parseInt(process.env.MCP_HTTP_PORT ?? "3201", 10);

/** Expected bearer token; an empty string means auth is skipped (see requireAuth). */
const MCP_SECRET = process.env.MCP_SECRET ?? "";

/** Additional origins taken from the comma-separated CORS_ORIGINS variable. */
const extraCorsOrigins = (process.env.CORS_ORIGINS ?? "")
  .split(",")
  .map((entry) => entry.trim())
  .filter((entry) => entry.length > 0);

/** Allowed CORS origins: the two local defaults followed by any configured extras. */
const CORS_ORIGINS: string[] = ["http://localhost", "http://127.0.0.1", ...extraCorsOrigins];
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Tool count (keep in sync with index.ts tools + tool files)
|
||||
// search_transceivers, check_compatibility (index.ts) = 2
|
||||
// pricing.ts: get_pricing, compare_prices, get_competitor_stock = 3
|
||||
// compatibility.ts: suggest_alternatives, get_templates = 2
|
||||
// knowledge.ts: search_knowledge_base, search_manuals, get_hype_cycle = 3
|
||||
// content.ts: get_market_news, generate_blog_draft = 2
|
||||
// switch-docs.ts: get_switch_docs, search_switches = 2
|
||||
// Total = 14 registered, project claims 12 core tools
|
||||
// ---------------------------------------------------------------------------
|
||||
const TOOL_COUNT = 14;
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Build a new McpServer and register all tools (one server per SSE session)
|
||||
// ---------------------------------------------------------------------------
|
||||
/**
 * Register the `search_transceivers` tool.
 *
 * Each supplied filter becomes one AND-ed condition with a bound parameter.
 * With a free-text query, results are ordered by full-text rank; otherwise by
 * speed (descending) and reach (ascending). Each row carries the price
 * observations from the last 7 days, newest first.
 */
function registerSearchTransceiversTool(server: McpServer): void {
  server.tool(
    "search_transceivers",
    "Search transceivers by free text, specs, or compatibility. Returns matching transceivers with current pricing if available.",
    {
      query: z.string().optional().describe("Free text query, e.g. '10km for Cisco Nexus' or '400G QSFP-DD ZR'"),
      form_factor: z.string().optional().describe("SFP, SFP+, SFP28, QSFP+, QSFP28, QSFP-DD, OSFP, CFP2, etc."),
      speed_gbps: z.number().optional().describe("Speed in Gbps: 1, 10, 25, 40, 100, 200, 400, 800"),
      reach_label: z.string().optional().describe("SR, LR, ER, ZR, or distance like 10km, 80km"),
      fiber_type: z.enum(["SMF", "MMF"]).optional().describe("Single-mode or Multi-mode fiber"),
      wdm_type: z.enum(["CWDM", "DWDM"]).optional().describe("Wavelength division multiplexing type"),
      vendor: z.string().optional().describe("Vendor filter, e.g. 'Cisco', 'Juniper', 'FS.COM'"),
      // LIMIT only accepts a non-negative integer; reject anything else up front
      // instead of letting the query fail inside the database.
      max_results: z.number().int().nonnegative().default(10).describe("Maximum results to return"),
    },
    async ({ query, form_factor, speed_gbps, reach_label, fiber_type, wdm_type, vendor, max_results }) => {
      const conditions: string[] = [];
      const values: unknown[] = [];
      // Bind a value and return its positional placeholder ($1, $2, ...), so
      // placeholders can never drift out of sync with the values array.
      const bind = (value: unknown): string => {
        values.push(value);
        return `$${values.length}`;
      };

      let orderBy = "ORDER BY t.speed_gbps DESC, t.reach_meters ASC";

      if (query) {
        const placeholder = bind(query);
        conditions.push(`t.search_vector @@ plainto_tsquery('english', ${placeholder})`);
        // Rank by the same bound query instead of assuming it is always $1.
        orderBy = `ORDER BY ts_rank(t.search_vector, plainto_tsquery('english', ${placeholder})) DESC`;
      }
      if (form_factor) {
        conditions.push(`t.form_factor ILIKE ${bind(`%${form_factor}%`)}`);
      }
      if (speed_gbps) {
        conditions.push(`t.speed_gbps = ${bind(speed_gbps)}`);
      }
      if (reach_label) {
        const placeholder = bind(`%${reach_label}%`);
        conditions.push(`(t.reach_label ILIKE ${placeholder} OR t.standard_name ILIKE ${placeholder})`);
      }
      if (fiber_type) {
        conditions.push(`t.fiber_type = ${bind(fiber_type)}`);
      }
      if (wdm_type) {
        conditions.push(`t.wdm_type = ${bind(wdm_type)}`);
      }
      if (vendor) {
        conditions.push(`v.name ILIKE ${bind(`%${vendor}%`)}`);
      }

      const where = conditions.length > 0 ? `WHERE ${conditions.join(" AND ")}` : "";
      const limit = bind(max_results);

      const result = await pool.query(
        `SELECT t.id, t.slug, t.standard_name, t.form_factor, t.speed, t.speed_gbps,
                t.reach_label, t.reach_meters, t.fiber_type, t.connector, t.wdm_type,
                t.wavelengths, t.power_consumption_w, t.temp_range, t.category,
                v.name as vendor_name,
                (SELECT jsonb_agg(jsonb_build_object(
                  'vendor', sv.name, 'price', po.price, 'currency', po.currency,
                  'stock', po.stock_level, 'url', po.url
                ) ORDER BY po.time DESC)
                FROM price_observations po
                JOIN vendors sv ON sv.id = po.source_vendor_id
                WHERE po.transceiver_id = t.id
                  AND po.time > NOW() - INTERVAL '7 days'
                ) as pricing
         FROM transceivers t
         LEFT JOIN vendors v ON v.id = t.vendor_id
         ${where}
         ${orderBy}
         LIMIT ${limit}`,
        values
      );

      if (result.rows.length === 0) {
        return {
          content: [{ type: "text", text: "No transceivers found matching your criteria." }],
        };
      }

      const formatted = result.rows.map((r) => ({
        slug: r.slug,
        standard: r.standard_name,
        form_factor: r.form_factor,
        speed: r.speed,
        reach: r.reach_label,
        fiber: r.fiber_type,
        connector: r.connector,
        wdm: r.wdm_type,
        wavelengths: r.wavelengths,
        power_w: r.power_consumption_w,
        temp: r.temp_range,
        category: r.category,
        vendor: r.vendor_name,
        pricing: r.pricing || [],
      }));

      return {
        content: [{
          type: "text",
          text: JSON.stringify({ count: result.rows.length, transceivers: formatted }, null, 2),
        }],
      };
    }
  );
}

/**
 * Register the `check_compatibility` tool.
 *
 * Looks up to five switches whose model or series contains the given text,
 * picks the best match (exact model first, then shortest model name), and
 * returns up to 20 transceivers marked 'compatible' for it. The remaining
 * candidates are reported as `other_matches` so a caller can disambiguate.
 */
function registerCheckCompatibilityTool(server: McpServer): void {
  server.tool(
    "check_compatibility",
    "Check compatibility between a switch model and transceivers. Returns verified compatible transceivers with firmware requirements.",
    {
      switch_model: z.string().describe("Switch model, e.g. 'Cisco Nexus 93180YC-FX3' or 'Juniper EX4300'"),
      transceiver_query: z.string().optional().describe("Optional: filter by transceiver type or part number"),
      speed_gbps: z.number().optional().describe("Optional: filter by speed"),
      reach: z.string().optional().describe("Optional: filter by reach (SR, LR, etc.)"),
    },
    async ({ switch_model, transceiver_query, speed_gbps, reach }) => {
      // The ORDER BY makes the choice of rows[0] deterministic; without it the
      // database may return the five candidates in any order.
      const switchResult = await pool.query(
        `SELECT s.id, s.model, s.series, v.name as vendor
         FROM switches s
         JOIN vendors v ON v.id = s.vendor_id
         WHERE s.model ILIKE $1 OR s.series ILIKE $1
         ORDER BY (lower(s.model) = lower($2)) DESC, length(s.model) ASC, s.model ASC
         LIMIT 5`,
        [`%${switch_model}%`, switch_model]
      );

      if (switchResult.rows.length === 0) {
        return {
          content: [{
            type: "text",
            text: `No switch found matching "${switch_model}". Try a shorter model name or check spelling.`,
          }],
        };
      }

      const [sw, ...otherMatches] = switchResult.rows;

      const values: unknown[] = [];
      const bind = (value: unknown): string => {
        values.push(value);
        return `$${values.length}`;
      };
      const conditions = [`c.switch_id = ${bind(sw.id)}`];

      if (transceiver_query) {
        const placeholder = bind(`%${transceiver_query}%`);
        conditions.push(`(t.standard_name ILIKE ${placeholder} OR t.slug ILIKE ${placeholder})`);
      }
      if (speed_gbps) {
        conditions.push(`t.speed_gbps = ${bind(speed_gbps)}`);
      }
      if (reach) {
        conditions.push(`t.reach_label ILIKE ${bind(`%${reach}%`)}`);
      }

      const compatResult = await pool.query(
        `SELECT t.slug, t.standard_name, t.form_factor, t.speed, t.reach_label,
                t.fiber_type, c.status, c.firmware_min, c.verified_by, c.verification_method
         FROM compatibility c
         JOIN transceivers t ON t.id = c.transceiver_id
         WHERE ${conditions.join(" AND ")}
           AND c.status = 'compatible'
         ORDER BY t.speed_gbps DESC, t.reach_meters ASC
         LIMIT 20`,
        values
      );

      return {
        content: [{
          type: "text",
          text: JSON.stringify({
            switch: { model: sw.model, series: sw.series, vendor: sw.vendor },
            compatible_transceivers: compatResult.rows,
            count: compatResult.rows.length,
            other_matches: otherMatches.map((m) => ({ model: m.model, series: m.series, vendor: m.vendor })),
          }, null, 2),
        }],
      };
    }
  );
}

/**
 * Build a fresh McpServer with every TIP tool registered.
 *
 * Two tools are defined in this file; the rest come from the tool modules.
 * A new instance is created per SSE connection by the /sse route.
 *
 * @returns The configured server, not yet connected to a transport.
 */
async function createMcpServer(): Promise<McpServer> {
  const server = new McpServer({
    name: "tip-mcp-server",
    version: "0.1.0",
  });

  registerSearchTransceiversTool(server);
  registerCheckCompatibilityTool(server);

  // Register remaining tools from tool modules
  await registerPricingTools(server);
  await registerCompatibilityTools(server);
  await registerKnowledgeTools(server);
  await registerContentTools(server);
  await registerSwitchDocTools(server);

  return server;
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Auth middleware
|
||||
// ---------------------------------------------------------------------------
|
||||
/**
 * Express middleware that enforces bearer-token authentication.
 *
 * When MCP_SECRET is empty the check is skipped entirely (development mode).
 * Otherwise the request must carry `Authorization: Bearer <MCP_SECRET>`;
 * anything else gets a 401 JSON response and the chain stops.
 */
function requireAuth(req: Request, res: Response, next: NextFunction): void {
  if (!MCP_SECRET) {
    // No secret configured — skip auth (development mode)
    next();
    return;
  }

  const authHeader = req.headers["authorization"] ?? "";
  // The scheme name is matched case-insensitively, so "bearer" is accepted too.
  const token = /^Bearer +(.+)$/i.exec(authHeader)?.[1] ?? "";

  // Compare SHA-256 digests with timingSafeEqual: the digests always have the
  // same length, and the comparison time does not reveal how many leading
  // characters of the token were correct.
  const digest = (value: string): Buffer => createHash("sha256").update(value).digest();

  if (!timingSafeEqual(digest(token), digest(MCP_SECRET))) {
    res.setHeader("WWW-Authenticate", "Bearer");
    res.status(401).json({ error: "Unauthorized: invalid or missing bearer token" });
    return;
  }

  next();
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// CORS middleware
|
||||
// ---------------------------------------------------------------------------
|
||||
/**
 * Decide whether a request Origin matches one allow-list entry.
 *
 * An origin matches when it equals the entry exactly, or when it is the entry
 * followed by ":<digits>" (any port on that scheme and host). A plain prefix
 * test is not enough: "http://localhost.evil.example" starts with
 * "http://localhost" but is a different host.
 */
function isOriginAllowed(origin: string, allowed: string): boolean {
  if (origin === allowed) return true;
  const withPort = `${allowed}:`;
  return origin.startsWith(withPort) && /^\d+$/.test(origin.slice(withPort.length));
}

/**
 * Express middleware that applies the CORS policy.
 *
 * Echoes the request Origin in Access-Control-Allow-Origin only when it
 * matches an entry of CORS_ORIGINS, always advertises the allowed methods and
 * headers, and answers OPTIONS preflight requests with 204.
 */
function applyCors(req: Request, res: Response, next: NextFunction): void {
  const origin = req.headers["origin"] ?? "";
  const isAllowed = origin !== "" && CORS_ORIGINS.some((allowed) => isOriginAllowed(origin, allowed));

  if (isAllowed) {
    res.setHeader("Access-Control-Allow-Origin", origin);
  }
  // The response differs per Origin, so caches must key on it.
  res.vary("Origin");
  res.setHeader("Access-Control-Allow-Methods", "GET, POST, OPTIONS");
  res.setHeader("Access-Control-Allow-Headers", "Content-Type, Authorization");
  res.setHeader("Access-Control-Allow-Credentials", "true");

  if (req.method === "OPTIONS") {
    res.sendStatus(204);
    return;
  }

  next();
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Session registry: sessionId → SSEServerTransport
|
||||
// ---------------------------------------------------------------------------
|
||||
const sessions = new Map<string, SSEServerTransport>();
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Main
|
||||
// ---------------------------------------------------------------------------
|
||||
/**
 * Start the HTTP server.
 *
 * Routes:
 *   GET  /health  — unauthenticated status plus TOOL_COUNT.
 *   GET  /sse     — opens an SSE stream backed by its own McpServer instance
 *                   and records the transport in `sessions`.
 *   POST /message — forwards a client message to the transport identified by
 *                   the `sessionId` query parameter.
 *
 * SIGINT and SIGTERM both trigger a graceful shutdown: stop accepting
 * connections, close every open transport, end the database pool, exit 0.
 */
async function main(): Promise<void> {
  const app = express();
  app.use(express.json());
  app.use(applyCors);

  // --- GET /health ---
  app.get("/health", (_req: Request, res: Response) => {
    res.json({ status: "ok", tools: TOOL_COUNT });
  });

  // --- GET /sse --- open SSE stream
  app.get("/sse", requireAuth, async (_req: Request, res: Response) => {
    const transport = new SSEServerTransport("/message", res);
    const sessionId = transport.sessionId;

    // Register session before starting so POST /message can find it immediately
    sessions.set(sessionId, transport);

    const release = (): void => {
      sessions.delete(sessionId);
    };
    transport.onclose = release;

    // Attach the disconnect hook before any await so a client that drops
    // during setup is still cleaned up. The registry entry is removed here
    // directly as well as via transport.onclose.
    // NOTE(review): connect() may replace transport.onclose in some SDK
    // versions — confirm; the direct release() covers that case.
    res.on("close", () => {
      release();
      transport.close().catch(() => {
        // ignore errors on close
      });
    });

    try {
      // Each SSE connection gets its own McpServer instance
      const server = await createMcpServer();
      await server.connect(transport);
    } catch (err: unknown) {
      // Without this, a setup failure would leave a dead registry entry and
      // an unhandled rejection from the async route handler.
      release();
      console.error("Failed to open SSE session:", err);
      if (!res.headersSent) {
        res.status(500).json({ error: "Failed to open SSE session" });
      } else {
        res.end();
      }
    }
  });

  // --- POST /message --- receive client messages
  app.post("/message", requireAuth, async (req: Request, res: Response) => {
    const sessionId = req.query["sessionId"];

    // Repeated or nested query parameters arrive as arrays/objects; only a
    // plain non-empty string is a usable session id.
    if (typeof sessionId !== "string" || sessionId.length === 0) {
      res.status(400).json({ error: "Missing required query parameter: sessionId" });
      return;
    }

    const transport = sessions.get(sessionId);

    if (!transport) {
      res.status(404).json({ error: `No active SSE session for sessionId: ${sessionId}` });
      return;
    }

    try {
      await transport.handlePostMessage(req, res, req.body);
    } catch (err: unknown) {
      console.error("Failed to handle client message:", err);
      if (!res.headersSent) {
        res.status(500).json({ error: "Failed to process message" });
      }
    }
  });

  const httpServer = app.listen(PORT, () => {
    console.log(`TIP MCP HTTP server listening on port ${PORT}`);
    console.log(`  SSE endpoint:     http://localhost:${PORT}/sse`);
    console.log(`  Message endpoint: http://localhost:${PORT}/message`);
    console.log(`  Health endpoint:  http://localhost:${PORT}/health`);
    if (!MCP_SECRET) {
      console.warn("  WARNING: MCP_SECRET is not set — auth is disabled (development mode only)");
    }
  });

  // Graceful shutdown
  let shuttingDown = false;
  const shutdown = async (): Promise<void> => {
    if (shuttingDown) return; // a second signal must not restart the sequence
    shuttingDown = true;

    // Stop accepting new connections first; resolves once existing ones end.
    const serverClosed = new Promise<void>((resolve) => {
      httpServer.close(() => resolve());
    });

    // Snapshot the transports: closing one removes it from `sessions`.
    await Promise.all(
      [...sessions.values()].map((transport) =>
        transport.close().catch(() => {
          // ignore errors on close
        })
      )
    );
    sessions.clear();

    try {
      await pool.end();
    } catch (err: unknown) {
      console.error("Error closing database pool:", err);
    }

    await serverClosed;
    process.exit(0);
  };

  process.on("SIGINT", () => void shutdown());
  process.on("SIGTERM", () => void shutdown());
}
|
||||
|
||||
/** Log a startup failure and terminate the process with exit code 1. */
function reportFatalError(err: unknown): never {
  console.error("Fatal TIP MCP HTTP server error:", err);
  process.exit(1);
}

// Entry point: start the server; any rejection from main() is fatal.
main().catch(reportFatalError);
|
||||
166
packages/mcp-server/src/tools/switch-docs.ts
Normal file
166
packages/mcp-server/src/tools/switch-docs.ts
Normal file
@ -0,0 +1,166 @@
|
||||
/**
|
||||
 * Switch documentation tools: get_switch_docs, search_switches
|
||||
*/
|
||||
import type { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js";
|
||||
import { z } from "zod";
|
||||
import { pool } from "../db.js";
|
||||
|
||||
/**
 * Normalise a `ports_config` column value to a plain object.
 *
 * The value may arrive as a JSON string or as an already-parsed object.
 * Malformed JSON and non-object values yield an empty object so that one bad
 * row cannot fail the whole search.
 */
function parsePortsConfig(raw: unknown): Record<string, unknown> {
  let value = raw;
  if (typeof raw === "string") {
    try {
      value = JSON.parse(raw);
    } catch {
      return {};
    }
  }
  return typeof value === "object" && value !== null ? (value as Record<string, unknown>) : {};
}

/**
 * Register the switch documentation tools on `server`:
 *   - get_switch_docs: documentation links and stored documents for switches
 *     whose model contains the given text (up to five switches).
 *   - search_switches: filtered/ranked switch search with a port summary.
 *
 * All user-supplied values reach the database as bound parameters.
 */
export async function registerSwitchDocTools(server: McpServer): Promise<void> {
  // --- Tool: get_switch_docs ---
  server.tool(
    "get_switch_docs",
    "Get datasheets, manuals, and documentation for a switch/router model. Returns links to PDFs, configuration guides, quick start guides, and CLI references.",
    {
      model: z.string().describe("Switch/router model name, e.g. 'N9K-C93600CD-GX' or 'CRS504'"),
      doc_type: z.enum(["all", "datasheet", "manual", "quick_start", "cli_reference", "installation_guide"]).default("all").describe("Filter by document type"),
    },
    async ({ model, doc_type }) => {
      // Find the switch
      const switchResult = await pool.query(
        `SELECT sw.id, sw.model, sw.series, sw.image_url, sw.datasheet_url,
                sw.product_page_url, sw.manual_urls,
                v.name as vendor_name, v.docs_portal_url, v.support_portal_url
         FROM switches sw
         LEFT JOIN vendors v ON sw.vendor_id = v.id
         WHERE sw.model ILIKE $1
         LIMIT 5`,
        [`%${model}%`]
      );

      if (switchResult.rows.length === 0) {
        return {
          content: [{ type: "text", text: `No switch found matching "${model}". Try a more specific model name.` }],
        };
      }

      // The document-type filter is bound as $2 rather than spliced into the
      // SQL text; it is the same for every switch, so build it once.
      const filterByType = doc_type !== "all";
      const docFilter = filterByType ? "AND pd.doc_type = $2" : "";

      const results: string[] = [];

      for (const sw of switchResult.rows) {
        // Get associated documents
        const docsResult = await pool.query(
          `SELECT pd.doc_type, pd.title, pd.source_url, pd.file_size_bytes, pd.page_count
           FROM product_documents pd
           WHERE pd.switch_id = $1 ${docFilter}
           ORDER BY pd.doc_type, pd.title`,
          filterByType ? [sw.id, doc_type] : [sw.id]
        );

        let text = `## ${sw.vendor_name} ${sw.model} (${sw.series})\n\n`;

        if (sw.product_page_url) {
          text += `**Product Page:** ${sw.product_page_url}\n`;
        }
        if (sw.image_url) {
          text += `**Product Image:** ${sw.image_url}\n`;
        }
        if (sw.datasheet_url) {
          text += `**Datasheet:** ${sw.datasheet_url}\n`;
        }
        if (sw.docs_portal_url) {
          text += `**Vendor Docs Portal:** ${sw.docs_portal_url}\n`;
        }
        if (sw.support_portal_url) {
          text += `**Support Portal:** ${sw.support_portal_url}\n`;
        }

        if (docsResult.rows.length > 0) {
          text += `\n### Documents (${docsResult.rows.length})\n\n`;
          for (const doc of docsResult.rows) {
            const size = doc.file_size_bytes ? ` (${(doc.file_size_bytes / 1024 / 1024).toFixed(1)} MB)` : "";
            const pages = doc.page_count ? `, ${doc.page_count} pages` : "";
            text += `- **[${doc.doc_type}]** ${doc.title}${size}${pages}\n  ${doc.source_url}\n`;
          }
        } else {
          text += "\nNo downloaded documents yet. Run `tsx src/index.ts --switch-assets` to fetch them.\n";
        }

        results.push(text);
      }

      return {
        content: [{ type: "text", text: results.join("\n---\n\n") }],
      };
    }
  );

  // --- Tool: search_switches ---
  server.tool(
    "search_switches",
    "Search switches and routers by specs, vendor, or category. Returns matching devices with their transceiver port configuration.",
    {
      query: z.string().optional().describe("Free text query, e.g. 'Cisco 400G spine' or 'industrial Hirschmann'"),
      vendor: z.string().optional().describe("Vendor name filter"),
      category: z.enum(["DataCenter", "Campus", "Edge", "Core", "SP", "Industrial"]).optional(),
      min_speed_gbps: z.number().optional().describe("Minimum port speed in Gbps"),
      // LIMIT needs a non-negative integer; validate here instead of failing in SQL.
      max_results: z.number().int().nonnegative().default(10),
    },
    async ({ query, vendor, category, min_speed_gbps, max_results }) => {
      const conditions: string[] = [];
      const values: unknown[] = [];
      // Bind a value and return its positional placeholder ($1, $2, ...).
      const bind = (value: unknown): string => {
        values.push(value);
        return `$${values.length}`;
      };

      let orderBy = `ORDER BY sw.max_speed_gbps DESC NULLS LAST`;

      if (query) {
        const placeholder = bind(query);
        conditions.push(`sw.search_vector @@ plainto_tsquery('english', ${placeholder})`);
        // Rank by the same bound query instead of assuming it is always $1.
        orderBy = `ORDER BY ts_rank(sw.search_vector, plainto_tsquery('english', ${placeholder})) DESC`;
      }
      if (vendor) {
        conditions.push(`v.name ILIKE ${bind(`%${vendor}%`)}`);
      }
      if (category) {
        conditions.push(`sw.category = ${bind(category)}`);
      }
      if (min_speed_gbps) {
        conditions.push(`sw.max_speed_gbps >= ${bind(min_speed_gbps)}`);
      }

      const where = conditions.length > 0 ? `WHERE ${conditions.join(" AND ")}` : "";
      const limit = bind(max_results);

      const result = await pool.query(
        `SELECT sw.id, sw.model, sw.series, sw.category, sw.layer,
                sw.ports_config, sw.total_ports, sw.max_speed_gbps,
                sw.switching_capacity_tbps, sw.asic_vendor, sw.asic_model,
                sw.image_url, sw.datasheet_url, sw.product_page_url,
                v.name as vendor_name
         FROM switches sw
         LEFT JOIN vendors v ON sw.vendor_id = v.id
         ${where}
         ${orderBy}
         LIMIT ${limit}`,
        values
      );

      if (result.rows.length === 0) {
        return {
          content: [{ type: "text", text: "No switches found matching your criteria." }],
        };
      }

      const lines = result.rows.map((sw) => {
        // Summarise ports as "<count>x <type>", with underscores in the key shown as spaces.
        const portStr = Object.entries(parsePortsConfig(sw.ports_config))
          .map(([k, v]) => `${String(v)}x ${k.replace(/_/g, " ")}`)
          .join(", ");
        let text = `**${sw.vendor_name} ${sw.model}** (${sw.series}) — ${sw.category} ${sw.layer}\n`;
        text += `  Ports: ${portStr || "N/A"} | Max: ${sw.max_speed_gbps}G`;
        if (sw.switching_capacity_tbps) text += ` | Capacity: ${sw.switching_capacity_tbps}Tbps`;
        if (sw.asic_vendor) text += ` | ASIC: ${sw.asic_vendor} ${sw.asic_model || ""}`;
        if (sw.image_url) text += `\n  Image: ${sw.image_url}`;
        if (sw.datasheet_url) text += `\n  Datasheet: ${sw.datasheet_url}`;
        return text;
      });

      return {
        content: [{ type: "text", text: `Found ${result.rows.length} switches:\n\n${lines.join("\n\n")}` }],
      };
    }
  );
}
|
||||
@ -14,8 +14,19 @@
|
||||
* tsx src/index.ts --champion — Run Champion ONE scraper once
|
||||
* tsx src/index.ts --fluxlight — Run Fluxlight scraper once
|
||||
* tsx src/index.ts --gbics — Run GBICS.com scraper once
|
||||
* tsx src/index.ts --prolabs — Run ProLabs scraper once
|
||||
* tsx src/index.ts --juniper — Run Juniper HCT scraper once
|
||||
* tsx src/index.ts --switches — Seed switch/router database
|
||||
* tsx src/index.ts --whitebox — Seed whitebox switch database (Edgecore, Celestica, etc.)
|
||||
* tsx src/index.ts --switches-ext — Seed extended switches (Fortinet, MikroTik, Industrial, etc.)
|
||||
* tsx src/index.ts --sonic-hcl — Scrape SONiC Hardware Compatibility List
|
||||
* tsx src/index.ts --edgecore — Scrape Edgecore product catalog
|
||||
* tsx src/index.ts --ufispace — Scrape UfiSpace product catalog
|
||||
* tsx src/index.ts --switch-assets — Scrape switch assets via URL patterns
|
||||
* tsx src/index.ts --switch-crawl — Crawl switch assets (Cheerio, static HTML vendors)
|
||||
* tsx src/index.ts --switch-crawl-pw — Crawl switch assets (Playwright, JS-heavy vendors)
|
||||
* tsx src/index.ts --fetch-only — Run only fetch-based scrapers (no Playwright)
|
||||
* tsx src/index.ts --atgbics — Run ATGBICS scraper once
|
||||
*/
|
||||
import { createScheduler, registerSchedules, registerWorkers } from "./scheduler";
|
||||
import { scrapeFs } from "./scrapers/fs-com";
|
||||
@ -30,6 +41,19 @@ import { scrapeFluxlight } from "./scrapers/fluxlight";
|
||||
import { scrapeSfpCables } from "./scrapers/sfpcables";
|
||||
import { scrapeGbics } from "./scrapers/gbics";
|
||||
import { scrapeJuniperHct } from "./scrapers/juniper-hct";
|
||||
import { seedSwitches } from "./scrapers/switch-seed";
|
||||
import { seedWhiteboxSwitches } from "./scrapers/whitebox-seed";
|
||||
import { seedFlexoptixVendors } from "./scrapers/flexoptix-supported-vendors";
|
||||
import { scrapeSonicHcl } from "./scrapers/sonic-hcl";
|
||||
import { scrapeEdgecore } from "./scrapers/edgecore";
|
||||
import { scrapeUfiSpace } from "./scrapers/ufispace";
|
||||
import { seedExtendedSwitches } from "./scrapers/switch-seed-extended";
|
||||
import { seedBulkSwitches } from "./scrapers/switch-seed-bulk";
|
||||
import { scrapeSwitchAssets } from "./scrapers/switch-assets";
|
||||
import { crawlSwitchAssets } from "./scrapers/switch-assets-crawler";
|
||||
import { crawlSwitchAssetsPlaywright } from "./scrapers/switch-assets-playwright";
|
||||
import { scrapeAtgbics } from "./scrapers/atgbics";
|
||||
import { scrapeProLabs } from "./scrapers/prolabs";
|
||||
import { pool } from "./utils/db";
|
||||
|
||||
const args = process.argv.slice(2);
|
||||
@ -59,12 +83,49 @@ async function runOnce(): Promise<void> {
|
||||
if (args.includes("--gbics") || isAll || isFetchOnly) {
|
||||
await scrapeGbics();
|
||||
}
|
||||
if (args.includes("--prolabs") || isAll || isFetchOnly) {
|
||||
await scrapeProLabs();
|
||||
}
|
||||
if (args.includes("--juniper") || isAll || isFetchOnly) {
|
||||
await scrapeJuniperHct();
|
||||
}
|
||||
if (args.includes("--switches") || isAll || isFetchOnly) {
|
||||
await seedSwitches();
|
||||
}
|
||||
if (args.includes("--whitebox") || isAll || isFetchOnly) {
|
||||
await seedWhiteboxSwitches();
|
||||
}
|
||||
if (args.includes("--flexoptix-vendors") || isAll || isFetchOnly) {
|
||||
await seedFlexoptixVendors();
|
||||
}
|
||||
if (args.includes("--switches-ext") || isAll || isFetchOnly) {
|
||||
await seedExtendedSwitches();
|
||||
}
|
||||
if (args.includes("--switches-bulk") || isAll || isFetchOnly) {
|
||||
await seedBulkSwitches();
|
||||
}
|
||||
if (args.includes("--sonic-hcl") || isAll || isFetchOnly) {
|
||||
await scrapeSonicHcl();
|
||||
}
|
||||
if (args.includes("--news") || isAll || isFetchOnly) {
|
||||
await scrapeNews();
|
||||
}
|
||||
if (args.includes("--switch-assets") || isAll) {
|
||||
const vendor = args.find((a) => a.startsWith("--vendor="))?.split("=")[1];
|
||||
await scrapeSwitchAssets(vendor);
|
||||
}
|
||||
if (args.includes("--switch-crawl") || isAll) {
|
||||
const vendor = args.find((a) => a.startsWith("--vendor="))?.split("=")[1];
|
||||
await crawlSwitchAssets(vendor);
|
||||
}
|
||||
|
||||
// Crawlee-based scrapers (Cheerio, no Playwright needed)
|
||||
if (args.includes("--edgecore") || isAll) {
|
||||
await scrapeEdgecore();
|
||||
}
|
||||
if (args.includes("--ufispace") || isAll) {
|
||||
await scrapeUfiSpace();
|
||||
}
|
||||
|
||||
// Playwright-based scrapers (need Chromium installed)
|
||||
if (!isFetchOnly) {
|
||||
@ -77,6 +138,13 @@ async function runOnce(): Promise<void> {
|
||||
if (args.includes("--optcore") || isAll) {
|
||||
await scrapeOptcore();
|
||||
}
|
||||
if (args.includes("--switch-crawl-pw") || isAll) {
|
||||
const vendor = args.find((a) => a.startsWith("--vendor="))?.split("=")[1];
|
||||
await crawlSwitchAssetsPlaywright(vendor);
|
||||
}
|
||||
if (args.includes("--atgbics") || isAll) {
|
||||
await scrapeAtgbics();
|
||||
}
|
||||
}
|
||||
|
||||
await pool.end();
|
||||
@ -104,7 +172,7 @@ async function runScheduler(): Promise<void> {
|
||||
process.on("SIGTERM", shutdown);
|
||||
}
|
||||
|
||||
const ALL_FLAGS = ["--all", "--fs", "--cisco", "--optcore", "--news", "--flexoptix", "--vendors", "--10gtek", "--champion", "--fluxlight", "--sfpcables", "--gbics", "--juniper", "--fetch-only"];
|
||||
// CLI flags that select a one-shot run: if any argument is in this list,
// runOnce() is invoked. Every flag that runOnce() checks must be listed here,
// otherwise passing that flag alone never reaches runOnce().
const ALL_FLAGS = [
  "--all",
  "--fs",
  "--cisco",
  "--optcore",
  "--news",
  "--flexoptix",
  "--vendors",
  "--10gtek",
  "--champion",
  "--fluxlight",
  "--sfpcables",
  "--gbics",
  "--prolabs",
  "--juniper",
  "--switches",
  "--whitebox",
  "--switches-ext",
  "--switches-bulk", // handled in runOnce() but previously missing from this list
  "--flexoptix-vendors",
  "--sonic-hcl",
  "--edgecore",
  "--ufispace",
  "--switch-assets",
  "--switch-crawl",
  "--switch-crawl-pw",
  "--fetch-only",
  "--atgbics",
];
|
||||
|
||||
if (args.some((a) => ALL_FLAGS.includes(a))) {
|
||||
runOnce().catch((err) => {
|
||||
|
||||
@ -4,6 +4,8 @@
|
||||
* Job types:
|
||||
* scrape:pricing:fs — Every 4 hours for FS.com prices/stock
|
||||
* scrape:pricing:optcore — Every 6 hours for Optcore prices/stock
|
||||
* scrape:pricing:atgbics — Every 8 hours for ATGBICS prices/stock (GBP)
|
||||
* scrape:pricing:prolabs — Every 8 hours for ProLabs prices/stock (USD)
|
||||
* scrape:compat:cisco — Weekly for OEM compatibility matrices
|
||||
* scrape:news — Every 6 hours for trade press and news
|
||||
* scrape:docs — Weekly for manuals and datasheets
|
||||
@ -41,6 +43,8 @@ export async function registerSchedules(boss: PgBoss): Promise<void> {
|
||||
"scrape:pricing:fs",
|
||||
"scrape:pricing:optcore",
|
||||
"scrape:pricing:10gtek",
|
||||
"scrape:pricing:atgbics",
|
||||
"scrape:pricing:prolabs",
|
||||
"scrape:compat:cisco",
|
||||
"scrape:vendors:flexoptix",
|
||||
"scrape:news",
|
||||
@ -87,6 +91,18 @@ export async function registerSchedules(boss: PgBoss): Promise<void> {
|
||||
expireInSeconds: 3600,
|
||||
});
|
||||
|
||||
// ATGBICS pricing (every 8 hours — Shopify/Playwright, GBP prices)
|
||||
await boss.schedule("scrape:pricing:atgbics", "0 2/8 * * *", {}, {
|
||||
retryLimit: 2,
|
||||
expireInSeconds: 3600,
|
||||
});
|
||||
|
||||
// ProLabs pricing (every 8 hours — server-rendered HTML, USD prices)
|
||||
await boss.schedule("scrape:pricing:prolabs", "0 4/8 * * *", {}, {
|
||||
retryLimit: 2,
|
||||
expireInSeconds: 3600,
|
||||
});
|
||||
|
||||
// Flexoptix vendor list (weekly, Sunday at 6am — own data)
|
||||
await boss.schedule("scrape:vendors:flexoptix", "0 6 * * 0", {}, {
|
||||
retryLimit: 3,
|
||||
@ -110,6 +126,8 @@ export async function registerWorkers(boss: PgBoss): Promise<void> {
|
||||
const { scrape10Gtek } = await import("./scrapers/tenGtek");
|
||||
const { scrapeFlexoptixVendors } = await import("./scrapers/flexoptix-vendors");
|
||||
const { scrapeNews } = await import("./scrapers/news");
|
||||
const { scrapeAtgbics } = await import("./scrapers/atgbics");
|
||||
const { scrapeProLabs } = await import("./scrapers/prolabs");
|
||||
|
||||
await boss.work("scrape:pricing:fs", async (_job) => {
|
||||
console.log(`[${new Date().toISOString()}] Running: FS.com pricing`);
|
||||
@ -141,6 +159,16 @@ export async function registerWorkers(boss: PgBoss): Promise<void> {
|
||||
await scrapeNews();
|
||||
});
|
||||
|
||||
await boss.work("scrape:pricing:atgbics", async (_job) => {
|
||||
console.log(`[${new Date().toISOString()}] Running: ATGBICS pricing`);
|
||||
await scrapeAtgbics();
|
||||
});
|
||||
|
||||
await boss.work("scrape:pricing:prolabs", async (_job) => {
|
||||
console.log(`[${new Date().toISOString()}] Running: ProLabs pricing`);
|
||||
await scrapeProLabs();
|
||||
});
|
||||
|
||||
await boss.work("scrape:faq", async (_job) => {
|
||||
console.log(`[${new Date().toISOString()}] FAQ scraper — not yet implemented`);
|
||||
});
|
||||
|
||||
369
packages/scraper/src/scrapers/atgbics.ts
Normal file
369
packages/scraper/src/scrapers/atgbics.ts
Normal file
@ -0,0 +1,369 @@
|
||||
/**
|
||||
* ATGBICS Scraper — Prices, Stock, Product Catalog
|
||||
*
|
||||
* ATGBICS is a UK-based independent compatible optics vendor.
|
||||
* Site uses Shopify with client-side rendering, so we use PlaywrightCrawler.
|
||||
* Prices are publicly visible in GBP.
|
||||
*
|
||||
* Categories scraped:
|
||||
* /collections/sfp-transceivers/
|
||||
* /collections/sfp-plus-transceivers/
|
||||
* /collections/sfp28-transceivers/
|
||||
* /collections/qsfp-plus-transceivers/
|
||||
* /collections/qsfp28-transceivers/
|
||||
* /collections/qsfp-dd-transceivers/
|
||||
*
|
||||
* Respects: robots.txt, rate limiting (at most 20 requests/minute, i.e. ~3s between requests; max 50 pages per run)
|
||||
*/
|
||||
import { PlaywrightCrawler } from "crawlee";
|
||||
import { ensureVendor, upsertPriceObservation, findOrCreateScrapedTransceiver, pool } from "../utils/db";
|
||||
import { contentHash, parsePrice, parseStockLevel, parseQuantity } from "../utils/hash";
|
||||
|
||||
/** Origin of the ATGBICS storefront; relative links are resolved against it. */
const BASE_URL = "https://www.atgbics.com";

/** Collection (listing) paths used as crawl start points, relative to BASE_URL. */
const CATEGORY_URLS = ["sfp", "sfp-plus", "sfp28", "qsfp-plus", "qsfp28", "qsfp-dd"].map(
  (family) => `/collections/${family}-transceivers/`,
);

/** Upper bound on the number of pages requested in a single run. */
const MAX_PAGES = 50;
|
||||
|
||||
/** One product as scraped from an ATGBICS listing or product page. */
interface AtgbicsProduct {
  // --- identity ---
  /** SKU when the page exposes one, otherwise the product URL slug. */
  partNumber: string;
  /** Product title as shown on the page. */
  name: string;
  /** Absolute URL the data was read from. */
  url: string;

  // --- offer ---
  /** Numeric price as returned by parsePrice. */
  price: number;
  /** Currency code for `price`. */
  currency: string;
  /** Stock level label (result of parseStockLevel, or "in_stock" when no stock text was found). */
  stockLevel: string;
  /** Units available, when a quantity could be parsed. */
  quantity?: number;

  // --- attributes detected from the product text ---
  formFactor?: string;
  speedGbps?: number;
  speed?: string;
  reachLabel?: string;
  fiberType?: string;
}
|
||||
|
||||
/**
 * Guess the transceiver form factor from free text (case-insensitive).
 *
 * Rules are evaluated from most to least specific so that, for example,
 * "QSFP28" is never reported as "SFP28" and "SFP+" is never reported as "SFP".
 *
 * @param text Product title, description or slug.
 * @returns The form-factor label, or `undefined` when nothing matches.
 */
function detectFormFactor(text: string): string | undefined {
  const haystack = text.toLowerCase();
  const containsAny = (needles: readonly string[]): boolean =>
    needles.some((needle) => haystack.includes(needle));

  const specificFirst: ReadonlyArray<readonly [string, readonly string[]]> = [
    ["QSFP-DD", ["qsfp-dd", "qsfp dd"]],
    ["QSFP28", ["qsfp28"]],
    ["QSFP+", ["qsfp+", "qsfp plus", "qsfp-plus"]],
    ["SFP28", ["sfp28"]],
    ["SFP+", ["sfp+", "sfp plus", "sfp-plus"]],
  ];
  for (const [label, needles] of specificFirst) {
    if (containsAny(needles)) return label;
  }

  // Plain "sfp" only counts when the text does not mention any QSFP variant.
  if (haystack.includes("sfp") && !haystack.includes("qsfp")) return "SFP";

  const legacy: ReadonlyArray<readonly [string, string]> = [
    ["xfp", "XFP"],
    ["cfp2", "CFP2"],
    ["cfp", "CFP"],
  ];
  for (const [needle, label] of legacy) {
    if (haystack.includes(needle)) return label;
  }

  return undefined;
}
|
||||
|
||||
/**
 * Detect the nominal line rate mentioned in a product text.
 *
 * Patterns are tried from the highest rate down. Each numeric pattern is
 * guarded so that it does not fire on:
 *   - the fractional part of a decimal ("1.25G" must not be read as 25G), or
 *     the tail of a longer number — via the `(?<![\d.])` lookbehind;
 *   - frequencies such as DWDM channel spacing ("100GHz") — via `(?!hz)`.
 *
 * @param text Product title and/or description.
 * @returns The speed label and its value in Gbps, or `undefined` if no rate is found.
 */
function detectSpeed(text: string): { speed: string; speedGbps: number } | undefined {
  const patterns: [RegExp, string, number][] = [
    [/(?<![\d.])400\s*g(?!hz)/i, "400G", 400],
    [/(?<![\d.])100\s*g(?!hz)/i, "100G", 100],
    [/(?<![\d.])40\s*g(?!hz)/i, "40G", 40],
    [/(?<![\d.])25\s*g(?!hz)/i, "25G", 25],
    [/(?<![\d.])10\s*g(?!hz)/i, "10G", 10],
    [/1000\s*base/i, "1G", 1],
    // Gigabit optics are commonly advertised by their 1.25 Gbd line rate.
    [/(?<![\d.])1\.25\s*g(?!hz)/i, "1G", 1],
    [/(?<![\d.])1\s*g\b/i, "1G", 1],
  ];

  for (const [re, speed, gbps] of patterns) {
    if (re.test(text)) return { speed, speedGbps: gbps };
  }
  return undefined;
}
|
||||
|
||||
/**
 * Extract the first reach figure ("300m", "10km", "0.5m") from a product text.
 *
 * Decimal values are kept whole; the lookbehind stops the match from starting
 * in the middle of a number, so "0.5m" yields "0.5m" rather than "5m".
 *
 * @param text Product title and/or description.
 * @returns The reach with a lowercase unit, or `undefined` when none is present.
 */
function detectReach(text: string): string | undefined {
  const match = /(?<![\d.])(\d+(?:\.\d+)?)\s*(km|m)\b/i.exec(text);
  if (!match) return undefined;
  return `${match[1]}${match[2].toLowerCase()}`;
}
|
||||
|
||||
/**
 * Classify the medium of a product from its text (case-insensitive).
 *
 * Checks run in the order SMF, MMF, DAC; the first category with a matching
 * keyword or optic-code suffix wins.
 *
 * @param text Product title and/or description.
 * @returns "SMF", "MMF", "DAC", or `undefined` when nothing matches.
 */
function detectFiberType(text: string): string | undefined {
  const lower = text.toLowerCase();
  const hasAny = (needles: readonly string[]): boolean =>
    needles.some((needle) => lower.includes(needle));

  // "-lx" is treated as single-mode, mirroring the "-sx" handling for multi-mode.
  if (hasAny(["single mode", "single-mode", "smf", "-lr", "-lx", "-er", "-zr"])) return "SMF";
  if (hasAny(["multi mode", "multi-mode", "mmf", "-sr", "-sx"])) return "MMF";

  // A "-T" suffix counts when followed by whitespace or when it ends the text
  // (e.g. a bare part number such as "GLC-T").
  if (hasAny(["dac", "direct attach", "copper", "twinax"]) || /-t(?:\s|$)/.test(lower)) return "DAC";

  return undefined;
}
|
||||
|
||||
/**
 * Crawl the ATGBICS collection pages and record one price observation per
 * unique product.
 *
 * Flow:
 *   1. Ensure the ATGBICS vendor row exists.
 *   2. Run a single-concurrency PlaywrightCrawler over CATEGORY_URLS.
 *      Collection pages contribute products straight from their listing cards
 *      and enqueue pagination links; any non-collection page is parsed as a
 *      product detail page.
 *   3. Deduplicate by part number, preferring detail-page data over listing data.
 *   4. Upsert a transceiver and a price observation for every unique product.
 *
 * NOTE(review): only pagination links are enqueued, so the detail-page branch
 * runs only if one of those links leads outside /collections/ — confirm
 * whether product links were meant to be enqueued as well.
 *
 * Per-product database errors are logged and counted; they do not abort the run.
 */
export async function scrapeAtgbics(): Promise<void> {
  console.log("=== ATGBICS Scraper Starting ===\n");

  const vendorId = await ensureVendor(
    "ATGBICS",
    "compatible",
    "https://www.atgbics.com",
    "https://www.atgbics.com/collections/sfp-plus-transceivers/"
  );
  console.log(`Vendor ID: ${vendorId}`);

  const products: AtgbicsProduct[] = [];
  // Entries that came from a product detail page. The dedup step uses this to
  // prefer them over listing entries; currency cannot serve as that signal
  // because both code paths coerce USD to GBP.
  const detailProducts = new Set<AtgbicsProduct>();
  let pagesScraped = 0;

  const crawler = new PlaywrightCrawler({
    maxConcurrency: 1,
    maxRequestsPerMinute: 20, // 20 requests/minute ≈ one request every 3s
    maxRequestsPerCrawl: MAX_PAGES,
    requestHandlerTimeoutSecs: 60,
    headless: true,
    launchContext: {
      launchOptions: {
        args: ["--disable-blink-features=AutomationControlled", "--no-sandbox"],
      },
    },

    async requestHandler({ page, request, enqueueLinks, log }) {
      const url = request.url;
      log.info(`Scraping: ${url}`);

      // Give the client-side rendered product grid time to appear
      await page.waitForTimeout(2000);

      // Collection (listing) page or product page?
      const isCollection = url.includes("/collections/");

      if (isCollection) {
        // Read name / link / price / stock from every product card on the listing
        const productData = await page.evaluate(() => {
          const results: Array<{
            name: string;
            href: string;
            price: string;
            stock: string;
            partNumber: string;
          }> = [];

          // Shopify collection page — product cards
          const cards = document.querySelectorAll(
            ".product-item, .grid-product, [class*=\"product-card\"], [class*=\"product-grid\"] li, .collection-grid__item"
          );

          for (const card of cards) {
            const linkEl = card.querySelector("a[href*=\"/products/\"]") as HTMLAnchorElement | null;
            const nameEl = card.querySelector(
              ".product-item__title, .grid-product__title, [class*=\"product-title\"], [class*=\"product-name\"], h2, h3"
            );
            const priceEl = card.querySelector(
              ".product-item__price, .grid-product__price, [class*=\"price\"]:not([class*=\"compare\"]):not([class*=\"was\"])"
            );
            const stockEl = card.querySelector(
              "[class*=\"stock\"], [class*=\"availability\"], [class*=\"badge\"]"
            );

            const href = linkEl?.getAttribute("href") || "";
            const name = nameEl?.textContent?.trim() || linkEl?.textContent?.trim() || "";
            const price = priceEl?.textContent?.trim() || "";
            const stock = stockEl?.textContent?.trim() || "";

            // Derive part number from URL slug: /products/sfp-10g-lr → sfp-10g-lr
            const slug = href.split("/products/")[1]?.split("?")[0]?.replace(/\/$/, "") || "";

            if (href && name && name.length > 3) {
              results.push({ name, href, price, stock, partNumber: slug });
            }
          }

          // Fallback: grab any /products/ links with adjacent price text
          if (results.length === 0) {
            const allProductLinks = document.querySelectorAll("a[href*=\"/products/\"]");
            const seen = new Set<string>();
            for (const el of allProductLinks) {
              const a = el as HTMLAnchorElement;
              const href = a.getAttribute("href") || "";
              if (seen.has(href)) continue;
              seen.add(href);

              const name = a.textContent?.trim() || "";
              if (!name || name.length < 3) continue;

              const container = a.closest("li") || a.closest("article") || a.parentElement?.parentElement;
              const priceEl = container?.querySelector("[class*=\"price\"]");
              const price = priceEl?.textContent?.trim() || "";
              const slug = href.split("/products/")[1]?.split("?")[0]?.replace(/\/$/, "") || "";

              results.push({ name, href, price, stock: "", partNumber: slug });
            }
          }

          return results;
        });

        log.info(`  Found ${productData.length} products on collection page`);

        for (const item of productData) {
          if (!item.href) continue;

          const fullUrl = item.href.startsWith("http") ? item.href : `${BASE_URL}${item.href}`;

          // Listing cards that show a price are stored directly
          if (item.price) {
            const { price, currency } = parsePrice(item.price);
            const speedInfo = detectSpeed(item.name);
            if (price > 0) {
              products.push({
                partNumber: item.partNumber || item.name.slice(0, 80),
                name: item.name,
                price,
                currency: currency === "USD" ? "GBP" : currency, // ATGBICS is GBP — parsePrice may default to USD if no symbol on listing
                stockLevel: item.stock ? parseStockLevel(item.stock) : "in_stock",
                quantity: item.stock ? parseQuantity(item.stock) : undefined,
                url: fullUrl,
                formFactor: detectFormFactor(item.name),
                speedGbps: speedInfo?.speedGbps,
                speed: speedInfo?.speed,
                reachLabel: detectReach(item.name),
                fiberType: detectFiberType(item.name),
              });
            }
          }
        }

        // Enqueue next page if pagination exists
        await enqueueLinks({
          selector: "a[href*=\"?page=\"], a.pagination__next, a[rel=\"next\"], .pagination a[href]",
          transformRequestFunction: (req) => {
            if (pagesScraped >= MAX_PAGES) return false;
            return req;
          },
        });

        pagesScraped++;
      } else {
        // Product detail page — extract precise data
        const data = await page.evaluate(() => {
          const title = document.querySelector(
            "h1.product__title, h1.product-title, h1.product_title, h1"
          )?.textContent?.trim() || "";

          // Shopify price — prefer sale price if available
          const salePriceEl = document.querySelector(
            ".price__sale .price-item--sale, .product__price .money, [class*=\"price\"] .money, [data-product-price], .price ins"
          );
          const priceText = salePriceEl?.textContent?.trim() || "";

          // Stock / availability
          const stockEl = document.querySelector(
            ".product__availability, .availability, [class*=\"stock\"], [class*=\"inventory\"], .badge--sold-out, .badge--in-stock"
          );
          const stockText = stockEl?.textContent?.trim() || "";

          // Quantity badge (some Shopify themes show "X in stock")
          const qtyEl = document.querySelector("[class*=\"quantity\"], [class*=\"inventory-count\"]");
          const qtyText = qtyEl?.textContent?.trim() || "";

          // Short description / variant title for reach/fiber info
          const descEl = document.querySelector(
            ".product__description, .product-description, .rte p:first-child, .product__short-description"
          );
          const description = descEl?.textContent?.trim() || "";

          // SKU / part number (Shopify often exposes this)
          const skuEl = document.querySelector(".product__sku, [class*=\"sku\"], [itemprop=\"sku\"]");
          const sku = skuEl?.textContent?.replace(/SKU[:\s]*/i, "").trim() || "";

          return { title, priceText, stockText, qtyText, description, sku };
        });

        const slug = url.split("/products/")[1]?.split("?")[0]?.replace(/\/$/, "") || "";
        const partNumber = data.sku || slug;
        const name = data.title || slug;

        const combinedText = `${name} ${data.description}`;
        const { price, currency } = parsePrice(data.priceText);

        if (price > 0) {
          const speedInfo = detectSpeed(combinedText);
          const detail: AtgbicsProduct = {
            partNumber,
            name,
            price,
            currency: currency === "USD" ? "GBP" : currency, // ATGBICS prices in GBP
            stockLevel: data.stockText ? parseStockLevel(data.stockText) : "in_stock",
            quantity: data.qtyText ? parseQuantity(data.qtyText) : undefined,
            url,
            formFactor: detectFormFactor(combinedText),
            speedGbps: speedInfo?.speedGbps,
            speed: speedInfo?.speed,
            reachLabel: detectReach(combinedText),
            fiberType: detectFiberType(combinedText),
          };
          products.push(detail);
          detailProducts.add(detail);
        }

        pagesScraped++;
      }
    },
  });

  const startUrls = CATEGORY_URLS.map((path) => `${BASE_URL}${path}`);
  await crawler.run(startUrls);

  console.log(`\nPages scraped: ${pagesScraped}`);
  console.log(`Products found: ${products.length}`);

  // Deduplicate by partNumber — prefer product detail page data (more precise).
  // The first entry for a key is kept unless a later one comes from a detail
  // page and the stored one does not.
  const uniqueProducts = new Map<string, AtgbicsProduct>();
  for (const p of products) {
    const key = p.partNumber || p.name;
    const existing = uniqueProducts.get(key);
    if (!existing || (detailProducts.has(p) && !detailProducts.has(existing))) {
      uniqueProducts.set(key, p);
    }
  }

  // Write to database
  let written = 0;
  let skipped = 0;
  let failed = 0;

  for (const p of uniqueProducts.values()) {
    try {
      const transceiverId = await findOrCreateScrapedTransceiver({
        partNumber: p.partNumber,
        vendorId,
        formFactor: p.formFactor,
        speedGbps: p.speedGbps,
        speed: p.speed,
        reachLabel: p.reachLabel,
        fiberType: p.fiberType,
        category: "DataCenter",
      });

      const hash = contentHash({ price: p.price, stock: p.stockLevel, qty: p.quantity });
      const isNew = await upsertPriceObservation({
        transceiverId,
        sourceVendorId: vendorId,
        price: p.price,
        currency: p.currency,
        stockLevel: p.stockLevel,
        quantityAvailable: p.quantity,
        url: p.url,
        contentHash: hash,
      });

      if (isNew) written++;
      else skipped++;
    } catch (err) {
      failed++;
      console.error(`  Error: ${p.partNumber}:`, (err as Error).message);
    }
  }

  console.log(`\nDatabase: ${written} new, ${skipped} unchanged (${uniqueProducts.size} unique)`);
  if (failed > 0) {
    // Without this, write failures appear only in the per-item error lines
    console.warn(`Database: ${failed} product(s) failed to persist`);
  }
  console.log("=== ATGBICS Scraper Complete ===\n");
}
|
||||
|
||||
// Direct execution (not imported as a module): run the scraper once, then
// close the database pool. On failure, log the error, close the pool and exit
// with status 1.
if (require.main === module) {
  const exitWithFailure = (err: unknown): void => {
    console.error("Fatal:", err);
    pool.end();
    process.exit(1);
  };

  scrapeAtgbics()
    .then(() => pool.end())
    .catch(exitWithFailure);
}
|
||||
351
packages/scraper/src/scrapers/prolabs.ts
Normal file
351
packages/scraper/src/scrapers/prolabs.ts
Normal file
@ -0,0 +1,351 @@
|
||||
/**
|
||||
* ProLabs Scraper — Enterprise-grade compatible optics (Legrand subsidiary)
|
||||
*
|
||||
* prolabs.com — Server-rendered HTML with public USD pricing.
|
||||
* Products listed under /products/networking/fiber-optics/ category pages.
|
||||
* Pagination via ?page=N. Rate limited: 1 req/2sec. Max 100 pages per category.
|
||||
*
|
||||
* SKU format examples: "Q-4X10G-LR-PR", "SFP-10G-SR-PR", "Q28-100G-LR4-PR"
|
||||
*/
|
||||
import { pool, findOrCreateScrapedTransceiver, ensureVendor, upsertPriceObservation } from "../utils/db";
|
||||
import { contentHash } from "../utils/hash";
|
||||
|
||||
/** Site origin; product and category paths are appended to it. */
const BASE = "https://www.prolabs.com";

/** Request headers sent with every page fetch. */
const HEADERS = {
  "User-Agent": "Mozilla/5.0 (compatible; TIP-Bot/1.0; research)",
  Accept: "text/html,application/xhtml+xml",
};

/** Highest page number requested within a single category. */
const MAX_PAGES = 100;

/** [path suffix under the fiber-optics section, default form factor, default speed in Gbps] */
const CATEGORY_ROWS: ReadonlyArray<readonly [string, string, number]> = [
  ["/sfp-modules", "SFP", 1],
  ["/sfp-plus-modules", "SFP+", 10],
  ["/sfp28-modules", "SFP28", 25],
  ["/qsfp-plus-modules", "QSFP+", 40],
  ["/qsfp28-modules", "QSFP28", 100],
  ["/qsfp-dd-modules", "QSFP-DD", 400],
  ["/coherent-modules", "QSFP-DD", 400],
  // Broad fallback category in case above paths differ on the live site
  ["", "SFP+", 10],
];

/**
 * Category pages to crawl, each with the form factor / speed assumed for its
 * products when the SKU itself does not say otherwise.
 */
const CATEGORIES = CATEGORY_ROWS.map(([suffix, formFactor, speedGbps]) => ({
  path: `/products/networking/fiber-optics${suffix}`,
  formFactor,
  speed: `${speedGbps}G`,
  speedGbps,
}));
|
||||
|
||||
/** One product parsed from a ProLabs category page. */
interface Product {
  // --- identity ---
  /** SKU taken from the last segment of the product URL (at most 80 characters). */
  partNumber: string;
  /** Link text or nearby heading; falls back to the URL segment. */
  name: string;
  /** Absolute product URL. */
  url: string;

  // --- offer ---
  /** Listed price, when one was found near the product link. */
  price?: number;
  /** Raw, lower-cased stock phrase found near the product link. */
  stockStatus?: string;

  // --- classification (from the SKU, else the category defaults) ---
  formFactor: string;
  speed: string;
  speedGbps: number;

  // --- optics attributes detected from name + part number ---
  reachLabel?: string;
  reachMeters?: number;
  fiberType?: string;
  wavelength?: string;
}
|
||||
|
||||
/**
 * Resolve after the given delay; used to space out requests.
 *
 * @param ms Delay in milliseconds.
 */
function sleep(ms: number): Promise<void> {
  return new Promise<void>((done) => {
    setTimeout(done, ms);
  });
}
|
||||
|
||||
/**
 * Determine the reach of an optic from its text.
 *
 * Precedence: an explicit kilometre figure, then an explicit metre figure
 * (both from fixed lists, case-insensitive), then an upper-case optic code
 * (LR, ER, ZR, SR, DR, FR, optionally followed by "4") mapped to a nominal reach.
 *
 * @param text Product name and/or part number.
 * @returns The reach label and its length in metres, or `undefined` if unknown.
 */
function detectReach(text: string): { label: string; meters: number } | undefined {
  const explicitKm = [120, 80, 40, 20, 10, 2];
  for (const km of explicitKm) {
    if (new RegExp(`\\b${km}\\s*km\\b`, "i").test(text)) {
      return { label: `${km}km`, meters: km * 1000 };
    }
  }

  const explicitMeters = [550, 500, 400, 300, 150, 100, 30];
  for (const m of explicitMeters) {
    if (new RegExp(`\\b${m}\\s*m\\b`, "i").test(text)) {
      return { label: `${m}m`, meters: m };
    }
  }

  // Optic codes are matched case-sensitively as whole tokens.
  const nominalByCode: ReadonlyArray<readonly [RegExp, number]> = [
    [/\bLR4?\b/, 10000],
    [/\bER4?\b/, 40000],
    [/\bZR4?\b/, 80000],
    [/\bSR4?\b/, 300],
    [/\bDR4?\b/, 500],
    [/\bFR4?\b/, 2000],
  ];
  const hit = nominalByCode.find(([code]) => code.test(text));
  if (hit === undefined) return undefined;

  const meters = hit[1];
  const label = meters >= 1000 ? `${meters / 1000}km` : `${meters}m`;
  return { label, meters };
}
|
||||
|
||||
/**
 * Classify the medium of an optic from its text (case-insensitive).
 *
 * Two-letter optic codes (LX, LR, ER, ZR / SX, SR) must be delimited by a
 * non-letter or by the start/end of the text, so a code that begins or ends
 * the string (e.g. a bare part number "SFP-10G-LR") is recognised too.
 *
 * @param text Product name and/or part number.
 * @returns "SMF", "MMF", "Copper", or "" when the medium cannot be determined.
 */
function detectFiber(text: string): string {
  const singleMode = /single.?mode|smf|(?:^|[^a-z])(?:lx|lr|er|zr)(?:[^a-z]|$)|bidi|cwdm|dwdm/i;
  const multiMode = /multi.?mode|mmf|(?:^|[^a-z])(?:sx|sr)(?:[^a-z]|$)/i;
  const copper = /copper|dac|twinax|rj.?45|base-t|cat[56x]/i;

  if (singleMode.test(text)) return "SMF";
  if (multiMode.test(text)) return "MMF";
  if (copper.test(text)) return "Copper";
  return "";
}
|
||||
|
||||
/**
 * Extract the first 3–4 digit wavelength followed by "nm" (case-insensitive).
 *
 * @param text Product name and/or part number.
 * @returns The digits of the wavelength, or "" when none is present.
 */
function detectWavelength(text: string): string {
  const found = /(\d{3,4})\s*nm/i.exec(text);
  if (found === null) return "";
  return found[1];
}
|
||||
|
||||
/**
 * Infer form factor and speed from a ProLabs SKU, falling back to the
 * category defaults when the SKU is not specific enough (e.g. when crawling
 * the broad fallback category).
 *
 * Rules, checked in this order (the SKU is upper-cased first):
 *   QDD- / QDD_ prefix, or "QSFP?DD" anywhere  -> QSFP-DD 400G
 *   Q28- / Q28_ prefix, or "QSFP28" anywhere   -> QSFP28  100G
 *   Q- / Q_ prefix without "28" in the first
 *     five characters                          -> QSFP+   40G
 *   SFP28- / SFP28_ prefix, or "SFP-25"        -> SFP28   25G
 *   S- / S_ prefix                             -> SFP     1G
 *   anything else (including "SFP-...")        -> the category's own defaults
 *
 * @param sku Part number as found in the product URL.
 * @param cat Category the product was listed under; supplies the defaults.
 * @returns Form factor, speed label and speed in Gbps.
 */
function inferFromSku(sku: string, cat: typeof CATEGORIES[number]): {
  formFactor: string;
  speed: string;
  speedGbps: number;
} {
  const upper = sku.toUpperCase();

  if (/^QDD[-_]|QSFP.DD/.test(upper)) {
    return { formFactor: "QSFP-DD", speed: "400G", speedGbps: 400 };
  }
  if (/^Q28[-_]|QSFP28/.test(upper)) {
    return { formFactor: "QSFP28", speed: "100G", speedGbps: 100 };
  }
  // "Q-" covers "Q-4X..." as well; a "28" right after the prefix is left to the fallback.
  if (/^Q[-_]/.test(upper) && !upper.slice(0, 5).includes("28")) {
    return { formFactor: "QSFP+", speed: "40G", speedGbps: 40 };
  }
  if (/^SFP28[-_]|SFP-25/.test(upper)) {
    return { formFactor: "SFP28", speed: "25G", speedGbps: 25 };
  }
  if (/^S[-_]/.test(upper)) {
    return { formFactor: "SFP", speed: "1G", speedGbps: 1 };
  }

  return { formFactor: cat.formFactor, speed: cat.speed, speedGbps: cat.speedGbps };
}
|
||||
|
||||
/**
 * Parse product listings from a ProLabs category page.
 *
 * ProLabs uses a standard e-commerce layout:
 *   - Product cards with an <a> link containing the product URL and name
 *   - Price in a span with "price" in class or as "$XX.XX" text nearby
 *   - SKU / part number in the URL slug
 *   - Stock badge: "In Stock" / "Out of Stock" / "Call for Availability"
 *
 * We parse with lightweight regex on collapsed HTML — same approach as gbics.ts
 * and sfpcables.ts (no DOM parser dependency).
 *
 * Strategy 1 matches anchors whose href ends in a SKU-like segment. Strategy 2
 * runs only when strategy 1 found nothing and accepts any /products/ link that
 * has a "$" price in its context window.
 *
 * @param html Raw HTML of one category page.
 * @param cat  Category being crawled; supplies default form factor and speed.
 * @returns Products found on the page, deduplicated by URL.
 */
function parseProductList(html: string, cat: typeof CATEGORIES[number]): Product[] {
  const products: Product[] = [];
  const collapsed = html.replace(/\s+/g, " ");

  // Navigation / filter / pagination links are not products.
  const nonProductUrl = /category|filter|sort|page|breadcrumb/i;
  // Strip every thousands separator, not only the first one ("1,234,567.00").
  const toNumber = (raw: string): number => parseFloat(raw.replace(/,/g, ""));
  // Prices outside (0, 100000) are treated as parse noise.
  const plausiblePrice = (value: number | undefined): number | undefined =>
    value !== undefined && value > 0 && value < 100000 ? value : undefined;

  // Strategy 1: product cards with structured href containing a SKU-like segment
  // Match anchor tags whose href is a deep product path ending in a SKU pattern
  const productLinkRegex = /href="(\/products\/[^"]*?\/([A-Z0-9][A-Z0-9\-_]{3,}(?:-PR)?))"\s[^>]*>([^<]{10,})<\/a>/gi;
  let match: RegExpExecArray | null;

  while ((match = productLinkRegex.exec(collapsed)) !== null) {
    const relUrl = match[1];
    const skuFromUrl = match[2];
    const linkText = match[3].trim();

    if (nonProductUrl.test(relUrl)) continue;
    if (linkText.length > 200) continue;

    const url = BASE + relUrl;
    const partNumber = skuFromUrl.slice(0, 80);
    const name = linkText.length > 10 ? linkText : partNumber;

    // Context window: 100 characters before the link up to 700 characters after it
    const context = collapsed.slice(Math.max(0, match.index - 100), match.index + 700);
    const priceMatch = context.match(/\$\s*([\d,]+\.?\d{0,2})/) ||
      context.match(/price[^>]*>\s*\$?\s*([\d,]+\.?\d{0,2})/i);
    const price = priceMatch ? toNumber(priceMatch[1]) : undefined;

    // "unavailable" / "not available" are captured as such so they are not
    // mistaken for the "available" alternative.
    const stockMatch = context.match(
      /(in[\s-]stock|out[\s-]of[\s-]stock|call for availability|unavailable|not available|available|backordered)/i
    );
    const stockStatus = stockMatch ? stockMatch[1].toLowerCase() : undefined;

    const combined = name + " " + partNumber;
    const reach = detectReach(combined);
    const { formFactor, speed, speedGbps } = inferFromSku(partNumber, cat);

    products.push({
      partNumber, name, url,
      price: plausiblePrice(price),
      stockStatus,
      formFactor, speed, speedGbps,
      reachLabel: reach?.label,
      reachMeters: reach?.meters,
      fiberType: detectFiber(combined),
      wavelength: detectWavelength(combined),
    });
  }

  // Strategy 2: Fallback — any link to a /products/ URL that has a $ price nearby
  if (products.length === 0) {
    const altRegex = /href="(\/products\/[^"]{10,})"/gi;
    while ((match = altRegex.exec(collapsed)) !== null) {
      const relUrl = match[1];
      if (nonProductUrl.test(relUrl)) continue;

      // Context window: 50 characters before the link up to 800 characters after it
      const context = collapsed.slice(Math.max(0, match.index - 50), match.index + 800);
      const priceMatch = context.match(/\$\s*([\d,]+\.?\d{0,2})/);
      if (!priceMatch) continue;

      const price = toNumber(priceMatch[1]);
      const nameMatch = context.match(/<(?:h[23]|strong|span)[^>]*>([^<]{10,150})<\//i);
      const name = nameMatch ? nameMatch[1].trim() : relUrl.split("/").pop() || "";
      const partNumber = (relUrl.split("/").pop() ?? name).slice(0, 80);

      const url = BASE + relUrl;
      const combined = name + " " + partNumber;
      const reach = detectReach(combined);
      const { formFactor, speed, speedGbps } = inferFromSku(partNumber, cat);

      products.push({
        partNumber, name, url,
        price: plausiblePrice(price),
        formFactor, speed, speedGbps,
        reachLabel: reach?.label,
        reachMeters: reach?.meters,
        fiberType: detectFiber(combined),
        wavelength: detectWavelength(combined),
      });
    }
  }

  // Deduplicate by URL (first occurrence wins)
  const seen = new Set<string>();
  return products.filter((p) => {
    if (seen.has(p.url)) return false;
    seen.add(p.url);
    return true;
  });
}
|
||||
|
||||
/**
 * Check if the HTML contains a link to the next pagination page.
 *
 * A page counts as having a successor when the markup contains `rel="next"`,
 * or a `page=<currentPage + 1>` query parameter. The parameter may be
 * introduced by `?`, `&`, or the HTML-escaped `&amp;`, and must not be
 * followed by another digit (so `page=2` does not match `page=20`).
 *
 * @param html        Raw HTML of the current page.
 * @param currentPage 1-based number of the current page.
 */
function hasNextPage(html: string, currentPage: number): boolean {
  if (/rel="next"/i.test(html)) return true;

  const nextPageNum = currentPage + 1;
  const pattern = new RegExp(`(?:[?&]|&amp;)page=${nextPageNum}(?!\\d)`, "i");
  return pattern.test(html);
}
|
||||
|
||||
/**
 * Download one page as text using the shared request headers.
 *
 * The request is aborted after 30 seconds.
 *
 * @param url Absolute URL to fetch.
 * @returns The response body.
 * @throws Error when the response status is not OK.
 */
async function fetchPage(url: string): Promise<string> {
  const timeout = AbortSignal.timeout(30000);
  const response = await fetch(url, { headers: HEADERS, signal: timeout });
  if (response.ok) {
    return response.text();
  }
  throw new Error(`HTTP ${response.status} for ${url}`);
}
|
||||
|
||||
/**
 * Map a free-text stock phrase to one of the stock-level values.
 *
 * Hyphens and runs of whitespace are folded to single spaces first, so
 * "in-stock" and "out-of-stock" are treated like "in stock" and "out of stock".
 * Negative phrases are checked before positive ones so that "unavailable" and
 * "not available" are not classified by the "available" substring they contain.
 *
 * @param raw Stock phrase as scraped; may be missing.
 * @returns "on_request" for missing or unrecognised text, otherwise the matching level.
 */
function normalizeStockLevel(
  raw?: string
): "in_stock" | "low_stock" | "out_of_stock" | "on_request" {
  if (!raw) return "on_request";

  const text = raw.toLowerCase().replace(/[\s-]+/g, " ").trim();
  const mentions = (...phrases: string[]): boolean => phrases.some((p) => text.includes(p));

  if (mentions("out of stock", "backordered", "unavailable", "not available")) return "out_of_stock";
  if (mentions("in stock", "available")) return "in_stock";
  if (mentions("low stock", "limited")) return "low_stock";
  return "on_request";
}
|
||||
|
||||
/**
 * Crawl every ProLabs category, page by page, and persist what was found.
 *
 * For each product URL not seen earlier in the run, a transceiver row is
 * found or created; a price observation (USD) is upserted when the listing
 * showed a positive price. Requests are spaced by 2-second pauses between
 * pages and between categories. A failed page fetch ends the current
 * category; a failed database write is logged and skips only that product.
 */
export async function scrapeProLabs(): Promise<void> {
  console.log("=== ProLabs Scraper Starting ===\n");

  const vendorId = await ensureVendor(
    "ProLabs",
    "compatible",
    "https://www.prolabs.com",
    "https://www.prolabs.com/products/networking/fiber-optics"
  );

  // URLs already handled in this run — the broad fallback category overlaps
  // with the specific ones.
  const visitedUrls = new Set<string>();
  let processedTotal = 0;
  let priceChanges = 0;

  const lastCategory = CATEGORIES[CATEGORIES.length - 1];

  for (const cat of CATEGORIES) {
    console.log(`\n--- ${cat.formFactor} (${cat.speed}) [${cat.path}] ---`);

    let pagesFetched = 0;
    let processedHere = 0;

    for (let pageNo = 1; pageNo <= MAX_PAGES; pageNo++) {
      const categoryUrl = `${BASE}${cat.path}`;
      const pageUrl = pageNo === 1 ? categoryUrl : `${categoryUrl}?page=${pageNo}`;

      try {
        const html = await fetchPage(pageUrl);
        const listed = parseProductList(html, cat);

        // Filter first, then mark as visited.
        const fresh = listed.filter((item) => !visitedUrls.has(item.url));
        for (const item of fresh) {
          visitedUrls.add(item.url);
        }

        console.log(`  Page ${pageNo}: ${listed.length} found, ${fresh.length} new`);

        for (const item of fresh) {
          try {
            const txId = await findOrCreateScrapedTransceiver({
              partNumber: item.partNumber,
              vendorId,
              formFactor: item.formFactor,
              speedGbps: item.speedGbps,
              speed: item.speed,
              reachMeters: item.reachMeters,
              reachLabel: item.reachLabel,
              fiberType: item.fiberType,
              wavelengths: item.wavelength,
              category: "DataCenter",
            });

            // Only listings that showed a positive price produce an observation.
            if (item.price !== undefined && item.price > 0) {
              const hash = contentHash({
                price: item.price,
                part: item.partNumber,
                stock: item.stockStatus ?? "",
              });
              const changed = await upsertPriceObservation({
                transceiverId: txId,
                sourceVendorId: vendorId,
                price: item.price,
                currency: "USD",
                stockLevel: normalizeStockLevel(item.stockStatus),
                url: item.url,
                contentHash: hash,
              });
              if (changed) priceChanges++;
            }

            processedHere++;
            processedTotal++;
          } catch (err) {
            console.warn(`    DB error [${item.partNumber}]: ${(err as Error).message.slice(0, 80)}`);
          }
        }

        pagesFetched++;

        // Stop at an empty page or when no link to the next page exists.
        const reachedEnd = listed.length === 0 || !hasNextPage(html, pageNo);
        if (reachedEnd) break;

        await sleep(2000);
      } catch (err) {
        console.error(`  Page ${pageNo} failed: ${(err as Error).message}`);
        break;
      }
    }

    console.log(`  Category done: ${processedHere} products across ${pagesFetched} page(s)`);

    // Pause between categories, but not after the final one.
    if (cat !== lastCategory) {
      await sleep(2000);
    }
  }

  console.log(`\n=== ProLabs Complete: ${processedTotal} products processed, ${priceChanges} price updates ===`);
}
|
||||
|
||||
// CLI entry point: run the scraper only when this file is executed directly.
if (require.main === module) {
  void (async () => {
    let exitCode = 0;

    try {
      await scrapeProLabs();
    } catch (err) {
      console.error("Fatal:", err);
      exitCode = 1;
    }

    // Close the pool exactly once and wait for it to settle. Previously
    // process.exit(1) ran right after an un-awaited pool.end(), and a failing
    // pool.end() on the success path triggered a second pool.end() call.
    try {
      await pool.end();
    } catch (err) {
      console.error("Fatal:", err);
      exitCode = 1;
    }

    // On success the process is left to exit on its own, as before.
    if (exitCode !== 0) process.exit(exitCode);
  })();
}
|
||||
121
sql/006-seed-knowledge-base.sql
Normal file
121
sql/006-seed-knowledge-base.sql
Normal file
@ -0,0 +1,121 @@
|
||||
-- Knowledge Base seed data: Troubleshooting, FAQ, Best Practices, Known Issues
|
||||
-- Run: docker exec -i tip-postgres psql -U tip -d transceiver_db < sql/006-seed-knowledge-base.sql
|
||||
|
||||
-- Clean test data
|
||||
DELETE FROM knowledge_base WHERE subcategory = 'test';
|
||||
|
||||
-- === TROUBLESHOOTING ===
|
||||
|
||||
INSERT INTO knowledge_base (category, subcategory, question, answer, applies_to_form_factors, applies_to_speeds, severity, tags, resolution_steps) VALUES
|
||||
('troubleshooting', 'tx_power', 'Low Tx power alarm on SFP+ SR module',
|
||||
'Tx power dropped below -11.0 dBm on a module rated for -8.2 to +0.5 dBm. This indicates laser degradation. The laser is approaching end-of-life — you have approximately 2-4 weeks before complete failure. Replace during the next maintenance window, do not wait for an unplanned outage.',
|
||||
'{SFP+}', '{10G}', 'high', '{tx_power,laser,degradation,alarm}',
|
||||
'[{"step": 1, "action": "Check DOM readings: show interface transceiver details"}, {"step": 2, "action": "Compare Tx power to module spec (-8.2 to +0.5 dBm for SR)"}, {"step": 3, "action": "If Tx < -11.0 dBm, schedule replacement"}, {"step": 4, "action": "Order spare and replace in next maintenance window"}]');
|
||||
|
||||
INSERT INTO knowledge_base (category, subcategory, question, answer, applies_to_form_factors, applies_to_speeds, severity, tags, resolution_steps) VALUES
|
||||
('troubleshooting', 'tx_power', 'Low Tx power on QSFP-DD DR4 — per-lane diagnostics',
|
||||
'QSFP-DD DR4 modules have 4 independent lanes, each rated for -2.9 to +3.0 dBm Tx power. If only one lane shows low power, the module has a failing lane laser. If all lanes drop, check the module temperature first — overheating causes power rollback. Use per-lane DOM: show interface transceiver details.',
|
||||
'{QSFP-DD}', '{400G}', 'high', '{tx_power,qsfp-dd,per_lane,dom}',
|
||||
'[{"step": 1, "action": "show interface transceiver details — check per-lane Tx power"}, {"step": 2, "action": "Check module temperature (alarm above 75C)"}, {"step": 3, "action": "If single lane low: failing laser, replace module"}, {"step": 4, "action": "If all lanes low + high temp: improve airflow first"}]');
|
||||
|
||||
INSERT INTO knowledge_base (category, subcategory, question, answer, applies_to_form_factors, applies_to_speeds, severity, tags, resolution_steps) VALUES
|
||||
('troubleshooting', 'ber_errors', 'High pre-FEC BER on 100G QSFP28 link',
|
||||
'Pre-FEC BER above 2.4e-4 (KP4 FEC threshold) means the Forward Error Correction is struggling. Common causes: dirty fiber end-faces (40% of cases), fiber type mismatch (SMF cable on MMF optic), or exceeded power budget. Post-FEC errors (uncorrected) mean the FEC has lost the fight — the link will drop packets.',
|
||||
'{QSFP28}', '{100G}', 'high', '{ber,fec,kp4,errors,pre-fec}',
|
||||
'[{"step": 1, "action": "show interface counters errors — check CRC and FEC counters"}, {"step": 2, "action": "If CRC > 100/min: inspect and clean fiber end-faces"}, {"step": 3, "action": "If CRC > 10000/min: check fiber type match (SMF vs MMF)"}, {"step": 4, "action": "Calculate power budget: Tx - losses >= Rx sensitivity + 3dB margin"}]');
|
||||
|
||||
INSERT INTO knowledge_base (category, subcategory, question, answer, applies_to_form_factors, applies_to_speeds, severity, tags, resolution_steps) VALUES
|
||||
('troubleshooting', 'temperature', 'Transceiver temperature alarm in top-of-rack switch',
|
||||
'Top-of-rack switches run hotter because heat rises. A transceiver rated for 0-70C (COM) will alarm above 75C. Common in high-density spine switches with poor airflow. Before replacing the optic, fix the thermal environment. An overheating laser degrades 10x faster than a properly cooled one.',
|
||||
'{SFP+,QSFP28,QSFP-DD,OSFP}', '{10G,100G,400G}', 'medium', '{temperature,thermal,overheating,airflow}',
|
||||
'[{"step": 1, "action": "show interface transceiver details — check temperature"}, {"step": 2, "action": "Verify fan tray status and speed"}, {"step": 3, "action": "Install blanking panels in empty slots"}, {"step": 4, "action": "Consider IND-rated (-40 to +85C) modules if environment is harsh"}]');
|
||||
|
||||
INSERT INTO knowledge_base (category, subcategory, question, answer, applies_to_form_factors, applies_to_speeds, severity, tags, resolution_steps) VALUES
|
||||
('troubleshooting', 'fiber_mismatch', 'Link down: SMF optic with MMF patch cable',
|
||||
'A common deployment mistake: using a multimode fiber patch cable with a single-mode optic (LR/ER/ZR modules). The core diameter mismatch (9um SMF vs 50um MMF) causes massive signal loss. Symptoms: link stays down, Rx power extremely low despite good Tx. Always verify fiber type: SR = MMF (orange cable), LR/ER/ZR/DR/FR = SMF (yellow cable).',
|
||||
'{SFP+,QSFP28,QSFP-DD,OSFP}', '{10G,25G,100G,400G}', 'critical', '{fiber,mismatch,smf,mmf,link_down}',
|
||||
'[{"step": 1, "action": "Check optic type: SR = MMF, LR/ER/ZR/DR/FR = SMF"}, {"step": 2, "action": "Verify patch cable color: orange = MMF, yellow = SMF"}, {"step": 3, "action": "Replace patch cable with correct type"}, {"step": 4, "action": "Verify link comes up and check Rx power"}]');
|
||||
|
||||
INSERT INTO knowledge_base (category, subcategory, question, answer, applies_to_form_factors, applies_to_speeds, severity, tags, resolution_steps) VALUES
|
||||
('troubleshooting', 'coherent', '400ZR link not establishing — OSNR too low',
|
||||
'Coherent 400ZR optics require OSNR > 20 dB for reliable operation with 16QAM modulation. Unlike direct-detect modules, coherent links fail silently when OSNR drops. Check Tx power (-10.0 to +2.0 dBm), OSNR at receiver (> 20 dB), and chromatic dispersion within module compensation range. For DWDM, verify channel plan alignment.',
|
||||
'{QSFP-DD,OSFP}', '{400G}', 'high', '{coherent,400zr,osnr,dwdm}',
|
||||
'[{"step": 1, "action": "show interfaces diagnostics optics — check OSNR and CD"}, {"step": 2, "action": "Verify OSNR > 20 dB"}, {"step": 3, "action": "Check Tx power range: -10.0 to +2.0 dBm"}, {"step": 4, "action": "Verify DWDM channel plan alignment"}]');
|
||||
|
||||
INSERT INTO knowledge_base (category, subcategory, question, answer, applies_to_form_factors, applies_to_speeds, severity, tags, resolution_steps) VALUES
|
||||
('troubleshooting', 'power_budget', 'Link flapping on long-distance 10G LR link',
|
||||
'A 10G LR (1310nm SMF) rated for 10km flaps at 8km. Power budget: Tx -6.0 dBm, fiber 8km x 0.35 dB/km = 2.8 dB, 4 connectors x 0.3 dB = 1.2 dB, 2 splices x 0.1 dB = 0.2 dB. Total loss: 4.2 dB. Margin: 4.2 dB looks fine. But: two dirty connectors at +1.5 dB each = 3.0 dB extra. New margin: 1.2 dB — below 3 dB safety threshold. Fix: clean all connectors.',
|
||||
'{SFP+}', '{10G}', 'medium', '{power_budget,flapping,distance,connector,cleaning}',
|
||||
'[{"step": 1, "action": "Calculate complete power budget"}, {"step": 2, "action": "Include ALL connectors and patch panels"}, {"step": 3, "action": "Add 0.5-1.5 dB per dirty connector"}, {"step": 4, "action": "Clean all fiber end-faces"}, {"step": 5, "action": "Verify with optical power meter"}]');
|
||||
|
||||
INSERT INTO knowledge_base (category, subcategory, question, answer, applies_to_form_factors, applies_to_speeds, severity, tags, resolution_steps) VALUES
|
||||
('troubleshooting', 'intermittent', 'Transceiver intermittently drops — CRC bursts',
|
||||
'CRC error bursts (100-500 per minute, then clean for hours) usually indicate a micro-bend in the fiber or a loose connector. The fiber moves slightly with vibration or temperature changes. Check the physical fiber path: tight bends below minimum radius, cables pinched under trays, or connectors not fully seated.',
|
||||
'{SFP+,QSFP28,QSFP-DD}', '{10G,100G,400G}', 'medium', '{crc,intermittent,microbend,connector}',
|
||||
'[{"step": 1, "action": "show interface counters errors — correlate bursts with timestamps"}, {"step": 2, "action": "Check if errors correlate with HVAC cycles"}, {"step": 3, "action": "Inspect fiber path for tight bends (min radius: 30mm)"}, {"step": 4, "action": "Reseat all connectors"}, {"step": 5, "action": "Use OTDR to find fault point"}]');
|
||||
|
||||
-- === FAQ ===
|
||||
|
||||
INSERT INTO knowledge_base (category, subcategory, question, answer, applies_to_form_factors, applies_to_speeds, severity, tags) VALUES
|
||||
('faq', 'compatibility', 'Can I use compatible transceivers in Cisco/Arista/Juniper switches?',
|
||||
'Yes, in most cases. Cisco Nexus uses "service unsupported-transceiver", Arista allows them by default, Juniper may need "set chassis fpc pic port allow-unsupported-sfp". The EEPROM coding must match the switch vendor. Compatible vendors like Flexoptix code modules to match the target platform. Always test 10 units before bulk ordering.',
|
||||
'{SFP+,SFP28,QSFP28,QSFP-DD,OSFP}', '{10G,25G,100G,400G,800G}', 'info', '{compatibility,third-party,vendor-lock,coding}');
|
||||
|
||||
INSERT INTO knowledge_base (category, subcategory, question, answer, applies_to_form_factors, applies_to_speeds, severity, tags) VALUES
|
||||
('faq', 'compatibility', 'What is transceiver EEPROM coding?',
|
||||
'Every transceiver has an EEPROM storing its identity: vendor name, part number, serial, speeds, and calibration data. Switches read this to identify the module. Some vendors check for their vendor ID and may reject third-party modules. Compatible vendors program the EEPROM to match the target platform — same hardware, different EEPROM programming.',
|
||||
'{SFP+,SFP28,QSFP28,QSFP-DD,OSFP}', '{10G,25G,100G,400G,800G}', 'info', '{eeprom,coding,compatibility}');
|
||||
|
||||
INSERT INTO knowledge_base (category, subcategory, question, answer, applies_to_form_factors, applies_to_speeds, severity, tags) VALUES
|
||||
('faq', 'technology', 'What is the difference between QSFP-DD and OSFP?',
|
||||
'Both are 400G+ form factors. QSFP-DD is backward-compatible with QSFP28 cages — same width, just longer. OSFP is wider, allowing better thermal dissipation for high-power coherent modules (20W+). QSFP-DD dominates hyperscale data centers (more ports per linecard), OSFP is preferred for telecom/coherent where thermal headroom matters more than density.',
|
||||
'{QSFP-DD,OSFP}', '{400G,800G}', 'info', '{qsfp-dd,osfp,form-factor,comparison}');
|
||||
|
||||
INSERT INTO knowledge_base (category, subcategory, question, answer, applies_to_form_factors, applies_to_speeds, severity, tags) VALUES
|
||||
('faq', 'technology', 'What is 400ZR and when should I use it?',
|
||||
'400ZR is a coherent pluggable standard (OIF) that packs DSP, laser, and modulator into QSFP-DD or OSFP. Enables 400G over 80+ km on single wavelength without external line equipment. Use for DCI between campuses. Do not use for intra-DC links under 2km (DR4/FR4 cheaper) or ultra-long-haul >120km (needs ZR+ or traditional line systems). Power: 15-20W.',
|
||||
'{QSFP-DD,OSFP}', '{400G}', 'info', '{400zr,coherent,dci,pluggable}');
|
||||
|
||||
INSERT INTO knowledge_base (category, subcategory, question, answer, applies_to_form_factors, applies_to_speeds, severity, tags) VALUES
|
||||
('faq', 'purchasing', 'How much can I save with compatible vs OEM transceivers?',
|
||||
'Typical savings: SFP+ 10G: 5-10x ($15 vs $80-150). SFP28 25G: 3-5x ($20-35 vs $100-180). QSFP28 100G: 4-8x ($45-120 vs $300-900). QSFP-DD 400G: 2-4x ($250-500 vs $900-3200). Gap narrows at higher speeds because silicon cost dominates. For 400ZR coherent: ~50% savings only because the DSP is the main cost.',
|
||||
'{SFP+,SFP28,QSFP28,QSFP-DD}', '{10G,25G,100G,400G}', 'info', '{pricing,cost,savings,compatible,oem}');
|
||||
|
||||
INSERT INTO knowledge_base (category, subcategory, question, answer, applies_to_form_factors, applies_to_speeds, severity, tags) VALUES
|
||||
('faq', 'technology', 'What does DOM (Digital Optical Monitoring) show?',
|
||||
'DOM gives real-time telemetry: Tx Power (dBm), Rx Power (dBm), Temperature (C), Supply Voltage (V), Laser Bias Current (mA). Each has 4 alarm thresholds. Monitor Tx power for laser health (trending down = dying), Rx power for link quality, temperature for environment. CLI: show interface transceiver details.',
|
||||
'{SFP+,SFP28,QSFP28,QSFP-DD,OSFP}', '{10G,25G,100G,400G,800G}', 'info', '{dom,monitoring,diagnostics,telemetry}');
|
||||
|
||||
INSERT INTO knowledge_base (category, subcategory, question, answer, applies_to_form_factors, applies_to_speeds, severity, tags) VALUES
|
||||
('faq', 'deployment', 'How do I calculate a fiber link power budget?',
|
||||
'Power Budget = Tx Power - Total Loss. Total Loss = Fiber Loss + Connector Loss + Splice Loss. Fiber: 0.35 dB/km at 1310nm, 0.22 dB/km at 1550nm. Connector: 0.3 dB each. Splice: 0.1 dB each. Always reserve 3 dB margin. Example: 10G LR 8km, 4 connectors, 2 splices: Tx -6.0, loss 4.2 dB, Rx -10.2, sensitivity -14.4, margin 4.2 dB.',
|
||||
'{SFP+,SFP28,QSFP28,QSFP-DD,OSFP}', '{10G,25G,100G,400G}', 'info', '{power_budget,calculation,fiber_loss}');
|
||||
|
||||
INSERT INTO knowledge_base (category, subcategory, question, answer, applies_to_form_factors, applies_to_speeds, severity, tags) VALUES
|
||||
('faq', 'deployment', 'SR vs LR vs ER vs ZR vs DR vs FR — what do reach codes mean?',
|
||||
'SR (Short Reach): MMF, 100-300m, within-rack. LR (Long Reach): SMF, 10km, inter-building. ER (Extended Reach): SMF, 40km, metro. ZR (Very Long Reach): SMF, 80km, DCI. DR (Data center Reach): SMF, 500m, inter-pod parallel. FR (2km Reach): SMF, 2km, campus. Each uses different wavelengths — both ends must match.',
|
||||
'{SFP+,SFP28,QSFP28,QSFP-DD,OSFP}', '{10G,25G,100G,400G,800G}', 'info', '{reach,sr,lr,er,zr,dr,fr}');
|
||||
|
||||
-- === BEST PRACTICES ===
|
||||
|
||||
INSERT INTO knowledge_base (category, subcategory, question, answer, applies_to_form_factors, applies_to_speeds, severity, tags) VALUES
|
||||
('best_practice', 'operations', 'Always clean fiber connectors before inserting transceivers',
|
||||
'40% of transceiver RMAs test fine at the vendor — the problem was dirty connectors. One fingerprint adds 1-2 dB loss. Use IPA-based cleaning pen or cassette, verify with 200x inspection scope. Never blow on connectors. Cost of cleaning: $0.50. Cost of unnecessary RMA: $50-200 plus weeks of lead time.',
|
||||
'{SFP+,SFP28,QSFP28,QSFP-DD,OSFP}', '{10G,25G,100G,400G,800G}', 'info', '{cleaning,connector,best_practice,rma}');
|
||||
|
||||
INSERT INTO knowledge_base (category, subcategory, question, answer, applies_to_form_factors, applies_to_speeds, severity, tags) VALUES
|
||||
('best_practice', 'procurement', 'Test compatible transceivers in small batches before bulk ordering',
|
||||
'Never order 200 compatible optics based on datasheet alone. Buy 10, install in production switches, run for 2 weeks monitoring DOM, BER, temperature. Check: vendor authentication passes? DOM accurate? Any CRC errors? Survives switch reboot? Only after successful pilot, order the full batch.',
|
||||
'{SFP+,SFP28,QSFP28,QSFP-DD,OSFP}', '{10G,25G,100G,400G,800G}', 'info', '{testing,procurement,pilot}');
|
||||
|
||||
-- === KNOWN ISSUES ===
|
||||
|
||||
INSERT INTO knowledge_base (category, subcategory, question, answer, applies_to_form_factors, applies_to_speeds, severity, tags) VALUES
|
||||
('known_issue', 'interop', 'Cisco Nexus 9000 vendor-locking on QSFP-DD modules',
|
||||
'Cisco Nexus 9000 with NX-OS 10.2+ enforces stricter vendor checks on QSFP-DD. Compatible modules may log persistent "unsupported transceiver" warnings. Fix: "service unsupported-transceiver" in global config, then "no shut". Some firmware versions need "hardware profile transceiver-frequency default" for DWDM. Test with exact NX-OS version.',
|
||||
'{QSFP-DD}', '{400G}', 'medium', '{cisco,nexus,vendor-lock,nx-os}');
|
||||
|
||||
INSERT INTO knowledge_base (category, subcategory, question, answer, applies_to_form_factors, applies_to_speeds, severity, tags) VALUES
|
||||
('known_issue', 'interop', 'Arista 400G FEC negotiation mismatch with compatible optics',
|
||||
'Arista 7060X5/7260X defaults to specific FEC mode (RS-FEC CL119) for 400G. If compatible QSFP-DD advertises different FEC, link stays "notconnect" despite good power. Fix: manually set "fec rs-fec" or "fec cl119" under interface config. Negotiation issue, not hardware.',
|
||||
'{QSFP-DD,OSFP}', '{400G}', 'medium', '{arista,fec,negotiation,400g}');
|
||||
89
sql/006-whitebox-switches.sql
Normal file
89
sql/006-whitebox-switches.sql
Normal file
@ -0,0 +1,89 @@
|
||||
-- TIP: Transceiver Intelligence Platform
|
||||
-- Migration 006: Whitebox / Open Networking Switch Extensions
|
||||
--
|
||||
-- Adds columns for whitebox ODM/OEM switches, open networking OS support,
|
||||
-- OCP compliance, and hardware details needed for disaggregated networking.
|
||||
|
||||
-- ============================================================
|
||||
-- EXTEND switches TABLE with whitebox-specific columns
|
||||
-- ============================================================
|
||||
|
||||
-- Whitebox classification
ALTER TABLE switches ADD COLUMN IF NOT EXISTS is_whitebox BOOLEAN DEFAULT FALSE;
ALTER TABLE switches ADD COLUMN IF NOT EXISTS is_ocp_accepted BOOLEAN DEFAULT FALSE;
-- NULL must not appear in the IN list: "x IN ('a', NULL)" evaluates to NULL
-- (not FALSE) for any unlisted x, and a CHECK passes on NULL, so the
-- constraint would accept every value. A NULL ocp_status is still allowed
-- because the column is nullable and the expression is NULL for a NULL input.
ALTER TABLE switches ADD COLUMN IF NOT EXISTS ocp_status TEXT CHECK (ocp_status IN ('Accepted', 'Inspired', 'None'));
|
||||
|
||||
-- Remaining whitebox columns, added in one multi-action ALTER TABLE.
-- Column order matches the original one-statement-per-column sequence.
ALTER TABLE switches
    -- Open Networking OS support
    ADD COLUMN IF NOT EXISTS supported_nos            TEXT[]  DEFAULT '{}',
    ADD COLUMN IF NOT EXISTS onl_compatible           BOOLEAN DEFAULT FALSE,
    ADD COLUMN IF NOT EXISTS dent_compatible          BOOLEAN DEFAULT FALSE,
    ADD COLUMN IF NOT EXISTS cumulus_compatible       BOOLEAN DEFAULT FALSE,
    ADD COLUMN IF NOT EXISTS fboss_compatible         BOOLEAN DEFAULT FALSE,
    ADD COLUMN IF NOT EXISTS stratum_compatible       BOOLEAN DEFAULT FALSE,
    -- Hardware details (important for whitebox: CPU/RAM/storage determine NOS capability)
    ADD COLUMN IF NOT EXISTS cpu                      TEXT,
    ADD COLUMN IF NOT EXISTS cpu_cores                INTEGER,
    ADD COLUMN IF NOT EXISTS ram_gb                   NUMERIC,
    ADD COLUMN IF NOT EXISTS storage_gb               NUMERIC,
    ADD COLUMN IF NOT EXISTS storage_type             TEXT,
    -- ASIC generation tracking (extends existing asic_vendor/asic_model)
    ADD COLUMN IF NOT EXISTS asic_series              TEXT,
    ADD COLUMN IF NOT EXISTS asic_process_nm          INTEGER,
    -- Physical / form factor
    ADD COLUMN IF NOT EXISTS front_panel_ports        TEXT,
    ADD COLUMN IF NOT EXISTS mgmt_ports               TEXT,
    ADD COLUMN IF NOT EXISTS console_ports            TEXT,
    -- Transceiver form factors supported (derived from ports_config, but explicit for search)
    ADD COLUMN IF NOT EXISTS transceiver_form_factors TEXT[]  DEFAULT '{}',
    -- External references
    ADD COLUMN IF NOT EXISTS catalog_url              TEXT,
    ADD COLUMN IF NOT EXISTS sonic_hwsku              TEXT,
    ADD COLUMN IF NOT EXISTS onie_support             BOOLEAN DEFAULT FALSE,
    -- Scraping metadata
    ADD COLUMN IF NOT EXISTS last_scraped             TIMESTAMPTZ,
    ADD COLUMN IF NOT EXISTS scrape_source            TEXT;
|
||||
|
||||
-- ============================================================
-- INDEXES for whitebox queries
-- ============================================================

-- Partial indexes: only rows where the flag is TRUE are indexed.
CREATE INDEX IF NOT EXISTS idx_switches_is_whitebox
    ON switches (is_whitebox)
    WHERE is_whitebox = TRUE;

-- NOTE(review): sonic_compatible is not added by this migration; presumably it
-- already exists on switches from an earlier migration. Confirm.
CREATE INDEX IF NOT EXISTS idx_switches_sonic
    ON switches (sonic_compatible)
    WHERE sonic_compatible = TRUE;

CREATE INDEX IF NOT EXISTS idx_switches_ocp
    ON switches (is_ocp_accepted)
    WHERE is_ocp_accepted = TRUE;

CREATE INDEX IF NOT EXISTS idx_switches_asic_series
    ON switches (asic_series);

-- GIN indexes on the TEXT[] columns.
CREATE INDEX IF NOT EXISTS idx_switches_supported_nos
    ON switches USING GIN (supported_nos);

CREATE INDEX IF NOT EXISTS idx_switches_transceiver_ff
    ON switches USING GIN (transceiver_form_factors);
|
||||
|
||||
-- ============================================================
|
||||
-- UPDATE search vector trigger to include whitebox fields
|
||||
-- ============================================================
|
||||
-- Trigger function: rebuilds NEW.search_vector from the row's text fields and
-- stamps NEW.updated_at with NOW().
-- Weights: model/series = A, category/asic_vendor = B,
-- asic_model/asic_series/sonic_hwsku/supported_nos = C, cpu/tags = D.
-- NOTE(review): the trigger that calls this function is not created here;
-- presumably it already exists from an earlier migration. Confirm.
CREATE OR REPLACE FUNCTION switches_search_vector_update()
RETURNS trigger
LANGUAGE plpgsql
AS $$
DECLARE
    nos_text  TEXT;
    tags_text TEXT;
BEGIN
    -- Flatten the array columns to space-separated text; NULL arrays become ''.
    nos_text  := COALESCE(array_to_string(NEW.supported_nos, ' '), '');
    tags_text := COALESCE(array_to_string(NEW.tags, ' '), '');

    -- Concatenation order is kept exactly as before.
    NEW.search_vector :=
           setweight(to_tsvector('english', COALESCE(NEW.model, '')),       'A')
        || setweight(to_tsvector('english', COALESCE(NEW.series, '')),      'A')
        || setweight(to_tsvector('english', COALESCE(NEW.category, '')),    'B')
        || setweight(to_tsvector('english', COALESCE(NEW.asic_vendor, '')), 'B')
        || setweight(to_tsvector('english', COALESCE(NEW.asic_model, '')),  'C')
        || setweight(to_tsvector('english', COALESCE(NEW.asic_series, '')), 'C')
        || setweight(to_tsvector('english', COALESCE(NEW.sonic_hwsku, '')), 'C')
        || setweight(to_tsvector('english', COALESCE(NEW.cpu, '')),         'D')
        || setweight(to_tsvector('english', nos_text),                      'C')
        || setweight(to_tsvector('english', tags_text),                     'D');

    NEW.updated_at := NOW();
    RETURN NEW;
END;
$$;
|
||||
|
||||
-- ============================================================
-- EXTEND vendors TABLE for whitebox ODMs
-- ============================================================
-- Four boolean flags, all defaulting to FALSE, added in one statement.
ALTER TABLE vendors
    ADD COLUMN IF NOT EXISTS is_odm             BOOLEAN DEFAULT FALSE,
    ADD COLUMN IF NOT EXISTS is_whitebox_vendor BOOLEAN DEFAULT FALSE,
    ADD COLUMN IF NOT EXISTS ocp_member         BOOLEAN DEFAULT FALSE,
    ADD COLUMN IF NOT EXISTS sonic_contributor  BOOLEAN DEFAULT FALSE;
|
||||
18
sql/007-flexoptix-vendors.sql
Normal file
18
sql/007-flexoptix-vendors.sql
Normal file
@ -0,0 +1,18 @@
|
||||
-- TIP: Transceiver Intelligence Platform
|
||||
-- Migration 007: Flexoptix Supported Vendors & Vendor Categories
|
||||
--
|
||||
-- Extends vendors table to track which vendors Flexoptix supports
|
||||
-- and categorize vendors by market segment.
|
||||
|
||||
ALTER TABLE vendors
    -- Flexoptix support flag
    ADD COLUMN IF NOT EXISTS flexoptix_supported BOOLEAN DEFAULT FALSE,
    -- Vendor category (market segment)
    ADD COLUMN IF NOT EXISTS vendor_category     TEXT,
    -- Notes field for additional context
    ADD COLUMN IF NOT EXISTS notes               TEXT;

-- Indexes
-- Partial index: only vendors flagged as Flexoptix-supported are indexed.
CREATE INDEX IF NOT EXISTS idx_vendors_flexoptix
    ON vendors (flexoptix_supported)
    WHERE flexoptix_supported = TRUE;

CREATE INDEX IF NOT EXISTS idx_vendors_category
    ON vendors (vendor_category);
|
||||
69
sql/008-product-assets.sql
Normal file
69
sql/008-product-assets.sql
Normal file
@ -0,0 +1,69 @@
|
||||
-- TIP: Transceiver Intelligence Platform
|
||||
-- Migration 008: Product Assets (Images, Datasheets, Manuals)
|
||||
--
|
||||
-- Adds columns for product images, datasheet PDFs, and manual/guide links
|
||||
-- to both switches and transceivers tables.
|
||||
|
||||
-- ═══════════════════════════════════════════════════════
-- SWITCHES: Product assets
-- ═══════════════════════════════════════════════════════
-- All asset columns are nullable; manual_urls defaults to an empty JSON array.
ALTER TABLE switches
    ADD COLUMN IF NOT EXISTS image_url            TEXT,
    ADD COLUMN IF NOT EXISTS image_local_path     TEXT,
    ADD COLUMN IF NOT EXISTS datasheet_url        TEXT,
    ADD COLUMN IF NOT EXISTS datasheet_local_path TEXT,
    ADD COLUMN IF NOT EXISTS manual_urls          JSONB DEFAULT '[]'::jsonb,
    ADD COLUMN IF NOT EXISTS quick_start_url      TEXT,
    ADD COLUMN IF NOT EXISTS cli_reference_url    TEXT,
    ADD COLUMN IF NOT EXISTS release_notes_url    TEXT,
    ADD COLUMN IF NOT EXISTS product_page_url     TEXT,
    ADD COLUMN IF NOT EXISTS eol_url              TEXT,
    ADD COLUMN IF NOT EXISTS assets_scraped_at    TIMESTAMPTZ;
|
||||
|
||||
-- ═══════════════════════════════════════════════════════
-- TRANSCEIVERS: Product assets
-- ═══════════════════════════════════════════════════════
-- Same asset columns as on switches, minus the switch-only document links.
ALTER TABLE transceivers
    ADD COLUMN IF NOT EXISTS image_url            TEXT,
    ADD COLUMN IF NOT EXISTS image_local_path     TEXT,
    ADD COLUMN IF NOT EXISTS datasheet_url        TEXT,
    ADD COLUMN IF NOT EXISTS datasheet_local_path TEXT,
    ADD COLUMN IF NOT EXISTS manual_urls          JSONB DEFAULT '[]'::jsonb,
    ADD COLUMN IF NOT EXISTS product_page_url     TEXT,
    ADD COLUMN IF NOT EXISTS assets_scraped_at    TIMESTAMPTZ;
|
||||
|
||||
-- ═══════════════════════════════════════════════════════
-- VENDORS: Documentation portal URLs
-- ═══════════════════════════════════════════════════════
ALTER TABLE vendors
    ADD COLUMN IF NOT EXISTS docs_portal_url       TEXT,
    ADD COLUMN IF NOT EXISTS datasheet_library_url TEXT,
    ADD COLUMN IF NOT EXISTS image_cdn_base        TEXT,
    ADD COLUMN IF NOT EXISTS support_portal_url    TEXT;
|
||||
|
||||
-- ═══════════════════════════════════════════════════════
-- DOCUMENTS table for downloaded PDFs (datasheets, manuals)
-- ═══════════════════════════════════════════════════════
-- One row per document. chk_doc_ref requires at least one of switch_id or
-- transceiver_id to be set; both may be set at once.
CREATE TABLE IF NOT EXISTS product_documents (
    id              UUID        PRIMARY KEY DEFAULT gen_random_uuid(),

    -- Owning product: rows are removed together with their switch/transceiver.
    switch_id       UUID        REFERENCES switches(id)     ON DELETE CASCADE,
    transceiver_id  UUID        REFERENCES transceivers(id) ON DELETE CASCADE,
    -- Deleting the vendor keeps the document and clears the reference.
    vendor_id       UUID        REFERENCES vendors(id)      ON DELETE SET NULL,

    doc_type        TEXT        NOT NULL CHECK (doc_type IN (
                                    'datasheet',
                                    'manual',
                                    'quick_start',
                                    'cli_reference',
                                    'release_notes',
                                    'installation_guide',
                                    'compatibility_matrix',
                                    'eol_notice'
                                )),
    title           TEXT        NOT NULL,
    source_url      TEXT        NOT NULL,

    -- Storage locations and file metadata (all optional).
    local_path      TEXT,
    r2_key          TEXT,
    file_size_bytes BIGINT,
    content_hash    TEXT,
    page_count      INTEGER,
    language        TEXT        DEFAULT 'en',
    extracted_text  TEXT,

    indexed_at      TIMESTAMPTZ,
    downloaded_at   TIMESTAMPTZ DEFAULT NOW(),
    created_at      TIMESTAMPTZ DEFAULT NOW(),

    CONSTRAINT chk_doc_ref CHECK (switch_id IS NOT NULL OR transceiver_id IS NOT NULL)
);
|
||||
|
||||
-- Partial indexes: a row is only indexed under the reference it actually has.
CREATE INDEX IF NOT EXISTS idx_product_docs_switch
    ON product_documents (switch_id)
    WHERE switch_id IS NOT NULL;

CREATE INDEX IF NOT EXISTS idx_product_docs_transceiver
    ON product_documents (transceiver_id)
    WHERE transceiver_id IS NOT NULL;

CREATE INDEX IF NOT EXISTS idx_product_docs_type
    ON product_documents (doc_type);

CREATE INDEX IF NOT EXISTS idx_product_docs_vendor
    ON product_documents (vendor_id);

CREATE INDEX IF NOT EXISTS idx_product_docs_hash
    ON product_documents (content_hash);
|
||||
210
sql/009-seed-switches.sql
Normal file
210
sql/009-seed-switches.sql
Normal file
@ -0,0 +1,210 @@
|
||||
-- TIP: Transceiver Intelligence Platform
|
||||
-- Migration 009: Seed Switch Data
|
||||
-- Major network switch models from Cisco, Arista, Juniper and NVIDIA, plus whitebox platforms (Edgecore, Celestica, Asterfusion)
|
||||
|
||||
-- First ensure vendors exist.
-- One multi-row insert; any row whose slug is already present is skipped.
INSERT INTO vendors (name, slug, type, headquarters, country, website)
VALUES
    ('Cisco Systems',     'cisco',             'manufacturer', 'San Jose, CA',    'US', 'https://www.cisco.com'),
    ('Arista Networks',   'arista',            'manufacturer', 'Santa Clara, CA', 'US', 'https://www.arista.com'),
    ('Juniper Networks',  'juniper',           'manufacturer', 'Sunnyvale, CA',   'US', 'https://www.juniper.net'),
    ('Edgecore Networks', 'edgecore',          'manufacturer', 'Hsinchu',         'TW', 'https://www.edge-core.com'),
    ('Celestica',         'celestica',         'manufacturer', 'Toronto',         'CA', 'https://www.celestica.com'),
    ('NVIDIA Networking', 'nvidia-networking', 'manufacturer', 'Santa Clara, CA', 'US', 'https://www.nvidia.com/en-us/networking/'),
    ('Dell Technologies', 'dell',              'manufacturer', 'Round Rock, TX',  'US', 'https://www.dell.com'),
    ('HPE / Aruba',       'hpe-aruba',         'manufacturer', 'San Jose, CA',    'US', 'https://www.arubanetworks.com'),
    ('Asterfusion',       'asterfusion',       'manufacturer', 'Beijing',         'CN', 'https://www.asterfusion.com')
ON CONFLICT (slug) DO NOTHING;
|
||||
|
||||
-- Cisco Nexus Data Center
-- Each insert is guarded by NOT EXISTS on (vendor_id, model) so that re-running
-- this seed does not create duplicate switch rows (the vendor inserts in this
-- file are already re-runnable through ON CONFLICT).
-- NOTE(review): asic_vendor 'Broadcom' / asic_model 'Memory Pipeline' looks like
-- placeholder data for the Nexus 9300 rows — confirm against the data sheets.
INSERT INTO switches (vendor_id, model, series, category, layer, ports_config, total_ports, max_speed_gbps, switching_capacity_tbps, forwarding_rate_mpps, asic_vendor, asic_model, lifecycle_status, bgp_support, vxlan_support, evpn_support)
SELECT v.id, 'N9K-C9364C', 'Nexus 9300', 'DataCenter', 'L3',
       '{"100G_QSFP28": 64}'::jsonb, 64, 100, 12.8, 4760,
       'Broadcom', 'Memory Pipeline', 'Active', true, true, true
FROM vendors v
WHERE v.slug = 'cisco'
  AND NOT EXISTS (SELECT 1 FROM switches s WHERE s.vendor_id = v.id AND s.model = 'N9K-C9364C');

-- The 93600CD-GX has 28 x 100G QSFP28 and 8 x 400G QSFP-DD ports; the two
-- counts were previously swapped (the 36-port total is unchanged).
INSERT INTO switches (vendor_id, model, series, category, layer, ports_config, total_ports, max_speed_gbps, switching_capacity_tbps, forwarding_rate_mpps, asic_vendor, asic_model, lifecycle_status, bgp_support, vxlan_support, evpn_support)
SELECT v.id, 'N9K-C93600CD-GX', 'Nexus 9300', 'DataCenter', 'L3',
       '{"400G_QSFP-DD": 8, "100G_QSFP28": 28}'::jsonb, 36, 400, 12.8, 4760,
       'Broadcom', 'Memory Pipeline', 'Active', true, true, true
FROM vendors v
WHERE v.slug = 'cisco'
  AND NOT EXISTS (SELECT 1 FROM switches s WHERE s.vendor_id = v.id AND s.model = 'N9K-C93600CD-GX');

INSERT INTO switches (vendor_id, model, series, category, layer, ports_config, total_ports, max_speed_gbps, switching_capacity_tbps, forwarding_rate_mpps, asic_vendor, asic_model, lifecycle_status, bgp_support, vxlan_support, evpn_support)
SELECT v.id, 'N9K-C9336C-FX2', 'Nexus 9300', 'DataCenter', 'L3',
       '{"100G_QSFP28": 36}'::jsonb, 36, 100, 7.2, 2680,
       'Broadcom', 'Memory Pipeline', 'Active', true, true, true
FROM vendors v
WHERE v.slug = 'cisco'
  AND NOT EXISTS (SELECT 1 FROM switches s WHERE s.vendor_id = v.id AND s.model = 'N9K-C9336C-FX2');

INSERT INTO switches (vendor_id, model, series, category, layer, ports_config, total_ports, max_speed_gbps, switching_capacity_tbps, forwarding_rate_mpps, asic_vendor, asic_model, lifecycle_status, bgp_support, vxlan_support, evpn_support)
SELECT v.id, 'N9K-C9332D-GX2B', 'Nexus 9300', 'DataCenter', 'L3',
       '{"400G_QSFP-DD": 32}'::jsonb, 32, 400, 25.6, 9520,
       'Broadcom', 'Memory Pipeline', 'Active', true, true, true
FROM vendors v
WHERE v.slug = 'cisco'
  AND NOT EXISTS (SELECT 1 FROM switches s WHERE s.vendor_id = v.id AND s.model = 'N9K-C9332D-GX2B');

INSERT INTO switches (vendor_id, model, series, category, layer, ports_config, total_ports, max_speed_gbps, switching_capacity_tbps, forwarding_rate_mpps, asic_vendor, asic_model, lifecycle_status, bgp_support, vxlan_support, evpn_support, mpls_support)
SELECT v.id, 'N9K-C9508', 'Nexus 9500', 'Core', 'L3',
       '{"400G_QSFP-DD": 576}'::jsonb, 576, 400, 230.4, 85000,
       'Cisco', 'Cloud Scale', 'Active', true, true, true, true
FROM vendors v
WHERE v.slug = 'cisco'
  AND NOT EXISTS (SELECT 1 FROM switches s WHERE s.vendor_id = v.id AND s.model = 'N9K-C9508');
|
||||
|
||||
-- Arista 7000 Series
-- Each insert is guarded by NOT EXISTS on (vendor_id, model) so that re-running
-- this seed does not create duplicate switch rows.
-- NOTE(review): asic_model 'Memory Pipeline' looks like placeholder data —
-- confirm the real ASIC names against the Arista data sheets.
INSERT INTO switches (vendor_id, model, series, category, layer, ports_config, total_ports, max_speed_gbps, switching_capacity_tbps, forwarding_rate_mpps, asic_vendor, asic_model, lifecycle_status, bgp_support, vxlan_support, evpn_support, openconfig_support)
SELECT v.id, '7060X6-64PE', 'Arista 7060X', 'DataCenter', 'L3',
       '{"800G_OSFP": 64}'::jsonb, 64, 800, 51.2, 19000,
       'Broadcom', 'Memory Pipeline', 'Active', true, true, true, true
FROM vendors v
WHERE v.slug = 'arista'
  AND NOT EXISTS (SELECT 1 FROM switches s WHERE s.vendor_id = v.id AND s.model = '7060X6-64PE');

INSERT INTO switches (vendor_id, model, series, category, layer, ports_config, total_ports, max_speed_gbps, switching_capacity_tbps, forwarding_rate_mpps, asic_vendor, asic_model, lifecycle_status, bgp_support, vxlan_support, evpn_support, openconfig_support)
SELECT v.id, '7060X5-64', 'Arista 7060X', 'DataCenter', 'L3',
       '{"400G_QSFP-DD": 64}'::jsonb, 64, 400, 51.2, 19000,
       'Broadcom', 'Memory Pipeline', 'Active', true, true, true, true
FROM vendors v
WHERE v.slug = 'arista'
  AND NOT EXISTS (SELECT 1 FROM switches s WHERE s.vendor_id = v.id AND s.model = '7060X5-64');

INSERT INTO switches (vendor_id, model, series, category, layer, ports_config, total_ports, max_speed_gbps, switching_capacity_tbps, forwarding_rate_mpps, asic_vendor, asic_model, lifecycle_status, bgp_support, vxlan_support, evpn_support, openconfig_support)
SELECT v.id, '7050X4-32', 'Arista 7050X', 'DataCenter', 'L3',
       '{"400G_QSFP-DD": 32}'::jsonb, 32, 400, 25.6, 9520,
       'Broadcom', 'Memory Pipeline', 'Active', true, true, true, true
FROM vendors v
WHERE v.slug = 'arista'
  AND NOT EXISTS (SELECT 1 FROM switches s WHERE s.vendor_id = v.id AND s.model = '7050X4-32');

INSERT INTO switches (vendor_id, model, series, category, layer, ports_config, total_ports, max_speed_gbps, switching_capacity_tbps, forwarding_rate_mpps, asic_vendor, asic_model, lifecycle_status, bgp_support, vxlan_support, evpn_support, openconfig_support)
SELECT v.id, '7280R3-48YC6', 'Arista 7280R', 'SP', 'L3',
       '{"100G_QSFP28": 6, "25G_SFP28": 48}'::jsonb, 54, 100, 6.4, 2380,
       'Broadcom', 'Memory Pipeline', 'Active', true, true, true, true
FROM vendors v
WHERE v.slug = 'arista'
  AND NOT EXISTS (SELECT 1 FROM switches s WHERE s.vendor_id = v.id AND s.model = '7280R3-48YC6');

INSERT INTO switches (vendor_id, model, series, category, layer, ports_config, total_ports, max_speed_gbps, switching_capacity_tbps, forwarding_rate_mpps, asic_vendor, asic_model, lifecycle_status, bgp_support, vxlan_support, evpn_support, openconfig_support)
SELECT v.id, '7800R3-36P-LC', 'Arista 7800R', 'Core', 'L3',
       '{"400G_QSFP-DD": 36}'::jsonb, 36, 400, 28.8, 10700,
       'Broadcom', 'Memory Pipeline', 'Active', true, true, true, true
FROM vendors v
WHERE v.slug = 'arista'
  AND NOT EXISTS (SELECT 1 FROM switches s WHERE s.vendor_id = v.id AND s.model = '7800R3-36P-LC');

INSERT INTO switches (vendor_id, model, series, category, layer, ports_config, total_ports, max_speed_gbps, switching_capacity_tbps, forwarding_rate_mpps, asic_vendor, asic_model, lifecycle_status, bgp_support, vxlan_support, evpn_support, openconfig_support)
SELECT v.id, '7020R', 'Arista 7020R', 'Campus', 'L3',
       '{"1G_SFP": 48, "10G_SFP+": 6}'::jsonb, 54, 10, 0.176, 130,
       'Broadcom', 'Memory Pipeline', 'Active', true, false, false, true
FROM vendors v
WHERE v.slug = 'arista'
  AND NOT EXISTS (SELECT 1 FROM switches s WHERE s.vendor_id = v.id AND s.model = '7020R');
|
||||
|
||||
-- Juniper QFX Series (plus one EX campus model)
-- Each insert is guarded by NOT EXISTS on (vendor_id, model) so that re-running
-- this seed does not create duplicate switch rows.
-- NOTE(review): asic_model 'Memory Pipeline' looks like placeholder data —
-- confirm the real ASIC names against the Juniper data sheets.
INSERT INTO switches (vendor_id, model, series, category, layer, ports_config, total_ports, max_speed_gbps, switching_capacity_tbps, forwarding_rate_mpps, asic_vendor, asic_model, lifecycle_status, bgp_support, vxlan_support, evpn_support)
SELECT v.id, 'QFX5130-32CD', 'QFX5100', 'DataCenter', 'L3',
       '{"400G_QSFP-DD": 32}'::jsonb, 32, 400, 25.6, 9520,
       'Broadcom', 'Memory Pipeline', 'Active', true, true, true
FROM vendors v
WHERE v.slug = 'juniper'
  AND NOT EXISTS (SELECT 1 FROM switches s WHERE s.vendor_id = v.id AND s.model = 'QFX5130-32CD');

INSERT INTO switches (vendor_id, model, series, category, layer, ports_config, total_ports, max_speed_gbps, switching_capacity_tbps, forwarding_rate_mpps, asic_vendor, asic_model, lifecycle_status, bgp_support, vxlan_support, evpn_support)
SELECT v.id, 'QFX5220-32CD', 'QFX5200', 'DataCenter', 'L3',
       '{"400G_QSFP-DD": 32}'::jsonb, 32, 400, 25.6, 9520,
       'Broadcom', 'Memory Pipeline', 'Active', true, true, true
FROM vendors v
WHERE v.slug = 'juniper'
  AND NOT EXISTS (SELECT 1 FROM switches s WHERE s.vendor_id = v.id AND s.model = 'QFX5220-32CD');

INSERT INTO switches (vendor_id, model, series, category, layer, ports_config, total_ports, max_speed_gbps, switching_capacity_tbps, forwarding_rate_mpps, asic_vendor, asic_model, lifecycle_status, bgp_support, vxlan_support, evpn_support)
SELECT v.id, 'QFX5120-48Y', 'QFX5100', 'DataCenter', 'L3',
       '{"25G_SFP28": 48, "100G_QSFP28": 8}'::jsonb, 56, 100, 4.0, 1488,
       'Broadcom', 'Memory Pipeline', 'Active', true, true, true
FROM vendors v
WHERE v.slug = 'juniper'
  AND NOT EXISTS (SELECT 1 FROM switches s WHERE s.vendor_id = v.id AND s.model = 'QFX5120-48Y');

INSERT INTO switches (vendor_id, model, series, category, layer, ports_config, total_ports, max_speed_gbps, switching_capacity_tbps, forwarding_rate_mpps, asic_vendor, asic_model, lifecycle_status, bgp_support, vxlan_support, evpn_support, mpls_support)
SELECT v.id, 'QFX10008', 'QFX10000', 'Core', 'L3',
       '{"400G_QSFP-DD": 288}'::jsonb, 288, 400, 115.2, 42800,
       'Juniper', 'ExpressPlus', 'Active', true, true, true, true
FROM vendors v
WHERE v.slug = 'juniper'
  AND NOT EXISTS (SELECT 1 FROM switches s WHERE s.vendor_id = v.id AND s.model = 'QFX10008');

-- This row deliberately keeps the original column list, which omits asic_model.
INSERT INTO switches (vendor_id, model, series, category, layer, ports_config, total_ports, max_speed_gbps, switching_capacity_tbps, forwarding_rate_mpps, asic_vendor, lifecycle_status, bgp_support, vxlan_support, evpn_support)
SELECT v.id, 'EX4400-48T', 'EX4400', 'Campus', 'L3',
       '{"1G_RJ45": 48, "10G_SFP+": 4, "25G_SFP28": 2}'::jsonb, 54, 25, 1.76, 654,
       'Broadcom', 'Active', true, true, true
FROM vendors v
WHERE v.slug = 'juniper'
  AND NOT EXISTS (SELECT 1 FROM switches s WHERE s.vendor_id = v.id AND s.model = 'EX4400-48T');
|
||||
|
||||
-- NVIDIA / Mellanox Spectrum
-- Each insert is guarded by NOT EXISTS on (vendor_id, model) so that re-running
-- this seed does not create duplicate switch rows.
INSERT INTO switches (vendor_id, model, series, category, layer, ports_config, total_ports, max_speed_gbps, switching_capacity_tbps, forwarding_rate_mpps, asic_vendor, asic_model, lifecycle_status, bgp_support, vxlan_support, evpn_support, sonic_compatible)
SELECT v.id, 'SN5600', 'Spectrum-4', 'DataCenter', 'L3',
       '{"800G_OSFP": 64}'::jsonb, 64, 800, 51.2, 19000,
       'NVIDIA', 'Spectrum-4', 'Active', true, true, true, true
FROM vendors v
WHERE v.slug = 'nvidia-networking'
  AND NOT EXISTS (SELECT 1 FROM switches s WHERE s.vendor_id = v.id AND s.model = 'SN5600');

INSERT INTO switches (vendor_id, model, series, category, layer, ports_config, total_ports, max_speed_gbps, switching_capacity_tbps, forwarding_rate_mpps, asic_vendor, asic_model, lifecycle_status, bgp_support, vxlan_support, evpn_support, sonic_compatible)
SELECT v.id, 'SN4700', 'Spectrum-3', 'DataCenter', 'L3',
       '{"400G_QSFP-DD": 32}'::jsonb, 32, 400, 12.8, 4760,
       'NVIDIA', 'Spectrum-3', 'Active', true, true, true, true
FROM vendors v
WHERE v.slug = 'nvidia-networking'
  AND NOT EXISTS (SELECT 1 FROM switches s WHERE s.vendor_id = v.id AND s.model = 'SN4700');

INSERT INTO switches (vendor_id, model, series, category, layer, ports_config, total_ports, max_speed_gbps, switching_capacity_tbps, forwarding_rate_mpps, asic_vendor, asic_model, lifecycle_status, bgp_support, vxlan_support, evpn_support, sonic_compatible)
SELECT v.id, 'SN3700', 'Spectrum-2', 'DataCenter', 'L3',
       '{"100G_QSFP28": 32}'::jsonb, 32, 100, 6.4, 2380,
       'NVIDIA', 'Spectrum-2', 'Active', true, true, true, true
FROM vendors v
WHERE v.slug = 'nvidia-networking'
  AND NOT EXISTS (SELECT 1 FROM switches s WHERE s.vendor_id = v.id AND s.model = 'SN3700');
|
||||
|
||||
-- Edgecore Whitebox
-- Each insert is guarded by NOT EXISTS on (vendor_id, model) so that re-running
-- this seed does not create duplicate switch rows.
-- These rows do not set forwarding_rate_mpps (the column is not in the insert list).
-- NOTE(review): asic_model 'Memory Pipeline' looks like placeholder data, and the
-- model/series pairing of the first row ('DCS810' / 'AS9516-32D') should be
-- confirmed against the Edgecore catalogue.
INSERT INTO switches (vendor_id, model, series, category, layer, ports_config, total_ports, max_speed_gbps, switching_capacity_tbps, asic_vendor, asic_model, lifecycle_status, sonic_compatible, is_whitebox, onie_support, bgp_support)
SELECT v.id, 'DCS810', 'AS9516-32D', 'DataCenter', 'L3',
       '{"400G_QSFP-DD": 32}'::jsonb, 32, 400, 12.8,
       'Broadcom', 'Memory Pipeline', 'Active', true, true, true, true
FROM vendors v
WHERE v.slug = 'edgecore'
  AND NOT EXISTS (SELECT 1 FROM switches s WHERE s.vendor_id = v.id AND s.model = 'DCS810');

INSERT INTO switches (vendor_id, model, series, category, layer, ports_config, total_ports, max_speed_gbps, switching_capacity_tbps, asic_vendor, asic_model, lifecycle_status, sonic_compatible, is_whitebox, onie_support, bgp_support)
SELECT v.id, 'AS7726-32X', 'AS7726', 'DataCenter', 'L3',
       '{"100G_QSFP28": 32}'::jsonb, 32, 100, 6.4,
       'Broadcom', 'Memory Pipeline', 'Active', true, true, true, true
FROM vendors v
WHERE v.slug = 'edgecore'
  AND NOT EXISTS (SELECT 1 FROM switches s WHERE s.vendor_id = v.id AND s.model = 'AS7726-32X');
|
||||
|
||||
-- Celestica
-- Each insert is guarded by NOT EXISTS on (vendor_id, model) so that re-running
-- this seed does not create duplicate switch rows.
-- NOTE(review): asic_model 'Memory Pipeline' looks like placeholder data —
-- confirm the real ASIC names against the Celestica data sheets.
INSERT INTO switches (vendor_id, model, series, category, layer, ports_config, total_ports, max_speed_gbps, switching_capacity_tbps, asic_vendor, asic_model, lifecycle_status, sonic_compatible, is_whitebox, onie_support, bgp_support)
SELECT v.id, 'DS5000', 'Seastone', 'DataCenter', 'L3',
       '{"800G_OSFP": 64}'::jsonb, 64, 800, 51.2,
       'Broadcom', 'Memory Pipeline', 'Active', true, true, true, true
FROM vendors v
WHERE v.slug = 'celestica'
  AND NOT EXISTS (SELECT 1 FROM switches s WHERE s.vendor_id = v.id AND s.model = 'DS5000');

INSERT INTO switches (vendor_id, model, series, category, layer, ports_config, total_ports, max_speed_gbps, switching_capacity_tbps, asic_vendor, asic_model, lifecycle_status, sonic_compatible, is_whitebox, onie_support, bgp_support)
SELECT v.id, 'DS3000', 'Seastone', 'DataCenter', 'L3',
       '{"400G_QSFP-DD": 32}'::jsonb, 32, 400, 12.8,
       'Broadcom', 'Memory Pipeline', 'Active', true, true, true, true
FROM vendors v
WHERE v.slug = 'celestica'
  AND NOT EXISTS (SELECT 1 FROM switches s WHERE s.vendor_id = v.id AND s.model = 'DS3000');
|
||||
|
||||
-- Asterfusion
-- The insert is guarded by NOT EXISTS on (vendor_id, model) so that re-running
-- this seed does not create a duplicate switch row.
-- NOTE(review): asic_model 'Memory Pipeline' looks like placeholder data —
-- confirm the real ASIC name against the Asterfusion data sheet.
INSERT INTO switches (vendor_id, model, series, category, layer, ports_config, total_ports, max_speed_gbps, switching_capacity_tbps, asic_vendor, asic_model, lifecycle_status, sonic_compatible, is_whitebox, onie_support, bgp_support)
SELECT v.id, 'CX864E-N', 'CX8000', 'DataCenter', 'L3',
       '{"400G_QSFP-DD": 64}'::jsonb, 64, 400, 25.6,
       'Broadcom', 'Memory Pipeline', 'Active', true, true, true, true
FROM vendors v
WHERE v.slug = 'asterfusion'
  AND NOT EXISTS (SELECT 1 FROM switches s WHERE s.vendor_id = v.id AND s.model = 'CX864E-N');
|
||||
|
||||
-- Generate search vectors for switches.
-- Only rows without a vector are touched. NULL text columns contribute nothing:
-- concat_ws skips them, which yields the same lexemes as coalescing them to ''.
UPDATE switches AS sw
SET search_vector = to_tsvector(
        'english',
        concat_ws(' ', sw.model, sw.series, sw.category, sw.asic_vendor, sw.asic_model)
    )
WHERE sw.search_vector IS NULL;
|
||||
82
sql/010-vendor-urls.sql
Normal file
82
sql/010-vendor-urls.sql
Normal file
@ -0,0 +1,82 @@
|
||||
-- 010: Add image_url, product_page_url, datasheet_url columns and populate vendor URLs
|
||||
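-- Run on Erik: PGPASSWORD="$TIP_DB_PASSWORD" psql -h localhost -p 5433 -U tip -d transceiver_db -f sql/010-vendor-urls.sql
-- (Password redacted: supply it via the environment or ~/.pgpass, and rotate the value that was previously committed here.)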
|
||||
|
||||
-- Add columns (idempotent): one ALTER per table, each column added only if absent.
ALTER TABLE transceivers
    ADD COLUMN IF NOT EXISTS image_url TEXT,
    ADD COLUMN IF NOT EXISTS product_page_url TEXT,
    ADD COLUMN IF NOT EXISTS datasheet_url TEXT;

ALTER TABLE switches
    ADD COLUMN IF NOT EXISTS image_url TEXT,
    ADD COLUMN IF NOT EXISTS product_page_url TEXT;
|
||||
|
||||
-- FLEXOPTIX product page URLs.
-- The slug is the part number up to the first ':' (any variant suffix is dropped),
-- with '.' and ' ' turned into '-', lower-cased, and '.html' appended.
-- Rows that already have a URL, or that have no part number, are left alone.
UPDATE transceivers AS t
SET product_page_url =
        'https://www.flexoptix.net/en/'
        || LOWER(TRANSLATE(SPLIT_PART(t.part_number, ':', 1), '. ', '--'))
        || '.html'
WHERE t.product_page_url IS NULL
  AND t.part_number IS NOT NULL
  AND t.vendor_id = (SELECT v.id FROM vendors AS v WHERE v.name = 'FLEXOPTIX');
|
||||
|
||||
-- 10Gtek product page URLs.
-- Every 10Gtek transceiver without a URL gets the same category page.
UPDATE transceivers AS t
SET product_page_url = 'https://www.10gtek.com/transceiver'
WHERE t.product_page_url IS NULL
  AND t.vendor_id = (SELECT v.id FROM vendors AS v WHERE v.name = '10Gtek');
|
||||
|
||||
-- Fluxlight product page URLs: fluxlight.com/{part_number}/ with spaces replaced by '-'.
-- NOTE(review): the earlier comment described the pattern as {PART_NUMBER}-FL, but no
-- '-FL' suffix is appended here; presumably stored part numbers already carry it — confirm.
-- Only rows with a part number and no existing URL are updated, so no fallback
-- to slug is needed.
UPDATE transceivers AS t
SET product_page_url = 'https://fluxlight.com/' || REPLACE(t.part_number, ' ', '-') || '/'
WHERE t.product_page_url IS NULL
  AND t.part_number IS NOT NULL
  AND t.vendor_id = (SELECT v.id FROM vendors AS v WHERE v.name = 'Fluxlight');
|
||||
|
||||
-- Vendors below get one shared landing page for every transceiver that has no URL yet.

-- GBICS product page URLs
UPDATE transceivers AS t
SET product_page_url = 'https://gbics.com/compatible-transceivers/'
WHERE t.product_page_url IS NULL
  AND t.vendor_id = (SELECT v.id FROM vendors AS v WHERE v.name = 'GBICS');

-- SFPcables product page URLs
UPDATE transceivers AS t
SET product_page_url = 'https://www.sfpcables.com/'
WHERE t.product_page_url IS NULL
  AND t.vendor_id = (SELECT v.id FROM vendors AS v WHERE v.name = 'SFPcables');

-- Juniper Networks product page URLs (generic Juniper optics page)
UPDATE transceivers AS t
SET product_page_url = 'https://www.juniper.net/us/en/products/pluggable-optics.html'
WHERE t.product_page_url IS NULL
  AND t.vendor_id = (SELECT v.id FROM vendors AS v WHERE v.name = 'Juniper Networks');
|
||||
|
||||
-- Switch vendor product page URLs.
-- Vendors are matched by slug OR by the name used previously. The switch seed
-- (009) inserts 'Cisco Systems', 'NVIDIA Networking' and 'Edgecore Networks', so a
-- lookup by the short names alone ('Cisco', 'NVIDIA', 'Edgecore') matches no row
-- and silently updates nothing. IN (...) is used instead of a scalar subquery so
-- that more than one matching vendor row does not raise an error.
-- Only switches without a URL are updated, so the script stays re-runnable.
UPDATE switches SET product_page_url = 'https://www.cisco.com/site/us/en/products/networking/switches/index.html'
WHERE product_page_url IS NULL
  AND vendor_id IN (SELECT id FROM vendors WHERE slug = 'cisco' OR name = 'Cisco');

UPDATE switches SET product_page_url = 'https://www.arista.com/en/products/switches'
WHERE product_page_url IS NULL
  AND vendor_id IN (SELECT id FROM vendors WHERE slug = 'arista' OR name = 'Arista Networks');

UPDATE switches SET product_page_url = 'https://www.juniper.net/us/en/products/switches.html'
WHERE product_page_url IS NULL
  AND vendor_id IN (SELECT id FROM vendors WHERE slug = 'juniper' OR name = 'Juniper Networks');

UPDATE switches SET product_page_url = 'https://www.nvidia.com/en-us/networking/ethernet-switching/'
WHERE product_page_url IS NULL
  AND vendor_id IN (SELECT id FROM vendors WHERE slug = 'nvidia-networking' OR name = 'NVIDIA');

-- NOTE(review): the seeded vendor website is https://www.edge-core.com, but this
-- URL uses the edgecore.com domain — confirm which one is correct.
UPDATE switches SET product_page_url = 'https://www.edgecore.com/products'
WHERE product_page_url IS NULL
  AND vendor_id IN (SELECT id FROM vendors WHERE slug = 'edgecore' OR name = 'Edgecore');

UPDATE switches SET product_page_url = 'https://www.celestica.com/open-networking-switches'
WHERE product_page_url IS NULL
  AND vendor_id IN (SELECT id FROM vendors WHERE slug = 'celestica' OR name = 'Celestica');

UPDATE switches SET product_page_url = 'https://www.asterfusion.com/products/'
WHERE product_page_url IS NULL
  AND vendor_id IN (SELECT id FROM vendors WHERE slug = 'asterfusion' OR name = 'Asterfusion');
|
||||
|
||||
-- Summary: how many rows ended up with each URL populated.
-- COUNT(column) counts only non-NULL values, so no WHERE filter is needed.
SELECT 'Transceivers with product_page_url' AS metric, COUNT(product_page_url) AS count
FROM transceivers
UNION ALL
SELECT 'Transceivers with image_url', COUNT(image_url)
FROM transceivers
UNION ALL
SELECT 'Switches with product_page_url', COUNT(product_page_url)
FROM switches;
|
||||
Loading…
x
Reference in New Issue
Block a user