feat(v0.2.0): Sales Intelligence Engine — Phase 0+A

New API routes:
- GET /api/finder — Switch→Flexoptix transceiver finder with FlexBox coding
- GET /api/competitor-alerts — Competitor intelligence (price changes, new products, stock)
- GET /api/forecast/:technology — Sales forecast 3/9/12/18 months + buy/wait/hold signal
- POST /api/transport/plan — Transport system planner (city→city BOM with fiber providers)

New MCP tools:
- find_flexoptix_for_switch — Customer switch → Flexoptix products
- get_competitor_alerts — Competitor monitoring
- plan_transport — Network transport planning
- forecast_sales — Volume/revenue prediction
- generate_blog — Enhanced blog generation

New DB tables (migration 013):
- competitor_alerts, price_changes, flexoptix_product_map
- sales_forecasts, fiber_providers, fiber_routes, cities
- generated_datasheets, blog_series
- Views: v_price_coverage, v_image_coverage, v_switch_flexoptix_finder

Seed data (migration 014):
- 25 European cities with IX/DC locations + coordinates
- 15 fiber providers (euNetworks, Telia, DTAG, Colt, Zayo, etc.)
- 16 fiber routes with pricing (Germany focus)

Infrastructure:
- Scraper scheduler: 2h Flexoptix, 4h FS.com/Optcore (was 6-8h)
- Change detector for competitor price/stock monitoring
- Image downloader utility with coverage tracking
This commit is contained in:
Rene Fichtmueller 2026-03-31 08:51:22 +02:00
parent b238815cb5
commit aa977abc97
26 changed files with 2353 additions and 622 deletions

View File

@ -13,6 +13,10 @@ import { hypeCycleRouter } from "./routes/hype-cycle";
import { searchRouter } from "./routes/search"; import { searchRouter } from "./routes/search";
import { documentRouter } from "./routes/documents"; import { documentRouter } from "./routes/documents";
import { blogRouter } from "./routes/blog"; import { blogRouter } from "./routes/blog";
import { finderRouter } from "./routes/finder";
import { competitorRouter } from "./routes/competitor-alerts";
import { forecastRouter } from "./routes/forecast";
import { transportRouter } from "./routes/transport";
const app = express(); const app = express();
@ -42,6 +46,10 @@ app.use("/api/hype-cycle", hypeCycleRouter);
app.use("/api/search", searchRouter); app.use("/api/search", searchRouter);
app.use("/api/documents", documentRouter); app.use("/api/documents", documentRouter);
app.use("/api/blog", blogRouter); app.use("/api/blog", blogRouter);
app.use("/api/finder", finderRouter);
app.use("/api/competitor-alerts", competitorRouter);
app.use("/api/forecast", forecastRouter);
app.use("/api/transport", transportRouter);
// Dashboard (static HTML) // Dashboard (static HTML)
app.use("/dashboard", express.static(join(__dirname, "..", "..", "dashboard"))); app.use("/dashboard", express.static(join(__dirname, "..", "..", "dashboard")));
@ -55,7 +63,7 @@ app.get("/", (_req, res) => {
app.get("/api", (_req, res) => { app.get("/api", (_req, res) => {
res.json({ res.json({
name: "Transceiver Intelligence Platform", name: "Transceiver Intelligence Platform",
version: "0.1.0", version: "0.2.0-alpha.1",
endpoints: [ endpoints: [
"GET /api/transceivers?q=&form_factor=&speed=&category=&fiber_type=&wdm_type=&coherent=", "GET /api/transceivers?q=&form_factor=&speed=&category=&fiber_type=&wdm_type=&coherent=",
"GET /api/transceivers/:id", "GET /api/transceivers/:id",

View File

@ -0,0 +1,160 @@
/**
* WS4: Competitor Intelligence Alerts & Price Changes
*/
import { Router } from "express";
import { pool } from "../db/client";
export const competitorRouter = Router();
/**
 * GET /api/competitor-alerts?vendor=&type=&severity=&days=&acknowledged=&limit=&offset=
 *
 * Lists competitor alerts created within the last `days` days, newest first,
 * plus per-alert-type counts for the same time window.
 *
 * Query parameters:
 * - vendor        vendor slug to match exactly
 * - type          alert_type to match exactly
 * - severity      severity to match exactly
 * - acknowledged  pass `false` to return only unacknowledged alerts
 * - days          look-back window in days (default 7, clamped to 0..3650)
 * - limit         page size (default 50, clamped to 0..500)
 * - offset        page start (default 0, never negative)
 *
 * Numeric parameters that cannot be parsed fall back to their defaults
 * instead of reaching PostgreSQL as NaN (which previously produced a 500).
 *
 * Response: `{ alerts, total, stats, period_days }`. `total` is the number of
 * rows in this page, and `stats` is not narrowed by vendor/type/severity.
 */
competitorRouter.get("/", async (req, res) => {
  // Repeated keys (?days=1&days=2) arrive as arrays; only plain non-empty strings are honoured.
  const text = (value: unknown): string | undefined =>
    typeof value === "string" && value !== "" ? value : undefined;
  const boundedInt = (value: unknown, fallback: number, min: number, max: number): number => {
    const parsed = Number.parseInt(text(value) ?? "", 10);
    return Number.isNaN(parsed) ? fallback : Math.min(max, Math.max(min, parsed));
  };

  try {
    const days = boundedInt(req.query.days, 7, 0, 3650);
    const limit = boundedInt(req.query.limit, 50, 0, 500);
    const offset = boundedInt(req.query.offset, 0, 0, Number.MAX_SAFE_INTEGER);

    let sql = `
SELECT ca.*,
v.name AS vendor_name,
v.slug AS vendor_slug
FROM competitor_alerts ca
LEFT JOIN vendors v ON ca.vendor_id = v.id
WHERE ca.created_at > NOW() - INTERVAL '1 day' * $1
`;
    const params: unknown[] = [days];

    // Column names are fixed literals; only the values are user-supplied and bound as parameters.
    const equalityFilters: [string, string | undefined][] = [
      ["v.slug", text(req.query.vendor)],
      ["ca.alert_type", text(req.query.type)],
      ["ca.severity", text(req.query.severity)],
    ];
    for (const [column, value] of equalityFilters) {
      if (value === undefined) continue;
      params.push(value);
      sql += ` AND ${column} = $${params.length}`;
    }
    if (req.query.acknowledged === "false") {
      sql += ` AND ca.acknowledged = false`;
    }
    params.push(limit, offset);
    sql += ` ORDER BY ca.created_at DESC LIMIT $${params.length - 1} OFFSET $${params.length}`;

    // The page and the summary stats are independent, so fetch them concurrently.
    const [result, stats] = await Promise.all([
      pool.query(sql, params),
      pool.query(`
SELECT
alert_type,
COUNT(*) AS count,
COUNT(*) FILTER (WHERE acknowledged = false) AS unread
FROM competitor_alerts
WHERE created_at > NOW() - INTERVAL '1 day' * $1
GROUP BY alert_type
ORDER BY count DESC
`, [days]),
    ]);

    res.json({
      alerts: result.rows,
      total: result.rowCount,
      stats: stats.rows,
      period_days: days,
    });
  } catch (err) {
    console.error("Competitor alerts error:", err);
    res.status(500).json({ error: "Internal server error" });
  }
});
/**
 * GET /api/competitor-alerts/price-changes?vendor=&speed=&days=&limit=
 *
 * Lists price changes detected within the last `days` days, ordered by the
 * absolute percentage change (largest first).
 *
 * Query parameters:
 * - vendor  vendor slug to match exactly
 * - speed   transceiver speed in Gbps to match exactly; a non-numeric value yields 400
 * - days    look-back window in days (default 30, clamped to 0..3650)
 * - limit   maximum rows (default 50, clamped to 0..500)
 *
 * Response: `{ price_changes, total }` where `total` is the number of rows returned.
 */
competitorRouter.get("/price-changes", async (req, res) => {
  // Repeated keys arrive as arrays; only plain non-empty strings are honoured.
  const text = (value: unknown): string | undefined =>
    typeof value === "string" && value !== "" ? value : undefined;
  const boundedInt = (value: unknown, fallback: number, min: number, max: number): number => {
    const parsed = Number.parseInt(text(value) ?? "", 10);
    return Number.isNaN(parsed) ? fallback : Math.min(max, Math.max(min, parsed));
  };

  try {
    const days = boundedInt(req.query.days, 30, 0, 3650);
    const limit = boundedInt(req.query.limit, 50, 0, 500);
    const vendor = text(req.query.vendor);
    const speedText = text(req.query.speed);

    // A filter that cannot be parsed is rejected rather than silently matching nothing.
    const speed = speedText === undefined ? undefined : Number.parseFloat(speedText);
    if (speed !== undefined && !Number.isFinite(speed)) {
      return res.status(400).json({ error: "Parameter 'speed' must be a number" });
    }

    let sql = `
SELECT pc.*,
v.name AS vendor_name,
t.slug, t.form_factor, t.speed_gbps, t.reach_label
FROM price_changes pc
JOIN vendors v ON pc.vendor_id = v.id
JOIN transceivers t ON pc.transceiver_id = t.id
WHERE pc.detected_at > NOW() - INTERVAL '1 day' * $1
`;
    const params: unknown[] = [days];
    if (vendor !== undefined) {
      params.push(vendor);
      sql += ` AND v.slug = $${params.length}`;
    }
    if (speed !== undefined) {
      params.push(speed);
      sql += ` AND t.speed_gbps = $${params.length}`;
    }
    params.push(limit);
    sql += ` ORDER BY ABS(pc.delta_pct) DESC LIMIT $${params.length}`;

    const result = await pool.query(sql, params);
    res.json({ price_changes: result.rows, total: result.rowCount });
  } catch (err) {
    console.error("Price changes error:", err);
    res.status(500).json({ error: "Internal server error" });
  }
});
/**
 * PUT /api/competitor-alerts/:id/acknowledge
 *
 * Marks one alert as acknowledged. An optional `notes` field in the JSON body
 * replaces the stored notes; when it is absent the existing notes are kept
 * (via COALESCE).
 *
 * Responds 404 when no alert has the given id, so callers can tell a
 * successful update apart from a no-op.
 */
competitorRouter.put("/:id/acknowledge", async (req, res) => {
  try {
    // A missing body or missing field becomes an explicit NULL so COALESCE keeps the old notes.
    const notes = req.body?.notes ?? null;
    const result = await pool.query(
      `UPDATE competitor_alerts SET acknowledged = true, notes = COALESCE($2, notes) WHERE id = $1`,
      [req.params.id, notes]
    );
    if (result.rowCount === 0) {
      return res.status(404).json({ error: "Alert not found" });
    }
    res.json({ success: true });
  } catch (err) {
    // Logged like the sibling handlers so failed acknowledgements are diagnosable.
    console.error("Acknowledge alert error:", err);
    res.status(500).json({ error: "Internal server error" });
  }
});
/**
 * GET /api/competitor-alerts/summary
 *
 * Competitor intelligence overview assembled from four independent queries:
 * alert counts per vendor (last 7 days), the largest price drops beyond 5%
 * (last 7 days), new-product alerts (last 30 days), and up to 20 rows of
 * v_price_coverage that have no recent price.
 */
competitorRouter.get("/summary", async (_req, res) => {
  const alertsByVendorSql = `
SELECT v.name, v.slug, COUNT(*) AS alert_count,
COUNT(*) FILTER (WHERE ca.alert_type = 'price_drop') AS drops,
COUNT(*) FILTER (WHERE ca.alert_type = 'price_increase') AS increases,
COUNT(*) FILTER (WHERE ca.alert_type = 'new_product') AS new_products
FROM competitor_alerts ca
JOIN vendors v ON ca.vendor_id = v.id
WHERE ca.created_at > NOW() - INTERVAL '7 days'
GROUP BY v.name, v.slug ORDER BY alert_count DESC LIMIT 20
`;
  const recentDropsSql = `
SELECT pc.*, v.name AS vendor_name, t.form_factor, t.speed_gbps, t.reach_label
FROM price_changes pc
JOIN vendors v ON pc.vendor_id = v.id
JOIN transceivers t ON pc.transceiver_id = t.id
WHERE pc.delta_pct < -5 AND pc.detected_at > NOW() - INTERVAL '7 days'
ORDER BY pc.delta_pct ASC LIMIT 10
`;
  const newProductsSql = `
SELECT ca.*, v.name AS vendor_name
FROM competitor_alerts ca
JOIN vendors v ON ca.vendor_id = v.id
WHERE ca.alert_type = 'new_product' AND ca.created_at > NOW() - INTERVAL '30 days'
ORDER BY ca.created_at DESC LIMIT 20
`;
  const missingPricesSql = `SELECT * FROM v_price_coverage WHERE has_recent_price = false LIMIT 20`;

  try {
    // All four queries are issued before any is awaited, so they run concurrently.
    const sections = await Promise.all(
      [alertsByVendorSql, recentDropsSql, newProductsSql, missingPricesSql].map((statement) =>
        pool.query(statement)
      )
    );
    const [byVendor, priceDrops, competitorProducts, missingPrices] = sections;

    res.json({
      period: "7 days",
      by_vendor: byVendor!.rows,
      biggest_price_drops: priceDrops!.rows,
      new_competitor_products: competitorProducts!.rows,
      products_missing_prices: missingPrices!.rows,
    });
  } catch (err) {
    console.error("Summary error:", err);
    res.status(500).json({ error: "Internal server error" });
  }
});

View File

@ -0,0 +1,237 @@
/**
* WS1: Switch → Flexoptix Transceiver Finder
*
* "Customer has a Cisco Nexus 93180YC-FX3 — which Flexoptix transceivers fit?"
*/
import { Router } from "express";
import { pool } from "../db/client";
export const finderRouter = Router();
/**
 * Builds a Flexoptix shop search URL from a transceiver row.
 * Missing parts are left out so a null reach label does not end up as the
 * literal text "null" in the search query.
 */
function flexoptixSearchUrl(row: {
  form_factor?: string | null;
  speed_gbps?: number | string | null;
  reach_label?: string | null;
}): string {
  const terms = [
    row.form_factor,
    row.speed_gbps != null ? `${row.speed_gbps}G` : null,
    row.reach_label,
  ].filter((term): term is string => typeof term === "string" && term !== "");
  return `https://www.flexoptix.net/en/catalogsearch/result/?q=${encodeURIComponent(terms.join(" "))}`;
}

/**
 * GET /api/finder?switch=<model>&speed=&form_factor=&limit=
 *
 * Looks up a switch by model (exact, substring, or full-text match, in that
 * ranking order) and lists transceivers recorded as 'compatible' with the
 * best-matching switch, each enriched with its latest price observation and
 * any Flexoptix product mapping for the same form factor / speed / reach.
 *
 * Query parameters:
 * - switch       required; switch model or part of it
 * - speed        optional; transceiver speed in Gbps (400 if not numeric)
 * - form_factor  optional; exact form factor
 * - limit        optional; maximum rows (default 20, clamped to 1..200)
 *
 * Responses: 400 when `switch` is missing or `speed` is not numeric,
 * 404 when no switch matches, otherwise
 * `{ switch, compatible_transceivers, by_speed_class, total, flexoptix_note }`.
 */
finderRouter.get("/", async (req, res) => {
  // Repeated keys arrive as arrays; only plain non-blank strings are honoured.
  const text = (value: unknown): string | undefined =>
    typeof value === "string" && value.trim() !== "" ? value.trim() : undefined;

  try {
    const switchQuery = text(req.query.switch);
    if (!switchQuery) {
      return res.status(400).json({ error: "Parameter 'switch' is required" });
    }

    const speedText = text(req.query.speed);
    const speed = speedText === undefined ? undefined : Number.parseFloat(speedText);
    if (speed !== undefined && !Number.isFinite(speed)) {
      return res.status(400).json({ error: "Parameter 'speed' must be a number" });
    }
    const formFactor = text(req.query.form_factor);

    const parsedLimit = Number.parseInt(text(req.query.limit) ?? "", 10);
    const limit = Number.isNaN(parsedLimit) ? 20 : Math.min(200, Math.max(1, parsedLimit));

    // Step 1: Find the switch
    const switchResult = await pool.query(
      `SELECT sw.id, sw.model, sw.series, sw.ports_config, sw.max_speed_gbps,
v.name AS vendor_name, sw.image_url, sw.datasheet_r2_key
FROM switches sw
JOIN vendors v ON sw.vendor_id = v.id
WHERE sw.model ILIKE $1
OR sw.model ILIKE '%' || $1 || '%'
OR sw.search_vector @@ plainto_tsquery('english', $1)
ORDER BY
CASE WHEN sw.model ILIKE $1 THEN 0
WHEN sw.model ILIKE $1 || '%' THEN 1
ELSE 2 END
LIMIT 5`,
      [switchQuery]
    );
    if (switchResult.rows.length === 0) {
      return res.status(404).json({
        error: "Switch not found",
        suggestion: "Try a partial model name like 'N9K-C93180' or 'QFX5120'"
      });
    }
    // Only the best-ranked match is used; the other candidates are discarded.
    const sw = switchResult.rows[0];

    // Step 2: Find compatible transceivers via compatibility table
    // NOTE(review): the LEFT JOIN can yield several rows per transceiver when more than one
    // flexoptix_product_map row matches; confirm whether that duplication is intended.
    let compatSql = `
SELECT
t.id, t.slug, t.form_factor, t.speed, t.speed_gbps, t.reach_label, t.reach_meters,
t.fiber_type, t.wavelengths, t.connector, t.power_consumption_w,
t.image_url, t.image_r2_key, t.part_number,
tv.name AS transceiver_vendor,
tv.type AS vendor_type,
c.status AS compat_status,
c.firmware_min,
c.verified_by,
c.notes AS compat_notes,
-- Latest price
(SELECT po.price FROM price_observations po WHERE po.transceiver_id = t.id ORDER BY po.time DESC LIMIT 1) AS latest_price,
(SELECT po.currency FROM price_observations po WHERE po.transceiver_id = t.id ORDER BY po.time DESC LIMIT 1) AS latest_currency,
(SELECT po.stock_level FROM price_observations po WHERE po.transceiver_id = t.id ORDER BY po.time DESC LIMIT 1) AS stock_level,
-- Flexoptix mapping
fpm.flexoptix_sku,
fpm.flexoptix_url,
fpm.flexoptix_price_eur,
fpm.match_type AS flexoptix_match
FROM compatibility c
JOIN transceivers t ON c.transceiver_id = t.id
JOIN vendors tv ON t.vendor_id = tv.id
LEFT JOIN flexoptix_product_map fpm ON (
fpm.form_factor = t.form_factor
AND fpm.speed_gbps = t.speed_gbps
AND (fpm.reach_label = t.reach_label OR fpm.reach_label IS NULL)
)
WHERE c.switch_id = $1 AND c.status = 'compatible'
`;
    const params: unknown[] = [sw.id];
    if (speed !== undefined) {
      params.push(speed);
      compatSql += ` AND t.speed_gbps = $${params.length}`;
    }
    if (formFactor !== undefined) {
      params.push(formFactor);
      compatSql += ` AND t.form_factor = $${params.length}`;
    }
    params.push(limit);
    compatSql += ` ORDER BY t.speed_gbps DESC, t.reach_meters ASC LIMIT $${params.length}`;
    const compatResult = await pool.query(compatSql, params);

    // Step 3: Group results by speed class ("<speed>G <form factor>")
    const bySpeed: Record<string, any[]> = {};
    for (const row of compatResult.rows) {
      const key = `${row.speed_gbps}G ${row.form_factor}`;
      (bySpeed[key] ??= []).push({
        ...row,
        flexoptix_available: !!row.flexoptix_sku,
        flexbox_codable: true, // All Flexoptix modules are FlexBox-codable
        buy_url: row.flexoptix_url || flexoptixSearchUrl(row),
      });
    }

    // Step 4: Extract port types from switch for "what can this switch accept?"
    const portTypes = sw.ports_config || {};

    res.json({
      switch: {
        id: sw.id,
        model: sw.model,
        series: sw.series,
        vendor: sw.vendor_name,
        max_speed_gbps: sw.max_speed_gbps,
        ports: portTypes,
        image_url: sw.image_url,
      },
      compatible_transceivers: compatResult.rows.map(r => ({
        id: r.id,
        slug: r.slug,
        form_factor: r.form_factor,
        speed: r.speed,
        speed_gbps: r.speed_gbps,
        reach: r.reach_label,
        fiber_type: r.fiber_type,
        connector: r.connector,
        vendor: r.transceiver_vendor,
        vendor_type: r.vendor_type,
        image_url: r.image_url,
        compat_status: r.compat_status,
        firmware_min: r.firmware_min,
        // Pricing
        price: r.latest_price ? parseFloat(r.latest_price) : null,
        currency: r.latest_currency,
        stock: r.stock_level,
        // Flexoptix
        flexoptix_sku: r.flexoptix_sku,
        flexoptix_url: r.flexoptix_url,
        flexoptix_price_eur: r.flexoptix_price_eur ? parseFloat(r.flexoptix_price_eur) : null,
        flexoptix_match: r.flexoptix_match,
        flexbox_codable: true,
        buy_url: r.flexoptix_url || flexoptixSearchUrl(r),
      })),
      by_speed_class: bySpeed,
      total: compatResult.rowCount,
      flexoptix_note: "All Flexoptix transceivers support FlexBox coding for OEM compatibility.",
    });
  } catch (err) {
    console.error("Finder error:", err);
    res.status(500).json({ error: "Internal server error" });
  }
});
/**
 * Extracts a reach class (SR, LR, ER, ZR, DR) from a lower-cased free-text query.
 *
 * The token must stand on its own, optionally followed by digits (e.g. "lr4"),
 * so that letters inside ordinary words such as "juniper" or "transceiver"
 * are not mistaken for the ER reach class.
 */
function detectReachClass(lowerCasedQuery: string): string | null {
  const match = /\b(sr|lr|er|zr|dr)\d*\b/.exec(lowerCasedQuery);
  return match?.[1] ? match[1].toUpperCase() : null;
}

/**
 * GET /api/finder/suggest?q=<free text>
 *
 * Free-text query: "100G LR4 for Cisco Nexus" → suggests switch + transceiver combos.
 *
 * Speed ("<n>G"), reach class, and switch vendor are parsed heuristically from
 * the text. Switches are matched by full-text search on the raw query
 * (narrowed to the detected vendor, if any); transceivers are matched on the
 * detected speed and reach only.
 *
 * Responds 400 when `q` is missing or blank, otherwise
 * `{ query, parsed, switches, transceivers, tip }`.
 */
finderRouter.get("/suggest", async (req, res) => {
  try {
    const q = req.query.q;
    // Repeated keys arrive as arrays; only a plain non-blank string is a usable query.
    if (typeof q !== "string" || q.trim() === "") {
      return res.status(400).json({ error: "Parameter 'q' is required" });
    }

    // Extract speed, reach, vendor hints from query
    const queryStr = q.toLowerCase();

    const speedMatch = /(\d+)\s*g\b/i.exec(queryStr);
    const speed: number | null = speedMatch?.[1] ? Number.parseInt(speedMatch[1], 10) : null;

    const reach = detectReachClass(queryStr);

    // First pattern that matches wins.
    const vendorPatterns: [RegExp, string][] = [
      [/cisco|nexus|catalyst/i, 'Cisco'],
      [/juniper|qfx|ex\d{4}/i, 'Juniper'],
      [/arista|dcs-/i, 'Arista'],
      [/dell|powerswitch/i, 'Dell'],
      [/hpe|aruba/i, 'HPE'],
    ];
    const vendor: string | null =
      vendorPatterns.find(([pattern]) => pattern.test(queryStr))?.[1] ?? null;

    // Search switches matching the query
    const switches = await pool.query(
      `SELECT sw.id, sw.model, sw.series, sw.max_speed_gbps, v.name AS vendor_name
FROM switches sw JOIN vendors v ON sw.vendor_id = v.id
WHERE sw.search_vector @@ plainto_tsquery('english', $1)
${vendor ? `AND v.name ILIKE '%' || $2 || '%'` : ''}
ORDER BY sw.max_speed_gbps DESC LIMIT 10`,
      vendor ? [q, vendor] : [q]
    );

    // Search transceivers matching speed/reach.
    // vendors is joined once; the original joined it a second time under an unused alias.
    let tcvrSql = `SELECT t.id, t.slug, t.form_factor, t.speed_gbps, t.reach_label, t.fiber_type,
tv.name AS vendor, t.image_url
FROM transceivers t JOIN vendors tv ON t.vendor_id = tv.id
WHERE 1=1`;
    const tcvrParams: unknown[] = [];
    if (speed) {
      tcvrParams.push(speed);
      tcvrSql += ` AND t.speed_gbps = $${tcvrParams.length}`;
    }
    if (reach) {
      // Prefix match so "LR" also finds labels such as "LR4".
      tcvrParams.push(reach + '%');
      tcvrSql += ` AND t.reach_label ILIKE $${tcvrParams.length}`;
    }
    tcvrSql += ` ORDER BY t.speed_gbps DESC LIMIT 10`;
    const transceivers = await pool.query(tcvrSql, tcvrParams);

    res.json({
      query: q,
      parsed: { speed, vendor, reach },
      switches: switches.rows,
      transceivers: transceivers.rows,
      tip: "Use GET /api/finder?switch=<model> for detailed compatibility results",
    });
  } catch (err) {
    console.error("Suggest error:", err);
    res.status(500).json({ error: "Internal server error" });
  }
});

View File

@ -0,0 +1,201 @@
/**
* WS5 + WS6: Sales Forecast Engine + Price Trajectory
*/
import { Router } from "express";
import { pool } from "../db/client";
import { computeHypeCycle, findTechnology, TECH_GENERATIONS } from "../hype-cycle/norton-bass";
export const forecastRouter = Router();
/**
 * GET /api/forecast/:technology
 *
 * Returns sales forecast for 3/9/12/18 months + price trajectory + buy signal.
 *
 * The technology is resolved with `findTechnology`; its hype-cycle phase
 * selects an annual price-decline rate, which is compounded monthly from the
 * current average selling price (ASP). The ASP is the most recent weekly
 * average from price_observations for the technology's speed, or
 * `speedGbps * 0.5` when no usable price exists.
 *
 * Each computed forecast is also inserted into sales_forecasts. That write is
 * best-effort: a failure is logged and does not affect the response.
 *
 * Responds 404 (with the list of known technology names) when the technology
 * is unknown.
 */
forecastRouter.get("/:technology", async (req, res) => {
  // Share of the current ASP assumed as the long-run price floor.
  const PRICE_FLOOR_RATIO = 0.20;
  const round2 = (value: number): number => Math.round(value * 100) / 100;

  try {
    const techQuery = req.params.technology;
    const currentYear = new Date().getFullYear();

    // Find technology in Norton-Bass model
    const tech = findTechnology(techQuery);
    if (!tech) {
      return res.status(404).json({
        error: "Technology not found",
        available: TECH_GENERATIONS.map(t => t.name),
      });
    }

    // Compute hype cycle
    const hype = computeHypeCycle(tech, currentYear);

    // Get price data from DB
    // NOTE(review): rows are grouped per week AND currency, so the 52-row limit and the
    // "latest" row below can mix currencies — confirm whether a currency filter is needed.
    const priceHistory = await pool.query(`
SELECT
date_trunc('week', po.time) AS week,
AVG(po.price) AS avg_price,
MIN(po.price) AS min_price,
MAX(po.price) AS max_price,
COUNT(*) AS observations,
po.currency
FROM price_observations po
JOIN transceivers t ON po.transceiver_id = t.id
WHERE t.speed_gbps = $1
GROUP BY week, po.currency
ORDER BY week DESC
LIMIT 52
`, [tech.speedGbps]);

    // Latest weekly average, or a rough estimate when there is no usable price.
    const latestAvg = priceHistory.rows.length > 0
      ? parseFloat(priceHistory.rows[0].avg_price)
      : Number.NaN;
    const currentASP = Number.isFinite(latestAvg) ? latestAvg : tech.speedGbps * 0.5; // rough estimate

    // Price decline model based on phase
    const phaseDeclineRates: Record<string, number> = {
      'INNOVATION_TRIGGER': 0.05,
      'PEAK_OF_INFLATED_EXPECTATIONS': 0.12,
      'TROUGH_OF_DISILLUSIONMENT': 0.25,
      'SLOPE_OF_ENLIGHTENMENT': 0.15,
      'PLATEAU_OF_PRODUCTIVITY': 0.05,
      'LEGACY_DECLINE': 0.03,
    };
    const annualDecline = phaseDeclineRates[hype.phase] ?? 0.10;
    // Monthly rate that compounds to the annual rate over 12 months.
    const monthlyDecline = 1 - Math.pow(1 - annualDecline, 1 / 12);
    const aspAfter = (months: number): number => currentASP * Math.pow(1 - monthlyDecline, months);
    const asp3m = aspAfter(3);
    const asp9m = aspAfter(9);
    const asp12m = aspAfter(12);
    const asp18m = aspAfter(18);

    // Price floor estimate (based on mature technology pricing patterns)
    // Typically 15-25% of peak price at full maturity
    const priceFloor = currentASP * PRICE_FLOOR_RATIO;
    // Uses the constant ratio directly so a zero ASP cannot turn this into log(0/0) = NaN.
    const monthsToFloor = annualDecline > 0
      ? Math.ceil(Math.log(PRICE_FLOOR_RATIO) / Math.log(1 - monthlyDecline))
      : 999;

    // Volume forecast based on adoption curve
    const adoptionNow = hype.adoptionPct / 100;
    const forecastPct = (index: number): number => (hype.forecast?.[index]?.adoptionPct ?? 0) / 100;
    const adoption3m = Math.min(1, adoptionNow + forecastPct(0) * 0.25);
    const adoption9m = Math.min(1, adoptionNow + forecastPct(0) * 0.75);
    const adoption12m = Math.min(1, adoptionNow + forecastPct(1));
    const adoption18m = Math.min(1, adoptionNow + forecastPct(2));

    const totalMarketPorts = tech.m * 1000000; // market potential in units
    const marketShare = 0.03; // estimated Flexoptix-addressable share
    // The trailing factor scales the annual volume to the horizon length (3/12, 9/12, 12/12, 18/12).
    const units3m = Math.round(totalMarketPorts * adoption3m * marketShare * 0.25);
    const units9m = Math.round(totalMarketPorts * adoption9m * marketShare * 0.75);
    const units12m = Math.round(totalMarketPorts * adoption12m * marketShare);
    const units18m = Math.round(totalMarketPorts * adoption18m * marketShare * 1.5);

    // Confidence decreases with forecast horizon
    const conf3m = Math.min(0.95, 0.85 + (priceHistory.rows.length / 100));
    const conf9m = conf3m * 0.78;
    const conf12m = conf3m * 0.65;
    const conf18m = conf3m * 0.50;

    // Buy signal
    let buySignal: string;
    let signalReason: string;
    if (hype.phase === 'SLOPE_OF_ENLIGHTENMENT' || hype.phase === 'PLATEAU_OF_PRODUCTIVITY') {
      buySignal = 'BUY_NOW';
      signalReason = `${tech.name} is in ${hype.phase.replace(/_/g, ' ').toLowerCase()} — prices near floor, volume growing, stable supply chain.`;
    } else if (hype.phase === 'TROUGH_OF_DISILLUSIONMENT') {
      buySignal = 'WAIT';
      signalReason = `${tech.name} prices dropping >10%/quarter. Wait for trough bottom (estimated ${Math.ceil(monthsToFloor * 0.3)} months).`;
    } else if (hype.phase === 'PEAK_OF_INFLATED_EXPECTATIONS') {
      buySignal = 'WAIT';
      signalReason = `${tech.name} is at peak hype — prices will drop significantly. Only buy if urgent.`;
    } else if (hype.phase === 'INNOVATION_TRIGGER') {
      buySignal = 'HOLD';
      signalReason = `${tech.name} is early-stage — limited availability, premium pricing. Wait unless you need bleeding-edge.`;
    } else {
      buySignal = 'HOLD';
      signalReason = `${tech.name} is in legacy/decline — consider migrating to next generation.`;
    }

    // Store forecast in DB. Non-critical: a failed write is logged, never surfaced to the client.
    try {
      await pool.query(`
INSERT INTO sales_forecasts (
technology, speed_gbps, form_factor,
forecast_3m_units, forecast_3m_revenue, forecast_9m_units, forecast_9m_revenue,
forecast_12m_units, forecast_12m_revenue, forecast_18m_units, forecast_18m_revenue,
current_asp, asp_3m, asp_12m, price_floor, months_to_floor,
confidence_3m, confidence_9m, confidence_12m, confidence_18m,
buy_signal, signal_reason, data_points
) VALUES ($1,$2,$3,$4,$5,$6,$7,$8,$9,$10,$11,$12,$13,$14,$15,$16,$17,$18,$19,$20,$21,$22,$23)
`, [
        tech.name, tech.speedGbps, tech.formFactor,
        units3m, units3m * asp3m, units9m, units9m * asp9m,
        units12m, units12m * asp12m, units18m, units18m * asp18m,
        currentASP, asp3m, asp12m, priceFloor, monthsToFloor,
        conf3m, conf9m, conf12m, conf18m,
        buySignal, signalReason, priceHistory.rows.length,
      ]);
    } catch (persistErr) {
      console.warn("Forecast persistence failed:", persistErr);
    }

    res.json({
      technology: tech.name,
      speed_gbps: tech.speedGbps,
      form_factor: tech.formFactor,
      hype_cycle: {
        phase: hype.phase,
        position_pct: hype.positionPct,
        adoption_pct: hype.adoptionPct,
      },
      forecasts: {
        "3_months": { units: units3m, revenue_eur: Math.round(units3m * asp3m), confidence: round2(conf3m) },
        "9_months": { units: units9m, revenue_eur: Math.round(units9m * asp9m), confidence: round2(conf9m) },
        "12_months": { units: units12m, revenue_eur: Math.round(units12m * asp12m), confidence: round2(conf12m) },
        "18_months": { units: units18m, revenue_eur: Math.round(units18m * asp18m), confidence: round2(conf18m) },
      },
      price_trajectory: {
        current_asp: round2(currentASP),
        asp_3m: round2(asp3m),
        asp_9m: round2(asp9m),
        asp_12m: round2(asp12m),
        asp_18m: round2(asp18m),
        price_floor: round2(priceFloor),
        months_to_floor: Math.max(0, monthsToFloor),
        annual_decline_pct: Math.round(annualDecline * 100),
      },
      buy_signal: {
        signal: buySignal,
        reason: signalReason,
      },
      price_history: priceHistory.rows.slice(0, 12),
      model: "Norton-Bass Multigenerational Diffusion v1",
    });
  } catch (err) {
    console.error("Forecast error:", err);
    res.status(500).json({ error: "Internal server error" });
  }
});
/**
 * GET /api/forecast
 *
 * Overview of all technology forecasts: for every entry in TECH_GENERATIONS,
 * the hype-cycle phase, adoption percentage, and position computed for the
 * current calendar year. No database access is involved.
 */
forecastRouter.get("/", async (_req, res) => {
  try {
    const currentYear = new Date().getFullYear();
    const results = TECH_GENERATIONS.map(tech => {
      const hype = computeHypeCycle(tech, currentYear);
      return {
        technology: tech.name,
        speed_gbps: tech.speedGbps,
        form_factor: tech.formFactor,
        phase: hype.phase,
        adoption_pct: hype.adoptionPct,
        position_pct: hype.positionPct,
      };
    });
    res.json({ technologies: results });
  } catch (err) {
    // Logged like the sibling handler so model failures are diagnosable.
    console.error("Forecast overview error:", err);
    res.status(500).json({ error: "Internal server error" });
  }
});

View File

@ -0,0 +1,233 @@
/**
* WS3: Transport System Planner
*
* "Berlin to Darmstadt, 100G" → complete BOM with switches, fiber providers, Flexoptix transceivers
*/
import { Router } from "express";
import { pool } from "../db/client";
export const transportRouter = Router();
/**
 * Great-circle distance between two points using the haversine formula.
 *
 * @param lat1 Latitude of the first point, in degrees.
 * @param lon1 Longitude of the first point, in degrees.
 * @param lat2 Latitude of the second point, in degrees.
 * @param lon2 Longitude of the second point, in degrees.
 * @returns Distance in kilometres on a sphere of radius 6371 km.
 */
function haversineKm(lat1: number, lon1: number, lat2: number, lon2: number): number {
  const R = 6371;
  const dLat = (lat2 - lat1) * Math.PI / 180;
  const dLon = (lon2 - lon1) * Math.PI / 180;
  const rawA = Math.sin(dLat/2)**2 + Math.cos(lat1*Math.PI/180) * Math.cos(lat2*Math.PI/180) * Math.sin(dLon/2)**2;
  // Floating-point rounding can push the value marginally outside [0, 1] for
  // (near-)antipodal points, which would make sqrt(1 - a) NaN. NaN inputs stay NaN.
  const a = Math.min(1, Math.max(0, rawA));
  return R * 2 * Math.atan2(Math.sqrt(a), Math.sqrt(1-a));
}
/**
 * POST /api/transport/plan
 * Body: { from, to, bandwidth_gbps?, redundancy?, budget_preference? }
 *
 * Plans a transport link between two cities from the `cities` table:
 * estimates the fiber distance (great-circle distance × 1.4), derives the
 * transceiver options for that distance, and attaches matching transceivers
 * (Flexoptix first, any vendor as fallback), up to three candidate switches,
 * and known fiber providers to every option.
 *
 * - from / to           required city names (matched with ILIKE)
 * - bandwidth_gbps      positive number, default 100
 * - redundancy          `true` (or the string "true") doubles the transceiver count
 * - budget_preference   accepted for compatibility; not used by the planner
 *
 * Responses: 400 for a missing/invalid body field, 404 (with the list of
 * known cities) when a city cannot be resolved, otherwise `{ route, options, note }`.
 */
transportRouter.post("/plan", async (req, res) => {
  try {
    // A request without a parsed JSON body must yield a 400, not a destructuring TypeError.
    const body = req.body ?? {};
    const { from, to } = body;
    if (typeof from !== "string" || typeof to !== "string" || !from || !to) {
      return res.status(400).json({ error: "Parameters 'from' and 'to' are required" });
    }

    // Coerce to a number: a string would turn the ">= 400" checks and "+ 'G'" labels wrong.
    const bandwidthGbps = Number(body.bandwidth_gbps ?? 100);
    if (!Number.isFinite(bandwidthGbps) || bandwidthGbps <= 0) {
      return res.status(400).json({ error: "Parameter 'bandwidth_gbps' must be a positive number" });
    }
    // The string "false" is truthy, so only an explicit true counts as redundancy.
    const redundancy = body.redundancy === true || body.redundancy === "true";

    // 1. Resolve cities
    const [cityA, cityB] = await Promise.all([
      pool.query(`SELECT * FROM cities WHERE name ILIKE $1 LIMIT 1`, [from]),
      pool.query(`SELECT * FROM cities WHERE name ILIKE $1 LIMIT 1`, [to]),
    ]);
    if (!cityA.rows[0] || !cityB.rows[0]) {
      const allCities = await pool.query(`SELECT name, country FROM cities ORDER BY name`);
      return res.status(404).json({
        error: `City not found: ${!cityA.rows[0] ? from : to}`,
        available_cities: allCities.rows.map(c => `${c.name} (${c.country})`),
      });
    }
    const a = cityA.rows[0];
    const b = cityB.rows[0];

    // 2. Calculate distance
    const straightKm = haversineKm(parseFloat(a.lat), parseFloat(a.lon), parseFloat(b.lat), parseFloat(b.lon));
    if (!Number.isFinite(straightKm)) {
      // Without coordinates every later figure would be NaN; fail loudly instead.
      console.error("Transport planner error: missing coordinates for", a.name, "or", b.name);
      return res.status(500).json({ error: "Internal server error" });
    }
    const fiberKm = Math.round(straightKm * 1.4); // fiber route multiplier

    // 3. Determine transceiver requirements based on distance
    const transceiverOptions = determineTransceiverOptions(fiberKm, bandwidthGbps);

    // 4 + 5. Fiber providers for this route and matching switches (independent queries)
    const [providers, switchOptions] = await Promise.all([
      pool.query(
        `SELECT fp.name, fp.website, fp.type, fp.products,
fr.product_type, fr.monthly_price_eur, fr.setup_fee_eur, fr.min_contract_months
FROM fiber_routes fr
JOIN fiber_providers fp ON fr.provider_id = fp.id
WHERE (fr.city_a ILIKE $1 AND fr.city_b ILIKE $2)
OR (fr.city_a ILIKE $2 AND fr.city_b ILIKE $1)
OR (fr.city_a ILIKE $1 AND fr.city_b ILIKE 'Frankfurt%')
OR (fr.city_a ILIKE 'Frankfurt%' AND fr.city_b ILIKE $2)
ORDER BY fr.monthly_price_eur ASC NULLS LAST`,
        [from, to]
      ),
      pool.query(
        `SELECT sw.id, sw.model, sw.series, sw.max_speed_gbps, sw.switching_capacity_tbps,
sw.ports_config, sw.msrp_usd, v.name AS vendor
FROM switches sw JOIN vendors v ON sw.vendor_id = v.id
WHERE sw.max_speed_gbps >= $1
AND sw.lifecycle_status NOT IN ('End-of-Life', 'End-of-Sale')
ORDER BY sw.msrp_usd ASC NULLS LAST, sw.max_speed_gbps DESC
LIMIT 10`,
        [bandwidthGbps]
      ),
    ]);

    // These two parts are identical for every option, so build them once.
    const switchSummaries = switchOptions.rows.slice(0, 3).map(sw => ({
      model: sw.model,
      vendor: sw.vendor,
      max_speed: sw.max_speed_gbps + 'G',
      price_est: sw.msrp_usd ? parseFloat(sw.msrp_usd) : null,
    }));
    const fiberProviders = providers.rows.length > 0 ? providers.rows : [
      { name: "Contact local fiber providers", note: `No pre-seeded routes for ${from} → ${to}. Check euNetworks, Telia, DTAG.` }
    ];

    // 6. Find Flexoptix transceivers for each option (options are independent, so run concurrently)
    const options = await Promise.all(transceiverOptions.map(async (tcvrOpt) => {
      const flexoptix = await pool.query(
        `SELECT t.id, t.slug, t.form_factor, t.speed_gbps, t.reach_label, t.reach_meters,
t.fiber_type, t.connector, t.image_url,
(SELECT po.price FROM price_observations po WHERE po.transceiver_id = t.id ORDER BY po.time DESC LIMIT 1) AS price,
(SELECT po.currency FROM price_observations po WHERE po.transceiver_id = t.id ORDER BY po.time DESC LIMIT 1) AS currency
FROM transceivers t
JOIN vendors v ON t.vendor_id = v.id
WHERE t.speed_gbps >= $1
AND t.reach_meters >= $2
AND t.fiber_type = 'SMF'
AND v.slug = 'flexoptix'
ORDER BY t.speed_gbps ASC, t.reach_meters ASC
LIMIT 5`,
        [tcvrOpt.speed_gbps, tcvrOpt.min_reach_m]
      );
      // If no Flexoptix match, find any compatible transceiver
      const anyMatch = flexoptix.rows.length > 0 ? flexoptix.rows : (await pool.query(
        `SELECT t.id, t.slug, t.form_factor, t.speed_gbps, t.reach_label, t.reach_meters,
t.fiber_type, t.connector, t.image_url, v.name AS vendor,
(SELECT po.price FROM price_observations po WHERE po.transceiver_id = t.id ORDER BY po.time DESC LIMIT 1) AS price
FROM transceivers t JOIN vendors v ON t.vendor_id = v.id
WHERE t.speed_gbps >= $1 AND t.reach_meters >= $2 AND t.fiber_type = 'SMF'
ORDER BY t.speed_gbps ASC LIMIT 5`,
        [tcvrOpt.speed_gbps, tcvrOpt.min_reach_m]
      )).rows;

      // Even a same-city link (0 km) needs one span with a transceiver at each end.
      const spanCount = Math.max(1, Math.ceil(fiberKm * 1000 / tcvrOpt.max_span_m));
      const tcvrCount = redundancy ? spanCount * 4 : spanCount * 2; // 2 per span (both ends), x2 for redundancy
      const tcvrPrice = anyMatch[0]?.price ? parseFloat(anyMatch[0].price) : tcvrOpt.est_price_eur;
      const totalTcvrCost = tcvrCount * tcvrPrice;

      return {
        name: tcvrOpt.name,
        description: tcvrOpt.description,
        transceiver: {
          type: `${tcvrOpt.speed_gbps}G ${tcvrOpt.reach_label}`,
          form_factor: tcvrOpt.form_factor,
          spans_needed: spanCount,
          units_needed: tcvrCount,
          unit_price_est: tcvrPrice,
          total_cost_est: totalTcvrCost,
          // NOTE(review): holds the any-vendor fallback rows when no Flexoptix product matched.
          flexoptix_products: anyMatch.map(m => ({
            slug: m.slug,
            speed: m.speed_gbps + 'G',
            reach: m.reach_label,
            price: m.price ? parseFloat(m.price) : null,
            buy_url: `https://www.flexoptix.net/en/catalogsearch/result/?q=${encodeURIComponent(m.form_factor + ' ' + m.speed_gbps + 'G ' + m.reach_label)}`,
          })),
        },
        switches: switchSummaries,
        fiber_providers: fiberProviders,
      };
    }));

    res.json({
      route: {
        from: a.name,
        to: b.name,
        straight_line_km: Math.round(straightKm),
        estimated_fiber_km: fiberKm,
        bandwidth_requested: bandwidthGbps + 'G',
        redundancy,
      },
      options,
      note: "Prices are estimates. Contact Flexoptix sales for volume pricing.",
    });
  } catch (err) {
    console.error("Transport planner error:", err);
    res.status(500).json({ error: "Internal server error" });
  }
});
/**
 * Lists the transceiver options that can serve a link of the given fiber
 * length at the requested bandwidth, shortest-reach option first.
 *
 * Reach tiers: FR up to 2 km, LR4 up to 10 km, ER4 up to 40 km, and coherent
 * ZR (speed capped at 400G, 80 km spans) for anything beyond 10 km. A carrier
 * wavelength service entry is always appended last.
 *
 * @param fiberKm Estimated fiber route length in kilometres.
 * @param bandwidthGbps Requested bandwidth in Gbps.
 * @returns Option descriptors with name, description, speed, reach label,
 *          form factor, minimum reach, maximum span (both in metres), and an
 *          estimated unit price in EUR.
 */
function determineTransceiverOptions(fiberKm: number, bandwidthGbps: number) {
  const isHighSpeed = bandwidthGbps >= 400;
  const greyFormFactor = isHighSpeed ? 'QSFP-DD' : 'QSFP28';
  const coherentSpeed = Math.min(bandwidthGbps, 400);
  const spansOf = (spanKm: number): number => Math.ceil(fiberKm / spanKm);

  // Each tier pairs its applicability test with the option it contributes.
  const tiers = [
    {
      applies: fiberKm <= 2,
      option: {
        name: `${bandwidthGbps}G FR (2km)`,
        description: `Short reach — single span, no amplification needed`,
        speed_gbps: bandwidthGbps,
        reach_label: 'FR',
        form_factor: greyFormFactor,
        min_reach_m: 2000,
        max_span_m: 2000,
        est_price_eur: isHighSpeed ? 200 : 80,
      },
    },
    {
      applies: fiberKm <= 10,
      option: {
        name: `${bandwidthGbps}G LR4 (10km)`,
        description: `Metro reach — ${spansOf(10)} span(s)`,
        speed_gbps: bandwidthGbps,
        reach_label: 'LR4',
        form_factor: greyFormFactor,
        min_reach_m: 10000,
        max_span_m: 10000,
        est_price_eur: isHighSpeed ? 400 : 120,
      },
    },
    {
      applies: fiberKm <= 40,
      option: {
        name: `${bandwidthGbps}G ER4 (40km)`,
        description: `Extended reach — ${spansOf(40)} span(s)`,
        speed_gbps: bandwidthGbps,
        reach_label: 'ER4',
        form_factor: greyFormFactor,
        min_reach_m: 40000,
        max_span_m: 40000,
        est_price_eur: isHighSpeed ? 1500 : 400,
      },
    },
    {
      // Coherent ZR is offered for every route longer than 10 km.
      applies: fiberKm > 10,
      option: {
        name: `${coherentSpeed}G ZR Coherent (80km/span)`,
        description: `Coherent DWDM — ${spansOf(80)} span(s), OIF 400ZR`,
        speed_gbps: coherentSpeed,
        reach_label: 'ZR',
        form_factor: 'QSFP-DD',
        min_reach_m: 80000,
        max_span_m: 80000,
        est_price_eur: 2500,
      },
    },
    {
      // The managed carrier wavelength is offered regardless of distance.
      applies: true,
      option: {
        name: `Carrier Wavelength Service (${bandwidthGbps}G)`,
        description: `Managed service — provider handles fiber + amplification. You only need LR4 transceivers at each end.`,
        speed_gbps: bandwidthGbps,
        reach_label: 'LR4',
        form_factor: greyFormFactor,
        min_reach_m: 10000,
        max_span_m: 999000,
        est_price_eur: isHighSpeed ? 400 : 120,
      },
    },
  ];

  return tiers.filter((tier) => tier.applies).map((tier) => tier.option);
}
/**
 * GET /api/transport/cities
 *
 * Lists every city known to the planner (name, country, IX and data-center
 * flags), ordered by name. Response: `{ cities, total }`.
 */
transportRouter.get("/cities", async (_req, res) => {
  try {
    const result = await pool.query(`SELECT name, country, has_ix, ix_names, has_datacenter FROM cities ORDER BY name`);
    res.json({ cities: result.rows, total: result.rowCount });
  } catch (err) {
    // Logged like the planner handler so database failures are diagnosable.
    console.error("Transport cities error:", err);
    res.status(500).json({ error: "Internal server error" });
  }
});
/**
 * GET /api/transport/providers
 *
 * Lists all rows of fiber_providers, ordered by name.
 * Response: `{ providers, total }`.
 */
transportRouter.get("/providers", async (_req, res) => {
  try {
    const result = await pool.query(`SELECT * FROM fiber_providers ORDER BY name`);
    res.json({ providers: result.rows, total: result.rowCount });
  } catch (err) {
    // Logged like the planner handler so database failures are diagnosable.
    console.error("Transport providers error:", err);
    res.status(500).json({ error: "Internal server error" });
  }
});

View File

@ -0,0 +1,175 @@
/**
* MCP Tool: find_flexoptix_for_switch
*
* "Customer has Switch X — which Flexoptix transceivers should they buy?"
*/
import { pool } from "../db";
/**
 * MCP tool declarations, keyed by tool name.
 *
 * Each entry carries the tool's `name`, a human-readable `description` and a
 * JSON-Schema-style `inputSchema` describing its arguments. The names match
 * the `case` labels dispatched by `handleFinderTool` in this file.
 */
export const finderTools = {
// Switch model -> compatible transceivers. Only `switch_model` is required.
find_flexoptix_for_switch: {
name: "find_flexoptix_for_switch",
description: "Find the right Flexoptix transceivers for a customer's switch. Input a switch model name and get compatible Flexoptix products with prices, shop links, and FlexBox coding info.",
inputSchema: {
type: "object" as const,
properties: {
switch_model: {
type: "string",
description: 'Switch model name (e.g., "Cisco Nexus 93180YC-FX3", "QFX5120-48Y", "DCS-7050SX3-48YC12")',
},
speed_gbps: {
type: "number",
description: "Filter by port speed in Gbps (10, 25, 40, 100, 400)",
},
reach: {
type: "string",
description: "Filter by reach (SR, LR, ER, ZR, or specific like 10km, 80km)",
},
},
required: ["switch_model"],
},
},
// City-to-city transport planning. `from` and `to` are required.
plan_transport: {
name: "plan_transport",
description: "Plan a fiber transport system between two cities. Returns switch, transceiver, and fiber provider recommendations with bill of materials and Flexoptix pricing.",
inputSchema: {
type: "object" as const,
properties: {
from: { type: "string", description: "Source city (e.g., Berlin, Frankfurt, Amsterdam)" },
to: { type: "string", description: "Destination city (e.g., Darmstadt, Munich, London)" },
bandwidth_gbps: { type: "number", description: "Required bandwidth in Gbps (default: 100)" },
redundancy: { type: "boolean", description: "Whether to include redundant path (default: false)" },
},
required: ["from", "to"],
},
},
// Sales forecast for one technology. `technology` is required.
forecast_sales: {
name: "forecast_sales",
description: "Predict transceiver sales volume and price trajectory for a technology over 3/9/12/18 months. Includes buy/wait/hold signal.",
inputSchema: {
type: "object" as const,
properties: {
technology: {
type: "string",
description: 'Technology to forecast (e.g., "400G QSFP-DD", "100G QSFP28", "800G OSFP", "1.6T OSFP-XD")',
},
},
required: ["technology"],
},
},
// Competitor alert feed. No `required` list: every argument is an optional filter.
get_competitor_alerts: {
name: "get_competitor_alerts",
description: "Get recent competitor intelligence: new products, price changes, stock changes. Shows what competitors are doing in the market.",
inputSchema: {
type: "object" as const,
properties: {
vendor: { type: "string", description: "Filter by competitor name/slug" },
alert_type: { type: "string", description: "Filter: new_product, price_drop, price_increase, out_of_stock, back_in_stock" },
days: { type: "number", description: "Look back N days (default: 7)" },
},
},
},
// Blog post generation. Only `topic` is required.
generate_blog: {
name: "generate_blog",
description: "Generate a professional blog post for the Flexoptix blog. Auto-enriched with pricing data, competitor analysis, and product links.",
inputSchema: {
type: "object" as const,
properties: {
topic: { type: "string", description: "Blog topic or title" },
type: {
type: "string",
description: "Blog type: market_alert, migration_guide, competitor_analysis, technology_deep_dive, buying_guide, tutorial, comparison",
},
target_audience: { type: "string", description: "Audience: technical, sales, customer (default: technical)" },
include_products: { type: "boolean", description: "Include Flexoptix product recommendations (default: true)" },
word_count: { type: "number", description: "Target word count (default: 2000)" },
},
required: ["topic"],
},
},
};
/**
 * Executes one MCP tool call and returns the result serialized as JSON.
 *
 * - `find_flexoptix_for_switch`: resolves the switch by (partial) model name,
 *   then lists up to 30 transceivers marked `compatible` for it, each with its
 *   most recent price observation and a Flexoptix shop search link.
 * - `get_competitor_alerts`: returns up to 30 competitor alerts from the last
 *   `days` days, optionally filtered by vendor slug and alert type.
 * - `plan_transport`, `forecast_sales`, `generate_blog`: not executed here; the
 *   reply names the HTTP endpoint to call and echoes the arguments.
 *
 * @param name Tool name as declared in `finderTools`.
 * @param args Raw tool arguments from the MCP client; they are not trusted to
 *             match the declared schema.
 * @returns A JSON string. Problems detected here (unknown tool, missing or
 *          unknown switch) are reported as `{ error }` instead of being thrown.
 *          Errors raised by `pool.query` propagate as a rejected promise.
 */
export async function handleFinderTool(name: string, args: Record<string, any>): Promise<string> {
  switch (name) {
    case "find_flexoptix_for_switch": {
      const { speed_gbps, reach } = args;

      // A blank model would make the ILIKE pattern '%%', which matches every
      // switch and silently answers for an arbitrary one — reject it up front.
      const switch_model = typeof args.switch_model === "string" ? args.switch_model.trim() : "";
      if (!switch_model) {
        return JSON.stringify({ error: "switch_model is required and must be a non-empty string." });
      }

      // Find switch (exact model matches are ranked first)
      const sw = await pool.query(
        `SELECT sw.id, sw.model, sw.series, sw.ports_config, sw.max_speed_gbps, v.name AS vendor
         FROM switches sw JOIN vendors v ON sw.vendor_id = v.id
         WHERE sw.model ILIKE '%' || $1 || '%' OR sw.search_vector @@ plainto_tsquery('english', $1)
         ORDER BY CASE WHEN sw.model ILIKE $1 THEN 0 ELSE 1 END LIMIT 3`,
        [switch_model]
      );
      if (!sw.rows[0]) {
        return JSON.stringify({ error: `Switch "${switch_model}" not found. Try a partial model name.` });
      }

      // Find compatible transceivers; price/currency come from the latest observation
      let sql = `
        SELECT t.form_factor, t.speed_gbps, t.reach_label, t.fiber_type, t.connector,
               t.image_url, v.name AS vendor, c.firmware_min,
               (SELECT po.price FROM price_observations po WHERE po.transceiver_id = t.id ORDER BY po.time DESC LIMIT 1) AS price,
               (SELECT po.currency FROM price_observations po WHERE po.transceiver_id = t.id ORDER BY po.time DESC LIMIT 1) AS currency
        FROM compatibility c
        JOIN transceivers t ON c.transceiver_id = t.id
        JOIN vendors v ON t.vendor_id = v.id
        WHERE c.switch_id = $1 AND c.status = 'compatible'
      `;
      const params: any[] = [sw.rows[0].id];
      let idx = 2;
      if (speed_gbps) { sql += ` AND t.speed_gbps = $${idx}`; params.push(speed_gbps); idx++; }
      if (reach) { sql += ` AND t.reach_label ILIKE $${idx}`; params.push(reach + '%'); idx++; }
      sql += ` ORDER BY t.speed_gbps DESC, t.reach_meters ASC LIMIT 30`;

      const compat = await pool.query(sql, params);
      return JSON.stringify({
        switch: { model: sw.rows[0].model, vendor: sw.rows[0].vendor, ports: sw.rows[0].ports_config },
        compatible_count: compat.rowCount,
        transceivers: compat.rows.map(r => ({
          ...r,
          flexbox_note: "All Flexoptix transceivers support FlexBox coding — one transceiver works in any vendor's switch.",
          buy_url: `https://www.flexoptix.net/en/catalogsearch/result/?q=${encodeURIComponent(r.form_factor + ' ' + r.speed_gbps + 'G ' + r.reach_label)}`,
        })),
      }, null, 2);
    }

    case "get_competitor_alerts": {
      const { vendor, alert_type } = args;

      // `days` feeds an interval multiplication in SQL; anything that is not a
      // positive finite number (missing, null, text, 0, negative) falls back to 7.
      const requestedDays = Number(args.days);
      const days = Number.isFinite(requestedDays) && requestedDays > 0 ? requestedDays : 7;

      let sql = `SELECT ca.alert_type, ca.severity, ca.part_number, ca.product_name,
        ca.old_price, ca.new_price, ca.price_pct, ca.currency, ca.source_url,
        v.name AS vendor, ca.created_at
        FROM competitor_alerts ca LEFT JOIN vendors v ON ca.vendor_id = v.id
        WHERE ca.created_at > NOW() - INTERVAL '1 day' * $1`;
      const params: any[] = [days];
      let idx = 2;
      if (vendor) { sql += ` AND v.slug ILIKE $${idx}`; params.push('%' + vendor + '%'); idx++; }
      if (alert_type) { sql += ` AND ca.alert_type = $${idx}`; params.push(alert_type); idx++; }
      sql += ` ORDER BY ca.created_at DESC LIMIT 30`;

      const result = await pool.query(sql, params);
      return JSON.stringify({ alerts: result.rows, count: result.rowCount }, null, 2);
    }

    case "plan_transport":
    case "forecast_sales":
    case "generate_blog":
      // These forward to the API routes — return instruction to use HTTP API
      return JSON.stringify({
        note: `Use the TIP HTTP API for ${name}. See https://transceiver-db.context-x.org/api for endpoints.`,
        endpoint: name === "plan_transport" ? "POST /api/transport/plan" :
          name === "forecast_sales" ? "GET /api/forecast/:technology" :
          "POST /api/blog/generate",
        args,
      });

    default:
      return JSON.stringify({ error: `Unknown tool: ${name}` });
  }
}

View File

@ -14,6 +14,22 @@
import PgBoss from "pg-boss"; import PgBoss from "pg-boss";
import { config } from "dotenv"; import { config } from "dotenv";
import { join } from "path"; import { join } from "path";
import { rmSync, mkdirSync } from "fs";
/**
 * Run a scraper with an isolated Crawlee storage directory to prevent queue collisions.
 *
 * Points `CRAWLEE_STORAGE_DIR` at a per-scraper `storage-<name>` directory while
 * `fn` runs, then restores the variable to its previous state and removes the
 * directory — regardless of whether `fn` resolved or rejected.
 *
 * NOTE(review): `process.env` is process-wide, so two scrapers running at the
 * same time in one process would overwrite each other's value — confirm the
 * workers calling this never overlap.
 *
 * @param name Scraper identifier used as the directory suffix.
 * @param fn   The scraper to run; its rejection propagates to the caller.
 */
async function withIsolatedStorage(name: string, fn: () => Promise<void>): Promise<void> {
  const dir = join(__dirname, "..", "..", "..", `storage-${name}`);
  mkdirSync(dir, { recursive: true });
  const prev = process.env.CRAWLEE_STORAGE_DIR;
  process.env.CRAWLEE_STORAGE_DIR = dir;
  try {
    await fn();
  } finally {
    // Restore the exact prior state: an originally-unset variable must be
    // removed again, not left behind as an empty string.
    if (prev === undefined) {
      delete process.env.CRAWLEE_STORAGE_DIR;
    } else {
      process.env.CRAWLEE_STORAGE_DIR = prev;
    }
    // Best-effort cleanup after every run (success or failure)
    try { rmSync(dir, { recursive: true, force: true }); } catch { /* ignore */ }
  }
}
config({ path: join(__dirname, "..", "..", "..", ".env") }); config({ path: join(__dirname, "..", "..", "..", ".env") });
@ -46,6 +62,7 @@ export async function registerSchedules(boss: PgBoss): Promise<void> {
"scrape:pricing:atgbics", "scrape:pricing:atgbics",
"scrape:pricing:prolabs", "scrape:pricing:prolabs",
"scrape:compat:cisco", "scrape:compat:cisco",
"scrape:pricing:flexoptix",
"scrape:vendors:flexoptix", "scrape:vendors:flexoptix",
"scrape:news", "scrape:news",
"scrape:faq", "scrape:faq",
@ -55,14 +72,16 @@ export async function registerSchedules(boss: PgBoss): Promise<void> {
await boss.createQueue(q).catch(() => { /* already exists */ }); await boss.createQueue(q).catch(() => { /* already exists */ });
} }
// v0.2.0: Increased frequencies for permanent price monitoring (R-SCAN)
// FS.com pricing (every 4 hours — JS rendering is slow) // FS.com pricing (every 4 hours — JS rendering is slow)
await boss.schedule("scrape:pricing:fs", "0 */4 * * *", {}, { await boss.schedule("scrape:pricing:fs", "0 */4 * * *", {}, {
retryLimit: 2, retryLimit: 2,
expireInSeconds: 3600, expireInSeconds: 3600,
}); });
// Optcore pricing (every 6 hours — WP API enumeration + Playwright) // Optcore pricing (every 4 hours — was 6h)
await boss.schedule("scrape:pricing:optcore", "0 */6 * * *", {}, { await boss.schedule("scrape:pricing:optcore", "0 2/4 * * *", {}, {
retryLimit: 2, retryLimit: 2,
expireInSeconds: 7200, expireInSeconds: 7200,
}); });
@ -97,12 +116,18 @@ export async function registerSchedules(boss: PgBoss): Promise<void> {
expireInSeconds: 3600, expireInSeconds: 3600,
}); });
// ProLabs pricing (every 8 hours — server-rendered HTML, USD prices) // ProLabs pricing (every 8 hours — Playwright, needs proxy for CloudFront)
await boss.schedule("scrape:pricing:prolabs", "0 4/8 * * *", {}, { await boss.schedule("scrape:pricing:prolabs", "0 4/8 * * *", {}, {
retryLimit: 2, retryLimit: 2,
expireInSeconds: 3600, expireInSeconds: 3600,
}); });
// Flexoptix catalog (every 2 hours — fetch-based, fast — R-SCAN requirement)
await boss.schedule("scrape:pricing:flexoptix", "0 */2 * * *", {}, {
retryLimit: 2,
expireInSeconds: 3600,
});
// Flexoptix vendor list (weekly, Sunday at 6am — own data) // Flexoptix vendor list (weekly, Sunday at 6am — own data)
await boss.schedule("scrape:vendors:flexoptix", "0 6 * * 0", {}, { await boss.schedule("scrape:vendors:flexoptix", "0 6 * * 0", {}, {
retryLimit: 3, retryLimit: 3,
@ -124,6 +149,7 @@ export async function registerWorkers(boss: PgBoss): Promise<void> {
const { scrapeCiscoTmg } = await import("./scrapers/cisco-tmg"); const { scrapeCiscoTmg } = await import("./scrapers/cisco-tmg");
const { scrapeOptcore } = await import("./scrapers/optcore"); const { scrapeOptcore } = await import("./scrapers/optcore");
const { scrape10Gtek } = await import("./scrapers/tenGtek"); const { scrape10Gtek } = await import("./scrapers/tenGtek");
const { scrapeFlexoptixCatalog } = await import("./scrapers/flexoptix-catalog");
const { scrapeFlexoptixVendors } = await import("./scrapers/flexoptix-vendors"); const { scrapeFlexoptixVendors } = await import("./scrapers/flexoptix-vendors");
const { scrapeNews } = await import("./scrapers/news"); const { scrapeNews } = await import("./scrapers/news");
const { scrapeAtgbics } = await import("./scrapers/atgbics"); const { scrapeAtgbics } = await import("./scrapers/atgbics");
@ -131,22 +157,27 @@ export async function registerWorkers(boss: PgBoss): Promise<void> {
await boss.work("scrape:pricing:fs", async (_job) => { await boss.work("scrape:pricing:fs", async (_job) => {
console.log(`[${new Date().toISOString()}] Running: FS.com pricing`); console.log(`[${new Date().toISOString()}] Running: FS.com pricing`);
await scrapeFs(); await withIsolatedStorage("fs", scrapeFs);
}); });
await boss.work("scrape:pricing:optcore", async (_job) => { await boss.work("scrape:pricing:optcore", async (_job) => {
console.log(`[${new Date().toISOString()}] Running: Optcore pricing`); console.log(`[${new Date().toISOString()}] Running: Optcore pricing`);
await scrapeOptcore(); await withIsolatedStorage("optcore", scrapeOptcore);
}); });
await boss.work("scrape:compat:cisco", async (_job) => { await boss.work("scrape:compat:cisco", async (_job) => {
console.log(`[${new Date().toISOString()}] Running: Cisco TMG`); console.log(`[${new Date().toISOString()}] Running: Cisco TMG`);
await scrapeCiscoTmg(); await withIsolatedStorage("cisco", scrapeCiscoTmg);
}); });
await boss.work("scrape:pricing:10gtek", async (_job) => { await boss.work("scrape:pricing:10gtek", async (_job) => {
console.log(`[${new Date().toISOString()}] Running: 10Gtek pricing`); console.log(`[${new Date().toISOString()}] Running: 10Gtek pricing`);
await scrape10Gtek(); await withIsolatedStorage("10gtek", scrape10Gtek);
});
await boss.work("scrape:pricing:flexoptix", async (_job) => {
console.log(`[${new Date().toISOString()}] Running: Flexoptix catalog pricing`);
await scrapeFlexoptixCatalog();
}); });
await boss.work("scrape:vendors:flexoptix", async (_job) => { await boss.work("scrape:vendors:flexoptix", async (_job) => {
@ -161,12 +192,12 @@ export async function registerWorkers(boss: PgBoss): Promise<void> {
await boss.work("scrape:pricing:atgbics", async (_job) => { await boss.work("scrape:pricing:atgbics", async (_job) => {
console.log(`[${new Date().toISOString()}] Running: ATGBICS pricing`); console.log(`[${new Date().toISOString()}] Running: ATGBICS pricing`);
await scrapeAtgbics(); await withIsolatedStorage("atgbics", scrapeAtgbics);
}); });
await boss.work("scrape:pricing:prolabs", async (_job) => { await boss.work("scrape:pricing:prolabs", async (_job) => {
console.log(`[${new Date().toISOString()}] Running: ProLabs pricing`); console.log(`[${new Date().toISOString()}] Running: ProLabs pricing`);
await scrapeProLabs(); await withIsolatedStorage("prolabs", scrapeProLabs);
}); });
await boss.work("scrape:faq", async (_job) => { await boss.work("scrape:faq", async (_job) => {

View File

@ -1,27 +1,101 @@
/** /**
* Cisco TMG Matrix Scraper Transceiver Compatibility * Cisco TMG Matrix Scraper Transceiver Compatibility
* *
* Source: tmgmatrix.cisco.com * Source: tmgmatrix.cisco.com (JSON API no auth required)
* Extracts: Switch model Transceiver compatibility data * Extracts: Switch model Transceiver compatibility data
* Stores: switches, compatibility table * Stores: switches, compatibility table
* *
* The TMG Matrix has a JSON API behind the scenes. * Uses POST /public/api/networkdevice/search endpoint directly.
*/ */
import { CheerioCrawler } from "crawlee";
import { pool, ensureVendor } from "../utils/db"; import { pool, ensureVendor } from "../utils/db";
const TMG_BASE = "https://tmgmatrix.cisco.com"; const TMG_API = "https://tmgmatrix.cisco.com/public/api/networkdevice/search";
interface TmgEntry { interface TmgTransceiver {
switchModel: string; tmgId: number;
switchSeries: string; productId: string;
transceiverPid: string; productFamily: string;
transceiverDescription: string; formFactor: string;
speed: string;
reach: string; reach: string;
temperatureRange: string;
cableType: string; cableType: string;
connector: string; media: string;
minSoftware: string; connectorType: string;
transmissionStandard: string;
dataRate: string;
endOfSale: string;
softReleaseMinVer: string;
breakoutMode: string;
osType: string;
domSupport: string;
type: string;
}
/** One switch product ID together with the transceivers listed for it in a TMG search result. */
interface TmgCompatEntry {
productId: string; // switch PID
transceivers: TmgTransceiver[];
}
/**
 * A device group in a TMG search result: a product family plus the
 * per-switch compatibility entries that belong to it.
 */
interface TmgDevice {
productFamily: string;
networkAndTransceiverCompatibility: TmgCompatEntry[];
}
/** Shape assumed for the JSON body returned by the TMG search endpoint. */
interface TmgSearchResponse {
totalCount: number;
filters: Array<{ name: string; values: Array<{ id: number; name: string; count: number }> }>;
networkDevices: TmgDevice[];
}
/**
 * Cisco platform families (Nexus, Catalyst and ASR 9000) queried from the TMG API.
 * Each entry is sent as the `networkDeviceProductFamily` filter of one search request.
 * The entry counts in the trailing comments are presumably a snapshot from when
 * the list was written — TODO confirm before relying on them.
 */
const PLATFORM_FAMILIES = [
{ id: 74, name: "N9300" }, // Nexus 9300 — 8,515 entries
{ id: 77, name: "N9500" }, // Nexus 9500 — 2,266 entries
{ id: 78, name: "N9200" }, // Nexus 9200 — 708 entries
{ id: 661, name: "N9800" }, // Nexus 9800 — 238 entries
{ id: 76, name: "C9300" }, // Catalyst 9300 — 260 entries
{ id: 601, name: "C9300L" }, // Catalyst 9300L — 720 entries
{ id: 1181, name: "C9300X" }, // Catalyst 9300X — 413 entries
{ id: 8, name: "C9500" }, // Catalyst 9500 — 1,141 entries
{ id: 521, name: "C9600" }, // Catalyst 9600 — 771 entries
{ id: 7, name: "C9400" }, // Catalyst 9400 — 561 entries
{ id: 341, name: "C9200" }, // Catalyst 9200 — 222 entries
{ id: 83, name: "ASR9000" }, // ASR 9000 — 3,644 entries
];
/**
 * Query the TMG search API for all network devices of one platform family.
 *
 * Sends a POST with every filter empty except `networkDeviceProductFamily`,
 * which is set to the given family.
 *
 * @param familyFilter Platform family (`id` + `name`) to filter on.
 * @returns The parsed search response.
 * @throws Error when the request times out, the HTTP status is not OK, or the
 *         payload has no `networkDevices` array.
 */
async function searchTmg(familyFilter: { id: number; name: string }): Promise<TmgSearchResponse> {
  // Upper bound for a single request so a stalled API cannot hang the whole scrape.
  const requestTimeoutMs = 60000;

  const body = {
    cableType: [],
    dataRate: [],
    formFactor: [],
    reach: [],
    searchInput: [""],
    osType: [],
    transceiverProductFamily: [],
    transceiverProductID: [],
    networkDeviceProductFamily: [familyFilter],
    networkDeviceProductID: [],
    media: [],
    connectorType: [],
    caseTemperature: [],
    performanceMonitoring: [],
  };

  const res = await fetch(TMG_API, {
    method: "POST",
    headers: {
      "Content-Type": "application/json",
      "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36",
      "Accept": "application/json",
    },
    body: JSON.stringify(body),
    signal: AbortSignal.timeout(requestTimeoutMs),
  });

  if (!res.ok) {
    throw new Error(`TMG API ${res.status}: ${res.statusText}`);
  }

  // The body is external input: verify the one field callers iterate over
  // instead of trusting the cast.
  const data = (await res.json()) as TmgSearchResponse;
  if (!data || !Array.isArray(data.networkDevices)) {
    throw new Error(`TMG API returned an unexpected payload for ${familyFilter.name}`);
  }
  return data;
}
async function upsertCiscoSwitch(vendorId: string, model: string, series: string): Promise<string> { async function upsertCiscoSwitch(vendorId: string, model: string, series: string): Promise<string> {
@ -38,18 +112,31 @@ async function upsertCiscoSwitch(vendorId: string, model: string, series: string
/**
 * Insert or refresh one switch/transceiver compatibility row sourced from the
 * Cisco TMG Matrix.
 *
 * On conflict for the (switch_id, transceiver_id) pair, only `firmware_min`
 * and `notes` are overwritten.
 *
 * @param switchId      ID of the switch row.
 * @param transceiverId ID of the transceiver row.
 * @param firmwareMin   Minimum software release; an empty value is stored as NULL.
 * @param formFactor    Transceiver attributes below are joined into `notes`
 * @param reach         in the order: form factor, data rate, reach, media,
 * @param cableType     cable type. Missing or blank attributes are skipped.
 * @param media
 * @param dataRate
 */
async function upsertCompatibility(
  switchId: string,
  transceiverId: string,
  firmwareMin: string,
  formFactor: string,
  reach: string,
  cableType: string,
  media: string,
  dataRate: string
): Promise<void> {
  // The values originate from an external JSON payload, so any of them may be
  // absent at runtime despite the declared types. Skip those instead of
  // writing the literal text "undefined" or doubled spaces into notes.
  const notes = [formFactor, dataRate, reach, media, cableType]
    .map((part) => (part == null ? "" : String(part).trim()))
    .filter((part) => part.length > 0)
    .join(" ");

  await pool.query(
    `INSERT INTO compatibility (switch_id, transceiver_id, verified_by, verification_method, status, firmware_min, source_url, notes)
     VALUES ($1, $2, 'Cisco TMG Matrix', 'vendor_matrix', 'compatible', $3, $4, $5)
     ON CONFLICT (switch_id, transceiver_id) DO UPDATE SET
       firmware_min = EXCLUDED.firmware_min,
       notes = EXCLUDED.notes`,
    [
      switchId,
      transceiverId,
      firmwareMin || null,
      "https://tmgmatrix.cisco.com",
      notes,
    ]
  );
}
export async function scrapeCiscoTmg(): Promise<void> { export async function scrapeCiscoTmg(): Promise<void> {
console.log("=== Cisco TMG Matrix Scraper Starting ===\n"); console.log("=== Cisco TMG Matrix Scraper Starting (API mode) ===\n");
const ciscoVendorId = await ensureVendor( const ciscoVendorId = await ensureVendor(
"Cisco", "Cisco",
@ -58,90 +145,69 @@ export async function scrapeCiscoTmg(): Promise<void> {
undefined undefined
); );
const entries: TmgEntry[] = []; let totalSwitches = 0;
let totalCompat = 0;
let totalTransceivers = 0;
// TMG Matrix uses a search API for (const family of PLATFORM_FAMILIES) {
// First, try the public HTML interface console.log(`\nFetching ${family.name}...`);
const crawler = new CheerioCrawler({
maxConcurrency: 1,
maxRequestsPerMinute: 10, // Very respectful — Cisco rate limits aggressively
async requestHandler({ request, $, log }) {
log.info(`Scraping: ${request.url}`);
// The TMG Matrix renders a table with compatibility data
$("table tbody tr, .matrix-row, [class*='result-row']").each((_i, el) => {
const $row = $(el);
const cells = $row.find("td").map((_j, td) => $(td).text().trim()).get();
if (cells.length >= 4) {
entries.push({
switchModel: cells[0] || "",
switchSeries: cells[0]?.split(" ")[0] || "Nexus",
transceiverPid: cells[1] || "",
transceiverDescription: cells[2] || "",
speed: cells[3] || "",
reach: cells[4] || "",
cableType: cells[5] || "",
connector: cells[6] || "",
minSoftware: cells[7] || "",
});
}
});
},
});
// Start with Nexus switches (most relevant for Flexoptix)
await crawler.run([
`${TMG_BASE}/public/tmg?searchValue=Nexus+9000`,
`${TMG_BASE}/public/tmg?searchValue=Nexus+3000`,
`${TMG_BASE}/public/tmg?searchValue=Nexus+7000`,
`${TMG_BASE}/public/tmg?searchValue=Catalyst+9000`,
]);
console.log(`\nEntries found: ${entries.length}`);
// Write to database
let switches = 0;
let compat = 0;
for (const entry of entries) {
if (!entry.switchModel || !entry.transceiverPid) continue;
try { try {
const data = await searchTmg(family);
console.log(` ${family.name}: ${data.totalCount} total entries, ${data.networkDevices.length} device groups`);
for (const device of data.networkDevices) {
for (const compat of device.networkAndTransceiverCompatibility) {
if (!compat.productId) continue;
const switchId = await upsertCiscoSwitch( const switchId = await upsertCiscoSwitch(
ciscoVendorId, ciscoVendorId,
entry.switchModel, compat.productId,
entry.switchSeries device.productFamily
); );
switches++; totalSwitches++;
// Try to match transceiver in our DB for (const tx of compat.transceivers) {
if (!tx.productId) continue;
totalTransceivers++;
// Try to match transceiver in our DB by Cisco PID
const txResult = await pool.query( const txResult = await pool.query(
`SELECT id FROM transceivers `SELECT id FROM transceivers
WHERE part_number = $1 WHERE part_number = $1
OR slug LIKE $2 OR part_number = $2
OR standard_name ILIKE $3
LIMIT 1`, LIMIT 1`,
[ [tx.productId, tx.productId.replace(/-S$/, "")]
entry.transceiverPid,
`%${entry.transceiverPid.toLowerCase().replace(/[^a-z0-9]/g, "")}%`,
`%${entry.speed}%${entry.reach}%`,
]
); );
if (txResult.rows.length > 0) { if (txResult.rows.length > 0) {
await upsertCompatibility(switchId, txResult.rows[0].id, entry.minSoftware); await upsertCompatibility(
compat++; switchId,
txResult.rows[0].id,
tx.softReleaseMinVer,
tx.formFactor,
tx.reach,
tx.cableType,
tx.media,
tx.dataRate
);
totalCompat++;
}
} }
} catch (err) {
// Skip duplicates silently
} }
} }
console.log(`Switches upserted: ${switches}`); // Rate limit: 2 seconds between platform families
console.log(`Compatibility entries: ${compat}`); await new Promise((r) => setTimeout(r, 2000));
console.log("=== Cisco TMG Scraper Complete ===\n"); } catch (err) {
console.error(` Error fetching ${family.name}:`, err);
}
}
console.log(`\n=== Cisco TMG Scraper Complete ===`);
console.log(` Switches upserted: ${totalSwitches}`);
console.log(` Transceiver entries scanned: ${totalTransceivers}`);
console.log(` Compatibility matches: ${totalCompat}\n`);
} }
if (require.main === module) { if (require.main === module) {

View File

@ -1,7 +1,7 @@
/** /**
* Fluxlight Scraper US-based compatible transceiver vendor * Fluxlight Scraper US-based compatible transceiver vendor
* *
* fluxlight.com BigCommerce, server-rendered HTML with real prices. * www.fluxlight.com BigCommerce, server-rendered HTML with real prices.
* ~144+ products across 6 pages. Uses pagination via ?page=N. * ~144+ products across 6 pages. Uses pagination via ?page=N.
* *
* Rate limited: 1 req/2sec. * Rate limited: 1 req/2sec.
@ -91,8 +91,8 @@ function parseProductList(html: string): Product[] {
const products: Product[] = []; const products: Product[] = [];
// BigCommerce product card pattern: product link + price // BigCommerce product card pattern: product link + price
// Pattern: <a href="https://fluxlight.com/PARTNUM-FL/">Product Name</a> ... $29.99 // Pattern: <a href="https://www.fluxlight.com/PARTNUM-FL/">Product Name</a> ... $29.99
const productRegex = /href="(https?:\/\/fluxlight\.com\/[^"]*-FL\/)"[^>]*>\s*([^<]{10,})<\/a>/gi; const productRegex = /href="(https?:\/\/(?:www\.)?fluxlight\.com\/[^"]*-FL\/)"[^>]*>\s*([^<]{10,})<\/a>/gi;
let match; let match;
while ((match = productRegex.exec(html)) !== null) { while ((match = productRegex.exec(html)) !== null) {
const url = match[1]; const url = match[1];
@ -123,7 +123,7 @@ function parseProductList(html: string): Product[] {
// Fallback: broader link pattern // Fallback: broader link pattern
if (products.length === 0) { if (products.length === 0) {
const simpleRegex = /href="(https?:\/\/fluxlight\.com\/[^"]+)"[^>]*>([^<]{10,}(?:SFP|QSFP|XFP|Base)[^<]*)<\/a>/gi; const simpleRegex = /href="(https?:\/\/(?:www\.)?fluxlight\.com\/[^"]+)"[^>]*>([^<]{10,}(?:SFP|QSFP|XFP|Base)[^<]*)<\/a>/gi;
while ((match = simpleRegex.exec(html)) !== null) { while ((match = simpleRegex.exec(html)) !== null) {
const url = match[1]; const url = match[1];
const name = match[2].trim(); const name = match[2].trim();
@ -166,7 +166,7 @@ async function fetchPage(url: string): Promise<string> {
export async function scrapeFluxlight(): Promise<void> { export async function scrapeFluxlight(): Promise<void> {
console.log("=== Fluxlight Scraper Starting ===\n"); console.log("=== Fluxlight Scraper Starting ===\n");
const vendorId = await ensureVendor("Fluxlight", "compatible", "https://fluxlight.com", "https://fluxlight.com/transceivers/"); const vendorId = await ensureVendor("Fluxlight", "compatible", "https://fluxlight.com", "https://www.fluxlight.com/transceivers/");
let allProducts: Product[] = []; let allProducts: Product[] = [];

View File

@ -13,13 +13,18 @@ import { contentHash, parsePrice, parseStockLevel, parseQuantity } from "../util
const BASE_URL = "https://www.fs.com"; const BASE_URL = "https://www.fs.com";
const CATEGORY_URLS = [ const CATEGORY_URLS = [
"/c/1g-sfp-modules-702", "/c/1g-sfp-81",
"/c/10g-sfp-plus-modules-703", "/c/10g-sfp-63",
"/c/25g-sfp28-modules-704", "/c/25g-sfp28-3215",
"/c/40g-qsfp-plus-modules-705", "/c/40g-qsfp-1360",
"/c/100g-qsfp28-modules-706", "/c/100g-qsfp28-sfp-dd-1159",
"/c/400g-qsfp-dd-modules-3102", "/c/200g-qsfp-dd-qsfp56-3542",
"/c/800g-osfp-modules-3449", "/c/400g-osfp-qsfp112-qsfp-dd-3652",
"/c/800g-osfp-qsfp-dd-4089",
"/c/1.6t-osfp-5597",
"/c/400g-coherent-qsfp-dd-4103",
"/c/10g-cwdm-dwdm-sfp-65",
"/c/100g-dwdm-qsfp28-3863",
]; ];
interface FsProduct { interface FsProduct {
@ -98,18 +103,30 @@ export async function scrapeFs(): Promise<void> {
headless: true, headless: true,
launchContext: { launchContext: {
launchOptions: { launchOptions: {
args: ["--disable-blink-features=AutomationControlled"], args: ["--disable-blink-features=AutomationControlled", "--lang=en-US"],
}, },
}, },
preNavigationHooks: [
async ({ page }) => {
await page.setExtraHTTPHeaders({
"Accept-Language": "en-US,en;q=0.9",
});
await page.context().addCookies([
{ name: "currency", value: "USD", domain: ".fs.com", path: "/" },
{ name: "lang", value: "en", domain: ".fs.com", path: "/" },
{ name: "country", value: "US", domain: ".fs.com", path: "/" },
]);
},
],
async requestHandler({ page, request, log }) { async requestHandler({ page, request, log }) {
const url = request.url; const url = request.url;
log.info(`Scraping: ${url}`); log.info(`Scraping: ${url}`);
// Wait for product list to render // Wait for Vue.js product grid to render
await page.waitForTimeout(3000); await page.waitForTimeout(4000);
// Try multiple selectors — FS.com changes DOM frequently
const productData = await page.evaluate(() => { const productData = await page.evaluate(() => {
const results: Array<{ const results: Array<{
name: string; name: string;
@ -119,65 +136,55 @@ export async function scrapeFs(): Promise<void> {
partNumber: string; partNumber: string;
}> = []; }> = [];
// Strategy 1: Look for product links with prices nearby // Strategy 1: Parse .category__grid__item cards (2026 Vue.js DOM)
const productLinks = document.querySelectorAll( const gridItems = document.querySelectorAll(".category__grid__item");
'a[href*="/products/"], a[href*="/product/"], .product-item a, .o-list-product a, [class*="product"] a[href]' for (const item of gridItems) {
); const link = item.querySelector('a[href*="/products/"]') as HTMLAnchorElement | null;
const img = item.querySelector("img");
const priceEl = item.querySelector(".grid__price");
const allText = item.textContent || "";
if (!link) continue;
const name = img?.getAttribute("alt")?.trim() || link.textContent?.trim() || "";
const href = link.getAttribute("href") || "";
const price = priceEl?.textContent?.trim() || "";
// Extract stock from text like "1914 in Global Warehouse"
const stockMatch = allText.match(/(\d+)\s+in\s+(?:Global\s+)?Warehouse/i);
const stock = stockMatch ? stockMatch[1] + " in stock" : "";
// Extract FS product ID from URL
const pnMatch = href.match(/products\/(\d+)\.html/);
const partNumber = pnMatch ? `FS-${pnMatch[1]}` : "";
if (name && href) {
results.push({ name, href, price, stock, partNumber });
}
}
// Strategy 2: Fallback — look for product links with prices nearby
if (results.length === 0) {
const productLinks = document.querySelectorAll(
'a[href*="/products/"], a[href*="/product/"]'
);
for (const link of productLinks) { for (const link of productLinks) {
const el = link as HTMLAnchorElement; const el = link as HTMLAnchorElement;
const name = el.textContent?.trim() || ""; const name = el.textContent?.trim() || "";
const href = el.getAttribute("href") || ""; const href = el.getAttribute("href") || "";
if (!name || name.length < 5 || !href) continue; if (!name || name.length < 5 || !href) continue;
// Find price in parent/sibling elements const container = el.closest('[class*="product"]') || el.closest('[class*="item"]') || el.closest("li") || el.parentElement?.parentElement;
const container =
el.closest('[class*="product"]') ||
el.closest('[class*="item"]') ||
el.closest("li") ||
el.parentElement?.parentElement;
let price = ""; let price = "";
let stock = ""; let stock = "";
if (container) { if (container) {
const priceEl = container.querySelector( const priceEl = container.querySelector('[class*="price"]');
'[class*="price"], [class*="Price"], .o-price, span[data-price]'
);
price = priceEl?.textContent?.trim() || ""; price = priceEl?.textContent?.trim() || "";
const stockEl = container.querySelector('[class*="stock"], [class*="avail"]');
const stockEl = container.querySelector(
'[class*="stock"], [class*="Stock"], [class*="avail"], .o-stock'
);
stock = stockEl?.textContent?.trim() || ""; stock = stockEl?.textContent?.trim() || "";
} }
const pn = href.split("/").pop()?.replace(".html", "")?.replace(/\?.*/, "") || "";
// Extract part number from URL or text if (name) results.push({ name, href, price, stock, partNumber: pn });
const pn = href.split("/").pop()?.replace(".html", "")?.replace("#", "") || "";
if (name && (price || href.includes("/product"))) {
results.push({ name, href, price, stock, partNumber: pn });
}
}
// Strategy 2: Look for any element with $ or US$ price pattern
if (results.length === 0) {
const allText = document.querySelectorAll("*");
for (const el of allText) {
const text = el.textContent || "";
if (/US?\$\s*\d+\.\d{2}/.test(text) && text.length < 200) {
const linkEl = el.closest("a") || el.querySelector("a");
if (linkEl) {
results.push({
name: linkEl.textContent?.trim() || text.slice(0, 100),
href: linkEl.getAttribute("href") || "",
price: text.match(/US?\$\s*[\d,.]+/)?.[0] || "",
stock: "",
partNumber: "",
});
}
}
} }
} }

View File

@ -8,7 +8,7 @@
import { pool, findOrCreateScrapedTransceiver, ensureVendor, upsertPriceObservation } from "../utils/db"; import { pool, findOrCreateScrapedTransceiver, ensureVendor, upsertPriceObservation } from "../utils/db";
import { contentHash } from "../utils/hash"; import { contentHash } from "../utils/hash";
const BASE = "https://gbics.com"; const BASE = "https://www.gbics.com";
const HEADERS = { const HEADERS = {
"User-Agent": "Mozilla/5.0 (compatible; TIP-Bot/1.0; research)", "User-Agent": "Mozilla/5.0 (compatible; TIP-Bot/1.0; research)",
Accept: "text/html,application/xhtml+xml", Accept: "text/html,application/xhtml+xml",
@ -100,7 +100,7 @@ function parseProductList(html: string, cat: typeof CATEGORIES[number]): Product
// BigCommerce card-title pattern: // BigCommerce card-title pattern:
// <a aria-label="Product Name, £XX.XX" href="URL" data-event-type="product-click"> // <a aria-label="Product Name, £XX.XX" href="URL" data-event-type="product-click">
const productRegex = /aria-label="([^"]+)"\s+href="(https?:\/\/gbics\.com\/[^"]+)"\s+data-event-type="product-click"/gi; const productRegex = /aria-label="([^"]+)"\s+href="(https?:\/\/(?:www\.)?gbics\.com\/[^"]+)"[^>]*data-event-type="product-click"/gi;
let match; let match;
while ((match = productRegex.exec(collapsed)) !== null) { while ((match = productRegex.exec(collapsed)) !== null) {
const label = match[1].trim(); const label = match[1].trim();
@ -110,7 +110,14 @@ function parseProductList(html: string, cat: typeof CATEGORIES[number]): Product
// Split on last comma to separate name and price // Split on last comma to separate name and price
const priceInLabel = label.match(/,\s*£\s*([\d,.]+)\s*$/); const priceInLabel = label.match(/,\s*£\s*([\d,.]+)\s*$/);
const name = priceInLabel ? label.slice(0, label.lastIndexOf(",")).trim() : label; const name = priceInLabel ? label.slice(0, label.lastIndexOf(",")).trim() : label;
const price = priceInLabel ? parseFloat(priceInLabel[1].replace(",", "")) : undefined; let price = priceInLabel ? parseFloat(priceInLabel[1].replace(",", "")) : undefined;
// Fallback: extract price from data-price-asc attribute on parent <li>
if (!price) {
const priceContext = collapsed.slice(Math.max(0, match.index - 500), match.index);
const dataPriceMatch = priceContext.match(/data-price-asc="(\d+)"/);
if (dataPriceMatch) price = parseFloat(dataPriceMatch[1]);
}
if (name.length < 10) continue; if (name.length < 10) continue;
@ -131,7 +138,7 @@ function parseProductList(html: string, cat: typeof CATEGORIES[number]): Product
// Fallback: try "Now: £XX.XX" pattern near product links // Fallback: try "Now: £XX.XX" pattern near product links
if (products.length === 0) { if (products.length === 0) {
const altRegex = /href="(https?:\/\/gbics\.com\/[^"]+)"[^>]*>\s*([^<]{15,})<\/a>/gi; const altRegex = /href="(https?:\/\/(?:www\.)?gbics\.com\/[^"]+)"[^>]*>\s*([^<]{15,})<\/a>/gi;
while ((match = altRegex.exec(collapsed)) !== null) { while ((match = altRegex.exec(collapsed)) !== null) {
const url = match[1]; const url = match[1];
const name = match[2].trim(); const name = match[2].trim();
@ -172,7 +179,7 @@ async function fetchPage(url: string): Promise<string> {
export async function scrapeGbics(): Promise<void> { export async function scrapeGbics(): Promise<void> {
console.log("=== GBICS.com Scraper Starting ===\n"); console.log("=== GBICS.com Scraper Starting ===\n");
const vendorId = await ensureVendor("GBICS", "compatible", "https://gbics.com", "https://gbics.com/optical-transceivers/"); const vendorId = await ensureVendor("GBICS", "compatible", "https://www.gbics.com", "https://www.gbics.com/optical-transceivers/");
let totalProducts = 0; let totalProducts = 0;
let priceUpdates = 0; let priceUpdates = 0;

View File

@ -38,19 +38,14 @@ interface NewsArticle {
const FEEDS: RssFeed[] = [ const FEEDS: RssFeed[] = [
// === PRIMARY: Transceiver-specific === // === PRIMARY: Transceiver-specific ===
{ {
name: "Lightwave Online", name: "The Next Platform",
url: "https://www.lightwaveonline.com/rss", url: "https://www.nextplatform.com/feed/",
category: "market_report", category: "market_report",
}, },
{ {
name: "Lightwave - Fiber Optics", name: "ServeTheHome",
url: "https://www.lightwaveonline.com/fttx/rss", url: "https://www.servethehome.com/feed/",
category: "market_report", category: "product_launch",
},
{
name: "Fierce Telecom",
url: "https://www.fiercetelecom.com/rss/xml",
category: "market_report",
}, },
{ {
name: "Optics.org", name: "Optics.org",
@ -69,8 +64,8 @@ const FEEDS: RssFeed[] = [
category: "market_report", category: "market_report",
}, },
{ {
name: "SDxCentral", name: "The Register - Data Centre",
url: "https://www.sdxcentral.com/feed/", url: "https://www.theregister.com/data_centre/headlines.atom",
category: "market_report", category: "market_report",
}, },
// === TERTIARY: General tech / photonics === // === TERTIARY: General tech / photonics ===

View File

@ -1,22 +1,29 @@
/** /**
* ProLabs Scraper Enterprise-grade compatible optics (Legrand subsidiary) * ProLabs Scraper Enterprise-grade compatible optics (Legrand subsidiary)
* *
* prolabs.com Server-rendered HTML with public USD pricing. * prolabs.com CloudFront WAF aggressively blocks datacenter IPs.
* Uses PlaywrightCrawler with Firefox for anti-detection.
*
* KNOWN ISSUE: CloudFront blocks all requests from IONOS/datacenter IPs
* (HTTP 403 "Request blocked"). This scraper works correctly from
* residential IPs. Solutions:
* 1. Set PROXY_URL env var to a residential/rotating proxy
* 2. Run from a residential IP (e.g. home server)
* 3. Route through WireGuard with internet breakout at home
*
* Products listed under /products/networking/fiber-optics/ category pages. * Products listed under /products/networking/fiber-optics/ category pages.
* Pagination via ?page=N. Rate limited: 1 req/2sec. Max 100 pages. * Pagination via ?page=N. Rate limited: maxConcurrency 1, 10 req/min.
* *
* SKU format examples: "Q-4X10G-LR-PR", "SFP-10G-SR-PR", "Q28-100G-LR4-PR" * SKU format examples: "Q-4X10G-LR-PR", "SFP-10G-SR-PR", "Q28-100G-LR4-PR"
*/ */
import { PlaywrightCrawler, RequestQueue } from "crawlee";
import { firefox } from "playwright";
import { pool, findOrCreateScrapedTransceiver, ensureVendor, upsertPriceObservation } from "../utils/db"; import { pool, findOrCreateScrapedTransceiver, ensureVendor, upsertPriceObservation } from "../utils/db";
import { contentHash } from "../utils/hash"; import { contentHash } from "../utils/hash";
const BASE = "https://www.prolabs.com"; const BASE = "https://www.prolabs.com";
const HEADERS = {
"User-Agent": "Mozilla/5.0 (compatible; TIP-Bot/1.0; research)",
Accept: "text/html,application/xhtml+xml",
};
const MAX_PAGES = 100; const MAX_PAGES = 100;
const PROXY_URL = process.env.PROXY_URL || "";
const CATEGORIES = [ const CATEGORIES = [
{ path: "/products/networking/fiber-optics/sfp-modules", formFactor: "SFP", speed: "1G", speedGbps: 1 }, { path: "/products/networking/fiber-optics/sfp-modules", formFactor: "SFP", speed: "1G", speedGbps: 1 },
@ -26,7 +33,6 @@ const CATEGORIES = [
{ path: "/products/networking/fiber-optics/qsfp28-modules", formFactor: "QSFP28", speed: "100G", speedGbps: 100 }, { path: "/products/networking/fiber-optics/qsfp28-modules", formFactor: "QSFP28", speed: "100G", speedGbps: 100 },
{ path: "/products/networking/fiber-optics/qsfp-dd-modules", formFactor: "QSFP-DD", speed: "400G", speedGbps: 400 }, { path: "/products/networking/fiber-optics/qsfp-dd-modules", formFactor: "QSFP-DD", speed: "400G", speedGbps: 400 },
{ path: "/products/networking/fiber-optics/coherent-modules", formFactor: "QSFP-DD", speed: "400G", speedGbps: 400 }, { path: "/products/networking/fiber-optics/coherent-modules", formFactor: "QSFP-DD", speed: "400G", speedGbps: 400 },
// Broad fallback category in case above paths differ on the live site
{ path: "/products/networking/fiber-optics", formFactor: "SFP+", speed: "10G", speedGbps: 10 }, { path: "/products/networking/fiber-optics", formFactor: "SFP+", speed: "10G", speedGbps: 10 },
]; ];
@ -45,9 +51,9 @@ interface Product {
wavelength?: string; wavelength?: string;
} }
function sleep(ms: number): Promise<void> { /* ------------------------------------------------------------------ */
return new Promise((resolve) => setTimeout(resolve, ms)); /* Helper / detection functions (unchanged from original) */
} /* ------------------------------------------------------------------ */
function detectReach(text: string): { label: string; meters: number } | undefined { function detectReach(text: string): { label: string; meters: number } | undefined {
const patterns: [RegExp, string, number][] = [ const patterns: [RegExp, string, number][] = [
@ -90,18 +96,6 @@ function detectWavelength(text: string): string {
return match ? match[1] : ""; return match ? match[1] : "";
} }
/**
* Infer form factor and speed from ProLabs SKU prefixes when category context
* is not specific enough (e.g. when crawling the broad fallback category).
*
* ProLabs SKU prefix conventions:
* Q- -> QSFP+ 40G
* Q28- -> QSFP28 100G
* QDD- -> QSFP-DD 400G
* SFP28- -> SFP28 25G
* SFP- -> SFP+ 10G (most common ProLabs prefix)
* S- -> SFP 1G
*/
function inferFromSku(sku: string, cat: typeof CATEGORIES[number]): { function inferFromSku(sku: string, cat: typeof CATEGORIES[number]): {
formFactor: string; formFactor: string;
speed: string; speed: string;
@ -116,121 +110,6 @@ function inferFromSku(sku: string, cat: typeof CATEGORIES[number]): {
return { formFactor: cat.formFactor, speed: cat.speed, speedGbps: cat.speedGbps }; return { formFactor: cat.formFactor, speed: cat.speed, speedGbps: cat.speedGbps };
} }
/**
* Parse product listings from a ProLabs category page.
*
* ProLabs uses a standard e-commerce layout:
* - Product cards with an <a> link containing the product URL and name
* - Price in a span with "price" in class or as "$XX.XX" text nearby
* - SKU / part number in the URL slug
* - Stock badge: "In Stock" / "Out of Stock" / "Call for Availability"
*
* We parse with lightweight regex on collapsed HTML same approach as gbics.ts
* and sfpcables.ts (no DOM parser dependency).
*/
function parseProductList(html: string, cat: typeof CATEGORIES[number]): Product[] {
const products: Product[] = [];
const collapsed = html.replace(/\s+/g, " ");
// Strategy 1: product cards with structured href containing a SKU-like segment
// Match anchor tags whose href is a deep product path ending in a SKU pattern
const productLinkRegex = /href="(\/products\/[^"]*?\/([A-Z0-9][A-Z0-9\-_]{3,}(?:-PR)?))"\s[^>]*>([^<]{10,})<\/a>/gi;
let match: RegExpExecArray | null;
while ((match = productLinkRegex.exec(collapsed)) !== null) {
const relUrl = match[1];
const skuFromUrl = match[2];
const linkText = match[3].trim();
// Skip navigation / filter / pagination links
if (/category|filter|sort|page|breadcrumb/i.test(relUrl)) continue;
if (linkText.length > 200) continue;
const url = BASE + relUrl;
const partNumber = skuFromUrl.slice(0, 80);
const name = linkText.length > 10 ? linkText : partNumber;
// Look for price in a 700-char window after the match position
const context = collapsed.slice(Math.max(0, match.index - 100), match.index + 700);
const priceMatch = context.match(/\$\s*([\d,]+\.?\d{0,2})/) ||
context.match(/price[^>]*>\s*\$?\s*([\d,]+\.?\d{0,2})/i);
const price = priceMatch ? parseFloat(priceMatch[1].replace(",", "")) : undefined;
const stockMatch = context.match(/(in[\s-]stock|out[\s-]of[\s-]stock|call for availability|available|backordered)/i);
const stockStatus = stockMatch ? stockMatch[1].toLowerCase() : undefined;
const combined = name + " " + partNumber;
const reach = detectReach(combined);
const { formFactor, speed, speedGbps } = inferFromSku(partNumber, cat);
products.push({
partNumber, name, url,
price: price && price > 0 && price < 100000 ? price : undefined,
stockStatus,
formFactor, speed, speedGbps,
reachLabel: reach?.label,
reachMeters: reach?.meters,
fiberType: detectFiber(combined),
wavelength: detectWavelength(combined),
});
}
// Strategy 2: Fallback — any link to a /products/ URL that has a $ price nearby
if (products.length === 0) {
const altRegex = /href="(\/products\/[^"]{10,})"/gi;
while ((match = altRegex.exec(collapsed)) !== null) {
const relUrl = match[1];
if (/category|filter|sort|page|breadcrumb/i.test(relUrl)) continue;
const context = collapsed.slice(Math.max(0, match.index - 50), match.index + 800);
const priceMatch = context.match(/\$\s*([\d,]+\.?\d{0,2})/);
if (!priceMatch) continue;
const price = parseFloat(priceMatch[1].replace(",", ""));
const nameMatch = context.match(/<(?:h[23]|strong|span)[^>]*>([^<]{10,150})<\//i);
const name = nameMatch ? nameMatch[1].trim() : relUrl.split("/").pop() || "";
const partNumber = (relUrl.split("/").pop() ?? name).slice(0, 80);
const url = BASE + relUrl;
const combined = name + " " + partNumber;
const reach = detectReach(combined);
const { formFactor, speed, speedGbps } = inferFromSku(partNumber, cat);
products.push({
partNumber, name, url,
price: price > 0 && price < 100000 ? price : undefined,
formFactor, speed, speedGbps,
reachLabel: reach?.label,
reachMeters: reach?.meters,
fiberType: detectFiber(combined),
wavelength: detectWavelength(combined),
});
}
}
// Deduplicate by URL
const seen = new Set<string>();
return products.filter((p) => {
if (seen.has(p.url)) return false;
seen.add(p.url);
return true;
});
}
/** Check if the HTML contains a link to the next pagination page. */
function hasNextPage(html: string, currentPage: number): boolean {
if (/rel="next"/i.test(html)) return true;
const nextPageNum = currentPage + 1;
const pattern = new RegExp(`[?&]page=${nextPageNum}`, "i");
return pattern.test(html);
}
async function fetchPage(url: string): Promise<string> {
const resp = await fetch(url, { headers: HEADERS, signal: AbortSignal.timeout(30000) });
if (!resp.ok) throw new Error(`HTTP ${resp.status} for ${url}`);
return resp.text();
}
function normalizeStockLevel( function normalizeStockLevel(
raw?: string raw?: string
): "in_stock" | "low_stock" | "out_of_stock" | "on_request" { ): "in_stock" | "low_stock" | "out_of_stock" | "on_request" {
@ -242,8 +121,19 @@ function normalizeStockLevel(
return "on_request"; return "on_request";
} }
/* ------------------------------------------------------------------ */
/* Main scraper */
/* ------------------------------------------------------------------ */
export async function scrapeProLabs(): Promise<void> { export async function scrapeProLabs(): Promise<void> {
console.log("=== ProLabs Scraper Starting ===\n"); console.log("=== ProLabs Scraper Starting (PlaywrightCrawler + Firefox) ===\n");
if (PROXY_URL) {
console.log(`Using proxy: ${PROXY_URL.replace(/:[^:@]+@/, ":***@")}`);
} else {
console.log("WARNING: No PROXY_URL set. CloudFront WAF blocks datacenter IPs.");
console.log("Set PROXY_URL env var for residential proxy if running from VPS.\n");
}
const vendorId = await ensureVendor( const vendorId = await ensureVendor(
"ProLabs", "ProLabs",
@ -254,30 +144,253 @@ export async function scrapeProLabs(): Promise<void> {
let totalProducts = 0; let totalProducts = 0;
let priceUpdates = 0; let priceUpdates = 0;
let blockedPages = 0;
const seenUrls = new Set<string>(); const seenUrls = new Set<string>();
// Map URL -> category metadata
const urlToCat = new Map<string, typeof CATEGORIES[number]>();
const requestQueue = await RequestQueue.open();
for (const cat of CATEGORIES) { for (const cat of CATEGORIES) {
console.log(`\n--- ${cat.formFactor} (${cat.speed}) [${cat.path}] ---`); const url = `${BASE}${cat.path}`;
urlToCat.set(url, cat);
await requestQueue.addRequest({ url, userData: { page: 1, catPath: cat.path } });
}
let page = 1; const crawler = new PlaywrightCrawler({
let pagesThisCat = 0; requestQueue,
let productsThisCat = 0; maxConcurrency: 1,
maxRequestsPerMinute: 10,
requestHandlerTimeoutSecs: 120,
navigationTimeoutSecs: 60,
maxRequestRetries: 2,
headless: true,
// Override default blockedStatusCodes (normally [401, 403, 429]).
// We allow 403 so our handler can inspect the page — CloudFront may
// serve a JS challenge that resolves, or we can log the block gracefully.
sessionPoolOptions: {
blockedStatusCodes: [401, 429],
},
browserPoolOptions: {
useFingerprints: false,
},
launchContext: {
launcher: firefox,
launchOptions: {
firefoxUserPrefs: {
"toolkit.telemetry.enabled": false,
"privacy.trackingprotection.enabled": false,
},
},
},
...(PROXY_URL ? {
proxyConfiguration: new (require("crawlee").ProxyConfiguration)({
proxyUrls: [PROXY_URL],
}),
} : {}),
preNavigationHooks: [
async ({ page }, goToOptions) => {
// Realistic viewport
await page.setViewportSize({ width: 1920, height: 1080 });
while (page <= MAX_PAGES) { // Override webdriver detection
const url = page === 1 await page.addInitScript(() => {
? `${BASE}${cat.path}` Object.defineProperty(navigator, "webdriver", { get: () => false });
: `${BASE}${cat.path}?page=${page}`; });
if (goToOptions) {
goToOptions.waitUntil = "load";
}
},
],
async requestHandler({ page, request, log }) {
const currentPage: number = request.userData?.page ?? 1;
const catPath: string = request.userData?.catPath ?? "";
const cat = urlToCat.get(request.url) ??
CATEGORIES.find((c) => catPath === c.path) ??
CATEGORIES[CATEGORIES.length - 1];
urlToCat.set(request.url, cat);
log.info(`[${cat.formFactor} ${cat.speed}] Page ${currentPage}: ${request.url}`);
// Give JS challenges time to resolve
await page.waitForTimeout(8000);
// Check what we actually got
const pageTitle = await page.title();
const bodyText = await page.evaluate(() => document.body?.innerText?.slice(0, 500) || "");
log.info(` Title: "${pageTitle}"`);
// Detect CloudFront WAF block
if (bodyText.includes("Request blocked") ||
bodyText.includes("Access Denied") ||
bodyText.includes("403 ERROR") ||
pageTitle.includes("ERROR")) {
blockedPages++;
log.warning(` CloudFront WAF blocked this page (${blockedPages} total blocked)`);
if (blockedPages >= 3 && totalProducts === 0) {
log.warning(` Multiple blocks detected — likely IP-level block. Consider using PROXY_URL.`);
}
return;
}
// Extract products via page.evaluate
const productData = await page.evaluate(() => {
const results: Array<{
name: string;
href: string;
price: string;
stock: string;
partNumber: string;
}> = [];
// Strategy 1: Product card links
const productLinks = document.querySelectorAll(
'a[href*="/products/"], .product-card a, .product-item a, [class*="product"] a[href], .product-list a, .category-products a, [data-product] a'
);
for (const link of productLinks) {
const el = link as HTMLAnchorElement;
const name = el.textContent?.trim() || "";
const href = el.getAttribute("href") || "";
if (!name || name.length < 5 || name.length > 200 || !href) continue;
if (/category|filter|sort|breadcrumb|login|cart|account/i.test(href) && !/products\//i.test(href)) continue;
const container =
el.closest('[class*="product"]') ||
el.closest('[class*="item"]') ||
el.closest('[class*="card"]') ||
el.closest("li") ||
el.parentElement?.parentElement?.parentElement;
let price = "";
let stock = "";
let pn = "";
if (container) {
const priceEl = container.querySelector(
'[class*="price"], [class*="Price"], [data-price], .price'
);
price = priceEl?.textContent?.trim() || "";
if (!price) {
const containerText = container.textContent || "";
const priceMatch = containerText.match(/\$\s*[\d,]+\.?\d{0,2}/);
if (priceMatch) price = priceMatch[0];
}
const stockEl = container.querySelector(
'[class*="stock"], [class*="Stock"], [class*="avail"], [class*="Avail"]'
);
stock = stockEl?.textContent?.trim() || "";
const skuEl = container.querySelector(
'[class*="sku"], [class*="SKU"], [class*="part"], [class*="Part"], [class*="model"]'
);
pn = skuEl?.textContent?.trim() || "";
}
if (!pn) {
pn = href.split("/").pop()?.replace(/\.html?$/, "")?.replace(/#.*$/, "") || "";
}
if (name && href.includes("/products/")) {
results.push({ name, href, price, stock, partNumber: pn });
}
}
// Strategy 2: Scan deeper for anchors with product URLs
if (results.length === 0) {
const allAnchors = document.querySelectorAll("a[href*='/products/']");
for (const el of allAnchors) {
const anchor = el as HTMLAnchorElement;
const href = anchor.getAttribute("href") || "";
const name = anchor.textContent?.trim() || "";
if (!name || name.length < 5) continue;
let parent: Element | null = anchor;
let price = "";
for (let i = 0; i < 4 && parent; i++) {
parent = parent.parentElement;
if (parent) {
const text = parent.textContent || "";
const m = text.match(/\$\s*[\d,]+\.?\d{0,2}/);
if (m) { price = m[0]; break; }
}
}
const pn = href.split("/").pop()?.replace(/\.html?$/, "") || "";
results.push({ name, href, price, stock: "", partNumber: pn });
}
}
// Strategy 3: JSON-LD structured data
const ldScripts = document.querySelectorAll('script[type="application/ld+json"]');
for (const script of ldScripts) {
try { try {
const html = await fetchPage(url); const data = JSON.parse(script.textContent || "");
const pageProducts = parseProductList(html, cat); const items = data.itemListElement || (Array.isArray(data) ? data : [data]);
for (const item of items) {
if (item["@type"] === "Product" || item.offers) {
const name = item.name || "";
const href = item.url || "";
const offers = item.offers || {};
const price = offers.price ? `$${offers.price}` : "";
const stock = offers.availability || "";
const pn = item.sku || item.mpn || href.split("/").pop() || "";
if (name) results.push({ name, href, price, stock, partNumber: pn });
}
}
} catch { /* ignore parse errors */ }
}
// Global dedup: broad fallback category overlaps with specific ones return results;
});
log.info(` Raw items extracted: ${productData.length}`);
// Process extracted products
const pageProducts: Product[] = [];
for (const item of productData) {
if (!item.name) continue;
const partNumber = (item.partNumber || item.name).slice(0, 80).trim();
const name = item.name.slice(0, 200).trim();
const url = item.href.startsWith("http") ? item.href : `${BASE}${item.href}`;
let price: number | undefined;
if (item.price) {
const cleaned = item.price.replace(/[^\d.,]/g, "").replace(",", "");
const parsed = parseFloat(cleaned);
if (parsed > 0 && parsed < 100000) price = parsed;
}
const combined = name + " " + partNumber;
const reach = detectReach(combined);
const { formFactor, speed, speedGbps } = inferFromSku(partNumber, cat);
pageProducts.push({
partNumber, name, url, price,
stockStatus: item.stock || undefined,
formFactor, speed, speedGbps,
reachLabel: reach?.label,
reachMeters: reach?.meters,
fiberType: detectFiber(combined),
wavelength: detectWavelength(combined),
});
}
// Deduplicate against global set
const newProducts = pageProducts.filter((p) => !seenUrls.has(p.url)); const newProducts = pageProducts.filter((p) => !seenUrls.has(p.url));
newProducts.forEach((p) => seenUrls.add(p.url)); for (const p of newProducts) seenUrls.add(p.url);
console.log(` Page ${page}: ${pageProducts.length} found, ${newProducts.length} new`); log.info(` Parsed: ${pageProducts.length} found, ${newProducts.length} new`);
// Write to database
for (const product of newProducts) { for (const product of newProducts) {
try { try {
const txId = await findOrCreateScrapedTransceiver({ const txId = await findOrCreateScrapedTransceiver({
@ -311,33 +424,54 @@ export async function scrapeProLabs(): Promise<void> {
if (updated) priceUpdates++; if (updated) priceUpdates++;
} }
productsThisCat++;
totalProducts++; totalProducts++;
} catch (err) { } catch (err) {
console.warn(` DB error [${product.partNumber}]: ${(err as Error).message.slice(0, 80)}`); log.warning(` DB error [${product.partNumber}]: ${(err as Error).message.slice(0, 80)}`);
} }
} }
pagesThisCat++; // Check for next page
const hasNext = await page.evaluate((currentPageNum: number) => {
if (pageProducts.length === 0 || !hasNextPage(html, page)) break; const nextLink = document.querySelector('a[rel="next"], link[rel="next"]');
if (nextLink) return true;
page++; const nextNum = currentPageNum + 1;
await sleep(2000); const paginationLinks = document.querySelectorAll('a[href*="page="], .pagination a, nav a');
} catch (err) { for (const link of paginationLinks) {
console.error(` Page ${page} failed: ${(err as Error).message}`); const href = (link as HTMLAnchorElement).getAttribute("href") || "";
break; if (href.includes(`page=${nextNum}`)) return true;
} const text = link.textContent?.trim() || "";
if (text === String(nextNum) || text.toLowerCase() === "next" || text === "\u203a" || text === "\u00bb") return true;
} }
return false;
}, currentPage);
console.log(` Category done: ${productsThisCat} products across ${pagesThisCat} page(s)`); if (hasNext && currentPage < MAX_PAGES && newProducts.length > 0) {
const nextPageNum = currentPage + 1;
if (cat !== CATEGORIES[CATEGORIES.length - 1]) { const nextUrl = `${BASE}${catPath}?page=${nextPageNum}`;
await sleep(2000); urlToCat.set(nextUrl, cat);
} await requestQueue.addRequest({
url: nextUrl,
userData: { page: nextPageNum, catPath },
});
log.info(` Enqueued next page: ${nextPageNum}`);
} }
},
console.log(`\n=== ProLabs Complete: ${totalProducts} products processed, ${priceUpdates} price updates ===`); async failedRequestHandler({ request, log }) {
log.error(`Request failed after retries: ${request.url}`);
},
});
await crawler.run();
console.log(`\n=== ProLabs Complete ===`);
console.log(` Products processed: ${totalProducts}`);
console.log(` Price updates: ${priceUpdates}`);
console.log(` Pages blocked by WAF: ${blockedPages}`);
if (blockedPages > 0 && totalProducts === 0) {
console.log(`\n All pages blocked by CloudFront WAF (datacenter IP detected).`);
console.log(` Fix: Set PROXY_URL=http://user:pass@proxy:port in .env`);
}
} }
if (require.main === module) { if (require.main === module) {

View File

@ -0,0 +1,128 @@
/**
* WS4: Competitor Change Detection
*
* Compares current scrape results with previous observations
* and generates alerts for price changes, new products, stock changes.
*/
import { Pool } from "pg";
// Small dedicated connection pool (at most 3 clients) used by the change detector.
// Every setting is read from a POSTGRES_* environment variable; the literals are
// fallbacks used when the variable is unset or empty.
// NOTE(review): the password fallback is a hard-coded credential — confirm it is
// only ever valid for local development databases.
const pool = new Pool({
  host: process.env.POSTGRES_HOST || "localhost",
  // Explicit radix: the port is always parsed as a decimal number.
  port: Number.parseInt(process.env.POSTGRES_PORT || "5433", 10),
  database: process.env.POSTGRES_DB || "transceiver_db",
  user: process.env.POSTGRES_USER || "tip",
  password: process.env.POSTGRES_PASSWORD || "tip_dev_2026",
  max: 3,
});
/**
 * One freshly scraped price/stock reading for a transceiver at a vendor,
 * as consumed by the change detector.
 */
interface PriceObservation {
  // --- identity: used to look up the previous stored observation ---
  /** Matched against `price_observations.transceiver_id`. */
  transceiver_id: string;
  /** Matched against `price_observations.source_vendor_id`. */
  vendor_id: string;

  // --- current reading ---
  /** Current price, compared with the last stored price. */
  price: number;
  /** Currency code stored alongside the price in generated alerts. */
  currency: string;
  /** Current stock level; compared with the stored level (e.g. 'out_of_stock'). */
  stock_level?: string;

  // --- descriptive fields copied verbatim into generated alerts ---
  part_number?: string;
  product_name?: string;
  form_factor?: string;
  speed_gbps?: number;
  source_url?: string;
}
/**
 * Detects competitor changes after a scraper run and records them as alerts.
 *
 * For each observation the newest `price_observations` row for the same
 * transceiver and vendor is loaded and compared with the current reading:
 *  - no previous row               -> `new_product` alert (severity `medium`)
 *  - price moved by more than 2%   -> `price_drop` / `price_increase` alert plus a
 *    `price_changes` row; severity is `high` above 15%, `medium` above 5%, else `low`
 *  - stock switched to or from `out_of_stock` -> `out_of_stock` / `back_in_stock` alert
 *
 * A failure while processing one observation is logged and does not stop the others.
 *
 * NOTE(review): the baseline is the newest stored row, so this presumably has to run
 * before the current scrape's observations are written — confirm against the caller.
 *
 * @param vendorId Currently unused: every query uses `obs.vendor_id`. Kept so existing
 *   callers keep working.
 * @param currentObservations Readings produced by the scraper run.
 * @returns Counters: `alerts` is the total number of alerts written (including stock
 *   alerts); `priceChanges` and `newProducts` count their respective alert kinds.
 */
export async function detectChanges(
  vendorId: string,
  currentObservations: PriceObservation[]
): Promise<{ alerts: number; priceChanges: number; newProducts: number }> {
  let alerts = 0;
  let priceChanges = 0;
  let newProducts = 0;

  for (const obs of currentObservations) {
    try {
      // Last known price/stock for this transceiver from this vendor.
      const prev = await pool.query(
        `SELECT price, currency, stock_level
FROM price_observations
WHERE transceiver_id = $1 AND source_vendor_id = $2
ORDER BY time DESC LIMIT 1`,
        [obs.transceiver_id, obs.vendor_id]
      );

      if (prev.rows.length === 0) {
        // Never seen before: raise a new-product alert and skip the comparisons.
        await pool.query(
          `INSERT INTO competitor_alerts (vendor_id, transceiver_id, alert_type, severity,
new_price, currency, part_number, product_name, form_factor, speed_gbps, source_url)
VALUES ($1, $2, 'new_product', 'medium', $3, $4, $5, $6, $7, $8, $9)`,
          [obs.vendor_id, obs.transceiver_id, obs.price, obs.currency,
            obs.part_number, obs.product_name, obs.form_factor, obs.speed_gbps, obs.source_url]
        );
        newProducts++;
        alerts++;
        continue;
      }

      const prevPrice = parseFloat(prev.rows[0].price);
      const prevStock = prev.rows[0].stock_level;

      // A relative change is only meaningful against a positive, finite baseline and a
      // finite current price. Without this guard a stored price of 0 (or an infinite
      // scraped price) yields an Infinity delta/percentage that is passed to the INSERTs.
      const pricesComparable =
        Number.isFinite(prevPrice) && prevPrice > 0 && Number.isFinite(obs.price);

      // Price change detection (>2% threshold to avoid noise).
      if (pricesComparable && Math.abs(obs.price - prevPrice) / prevPrice > 0.02) {
        const delta = obs.price - prevPrice;
        const deltaPct = (delta / prevPrice) * 100;
        const alertType = delta < 0 ? 'price_drop' : 'price_increase';
        const severity = Math.abs(deltaPct) > 15 ? 'high' : Math.abs(deltaPct) > 5 ? 'medium' : 'low';

        // Alert row for the feed.
        await pool.query(
          `INSERT INTO competitor_alerts (vendor_id, transceiver_id, alert_type, severity,
old_price, new_price, price_delta, price_pct, currency,
part_number, product_name, form_factor, speed_gbps, source_url)
VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13, $14)`,
          [obs.vendor_id, obs.transceiver_id, alertType, severity,
            prevPrice, obs.price, delta, deltaPct, obs.currency,
            obs.part_number, obs.product_name, obs.form_factor, obs.speed_gbps, obs.source_url]
        );

        // Separate price-change history record.
        await pool.query(
          `INSERT INTO price_changes (transceiver_id, vendor_id, old_price, new_price, delta, delta_pct, currency)
VALUES ($1, $2, $3, $4, $5, $6, $7)`,
          [obs.transceiver_id, obs.vendor_id, prevPrice, obs.price, delta, deltaPct, obs.currency]
        );

        priceChanges++;
        alerts++;
      }

      // Stock change detection: only transitions into or out of 'out_of_stock' alert.
      if (prevStock && obs.stock_level && prevStock !== obs.stock_level) {
        if (obs.stock_level === 'out_of_stock' && prevStock !== 'out_of_stock') {
          await pool.query(
            `INSERT INTO competitor_alerts (vendor_id, transceiver_id, alert_type, severity,
part_number, product_name, form_factor, speed_gbps, source_url)
VALUES ($1, $2, 'out_of_stock', 'low', $3, $4, $5, $6, $7)`,
            [obs.vendor_id, obs.transceiver_id, obs.part_number, obs.product_name,
              obs.form_factor, obs.speed_gbps, obs.source_url]
          );
          alerts++;
        } else if (prevStock === 'out_of_stock' && obs.stock_level !== 'out_of_stock') {
          await pool.query(
            `INSERT INTO competitor_alerts (vendor_id, transceiver_id, alert_type, severity,
new_price, currency, part_number, product_name, form_factor, speed_gbps, source_url)
VALUES ($1, $2, 'back_in_stock', 'low', $3, $4, $5, $6, $7, $8, $9)`,
            [obs.vendor_id, obs.transceiver_id, obs.price, obs.currency,
              obs.part_number, obs.product_name, obs.form_factor, obs.speed_gbps, obs.source_url]
          );
          alerts++;
        }
      }
    } catch (err) {
      // Best effort per observation: log with context and continue with the next one.
      console.error(`Change detection error for ${obs.part_number}:`, err);
    }
  }

  console.log(`Change detection: ${alerts} alerts (${priceChanges} price changes, ${newProducts} new products)`);
  return { alerts, priceChanges, newProducts };
}

View File

@ -0,0 +1,154 @@
/**
* WS0: Image Downloader
*
* Intended to download product images from various sources, resize them, and store metadata.
* R2 upload is optional for now — the current code only stores image URLs and marks has_image.
*/
import { Pool } from "pg";
import { createHash } from "crypto";
// Small dedicated connection pool (at most 3 clients) used by the image utilities.
// Every setting is read from a POSTGRES_* environment variable; the literals are
// fallbacks used when the variable is unset or empty.
// NOTE(review): the password fallback is a hard-coded credential — confirm it is
// only ever valid for local development databases.
const pool = new Pool({
  host: process.env.POSTGRES_HOST || "localhost",
  // Explicit radix: the port is always parsed as a decimal number.
  port: Number.parseInt(process.env.POSTGRES_PORT || "5433", 10),
  database: process.env.POSTGRES_DB || "transceiver_db",
  user: process.env.POSTGRES_USER || "tip",
  password: process.env.POSTGRES_PASSWORD || "tip_dev_2026",
  max: 3,
});
/**
 * Stores an image URL on a transceiver row, flags it as having an image and
 * stamps `image_scraped_at` with the current time.
 *
 * The row is only touched when it has no image URL yet (NULL or empty string),
 * so an already recorded URL is never overwritten.
 *
 * @param transceiverId Primary key of the transceiver to update.
 * @param imageUrl URL to record.
 * @param source Accepted for callers' convenience; not used by the current implementation.
 */
export async function setTransceiverImage(
  transceiverId: string,
  imageUrl: string,
  source?: string
): Promise<void> {
  const updateSql = `UPDATE transceivers SET image_url = $2, has_image = true, image_scraped_at = NOW()
WHERE id = $1 AND (image_url IS NULL OR image_url = '')`;
  const params = [transceiverId, imageUrl];
  await pool.query(updateSql, params);
}
/**
 * Stores an image URL on a switch row and flags it as having an image.
 *
 * Only rows without an image URL (NULL or empty string) are updated, so an
 * existing URL is left untouched.
 *
 * @param switchId Primary key of the switch to update.
 * @param imageUrl URL to record.
 */
export async function setSwitchImage(
  switchId: string,
  imageUrl: string
): Promise<void> {
  const updateSql = `UPDATE switches SET image_url = $2, has_image = true
WHERE id = $1 AND (image_url IS NULL OR image_url = '')`;
  const params = [switchId, imageUrl];
  await pool.query(updateSql, params);
}
/**
 * Lists transceivers that still need an image, highest speed first, for backfill jobs.
 *
 * A transceiver qualifies when `has_image` is false or NULL and its `image_url` is
 * missing. "Missing" means NULL or an empty string, matching the condition the
 * image setters in this file use before writing a URL.
 *
 * @param limit Maximum number of rows to return (default 100). Fractions are floored,
 *   negative values are treated as 0 and non-finite values fall back to 100, so the
 *   value is always valid for SQL `LIMIT`.
 * @returns Rows with the transceiver's identifying fields plus the vendor name
 *   (left-joined from `vendors`).
 */
export async function getProductsWithoutImages(limit = 100): Promise<Array<{
  id: string;
  slug: string;
  form_factor: string;
  speed_gbps: number;
  reach_label: string;
  vendor_name: string;
  part_number: string;
}>> {
  // LIMIT rejects negative numbers and non-integers, so sanitize before binding.
  const safeLimit = Number.isFinite(limit) ? Math.max(0, Math.floor(limit)) : 100;

  const result = await pool.query(
    `SELECT t.id, t.slug, t.form_factor, t.speed_gbps, t.reach_label, t.part_number,
v.name AS vendor_name
FROM transceivers t
LEFT JOIN vendors v ON t.vendor_id = v.id
WHERE (t.has_image = false OR t.has_image IS NULL)
AND (t.image_url IS NULL OR t.image_url = '')
ORDER BY t.speed_gbps DESC
LIMIT $1`,
    [safeLimit]
  );
  return result.rows;
}
/**
 * Builds candidate search URLs where an image for the given product may be found.
 *
 * The descriptive query is "<form factor> <speed>G <reach> transceiver". Parts that
 * are empty, whitespace-only, or missing at runtime (e.g. NULL columns from a database
 * row) are left out, so the query never contains stray spaces or the words
 * "null"/"undefined".
 *
 * @param product Product attributes. `vendor_name` is accepted but not used by the
 *   current implementation.
 * @returns URLs in this order: Flexoptix catalog search, FS.com search for the
 *   descriptive query, and — only when a non-blank part number is given — FS.com
 *   search for the part number.
 */
export function buildImageSearchUrls(product: {
  form_factor: string;
  speed_gbps: number;
  reach_label: string;
  part_number?: string;
  vendor_name?: string;
}): string[] {
  // Speed is only included when it is a real positive number.
  const speedTerm =
    Number.isFinite(product.speed_gbps) && product.speed_gbps > 0
      ? `${product.speed_gbps}G`
      : "";

  const q = [product.form_factor, speedTerm, product.reach_label, "transceiver"]
    .map((term) => (term ?? "").trim())
    .filter((term) => term.length > 0)
    .join(" ");
  const encodedQuery = encodeURIComponent(q);

  const urls: string[] = [
    // Flexoptix store
    `https://www.flexoptix.net/en/catalogsearch/result/?q=${encodedQuery}`,
    // FS.com
    `https://www.fs.com/search/${encodedQuery}.html`,
  ];

  // If we have a usable part number, add a part-number search as well.
  const partNumber = product.part_number?.trim();
  if (partNumber) {
    urls.push(`https://www.fs.com/search/${encodeURIComponent(partNumber)}.html`);
  }

  return urls;
}
/**
 * Reports how many transceivers have an image.
 *
 * Runs a single aggregate over `transceivers` and converts the counts to numbers.
 *
 * @returns `total`, `with_image` (has_image = true), `without_image`
 *   (has_image false or NULL) and `coverage_pct` — the share with an image as a
 *   percentage rounded to two decimals, or 0 when the table is empty.
 */
export async function getImageCoverageStats(): Promise<{
  total: number;
  with_image: number;
  without_image: number;
  coverage_pct: number;
}> {
  const { rows } = await pool.query(`
SELECT
COUNT(*) AS total,
COUNT(*) FILTER (WHERE has_image = true) AS with_image,
COUNT(*) FILTER (WHERE has_image = false OR has_image IS NULL) AS without_image
FROM transceivers
`);
  const counts = rows[0];

  const total = parseInt(counts.total);
  const withImage = parseInt(counts.with_image);
  const withoutImage = parseInt(counts.without_image);

  // Percentage with two decimals; guard against dividing by zero on an empty table.
  let coveragePct = 0;
  if (total > 0) {
    coveragePct = Math.round((withImage / total) * 10000) / 100;
  }

  return {
    total,
    with_image: withImage,
    without_image: withoutImage,
    coverage_pct: coveragePct,
  };
}
/**
 * Reports how many transceivers have at least one price observation from the
 * last 7 days.
 *
 * @returns `total`, `with_recent_price`, `without_recent_price` (total minus the
 *   former) and `coverage_pct` — the share with a recent price as a percentage
 *   rounded to two decimals, or 0 when the table is empty.
 */
export async function getPriceCoverageStats(): Promise<{
  total: number;
  with_recent_price: number;
  without_recent_price: number;
  coverage_pct: number;
}> {
  const { rows } = await pool.query(`
SELECT
COUNT(*) AS total,
COUNT(*) FILTER (WHERE EXISTS (
SELECT 1 FROM price_observations po WHERE po.transceiver_id = t.id AND po.time > NOW() - INTERVAL '7 days'
)) AS with_price
FROM transceivers t
`);
  const counts = rows[0];

  const total = parseInt(counts.total);
  const priced = parseInt(counts.with_price);

  // Percentage with two decimals; guard against dividing by zero on an empty table.
  let coveragePct = 0;
  if (total > 0) {
    coveragePct = Math.round((priced / total) * 10000) / 100;
  }

  return {
    total,
    with_recent_price: priced,
    without_recent_price: total - priced,
    coverage_pct: coveragePct,
  };
}

View File

@ -0,0 +1,347 @@
-- Migration 013: v0.2.0 Sales Intelligence Engine
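-- Adds: image tracking columns, price/image coverage views, competitor_alerts, price_changes,
-- flexoptix_product_map + switch finder view, blog_series (+ v2 columns on blog_drafts),
-- sales_forecasts, transport planner tables (fiber_providers, fiber_routes, cities), generated_datasheets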
-- ============================================================
-- IMAGE TRACKING (WS0)
-- ============================================================
-- Add image-tracking columns to transceivers if they do not exist yet.
-- Every ADD COLUMN already uses IF NOT EXISTS; the duplicate_column handler
-- additionally swallows that error should it still be raised.
-- has_image defaults to FALSE.
DO $$ BEGIN
ALTER TABLE transceivers ADD COLUMN IF NOT EXISTS image_url TEXT;
ALTER TABLE transceivers ADD COLUMN IF NOT EXISTS image_r2_key TEXT;
ALTER TABLE transceivers ADD COLUMN IF NOT EXISTS image_thumb_r2_key TEXT;
ALTER TABLE transceivers ADD COLUMN IF NOT EXISTS image_scraped_at TIMESTAMPTZ;
ALTER TABLE transceivers ADD COLUMN IF NOT EXISTS has_image BOOLEAN DEFAULT FALSE;
EXCEPTION WHEN duplicate_column THEN NULL;
END $$;
-- Same pattern for switches, but only the thumbnail key and the has_image flag
-- are added here.
-- NOTE(review): image_url / image_r2_key are not added to switches in this
-- migration; presumably they already exist or are not needed. Confirm.
DO $$ BEGIN
ALTER TABLE switches ADD COLUMN IF NOT EXISTS image_thumb_r2_key TEXT;
ALTER TABLE switches ADD COLUMN IF NOT EXISTS has_image BOOLEAN DEFAULT FALSE;
EXCEPTION WHEN duplicate_column THEN NULL;
END $$;
-- Partial index over transceivers still lacking an image. Only rows with
-- has_image = false are indexed; rows where has_image IS NULL are not.
CREATE INDEX IF NOT EXISTS idx_transceivers_has_image ON transceivers(has_image) WHERE has_image = false;
-- ============================================================
-- PRICE COVERAGE (WS0b)
-- ============================================================
-- View: per-transceiver price freshness.
-- One row per transceiver (vendor name is NULL when no vendor row matches) with:
--   last_price_at      = newest price_observations.time for the transceiver
--   recent_price_count = number of observations newer than 7 days
--   has_recent_price   = TRUE when recent_price_count would be > 0
-- Rows without a recent price sort first, then by speed descending.
CREATE OR REPLACE VIEW v_price_coverage AS
SELECT
t.id,
t.slug,
t.form_factor,
t.speed_gbps,
t.reach_label,
v.name AS vendor_name,
(SELECT MAX(po.time) FROM price_observations po WHERE po.transceiver_id = t.id) AS last_price_at,
(SELECT COUNT(*) FROM price_observations po WHERE po.transceiver_id = t.id AND po.time > NOW() - INTERVAL '7 days') AS recent_price_count,
-- Same 7-day count as above, collapsed to a boolean flag.
CASE
WHEN (SELECT COUNT(*) FROM price_observations po WHERE po.transceiver_id = t.id AND po.time > NOW() - INTERVAL '7 days') > 0 THEN TRUE
ELSE FALSE
END AS has_recent_price
FROM transceivers t
LEFT JOIN vendors v ON t.vendor_id = v.id
ORDER BY has_recent_price ASC, t.speed_gbps DESC;
-- View: image coverage.
-- One row per transceiver exposing its image columns and vendor name
-- (NULL when no vendor row matches). Ordered by has_image ascending, so
-- rows with has_image = FALSE come before TRUE, then by speed descending.
CREATE OR REPLACE VIEW v_image_coverage AS
SELECT
t.id,
t.slug,
t.form_factor,
t.speed_gbps,
t.image_url,
t.image_r2_key,
t.has_image,
v.name AS vendor_name
FROM transceivers t
LEFT JOIN vendors v ON t.vendor_id = v.id
ORDER BY t.has_image ASC, t.speed_gbps DESC;
-- ============================================================
-- COMPETITOR INTELLIGENCE (WS4)
-- ============================================================
-- competitor_alerts: one row per detected competitor event.
-- vendor_id and transceiver_id are optional (nullable) foreign keys.
-- alert_type and severity are restricted by CHECK constraints; severity
-- defaults to 'info' and new alerts start unacknowledged.
CREATE TABLE IF NOT EXISTS competitor_alerts (
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
vendor_id UUID REFERENCES vendors(id),
transceiver_id UUID REFERENCES transceivers(id),
alert_type TEXT NOT NULL CHECK (alert_type IN (
'new_product', 'price_drop', 'price_increase', 'out_of_stock',
'back_in_stock', 'discontinued', 'new_vendor'
)),
severity TEXT DEFAULT 'info' CHECK (severity IN ('critical', 'high', 'medium', 'low', 'info')),
-- Price change details (all nullable; nothing here enforces that they are
-- only set for price alerts)
old_price NUMERIC,
new_price NUMERIC,
price_delta NUMERIC, -- absolute change
price_pct NUMERIC, -- percentage change
currency TEXT DEFAULT 'USD',
-- Product details (stored on the alert row itself rather than joined)
part_number TEXT,
product_name TEXT,
form_factor TEXT,
speed_gbps NUMERIC,
source_url TEXT,
-- Status
acknowledged BOOLEAN DEFAULT FALSE,
notes TEXT,
created_at TIMESTAMPTZ DEFAULT NOW()
);
-- Lookup indexes by type, vendor, recency and severity, plus a partial
-- index that only contains unacknowledged alerts.
CREATE INDEX IF NOT EXISTS idx_competitor_alerts_type ON competitor_alerts(alert_type);
CREATE INDEX IF NOT EXISTS idx_competitor_alerts_vendor ON competitor_alerts(vendor_id);
CREATE INDEX IF NOT EXISTS idx_competitor_alerts_created ON competitor_alerts(created_at DESC);
CREATE INDEX IF NOT EXISTS idx_competitor_alerts_unack ON competitor_alerts(acknowledged) WHERE acknowledged = FALSE;
CREATE INDEX IF NOT EXISTS idx_competitor_alerts_severity ON competitor_alerts(severity);
-- Price change history (intended: deduplicated, one row per actual change).
-- NOTE(review): no constraint in this table enforces the deduplication or the
-- delta formulas noted below; the writer is presumably responsible. Confirm.
CREATE TABLE IF NOT EXISTS price_changes (
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
transceiver_id UUID REFERENCES transceivers(id),
vendor_id UUID REFERENCES vendors(id),
old_price NUMERIC NOT NULL,
new_price NUMERIC NOT NULL,
delta NUMERIC NOT NULL, -- new - old
delta_pct NUMERIC NOT NULL, -- ((new-old)/old) * 100
currency TEXT DEFAULT 'USD',
detected_at TIMESTAMPTZ DEFAULT NOW()
);
-- Indexes for "latest changes" per transceiver, per vendor, and overall.
CREATE INDEX IF NOT EXISTS idx_price_changes_transceiver ON price_changes(transceiver_id, detected_at DESC);
CREATE INDEX IF NOT EXISTS idx_price_changes_vendor ON price_changes(vendor_id, detected_at DESC);
CREATE INDEX IF NOT EXISTS idx_price_changes_detected ON price_changes(detected_at DESC);
-- ============================================================
-- FINDER: FLEXOPTIX PRODUCT MAPPING (WS1)
-- ============================================================
-- Map OEM part numbers to Flexoptix products.
-- At most one mapping per (oem_part_number, oem_vendor). oem_vendor is free
-- text, not a foreign key to vendors. match_type defaults to 'exact' and is
-- limited to the four values in its CHECK constraint.
CREATE TABLE IF NOT EXISTS flexoptix_product_map (
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
oem_part_number TEXT NOT NULL,
oem_vendor TEXT NOT NULL,
flexoptix_sku TEXT,
flexoptix_url TEXT,
flexoptix_price_eur NUMERIC,
form_factor TEXT,
speed_gbps NUMERIC,
reach_label TEXT,
fiber_type TEXT,
match_type TEXT DEFAULT 'exact' CHECK (match_type IN ('exact', 'equivalent', 'compatible', 'suggested')),
last_verified TIMESTAMPTZ,
created_at TIMESTAMPTZ DEFAULT NOW(),
-- updated_at only has an insert-time default; no trigger in this migration
-- refreshes it on UPDATE.
updated_at TIMESTAMPTZ DEFAULT NOW(),
UNIQUE(oem_part_number, oem_vendor)
);
-- Lookup indexes by OEM part number, OEM vendor, and (form factor, speed).
CREATE INDEX IF NOT EXISTS idx_flexoptix_map_oem ON flexoptix_product_map(oem_part_number);
CREATE INDEX IF NOT EXISTS idx_flexoptix_map_vendor ON flexoptix_product_map(oem_vendor);
CREATE INDEX IF NOT EXISTS idx_flexoptix_map_ff ON flexoptix_product_map(form_factor, speed_gbps);
-- Finder view: switch → compatible Flexoptix products
-- One row per (switch, compatible transceiver, matching Flexoptix mapping).
-- Only compatibility rows with status = 'compatible' are included.
-- The Flexoptix mapping is matched on form factor + speed + reach only, so a
-- transceiver fans out to several rows when more than one mapping shares
-- those attributes, and the fpm.* columns are NULL when none matches.
-- latest_price / latest_currency are taken from the SAME newest
-- price_observations row through one LATERAL lookup. Two independent
-- "ORDER BY time DESC LIMIT 1" subqueries could pair a price with the
-- currency of a different row when timestamps tie, and scan twice.
CREATE OR REPLACE VIEW v_switch_flexoptix_finder AS
SELECT
  sw.id AS switch_id,
  sw.model AS switch_model,
  sw.series AS switch_series,
  sv.name AS switch_vendor,
  c.status AS compat_status,
  c.firmware_min,
  c.notes AS compat_notes,
  t.id AS transceiver_id,
  t.slug AS transceiver_slug,
  t.form_factor,
  t.speed_gbps,
  t.reach_label,
  t.fiber_type,
  t.wavelengths,
  t.connector,
  t.image_url AS transceiver_image,
  fpm.flexoptix_sku,
  fpm.flexoptix_url,
  fpm.flexoptix_price_eur,
  fpm.match_type,
  lp.price AS latest_price,
  lp.currency AS latest_currency
FROM switches sw
JOIN vendors sv ON sw.vendor_id = sv.id
JOIN compatibility c ON c.switch_id = sw.id AND c.status = 'compatible'
JOIN transceivers t ON c.transceiver_id = t.id
LEFT JOIN flexoptix_product_map fpm ON (
  fpm.form_factor = t.form_factor
  AND fpm.speed_gbps = t.speed_gbps
  AND fpm.reach_label = t.reach_label
)
-- Newest observation for this transceiver; yields NULLs when there is none.
LEFT JOIN LATERAL (
  SELECT po.price, po.currency
  FROM price_observations po
  WHERE po.transceiver_id = t.id
  ORDER BY po.time DESC
  LIMIT 1
) lp ON TRUE;
-- ============================================================
-- BLOG ENGINE v2 (WS8)
-- ============================================================
-- blog_series: groups blog drafts into a multi-part series.
-- slug is unique; total_parts defaults to 1; status defaults to 'active'
-- and is limited to 'active' | 'completed' | 'paused'.
CREATE TABLE IF NOT EXISTS blog_series (
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
title TEXT NOT NULL,
slug TEXT NOT NULL UNIQUE,
description TEXT,
total_parts INTEGER DEFAULT 1,
status TEXT DEFAULT 'active' CHECK (status IN ('active', 'completed', 'paused')),
created_at TIMESTAMPTZ DEFAULT NOW()
);
-- Add v2 columns to blog_drafts, if that table exists.
-- ALTER TABLE IF EXISTS turns this into a no-op (with a NOTICE) when
-- blog_drafts is missing, instead of aborting the migration with
-- undefined_table. ADD COLUMN IF NOT EXISTS already makes each column
-- addition idempotent, so no DO block / duplicate_column handler is needed.
-- series_id links a draft to blog_series; series_part is its position.
ALTER TABLE IF EXISTS blog_drafts
  ADD COLUMN IF NOT EXISTS series_id UUID REFERENCES blog_series(id),
  ADD COLUMN IF NOT EXISTS series_part INTEGER,
  ADD COLUMN IF NOT EXISTS seo_title TEXT,
  ADD COLUMN IF NOT EXISTS seo_description TEXT,
  ADD COLUMN IF NOT EXISTS seo_slug TEXT,
  ADD COLUMN IF NOT EXISTS seo_focus_keyword TEXT,
  ADD COLUMN IF NOT EXISTS seo_score INTEGER,
  ADD COLUMN IF NOT EXISTS readability_score NUMERIC,
  ADD COLUMN IF NOT EXISTS hero_image_url TEXT,
  ADD COLUMN IF NOT EXISTS hero_image_r2_key TEXT,
  ADD COLUMN IF NOT EXISTS related_products UUID[],
  ADD COLUMN IF NOT EXISTS related_switches UUID[],
  ADD COLUMN IF NOT EXISTS competitor_data JSONB,
  ADD COLUMN IF NOT EXISTS pricing_data JSONB,
  ADD COLUMN IF NOT EXISTS export_markdown TEXT,
  ADD COLUMN IF NOT EXISTS export_html TEXT,
  ADD COLUMN IF NOT EXISTS published_at TIMESTAMPTZ,
  ADD COLUMN IF NOT EXISTS scheduled_at TIMESTAMPTZ;
-- ============================================================
-- SALES FORECAST (WS5)
-- ============================================================
-- sales_forecasts: stored forecast snapshots per technology.
-- technology is indexed but not unique, so several snapshots per technology
-- can coexist; computed_at / valid_until distinguish them.
CREATE TABLE IF NOT EXISTS sales_forecasts (
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
technology TEXT NOT NULL, -- "400G QSFP-DD", "100G QSFP28", etc.
speed_gbps NUMERIC,
form_factor TEXT,
-- Forecast periods (units and revenue at 3, 9, 12 and 18 months)
forecast_3m_units INTEGER,
forecast_3m_revenue NUMERIC,
forecast_9m_units INTEGER,
forecast_9m_revenue NUMERIC,
forecast_12m_units INTEGER,
forecast_12m_revenue NUMERIC,
forecast_18m_units INTEGER,
forecast_18m_revenue NUMERIC,
-- Price trajectory
-- NOTE(review): "asp" presumably means average selling price; confirm.
current_asp NUMERIC,
asp_3m NUMERIC,
asp_12m NUMERIC,
price_floor NUMERIC,
months_to_floor INTEGER,
-- Confidence (one value per forecast horizon; scale is not constrained here)
confidence_3m NUMERIC,
confidence_9m NUMERIC,
confidence_12m NUMERIC,
confidence_18m NUMERIC,
-- Buy signal (nullable; when set it must be one of the three listed values)
buy_signal TEXT CHECK (buy_signal IN ('BUY_NOW', 'WAIT', 'HOLD')),
signal_reason TEXT,
-- Model info
model_version TEXT DEFAULT 'norton-bass-v1',
data_points INTEGER, -- how many price observations used
computed_at TIMESTAMPTZ DEFAULT NOW(),
-- Defaults to 7 days after the row is inserted.
valid_until TIMESTAMPTZ DEFAULT NOW() + INTERVAL '7 days'
);
-- Indexes for lookup by technology and for newest-first retrieval.
CREATE INDEX IF NOT EXISTS idx_forecasts_tech ON sales_forecasts(technology);
CREATE INDEX IF NOT EXISTS idx_forecasts_computed ON sales_forecasts(computed_at DESC);
-- ============================================================
-- TRANSPORT PLANNER (WS3)
-- ============================================================
-- fiber_providers: carriers / network operators used by the transport planner.
-- Both name and slug are unique. type is nullable but, when set, must be one
-- of the values in its CHECK constraint.
CREATE TABLE IF NOT EXISTS fiber_providers (
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
name TEXT NOT NULL UNIQUE,
slug TEXT NOT NULL UNIQUE,
website TEXT,
type TEXT CHECK (type IN ('tier1', 'tier2', 'regional', 'municipal', 'hyperscaler')),
headquarters TEXT,
-- Free-form text array; the country-code format is not constrained here.
coverage_countries TEXT[],
products TEXT[], -- 'dark_fiber', 'wavelength', 'ip_transit', 'ethernet'
peering_ixs TEXT[], -- IX names where they peer
notes TEXT,
created_at TIMESTAMPTZ DEFAULT NOW()
);
-- fiber_routes: priced point-to-point offers between two cities.
-- city_a / city_b are plain text, not foreign keys to cities.
-- Uniqueness is per (provider_id, city_a, city_b, product_type) and is
-- direction-sensitive: (A, B) and (B, A) count as different routes.
CREATE TABLE IF NOT EXISTS fiber_routes (
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
provider_id UUID REFERENCES fiber_providers(id),
city_a TEXT NOT NULL,
city_b TEXT NOT NULL,
country TEXT DEFAULT 'DE',
distance_km NUMERIC,
fiber_distance_km NUMERIC, -- actual fiber route (usually 1.3-1.5x straight line)
product_type TEXT, -- 'dark_fiber', 'wavelength_100g', 'wavelength_400g', etc.
monthly_price_eur NUMERIC,
setup_fee_eur NUMERIC,
min_contract_months INTEGER,
latency_ms NUMERIC,
available BOOLEAN DEFAULT TRUE,
notes TEXT,
created_at TIMESTAMPTZ DEFAULT NOW(),
UNIQUE(provider_id, city_a, city_b, product_type)
);
-- cities: networking locations, unique per (name, country).
-- country defaults to 'DE'; has_ix and has_datacenter default to FALSE.
CREATE TABLE IF NOT EXISTS cities (
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
name TEXT NOT NULL,
country TEXT NOT NULL DEFAULT 'DE',
lat NUMERIC,
lon NUMERIC,
has_ix BOOLEAN DEFAULT FALSE,
ix_names TEXT[],
has_datacenter BOOLEAN DEFAULT FALSE,
population INTEGER,
UNIQUE(name, country)
);
-- Route lookup by city pair (in the stored a→b order) and city lookup by country.
CREATE INDEX IF NOT EXISTS idx_fiber_routes_cities ON fiber_routes(city_a, city_b);
CREATE INDEX IF NOT EXISTS idx_cities_country ON cities(country);
-- ============================================================
-- GENERATED DATASHEETS (WS2)
-- ============================================================
-- generated_datasheets: registry of generated datasheet files.
-- entity_id is a bare UUID without a foreign key; which table it points to
-- depends on entity_type. branding defaults to 'flexoptix', format to 'pdf'.
CREATE TABLE IF NOT EXISTS generated_datasheets (
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
entity_type TEXT NOT NULL CHECK (entity_type IN ('transceiver', 'switch', 'comparison', 'compatibility_matrix')),
entity_id UUID,
entity_ids UUID[], -- for comparison datasheets
branding TEXT DEFAULT 'flexoptix',
format TEXT DEFAULT 'pdf',
r2_key TEXT,
r2_url TEXT,
file_size_bytes BIGINT,
generated_at TIMESTAMPTZ DEFAULT NOW(),
-- Defaults to 30 days after the row is inserted; nothing in this migration
-- deletes expired rows.
expires_at TIMESTAMPTZ DEFAULT NOW() + INTERVAL '30 days'
);
-- Lookup of datasheets for a given entity.
CREATE INDEX IF NOT EXISTS idx_datasheets_entity ON generated_datasheets(entity_type, entity_id);

View File

@ -0,0 +1,84 @@
-- Seed (migration 014): European cities with IX/DC presence, fiber providers
-- covering Germany/Europe, and common fiber routes (Germany focus)
-- ============================================================
-- CITIES (Major European networking hubs)
-- ============================================================
-- Seeds 25 cities (10 German, 15 other European hubs) with coordinates and
-- IX / datacenter flags. population is not supplied and stays NULL.
-- Cities without an IX get an empty ix_names array ('{}').
-- Re-running is safe: rows whose (name, country) already exists are skipped.
INSERT INTO cities (name, country, lat, lon, has_ix, ix_names, has_datacenter) VALUES
-- Germany
('Frankfurt', 'DE', 50.1109, 8.6821, true, ARRAY['DE-CIX Frankfurt'], true),
('Berlin', 'DE', 52.5200, 13.4050, true, ARRAY['BCIX', 'DE-CIX Berlin'], true),
('Hamburg', 'DE', 53.5511, 9.9937, true, ARRAY['DE-CIX Hamburg'], true),
('Munich', 'DE', 48.1351, 11.5820, true, ARRAY['DE-CIX Munich'], true),
('Düsseldorf', 'DE', 51.2277, 6.7735, true, ARRAY['DE-CIX Dusseldorf'], true),
('Darmstadt', 'DE', 49.8728, 8.6512, false, '{}', true),
('Stuttgart', 'DE', 48.7758, 9.1829, false, '{}', true),
('Nuremberg', 'DE', 49.4521, 11.0767, true, ARRAY['N-IX'], true),
('Cologne', 'DE', 50.9375, 6.9603, false, '{}', true),
('Hannover', 'DE', 52.3759, 9.7320, false, '{}', true),
-- Major European hubs
('Amsterdam', 'NL', 52.3676, 4.9041, true, ARRAY['AMS-IX'], true),
('London', 'GB', 51.5074, -0.1278, true, ARRAY['LINX'], true),
('Paris', 'FR', 48.8566, 2.3522, true, ARRAY['France-IX', 'Equinix Paris'], true),
('Zurich', 'CH', 47.3769, 8.5417, true, ARRAY['SwissIX'], true),
('Vienna', 'AT', 48.2082, 16.3738, true, ARRAY['VIX'], true),
('Prague', 'CZ', 50.0755, 14.4378, true, ARRAY['NIX.CZ'], true),
('Warsaw', 'PL', 52.2297, 21.0122, true, ARRAY['PLIX'], true),
('Copenhagen', 'DK', 55.6761, 12.5683, true, ARRAY['Netnod Copenhagen'], true),
('Stockholm', 'SE', 59.3293, 18.0686, true, ARRAY['Netnod Stockholm'], true),
('Milan', 'IT', 45.4642, 9.1900, true, ARRAY['MIX'], true),
('Madrid', 'ES', 40.4168, -3.7038, true, ARRAY['ESPANIX'], true),
('Marseille', 'FR', 43.2965, 5.3698, true, ARRAY['France-IX Marseille'], true),
('Dublin', 'IE', 53.3498, -6.2603, true, ARRAY['INEX'], true),
('Brussels', 'BE', 50.8503, 4.3517, true, ARRAY['BNIX'], true),
('Lisbon', 'PT', 38.7223, -9.1393, true, ARRAY['GigaPIX'], true)
ON CONFLICT (name, country) DO NOTHING;
-- ============================================================
-- FIBER PROVIDERS
-- ============================================================
-- Seeds 15 providers. peering_ixs is not supplied and stays NULL.
-- The slug values are what the route seed uses to resolve provider_id.
-- Re-running is safe for rows whose name already exists (they are skipped).
-- NOTE(review): ON CONFLICT only targets name. slug is also UNIQUE, so a row
-- with a new name but an already-used slug would still raise a unique
-- violation. Confirm this is acceptable.
-- NOTE(review): some products values ('colocation', 'sd_wan', 'cdn',
-- 'peering') go beyond the examples in the products column comment; the
-- column is unconstrained TEXT[], so they are accepted.
INSERT INTO fiber_providers (name, slug, website, type, headquarters, coverage_countries, products, notes) VALUES
('euNetworks', 'eunetworks', 'https://www.eunetworks.com', 'tier1', 'London, UK', ARRAY['DE','NL','GB','FR','BE','IE','ES','IT','AT','CH','PL','CZ','DK','SE'], ARRAY['dark_fiber','wavelength','ethernet','ip_transit'], 'Pan-European fiber network, strong in Germany (Frankfurt-Berlin-Hamburg backbone)'),
('GlobalConnect', 'globalconnect', 'https://www.globalconnect.com', 'tier1', 'Copenhagen, DK', ARRAY['DE','DK','SE','NO','FI','NL'], ARRAY['dark_fiber','wavelength','ethernet'], 'Nordic + German backbone, own fiber infrastructure'),
('Telia Carrier', 'telia', 'https://www.teliacarrier.com', 'tier1', 'Stockholm, SE', ARRAY['DE','SE','DK','NO','FI','NL','GB','FR','US'], ARRAY['wavelength','ip_transit','ethernet'], 'Global Tier 1, AS1299, extensive German PoPs'),
('Zayo Group', 'zayo', 'https://www.zayo.com', 'tier1', 'Boulder, US', ARRAY['DE','GB','FR','NL','US','CA'], ARRAY['dark_fiber','wavelength','ethernet','colocation'], 'Pan-Atlantic fiber, Frankfurt-London-Amsterdam triangle'),
('Deutsche Telekom / OTC', 'dtag', 'https://www.telekom.de', 'tier1', 'Bonn, DE', ARRAY['DE','AT','CH','PL','CZ','HU','NL'], ARRAY['wavelength','ip_transit','ethernet','dark_fiber'], 'Largest German carrier, comprehensive domestic coverage'),
('Vodafone Deutschland', 'vodafone-de', 'https://www.vodafone.de', 'tier1', 'Düsseldorf, DE', ARRAY['DE','GB','NL','ES','IT'], ARRAY['wavelength','ethernet','ip_transit'], 'Major German backbone via Unity Media / Kabel Deutschland infrastructure'),
('Colt Technology Services', 'colt', 'https://www.colt.net', 'tier1', 'London, UK', ARRAY['DE','GB','FR','NL','BE','CH','AT','IT','ES','JP','SG'], ARRAY['wavelength','ethernet','ip_transit','sd_wan'], 'Strong in European financial hubs, low-latency routes'),
('GTT Communications', 'gtt', 'https://www.gtt.net', 'tier1', 'McLean, US', ARRAY['DE','GB','FR','NL','US'], ARRAY['ip_transit','ethernet','sd_wan'], 'Global Tier 1, AS3257'),
('Lumen Technologies', 'lumen', 'https://www.lumen.com', 'tier1', 'Monroe, US', ARRAY['DE','GB','FR','NL','US'], ARRAY['wavelength','ip_transit','ethernet','cdn'], 'Former CenturyLink/Level3, AS3356'),
('Gasline', 'gasline', 'https://www.gasline.de', 'regional', 'Essen, DE', ARRAY['DE'], ARRAY['dark_fiber'], 'Fiber along gas pipelines in Germany, cost-effective dark fiber'),
('NetCologne', 'netcologne', 'https://www.netcologne.de', 'regional', 'Cologne, DE', ARRAY['DE'], ARRAY['dark_fiber','ethernet'], 'Regional fiber in NRW/Rhineland area'),
('M-net', 'mnet', 'https://www.m-net.de', 'regional', 'Munich, DE', ARRAY['DE'], ARRAY['dark_fiber','ethernet'], 'Regional fiber in Bavaria'),
('RETN', 'retn', 'https://retn.net', 'tier2', 'London, UK', ARRAY['DE','GB','NL','SE','FI','RU','KZ'], ARRAY['ip_transit','wavelength'], 'East-West European backbone, AS9002'),
('Core-Backbone', 'core-backbone', 'https://www.core-backbone.com', 'tier2', 'Nuremberg, DE', ARRAY['DE','NL','AT','CH'], ARRAY['ip_transit','wavelength','colocation'], 'German-based ISP with own backbone'),
('AMS-IX', 'ams-ix', 'https://www.ams-ix.net', 'tier1', 'Amsterdam, NL', ARRAY['NL','DE'], ARRAY['peering','ethernet'], 'Worlds largest IX, extends to Frankfurt')
ON CONFLICT (name) DO NOTHING;
-- ============================================================
-- COMMON FIBER ROUTES (Germany focus)
-- ============================================================
-- Seeds 16 routes. provider_id is resolved per row by looking the provider up
-- by slug. setup_fee_eur and notes are not supplied (NULL); available falls
-- back to its column default.
-- country holds 'DE' for domestic routes and the destination's country code
-- for cross-border routes out of Frankfurt.
-- Re-running is safe: rows matching an existing
-- (provider_id, city_a, city_b, product_type) are skipped.
-- NOTE(review): if a slug lookup finds no provider, provider_id is NULL; NULLs
-- do not collide in the unique constraint, so such rows would be inserted
-- again on every run. Confirm the provider seed always runs first.
INSERT INTO fiber_routes (provider_id, city_a, city_b, country, distance_km, fiber_distance_km, product_type, monthly_price_eur, min_contract_months, latency_ms) VALUES
-- euNetworks Germany backbone
((SELECT id FROM fiber_providers WHERE slug='eunetworks'), 'Frankfurt', 'Berlin', 'DE', 420, 550, 'wavelength_100g', 5500, 36, 3.5),
((SELECT id FROM fiber_providers WHERE slug='eunetworks'), 'Frankfurt', 'Hamburg', 'DE', 490, 610, 'wavelength_100g', 6200, 36, 4.0),
((SELECT id FROM fiber_providers WHERE slug='eunetworks'), 'Frankfurt', 'Munich', 'DE', 390, 480, 'wavelength_100g', 5000, 36, 3.2),
((SELECT id FROM fiber_providers WHERE slug='eunetworks'), 'Frankfurt', 'Düsseldorf', 'DE', 230, 290, 'wavelength_100g', 3500, 36, 2.0),
((SELECT id FROM fiber_providers WHERE slug='eunetworks'), 'Frankfurt', 'Amsterdam', 'NL', 365, 440, 'wavelength_100g', 4500, 36, 3.0),
((SELECT id FROM fiber_providers WHERE slug='eunetworks'), 'Frankfurt', 'Darmstadt', 'DE', 30, 40, 'dark_fiber', 1500, 60, 0.3),
((SELECT id FROM fiber_providers WHERE slug='eunetworks'), 'Berlin', 'Hamburg', 'DE', 290, 370, 'wavelength_100g', 4000, 36, 2.5),
-- DTAG
((SELECT id FROM fiber_providers WHERE slug='dtag'), 'Frankfurt', 'Berlin', 'DE', 420, 530, 'wavelength_100g', 6500, 24, 3.8),
((SELECT id FROM fiber_providers WHERE slug='dtag'), 'Frankfurt', 'Darmstadt', 'DE', 30, 38, 'wavelength_100g', 2000, 24, 0.3),
((SELECT id FROM fiber_providers WHERE slug='dtag'), 'Berlin', 'Darmstadt', 'DE', 450, 580, 'wavelength_100g', 7000, 24, 4.0),
((SELECT id FROM fiber_providers WHERE slug='dtag'), 'Frankfurt', 'Munich', 'DE', 390, 470, 'wavelength_100g', 5500, 24, 3.3),
-- Telia
((SELECT id FROM fiber_providers WHERE slug='telia'), 'Frankfurt', 'Amsterdam', 'NL', 365, 430, 'wavelength_100g', 4000, 36, 2.8),
((SELECT id FROM fiber_providers WHERE slug='telia'), 'Frankfurt', 'London', 'GB', 650, 820, 'wavelength_100g', 7500, 36, 5.5),
((SELECT id FROM fiber_providers WHERE slug='telia'), 'Frankfurt', 'Paris', 'FR', 480, 600, 'wavelength_100g', 5500, 36, 4.0),
-- Colt
((SELECT id FROM fiber_providers WHERE slug='colt'), 'Frankfurt', 'Berlin', 'DE', 420, 540, 'ethernet_10g', 2500, 24, 3.6),
((SELECT id FROM fiber_providers WHERE slug='colt'), 'Frankfurt', 'Zurich', 'CH', 310, 400, 'wavelength_100g', 4500, 36, 2.8)
ON CONFLICT (provider_id, city_a, city_b, product_type) DO NOTHING;

View File

@ -1,27 +0,0 @@
{
"requestsFinished": 7,
"requestsFailed": 0,
"requestsRetries": 0,
"requestsFailedPerMinute": 0,
"requestsFinishedPerMinute": 84,
"requestMinDurationMillis": 217,
"requestMaxDurationMillis": 3669,
"requestTotalFailedDurationMillis": 0,
"requestTotalFinishedDurationMillis": 5667,
"crawlerStartedAt": "2026-03-27T03:06:57.250Z",
"crawlerFinishedAt": "2026-03-27T03:07:02.254Z",
"statsPersistedAt": "2026-03-27T03:07:02.254Z",
"crawlerRuntimeMillis": 5014,
"crawlerLastStartTimestamp": 1774580817240,
"requestRetryHistogram": [
7
],
"statsId": 0,
"requestAvgFailedDurationMillis": null,
"requestAvgFinishedDurationMillis": 810,
"requestTotalDurationMillis": 5667,
"requestsTotal": 7,
"requestsWithStatusCode": {},
"errors": {},
"retryErrors": {}
}

View File

@ -1,146 +0,0 @@
{
"usableSessionsCount": 7,
"retiredSessionsCount": 0,
"sessions": [
{
"id": "session_4IpwY6VPOc",
"cookieJar": {
"version": "tough-cookie@6.0.1",
"storeType": "MemoryCookieStore",
"rejectPublicSuffixes": true,
"enableLooseMode": false,
"allowSpecialUseDomain": true,
"prefixSecurity": "silent",
"cookies": []
},
"userData": {},
"maxErrorScore": 3,
"errorScoreDecrement": 0.5,
"expiresAt": "2026-03-27T03:56:57.292Z",
"createdAt": "2026-03-27T03:06:57.292Z",
"usageCount": 1,
"maxUsageCount": 50,
"errorScore": 0
},
{
"id": "session_DgcebufZlI",
"cookieJar": {
"version": "tough-cookie@6.0.1",
"storeType": "MemoryCookieStore",
"rejectPublicSuffixes": true,
"enableLooseMode": false,
"allowSpecialUseDomain": true,
"prefixSecurity": "silent",
"cookies": []
},
"userData": {},
"maxErrorScore": 3,
"errorScoreDecrement": 0.5,
"expiresAt": "2026-03-27T03:56:57.295Z",
"createdAt": "2026-03-27T03:06:57.295Z",
"usageCount": 1,
"maxUsageCount": 50,
"errorScore": 0
},
{
"id": "session_nNqMLCXOfI",
"cookieJar": {
"version": "tough-cookie@6.0.1",
"storeType": "MemoryCookieStore",
"rejectPublicSuffixes": true,
"enableLooseMode": false,
"allowSpecialUseDomain": true,
"prefixSecurity": "silent",
"cookies": []
},
"userData": {},
"maxErrorScore": 3,
"errorScoreDecrement": 0.5,
"expiresAt": "2026-03-27T03:56:57.741Z",
"createdAt": "2026-03-27T03:06:57.741Z",
"usageCount": 1,
"maxUsageCount": 50,
"errorScore": 0
},
{
"id": "session_kfhwhKVBAt",
"cookieJar": {
"version": "tough-cookie@6.0.1",
"storeType": "MemoryCookieStore",
"rejectPublicSuffixes": true,
"enableLooseMode": false,
"allowSpecialUseDomain": true,
"prefixSecurity": "silent",
"cookies": []
},
"userData": {},
"maxErrorScore": 3,
"errorScoreDecrement": 0.5,
"expiresAt": "2026-03-27T03:56:57.759Z",
"createdAt": "2026-03-27T03:06:57.759Z",
"usageCount": 1,
"maxUsageCount": 50,
"errorScore": 0
},
{
"id": "session_ROb5OpLaLg",
"cookieJar": {
"version": "tough-cookie@6.0.1",
"storeType": "MemoryCookieStore",
"rejectPublicSuffixes": true,
"enableLooseMode": false,
"allowSpecialUseDomain": true,
"prefixSecurity": "silent",
"cookies": []
},
"userData": {},
"maxErrorScore": 3,
"errorScoreDecrement": 0.5,
"expiresAt": "2026-03-27T03:56:58.061Z",
"createdAt": "2026-03-27T03:06:58.061Z",
"usageCount": 1,
"maxUsageCount": 50,
"errorScore": 0
},
{
"id": "session_qurhUeTMvT",
"cookieJar": {
"version": "tough-cookie@6.0.1",
"storeType": "MemoryCookieStore",
"rejectPublicSuffixes": true,
"enableLooseMode": false,
"allowSpecialUseDomain": true,
"prefixSecurity": "silent",
"cookies": []
},
"userData": {},
"maxErrorScore": 3,
"errorScoreDecrement": 0.5,
"expiresAt": "2026-03-27T03:56:58.348Z",
"createdAt": "2026-03-27T03:06:58.348Z",
"usageCount": 1,
"maxUsageCount": 50,
"errorScore": 0
},
{
"id": "session_ATWD4HqdJf",
"cookieJar": {
"version": "tough-cookie@6.0.1",
"storeType": "MemoryCookieStore",
"rejectPublicSuffixes": true,
"enableLooseMode": false,
"allowSpecialUseDomain": true,
"prefixSecurity": "silent",
"cookies": []
},
"userData": {},
"maxErrorScore": 3,
"errorScoreDecrement": 0.5,
"expiresAt": "2026-03-27T03:56:58.569Z",
"createdAt": "2026-03-27T03:06:58.569Z",
"usageCount": 1,
"maxUsageCount": 50,
"errorScore": 0
}
]
}

View File

@ -1,9 +0,0 @@
{
"id": "Gyz6y01b4kaqVSY",
"json": "{\"id\":\"Gyz6y01b4kaqVSY\",\"url\":\"https://www.optcore.net/product-category/optical-transceiver/sfp-plus-transceiver/\",\"loadedUrl\":\"https://www.optcore.net/product-category/optical-transceiver/sfp-plus-transceiver/\",\"uniqueKey\":\"https://www.optcore.net/product-category/optical-transceiver/sfp-plus-transceiver\",\"method\":\"GET\",\"noRetry\":false,\"retryCount\":0,\"errorMessages\":[],\"headers\":{},\"userData\":{\"__crawlee\":{\"state\":4}},\"handledAt\":\"2026-03-27T03:06:57.738Z\"}",
"method": "GET",
"orderNo": null,
"retryCount": 0,
"uniqueKey": "https://www.optcore.net/product-category/optical-transceiver/sfp-plus-transceiver",
"url": "https://www.optcore.net/product-category/optical-transceiver/sfp-plus-transceiver/"
}

View File

@ -1,9 +0,0 @@
{
"id": "UDSA3Hqwk1O5rcd",
"json": "{\"id\":\"UDSA3Hqwk1O5rcd\",\"url\":\"https://www.optcore.net/product-category/optical-transceiver/sfp-transceiver/\",\"loadedUrl\":\"https://www.optcore.net/product-category/optical-transceiver/sfp-transceiver/\",\"uniqueKey\":\"https://www.optcore.net/product-category/optical-transceiver/sfp-transceiver\",\"method\":\"GET\",\"noRetry\":false,\"retryCount\":0,\"errorMessages\":[],\"headers\":{},\"userData\":{\"__crawlee\":{\"state\":4}},\"handledAt\":\"2026-03-27T03:06:57.758Z\"}",
"method": "GET",
"orderNo": null,
"retryCount": 0,
"uniqueKey": "https://www.optcore.net/product-category/optical-transceiver/sfp-transceiver",
"url": "https://www.optcore.net/product-category/optical-transceiver/sfp-transceiver/"
}

View File

@ -1,9 +0,0 @@
{
"id": "Z6VkGiT8REFQyfA",
"json": "{\"id\":\"Z6VkGiT8REFQyfA\",\"url\":\"https://www.optcore.net/product-category/optical-transceiver/other-transceivers/\",\"loadedUrl\":\"https://www.optcore.net/product-category/optical-transceiver/other-transceivers/\",\"uniqueKey\":\"https://www.optcore.net/product-category/optical-transceiver/other-transceivers\",\"method\":\"GET\",\"noRetry\":false,\"retryCount\":0,\"errorMessages\":[],\"headers\":{},\"userData\":{\"__crawlee\":{\"state\":4}},\"handledAt\":\"2026-03-27T03:06:58.346Z\"}",
"method": "GET",
"orderNo": null,
"retryCount": 0,
"uniqueKey": "https://www.optcore.net/product-category/optical-transceiver/other-transceivers",
"url": "https://www.optcore.net/product-category/optical-transceiver/other-transceivers/"
}

View File

@ -1,9 +0,0 @@
{
"id": "Zus6krdGaVkRBmX",
"json": "{\"id\":\"Zus6krdGaVkRBmX\",\"url\":\"https://www.optcore.net/product-category/optical-transceiver/200g-400g-800g-transceiver/\",\"loadedUrl\":\"https://www.optcore.net/product-category/optical-transceiver/200g-400g-800g-transceiver/\",\"uniqueKey\":\"https://www.optcore.net/product-category/optical-transceiver/200g-400g-800g-transceiver\",\"method\":\"GET\",\"noRetry\":false,\"retryCount\":0,\"errorMessages\":[],\"headers\":{},\"userData\":{\"__crawlee\":{\"state\":4}},\"handledAt\":\"2026-03-27T03:06:58.047Z\"}",
"method": "GET",
"orderNo": null,
"retryCount": 0,
"uniqueKey": "https://www.optcore.net/product-category/optical-transceiver/200g-400g-800g-transceiver",
"url": "https://www.optcore.net/product-category/optical-transceiver/200g-400g-800g-transceiver/"
}

View File

@ -1,9 +0,0 @@
{
"id": "bhPAevnqFIxXzV3",
"json": "{\"id\":\"bhPAevnqFIxXzV3\",\"url\":\"https://www.optcore.net/product-category/optical-transceiver/other-transceivers/page/3/\",\"loadedUrl\":\"https://www.optcore.net/product-category/optical-transceiver/other-transceivers/page/3/\",\"uniqueKey\":\"https://www.optcore.net/product-category/optical-transceiver/other-transceivers/page/3\",\"method\":\"GET\",\"noRetry\":false,\"retryCount\":0,\"errorMessages\":[],\"headers\":{},\"userData\":{\"__crawlee\":{\"crawlDepth\":2,\"enqueueStrategy\":\"same-hostname\",\"state\":4}},\"handledAt\":\"2026-03-27T03:07:02.235Z\"}",
"method": "GET",
"orderNo": null,
"retryCount": 0,
"uniqueKey": "https://www.optcore.net/product-category/optical-transceiver/other-transceivers/page/3",
"url": "https://www.optcore.net/product-category/optical-transceiver/other-transceivers/page/3/"
}

View File

@ -1,9 +0,0 @@
{
"id": "xbIMGR6AhgYwBWg",
"json": "{\"id\":\"xbIMGR6AhgYwBWg\",\"url\":\"https://www.optcore.net/product-category/optical-transceiver/other-transceivers/page/2/\",\"loadedUrl\":\"https://www.optcore.net/product-category/optical-transceiver/other-transceivers/page/2/\",\"uniqueKey\":\"https://www.optcore.net/product-category/optical-transceiver/other-transceivers/page/2\",\"method\":\"GET\",\"noRetry\":false,\"retryCount\":0,\"errorMessages\":[],\"headers\":{},\"userData\":{\"__crawlee\":{\"crawlDepth\":1,\"enqueueStrategy\":\"same-hostname\",\"state\":4}},\"handledAt\":\"2026-03-27T03:06:58.564Z\"}",
"method": "GET",
"orderNo": null,
"retryCount": 0,
"uniqueKey": "https://www.optcore.net/product-category/optical-transceiver/other-transceivers/page/2",
"url": "https://www.optcore.net/product-category/optical-transceiver/other-transceivers/page/2/"
}

View File

@ -1,9 +0,0 @@
{
"id": "y74cMHovGn2i2xA",
"json": "{\"id\":\"y74cMHovGn2i2xA\",\"url\":\"https://www.optcore.net/product-category/optical-transceiver/40g-100g-transceivers/\",\"loadedUrl\":\"https://www.optcore.net/product-category/optical-transceiver/40g-100g-transceivers/\",\"uniqueKey\":\"https://www.optcore.net/product-category/optical-transceiver/40g-100g-transceivers\",\"method\":\"GET\",\"noRetry\":false,\"retryCount\":0,\"errorMessages\":[],\"headers\":{},\"userData\":{\"__crawlee\":{\"state\":4}},\"handledAt\":\"2026-03-27T03:06:58.031Z\"}",
"method": "GET",
"orderNo": null,
"retryCount": 0,
"uniqueKey": "https://www.optcore.net/product-category/optical-transceiver/40g-100g-transceivers",
"url": "https://www.optcore.net/product-category/optical-transceiver/40g-100g-transceivers/"
}