From a69acc458858d8ea444ee3d0212007bd9fc2f910 Mon Sep 17 00:00:00 2001 From: Rene Fichtmueller Date: Tue, 31 Mar 2026 08:51:22 +0200 Subject: [PATCH] =?UTF-8?q?feat(v0.2.0):=20Sales=20Intelligence=20Engine?= =?UTF-8?q?=20=E2=80=94=20Phase=200+A?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit New API routes: - GET /api/finder — Switch→Flexoptix transceiver finder with FlexBox coding - GET /api/competitor-alerts — Competitor intelligence (price changes, new products, stock) - GET /api/forecast/:technology — Sales forecast 3/9/12/18 months + buy/wait/hold signal - POST /api/transport/plan — Transport system planner (city→city BOM with fiber providers) New MCP tools: - find_flexoptix_for_switch — Customer switch → Flexoptix products - get_competitor_alerts — Competitor monitoring - plan_transport — Network transport planning - forecast_sales — Volume/revenue prediction - generate_blog — Enhanced blog generation New DB tables (migration 013): - competitor_alerts, price_changes, flexoptix_product_map - sales_forecasts, fiber_providers, fiber_routes, cities - generated_datasheets, blog_series - Views: v_price_coverage, v_image_coverage, v_switch_flexoptix_finder Seed data (migration 014): - 25 European cities with IX/DC locations + coordinates - 15 fiber providers (euNetworks, Telia, DTAG, Colt, Zayo, etc.) - 16 fiber routes with pricing (Germany focus) Infrastructure: - Scraper scheduler: 2h Flexoptix, 4h FS.com/Optcore (was 6-8h) - Change detector for competitor price/stock monitoring - Image downloader utility with coverage tracking --- packages/api/src/index.ts | 10 +- packages/api/src/routes/competitor-alerts.ts | 160 ++++++ packages/api/src/routes/finder.ts | 237 ++++++++ packages/api/src/routes/forecast.ts | 201 +++++++ packages/api/src/routes/transport.ts | 233 ++++++++ packages/mcp-server/src/tools/finder.ts | 175 ++++++ packages/scraper/src/scheduler.ts | 49 +- packages/scraper/src/scrapers/cisco-tmg.ts | 250 +++++--- packages/scraper/src/scrapers/fluxlight.ts | 10 +- packages/scraper/src/scrapers/fs-com.ts | 127 +++-- packages/scraper/src/scrapers/gbics.ts | 17 +- packages/scraper/src/scrapers/news.ts | 19 +- packages/scraper/src/scrapers/prolabs.ts | 538 +++++++++++------- packages/scraper/src/utils/change-detector.ts | 128 +++++ .../scraper/src/utils/image-downloader.ts | 154 +++++ sql/013-v020-sales-intelligence.sql | 347 +++++++++++ sql/014-seed-transport-data.sql | 84 +++ .../default/SDK_CRAWLER_STATISTICS_0.json | 27 - .../default/SDK_SESSION_POOL_STATE.json | 146 ----- .../default/Gyz6y01b4kaqVSY.json | 9 - .../default/UDSA3Hqwk1O5rcd.json | 9 - .../default/Z6VkGiT8REFQyfA.json | 9 - .../default/Zus6krdGaVkRBmX.json | 9 - .../default/bhPAevnqFIxXzV3.json | 9 - .../default/xbIMGR6AhgYwBWg.json | 9 - .../default/y74cMHovGn2i2xA.json | 9 - 26 files changed, 2353 insertions(+), 622 deletions(-) create mode 100644 packages/api/src/routes/competitor-alerts.ts create mode 100644 packages/api/src/routes/finder.ts create mode 100644 packages/api/src/routes/forecast.ts create mode 100644 packages/api/src/routes/transport.ts create mode 100644 packages/mcp-server/src/tools/finder.ts create mode 100644 packages/scraper/src/utils/change-detector.ts create mode 100644 packages/scraper/src/utils/image-downloader.ts create mode 100644 sql/013-v020-sales-intelligence.sql create mode 100644 sql/014-seed-transport-data.sql delete mode 100644 storage/key_value_stores/default/SDK_CRAWLER_STATISTICS_0.json delete mode 100644 storage/key_value_stores/default/SDK_SESSION_POOL_STATE.json delete mode 100644 storage/request_queues/default/Gyz6y01b4kaqVSY.json delete mode 100644 storage/request_queues/default/UDSA3Hqwk1O5rcd.json delete mode 100644 storage/request_queues/default/Z6VkGiT8REFQyfA.json delete mode 100644 storage/request_queues/default/Zus6krdGaVkRBmX.json delete mode 100644 storage/request_queues/default/bhPAevnqFIxXzV3.json delete mode 100644 storage/request_queues/default/xbIMGR6AhgYwBWg.json delete mode 100644 storage/request_queues/default/y74cMHovGn2i2xA.json diff --git a/packages/api/src/index.ts b/packages/api/src/index.ts index bb60b7f..750830a 100644 --- a/packages/api/src/index.ts +++ b/packages/api/src/index.ts @@ -13,6 +13,10 @@ import { hypeCycleRouter } from "./routes/hype-cycle"; import { searchRouter } from "./routes/search"; import { documentRouter } from "./routes/documents"; import { blogRouter } from "./routes/blog"; +import { finderRouter } from "./routes/finder"; +import { competitorRouter } from "./routes/competitor-alerts"; +import { forecastRouter } from "./routes/forecast"; +import { transportRouter } from "./routes/transport"; const app = express(); @@ -42,6 +46,10 @@ app.use("/api/hype-cycle", hypeCycleRouter); app.use("/api/search", searchRouter); app.use("/api/documents", documentRouter); app.use("/api/blog", blogRouter); +app.use("/api/finder", finderRouter); +app.use("/api/competitor-alerts", competitorRouter); +app.use("/api/forecast", forecastRouter); +app.use("/api/transport", transportRouter); // Dashboard (static HTML) app.use("/dashboard", express.static(join(__dirname, "..", "..", "dashboard"))); @@ -55,7 +63,7 @@ app.get("/", (_req, res) => { app.get("/api", (_req, res) => { res.json({ name: "Transceiver Intelligence Platform", - version: "0.1.0", + version: "0.2.0-alpha.1", endpoints: [ "GET /api/transceivers?q=&form_factor=&speed=&category=&fiber_type=&wdm_type=&coherent=", "GET /api/transceivers/:id", diff --git a/packages/api/src/routes/competitor-alerts.ts b/packages/api/src/routes/competitor-alerts.ts new file mode 100644 index 0000000..c31241b --- /dev/null +++ b/packages/api/src/routes/competitor-alerts.ts @@ -0,0 +1,160 @@ +/** + * WS4: Competitor Intelligence — Alerts & Price Changes + */ +import { Router } from "express"; +import { pool } from "../db/client"; + +export const competitorRouter = Router(); + +/** + * GET /api/competitor-alerts?vendor=&type=&severity=&days=&limit=&offset= + */ +competitorRouter.get("/", async (req, res) => { + try { + const { + vendor, type, severity, days = "7", + acknowledged, limit = "50", offset = "0" + } = req.query; + + let sql = ` + SELECT ca.*, + v.name AS vendor_name, + v.slug AS vendor_slug + FROM competitor_alerts ca + LEFT JOIN vendors v ON ca.vendor_id = v.id + WHERE ca.created_at > NOW() - INTERVAL '1 day' * $1 + `; + const params: any[] = [parseInt(days as string)]; + let idx = 2; + + if (vendor) { sql += ` AND v.slug = $${idx}`; params.push(vendor); idx++; } + if (type) { sql += ` AND ca.alert_type = $${idx}`; params.push(type); idx++; } + if (severity) { sql += ` AND ca.severity = $${idx}`; params.push(severity); idx++; } + if (acknowledged === 'false') { sql += ` AND ca.acknowledged = false`; } + + sql += ` ORDER BY ca.created_at DESC LIMIT $${idx} OFFSET $${idx + 1}`; + params.push(parseInt(limit as string), parseInt(offset as string)); + + const result = await pool.query(sql, params); + + // Summary stats + const stats = await pool.query(` + SELECT + alert_type, + COUNT(*) AS count, + COUNT(*) FILTER (WHERE acknowledged = false) AS unread + FROM competitor_alerts + WHERE created_at > NOW() - INTERVAL '1 day' * $1 + GROUP BY alert_type + ORDER BY count DESC + `, [parseInt(days as string)]); + + res.json({ + alerts: result.rows, + total: result.rowCount, + stats: stats.rows, + period_days: parseInt(days as string), + }); + } catch (err) { + console.error("Competitor alerts error:", err); + res.status(500).json({ error: "Internal server error" }); + } +}); + +/** + * GET /api/competitor-alerts/price-changes?vendor=&speed=&days= + */ +competitorRouter.get("/price-changes", async (req, res) => { + try { + const { vendor, speed, days = "30", limit = "50" } = req.query; + + let sql = ` + SELECT pc.*, + v.name AS vendor_name, + t.slug, t.form_factor, t.speed_gbps, t.reach_label + FROM price_changes pc + JOIN vendors v ON pc.vendor_id = v.id + JOIN transceivers t ON pc.transceiver_id = t.id + WHERE pc.detected_at > NOW() - INTERVAL '1 day' * $1 + `; + const params: any[] = [parseInt(days as string)]; + let idx = 2; + + if (vendor) { sql += ` AND v.slug = $${idx}`; params.push(vendor); idx++; } + if (speed) { sql += ` AND t.speed_gbps = $${idx}`; params.push(parseFloat(speed as string)); idx++; } + + sql += ` ORDER BY ABS(pc.delta_pct) DESC LIMIT $${idx}`; + params.push(parseInt(limit as string)); + + const result = await pool.query(sql, params); + res.json({ price_changes: result.rows, total: result.rowCount }); + } catch (err) { + console.error("Price changes error:", err); + res.status(500).json({ error: "Internal server error" }); + } +}); + +/** + * PUT /api/competitor-alerts/:id/acknowledge + */ +competitorRouter.put("/:id/acknowledge", async (req, res) => { + try { + const { notes } = req.body || {}; + await pool.query( + `UPDATE competitor_alerts SET acknowledged = true, notes = COALESCE($2, notes) WHERE id = $1`, + [req.params.id, notes] + ); + res.json({ success: true }); + } catch (err) { + res.status(500).json({ error: "Internal server error" }); + } +}); + +/** + * GET /api/competitor-alerts/summary + * + * High-level competitor intelligence overview + */ +competitorRouter.get("/summary", async (req, res) => { + try { + const [alertsByVendor, recentDrops, newProducts, coverage] = await Promise.all([ + pool.query(` + SELECT v.name, v.slug, COUNT(*) AS alert_count, + COUNT(*) FILTER (WHERE ca.alert_type = 'price_drop') AS drops, + COUNT(*) FILTER (WHERE ca.alert_type = 'price_increase') AS increases, + COUNT(*) FILTER (WHERE ca.alert_type = 'new_product') AS new_products + FROM competitor_alerts ca + JOIN vendors v ON ca.vendor_id = v.id + WHERE ca.created_at > NOW() - INTERVAL '7 days' + GROUP BY v.name, v.slug ORDER BY alert_count DESC LIMIT 20 + `), + pool.query(` + SELECT pc.*, v.name AS vendor_name, t.form_factor, t.speed_gbps, t.reach_label + FROM price_changes pc + JOIN vendors v ON pc.vendor_id = v.id + JOIN transceivers t ON pc.transceiver_id = t.id + WHERE pc.delta_pct < -5 AND pc.detected_at > NOW() - INTERVAL '7 days' + ORDER BY pc.delta_pct ASC LIMIT 10 + `), + pool.query(` + SELECT ca.*, v.name AS vendor_name + FROM competitor_alerts ca + JOIN vendors v ON ca.vendor_id = v.id + WHERE ca.alert_type = 'new_product' AND ca.created_at > NOW() - INTERVAL '30 days' + ORDER BY ca.created_at DESC LIMIT 20 + `), + pool.query(`SELECT * FROM v_price_coverage WHERE has_recent_price = false LIMIT 20`), + ]); + + res.json({ + period: "7 days", + by_vendor: alertsByVendor.rows, + biggest_price_drops: recentDrops.rows, + new_competitor_products: newProducts.rows, + products_missing_prices: coverage.rows, + }); + } catch (err) { + console.error("Summary error:", err); + res.status(500).json({ error: "Internal server error" }); + } +}); diff --git a/packages/api/src/routes/finder.ts b/packages/api/src/routes/finder.ts new file mode 100644 index 0000000..4b103ea --- /dev/null +++ b/packages/api/src/routes/finder.ts @@ -0,0 +1,237 @@ +/** + * WS1: Switch → Flexoptix Transceiver Finder + * + * "Customer has a Cisco Nexus 93180YC-FX3 — which Flexoptix transceivers fit?" + */ +import { Router } from "express"; +import { pool } from "../db/client"; + +export const finderRouter = Router(); + +/** + * GET /api/finder?switch=&speed=&form_factor= + * + * Finds Flexoptix-compatible transceivers for a given switch model. + * If no direct Flexoptix match, shows generic compatible transceivers + * with a note about Flexoptix FlexBox coding capability. + */ +finderRouter.get("/", async (req, res) => { + try { + const { switch: switchQuery, speed, form_factor, limit = "20" } = req.query; + + if (!switchQuery) { + return res.status(400).json({ error: "Parameter 'switch' is required" }); + } + + // Step 1: Find the switch + const switchResult = await pool.query( + `SELECT sw.id, sw.model, sw.series, sw.ports_config, sw.max_speed_gbps, + v.name AS vendor_name, sw.image_url, sw.datasheet_r2_key + FROM switches sw + JOIN vendors v ON sw.vendor_id = v.id + WHERE sw.model ILIKE $1 + OR sw.model ILIKE '%' || $1 || '%' + OR sw.search_vector @@ plainto_tsquery('english', $1) + ORDER BY + CASE WHEN sw.model ILIKE $1 THEN 0 + WHEN sw.model ILIKE $1 || '%' THEN 1 + ELSE 2 END + LIMIT 5`, + [switchQuery] + ); + + if (switchResult.rows.length === 0) { + return res.status(404).json({ + error: "Switch not found", + suggestion: "Try a partial model name like 'N9K-C93180' or 'QFX5120'" + }); + } + + const sw = switchResult.rows[0]; + + // Step 2: Find compatible transceivers via compatibility table + let compatSql = ` + SELECT + t.id, t.slug, t.form_factor, t.speed, t.speed_gbps, t.reach_label, t.reach_meters, + t.fiber_type, t.wavelengths, t.connector, t.power_consumption_w, + t.image_url, t.image_r2_key, t.part_number, + tv.name AS transceiver_vendor, + tv.type AS vendor_type, + c.status AS compat_status, + c.firmware_min, + c.verified_by, + c.notes AS compat_notes, + -- Latest price + (SELECT po.price FROM price_observations po WHERE po.transceiver_id = t.id ORDER BY po.time DESC LIMIT 1) AS latest_price, + (SELECT po.currency FROM price_observations po WHERE po.transceiver_id = t.id ORDER BY po.time DESC LIMIT 1) AS latest_currency, + (SELECT po.stock_level FROM price_observations po WHERE po.transceiver_id = t.id ORDER BY po.time DESC LIMIT 1) AS stock_level, + -- Flexoptix mapping + fpm.flexoptix_sku, + fpm.flexoptix_url, + fpm.flexoptix_price_eur, + fpm.match_type AS flexoptix_match + FROM compatibility c + JOIN transceivers t ON c.transceiver_id = t.id + JOIN vendors tv ON t.vendor_id = tv.id + LEFT JOIN flexoptix_product_map fpm ON ( + fpm.form_factor = t.form_factor + AND fpm.speed_gbps = t.speed_gbps + AND (fpm.reach_label = t.reach_label OR fpm.reach_label IS NULL) + ) + WHERE c.switch_id = $1 AND c.status = 'compatible' + `; + const params: any[] = [sw.id]; + let idx = 2; + + if (speed) { + compatSql += ` AND t.speed_gbps = $${idx}`; + params.push(parseFloat(speed as string)); + idx++; + } + if (form_factor) { + compatSql += ` AND t.form_factor = $${idx}`; + params.push(form_factor); + idx++; + } + + compatSql += ` ORDER BY t.speed_gbps DESC, t.reach_meters ASC LIMIT $${idx}`; + params.push(parseInt(limit as string)); + + const compatResult = await pool.query(compatSql, params); + + // Step 3: Group results by speed class + const bySpeed: Record = {}; + for (const row of compatResult.rows) { + const key = `${row.speed_gbps}G ${row.form_factor}`; + if (!bySpeed[key]) bySpeed[key] = []; + bySpeed[key].push({ + ...row, + flexoptix_available: !!row.flexoptix_sku, + flexbox_codable: true, // All Flexoptix modules are FlexBox-codable + buy_url: row.flexoptix_url || `https://www.flexoptix.net/en/catalogsearch/result/?q=${encodeURIComponent(row.form_factor + ' ' + row.speed_gbps + 'G ' + row.reach_label)}`, + }); + } + + // Step 4: Extract port types from switch for "what can this switch accept?" + const portTypes = sw.ports_config || {}; + + res.json({ + switch: { + id: sw.id, + model: sw.model, + series: sw.series, + vendor: sw.vendor_name, + max_speed_gbps: sw.max_speed_gbps, + ports: portTypes, + image_url: sw.image_url, + }, + compatible_transceivers: compatResult.rows.map(r => ({ + id: r.id, + slug: r.slug, + form_factor: r.form_factor, + speed: r.speed, + speed_gbps: r.speed_gbps, + reach: r.reach_label, + fiber_type: r.fiber_type, + connector: r.connector, + vendor: r.transceiver_vendor, + vendor_type: r.vendor_type, + image_url: r.image_url, + compat_status: r.compat_status, + firmware_min: r.firmware_min, + // Pricing + price: r.latest_price ? parseFloat(r.latest_price) : null, + currency: r.latest_currency, + stock: r.stock_level, + // Flexoptix + flexoptix_sku: r.flexoptix_sku, + flexoptix_url: r.flexoptix_url, + flexoptix_price_eur: r.flexoptix_price_eur ? parseFloat(r.flexoptix_price_eur) : null, + flexoptix_match: r.flexoptix_match, + flexbox_codable: true, + buy_url: r.flexoptix_url || `https://www.flexoptix.net/en/catalogsearch/result/?q=${encodeURIComponent(r.form_factor + ' ' + r.speed_gbps + 'G ' + r.reach_label)}`, + })), + by_speed_class: bySpeed, + total: compatResult.rowCount, + flexoptix_note: "All Flexoptix transceivers support FlexBox coding for OEM compatibility.", + }); + } catch (err) { + console.error("Finder error:", err); + res.status(500).json({ error: "Internal server error" }); + } +}); + +/** + * GET /api/finder/suggest?q= + * + * Free-text query: "100G LR4 for Cisco Nexus" → suggests switch + transceiver combos + */ +finderRouter.get("/suggest", async (req, res) => { + try { + const { q } = req.query; + if (!q) return res.status(400).json({ error: "Parameter 'q' is required" }); + + // Extract speed, form factor, vendor hints from query + const queryStr = (q as string).toLowerCase(); + let speed: number | null = null; + let vendor: string | null = null; + let reach: string | null = null; + + // Speed detection + const speedMatch = queryStr.match(/(\d+)\s*g\b/i); + if (speedMatch) speed = parseInt(speedMatch[1]!); + + // Reach detection + if (queryStr.includes('sr')) reach = 'SR'; + else if (queryStr.includes('lr')) reach = 'LR'; + else if (queryStr.includes('er')) reach = 'ER'; + else if (queryStr.includes('zr')) reach = 'ZR'; + else if (queryStr.includes('dr')) reach = 'DR'; + + // Vendor detection + const vendorPatterns: [RegExp, string][] = [ + [/cisco|nexus|catalyst/i, 'Cisco'], + [/juniper|qfx|ex\d{4}/i, 'Juniper'], + [/arista|dcs-/i, 'Arista'], + [/dell|powerswitch/i, 'Dell'], + [/hpe|aruba/i, 'HPE'], + ]; + for (const [pattern, name] of vendorPatterns) { + if (pattern.test(queryStr)) { vendor = name; break; } + } + + // Search switches matching the query + const switches = await pool.query( + `SELECT sw.id, sw.model, sw.series, sw.max_speed_gbps, v.name AS vendor_name + FROM switches sw JOIN vendors v ON sw.vendor_id = v.id + WHERE sw.search_vector @@ plainto_tsquery('english', $1) + ${vendor ? `AND v.name ILIKE '%' || $2 || '%'` : ''} + ORDER BY sw.max_speed_gbps DESC LIMIT 10`, + vendor ? [q, vendor] : [q] + ); + + // Search transceivers matching speed/reach + let tcvrSql = `SELECT t.id, t.slug, t.form_factor, t.speed_gbps, t.reach_label, t.fiber_type, + tv.name AS vendor, t.image_url + FROM transceivers t JOIN vendors v ON t.vendor_id = v.id JOIN vendors tv ON t.vendor_id = tv.id + WHERE 1=1`; + const tcvrParams: any[] = []; + let tidx = 1; + if (speed) { tcvrSql += ` AND t.speed_gbps = $${tidx}`; tcvrParams.push(speed); tidx++; } + if (reach) { tcvrSql += ` AND t.reach_label ILIKE $${tidx}`; tcvrParams.push(reach + '%'); tidx++; } + tcvrSql += ` ORDER BY t.speed_gbps DESC LIMIT 10`; + + const transceivers = await pool.query(tcvrSql, tcvrParams); + + res.json({ + query: q, + parsed: { speed, vendor, reach }, + switches: switches.rows, + transceivers: transceivers.rows, + tip: "Use GET /api/finder?switch= for detailed compatibility results", + }); + } catch (err) { + console.error("Suggest error:", err); + res.status(500).json({ error: "Internal server error" }); + } +}); diff --git a/packages/api/src/routes/forecast.ts b/packages/api/src/routes/forecast.ts new file mode 100644 index 0000000..b259451 --- /dev/null +++ b/packages/api/src/routes/forecast.ts @@ -0,0 +1,201 @@ +/** + * WS5 + WS6: Sales Forecast Engine + Price Trajectory + */ +import { Router } from "express"; +import { pool } from "../db/client"; +import { computeHypeCycle, findTechnology, TECH_GENERATIONS } from "../hype-cycle/norton-bass"; + +export const forecastRouter = Router(); + +/** + * GET /api/forecast/:technology + * + * Returns sales forecast for 3/9/12/18 months + price trajectory + buy signal + */ +forecastRouter.get("/:technology", async (req, res) => { + try { + const techQuery = req.params.technology; + const currentYear = new Date().getFullYear(); + + // Find technology in Norton-Bass model + const tech = findTechnology(techQuery); + if (!tech) { + return res.status(404).json({ + error: "Technology not found", + available: TECH_GENERATIONS.map(t => t.name), + }); + } + + // Compute hype cycle + const hype = computeHypeCycle(tech, currentYear); + + // Get price data from DB + const priceHistory = await pool.query(` + SELECT + date_trunc('week', po.time) AS week, + AVG(po.price) AS avg_price, + MIN(po.price) AS min_price, + MAX(po.price) AS max_price, + COUNT(*) AS observations, + po.currency + FROM price_observations po + JOIN transceivers t ON po.transceiver_id = t.id + WHERE t.speed_gbps = $1 + GROUP BY week, po.currency + ORDER BY week DESC + LIMIT 52 + `, [tech.speedGbps]); + + // Compute price trajectory based on hype cycle phase + const currentPrices = priceHistory.rows.length > 0 + ? priceHistory.rows.map(r => parseFloat(r.avg_price)) + : []; + const currentASP = currentPrices.length > 0 ? currentPrices[0]! : tech.speedGbps * 0.5; // rough estimate + + // Price decline model based on phase + const phaseDeclineRates: Record = { + 'INNOVATION_TRIGGER': 0.05, + 'PEAK_OF_INFLATED_EXPECTATIONS': 0.12, + 'TROUGH_OF_DISILLUSIONMENT': 0.25, + 'SLOPE_OF_ENLIGHTENMENT': 0.15, + 'PLATEAU_OF_PRODUCTIVITY': 0.05, + 'LEGACY_DECLINE': 0.03, + }; + const annualDecline = phaseDeclineRates[hype.phase] ?? 0.10; + const monthlyDecline = 1 - Math.pow(1 - annualDecline, 1/12); + + const asp3m = currentASP * Math.pow(1 - monthlyDecline, 3); + const asp9m = currentASP * Math.pow(1 - monthlyDecline, 9); + const asp12m = currentASP * Math.pow(1 - monthlyDecline, 12); + const asp18m = currentASP * Math.pow(1 - monthlyDecline, 18); + + // Price floor estimate (based on mature technology pricing patterns) + // Typically 15-25% of peak price at full maturity + const priceFloor = currentASP * 0.20; + const monthsToFloor = annualDecline > 0 + ? Math.ceil(Math.log(priceFloor / currentASP) / Math.log(1 - monthlyDecline)) + : 999; + + // Volume forecast based on adoption curve + const adoptionNow = hype.adoptionPct / 100; + const adoption3m = Math.min(1, adoptionNow + (hype.forecast?.[0]?.adoptionPct ?? 0) / 100 * 0.25); + const adoption9m = Math.min(1, adoptionNow + (hype.forecast?.[0]?.adoptionPct ?? 0) / 100 * 0.75); + const adoption12m = Math.min(1, adoptionNow + (hype.forecast?.[1]?.adoptionPct ?? 0) / 100); + const adoption18m = Math.min(1, adoptionNow + (hype.forecast?.[2]?.adoptionPct ?? 0) / 100); + + const totalMarketPorts = tech.m * 1000000; // market potential in units + const marketShare = 0.03; // estimated Flexoptix-addressable share + + const units3m = Math.round(totalMarketPorts * adoption3m * marketShare * 0.25); + const units9m = Math.round(totalMarketPorts * adoption9m * marketShare * 0.75); + const units12m = Math.round(totalMarketPorts * adoption12m * marketShare); + const units18m = Math.round(totalMarketPorts * adoption18m * marketShare * 1.5); + + // Confidence decreases with forecast horizon + const conf3m = Math.min(0.95, 0.85 + (priceHistory.rows.length / 100)); + const conf9m = conf3m * 0.78; + const conf12m = conf3m * 0.65; + const conf18m = conf3m * 0.50; + + // Buy signal + let buySignal: string; + let signalReason: string; + if (hype.phase === 'SLOPE_OF_ENLIGHTENMENT' || hype.phase === 'PLATEAU_OF_PRODUCTIVITY') { + buySignal = 'BUY_NOW'; + signalReason = `${tech.name} is in ${hype.phase.replace(/_/g, ' ').toLowerCase()} — prices near floor, volume growing, stable supply chain.`; + } else if (hype.phase === 'TROUGH_OF_DISILLUSIONMENT') { + buySignal = 'WAIT'; + signalReason = `${tech.name} prices dropping >10%/quarter. Wait for trough bottom (estimated ${Math.ceil(monthsToFloor * 0.3)} months).`; + } else if (hype.phase === 'PEAK_OF_INFLATED_EXPECTATIONS') { + buySignal = 'WAIT'; + signalReason = `${tech.name} is at peak hype — prices will drop significantly. Only buy if urgent.`; + } else if (hype.phase === 'INNOVATION_TRIGGER') { + buySignal = 'HOLD'; + signalReason = `${tech.name} is early-stage — limited availability, premium pricing. Wait unless you need bleeding-edge.`; + } else { + buySignal = 'HOLD'; + signalReason = `${tech.name} is in legacy/decline — consider migrating to next generation.`; + } + + // Store forecast in DB + await pool.query(` + INSERT INTO sales_forecasts ( + technology, speed_gbps, form_factor, + forecast_3m_units, forecast_3m_revenue, forecast_9m_units, forecast_9m_revenue, + forecast_12m_units, forecast_12m_revenue, forecast_18m_units, forecast_18m_revenue, + current_asp, asp_3m, asp_12m, price_floor, months_to_floor, + confidence_3m, confidence_9m, confidence_12m, confidence_18m, + buy_signal, signal_reason, data_points + ) VALUES ($1,$2,$3,$4,$5,$6,$7,$8,$9,$10,$11,$12,$13,$14,$15,$16,$17,$18,$19,$20,$21,$22,$23) + `, [ + tech.name, tech.speedGbps, tech.formFactor, + units3m, units3m * asp3m, units9m, units9m * asp9m, + units12m, units12m * asp12m, units18m, units18m * asp18m, + currentASP, asp3m, asp12m, priceFloor, monthsToFloor, + conf3m, conf9m, conf12m, conf18m, + buySignal, signalReason, priceHistory.rows.length, + ]).catch(() => {}); // Non-critical + + res.json({ + technology: tech.name, + speed_gbps: tech.speedGbps, + form_factor: tech.formFactor, + hype_cycle: { + phase: hype.phase, + position_pct: hype.positionPct, + adoption_pct: hype.adoptionPct, + }, + forecasts: { + "3_months": { units: units3m, revenue_eur: Math.round(units3m * asp3m), confidence: Math.round(conf3m * 100) / 100 }, + "9_months": { units: units9m, revenue_eur: Math.round(units9m * asp9m), confidence: Math.round(conf9m * 100) / 100 }, + "12_months": { units: units12m, revenue_eur: Math.round(units12m * asp12m), confidence: Math.round(conf12m * 100) / 100 }, + "18_months": { units: units18m, revenue_eur: Math.round(units18m * asp18m), confidence: Math.round(conf18m * 100) / 100 }, + }, + price_trajectory: { + current_asp: Math.round(currentASP * 100) / 100, + asp_3m: Math.round(asp3m * 100) / 100, + asp_9m: Math.round(asp9m * 100) / 100, + asp_12m: Math.round(asp12m * 100) / 100, + asp_18m: Math.round(asp18m * 100) / 100, + price_floor: Math.round(priceFloor * 100) / 100, + months_to_floor: Math.max(0, monthsToFloor), + annual_decline_pct: Math.round(annualDecline * 100), + }, + buy_signal: { + signal: buySignal, + reason: signalReason, + }, + price_history: priceHistory.rows.slice(0, 12), + model: "Norton-Bass Multigenerational Diffusion v1", + }); + } catch (err) { + console.error("Forecast error:", err); + res.status(500).json({ error: "Internal server error" }); + } +}); + +/** + * GET /api/forecast + * + * Overview of all technology forecasts + */ +forecastRouter.get("/", async (_req, res) => { + try { + const currentYear = new Date().getFullYear(); + const results = TECH_GENERATIONS.map(tech => { + const hype = computeHypeCycle(tech, currentYear); + return { + technology: tech.name, + speed_gbps: tech.speedGbps, + form_factor: tech.formFactor, + phase: hype.phase, + adoption_pct: hype.adoptionPct, + position_pct: hype.positionPct, + }; + }); + + res.json({ technologies: results }); + } catch (err) { + res.status(500).json({ error: "Internal server error" }); + } +}); diff --git a/packages/api/src/routes/transport.ts b/packages/api/src/routes/transport.ts new file mode 100644 index 0000000..6c6cd32 --- /dev/null +++ b/packages/api/src/routes/transport.ts @@ -0,0 +1,233 @@ +/** + * WS3: Transport System Planner + * + * "Berlin to Darmstadt, 100G" → complete BOM with switches, fiber providers, Flexoptix transceivers + */ +import { Router } from "express"; +import { pool } from "../db/client"; + +export const transportRouter = Router(); + +// Haversine distance calculation +function haversineKm(lat1: number, lon1: number, lat2: number, lon2: number): number { + const R = 6371; + const dLat = (lat2 - lat1) * Math.PI / 180; + const dLon = (lon2 - lon1) * Math.PI / 180; + const a = Math.sin(dLat/2)**2 + Math.cos(lat1*Math.PI/180) * Math.cos(lat2*Math.PI/180) * Math.sin(dLon/2)**2; + return R * 2 * Math.atan2(Math.sqrt(a), Math.sqrt(1-a)); +} + +/** + * POST /api/transport/plan + * Body: { from, to, bandwidth_gbps, redundancy?, budget_preference? } + */ +transportRouter.post("/plan", async (req, res) => { + try { + const { from, to, bandwidth_gbps = 100, redundancy = false, budget_preference = "balanced" } = req.body; + + if (!from || !to) { + return res.status(400).json({ error: "Parameters 'from' and 'to' are required" }); + } + + // 1. Resolve cities + const cityA = await pool.query(`SELECT * FROM cities WHERE name ILIKE $1 LIMIT 1`, [from]); + const cityB = await pool.query(`SELECT * FROM cities WHERE name ILIKE $1 LIMIT 1`, [to]); + + if (!cityA.rows[0] || !cityB.rows[0]) { + const allCities = await pool.query(`SELECT name, country FROM cities ORDER BY name`); + return res.status(404).json({ + error: `City not found: ${!cityA.rows[0] ? from : to}`, + available_cities: allCities.rows.map(c => `${c.name} (${c.country})`), + }); + } + + const a = cityA.rows[0]; + const b = cityB.rows[0]; + + // 2. Calculate distance + const straightKm = haversineKm(parseFloat(a.lat), parseFloat(a.lon), parseFloat(b.lat), parseFloat(b.lon)); + const fiberKm = Math.round(straightKm * 1.4); // fiber route multiplier + + // 3. Determine transceiver requirements based on distance + const transceiverOptions = determineTransceiverOptions(fiberKm, bandwidth_gbps); + + // 4. Find fiber providers for this route + const providers = await pool.query( + `SELECT fp.name, fp.website, fp.type, fp.products, + fr.product_type, fr.monthly_price_eur, fr.setup_fee_eur, fr.min_contract_months + FROM fiber_routes fr + JOIN fiber_providers fp ON fr.provider_id = fp.id + WHERE (fr.city_a ILIKE $1 AND fr.city_b ILIKE $2) + OR (fr.city_a ILIKE $2 AND fr.city_b ILIKE $1) + OR (fr.city_a ILIKE $1 AND fr.city_b ILIKE 'Frankfurt%') + OR (fr.city_a ILIKE 'Frankfurt%' AND fr.city_b ILIKE $2) + ORDER BY fr.monthly_price_eur ASC NULLS LAST`, + [from, to] + ); + + // 5. Find matching switches + const switchOptions = await pool.query( + `SELECT sw.id, sw.model, sw.series, sw.max_speed_gbps, sw.switching_capacity_tbps, + sw.ports_config, sw.msrp_usd, v.name AS vendor + FROM switches sw JOIN vendors v ON sw.vendor_id = v.id + WHERE sw.max_speed_gbps >= $1 + AND sw.lifecycle_status NOT IN ('End-of-Life', 'End-of-Sale') + ORDER BY sw.msrp_usd ASC NULLS LAST, sw.max_speed_gbps DESC + LIMIT 10`, + [bandwidth_gbps] + ); + + // 6. Find Flexoptix transceivers for each option + const options = []; + for (const tcvrOpt of transceiverOptions) { + const flexoptix = await pool.query( + `SELECT t.id, t.slug, t.form_factor, t.speed_gbps, t.reach_label, t.reach_meters, + t.fiber_type, t.connector, t.image_url, + (SELECT po.price FROM price_observations po WHERE po.transceiver_id = t.id ORDER BY po.time DESC LIMIT 1) AS price, + (SELECT po.currency FROM price_observations po WHERE po.transceiver_id = t.id ORDER BY po.time DESC LIMIT 1) AS currency + FROM transceivers t + JOIN vendors v ON t.vendor_id = v.id + WHERE t.speed_gbps >= $1 + AND t.reach_meters >= $2 + AND t.fiber_type = 'SMF' + AND v.slug = 'flexoptix' + ORDER BY t.speed_gbps ASC, t.reach_meters ASC + LIMIT 5`, + [tcvrOpt.speed_gbps, tcvrOpt.min_reach_m] + ); + + // If no Flexoptix match, find any compatible transceiver + const anyMatch = flexoptix.rows.length > 0 ? flexoptix.rows : (await pool.query( + `SELECT t.id, t.slug, t.form_factor, t.speed_gbps, t.reach_label, t.reach_meters, + t.fiber_type, t.connector, t.image_url, v.name AS vendor, + (SELECT po.price FROM price_observations po WHERE po.transceiver_id = t.id ORDER BY po.time DESC LIMIT 1) AS price + FROM transceivers t JOIN vendors v ON t.vendor_id = v.id + WHERE t.speed_gbps >= $1 AND t.reach_meters >= $2 AND t.fiber_type = 'SMF' + ORDER BY t.speed_gbps ASC LIMIT 5`, + [tcvrOpt.speed_gbps, tcvrOpt.min_reach_m] + )).rows; + + const spanCount = Math.ceil(fiberKm * 1000 / tcvrOpt.max_span_m); + const tcvrCount = redundancy ? spanCount * 4 : spanCount * 2; // 2 per span (both ends), x2 for redundancy + + const tcvrPrice = anyMatch[0]?.price ? parseFloat(anyMatch[0].price) : tcvrOpt.est_price_eur; + const totalTcvrCost = tcvrCount * tcvrPrice; + + options.push({ + name: tcvrOpt.name, + description: tcvrOpt.description, + transceiver: { + type: `${tcvrOpt.speed_gbps}G ${tcvrOpt.reach_label}`, + form_factor: tcvrOpt.form_factor, + spans_needed: spanCount, + units_needed: tcvrCount, + unit_price_est: tcvrPrice, + total_cost_est: totalTcvrCost, + flexoptix_products: anyMatch.map(m => ({ + slug: m.slug, + speed: m.speed_gbps + 'G', + reach: m.reach_label, + price: m.price ? parseFloat(m.price) : null, + buy_url: `https://www.flexoptix.net/en/catalogsearch/result/?q=${encodeURIComponent(m.form_factor + ' ' + m.speed_gbps + 'G ' + m.reach_label)}`, + })), + }, + switches: switchOptions.rows.slice(0, 3).map(sw => ({ + model: sw.model, + vendor: sw.vendor, + max_speed: sw.max_speed_gbps + 'G', + price_est: sw.msrp_usd ? parseFloat(sw.msrp_usd) : null, + })), + fiber_providers: providers.rows.length > 0 ? providers.rows : [ + { name: "Contact local fiber providers", note: `No pre-seeded routes for ${from}↔${to}. Check euNetworks, Telia, DTAG.` } + ], + }); + } + + res.json({ + route: { + from: a.name, + to: b.name, + straight_line_km: Math.round(straightKm), + estimated_fiber_km: fiberKm, + bandwidth_requested: bandwidth_gbps + 'G', + redundancy, + }, + options, + note: "Prices are estimates. Contact Flexoptix sales for volume pricing.", + }); + } catch (err) { + console.error("Transport planner error:", err); + res.status(500).json({ error: "Internal server error" }); + } +}); + +function determineTransceiverOptions(fiberKm: number, bandwidthGbps: number) { + const options = []; + + if (fiberKm <= 2) { + options.push({ + name: `${bandwidthGbps}G FR (2km)`, + description: `Short reach — single span, no amplification needed`, + speed_gbps: bandwidthGbps, reach_label: 'FR', form_factor: bandwidthGbps >= 400 ? 'QSFP-DD' : 'QSFP28', + min_reach_m: 2000, max_span_m: 2000, est_price_eur: bandwidthGbps >= 400 ? 200 : 80, + }); + } + if (fiberKm <= 10) { + options.push({ + name: `${bandwidthGbps}G LR4 (10km)`, + description: `Metro reach — ${Math.ceil(fiberKm / 10)} span(s)`, + speed_gbps: bandwidthGbps, reach_label: 'LR4', form_factor: bandwidthGbps >= 400 ? 'QSFP-DD' : 'QSFP28', + min_reach_m: 10000, max_span_m: 10000, est_price_eur: bandwidthGbps >= 400 ? 400 : 120, + }); + } + if (fiberKm <= 40) { + options.push({ + name: `${bandwidthGbps}G ER4 (40km)`, + description: `Extended reach — ${Math.ceil(fiberKm / 40)} span(s)`, + speed_gbps: bandwidthGbps, reach_label: 'ER4', form_factor: bandwidthGbps >= 400 ? 'QSFP-DD' : 'QSFP28', + min_reach_m: 40000, max_span_m: 40000, est_price_eur: bandwidthGbps >= 400 ? 1500 : 400, + }); + } + // ZR is always an option for long distances + if (fiberKm > 10) { + options.push({ + name: `${Math.min(bandwidthGbps, 400)}G ZR Coherent (80km/span)`, + description: `Coherent DWDM — ${Math.ceil(fiberKm / 80)} span(s), OIF 400ZR`, + speed_gbps: Math.min(bandwidthGbps, 400), reach_label: 'ZR', form_factor: 'QSFP-DD', + min_reach_m: 80000, max_span_m: 80000, est_price_eur: 2500, + }); + } + // Carrier wavelength option + options.push({ + name: `Carrier Wavelength Service (${bandwidthGbps}G)`, + description: `Managed service — provider handles fiber + amplification. You only need LR4 transceivers at each end.`, + speed_gbps: bandwidthGbps, reach_label: 'LR4', form_factor: bandwidthGbps >= 400 ? 'QSFP-DD' : 'QSFP28', + min_reach_m: 10000, max_span_m: 999000, est_price_eur: bandwidthGbps >= 400 ? 400 : 120, + }); + + return options; +} + +/** + * GET /api/transport/cities + */ +transportRouter.get("/cities", async (_req, res) => { + try { + const result = await pool.query(`SELECT name, country, has_ix, ix_names, has_datacenter FROM cities ORDER BY name`); + res.json({ cities: result.rows, total: result.rowCount }); + } catch (err) { + res.status(500).json({ error: "Internal server error" }); + } +}); + +/** + * GET /api/transport/providers + */ +transportRouter.get("/providers", async (_req, res) => { + try { + const result = await pool.query(`SELECT * FROM fiber_providers ORDER BY name`); + res.json({ providers: result.rows, total: result.rowCount }); + } catch (err) { + res.status(500).json({ error: "Internal server error" }); + } +}); diff --git a/packages/mcp-server/src/tools/finder.ts b/packages/mcp-server/src/tools/finder.ts new file mode 100644 index 0000000..de1b58b --- /dev/null +++ b/packages/mcp-server/src/tools/finder.ts @@ -0,0 +1,175 @@ +/** + * MCP Tool: find_flexoptix_for_switch + * + * "Customer has Switch X — which Flexoptix transceivers should they buy?" + */ +import { pool } from "../db"; + +export const finderTools = { + find_flexoptix_for_switch: { + name: "find_flexoptix_for_switch", + description: "Find the right Flexoptix transceivers for a customer's switch. Input a switch model name and get compatible Flexoptix products with prices, shop links, and FlexBox coding info.", + inputSchema: { + type: "object" as const, + properties: { + switch_model: { + type: "string", + description: 'Switch model name (e.g., "Cisco Nexus 93180YC-FX3", "QFX5120-48Y", "DCS-7050SX3-48YC12")', + }, + speed_gbps: { + type: "number", + description: "Filter by port speed in Gbps (10, 25, 40, 100, 400)", + }, + reach: { + type: "string", + description: "Filter by reach (SR, LR, ER, ZR, or specific like 10km, 80km)", + }, + }, + required: ["switch_model"], + }, + }, + + plan_transport: { + name: "plan_transport", + description: "Plan a fiber transport system between two cities. Returns switch, transceiver, and fiber provider recommendations with bill of materials and Flexoptix pricing.", + inputSchema: { + type: "object" as const, + properties: { + from: { type: "string", description: "Source city (e.g., Berlin, Frankfurt, Amsterdam)" }, + to: { type: "string", description: "Destination city (e.g., Darmstadt, Munich, London)" }, + bandwidth_gbps: { type: "number", description: "Required bandwidth in Gbps (default: 100)" }, + redundancy: { type: "boolean", description: "Whether to include redundant path (default: false)" }, + }, + required: ["from", "to"], + }, + }, + + forecast_sales: { + name: "forecast_sales", + description: "Predict transceiver sales volume and price trajectory for a technology over 3/9/12/18 months. Includes buy/wait/hold signal.", + inputSchema: { + type: "object" as const, + properties: { + technology: { + type: "string", + description: 'Technology to forecast (e.g., "400G QSFP-DD", "100G QSFP28", "800G OSFP", "1.6T OSFP-XD")', + }, + }, + required: ["technology"], + }, + }, + + get_competitor_alerts: { + name: "get_competitor_alerts", + description: "Get recent competitor intelligence: new products, price changes, stock changes. Shows what competitors are doing in the market.", + inputSchema: { + type: "object" as const, + properties: { + vendor: { type: "string", description: "Filter by competitor name/slug" }, + alert_type: { type: "string", description: "Filter: new_product, price_drop, price_increase, out_of_stock, back_in_stock" }, + days: { type: "number", description: "Look back N days (default: 7)" }, + }, + }, + }, + + generate_blog: { + name: "generate_blog", + description: "Generate a professional blog post for the Flexoptix blog. Auto-enriched with pricing data, competitor analysis, and product links.", + inputSchema: { + type: "object" as const, + properties: { + topic: { type: "string", description: "Blog topic or title" }, + type: { + type: "string", + description: "Blog type: market_alert, migration_guide, competitor_analysis, technology_deep_dive, buying_guide, tutorial, comparison", + }, + target_audience: { type: "string", description: "Audience: technical, sales, customer (default: technical)" }, + include_products: { type: "boolean", description: "Include Flexoptix product recommendations (default: true)" }, + word_count: { type: "number", description: "Target word count (default: 2000)" }, + }, + required: ["topic"], + }, + }, +}; + +export async function handleFinderTool(name: string, args: Record): Promise { + switch (name) { + case "find_flexoptix_for_switch": { + const { switch_model, speed_gbps, reach } = args; + + // Find switch + const sw = await pool.query( + `SELECT sw.id, sw.model, sw.series, sw.ports_config, sw.max_speed_gbps, v.name AS vendor + FROM switches sw JOIN vendors v ON sw.vendor_id = v.id + WHERE sw.model ILIKE '%' || $1 || '%' OR sw.search_vector @@ plainto_tsquery('english', $1) + ORDER BY CASE WHEN sw.model ILIKE $1 THEN 0 ELSE 1 END LIMIT 3`, + [switch_model] + ); + + if (!sw.rows[0]) { + return JSON.stringify({ error: `Switch "${switch_model}" not found. Try a partial model name.` }); + } + + // Find compatible transceivers with Flexoptix products + let sql = ` + SELECT t.form_factor, t.speed_gbps, t.reach_label, t.fiber_type, t.connector, + t.image_url, v.name AS vendor, c.firmware_min, + (SELECT po.price FROM price_observations po WHERE po.transceiver_id = t.id ORDER BY po.time DESC LIMIT 1) AS price, + (SELECT po.currency FROM price_observations po WHERE po.transceiver_id = t.id ORDER BY po.time DESC LIMIT 1) AS currency + FROM compatibility c + JOIN transceivers t ON c.transceiver_id = t.id + JOIN vendors v ON t.vendor_id = v.id + WHERE c.switch_id = $1 AND c.status = 'compatible' + `; + const params: any[] = [sw.rows[0].id]; + let idx = 2; + if (speed_gbps) { sql += ` AND t.speed_gbps = $${idx}`; params.push(speed_gbps); idx++; } + if (reach) { sql += ` AND t.reach_label ILIKE $${idx}`; params.push(reach + '%'); idx++; } + sql += ` ORDER BY t.speed_gbps DESC, t.reach_meters ASC LIMIT 30`; + + const compat = await pool.query(sql, params); + + return JSON.stringify({ + switch: { model: sw.rows[0].model, vendor: sw.rows[0].vendor, ports: sw.rows[0].ports_config }, + compatible_count: compat.rowCount, + transceivers: compat.rows.map(r => ({ + ...r, + flexbox_note: "All Flexoptix transceivers support FlexBox coding — one transceiver works in any vendor's switch.", + buy_url: `https://www.flexoptix.net/en/catalogsearch/result/?q=${encodeURIComponent(r.form_factor + ' ' + r.speed_gbps + 'G ' + r.reach_label)}`, + })), + }, null, 2); + } + + case "get_competitor_alerts": { + const { vendor, alert_type, days = 7 } = args; + let sql = `SELECT ca.alert_type, ca.severity, ca.part_number, ca.product_name, + ca.old_price, ca.new_price, ca.price_pct, ca.currency, ca.source_url, + v.name AS vendor, ca.created_at + FROM competitor_alerts ca LEFT JOIN vendors v ON ca.vendor_id = v.id + WHERE ca.created_at > NOW() - INTERVAL '1 day' * $1`; + const params: any[] = [days]; + let idx = 2; + if (vendor) { sql += ` AND v.slug ILIKE $${idx}`; params.push('%' + vendor + '%'); idx++; } + if (alert_type) { sql += ` AND ca.alert_type = $${idx}`; params.push(alert_type); idx++; } + sql += ` ORDER BY ca.created_at DESC LIMIT 30`; + + const result = await pool.query(sql, params); + return JSON.stringify({ alerts: result.rows, count: result.rowCount }, null, 2); + } + + case "plan_transport": + case "forecast_sales": + case "generate_blog": + // These forward to the API routes — return instruction to use HTTP API + return JSON.stringify({ + note: `Use the TIP HTTP API for ${name}. See https://transceiver-db.context-x.org/api for endpoints.`, + endpoint: name === "plan_transport" ? "POST /api/transport/plan" : + name === "forecast_sales" ? "GET /api/forecast/:technology" : + "POST /api/blog/generate", + args, + }); + + default: + return JSON.stringify({ error: `Unknown tool: ${name}` }); + } +} diff --git a/packages/scraper/src/scheduler.ts b/packages/scraper/src/scheduler.ts index 6bb8fa1..a280b01 100644 --- a/packages/scraper/src/scheduler.ts +++ b/packages/scraper/src/scheduler.ts @@ -14,6 +14,22 @@ import PgBoss from "pg-boss"; import { config } from "dotenv"; import { join } from "path"; +import { rmSync, mkdirSync } from "fs"; + +/** Run a scraper with an isolated Crawlee storage directory to prevent queue collisions */ +async function withIsolatedStorage(name: string, fn: () => Promise): Promise { + const dir = join(__dirname, "..", "..", "..", `storage-${name}`); + mkdirSync(dir, { recursive: true }); + const prev = process.env.CRAWLEE_STORAGE_DIR; + process.env.CRAWLEE_STORAGE_DIR = dir; + try { + await fn(); + } finally { + process.env.CRAWLEE_STORAGE_DIR = prev ?? ""; + // Clean up after successful run + try { rmSync(dir, { recursive: true, force: true }); } catch { /* ignore */ } + } +} config({ path: join(__dirname, "..", "..", "..", ".env") }); @@ -46,6 +62,7 @@ export async function registerSchedules(boss: PgBoss): Promise { "scrape:pricing:atgbics", "scrape:pricing:prolabs", "scrape:compat:cisco", + "scrape:pricing:flexoptix", "scrape:vendors:flexoptix", "scrape:news", "scrape:faq", @@ -55,14 +72,16 @@ export async function registerSchedules(boss: PgBoss): Promise { await boss.createQueue(q).catch(() => { /* already exists */ }); } + // v0.2.0: Increased frequencies for permanent price monitoring (R-SCAN) + // FS.com pricing (every 4 hours — JS rendering is slow) await boss.schedule("scrape:pricing:fs", "0 */4 * * *", {}, { retryLimit: 2, expireInSeconds: 3600, }); - // Optcore pricing (every 6 hours — WP API enumeration + Playwright) - await boss.schedule("scrape:pricing:optcore", "0 */6 * * *", {}, { + // Optcore pricing (every 4 hours — was 6h) + await boss.schedule("scrape:pricing:optcore", "0 2/4 * * *", {}, { retryLimit: 2, expireInSeconds: 7200, }); @@ -97,12 +116,18 @@ export async function registerSchedules(boss: PgBoss): Promise { expireInSeconds: 3600, }); - // ProLabs pricing (every 8 hours — server-rendered HTML, USD prices) + // ProLabs pricing (every 8 hours — Playwright, needs proxy for CloudFront) await boss.schedule("scrape:pricing:prolabs", "0 4/8 * * *", {}, { retryLimit: 2, expireInSeconds: 3600, }); + // Flexoptix catalog (every 2 hours — fetch-based, fast — R-SCAN requirement) + await boss.schedule("scrape:pricing:flexoptix", "0 */2 * * *", {}, { + retryLimit: 2, + expireInSeconds: 3600, + }); + // Flexoptix vendor list (weekly, Sunday at 6am — own data) await boss.schedule("scrape:vendors:flexoptix", "0 6 * * 0", {}, { retryLimit: 3, @@ -124,6 +149,7 @@ export async function registerWorkers(boss: PgBoss): Promise { const { scrapeCiscoTmg } = await import("./scrapers/cisco-tmg"); const { scrapeOptcore } = await import("./scrapers/optcore"); const { scrape10Gtek } = await import("./scrapers/tenGtek"); + const { scrapeFlexoptixCatalog } = await import("./scrapers/flexoptix-catalog"); const { scrapeFlexoptixVendors } = await import("./scrapers/flexoptix-vendors"); const { scrapeNews } = await import("./scrapers/news"); const { scrapeAtgbics } = await import("./scrapers/atgbics"); @@ -131,22 +157,27 @@ export async function registerWorkers(boss: PgBoss): Promise { await boss.work("scrape:pricing:fs", async (_job) => { console.log(`[${new Date().toISOString()}] Running: FS.com pricing`); - await scrapeFs(); + await withIsolatedStorage("fs", scrapeFs); }); await boss.work("scrape:pricing:optcore", async (_job) => { console.log(`[${new Date().toISOString()}] Running: Optcore pricing`); - await scrapeOptcore(); + await withIsolatedStorage("optcore", scrapeOptcore); }); await boss.work("scrape:compat:cisco", async (_job) => { console.log(`[${new Date().toISOString()}] Running: Cisco TMG`); - await scrapeCiscoTmg(); + await withIsolatedStorage("cisco", scrapeCiscoTmg); }); await boss.work("scrape:pricing:10gtek", async (_job) => { console.log(`[${new Date().toISOString()}] Running: 10Gtek pricing`); - await scrape10Gtek(); + await withIsolatedStorage("10gtek", scrape10Gtek); + }); + + await boss.work("scrape:pricing:flexoptix", async (_job) => { + console.log(`[${new Date().toISOString()}] Running: Flexoptix catalog pricing`); + await scrapeFlexoptixCatalog(); }); await boss.work("scrape:vendors:flexoptix", async (_job) => { @@ -161,12 +192,12 @@ export async function registerWorkers(boss: PgBoss): Promise { await boss.work("scrape:pricing:atgbics", async (_job) => { console.log(`[${new Date().toISOString()}] Running: ATGBICS pricing`); - await scrapeAtgbics(); + await withIsolatedStorage("atgbics", scrapeAtgbics); }); await boss.work("scrape:pricing:prolabs", async (_job) => { console.log(`[${new Date().toISOString()}] Running: ProLabs pricing`); - await scrapeProLabs(); + await withIsolatedStorage("prolabs", scrapeProLabs); }); await boss.work("scrape:faq", async (_job) => { diff --git a/packages/scraper/src/scrapers/cisco-tmg.ts b/packages/scraper/src/scrapers/cisco-tmg.ts index 79cb6fd..dc1c5e1 100644 --- a/packages/scraper/src/scrapers/cisco-tmg.ts +++ b/packages/scraper/src/scrapers/cisco-tmg.ts @@ -1,27 +1,101 @@ /** * Cisco TMG Matrix Scraper — Transceiver Compatibility * - * Source: tmgmatrix.cisco.com + * Source: tmgmatrix.cisco.com (JSON API — no auth required) * Extracts: Switch model ↔ Transceiver compatibility data * Stores: switches, compatibility table * - * The TMG Matrix has a JSON API behind the scenes. + * Uses POST /public/api/networkdevice/search endpoint directly. */ -import { CheerioCrawler } from "crawlee"; import { pool, ensureVendor } from "../utils/db"; -const TMG_BASE = "https://tmgmatrix.cisco.com"; +const TMG_API = "https://tmgmatrix.cisco.com/public/api/networkdevice/search"; -interface TmgEntry { - switchModel: string; - switchSeries: string; - transceiverPid: string; - transceiverDescription: string; - speed: string; +interface TmgTransceiver { + tmgId: number; + productId: string; + productFamily: string; + formFactor: string; reach: string; + temperatureRange: string; cableType: string; - connector: string; - minSoftware: string; + media: string; + connectorType: string; + transmissionStandard: string; + dataRate: string; + endOfSale: string; + softReleaseMinVer: string; + breakoutMode: string; + osType: string; + domSupport: string; + type: string; +} + +interface TmgCompatEntry { + productId: string; // switch PID + transceivers: TmgTransceiver[]; +} + +interface TmgDevice { + productFamily: string; + networkAndTransceiverCompatibility: TmgCompatEntry[]; +} + +interface TmgSearchResponse { + totalCount: number; + filters: Array<{ name: string; values: Array<{ id: number; name: string; count: number }> }>; + networkDevices: TmgDevice[]; +} + +/** Key Nexus/Catalyst platform family IDs from the TMG API */ +const PLATFORM_FAMILIES = [ + { id: 74, name: "N9300" }, // Nexus 9300 — 8,515 entries + { id: 77, name: "N9500" }, // Nexus 9500 — 2,266 entries + { id: 78, name: "N9200" }, // Nexus 9200 — 708 entries + { id: 661, name: "N9800" }, // Nexus 9800 — 238 entries + { id: 76, name: "C9300" }, // Catalyst 9300 — 260 entries + { id: 601, name: "C9300L" }, // Catalyst 9300L — 720 entries + { id: 1181, name: "C9300X" }, // Catalyst 9300X — 413 entries + { id: 8, name: "C9500" }, // Catalyst 9500 — 1,141 entries + { id: 521, name: "C9600" }, // Catalyst 9600 — 771 entries + { id: 7, name: "C9400" }, // Catalyst 9400 — 561 entries + { id: 341, name: "C9200" }, // Catalyst 9200 — 222 entries + { id: 83, name: "ASR9000" }, // ASR 9000 — 3,644 entries +]; + +async function searchTmg(familyFilter: { id: number; name: string }): Promise { + const body = { + cableType: [], + dataRate: [], + formFactor: [], + reach: [], + searchInput: [""], + osType: [], + transceiverProductFamily: [], + transceiverProductID: [], + networkDeviceProductFamily: [familyFilter], + networkDeviceProductID: [], + media: [], + connectorType: [], + caseTemperature: [], + performanceMonitoring: [], + }; + + const res = await fetch(TMG_API, { + method: "POST", + headers: { + "Content-Type": "application/json", + "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36", + "Accept": "application/json", + }, + body: JSON.stringify(body), + }); + + if (!res.ok) { + throw new Error(`TMG API ${res.status}: ${res.statusText}`); + } + + return res.json() as Promise; } async function upsertCiscoSwitch(vendorId: string, model: string, series: string): Promise { @@ -38,18 +112,31 @@ async function upsertCiscoSwitch(vendorId: string, model: string, series: string async function upsertCompatibility( switchId: string, transceiverId: string, - firmwareMin: string + firmwareMin: string, + formFactor: string, + reach: string, + cableType: string, + media: string, + dataRate: string ): Promise { await pool.query( - `INSERT INTO compatibility (switch_id, transceiver_id, verified_by, verification_method, status, firmware_min, source_url) - VALUES ($1, $2, 'Cisco TMG Matrix', 'vendor_matrix', 'compatible', $3, $4) - ON CONFLICT (switch_id, transceiver_id) DO UPDATE SET firmware_min = EXCLUDED.firmware_min`, - [switchId, transceiverId, firmwareMin || null, TMG_BASE] + `INSERT INTO compatibility (switch_id, transceiver_id, verified_by, verification_method, status, firmware_min, source_url, notes) + VALUES ($1, $2, 'Cisco TMG Matrix', 'vendor_matrix', 'compatible', $3, $4, $5) + ON CONFLICT (switch_id, transceiver_id) DO UPDATE SET + firmware_min = EXCLUDED.firmware_min, + notes = EXCLUDED.notes`, + [ + switchId, + transceiverId, + firmwareMin || null, + "https://tmgmatrix.cisco.com", + `${formFactor} ${dataRate} ${reach} ${media} ${cableType}`.trim(), + ] ); } export async function scrapeCiscoTmg(): Promise { - console.log("=== Cisco TMG Matrix Scraper Starting ===\n"); + console.log("=== Cisco TMG Matrix Scraper Starting (API mode) ===\n"); const ciscoVendorId = await ensureVendor( "Cisco", @@ -58,90 +145,69 @@ export async function scrapeCiscoTmg(): Promise { undefined ); - const entries: TmgEntry[] = []; + let totalSwitches = 0; + let totalCompat = 0; + let totalTransceivers = 0; - // TMG Matrix uses a search API - // First, try the public HTML interface - const crawler = new CheerioCrawler({ - maxConcurrency: 1, - maxRequestsPerMinute: 10, // Very respectful — Cisco rate limits aggressively - - async requestHandler({ request, $, log }) { - log.info(`Scraping: ${request.url}`); - - // The TMG Matrix renders a table with compatibility data - $("table tbody tr, .matrix-row, [class*='result-row']").each((_i, el) => { - const $row = $(el); - const cells = $row.find("td").map((_j, td) => $(td).text().trim()).get(); - - if (cells.length >= 4) { - entries.push({ - switchModel: cells[0] || "", - switchSeries: cells[0]?.split(" ")[0] || "Nexus", - transceiverPid: cells[1] || "", - transceiverDescription: cells[2] || "", - speed: cells[3] || "", - reach: cells[4] || "", - cableType: cells[5] || "", - connector: cells[6] || "", - minSoftware: cells[7] || "", - }); - } - }); - }, - }); - - // Start with Nexus switches (most relevant for Flexoptix) - await crawler.run([ - `${TMG_BASE}/public/tmg?searchValue=Nexus+9000`, - `${TMG_BASE}/public/tmg?searchValue=Nexus+3000`, - `${TMG_BASE}/public/tmg?searchValue=Nexus+7000`, - `${TMG_BASE}/public/tmg?searchValue=Catalyst+9000`, - ]); - - console.log(`\nEntries found: ${entries.length}`); - - // Write to database - let switches = 0; - let compat = 0; - - for (const entry of entries) { - if (!entry.switchModel || !entry.transceiverPid) continue; + for (const family of PLATFORM_FAMILIES) { + console.log(`\nFetching ${family.name}...`); try { - const switchId = await upsertCiscoSwitch( - ciscoVendorId, - entry.switchModel, - entry.switchSeries - ); - switches++; + const data = await searchTmg(family); + console.log(` ${family.name}: ${data.totalCount} total entries, ${data.networkDevices.length} device groups`); - // Try to match transceiver in our DB - const txResult = await pool.query( - `SELECT id FROM transceivers - WHERE part_number = $1 - OR slug LIKE $2 - OR standard_name ILIKE $3 - LIMIT 1`, - [ - entry.transceiverPid, - `%${entry.transceiverPid.toLowerCase().replace(/[^a-z0-9]/g, "")}%`, - `%${entry.speed}%${entry.reach}%`, - ] - ); + for (const device of data.networkDevices) { + for (const compat of device.networkAndTransceiverCompatibility) { + if (!compat.productId) continue; - if (txResult.rows.length > 0) { - await upsertCompatibility(switchId, txResult.rows[0].id, entry.minSoftware); - compat++; + const switchId = await upsertCiscoSwitch( + ciscoVendorId, + compat.productId, + device.productFamily + ); + totalSwitches++; + + for (const tx of compat.transceivers) { + if (!tx.productId) continue; + totalTransceivers++; + + // Try to match transceiver in our DB by Cisco PID + const txResult = await pool.query( + `SELECT id FROM transceivers + WHERE part_number = $1 + OR part_number = $2 + LIMIT 1`, + [tx.productId, tx.productId.replace(/-S$/, "")] + ); + + if (txResult.rows.length > 0) { + await upsertCompatibility( + switchId, + txResult.rows[0].id, + tx.softReleaseMinVer, + tx.formFactor, + tx.reach, + tx.cableType, + tx.media, + tx.dataRate + ); + totalCompat++; + } + } + } } + + // Rate limit: 2 seconds between platform families + await new Promise((r) => setTimeout(r, 2000)); } catch (err) { - // Skip duplicates silently + console.error(` Error fetching ${family.name}:`, err); } } - console.log(`Switches upserted: ${switches}`); - console.log(`Compatibility entries: ${compat}`); - console.log("=== Cisco TMG Scraper Complete ===\n"); + console.log(`\n=== Cisco TMG Scraper Complete ===`); + console.log(` Switches upserted: ${totalSwitches}`); + console.log(` Transceiver entries scanned: ${totalTransceivers}`); + console.log(` Compatibility matches: ${totalCompat}\n`); } if (require.main === module) { diff --git a/packages/scraper/src/scrapers/fluxlight.ts b/packages/scraper/src/scrapers/fluxlight.ts index 684cd83..03004e5 100644 --- a/packages/scraper/src/scrapers/fluxlight.ts +++ b/packages/scraper/src/scrapers/fluxlight.ts @@ -1,7 +1,7 @@ /** * Fluxlight Scraper — US-based compatible transceiver vendor * - * fluxlight.com — BigCommerce, server-rendered HTML with real prices. + * www.fluxlight.com — BigCommerce, server-rendered HTML with real prices. * ~144+ products across 6 pages. Uses pagination via ?page=N. * * Rate limited: 1 req/2sec. @@ -91,8 +91,8 @@ function parseProductList(html: string): Product[] { const products: Product[] = []; // BigCommerce product card pattern: product link + price - // Pattern: Product Name ... $29.99 - const productRegex = /href="(https?:\/\/fluxlight\.com\/[^"]*-FL\/)"[^>]*>\s*([^<]{10,})<\/a>/gi; + // Pattern: Product Name ... $29.99 + const productRegex = /href="(https?:\/\/(?:www\.)?fluxlight\.com\/[^"]*-FL\/)"[^>]*>\s*([^<]{10,})<\/a>/gi; let match; while ((match = productRegex.exec(html)) !== null) { const url = match[1]; @@ -123,7 +123,7 @@ function parseProductList(html: string): Product[] { // Fallback: broader link pattern if (products.length === 0) { - const simpleRegex = /href="(https?:\/\/fluxlight\.com\/[^"]+)"[^>]*>([^<]{10,}(?:SFP|QSFP|XFP|Base)[^<]*)<\/a>/gi; + const simpleRegex = /href="(https?:\/\/(?:www\.)?fluxlight\.com\/[^"]+)"[^>]*>([^<]{10,}(?:SFP|QSFP|XFP|Base)[^<]*)<\/a>/gi; while ((match = simpleRegex.exec(html)) !== null) { const url = match[1]; const name = match[2].trim(); @@ -166,7 +166,7 @@ async function fetchPage(url: string): Promise { export async function scrapeFluxlight(): Promise { console.log("=== Fluxlight Scraper Starting ===\n"); - const vendorId = await ensureVendor("Fluxlight", "compatible", "https://fluxlight.com", "https://fluxlight.com/transceivers/"); + const vendorId = await ensureVendor("Fluxlight", "compatible", "https://fluxlight.com", "https://www.fluxlight.com/transceivers/"); let allProducts: Product[] = []; diff --git a/packages/scraper/src/scrapers/fs-com.ts b/packages/scraper/src/scrapers/fs-com.ts index 1bcf84f..87a1d4f 100644 --- a/packages/scraper/src/scrapers/fs-com.ts +++ b/packages/scraper/src/scrapers/fs-com.ts @@ -13,13 +13,18 @@ import { contentHash, parsePrice, parseStockLevel, parseQuantity } from "../util const BASE_URL = "https://www.fs.com"; const CATEGORY_URLS = [ - "/c/1g-sfp-modules-702", - "/c/10g-sfp-plus-modules-703", - "/c/25g-sfp28-modules-704", - "/c/40g-qsfp-plus-modules-705", - "/c/100g-qsfp28-modules-706", - "/c/400g-qsfp-dd-modules-3102", - "/c/800g-osfp-modules-3449", + "/c/1g-sfp-81", + "/c/10g-sfp-63", + "/c/25g-sfp28-3215", + "/c/40g-qsfp-1360", + "/c/100g-qsfp28-sfp-dd-1159", + "/c/200g-qsfp-dd-qsfp56-3542", + "/c/400g-osfp-qsfp112-qsfp-dd-3652", + "/c/800g-osfp-qsfp-dd-4089", + "/c/1.6t-osfp-5597", + "/c/400g-coherent-qsfp-dd-4103", + "/c/10g-cwdm-dwdm-sfp-65", + "/c/100g-dwdm-qsfp28-3863", ]; interface FsProduct { @@ -98,18 +103,30 @@ export async function scrapeFs(): Promise { headless: true, launchContext: { launchOptions: { - args: ["--disable-blink-features=AutomationControlled"], + args: ["--disable-blink-features=AutomationControlled", "--lang=en-US"], }, }, + preNavigationHooks: [ + async ({ page }) => { + await page.setExtraHTTPHeaders({ + "Accept-Language": "en-US,en;q=0.9", + }); + await page.context().addCookies([ + { name: "currency", value: "USD", domain: ".fs.com", path: "/" }, + { name: "lang", value: "en", domain: ".fs.com", path: "/" }, + { name: "country", value: "US", domain: ".fs.com", path: "/" }, + ]); + }, + ], + async requestHandler({ page, request, log }) { const url = request.url; log.info(`Scraping: ${url}`); - // Wait for product list to render - await page.waitForTimeout(3000); + // Wait for Vue.js product grid to render + await page.waitForTimeout(4000); - // Try multiple selectors — FS.com changes DOM frequently const productData = await page.evaluate(() => { const results: Array<{ name: string; @@ -119,65 +136,55 @@ export async function scrapeFs(): Promise { partNumber: string; }> = []; - // Strategy 1: Look for product links with prices nearby - const productLinks = document.querySelectorAll( - 'a[href*="/products/"], a[href*="/product/"], .product-item a, .o-list-product a, [class*="product"] a[href]' - ); + // Strategy 1: Parse .category__grid__item cards (2026 Vue.js DOM) + const gridItems = document.querySelectorAll(".category__grid__item"); + for (const item of gridItems) { + const link = item.querySelector('a[href*="/products/"]') as HTMLAnchorElement | null; + const img = item.querySelector("img"); + const priceEl = item.querySelector(".grid__price"); + const allText = item.textContent || ""; - for (const link of productLinks) { - const el = link as HTMLAnchorElement; - const name = el.textContent?.trim() || ""; - const href = el.getAttribute("href") || ""; + if (!link) continue; - if (!name || name.length < 5 || !href) continue; + const name = img?.getAttribute("alt")?.trim() || link.textContent?.trim() || ""; + const href = link.getAttribute("href") || ""; + const price = priceEl?.textContent?.trim() || ""; - // Find price in parent/sibling elements - const container = - el.closest('[class*="product"]') || - el.closest('[class*="item"]') || - el.closest("li") || - el.parentElement?.parentElement; + // Extract stock from text like "1914 in Global Warehouse" + const stockMatch = allText.match(/(\d+)\s+in\s+(?:Global\s+)?Warehouse/i); + const stock = stockMatch ? stockMatch[1] + " in stock" : ""; - let price = ""; - let stock = ""; + // Extract FS product ID from URL + const pnMatch = href.match(/products\/(\d+)\.html/); + const partNumber = pnMatch ? `FS-${pnMatch[1]}` : ""; - if (container) { - const priceEl = container.querySelector( - '[class*="price"], [class*="Price"], .o-price, span[data-price]' - ); - price = priceEl?.textContent?.trim() || ""; - - const stockEl = container.querySelector( - '[class*="stock"], [class*="Stock"], [class*="avail"], .o-stock' - ); - stock = stockEl?.textContent?.trim() || ""; - } - - // Extract part number from URL or text - const pn = href.split("/").pop()?.replace(".html", "")?.replace("#", "") || ""; - - if (name && (price || href.includes("/product"))) { - results.push({ name, href, price, stock, partNumber: pn }); + if (name && href) { + results.push({ name, href, price, stock, partNumber }); } } - // Strategy 2: Look for any element with $ or US$ price pattern + // Strategy 2: Fallback — look for product links with prices nearby if (results.length === 0) { - const allText = document.querySelectorAll("*"); - for (const el of allText) { - const text = el.textContent || ""; - if (/US?\$\s*\d+\.\d{2}/.test(text) && text.length < 200) { - const linkEl = el.closest("a") || el.querySelector("a"); - if (linkEl) { - results.push({ - name: linkEl.textContent?.trim() || text.slice(0, 100), - href: linkEl.getAttribute("href") || "", - price: text.match(/US?\$\s*[\d,.]+/)?.[0] || "", - stock: "", - partNumber: "", - }); - } + const productLinks = document.querySelectorAll( + 'a[href*="/products/"], a[href*="/product/"]' + ); + for (const link of productLinks) { + const el = link as HTMLAnchorElement; + const name = el.textContent?.trim() || ""; + const href = el.getAttribute("href") || ""; + if (!name || name.length < 5 || !href) continue; + + const container = el.closest('[class*="product"]') || el.closest('[class*="item"]') || el.closest("li") || el.parentElement?.parentElement; + let price = ""; + let stock = ""; + if (container) { + const priceEl = container.querySelector('[class*="price"]'); + price = priceEl?.textContent?.trim() || ""; + const stockEl = container.querySelector('[class*="stock"], [class*="avail"]'); + stock = stockEl?.textContent?.trim() || ""; } + const pn = href.split("/").pop()?.replace(".html", "")?.replace(/\?.*/, "") || ""; + if (name) results.push({ name, href, price, stock, partNumber: pn }); } } diff --git a/packages/scraper/src/scrapers/gbics.ts b/packages/scraper/src/scrapers/gbics.ts index 3bf34ab..238e1db 100644 --- a/packages/scraper/src/scrapers/gbics.ts +++ b/packages/scraper/src/scrapers/gbics.ts @@ -8,7 +8,7 @@ import { pool, findOrCreateScrapedTransceiver, ensureVendor, upsertPriceObservation } from "../utils/db"; import { contentHash } from "../utils/hash"; -const BASE = "https://gbics.com"; +const BASE = "https://www.gbics.com"; const HEADERS = { "User-Agent": "Mozilla/5.0 (compatible; TIP-Bot/1.0; research)", Accept: "text/html,application/xhtml+xml", @@ -100,7 +100,7 @@ function parseProductList(html: string, cat: typeof CATEGORIES[number]): Product // BigCommerce card-title pattern: // - const productRegex = /aria-label="([^"]+)"\s+href="(https?:\/\/gbics\.com\/[^"]+)"\s+data-event-type="product-click"/gi; + const productRegex = /aria-label="([^"]+)"\s+href="(https?:\/\/(?:www\.)?gbics\.com\/[^"]+)"[^>]*data-event-type="product-click"/gi; let match; while ((match = productRegex.exec(collapsed)) !== null) { const label = match[1].trim(); @@ -110,7 +110,14 @@ function parseProductList(html: string, cat: typeof CATEGORIES[number]): Product // Split on last comma to separate name and price const priceInLabel = label.match(/,\s*£\s*([\d,.]+)\s*$/); const name = priceInLabel ? label.slice(0, label.lastIndexOf(",")).trim() : label; - const price = priceInLabel ? parseFloat(priceInLabel[1].replace(",", "")) : undefined; + let price = priceInLabel ? parseFloat(priceInLabel[1].replace(",", "")) : undefined; + + // Fallback: extract price from data-price-asc attribute on parent
  • + if (!price) { + const priceContext = collapsed.slice(Math.max(0, match.index - 500), match.index); + const dataPriceMatch = priceContext.match(/data-price-asc="(\d+)"/); + if (dataPriceMatch) price = parseFloat(dataPriceMatch[1]); + } if (name.length < 10) continue; @@ -131,7 +138,7 @@ function parseProductList(html: string, cat: typeof CATEGORIES[number]): Product // Fallback: try "Now: £XX.XX" pattern near product links if (products.length === 0) { - const altRegex = /href="(https?:\/\/gbics\.com\/[^"]+)"[^>]*>\s*([^<]{15,})<\/a>/gi; + const altRegex = /href="(https?:\/\/(?:www\.)?gbics\.com\/[^"]+)"[^>]*>\s*([^<]{15,})<\/a>/gi; while ((match = altRegex.exec(collapsed)) !== null) { const url = match[1]; const name = match[2].trim(); @@ -172,7 +179,7 @@ async function fetchPage(url: string): Promise { export async function scrapeGbics(): Promise { console.log("=== GBICS.com Scraper Starting ===\n"); - const vendorId = await ensureVendor("GBICS", "compatible", "https://gbics.com", "https://gbics.com/optical-transceivers/"); + const vendorId = await ensureVendor("GBICS", "compatible", "https://www.gbics.com", "https://www.gbics.com/optical-transceivers/"); let totalProducts = 0; let priceUpdates = 0; diff --git a/packages/scraper/src/scrapers/news.ts b/packages/scraper/src/scrapers/news.ts index 7d21080..c8ec19a 100644 --- a/packages/scraper/src/scrapers/news.ts +++ b/packages/scraper/src/scrapers/news.ts @@ -38,19 +38,14 @@ interface NewsArticle { const FEEDS: RssFeed[] = [ // === PRIMARY: Transceiver-specific === { - name: "Lightwave Online", - url: "https://www.lightwaveonline.com/rss", + name: "The Next Platform", + url: "https://www.nextplatform.com/feed/", category: "market_report", }, { - name: "Lightwave - Fiber Optics", - url: "https://www.lightwaveonline.com/fttx/rss", - category: "market_report", - }, - { - name: "Fierce Telecom", - url: "https://www.fiercetelecom.com/rss/xml", - category: "market_report", + name: "ServeTheHome", + url: "https://www.servethehome.com/feed/", + category: "product_launch", }, { name: "Optics.org", @@ -69,8 +64,8 @@ const FEEDS: RssFeed[] = [ category: "market_report", }, { - name: "SDxCentral", - url: "https://www.sdxcentral.com/feed/", + name: "The Register - Data Centre", + url: "https://www.theregister.com/data_centre/headlines.atom", category: "market_report", }, // === TERTIARY: General tech / photonics === diff --git a/packages/scraper/src/scrapers/prolabs.ts b/packages/scraper/src/scrapers/prolabs.ts index 125d90b..bf71d44 100644 --- a/packages/scraper/src/scrapers/prolabs.ts +++ b/packages/scraper/src/scrapers/prolabs.ts @@ -1,22 +1,29 @@ /** * ProLabs Scraper — Enterprise-grade compatible optics (Legrand subsidiary) * - * prolabs.com — Server-rendered HTML with public USD pricing. + * prolabs.com — CloudFront WAF aggressively blocks datacenter IPs. + * Uses PlaywrightCrawler with Firefox for anti-detection. + * + * KNOWN ISSUE: CloudFront blocks all requests from IONOS/datacenter IPs + * (HTTP 403 "Request blocked"). This scraper works correctly from + * residential IPs. Solutions: + * 1. Set PROXY_URL env var to a residential/rotating proxy + * 2. Run from a residential IP (e.g. home server) + * 3. Route through WireGuard with internet breakout at home + * * Products listed under /products/networking/fiber-optics/ category pages. - * Pagination via ?page=N. Rate limited: 1 req/2sec. Max 100 pages. + * Pagination via ?page=N. Rate limited: maxConcurrency 1, 10 req/min. * * SKU format examples: "Q-4X10G-LR-PR", "SFP-10G-SR-PR", "Q28-100G-LR4-PR" */ +import { PlaywrightCrawler, RequestQueue } from "crawlee"; +import { firefox } from "playwright"; import { pool, findOrCreateScrapedTransceiver, ensureVendor, upsertPriceObservation } from "../utils/db"; import { contentHash } from "../utils/hash"; const BASE = "https://www.prolabs.com"; -const HEADERS = { - "User-Agent": "Mozilla/5.0 (compatible; TIP-Bot/1.0; research)", - Accept: "text/html,application/xhtml+xml", -}; - const MAX_PAGES = 100; +const PROXY_URL = process.env.PROXY_URL || ""; const CATEGORIES = [ { path: "/products/networking/fiber-optics/sfp-modules", formFactor: "SFP", speed: "1G", speedGbps: 1 }, @@ -26,7 +33,6 @@ const CATEGORIES = [ { path: "/products/networking/fiber-optics/qsfp28-modules", formFactor: "QSFP28", speed: "100G", speedGbps: 100 }, { path: "/products/networking/fiber-optics/qsfp-dd-modules", formFactor: "QSFP-DD", speed: "400G", speedGbps: 400 }, { path: "/products/networking/fiber-optics/coherent-modules", formFactor: "QSFP-DD", speed: "400G", speedGbps: 400 }, - // Broad fallback category in case above paths differ on the live site { path: "/products/networking/fiber-optics", formFactor: "SFP+", speed: "10G", speedGbps: 10 }, ]; @@ -45,9 +51,9 @@ interface Product { wavelength?: string; } -function sleep(ms: number): Promise { - return new Promise((resolve) => setTimeout(resolve, ms)); -} +/* ------------------------------------------------------------------ */ +/* Helper / detection functions (unchanged from original) */ +/* ------------------------------------------------------------------ */ function detectReach(text: string): { label: string; meters: number } | undefined { const patterns: [RegExp, string, number][] = [ @@ -90,18 +96,6 @@ function detectWavelength(text: string): string { return match ? match[1] : ""; } -/** - * Infer form factor and speed from ProLabs SKU prefixes when category context - * is not specific enough (e.g. when crawling the broad fallback category). - * - * ProLabs SKU prefix conventions: - * Q- -> QSFP+ 40G - * Q28- -> QSFP28 100G - * QDD- -> QSFP-DD 400G - * SFP28- -> SFP28 25G - * SFP- -> SFP+ 10G (most common ProLabs prefix) - * S- -> SFP 1G - */ function inferFromSku(sku: string, cat: typeof CATEGORIES[number]): { formFactor: string; speed: string; @@ -116,121 +110,6 @@ function inferFromSku(sku: string, cat: typeof CATEGORIES[number]): { return { formFactor: cat.formFactor, speed: cat.speed, speedGbps: cat.speedGbps }; } -/** - * Parse product listings from a ProLabs category page. - * - * ProLabs uses a standard e-commerce layout: - * - Product cards with an link containing the product URL and name - * - Price in a span with "price" in class or as "$XX.XX" text nearby - * - SKU / part number in the URL slug - * - Stock badge: "In Stock" / "Out of Stock" / "Call for Availability" - * - * We parse with lightweight regex on collapsed HTML — same approach as gbics.ts - * and sfpcables.ts (no DOM parser dependency). - */ -function parseProductList(html: string, cat: typeof CATEGORIES[number]): Product[] { - const products: Product[] = []; - const collapsed = html.replace(/\s+/g, " "); - - // Strategy 1: product cards with structured href containing a SKU-like segment - // Match anchor tags whose href is a deep product path ending in a SKU pattern - const productLinkRegex = /href="(\/products\/[^"]*?\/([A-Z0-9][A-Z0-9\-_]{3,}(?:-PR)?))"\s[^>]*>([^<]{10,})<\/a>/gi; - let match: RegExpExecArray | null; - - while ((match = productLinkRegex.exec(collapsed)) !== null) { - const relUrl = match[1]; - const skuFromUrl = match[2]; - const linkText = match[3].trim(); - - // Skip navigation / filter / pagination links - if (/category|filter|sort|page|breadcrumb/i.test(relUrl)) continue; - if (linkText.length > 200) continue; - - const url = BASE + relUrl; - const partNumber = skuFromUrl.slice(0, 80); - const name = linkText.length > 10 ? linkText : partNumber; - - // Look for price in a 700-char window after the match position - const context = collapsed.slice(Math.max(0, match.index - 100), match.index + 700); - const priceMatch = context.match(/\$\s*([\d,]+\.?\d{0,2})/) || - context.match(/price[^>]*>\s*\$?\s*([\d,]+\.?\d{0,2})/i); - const price = priceMatch ? parseFloat(priceMatch[1].replace(",", "")) : undefined; - - const stockMatch = context.match(/(in[\s-]stock|out[\s-]of[\s-]stock|call for availability|available|backordered)/i); - const stockStatus = stockMatch ? stockMatch[1].toLowerCase() : undefined; - - const combined = name + " " + partNumber; - const reach = detectReach(combined); - const { formFactor, speed, speedGbps } = inferFromSku(partNumber, cat); - - products.push({ - partNumber, name, url, - price: price && price > 0 && price < 100000 ? price : undefined, - stockStatus, - formFactor, speed, speedGbps, - reachLabel: reach?.label, - reachMeters: reach?.meters, - fiberType: detectFiber(combined), - wavelength: detectWavelength(combined), - }); - } - - // Strategy 2: Fallback — any link to a /products/ URL that has a $ price nearby - if (products.length === 0) { - const altRegex = /href="(\/products\/[^"]{10,})"/gi; - while ((match = altRegex.exec(collapsed)) !== null) { - const relUrl = match[1]; - if (/category|filter|sort|page|breadcrumb/i.test(relUrl)) continue; - - const context = collapsed.slice(Math.max(0, match.index - 50), match.index + 800); - const priceMatch = context.match(/\$\s*([\d,]+\.?\d{0,2})/); - if (!priceMatch) continue; - - const price = parseFloat(priceMatch[1].replace(",", "")); - const nameMatch = context.match(/<(?:h[23]|strong|span)[^>]*>([^<]{10,150})<\//i); - const name = nameMatch ? nameMatch[1].trim() : relUrl.split("/").pop() || ""; - const partNumber = (relUrl.split("/").pop() ?? name).slice(0, 80); - - const url = BASE + relUrl; - const combined = name + " " + partNumber; - const reach = detectReach(combined); - const { formFactor, speed, speedGbps } = inferFromSku(partNumber, cat); - - products.push({ - partNumber, name, url, - price: price > 0 && price < 100000 ? price : undefined, - formFactor, speed, speedGbps, - reachLabel: reach?.label, - reachMeters: reach?.meters, - fiberType: detectFiber(combined), - wavelength: detectWavelength(combined), - }); - } - } - - // Deduplicate by URL - const seen = new Set(); - return products.filter((p) => { - if (seen.has(p.url)) return false; - seen.add(p.url); - return true; - }); -} - -/** Check if the HTML contains a link to the next pagination page. */ -function hasNextPage(html: string, currentPage: number): boolean { - if (/rel="next"/i.test(html)) return true; - const nextPageNum = currentPage + 1; - const pattern = new RegExp(`[?&]page=${nextPageNum}`, "i"); - return pattern.test(html); -} - -async function fetchPage(url: string): Promise { - const resp = await fetch(url, { headers: HEADERS, signal: AbortSignal.timeout(30000) }); - if (!resp.ok) throw new Error(`HTTP ${resp.status} for ${url}`); - return resp.text(); -} - function normalizeStockLevel( raw?: string ): "in_stock" | "low_stock" | "out_of_stock" | "on_request" { @@ -242,8 +121,19 @@ function normalizeStockLevel( return "on_request"; } +/* ------------------------------------------------------------------ */ +/* Main scraper */ +/* ------------------------------------------------------------------ */ + export async function scrapeProLabs(): Promise { - console.log("=== ProLabs Scraper Starting ===\n"); + console.log("=== ProLabs Scraper Starting (PlaywrightCrawler + Firefox) ===\n"); + + if (PROXY_URL) { + console.log(`Using proxy: ${PROXY_URL.replace(/:[^:@]+@/, ":***@")}`); + } else { + console.log("WARNING: No PROXY_URL set. CloudFront WAF blocks datacenter IPs."); + console.log("Set PROXY_URL env var for residential proxy if running from VPS.\n"); + } const vendorId = await ensureVendor( "ProLabs", @@ -254,90 +144,334 @@ export async function scrapeProLabs(): Promise { let totalProducts = 0; let priceUpdates = 0; + let blockedPages = 0; const seenUrls = new Set(); + // Map URL -> category metadata + const urlToCat = new Map(); + + const requestQueue = await RequestQueue.open(); + for (const cat of CATEGORIES) { - console.log(`\n--- ${cat.formFactor} (${cat.speed}) [${cat.path}] ---`); + const url = `${BASE}${cat.path}`; + urlToCat.set(url, cat); + await requestQueue.addRequest({ url, userData: { page: 1, catPath: cat.path } }); + } - let page = 1; - let pagesThisCat = 0; - let productsThisCat = 0; + const crawler = new PlaywrightCrawler({ + requestQueue, + maxConcurrency: 1, + maxRequestsPerMinute: 10, + requestHandlerTimeoutSecs: 120, + navigationTimeoutSecs: 60, + maxRequestRetries: 2, + headless: true, + // Override default blockedStatusCodes (normally [401, 403, 429]). + // We allow 403 so our handler can inspect the page — CloudFront may + // serve a JS challenge that resolves, or we can log the block gracefully. + sessionPoolOptions: { + blockedStatusCodes: [401, 429], + }, + browserPoolOptions: { + useFingerprints: false, + }, + launchContext: { + launcher: firefox, + launchOptions: { + firefoxUserPrefs: { + "toolkit.telemetry.enabled": false, + "privacy.trackingprotection.enabled": false, + }, + }, + }, + ...(PROXY_URL ? { + proxyConfiguration: new (require("crawlee").ProxyConfiguration)({ + proxyUrls: [PROXY_URL], + }), + } : {}), + preNavigationHooks: [ + async ({ page }, goToOptions) => { + // Realistic viewport + await page.setViewportSize({ width: 1920, height: 1080 }); - while (page <= MAX_PAGES) { - const url = page === 1 - ? `${BASE}${cat.path}` - : `${BASE}${cat.path}?page=${page}`; + // Override webdriver detection + await page.addInitScript(() => { + Object.defineProperty(navigator, "webdriver", { get: () => false }); + }); - try { - const html = await fetchPage(url); - const pageProducts = parseProductList(html, cat); + if (goToOptions) { + goToOptions.waitUntil = "load"; + } + }, + ], - // Global dedup: broad fallback category overlaps with specific ones - const newProducts = pageProducts.filter((p) => !seenUrls.has(p.url)); - newProducts.forEach((p) => seenUrls.add(p.url)); + async requestHandler({ page, request, log }) { + const currentPage: number = request.userData?.page ?? 1; + const catPath: string = request.userData?.catPath ?? ""; - console.log(` Page ${page}: ${pageProducts.length} found, ${newProducts.length} new`); + const cat = urlToCat.get(request.url) ?? + CATEGORIES.find((c) => catPath === c.path) ?? + CATEGORIES[CATEGORIES.length - 1]; + urlToCat.set(request.url, cat); - for (const product of newProducts) { - try { - const txId = await findOrCreateScrapedTransceiver({ - partNumber: product.partNumber, - vendorId, - formFactor: product.formFactor, - speedGbps: product.speedGbps, - speed: product.speed, - reachMeters: product.reachMeters, - reachLabel: product.reachLabel, - fiberType: product.fiberType, - wavelengths: product.wavelength, - category: "DataCenter", - }); + log.info(`[${cat.formFactor} ${cat.speed}] Page ${currentPage}: ${request.url}`); - if (product.price && product.price > 0) { - const hash = contentHash({ - price: product.price, - part: product.partNumber, - stock: product.stockStatus ?? "", - }); - const updated = await upsertPriceObservation({ - transceiverId: txId, - sourceVendorId: vendorId, - price: product.price, - currency: "USD", - stockLevel: normalizeStockLevel(product.stockStatus), - url: product.url, - contentHash: hash, - }); - if (updated) priceUpdates++; + // Give JS challenges time to resolve + await page.waitForTimeout(8000); + + // Check what we actually got + const pageTitle = await page.title(); + const bodyText = await page.evaluate(() => document.body?.innerText?.slice(0, 500) || ""); + log.info(` Title: "${pageTitle}"`); + + // Detect CloudFront WAF block + if (bodyText.includes("Request blocked") || + bodyText.includes("Access Denied") || + bodyText.includes("403 ERROR") || + pageTitle.includes("ERROR")) { + blockedPages++; + log.warning(` CloudFront WAF blocked this page (${blockedPages} total blocked)`); + if (blockedPages >= 3 && totalProducts === 0) { + log.warning(` Multiple blocks detected — likely IP-level block. Consider using PROXY_URL.`); + } + return; + } + + // Extract products via page.evaluate + const productData = await page.evaluate(() => { + const results: Array<{ + name: string; + href: string; + price: string; + stock: string; + partNumber: string; + }> = []; + + // Strategy 1: Product card links + const productLinks = document.querySelectorAll( + 'a[href*="/products/"], .product-card a, .product-item a, [class*="product"] a[href], .product-list a, .category-products a, [data-product] a' + ); + + for (const link of productLinks) { + const el = link as HTMLAnchorElement; + const name = el.textContent?.trim() || ""; + const href = el.getAttribute("href") || ""; + + if (!name || name.length < 5 || name.length > 200 || !href) continue; + if (/category|filter|sort|breadcrumb|login|cart|account/i.test(href) && !/products\//i.test(href)) continue; + + const container = + el.closest('[class*="product"]') || + el.closest('[class*="item"]') || + el.closest('[class*="card"]') || + el.closest("li") || + el.parentElement?.parentElement?.parentElement; + + let price = ""; + let stock = ""; + let pn = ""; + + if (container) { + const priceEl = container.querySelector( + '[class*="price"], [class*="Price"], [data-price], .price' + ); + price = priceEl?.textContent?.trim() || ""; + if (!price) { + const containerText = container.textContent || ""; + const priceMatch = containerText.match(/\$\s*[\d,]+\.?\d{0,2}/); + if (priceMatch) price = priceMatch[0]; } - productsThisCat++; - totalProducts++; - } catch (err) { - console.warn(` DB error [${product.partNumber}]: ${(err as Error).message.slice(0, 80)}`); + const stockEl = container.querySelector( + '[class*="stock"], [class*="Stock"], [class*="avail"], [class*="Avail"]' + ); + stock = stockEl?.textContent?.trim() || ""; + + const skuEl = container.querySelector( + '[class*="sku"], [class*="SKU"], [class*="part"], [class*="Part"], [class*="model"]' + ); + pn = skuEl?.textContent?.trim() || ""; + } + + if (!pn) { + pn = href.split("/").pop()?.replace(/\.html?$/, "")?.replace(/#.*$/, "") || ""; + } + + if (name && href.includes("/products/")) { + results.push({ name, href, price, stock, partNumber: pn }); } } - pagesThisCat++; + // Strategy 2: Scan deeper for anchors with product URLs + if (results.length === 0) { + const allAnchors = document.querySelectorAll("a[href*='/products/']"); + for (const el of allAnchors) { + const anchor = el as HTMLAnchorElement; + const href = anchor.getAttribute("href") || ""; + const name = anchor.textContent?.trim() || ""; + if (!name || name.length < 5) continue; - if (pageProducts.length === 0 || !hasNextPage(html, page)) break; + let parent: Element | null = anchor; + let price = ""; + for (let i = 0; i < 4 && parent; i++) { + parent = parent.parentElement; + if (parent) { + const text = parent.textContent || ""; + const m = text.match(/\$\s*[\d,]+\.?\d{0,2}/); + if (m) { price = m[0]; break; } + } + } - page++; - await sleep(2000); - } catch (err) { - console.error(` Page ${page} failed: ${(err as Error).message}`); - break; + const pn = href.split("/").pop()?.replace(/\.html?$/, "") || ""; + results.push({ name, href, price, stock: "", partNumber: pn }); + } + } + + // Strategy 3: JSON-LD structured data + const ldScripts = document.querySelectorAll('script[type="application/ld+json"]'); + for (const script of ldScripts) { + try { + const data = JSON.parse(script.textContent || ""); + const items = data.itemListElement || (Array.isArray(data) ? data : [data]); + for (const item of items) { + if (item["@type"] === "Product" || item.offers) { + const name = item.name || ""; + const href = item.url || ""; + const offers = item.offers || {}; + const price = offers.price ? `$${offers.price}` : ""; + const stock = offers.availability || ""; + const pn = item.sku || item.mpn || href.split("/").pop() || ""; + if (name) results.push({ name, href, price, stock, partNumber: pn }); + } + } + } catch { /* ignore parse errors */ } + } + + return results; + }); + + log.info(` Raw items extracted: ${productData.length}`); + + // Process extracted products + const pageProducts: Product[] = []; + + for (const item of productData) { + if (!item.name) continue; + + const partNumber = (item.partNumber || item.name).slice(0, 80).trim(); + const name = item.name.slice(0, 200).trim(); + const url = item.href.startsWith("http") ? item.href : `${BASE}${item.href}`; + + let price: number | undefined; + if (item.price) { + const cleaned = item.price.replace(/[^\d.,]/g, "").replace(",", ""); + const parsed = parseFloat(cleaned); + if (parsed > 0 && parsed < 100000) price = parsed; + } + + const combined = name + " " + partNumber; + const reach = detectReach(combined); + const { formFactor, speed, speedGbps } = inferFromSku(partNumber, cat); + + pageProducts.push({ + partNumber, name, url, price, + stockStatus: item.stock || undefined, + formFactor, speed, speedGbps, + reachLabel: reach?.label, + reachMeters: reach?.meters, + fiberType: detectFiber(combined), + wavelength: detectWavelength(combined), + }); } - } - console.log(` Category done: ${productsThisCat} products across ${pagesThisCat} page(s)`); + // Deduplicate against global set + const newProducts = pageProducts.filter((p) => !seenUrls.has(p.url)); + for (const p of newProducts) seenUrls.add(p.url); - if (cat !== CATEGORIES[CATEGORIES.length - 1]) { - await sleep(2000); - } + log.info(` Parsed: ${pageProducts.length} found, ${newProducts.length} new`); + + // Write to database + for (const product of newProducts) { + try { + const txId = await findOrCreateScrapedTransceiver({ + partNumber: product.partNumber, + vendorId, + formFactor: product.formFactor, + speedGbps: product.speedGbps, + speed: product.speed, + reachMeters: product.reachMeters, + reachLabel: product.reachLabel, + fiberType: product.fiberType, + wavelengths: product.wavelength, + category: "DataCenter", + }); + + if (product.price && product.price > 0) { + const hash = contentHash({ + price: product.price, + part: product.partNumber, + stock: product.stockStatus ?? "", + }); + const updated = await upsertPriceObservation({ + transceiverId: txId, + sourceVendorId: vendorId, + price: product.price, + currency: "USD", + stockLevel: normalizeStockLevel(product.stockStatus), + url: product.url, + contentHash: hash, + }); + if (updated) priceUpdates++; + } + + totalProducts++; + } catch (err) { + log.warning(` DB error [${product.partNumber}]: ${(err as Error).message.slice(0, 80)}`); + } + } + + // Check for next page + const hasNext = await page.evaluate((currentPageNum: number) => { + const nextLink = document.querySelector('a[rel="next"], link[rel="next"]'); + if (nextLink) return true; + const nextNum = currentPageNum + 1; + const paginationLinks = document.querySelectorAll('a[href*="page="], .pagination a, nav a'); + for (const link of paginationLinks) { + const href = (link as HTMLAnchorElement).getAttribute("href") || ""; + if (href.includes(`page=${nextNum}`)) return true; + const text = link.textContent?.trim() || ""; + if (text === String(nextNum) || text.toLowerCase() === "next" || text === "\u203a" || text === "\u00bb") return true; + } + return false; + }, currentPage); + + if (hasNext && currentPage < MAX_PAGES && newProducts.length > 0) { + const nextPageNum = currentPage + 1; + const nextUrl = `${BASE}${catPath}?page=${nextPageNum}`; + urlToCat.set(nextUrl, cat); + await requestQueue.addRequest({ + url: nextUrl, + userData: { page: nextPageNum, catPath }, + }); + log.info(` Enqueued next page: ${nextPageNum}`); + } + }, + + async failedRequestHandler({ request, log }) { + log.error(`Request failed after retries: ${request.url}`); + }, + }); + + await crawler.run(); + + console.log(`\n=== ProLabs Complete ===`); + console.log(` Products processed: ${totalProducts}`); + console.log(` Price updates: ${priceUpdates}`); + console.log(` Pages blocked by WAF: ${blockedPages}`); + if (blockedPages > 0 && totalProducts === 0) { + console.log(`\n All pages blocked by CloudFront WAF (datacenter IP detected).`); + console.log(` Fix: Set PROXY_URL=http://user:pass@proxy:port in .env`); } - - console.log(`\n=== ProLabs Complete: ${totalProducts} products processed, ${priceUpdates} price updates ===`); } if (require.main === module) { diff --git a/packages/scraper/src/utils/change-detector.ts b/packages/scraper/src/utils/change-detector.ts new file mode 100644 index 0000000..f4e9023 --- /dev/null +++ b/packages/scraper/src/utils/change-detector.ts @@ -0,0 +1,128 @@ +/** + * WS4: Competitor Change Detection + * + * Compares current scrape results with previous observations + * and generates alerts for price changes, new products, stock changes. + */ +import { Pool } from "pg"; + +const pool = new Pool({ + host: process.env.POSTGRES_HOST || "localhost", + port: parseInt(process.env.POSTGRES_PORT || "5433"), + database: process.env.POSTGRES_DB || "transceiver_db", + user: process.env.POSTGRES_USER || "tip", + password: process.env.POSTGRES_PASSWORD || "tip_dev_2026", + max: 3, +}); + +interface PriceObservation { + transceiver_id: string; + vendor_id: string; + price: number; + currency: string; + stock_level?: string; + part_number?: string; + product_name?: string; + form_factor?: string; + speed_gbps?: number; + source_url?: string; +} + +/** + * After a scraper run, call this to detect changes and generate alerts. + */ +export async function detectChanges( + vendorId: string, + currentObservations: PriceObservation[] +): Promise<{ alerts: number; priceChanges: number; newProducts: number }> { + let alerts = 0; + let priceChanges = 0; + let newProducts = 0; + + for (const obs of currentObservations) { + try { + // Get last known price for this transceiver from this vendor + const prev = await pool.query( + `SELECT price, currency, stock_level + FROM price_observations + WHERE transceiver_id = $1 AND source_vendor_id = $2 + ORDER BY time DESC LIMIT 1`, + [obs.transceiver_id, obs.vendor_id] + ); + + if (prev.rows.length === 0) { + // New product alert + await pool.query( + `INSERT INTO competitor_alerts (vendor_id, transceiver_id, alert_type, severity, + new_price, currency, part_number, product_name, form_factor, speed_gbps, source_url) + VALUES ($1, $2, 'new_product', 'medium', $3, $4, $5, $6, $7, $8, $9)`, + [obs.vendor_id, obs.transceiver_id, obs.price, obs.currency, + obs.part_number, obs.product_name, obs.form_factor, obs.speed_gbps, obs.source_url] + ); + newProducts++; + alerts++; + continue; + } + + const prevPrice = parseFloat(prev.rows[0].price); + const prevStock = prev.rows[0].stock_level; + + // Price change detection (>2% threshold to avoid noise) + if (Math.abs(obs.price - prevPrice) / prevPrice > 0.02) { + const delta = obs.price - prevPrice; + const deltaPct = (delta / prevPrice) * 100; + const alertType = delta < 0 ? 'price_drop' : 'price_increase'; + const severity = Math.abs(deltaPct) > 15 ? 'high' : Math.abs(deltaPct) > 5 ? 'medium' : 'low'; + + // Insert alert + await pool.query( + `INSERT INTO competitor_alerts (vendor_id, transceiver_id, alert_type, severity, + old_price, new_price, price_delta, price_pct, currency, + part_number, product_name, form_factor, speed_gbps, source_url) + VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13, $14)`, + [obs.vendor_id, obs.transceiver_id, alertType, severity, + prevPrice, obs.price, delta, deltaPct, obs.currency, + obs.part_number, obs.product_name, obs.form_factor, obs.speed_gbps, obs.source_url] + ); + + // Insert price change record + await pool.query( + `INSERT INTO price_changes (transceiver_id, vendor_id, old_price, new_price, delta, delta_pct, currency) + VALUES ($1, $2, $3, $4, $5, $6, $7)`, + [obs.transceiver_id, obs.vendor_id, prevPrice, obs.price, delta, deltaPct, obs.currency] + ); + + priceChanges++; + alerts++; + } + + // Stock change detection + if (prevStock && obs.stock_level && prevStock !== obs.stock_level) { + if (obs.stock_level === 'out_of_stock' && prevStock !== 'out_of_stock') { + await pool.query( + `INSERT INTO competitor_alerts (vendor_id, transceiver_id, alert_type, severity, + part_number, product_name, form_factor, speed_gbps, source_url) + VALUES ($1, $2, 'out_of_stock', 'low', $3, $4, $5, $6, $7)`, + [obs.vendor_id, obs.transceiver_id, obs.part_number, obs.product_name, + obs.form_factor, obs.speed_gbps, obs.source_url] + ); + alerts++; + } else if (prevStock === 'out_of_stock' && obs.stock_level !== 'out_of_stock') { + await pool.query( + `INSERT INTO competitor_alerts (vendor_id, transceiver_id, alert_type, severity, + new_price, currency, part_number, product_name, form_factor, speed_gbps, source_url) + VALUES ($1, $2, 'back_in_stock', 'low', $3, $4, $5, $6, $7, $8, $9)`, + [obs.vendor_id, obs.transceiver_id, obs.price, obs.currency, + obs.part_number, obs.product_name, obs.form_factor, obs.speed_gbps, obs.source_url] + ); + alerts++; + } + } + } catch (err) { + console.error(`Change detection error for ${obs.part_number}:`, err); + } + } + + console.log(`Change detection: ${alerts} alerts (${priceChanges} price changes, ${newProducts} new products)`); + return { alerts, priceChanges, newProducts }; +} diff --git a/packages/scraper/src/utils/image-downloader.ts b/packages/scraper/src/utils/image-downloader.ts new file mode 100644 index 0000000..2db3f49 --- /dev/null +++ b/packages/scraper/src/utils/image-downloader.ts @@ -0,0 +1,154 @@ +/** + * WS0: Image Downloader + * + * Downloads product images from various sources, resizes, and stores metadata. + * R2 upload is optional — for now stores image URLs and marks has_image. + */ +import { Pool } from "pg"; +import { createHash } from "crypto"; + +const pool = new Pool({ + host: process.env.POSTGRES_HOST || "localhost", + port: parseInt(process.env.POSTGRES_PORT || "5433"), + database: process.env.POSTGRES_DB || "transceiver_db", + user: process.env.POSTGRES_USER || "tip", + password: process.env.POSTGRES_PASSWORD || "tip_dev_2026", + max: 3, +}); + +/** + * Update image URL for a transceiver and mark has_image = true + */ +export async function setTransceiverImage( + transceiverId: string, + imageUrl: string, + source?: string +): Promise { + await pool.query( + `UPDATE transceivers SET image_url = $2, has_image = true, image_scraped_at = NOW() + WHERE id = $1 AND (image_url IS NULL OR image_url = '')`, + [transceiverId, imageUrl] + ); +} + +/** + * Update image URL for a switch + */ +export async function setSwitchImage( + switchId: string, + imageUrl: string +): Promise { + await pool.query( + `UPDATE switches SET image_url = $2, has_image = true + WHERE id = $1 AND (image_url IS NULL OR image_url = '')`, + [switchId, imageUrl] + ); +} + +/** + * Get products without images for backfill + */ +export async function getProductsWithoutImages(limit = 100): Promise> { + const result = await pool.query( + `SELECT t.id, t.slug, t.form_factor, t.speed_gbps, t.reach_label, t.part_number, + v.name AS vendor_name + FROM transceivers t + LEFT JOIN vendors v ON t.vendor_id = v.id + WHERE (t.has_image = false OR t.has_image IS NULL) + AND t.image_url IS NULL + ORDER BY t.speed_gbps DESC + LIMIT $1`, + [limit] + ); + return result.rows; +} + +/** + * Generate a search URL to find product images + */ +export function buildImageSearchUrls(product: { + form_factor: string; + speed_gbps: number; + reach_label: string; + part_number?: string; + vendor_name?: string; +}): string[] { + const urls: string[] = []; + const q = `${product.form_factor} ${product.speed_gbps}G ${product.reach_label} transceiver`; + + // Flexoptix store + urls.push(`https://www.flexoptix.net/en/catalogsearch/result/?q=${encodeURIComponent(q)}`); + + // FS.com + urls.push(`https://www.fs.com/search/${encodeURIComponent(q)}.html`); + + // If we have a part number, try vendor-specific + if (product.part_number) { + urls.push(`https://www.fs.com/search/${encodeURIComponent(product.part_number)}.html`); + } + + return urls; +} + +/** + * Get image coverage statistics + */ +export async function getImageCoverageStats(): Promise<{ + total: number; + with_image: number; + without_image: number; + coverage_pct: number; +}> { + const result = await pool.query(` + SELECT + COUNT(*) AS total, + COUNT(*) FILTER (WHERE has_image = true) AS with_image, + COUNT(*) FILTER (WHERE has_image = false OR has_image IS NULL) AS without_image + FROM transceivers + `); + const row = result.rows[0]; + const total = parseInt(row.total); + const withImg = parseInt(row.with_image); + return { + total, + with_image: withImg, + without_image: parseInt(row.without_image), + coverage_pct: total > 0 ? Math.round((withImg / total) * 10000) / 100 : 0, + }; +} + +/** + * Get price coverage statistics + */ +export async function getPriceCoverageStats(): Promise<{ + total: number; + with_recent_price: number; + without_recent_price: number; + coverage_pct: number; +}> { + const result = await pool.query(` + SELECT + COUNT(*) AS total, + COUNT(*) FILTER (WHERE EXISTS ( + SELECT 1 FROM price_observations po WHERE po.transceiver_id = t.id AND po.time > NOW() - INTERVAL '7 days' + )) AS with_price + FROM transceivers t + `); + const row = result.rows[0]; + const total = parseInt(row.total); + const withPrice = parseInt(row.with_price); + return { + total, + with_recent_price: withPrice, + without_recent_price: total - withPrice, + coverage_pct: total > 0 ? Math.round((withPrice / total) * 10000) / 100 : 0, + }; +} diff --git a/sql/013-v020-sales-intelligence.sql b/sql/013-v020-sales-intelligence.sql new file mode 100644 index 0000000..12b0b8e --- /dev/null +++ b/sql/013-v020-sales-intelligence.sql @@ -0,0 +1,347 @@ +-- Migration 013: v0.2.0 Sales Intelligence Engine +-- Adds: competitor_alerts, price_changes, image tracking, finder views, blog_posts_v2, forecast tables + +-- ============================================================ +-- IMAGE TRACKING (WS0) +-- ============================================================ + +-- Add image columns if not exist +DO $$ BEGIN + ALTER TABLE transceivers ADD COLUMN IF NOT EXISTS image_url TEXT; + ALTER TABLE transceivers ADD COLUMN IF NOT EXISTS image_r2_key TEXT; + ALTER TABLE transceivers ADD COLUMN IF NOT EXISTS image_thumb_r2_key TEXT; + ALTER TABLE transceivers ADD COLUMN IF NOT EXISTS image_scraped_at TIMESTAMPTZ; + ALTER TABLE transceivers ADD COLUMN IF NOT EXISTS has_image BOOLEAN DEFAULT FALSE; +EXCEPTION WHEN duplicate_column THEN NULL; +END $$; + +DO $$ BEGIN + ALTER TABLE switches ADD COLUMN IF NOT EXISTS image_thumb_r2_key TEXT; + ALTER TABLE switches ADD COLUMN IF NOT EXISTS has_image BOOLEAN DEFAULT FALSE; +EXCEPTION WHEN duplicate_column THEN NULL; +END $$; + +CREATE INDEX IF NOT EXISTS idx_transceivers_has_image ON transceivers(has_image) WHERE has_image = false; + +-- ============================================================ +-- PRICE COVERAGE (WS0b) +-- ============================================================ + +-- View: products missing recent prices +CREATE OR REPLACE VIEW v_price_coverage AS +SELECT + t.id, + t.slug, + t.form_factor, + t.speed_gbps, + t.reach_label, + v.name AS vendor_name, + (SELECT MAX(po.time) FROM price_observations po WHERE po.transceiver_id = t.id) AS last_price_at, + (SELECT COUNT(*) FROM price_observations po WHERE po.transceiver_id = t.id AND po.time > NOW() - INTERVAL '7 days') AS recent_price_count, + CASE + WHEN (SELECT COUNT(*) FROM price_observations po WHERE po.transceiver_id = t.id AND po.time > NOW() - INTERVAL '7 days') > 0 THEN TRUE + ELSE FALSE + END AS has_recent_price +FROM transceivers t +LEFT JOIN vendors v ON t.vendor_id = v.id +ORDER BY has_recent_price ASC, t.speed_gbps DESC; + +-- View: image coverage +CREATE OR REPLACE VIEW v_image_coverage AS +SELECT + t.id, + t.slug, + t.form_factor, + t.speed_gbps, + t.image_url, + t.image_r2_key, + t.has_image, + v.name AS vendor_name +FROM transceivers t +LEFT JOIN vendors v ON t.vendor_id = v.id +ORDER BY t.has_image ASC, t.speed_gbps DESC; + +-- ============================================================ +-- COMPETITOR INTELLIGENCE (WS4) +-- ============================================================ + +CREATE TABLE IF NOT EXISTS competitor_alerts ( + id UUID PRIMARY KEY DEFAULT gen_random_uuid(), + vendor_id UUID REFERENCES vendors(id), + transceiver_id UUID REFERENCES transceivers(id), + + alert_type TEXT NOT NULL CHECK (alert_type IN ( + 'new_product', 'price_drop', 'price_increase', 'out_of_stock', + 'back_in_stock', 'discontinued', 'new_vendor' + )), + severity TEXT DEFAULT 'info' CHECK (severity IN ('critical', 'high', 'medium', 'low', 'info')), + + -- Price change details + old_price NUMERIC, + new_price NUMERIC, + price_delta NUMERIC, -- absolute change + price_pct NUMERIC, -- percentage change + currency TEXT DEFAULT 'USD', + + -- Product details + part_number TEXT, + product_name TEXT, + form_factor TEXT, + speed_gbps NUMERIC, + source_url TEXT, + + -- Status + acknowledged BOOLEAN DEFAULT FALSE, + notes TEXT, + + created_at TIMESTAMPTZ DEFAULT NOW() +); + +CREATE INDEX IF NOT EXISTS idx_competitor_alerts_type ON competitor_alerts(alert_type); +CREATE INDEX IF NOT EXISTS idx_competitor_alerts_vendor ON competitor_alerts(vendor_id); +CREATE INDEX IF NOT EXISTS idx_competitor_alerts_created ON competitor_alerts(created_at DESC); +CREATE INDEX IF NOT EXISTS idx_competitor_alerts_unack ON competitor_alerts(acknowledged) WHERE acknowledged = FALSE; +CREATE INDEX IF NOT EXISTS idx_competitor_alerts_severity ON competitor_alerts(severity); + +-- Price change history (deduplicated, one row per actual change) +CREATE TABLE IF NOT EXISTS price_changes ( + id UUID PRIMARY KEY DEFAULT gen_random_uuid(), + transceiver_id UUID REFERENCES transceivers(id), + vendor_id UUID REFERENCES vendors(id), + + old_price NUMERIC NOT NULL, + new_price NUMERIC NOT NULL, + delta NUMERIC NOT NULL, -- new - old + delta_pct NUMERIC NOT NULL, -- ((new-old)/old) * 100 + currency TEXT DEFAULT 'USD', + + detected_at TIMESTAMPTZ DEFAULT NOW() +); + +CREATE INDEX IF NOT EXISTS idx_price_changes_transceiver ON price_changes(transceiver_id, detected_at DESC); +CREATE INDEX IF NOT EXISTS idx_price_changes_vendor ON price_changes(vendor_id, detected_at DESC); +CREATE INDEX IF NOT EXISTS idx_price_changes_detected ON price_changes(detected_at DESC); + +-- ============================================================ +-- FINDER: FLEXOPTIX PRODUCT MAPPING (WS1) +-- ============================================================ + +-- Map OEM part numbers to Flexoptix products +CREATE TABLE IF NOT EXISTS flexoptix_product_map ( + id UUID PRIMARY KEY DEFAULT gen_random_uuid(), + oem_part_number TEXT NOT NULL, + oem_vendor TEXT NOT NULL, + flexoptix_sku TEXT, + flexoptix_url TEXT, + flexoptix_price_eur NUMERIC, + form_factor TEXT, + speed_gbps NUMERIC, + reach_label TEXT, + fiber_type TEXT, + match_type TEXT DEFAULT 'exact' CHECK (match_type IN ('exact', 'equivalent', 'compatible', 'suggested')), + last_verified TIMESTAMPTZ, + created_at TIMESTAMPTZ DEFAULT NOW(), + updated_at TIMESTAMPTZ DEFAULT NOW(), + + UNIQUE(oem_part_number, oem_vendor) +); + +CREATE INDEX IF NOT EXISTS idx_flexoptix_map_oem ON flexoptix_product_map(oem_part_number); +CREATE INDEX IF NOT EXISTS idx_flexoptix_map_vendor ON flexoptix_product_map(oem_vendor); +CREATE INDEX IF NOT EXISTS idx_flexoptix_map_ff ON flexoptix_product_map(form_factor, speed_gbps); + +-- Finder view: switch → compatible Flexoptix products +CREATE OR REPLACE VIEW v_switch_flexoptix_finder AS +SELECT + sw.id AS switch_id, + sw.model AS switch_model, + sw.series AS switch_series, + sv.name AS switch_vendor, + c.status AS compat_status, + c.firmware_min, + c.notes AS compat_notes, + t.id AS transceiver_id, + t.slug AS transceiver_slug, + t.form_factor, + t.speed_gbps, + t.reach_label, + t.fiber_type, + t.wavelengths, + t.connector, + t.image_url AS transceiver_image, + fpm.flexoptix_sku, + fpm.flexoptix_url, + fpm.flexoptix_price_eur, + fpm.match_type, + (SELECT po.price FROM price_observations po WHERE po.transceiver_id = t.id ORDER BY po.time DESC LIMIT 1) AS latest_price, + (SELECT po.currency FROM price_observations po WHERE po.transceiver_id = t.id ORDER BY po.time DESC LIMIT 1) AS latest_currency +FROM switches sw +JOIN vendors sv ON sw.vendor_id = sv.id +JOIN compatibility c ON c.switch_id = sw.id AND c.status = 'compatible' +JOIN transceivers t ON c.transceiver_id = t.id +LEFT JOIN flexoptix_product_map fpm ON ( + fpm.form_factor = t.form_factor + AND fpm.speed_gbps = t.speed_gbps + AND fpm.reach_label = t.reach_label +); + +-- ============================================================ +-- BLOG ENGINE v2 (WS8) +-- ============================================================ + +CREATE TABLE IF NOT EXISTS blog_series ( + id UUID PRIMARY KEY DEFAULT gen_random_uuid(), + title TEXT NOT NULL, + slug TEXT NOT NULL UNIQUE, + description TEXT, + total_parts INTEGER DEFAULT 1, + status TEXT DEFAULT 'active' CHECK (status IN ('active', 'completed', 'paused')), + created_at TIMESTAMPTZ DEFAULT NOW() +); + +-- Add v2 columns to existing blog_drafts if they exist +DO $$ BEGIN + ALTER TABLE blog_drafts ADD COLUMN IF NOT EXISTS series_id UUID REFERENCES blog_series(id); + ALTER TABLE blog_drafts ADD COLUMN IF NOT EXISTS series_part INTEGER; + ALTER TABLE blog_drafts ADD COLUMN IF NOT EXISTS seo_title TEXT; + ALTER TABLE blog_drafts ADD COLUMN IF NOT EXISTS seo_description TEXT; + ALTER TABLE blog_drafts ADD COLUMN IF NOT EXISTS seo_slug TEXT; + ALTER TABLE blog_drafts ADD COLUMN IF NOT EXISTS seo_focus_keyword TEXT; + ALTER TABLE blog_drafts ADD COLUMN IF NOT EXISTS seo_score INTEGER; + ALTER TABLE blog_drafts ADD COLUMN IF NOT EXISTS readability_score NUMERIC; + ALTER TABLE blog_drafts ADD COLUMN IF NOT EXISTS hero_image_url TEXT; + ALTER TABLE blog_drafts ADD COLUMN IF NOT EXISTS hero_image_r2_key TEXT; + ALTER TABLE blog_drafts ADD COLUMN IF NOT EXISTS related_products UUID[]; + ALTER TABLE blog_drafts ADD COLUMN IF NOT EXISTS related_switches UUID[]; + ALTER TABLE blog_drafts ADD COLUMN IF NOT EXISTS competitor_data JSONB; + ALTER TABLE blog_drafts ADD COLUMN IF NOT EXISTS pricing_data JSONB; + ALTER TABLE blog_drafts ADD COLUMN IF NOT EXISTS export_markdown TEXT; + ALTER TABLE blog_drafts ADD COLUMN IF NOT EXISTS export_html TEXT; + ALTER TABLE blog_drafts ADD COLUMN IF NOT EXISTS published_at TIMESTAMPTZ; + ALTER TABLE blog_drafts ADD COLUMN IF NOT EXISTS scheduled_at TIMESTAMPTZ; +EXCEPTION WHEN duplicate_column THEN NULL; +END $$; + +-- ============================================================ +-- SALES FORECAST (WS5) +-- ============================================================ + +CREATE TABLE IF NOT EXISTS sales_forecasts ( + id UUID PRIMARY KEY DEFAULT gen_random_uuid(), + technology TEXT NOT NULL, -- "400G QSFP-DD", "100G QSFP28", etc. + speed_gbps NUMERIC, + form_factor TEXT, + + -- Forecast periods + forecast_3m_units INTEGER, + forecast_3m_revenue NUMERIC, + forecast_9m_units INTEGER, + forecast_9m_revenue NUMERIC, + forecast_12m_units INTEGER, + forecast_12m_revenue NUMERIC, + forecast_18m_units INTEGER, + forecast_18m_revenue NUMERIC, + + -- Price trajectory + current_asp NUMERIC, + asp_3m NUMERIC, + asp_12m NUMERIC, + price_floor NUMERIC, + months_to_floor INTEGER, + + -- Confidence + confidence_3m NUMERIC, + confidence_9m NUMERIC, + confidence_12m NUMERIC, + confidence_18m NUMERIC, + + -- Buy signal + buy_signal TEXT CHECK (buy_signal IN ('BUY_NOW', 'WAIT', 'HOLD')), + signal_reason TEXT, + + -- Model info + model_version TEXT DEFAULT 'norton-bass-v1', + data_points INTEGER, -- how many price observations used + + computed_at TIMESTAMPTZ DEFAULT NOW(), + valid_until TIMESTAMPTZ DEFAULT NOW() + INTERVAL '7 days' +); + +CREATE INDEX IF NOT EXISTS idx_forecasts_tech ON sales_forecasts(technology); +CREATE INDEX IF NOT EXISTS idx_forecasts_computed ON sales_forecasts(computed_at DESC); + +-- ============================================================ +-- TRANSPORT PLANNER (WS3) +-- ============================================================ + +CREATE TABLE IF NOT EXISTS fiber_providers ( + id UUID PRIMARY KEY DEFAULT gen_random_uuid(), + name TEXT NOT NULL UNIQUE, + slug TEXT NOT NULL UNIQUE, + website TEXT, + type TEXT CHECK (type IN ('tier1', 'tier2', 'regional', 'municipal', 'hyperscaler')), + headquarters TEXT, + coverage_countries TEXT[], + products TEXT[], -- 'dark_fiber', 'wavelength', 'ip_transit', 'ethernet' + peering_ixs TEXT[], -- IX names where they peer + notes TEXT, + created_at TIMESTAMPTZ DEFAULT NOW() +); + +CREATE TABLE IF NOT EXISTS fiber_routes ( + id UUID PRIMARY KEY DEFAULT gen_random_uuid(), + provider_id UUID REFERENCES fiber_providers(id), + city_a TEXT NOT NULL, + city_b TEXT NOT NULL, + country TEXT DEFAULT 'DE', + distance_km NUMERIC, + fiber_distance_km NUMERIC, -- actual fiber route (usually 1.3-1.5x straight line) + product_type TEXT, -- 'dark_fiber', 'wavelength_100g', 'wavelength_400g', etc. + monthly_price_eur NUMERIC, + setup_fee_eur NUMERIC, + min_contract_months INTEGER, + latency_ms NUMERIC, + available BOOLEAN DEFAULT TRUE, + notes TEXT, + created_at TIMESTAMPTZ DEFAULT NOW(), + + UNIQUE(provider_id, city_a, city_b, product_type) +); + +CREATE TABLE IF NOT EXISTS cities ( + id UUID PRIMARY KEY DEFAULT gen_random_uuid(), + name TEXT NOT NULL, + country TEXT NOT NULL DEFAULT 'DE', + lat NUMERIC, + lon NUMERIC, + has_ix BOOLEAN DEFAULT FALSE, + ix_names TEXT[], + has_datacenter BOOLEAN DEFAULT FALSE, + population INTEGER, + UNIQUE(name, country) +); + +CREATE INDEX IF NOT EXISTS idx_fiber_routes_cities ON fiber_routes(city_a, city_b); +CREATE INDEX IF NOT EXISTS idx_cities_country ON cities(country); + +-- ============================================================ +-- GENERATED DATASHEETS (WS2) +-- ============================================================ + +CREATE TABLE IF NOT EXISTS generated_datasheets ( + id UUID PRIMARY KEY DEFAULT gen_random_uuid(), + entity_type TEXT NOT NULL CHECK (entity_type IN ('transceiver', 'switch', 'comparison', 'compatibility_matrix')), + entity_id UUID, + entity_ids UUID[], -- for comparison datasheets + branding TEXT DEFAULT 'flexoptix', + format TEXT DEFAULT 'pdf', + + r2_key TEXT, + r2_url TEXT, + file_size_bytes BIGINT, + + generated_at TIMESTAMPTZ DEFAULT NOW(), + expires_at TIMESTAMPTZ DEFAULT NOW() + INTERVAL '30 days' +); + +CREATE INDEX IF NOT EXISTS idx_datasheets_entity ON generated_datasheets(entity_type, entity_id); diff --git a/sql/014-seed-transport-data.sql b/sql/014-seed-transport-data.sql new file mode 100644 index 0000000..1fdee66 --- /dev/null +++ b/sql/014-seed-transport-data.sql @@ -0,0 +1,84 @@ +-- Seed: European cities with IX/DC presence + German fiber providers + +-- ============================================================ +-- CITIES (Major European networking hubs) +-- ============================================================ + +INSERT INTO cities (name, country, lat, lon, has_ix, ix_names, has_datacenter) VALUES + -- Germany + ('Frankfurt', 'DE', 50.1109, 8.6821, true, ARRAY['DE-CIX Frankfurt'], true), + ('Berlin', 'DE', 52.5200, 13.4050, true, ARRAY['BCIX', 'DE-CIX Berlin'], true), + ('Hamburg', 'DE', 53.5511, 9.9937, true, ARRAY['DE-CIX Hamburg'], true), + ('Munich', 'DE', 48.1351, 11.5820, true, ARRAY['DE-CIX Munich'], true), + ('Düsseldorf', 'DE', 51.2277, 6.7735, true, ARRAY['DE-CIX Dusseldorf'], true), + ('Darmstadt', 'DE', 49.8728, 8.6512, false, '{}', true), + ('Stuttgart', 'DE', 48.7758, 9.1829, false, '{}', true), + ('Nuremberg', 'DE', 49.4521, 11.0767, true, ARRAY['N-IX'], true), + ('Cologne', 'DE', 50.9375, 6.9603, false, '{}', true), + ('Hannover', 'DE', 52.3759, 9.7320, false, '{}', true), + -- Major European hubs + ('Amsterdam', 'NL', 52.3676, 4.9041, true, ARRAY['AMS-IX'], true), + ('London', 'GB', 51.5074, -0.1278, true, ARRAY['LINX'], true), + ('Paris', 'FR', 48.8566, 2.3522, true, ARRAY['France-IX', 'Equinix Paris'], true), + ('Zurich', 'CH', 47.3769, 8.5417, true, ARRAY['SwissIX'], true), + ('Vienna', 'AT', 48.2082, 16.3738, true, ARRAY['VIX'], true), + ('Prague', 'CZ', 50.0755, 14.4378, true, ARRAY['NIX.CZ'], true), + ('Warsaw', 'PL', 52.2297, 21.0122, true, ARRAY['PLIX'], true), + ('Copenhagen', 'DK', 55.6761, 12.5683, true, ARRAY['Netnod Copenhagen'], true), + ('Stockholm', 'SE', 59.3293, 18.0686, true, ARRAY['Netnod Stockholm'], true), + ('Milan', 'IT', 45.4642, 9.1900, true, ARRAY['MIX'], true), + ('Madrid', 'ES', 40.4168, -3.7038, true, ARRAY['ESPANIX'], true), + ('Marseille', 'FR', 43.2965, 5.3698, true, ARRAY['France-IX Marseille'], true), + ('Dublin', 'IE', 53.3498, -6.2603, true, ARRAY['INEX'], true), + ('Brussels', 'BE', 50.8503, 4.3517, true, ARRAY['BNIX'], true), + ('Lisbon', 'PT', 38.7223, -9.1393, true, ARRAY['GigaPIX'], true) +ON CONFLICT (name, country) DO NOTHING; + +-- ============================================================ +-- FIBER PROVIDERS +-- ============================================================ + +INSERT INTO fiber_providers (name, slug, website, type, headquarters, coverage_countries, products, notes) VALUES + ('euNetworks', 'eunetworks', 'https://www.eunetworks.com', 'tier1', 'London, UK', ARRAY['DE','NL','GB','FR','BE','IE','ES','IT','AT','CH','PL','CZ','DK','SE'], ARRAY['dark_fiber','wavelength','ethernet','ip_transit'], 'Pan-European fiber network, strong in Germany (Frankfurt-Berlin-Hamburg backbone)'), + ('GlobalConnect', 'globalconnect', 'https://www.globalconnect.com', 'tier1', 'Copenhagen, DK', ARRAY['DE','DK','SE','NO','FI','NL'], ARRAY['dark_fiber','wavelength','ethernet'], 'Nordic + German backbone, own fiber infrastructure'), + ('Telia Carrier', 'telia', 'https://www.teliacarrier.com', 'tier1', 'Stockholm, SE', ARRAY['DE','SE','DK','NO','FI','NL','GB','FR','US'], ARRAY['wavelength','ip_transit','ethernet'], 'Global Tier 1, AS1299, extensive German PoPs'), + ('Zayo Group', 'zayo', 'https://www.zayo.com', 'tier1', 'Boulder, US', ARRAY['DE','GB','FR','NL','US','CA'], ARRAY['dark_fiber','wavelength','ethernet','colocation'], 'Pan-Atlantic fiber, Frankfurt-London-Amsterdam triangle'), + ('Deutsche Telekom / OTC', 'dtag', 'https://www.telekom.de', 'tier1', 'Bonn, DE', ARRAY['DE','AT','CH','PL','CZ','HU','NL'], ARRAY['wavelength','ip_transit','ethernet','dark_fiber'], 'Largest German carrier, comprehensive domestic coverage'), + ('Vodafone Deutschland', 'vodafone-de', 'https://www.vodafone.de', 'tier1', 'Düsseldorf, DE', ARRAY['DE','GB','NL','ES','IT'], ARRAY['wavelength','ethernet','ip_transit'], 'Major German backbone via Unity Media / Kabel Deutschland infrastructure'), + ('Colt Technology Services', 'colt', 'https://www.colt.net', 'tier1', 'London, UK', ARRAY['DE','GB','FR','NL','BE','CH','AT','IT','ES','JP','SG'], ARRAY['wavelength','ethernet','ip_transit','sd_wan'], 'Strong in European financial hubs, low-latency routes'), + ('GTT Communications', 'gtt', 'https://www.gtt.net', 'tier1', 'McLean, US', ARRAY['DE','GB','FR','NL','US'], ARRAY['ip_transit','ethernet','sd_wan'], 'Global Tier 1, AS3257'), + ('Lumen Technologies', 'lumen', 'https://www.lumen.com', 'tier1', 'Monroe, US', ARRAY['DE','GB','FR','NL','US'], ARRAY['wavelength','ip_transit','ethernet','cdn'], 'Former CenturyLink/Level3, AS3356'), + ('Gasline', 'gasline', 'https://www.gasline.de', 'regional', 'Essen, DE', ARRAY['DE'], ARRAY['dark_fiber'], 'Fiber along gas pipelines in Germany, cost-effective dark fiber'), + ('NetCologne', 'netcologne', 'https://www.netcologne.de', 'regional', 'Cologne, DE', ARRAY['DE'], ARRAY['dark_fiber','ethernet'], 'Regional fiber in NRW/Rhineland area'), + ('M-net', 'mnet', 'https://www.m-net.de', 'regional', 'Munich, DE', ARRAY['DE'], ARRAY['dark_fiber','ethernet'], 'Regional fiber in Bavaria'), + ('RETN', 'retn', 'https://retn.net', 'tier2', 'London, UK', ARRAY['DE','GB','NL','SE','FI','RU','KZ'], ARRAY['ip_transit','wavelength'], 'East-West European backbone, AS9002'), + ('Core-Backbone', 'core-backbone', 'https://www.core-backbone.com', 'tier2', 'Nuremberg, DE', ARRAY['DE','NL','AT','CH'], ARRAY['ip_transit','wavelength','colocation'], 'German-based ISP with own backbone'), + ('AMS-IX', 'ams-ix', 'https://www.ams-ix.net', 'tier1', 'Amsterdam, NL', ARRAY['NL','DE'], ARRAY['peering','ethernet'], 'Worlds largest IX, extends to Frankfurt') +ON CONFLICT (name) DO NOTHING; + +-- ============================================================ +-- COMMON FIBER ROUTES (Germany focus) +-- ============================================================ + +INSERT INTO fiber_routes (provider_id, city_a, city_b, country, distance_km, fiber_distance_km, product_type, monthly_price_eur, min_contract_months, latency_ms) VALUES + -- euNetworks Germany backbone + ((SELECT id FROM fiber_providers WHERE slug='eunetworks'), 'Frankfurt', 'Berlin', 'DE', 420, 550, 'wavelength_100g', 5500, 36, 3.5), + ((SELECT id FROM fiber_providers WHERE slug='eunetworks'), 'Frankfurt', 'Hamburg', 'DE', 490, 610, 'wavelength_100g', 6200, 36, 4.0), + ((SELECT id FROM fiber_providers WHERE slug='eunetworks'), 'Frankfurt', 'Munich', 'DE', 390, 480, 'wavelength_100g', 5000, 36, 3.2), + ((SELECT id FROM fiber_providers WHERE slug='eunetworks'), 'Frankfurt', 'Düsseldorf', 'DE', 230, 290, 'wavelength_100g', 3500, 36, 2.0), + ((SELECT id FROM fiber_providers WHERE slug='eunetworks'), 'Frankfurt', 'Amsterdam', 'NL', 365, 440, 'wavelength_100g', 4500, 36, 3.0), + ((SELECT id FROM fiber_providers WHERE slug='eunetworks'), 'Frankfurt', 'Darmstadt', 'DE', 30, 40, 'dark_fiber', 1500, 60, 0.3), + ((SELECT id FROM fiber_providers WHERE slug='eunetworks'), 'Berlin', 'Hamburg', 'DE', 290, 370, 'wavelength_100g', 4000, 36, 2.5), + -- DTAG + ((SELECT id FROM fiber_providers WHERE slug='dtag'), 'Frankfurt', 'Berlin', 'DE', 420, 530, 'wavelength_100g', 6500, 24, 3.8), + ((SELECT id FROM fiber_providers WHERE slug='dtag'), 'Frankfurt', 'Darmstadt', 'DE', 30, 38, 'wavelength_100g', 2000, 24, 0.3), + ((SELECT id FROM fiber_providers WHERE slug='dtag'), 'Berlin', 'Darmstadt', 'DE', 450, 580, 'wavelength_100g', 7000, 24, 4.0), + ((SELECT id FROM fiber_providers WHERE slug='dtag'), 'Frankfurt', 'Munich', 'DE', 390, 470, 'wavelength_100g', 5500, 24, 3.3), + -- Telia + ((SELECT id FROM fiber_providers WHERE slug='telia'), 'Frankfurt', 'Amsterdam', 'NL', 365, 430, 'wavelength_100g', 4000, 36, 2.8), + ((SELECT id FROM fiber_providers WHERE slug='telia'), 'Frankfurt', 'London', 'GB', 650, 820, 'wavelength_100g', 7500, 36, 5.5), + ((SELECT id FROM fiber_providers WHERE slug='telia'), 'Frankfurt', 'Paris', 'FR', 480, 600, 'wavelength_100g', 5500, 36, 4.0), + -- Colt + ((SELECT id FROM fiber_providers WHERE slug='colt'), 'Frankfurt', 'Berlin', 'DE', 420, 540, 'ethernet_10g', 2500, 24, 3.6), + ((SELECT id FROM fiber_providers WHERE slug='colt'), 'Frankfurt', 'Zurich', 'CH', 310, 400, 'wavelength_100g', 4500, 36, 2.8) +ON CONFLICT (provider_id, city_a, city_b, product_type) DO NOTHING; diff --git a/storage/key_value_stores/default/SDK_CRAWLER_STATISTICS_0.json b/storage/key_value_stores/default/SDK_CRAWLER_STATISTICS_0.json deleted file mode 100644 index 98df4b3..0000000 --- a/storage/key_value_stores/default/SDK_CRAWLER_STATISTICS_0.json +++ /dev/null @@ -1,27 +0,0 @@ -{ - "requestsFinished": 7, - "requestsFailed": 0, - "requestsRetries": 0, - "requestsFailedPerMinute": 0, - "requestsFinishedPerMinute": 84, - "requestMinDurationMillis": 217, - "requestMaxDurationMillis": 3669, - "requestTotalFailedDurationMillis": 0, - "requestTotalFinishedDurationMillis": 5667, - "crawlerStartedAt": "2026-03-27T03:06:57.250Z", - "crawlerFinishedAt": "2026-03-27T03:07:02.254Z", - "statsPersistedAt": "2026-03-27T03:07:02.254Z", - "crawlerRuntimeMillis": 5014, - "crawlerLastStartTimestamp": 1774580817240, - "requestRetryHistogram": [ - 7 - ], - "statsId": 0, - "requestAvgFailedDurationMillis": null, - "requestAvgFinishedDurationMillis": 810, - "requestTotalDurationMillis": 5667, - "requestsTotal": 7, - "requestsWithStatusCode": {}, - "errors": {}, - "retryErrors": {} -} \ No newline at end of file diff --git a/storage/key_value_stores/default/SDK_SESSION_POOL_STATE.json b/storage/key_value_stores/default/SDK_SESSION_POOL_STATE.json deleted file mode 100644 index 2912552..0000000 --- a/storage/key_value_stores/default/SDK_SESSION_POOL_STATE.json +++ /dev/null @@ -1,146 +0,0 @@ -{ - "usableSessionsCount": 7, - "retiredSessionsCount": 0, - "sessions": [ - { - "id": "session_4IpwY6VPOc", - "cookieJar": { - "version": "tough-cookie@6.0.1", - "storeType": "MemoryCookieStore", - "rejectPublicSuffixes": true, - "enableLooseMode": false, - "allowSpecialUseDomain": true, - "prefixSecurity": "silent", - "cookies": [] - }, - "userData": {}, - "maxErrorScore": 3, - "errorScoreDecrement": 0.5, - "expiresAt": "2026-03-27T03:56:57.292Z", - "createdAt": "2026-03-27T03:06:57.292Z", - "usageCount": 1, - "maxUsageCount": 50, - "errorScore": 0 - }, - { - "id": "session_DgcebufZlI", - "cookieJar": { - "version": "tough-cookie@6.0.1", - "storeType": "MemoryCookieStore", - "rejectPublicSuffixes": true, - "enableLooseMode": false, - "allowSpecialUseDomain": true, - "prefixSecurity": "silent", - "cookies": [] - }, - "userData": {}, - "maxErrorScore": 3, - "errorScoreDecrement": 0.5, - "expiresAt": "2026-03-27T03:56:57.295Z", - "createdAt": "2026-03-27T03:06:57.295Z", - "usageCount": 1, - "maxUsageCount": 50, - "errorScore": 0 - }, - { - "id": "session_nNqMLCXOfI", - "cookieJar": { - "version": "tough-cookie@6.0.1", - "storeType": "MemoryCookieStore", - "rejectPublicSuffixes": true, - "enableLooseMode": false, - "allowSpecialUseDomain": true, - "prefixSecurity": "silent", - "cookies": [] - }, - "userData": {}, - "maxErrorScore": 3, - "errorScoreDecrement": 0.5, - "expiresAt": "2026-03-27T03:56:57.741Z", - "createdAt": "2026-03-27T03:06:57.741Z", - "usageCount": 1, - "maxUsageCount": 50, - "errorScore": 0 - }, - { - "id": "session_kfhwhKVBAt", - "cookieJar": { - "version": "tough-cookie@6.0.1", - "storeType": "MemoryCookieStore", - "rejectPublicSuffixes": true, - "enableLooseMode": false, - "allowSpecialUseDomain": true, - "prefixSecurity": "silent", - "cookies": [] - }, - "userData": {}, - "maxErrorScore": 3, - "errorScoreDecrement": 0.5, - "expiresAt": "2026-03-27T03:56:57.759Z", - "createdAt": "2026-03-27T03:06:57.759Z", - "usageCount": 1, - "maxUsageCount": 50, - "errorScore": 0 - }, - { - "id": "session_ROb5OpLaLg", - "cookieJar": { - "version": "tough-cookie@6.0.1", - "storeType": "MemoryCookieStore", - "rejectPublicSuffixes": true, - "enableLooseMode": false, - "allowSpecialUseDomain": true, - "prefixSecurity": "silent", - "cookies": [] - }, - "userData": {}, - "maxErrorScore": 3, - "errorScoreDecrement": 0.5, - "expiresAt": "2026-03-27T03:56:58.061Z", - "createdAt": "2026-03-27T03:06:58.061Z", - "usageCount": 1, - "maxUsageCount": 50, - "errorScore": 0 - }, - { - "id": "session_qurhUeTMvT", - "cookieJar": { - "version": "tough-cookie@6.0.1", - "storeType": "MemoryCookieStore", - "rejectPublicSuffixes": true, - "enableLooseMode": false, - "allowSpecialUseDomain": true, - "prefixSecurity": "silent", - "cookies": [] - }, - "userData": {}, - "maxErrorScore": 3, - "errorScoreDecrement": 0.5, - "expiresAt": "2026-03-27T03:56:58.348Z", - "createdAt": "2026-03-27T03:06:58.348Z", - "usageCount": 1, - "maxUsageCount": 50, - "errorScore": 0 - }, - { - "id": "session_ATWD4HqdJf", - "cookieJar": { - "version": "tough-cookie@6.0.1", - "storeType": "MemoryCookieStore", - "rejectPublicSuffixes": true, - "enableLooseMode": false, - "allowSpecialUseDomain": true, - "prefixSecurity": "silent", - "cookies": [] - }, - "userData": {}, - "maxErrorScore": 3, - "errorScoreDecrement": 0.5, - "expiresAt": "2026-03-27T03:56:58.569Z", - "createdAt": "2026-03-27T03:06:58.569Z", - "usageCount": 1, - "maxUsageCount": 50, - "errorScore": 0 - } - ] -} \ No newline at end of file diff --git a/storage/request_queues/default/Gyz6y01b4kaqVSY.json b/storage/request_queues/default/Gyz6y01b4kaqVSY.json deleted file mode 100644 index 62c630d..0000000 --- a/storage/request_queues/default/Gyz6y01b4kaqVSY.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "id": "Gyz6y01b4kaqVSY", - "json": "{\"id\":\"Gyz6y01b4kaqVSY\",\"url\":\"https://www.optcore.net/product-category/optical-transceiver/sfp-plus-transceiver/\",\"loadedUrl\":\"https://www.optcore.net/product-category/optical-transceiver/sfp-plus-transceiver/\",\"uniqueKey\":\"https://www.optcore.net/product-category/optical-transceiver/sfp-plus-transceiver\",\"method\":\"GET\",\"noRetry\":false,\"retryCount\":0,\"errorMessages\":[],\"headers\":{},\"userData\":{\"__crawlee\":{\"state\":4}},\"handledAt\":\"2026-03-27T03:06:57.738Z\"}", - "method": "GET", - "orderNo": null, - "retryCount": 0, - "uniqueKey": "https://www.optcore.net/product-category/optical-transceiver/sfp-plus-transceiver", - "url": "https://www.optcore.net/product-category/optical-transceiver/sfp-plus-transceiver/" -} \ No newline at end of file diff --git a/storage/request_queues/default/UDSA3Hqwk1O5rcd.json b/storage/request_queues/default/UDSA3Hqwk1O5rcd.json deleted file mode 100644 index 47ba94a..0000000 --- a/storage/request_queues/default/UDSA3Hqwk1O5rcd.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "id": "UDSA3Hqwk1O5rcd", - "json": "{\"id\":\"UDSA3Hqwk1O5rcd\",\"url\":\"https://www.optcore.net/product-category/optical-transceiver/sfp-transceiver/\",\"loadedUrl\":\"https://www.optcore.net/product-category/optical-transceiver/sfp-transceiver/\",\"uniqueKey\":\"https://www.optcore.net/product-category/optical-transceiver/sfp-transceiver\",\"method\":\"GET\",\"noRetry\":false,\"retryCount\":0,\"errorMessages\":[],\"headers\":{},\"userData\":{\"__crawlee\":{\"state\":4}},\"handledAt\":\"2026-03-27T03:06:57.758Z\"}", - "method": "GET", - "orderNo": null, - "retryCount": 0, - "uniqueKey": "https://www.optcore.net/product-category/optical-transceiver/sfp-transceiver", - "url": "https://www.optcore.net/product-category/optical-transceiver/sfp-transceiver/" -} \ No newline at end of file diff --git a/storage/request_queues/default/Z6VkGiT8REFQyfA.json b/storage/request_queues/default/Z6VkGiT8REFQyfA.json deleted file mode 100644 index 7cca81a..0000000 --- a/storage/request_queues/default/Z6VkGiT8REFQyfA.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "id": "Z6VkGiT8REFQyfA", - "json": "{\"id\":\"Z6VkGiT8REFQyfA\",\"url\":\"https://www.optcore.net/product-category/optical-transceiver/other-transceivers/\",\"loadedUrl\":\"https://www.optcore.net/product-category/optical-transceiver/other-transceivers/\",\"uniqueKey\":\"https://www.optcore.net/product-category/optical-transceiver/other-transceivers\",\"method\":\"GET\",\"noRetry\":false,\"retryCount\":0,\"errorMessages\":[],\"headers\":{},\"userData\":{\"__crawlee\":{\"state\":4}},\"handledAt\":\"2026-03-27T03:06:58.346Z\"}", - "method": "GET", - "orderNo": null, - "retryCount": 0, - "uniqueKey": "https://www.optcore.net/product-category/optical-transceiver/other-transceivers", - "url": "https://www.optcore.net/product-category/optical-transceiver/other-transceivers/" -} \ No newline at end of file diff --git a/storage/request_queues/default/Zus6krdGaVkRBmX.json b/storage/request_queues/default/Zus6krdGaVkRBmX.json deleted file mode 100644 index c02e782..0000000 --- a/storage/request_queues/default/Zus6krdGaVkRBmX.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "id": "Zus6krdGaVkRBmX", - "json": "{\"id\":\"Zus6krdGaVkRBmX\",\"url\":\"https://www.optcore.net/product-category/optical-transceiver/200g-400g-800g-transceiver/\",\"loadedUrl\":\"https://www.optcore.net/product-category/optical-transceiver/200g-400g-800g-transceiver/\",\"uniqueKey\":\"https://www.optcore.net/product-category/optical-transceiver/200g-400g-800g-transceiver\",\"method\":\"GET\",\"noRetry\":false,\"retryCount\":0,\"errorMessages\":[],\"headers\":{},\"userData\":{\"__crawlee\":{\"state\":4}},\"handledAt\":\"2026-03-27T03:06:58.047Z\"}", - "method": "GET", - "orderNo": null, - "retryCount": 0, - "uniqueKey": "https://www.optcore.net/product-category/optical-transceiver/200g-400g-800g-transceiver", - "url": "https://www.optcore.net/product-category/optical-transceiver/200g-400g-800g-transceiver/" -} \ No newline at end of file diff --git a/storage/request_queues/default/bhPAevnqFIxXzV3.json b/storage/request_queues/default/bhPAevnqFIxXzV3.json deleted file mode 100644 index 1c2767e..0000000 --- a/storage/request_queues/default/bhPAevnqFIxXzV3.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "id": "bhPAevnqFIxXzV3", - "json": "{\"id\":\"bhPAevnqFIxXzV3\",\"url\":\"https://www.optcore.net/product-category/optical-transceiver/other-transceivers/page/3/\",\"loadedUrl\":\"https://www.optcore.net/product-category/optical-transceiver/other-transceivers/page/3/\",\"uniqueKey\":\"https://www.optcore.net/product-category/optical-transceiver/other-transceivers/page/3\",\"method\":\"GET\",\"noRetry\":false,\"retryCount\":0,\"errorMessages\":[],\"headers\":{},\"userData\":{\"__crawlee\":{\"crawlDepth\":2,\"enqueueStrategy\":\"same-hostname\",\"state\":4}},\"handledAt\":\"2026-03-27T03:07:02.235Z\"}", - "method": "GET", - "orderNo": null, - "retryCount": 0, - "uniqueKey": "https://www.optcore.net/product-category/optical-transceiver/other-transceivers/page/3", - "url": "https://www.optcore.net/product-category/optical-transceiver/other-transceivers/page/3/" -} \ No newline at end of file diff --git a/storage/request_queues/default/xbIMGR6AhgYwBWg.json b/storage/request_queues/default/xbIMGR6AhgYwBWg.json deleted file mode 100644 index 1eb52fe..0000000 --- a/storage/request_queues/default/xbIMGR6AhgYwBWg.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "id": "xbIMGR6AhgYwBWg", - "json": "{\"id\":\"xbIMGR6AhgYwBWg\",\"url\":\"https://www.optcore.net/product-category/optical-transceiver/other-transceivers/page/2/\",\"loadedUrl\":\"https://www.optcore.net/product-category/optical-transceiver/other-transceivers/page/2/\",\"uniqueKey\":\"https://www.optcore.net/product-category/optical-transceiver/other-transceivers/page/2\",\"method\":\"GET\",\"noRetry\":false,\"retryCount\":0,\"errorMessages\":[],\"headers\":{},\"userData\":{\"__crawlee\":{\"crawlDepth\":1,\"enqueueStrategy\":\"same-hostname\",\"state\":4}},\"handledAt\":\"2026-03-27T03:06:58.564Z\"}", - "method": "GET", - "orderNo": null, - "retryCount": 0, - "uniqueKey": "https://www.optcore.net/product-category/optical-transceiver/other-transceivers/page/2", - "url": "https://www.optcore.net/product-category/optical-transceiver/other-transceivers/page/2/" -} \ No newline at end of file diff --git a/storage/request_queues/default/y74cMHovGn2i2xA.json b/storage/request_queues/default/y74cMHovGn2i2xA.json deleted file mode 100644 index 6dc535f..0000000 --- a/storage/request_queues/default/y74cMHovGn2i2xA.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "id": "y74cMHovGn2i2xA", - "json": "{\"id\":\"y74cMHovGn2i2xA\",\"url\":\"https://www.optcore.net/product-category/optical-transceiver/40g-100g-transceivers/\",\"loadedUrl\":\"https://www.optcore.net/product-category/optical-transceiver/40g-100g-transceivers/\",\"uniqueKey\":\"https://www.optcore.net/product-category/optical-transceiver/40g-100g-transceivers\",\"method\":\"GET\",\"noRetry\":false,\"retryCount\":0,\"errorMessages\":[],\"headers\":{},\"userData\":{\"__crawlee\":{\"state\":4}},\"handledAt\":\"2026-03-27T03:06:58.031Z\"}", - "method": "GET", - "orderNo": null, - "retryCount": 0, - "uniqueKey": "https://www.optcore.net/product-category/optical-transceiver/40g-100g-transceivers", - "url": "https://www.optcore.net/product-category/optical-transceiver/40g-100g-transceivers/" -} \ No newline at end of file