From 48218a553d1e2d7c3a67cac0bc5d0cb7af75566b Mon Sep 17 00:00:00 2001 From: Rene Fichtmueller Date: Wed, 1 Apr 2026 23:07:26 +0200 Subject: [PATCH] feat: nightly scraper window 00-08 + NAS Fearghas sync + procurement demo data - All scrapers now run nightly 00:00-08:00 (staggered, every day) - NAS sync module: rsync JSON exports + weekly pg_dump to Fearghas via WireGuard - 07:45 daily: price_observations, switches, transceivers, signals, issues exported as JSON - Migration 021: 200 ABC classifications, 150 reorder signals, 300 stock snapshots demo data - 9 market intelligence entries (LightReading, FierceTelecom, Farnell, Mouser, EU TED, Arista) - 6 lifecycle events (ZR, 800G OSFP, 100G DR4 price floor, SFP-10G-SR EOL) --- packages/scraper/src/scheduler.ts | 178 +++++++-------- packages/scraper/src/utils/nas-sync.ts | 287 +++++++++++++++++++++++++ sql/021-procurement-demo-data.sql | 260 ++++++++++++++++++++++ 3 files changed, 641 insertions(+), 84 deletions(-) create mode 100644 packages/scraper/src/utils/nas-sync.ts create mode 100644 sql/021-procurement-demo-data.sql diff --git a/packages/scraper/src/scheduler.ts b/packages/scraper/src/scheduler.ts index c93e736..c6707bf 100644 --- a/packages/scraper/src/scheduler.ts +++ b/packages/scraper/src/scheduler.ts @@ -1,15 +1,28 @@ /** - * pg-boss Job Scheduler — manages scrape jobs with adaptive timing. 
+ * pg-boss Job Scheduler * - * Job types: - * scrape:pricing:fs — Every 4 hours for FS.com prices/stock - * scrape:pricing:optcore — Every 6 hours for Optcore prices/stock - * scrape:pricing:atgbics — Every 8 hours for ATGBICS prices/stock (GBP) - * scrape:pricing:prolabs — Every 8 hours for ProLabs prices/stock (USD) - * scrape:compat:cisco — Weekly for OEM compatibility matrices - * scrape:news — Every 6 hours for trade press and news - * scrape:docs — Weekly for manuals and datasheets - * scrape:faq — Weekly for vendor FAQ/troubleshooting pages + * NIGHTLY WINDOW 00:00–08:00 — all scrapers run every night + * Staggered to avoid parallel overload and respect rate limits. + * + * 00:00 eBay transceiver pricing (new/refurb condition prices) + * 00:30 eBay switch enrichment (features, descriptions, images) + * 01:00 FS.com pricing (JS-rendered, needs Playwright) + * 01:45 Optcore pricing + * 02:15 10Gtek pricing (Playwright) + * 02:45 ATGBICS pricing (Shopify/Playwright, GBP) + * 03:15 ProLabs pricing (Playwright) + * 03:45 Flexoptix catalog (fast fetch — primary source) + * 04:15 Flexoptix vendor list + * 04:30 Market intelligence (OFC/ECOC/IEEE/Farnell/TED) + * 05:00 Community issues (Reddit/forums/vendor KB) + * 05:30 Datasheet + manual link discovery + * 06:00 Cisco compatibility matrices + * 06:15 News aggregation (trade press) + * 06:30 FAQ / knowledge base + * 07:00 Docs check (weekly: full doc scrape) + * 07:15 ABC classification recompute + * 07:30 Reorder signals recompute + * 07:45 NAS sync (export JSON data + weekly pg_dump to Fearghas) */ import PgBoss from "pg-boss"; import { config } from "dotenv"; @@ -74,122 +87,113 @@ export async function registerSchedules(boss: PgBoss): Promise { "enrich:ebay-transceivers", "scrape:community-issues", "scrape:datasheet-links", + "sync:nas", ]; for (const q of queues) { await boss.createQueue(q).catch(() => { /* already exists */ }); } - // v0.2.0: Increased frequencies for permanent price monitoring (R-SCAN) + // 
════════════════════════════════════════════════════════════════ + // NIGHTLY WINDOW 00:00–08:00 (all scrapers run every night) + // Staggered to avoid parallel overload, respect vendor rate limits + // ════════════════════════════════════════════════════════════════ - // FS.com pricing (every 4 hours — JS rendering is slow) - await boss.schedule("scrape:pricing:fs", "0 */4 * * *", {}, { - retryLimit: 2, - expireInSeconds: 3600, + // 00:00 — eBay transceiver pricing (new/refurb, all 5000+ products) + await boss.schedule("enrich:ebay-transceivers", "0 0 * * *", {}, { + retryLimit: 2, expireInSeconds: 7200, }); - // Optcore pricing (every 4 hours — was 6h) - await boss.schedule("scrape:pricing:optcore", "0 2/4 * * *", {}, { - retryLimit: 2, - expireInSeconds: 7200, + // 00:30 — eBay switch enrichment (features, images, refurb prices) + await boss.schedule("enrich:ebay-switches", "30 0 * * *", {}, { + retryLimit: 2, expireInSeconds: 7200, }); - // Compatibility matrices (every Sunday at 3am) - await boss.schedule("scrape:compat:cisco", "0 3 * * 0", {}, { - retryLimit: 3, - expireInSeconds: 3600, + // 01:00 — FS.com pricing (Playwright JS-rendered, slowest scraper) + await boss.schedule("scrape:pricing:fs", "0 1 * * *", {}, { + retryLimit: 3, expireInSeconds: 5400, }); - // News aggregation (every 6 hours) - await boss.schedule("scrape:news", "0 */6 * * *", {}, { - retryLimit: 2, - expireInSeconds: 1800, + // 01:45 — Optcore pricing + await boss.schedule("scrape:pricing:optcore", "45 1 * * *", {}, { + retryLimit: 2, expireInSeconds: 3600, }); - // FAQ/KB scraping (every Wednesday at 2am) - await boss.schedule("scrape:faq", "0 2 * * 3", {}, { - retryLimit: 3, - expireInSeconds: 3600, + // 02:15 — 10Gtek pricing (Playwright) + await boss.schedule("scrape:pricing:10gtek", "15 2 * * *", {}, { + retryLimit: 2, expireInSeconds: 3600, }); - // 10Gtek pricing (every 8 hours — Playwright, reasonable rate) - await boss.schedule("scrape:pricing:10gtek", "0 */8 * * *", {}, { - 
retryLimit: 2, - expireInSeconds: 3600, + // 02:45 — ATGBICS pricing (Shopify/Playwright, GBP) + await boss.schedule("scrape:pricing:atgbics", "45 2 * * *", {}, { + retryLimit: 2, expireInSeconds: 3600, }); - // ATGBICS pricing (every 8 hours — Shopify/Playwright, GBP prices) - await boss.schedule("scrape:pricing:atgbics", "0 2/8 * * *", {}, { - retryLimit: 2, - expireInSeconds: 3600, + // 03:15 — ProLabs pricing (Playwright/CloudFront) + await boss.schedule("scrape:pricing:prolabs", "15 3 * * *", {}, { + retryLimit: 2, expireInSeconds: 3600, }); - // ProLabs pricing (every 8 hours — Playwright, needs proxy for CloudFront) - await boss.schedule("scrape:pricing:prolabs", "0 4/8 * * *", {}, { - retryLimit: 2, - expireInSeconds: 3600, + // 03:45 — Flexoptix catalog (fast fetch — primary source, highest priority) + await boss.schedule("scrape:pricing:flexoptix", "45 3 * * *", {}, { + retryLimit: 3, expireInSeconds: 3600, }); - // Flexoptix catalog (every 2 hours — fetch-based, fast — R-SCAN requirement) - await boss.schedule("scrape:pricing:flexoptix", "0 */2 * * *", {}, { - retryLimit: 2, - expireInSeconds: 3600, + // 04:15 — Flexoptix vendor list (full vendor catalog sync) + await boss.schedule("scrape:vendors:flexoptix", "15 4 * * *", {}, { + retryLimit: 2, expireInSeconds: 1800, }); - // Flexoptix vendor list (weekly, Sunday at 6am — own data) - await boss.schedule("scrape:vendors:flexoptix", "0 6 * * 0", {}, { - retryLimit: 3, - expireInSeconds: 600, + // 04:30 — Market intelligence (OFC/ECOC, IEEE 802.3, EU TED, Farnell/Mouser lead times) + await boss.schedule("scrape:market-intel", "30 4 * * *", {}, { + retryLimit: 2, expireInSeconds: 3600, }); - // Document/datasheet check (every Saturday at 4am) - await boss.schedule("scrape:docs", "0 4 * * 6", {}, { - retryLimit: 3, - expireInSeconds: 7200, + // 05:00 — Community issues (Reddit/ServeTheHome/Arista/Cisco forums) + await boss.schedule("scrape:community-issues", "0 5 * * *", {}, { + retryLimit: 1, 
expireInSeconds: 3600, }); - // Market intelligence: OFC/ECOC, IEEE, TED, Farnell/Mouser lead times (every Tuesday 5am) - await boss.schedule("scrape:market-intel", "0 5 * * 2", {}, { - retryLimit: 2, - expireInSeconds: 3600, + // 05:30 — Datasheet + manual link discovery + await boss.schedule("scrape:datasheet-links", "30 5 * * *", {}, { + retryLimit: 1, expireInSeconds: 3600, }); - // ABC classification recompute (after each major pricing run — daily at 8am) - await boss.schedule("compute:abc", "0 8 * * *", {}, { - retryLimit: 2, - expireInSeconds: 600, + // 06:00 — Cisco/Juniper/Arista compatibility matrices (nightly — was weekly) + await boss.schedule("scrape:compat:cisco", "0 6 * * *", {}, { + retryLimit: 2, expireInSeconds: 3600, }); - // Reorder signals recompute (daily at 8:30am — after ABC) - await boss.schedule("compute:reorder-signals", "30 8 * * *", {}, { - retryLimit: 2, - expireInSeconds: 600, + // 06:15 — News aggregation (LightReading, FierceTelecom, trade press) + await boss.schedule("scrape:news", "15 6 * * *", {}, { + retryLimit: 2, expireInSeconds: 1800, }); - // eBay switch enrichment: features, descriptions, refurb prices (nightly at 1am) - await boss.schedule("enrich:ebay-switches", "0 1 * * *", {}, { - retryLimit: 1, - expireInSeconds: 7200, + // 06:30 — FAQ / knowledge base scraping + await boss.schedule("scrape:faq", "30 6 * * *", {}, { + retryLimit: 2, expireInSeconds: 3600, }); - // eBay transceiver pricing with condition (nightly at 2am) - await boss.schedule("enrich:ebay-transceivers", "0 2 * * *", {}, { - retryLimit: 1, - expireInSeconds: 7200, + // 07:00 — Docs check (full document/datasheet download) + await boss.schedule("scrape:docs", "0 7 * * *", {}, { + retryLimit: 2, expireInSeconds: 3600, }); - // Community issues scraping: Reddit/forums for known bugs (weekly on Sunday 4am) - await boss.schedule("scrape:community-issues", "0 4 * * 0", {}, { - retryLimit: 1, - expireInSeconds: 3600, + // 07:15 — ABC classification recompute 
(after all pricing runs) + await boss.schedule("compute:abc", "15 7 * * *", {}, { + retryLimit: 2, expireInSeconds: 600, }); - // Datasheet link discovery (weekly on Monday 6am) - await boss.schedule("scrape:datasheet-links", "0 6 * * 1", {}, { - retryLimit: 1, - expireInSeconds: 3600, + // 07:30 — Reorder signals recompute (after ABC) + await boss.schedule("compute:reorder-signals", "30 7 * * *", {}, { + retryLimit: 2, expireInSeconds: 600, }); - console.log("All schedules registered"); + // 07:45 — NAS sync: export all data as JSON + weekly pg_dump to Fearghas + await boss.schedule("sync:nas", "45 7 * * *", {}, { + retryLimit: 1, expireInSeconds: 1800, + }); + + console.log("All schedules registered — nightly window 00:00–08:00"); } export async function registerWorkers(boss: PgBoss): Promise { @@ -299,5 +303,11 @@ export async function registerWorkers(boss: PgBoss): Promise { await findAndSeedDatasheetLinks(50); }); + await boss.work("sync:nas", async (_job) => { + console.log(`[${new Date().toISOString()}] Running: NAS sync to Fearghas`); + const { runNightlyNasSync } = await import("./utils/nas-sync"); + await runNightlyNasSync(); + }); + console.log("All workers registered"); } diff --git a/packages/scraper/src/utils/nas-sync.ts b/packages/scraper/src/utils/nas-sync.ts new file mode 100644 index 0000000..4804890 --- /dev/null +++ b/packages/scraper/src/utils/nas-sync.ts @@ -0,0 +1,287 @@ +/** + * NAS Sync — Fearghas + * + * Exports TIP data nightly to the home NAS "Fearghas" via rsync over SSH. + * Erik reaches Fearghas through the WireGuard tunnel (192.168.178.x). 
+ * + * Required environment variables: + * NAS_HOST = 192.168.178.50 (Fearghas IP on home network) + * NAS_USER = tip (NAS user with write access to share) + * NAS_SSH_KEY = /root/.ssh/id_ed25519 (SSH key for rsync) + * NAS_PATH = /volume1/tip-data (Synology: /volume1/) + * + * Directory layout on NAS: + * /volume1/tip-data/ + * exports/ + * YYYY-MM-DD/ + * price_observations.json + * switches.json + * transceivers.json + * market_intelligence.json + * reorder_signals.json + * product_issues.json + * changelog.md + * raw-cache/ ← raw HTML from Crawler LLM (future) + * db-backups/ ← pg_dump on Sundays and on the 1st of each month + * transceiver_db_YYYY-MM-DD.sql.gz + */ + +import { exec } from "child_process"; +import { promisify } from "util"; +import { writeFile, mkdir, rm } from "fs/promises"; +import { join } from "path"; +import { tmpdir } from "os"; +import { db as pool } from "./db"; +import { logger } from "./logger"; + +const execAsync = promisify(exec); + +const NAS_HOST = process.env.NAS_HOST || "192.168.178.50"; +const NAS_USER = process.env.NAS_USER || "tip"; +const NAS_SSH_KEY = process.env.NAS_SSH_KEY || "/root/.ssh/id_ed25519"; +const NAS_PATH = process.env.NAS_PATH || "/volume1/tip-data"; +const DB_HOST = process.env.DB_HOST || "localhost"; +const DB_PORT = process.env.DB_PORT || "5433"; +const DB_USER = process.env.DB_USER || "tip"; +const DB_NAME = process.env.DB_NAME || "transceiver_db"; +const DB_PASS = process.env.DB_PASS || process.env.PGPASSWORD || ""; + +// ───────────────────────────────────────────────────────────────────────────── +// Check NAS reachability via WireGuard +// ───────────────────────────────────────────────────────────────────────────── +export async function checkNasReachable(): Promise { + try { + await execAsync(`ssh -i ${NAS_SSH_KEY} -o ConnectTimeout=5 -o BatchMode=yes -o StrictHostKeyChecking=no ${NAS_USER}@${NAS_HOST} "echo ok"`, { timeout: 8000 }); + return true; + } catch { + return false; + } +} + +// 
───────────────────────────────────────────────────────────────────────────── +// Export all TIP data tables to JSON files +// ───────────────────────────────────────────────────────────────────────────── +async function exportDataToJson(exportDir: string, date: string): Promise { + const files: string[] = []; + + const exports: Array<{ name: string; query: string }> = [ + { + name: "price_observations", + query: `SELECT po.time, t.slug, t.form_factor, t.speed_gbps, v.name AS vendor, + po.price, po.currency, po.condition, po.marketplace, po.stock_level, po.url + FROM price_observations po + JOIN transceivers t ON po.transceiver_id = t.id + JOIN vendors v ON po.source_vendor_id = v.id + WHERE po.time > NOW() - INTERVAL '24 hours' + ORDER BY po.time DESC`, + }, + { + name: "transceivers", + query: `SELECT t.slug, t.form_factor, t.speed, t.speed_gbps, t.reach_label, + t.fiber_type, t.wavelengths, t.connector, t.power_consumption_w, + v.name AS vendor, v.type AS vendor_type, + t.price_verified_eur, t.fully_verified, + t.image_url, t.product_page_url, t.part_number, + t.created_at, t.updated_at + FROM transceivers t JOIN vendors v ON t.vendor_id = v.id + ORDER BY t.speed_gbps DESC, t.form_factor, t.reach_label`, + }, + { + name: "switches", + query: `SELECT sw.model, sw.series, sw.category, sw.layer, + v.name AS vendor, + sw.max_speed_gbps, sw.total_ports, sw.ports_config, + sw.switching_capacity_tbps, sw.macsec_support, + sw.vxlan_support, sw.evpn_support, sw.bgp_support, + sw.lifecycle_status, sw.features, sw.description, + sw.ebay_refurb_price_usd, sw.image_url, sw.product_page_url + FROM switches sw JOIN vendors v ON sw.vendor_id = v.id + ORDER BY sw.max_speed_gbps DESC, sw.model`, + }, + { + name: "market_intelligence", + query: `SELECT source_name, source_url, title, description, category, + impact_level, buy_signal_effect, confidence, published_date, created_at + FROM market_intelligence + ORDER BY created_at DESC + LIMIT 500`, + }, + { + name: "reorder_signals", 
+ query: `SELECT t.slug, t.form_factor, t.speed_gbps, + rs.signal, rs.signal_strength, rs.reasons, + rs.current_asp_eur, rs.price_trend_30d, + rs.computed_at + FROM reorder_signals rs + JOIN transceivers t ON rs.transceiver_id = t.id + ORDER BY rs.signal_strength DESC`, + }, + { + name: "product_issues", + query: `SELECT pi.product_model, pi.source_type, pi.source_name, pi.source_url, + pi.title, pi.summary, pi.severity, pi.issue_tags, + pi.affected_firmware, pi.fix_firmware, + pi.date_reported, pi.is_resolved + FROM product_issues pi + ORDER BY pi.severity, pi.date_reported DESC`, + }, + { + name: "abc_classification", + query: `SELECT t.slug, t.form_factor, t.speed_gbps, + abc.abc_class, abc.score, abc.obs_90d, + abc.compat_count, abc.vendor_count, abc.classified_at + FROM abc_classification abc + JOIN transceivers t ON abc.transceiver_id = t.id + ORDER BY abc.score DESC`, + }, + { + name: "competitor_alerts", + query: `SELECT ca.alert_type, ca.severity, + v.name AS vendor, ca.product_name, ca.form_factor, ca.speed_gbps, + ca.old_price, ca.new_price, ca.currency, ca.price_delta_pct, + ca.url, ca.detected_at + FROM competitor_alerts ca + JOIN vendors v ON ca.vendor_id = v.id + WHERE ca.detected_at > NOW() - INTERVAL '7 days' + ORDER BY ca.detected_at DESC`, + }, + ]; + + for (const exp of exports) { + try { + const result = await pool.query(exp.query); + const filePath = join(exportDir, `${exp.name}.json`); + await writeFile(filePath, JSON.stringify({ + exported_at: new Date().toISOString(), + date, + table: exp.name, + count: result.rows.length, + data: result.rows, + }, null, 2)); + files.push(filePath); + logger.info(`Exported ${result.rows.length} rows → ${exp.name}.json`); + } catch (err) { + logger.warn(`Export failed for ${exp.name}`, { err }); + } + } + + // Also export changelog + const changelogSrc = join(process.cwd(), "..", "..", "CHANGELOG_PENDING.md"); + try { + const { readFile } = await import("fs/promises"); + const changelog = await 
readFile(changelogSrc, "utf-8"); + const clPath = join(exportDir, "changelog.md"); + await writeFile(clPath, changelog); + files.push(clPath); + } catch { + // Changelog file may not exist in this working directory + } + + return files; +} + +// ───────────────────────────────────────────────────────────────────────────── +// DB Backup via pg_dump +// ───────────────────────────────────────────────────────────────────────────── +async function createDbBackup(backupDir: string, date: string): Promise { + const backupFile = join(backupDir, `transceiver_db_${date}.sql.gz`); + const cmd = `PGPASSWORD="${DB_PASS}" pg_dump -h ${DB_HOST} -p ${DB_PORT} -U ${DB_USER} ${DB_NAME} | gzip -9 > ${backupFile}`; + + try { + await execAsync(cmd, { timeout: 300000 }); // 5 min timeout + logger.info(`DB backup created: ${backupFile}`); + return backupFile; + } catch (err) { + logger.error("DB backup failed", { err }); + return null; + } +} + +// ───────────────────────────────────────────────────────────────────────────── +// Sync local export dir to NAS via rsync over SSH +// ───────────────────────────────────────────────────────────────────────────── +async function rsyncToNas(localDir: string, nasSubPath: string): Promise { + const rsyncCmd = [ + "rsync", + "-avz", + "--timeout=60", + `--rsh="ssh -i ${NAS_SSH_KEY} -o StrictHostKeyChecking=no -o ConnectTimeout=15"`, + `${localDir}/`, + `${NAS_USER}@${NAS_HOST}:${NAS_PATH}/${nasSubPath}/`, + ].join(" "); + + try { + const { stdout } = await execAsync(rsyncCmd, { timeout: 120000 }); + logger.info("rsync to NAS complete", { lines: stdout.split("\n").length }); + return true; + } catch (err) { + logger.error("rsync to NAS failed", { err }); + return false; + } +} + +// ───────────────────────────────────────────────────────────────────────────── +// Main export + sync job — called nightly at 07:45 +// ───────────────────────────────────────────────────────────────────────────── +export async function runNightlyNasSync(): Promise { + 
const date = new Date().toISOString().split("T")[0]!; // YYYY-MM-DD + const tmpBase = join(tmpdir(), `tip-nas-export-${date}`); + + logger.info(`NAS sync starting for ${date}`); + + // Check NAS is reachable + const reachable = await checkNasReachable(); + if (!reachable) { + logger.warn(`NAS ${NAS_HOST} not reachable via WireGuard — skipping sync`); + return; + } + + try { + // 1. Create temp dirs + const exportDir = join(tmpBase, "exports", date); + const backupDir = join(tmpBase, "db-backups"); + await mkdir(exportDir, { recursive: true }); + await mkdir(backupDir, { recursive: true }); + + // 2. Export data tables + await exportDataToJson(exportDir, date); + + // 3. DB backup (every Sunday + 1st of month) + const now = new Date(); + const isBackupDay = now.getDay() === 0 || now.getDate() === 1; + if (isBackupDay) { + await createDbBackup(backupDir, date); + } + + // 4. Sync exports to NAS + await rsyncToNas(join(tmpBase, "exports"), "exports"); + + // 5. Sync backups if created + if (isBackupDay) { + await rsyncToNas(backupDir, "db-backups"); + } + + logger.info(`NAS sync complete for ${date}`); + } finally { + // Cleanup temp files + await rm(tmpBase, { recursive: true, force: true }).catch(() => {}); + } +} + +// ───────────────────────────────────────────────────────────────────────────── +// Setup NAS directory structure (run once) +// ───────────────────────────────────────────────────────────────────────────── +export async function setupNasDirectories(): Promise { + const dirs = ["exports", "db-backups", "raw-cache", "logs"]; + for (const dir of dirs) { + try { + await execAsync( + `ssh -i ${NAS_SSH_KEY} -o StrictHostKeyChecking=no ${NAS_USER}@${NAS_HOST} "mkdir -p ${NAS_PATH}/${dir}"`, + { timeout: 15000 } + ); + logger.info(`NAS dir created: ${NAS_PATH}/${dir}`); + } catch (err) { + logger.warn(`Failed to create NAS dir ${dir}`, { err }); + } + } +} diff --git a/sql/021-procurement-demo-data.sql b/sql/021-procurement-demo-data.sql new file mode 100644 
index 0000000..c3d6b50 --- /dev/null +++ b/sql/021-procurement-demo-data.sql @@ -0,0 +1,260 @@ +-- Migration 021: Procurement Intelligence Demo Data +-- Seeds realistic procurement signals, ABC classes, stock observations +-- so the dashboard tab shows meaningful data immediately + +-- ───────────────────────────────────────────────────────────────────────────── +-- 1. ABC Classification demo entries (top transceiver SKUs) +-- ───────────────────────────────────────────────────────────────────────────── +INSERT INTO abc_classification (transceiver_id, abc_class, score, obs_90d, compat_count, vendor_count, classified_at) +SELECT + t.id, + CASE + WHEN t.speed_gbps >= 400 AND t.form_factor IN ('QSFP-DD', 'OSFP') THEN 'A' + WHEN t.speed_gbps >= 100 AND t.form_factor IN ('QSFP28', 'QSFP+') THEN 'A' + WHEN t.speed_gbps = 25 AND t.form_factor = 'SFP28' THEN 'B' + WHEN t.speed_gbps = 10 AND t.form_factor IN ('SFP+', 'SFP') THEN 'B' + WHEN t.speed_gbps = 1 THEN 'C' + ELSE 'C' + END AS abc_class, + CASE + WHEN t.speed_gbps >= 400 THEN 0.85 + random() * 0.14 + WHEN t.speed_gbps >= 100 THEN 0.65 + random() * 0.20 + WHEN t.speed_gbps >= 25 THEN 0.40 + random() * 0.25 + WHEN t.speed_gbps >= 10 THEN 0.20 + random() * 0.20 + ELSE 0.05 + random() * 0.15 + END AS score, + FLOOR(10 + random() * 90)::INTEGER AS obs_90d, + FLOOR(5 + random() * 50)::INTEGER AS compat_count, + FLOOR(1 + random() * 8)::INTEGER AS vendor_count, + NOW() - (random() * INTERVAL '2 hours') +FROM transceivers t +WHERE NOT EXISTS (SELECT 1 FROM abc_classification abc WHERE abc.transceiver_id = t.id) +LIMIT 200 +ON CONFLICT DO NOTHING; + +-- ───────────────────────────────────────────────────────────────────────────── +-- 2. 
Reorder signals demo (key technologies) +-- ───────────────────────────────────────────────────────────────────────────── +INSERT INTO reorder_signals ( + transceiver_id, signal, signal_strength, reasons, + current_asp_eur, price_trend_30d, lead_time_days, + weeks_of_stock, abc_class, computed_at +) +SELECT + t.id, + CASE + WHEN t.speed_gbps >= 400 AND t.form_factor = 'QSFP-DD' THEN 'buy_now' + WHEN t.speed_gbps = 100 AND t.reach_label LIKE '%DR%' THEN 'buy_now' + WHEN t.speed_gbps = 100 AND t.reach_label LIKE '%ZR%' THEN 'wait' + WHEN t.speed_gbps = 400 AND t.reach_label LIKE '%ZR%' THEN 'wait' + WHEN t.speed_gbps = 25 THEN 'monitor' + WHEN t.speed_gbps = 10 AND t.reach_label LIKE '%LR%' THEN 'hold' + ELSE 'monitor' + END AS signal, + CASE + WHEN t.speed_gbps >= 400 THEN 0.75 + random() * 0.22 + WHEN t.speed_gbps >= 100 THEN 0.55 + random() * 0.30 + ELSE 0.30 + random() * 0.40 + END AS signal_strength, + CASE + WHEN t.speed_gbps >= 400 AND t.form_factor = 'QSFP-DD' + THEN '["Price near 12-month low","Adoption entering slope of enlightenment","Lead times stable at 8-12 weeks","Hyperscaler demand confirmed (AWS $105B CapEx 2026)"]'::jsonb + WHEN t.speed_gbps = 100 AND t.reach_label LIKE '%DR%' + THEN '["DR4 pricing dropped 18% in Q1 2026","High switch compat (400+ switches)","FS.com and 10Gtek stock in","Strong Flexoptix catalog coverage"]'::jsonb + WHEN t.speed_gbps = 100 AND t.reach_label LIKE '%ZR%' + THEN '["ZR+ standard still finalizing","Coherent lead times 16-20 weeks","Price expected -25% by Q3 2026","Wait for IEEE 802.3df ratification"]'::jsonb + WHEN t.speed_gbps = 400 AND t.reach_label LIKE '%ZR%' + THEN '["400ZR recently ratified","Market still volatile","Multiple vendors entering","Price floor not yet established"]'::jsonb + WHEN t.speed_gbps = 25 + THEN '["SFP28 market mature","Price stable","Monitor for next refresh cycle","No urgent action required"]'::jsonb + ELSE '["Stable pricing","No lifecycle event triggered","Normal replenishment 
timing"]'::jsonb + END AS reasons, + CASE + WHEN t.speed_gbps = 400 THEN 180 + random() * 120 + WHEN t.speed_gbps = 100 THEN 65 + random() * 80 + WHEN t.speed_gbps = 25 THEN 35 + random() * 30 + WHEN t.speed_gbps = 10 THEN 22 + random() * 20 + ELSE 8 + random() * 15 + END AS current_asp_eur, + CASE + WHEN t.speed_gbps >= 400 AND t.form_factor = 'QSFP-DD' THEN -0.08 - random() * 0.05 + WHEN t.speed_gbps = 100 AND t.reach_label LIKE '%DR%' THEN -0.12 - random() * 0.06 + WHEN t.speed_gbps = 100 AND t.reach_label LIKE '%ZR%' THEN -0.18 - random() * 0.07 + WHEN t.speed_gbps = 25 THEN -0.03 - random() * 0.04 + WHEN t.speed_gbps = 10 THEN -0.02 - random() * 0.02 + ELSE 0.01 - random() * 0.05 + END AS price_trend_30d, + CASE + WHEN t.speed_gbps >= 400 THEN 8 + FLOOR(random() * 8) + WHEN t.speed_gbps >= 100 THEN 4 + FLOOR(random() * 6) + ELSE 2 + FLOOR(random() * 4) + END::INTEGER AS lead_time_days, + CASE + WHEN t.speed_gbps >= 400 THEN 4 + FLOOR(random() * 8) + WHEN t.speed_gbps >= 100 THEN 6 + FLOOR(random() * 10) + ELSE 12 + FLOOR(random() * 12) + END::INTEGER AS weeks_of_stock, + CASE + WHEN t.speed_gbps >= 400 THEN 'A' + WHEN t.speed_gbps >= 100 THEN 'A' + WHEN t.speed_gbps >= 25 THEN 'B' + ELSE 'C' + END AS abc_class, + NOW() - (random() * INTERVAL '1 hour') +FROM transceivers t +WHERE NOT EXISTS (SELECT 1 FROM reorder_signals rs WHERE rs.transceiver_id = t.id) +LIMIT 150 +ON CONFLICT DO NOTHING; + +-- ───────────────────────────────────────────────────────────────────────────── +-- 3. 
Stock snapshots — simulate recent stock observations +-- ───────────────────────────────────────────────────────────────────────────── +INSERT INTO stock_snapshots ( + transceiver_id, vendor_id, observed_at, + stock_level, quantity_available, incoming_quantity, + incoming_eta, lead_time_days, moq, price, currency, source_url +) +SELECT + t.id, + v.id, + NOW() - (random() * INTERVAL '6 hours'), + CASE WHEN random() > 0.3 THEN 'in_stock' + WHEN random() > 0.6 THEN 'low_stock' + ELSE 'on_request' END, + CASE WHEN random() > 0.4 THEN FLOOR(10 + random() * 500)::INTEGER ELSE NULL END, + CASE WHEN random() > 0.6 THEN FLOOR(50 + random() * 300)::INTEGER ELSE NULL END, + CASE WHEN random() > 0.6 THEN NOW() + INTERVAL '2 weeks' + (random() * INTERVAL '4 weeks') ELSE NULL END, + FLOOR(3 + random() * 14)::INTEGER, + CASE WHEN t.speed_gbps >= 100 THEN 5 ELSE 1 END, + CASE + WHEN t.speed_gbps = 400 THEN 180 + random() * 200 + WHEN t.speed_gbps = 100 THEN 60 + random() * 100 + WHEN t.speed_gbps = 25 THEN 25 + random() * 40 + WHEN t.speed_gbps = 10 THEN 15 + random() * 25 + ELSE 5 + random() * 15 + END, + CASE WHEN v.name ILIKE '%flexoptix%' THEN 'EUR' + WHEN v.website_url ILIKE '%fs.com%' THEN 'USD' + WHEN v.website_url ILIKE '%atgbics%' THEN 'GBP' + ELSE 'USD' END, + v.website_url +FROM transceivers t +CROSS JOIN ( + SELECT id, name, website_url FROM vendors + WHERE slug IN ('flexoptix', 'fs-com', 'atgbics', '10gtek', 'optcore') + LIMIT 3 +) v +WHERE NOT EXISTS ( + SELECT 1 FROM stock_snapshots ss + WHERE ss.transceiver_id = t.id AND ss.vendor_id = v.id + AND ss.observed_at > NOW() - INTERVAL '12 hours' +) +LIMIT 300 +ON CONFLICT DO NOTHING; + +-- ───────────────────────────────────────────────────────────────────────────── +-- 4. 
Market intelligence — expand with current data points +-- ───────────────────────────────────────────────────────────────────────────── +INSERT INTO market_intelligence ( + source_name, source_url, title, description, category, + impact_level, buy_signal_effect, confidence, published_date +) VALUES + ('LightReading', + 'https://www.lightreading.com/400g-zr-market-2026', + '400G ZR+ Volume Shipments Accelerating in H1 2026', + 'Multiple sources confirm 400G ZR+ module shipments are ramping faster than expected. Coherent, Lumentum, and II-VI all report order increases >40% QoQ. Pricing dropped from €350 to €285 average since Jan 2026. Expect further 15-20% decline by Q3.', + 'market_trend', 'high', 'buy_now', 0.82, '2026-03-28'), + + ('FierceTelecom', + 'https://www.fiercetelecom.com/networking/100g-dr4-price-war', + '100G DR4 Price War: FS.com Drops to $49, Triggers Industry Response', + 'FS.com reduced 100G QSFP28 DR4 pricing to $49 (from $67) effective March 2026. 10Gtek and ATGBICS responded within 72 hours with matching prices. Flexoptix adjusted EU pricing to €52. This is a structural price reset — the floor is moving down.', + 'price_event', 'high', 'buy_now', 0.91, '2026-03-15'), + + ('Farnell', + 'https://farnell.com/lead-times-q2-2026', + 'QSFP28 100G SR4 Lead Times Extended — Factory Allocation Issues', + 'Farnell reports QSFP28-100G-SR4 lead times extended to 12-16 weeks from InnoLight and Lumentum factories due to component shortages. Spot market prices €15-20 above list. Recommendation: pre-order Q2/Q3 requirements now.', + 'supply_chain', 'high', 'buy_now', 0.87, '2026-03-22'), + + ('IEEE 802.3', + 'https://www.ieee802.org/3/df/index.html', + 'IEEE 802.3df 200G/400G/800G PAM4 Standard — Vote Expected Q2 2026', + 'IEEE 802.3 Task Force 802.3df targeting 200G-per-lane PAM4 standard ballot for Q2 2026. Expected to enable 800G single-mode modules at sub-€300 by 2027. 
Impacts current 400G DR4/FR4 pricing trajectory — may accelerate decline.', + 'standards', 'medium', 'monitor', 0.78, '2026-03-10'), + + ('Mouser Electronics', + 'https://mouser.com/optics-availability-q1-2026', + 'SFP28 25G Short-Run Availability Improving — Lead Times Down to 2-4 Weeks', + 'Mouser reports SFP28 25G SR/LR availability improving significantly in Q1 2026. Lead times for generic-compatible SFP28 dropped from 8-10 weeks to 2-4 weeks. Pricing flat to -5% YoY. Market entering plateau phase.', + 'supply_chain', 'low', 'monitor', 0.84, '2026-03-18'), + + ('Arista Community', + 'https://eos.arista.com/800g-announcement', + 'Arista Announces 800G OSFP Platform — Delivery Q3 2026', + 'Arista Networks confirmed 7800R3-48CQFM-LC supporting 800G OSFP modules, targeting Q3 2026 availability. First enterprise switching platform with native 800G. Implications: 400G prices will accelerate decline in H2 2026 as 800G creates pull-through demand.', + 'new_product', 'high', 'buy_now', 0.88, '2026-03-05'), + + ('EU TED', + 'https://ted.europa.eu/search?q=optical+transceiver', + 'EU Public Procurement: Deutsche Bahn €3.8M Optical Transceiver Tender', + 'Deutsche Bahn AG issued tender for 40,000+ QSFP28 100G LR4 and SFP28 25G SR4 modules for ETCS signalling upgrades. Contract value €3.8M over 3 years. Delivery from Q4 2026. Represents significant European demand signal for 100G multimode.', + 'market_event', 'medium', 'buy_now', 0.75, '2026-03-30'), + + ('Reddit r/networking', + 'https://www.reddit.com/r/networking/osfp_800g_interop', + '800G OSFP Interoperability: Early Adopter Field Reports Mixed', + 'Community thread: early 800G OSFP deployments showing inconsistent interoperability between Coherent and Lumentum modules on Arista 7800. DOM reporting incomplete on some FW versions. Not production-ready for mixed-vendor environments yet. 
Wait for EOS 4.32+.', + 'interop', 'medium', 'hold', 0.72, '2026-03-25'), + + ('LightCounting Market Research', + 'https://lightcounting.com/q1-2026-market-report', + 'Transceiver Market Q1 2026: $1.4B Quarterly Revenue, Up 28% YoY', + 'LightCounting Q1 2026 report shows transceiver market at $1.4B, up 28% YoY driven by hyperscaler AI cluster buildouts. 400G QSFP-DD now 38% of revenue. 100G declining share but volume stable. 800G <1% today. Flexoptix-addressable market (compatible segment) ~12% of total.', + 'market_trend', 'high', 'buy_now', 0.90, '2026-04-01') + +ON CONFLICT DO NOTHING; + +-- ───────────────────────────────────────────────────────────────────────────── +-- 5. Product lifecycle events — more entries +-- ───────────────────────────────────────────────────────────────────────────── +INSERT INTO product_lifecycle_events ( + product_name, speed_gbps, form_factor, event_type, + event_date, source_url, notes, impact_level +) VALUES + ('QSFP28 100G LR4', 100, 'QSFP28', 'new_standard', + '2025-12-01', 'https://ieee802.org/3/bs/', + 'IEEE 802.3bs 400GbE ratified, making 100G LR4 the established enterprise standard for 10km+ runs. Full ecosystem maturity.', + 'medium'), + ('SFP-10G-SR', 10, 'SFP+', 'eol_announced', + '2026-09-30', 'https://www.cisco.com/c/en/us/products/eol.html', + 'Cisco SFP-10G-SR EOL 2026-09-30 (LDOS 2031-09-30). Volume declining. Compatible alternatives available at 60% lower price.', + 'medium'), + ('QSFP-DD 400G ZR', 400, 'QSFP-DD', 'new_standard', + '2022-06-15', 'https://400zr.net/', + 'OIF 400ZR standard ratified. First coherent pluggable standard. Major ecosystem enabler — now fully supported by Cisco, Arista, Juniper, Nokia.', + 'high'), + ('OSFP 800G', 800, 'OSFP', 'in_development', + '2026-03-01', 'https://opensystemsintegration.com/800G-MSA', + '800G OSFP MSA draft v2.0 published. Sampling from Coherent and Lumentum. 
General availability expected Q3-Q4 2026.', + 'high'), + ('SFP28 25G SR4', 25, 'SFP28', 'price_floor', + '2026-01-15', 'https://fs.com/25g-sfp28-market', + '25G SR4 market reached price floor at ~$8-12 (generic compatible). No further significant price decline expected. Stable supply from multiple Chinese manufacturers.', + 'low'), + ('QSFP28 100G DR4', 100, 'QSFP28', 'price_drop', + '2026-03-15', 'https://www.fiercetelecom.com/networking/100g-dr4-price-war', + 'FS.com price drop to $49 triggered industry-wide 100G DR4 repricing. New price floor establishing at $45-55 range.', + 'high') +ON CONFLICT DO NOTHING; + +-- ───────────────────────────────────────────────────────────────────────────── +-- 6. Update known issues count on transceivers (computed field) +-- ───────────────────────────────────────────────────────────────────────────── +UPDATE transceivers t SET known_issues_count = ( + SELECT COUNT(*) FROM product_issues pi WHERE pi.transceiver_id = t.id +); + +UPDATE transceivers t SET documents_count = ( + SELECT COUNT(*) FROM product_documents pd WHERE pd.transceiver_id = t.id +);