-- transceiver-db/migrations/001_foundation.sql
-- Rene Fichtmueller de5bdb24ca Initial TIP foundation: schema, seed data, crawlers, API, MCP server
-- - PostgreSQL 17 + TimescaleDB schema with 12 tables
-- - 48 standards (IEEE, SFF, ITU-T, OIF, MSA)
-- - 33 form factors (SFP through OSFP-XD/CPO)
-- - 85+ vendors (OEM, compatible, manufacturers, marketplaces)
-- - 80+ seed transceivers (1G-1.6T, CWDM, BiDi, DAC, AOC, FC, PON)
-- - 60+ network devices (Cisco, Juniper, Arista, HPE, Dell, etc.)
-- - Crawler framework with fs.com and eBay crawlers
-- - REST API (15 endpoints) on port 3200
-- - MCP server (12 tools) on port 3201
-- - PM2 ecosystem for production deployment on Erik (.82)
-- 2026-03-31 08:11:49 +02:00
--
-- 455 lines
-- 15 KiB
-- SQL

-- TIP Foundation Schema
-- PostgreSQL 17 + TimescaleDB
-- Enable extensions
-- timescaledb is required by the create_hypertable() call on the prices table
-- later in this migration; without the extension that call fails because the
-- function does not exist. IF NOT EXISTS keeps this a no-op on databases where
-- the extension has already been created.
-- NOTE(review): the TimescaleDB library must be installed on the server
-- (and preloaded) for this statement to succeed - confirm for the target host.
CREATE EXTENSION IF NOT EXISTS "timescaledb";
CREATE EXTENSION IF NOT EXISTS "uuid-ossp";
CREATE EXTENSION IF NOT EXISTS "pg_trgm"; -- fuzzy text search (gin_trgm_ops indexes below)
CREATE EXTENSION IF NOT EXISTS "btree_gin"; -- GIN index support
-- ============================================================
-- ENUMS
-- ============================================================
-- Closed value sets used as column types by the tables in this migration.
-- Labels are listed one per line; their order is unchanged because enum
-- values compare in declaration order.

-- Lifecycle status of a transceiver.
CREATE TYPE transceiver_status AS ENUM (
    'active',
    'eol',
    'pre_release',
    'nrnd',
    'unknown'
);

-- Unit that accompanies a data-rate value.
CREATE TYPE data_rate_unit AS ENUM (
    'Mbps',
    'Gbps',
    'Tbps'
);

-- Unit that accompanies a reach value.
CREATE TYPE reach_unit AS ENUM (
    'm',
    'km'
);

-- Operating temperature class.
CREATE TYPE temperature_range AS ENUM (
    'commercial',
    'extended',
    'industrial'
);

-- Kind of monitoring interface a module exposes.
CREATE TYPE dom_type AS ENUM (
    'none',
    'ddm',
    'ddmi',
    'cmis',
    'sff8472',
    'sff8636'
);

-- Physical connector / cable termination.
CREATE TYPE connector_type AS ENUM (
    'LC',
    'SC',
    'MPO-12',
    'MPO-16',
    'MPO-24',
    'CS',
    'SN',
    'FC',
    'ST',
    'MTRJ',
    'E2000',
    'copper_rj45',
    'cx4',
    'dac_passive',
    'dac_active',
    'aoc',
    'none',
    'other'
);

-- Transmission medium.
CREATE TYPE fiber_type AS ENUM (
    'smf',
    'mmf_om1',
    'mmf_om2',
    'mmf_om3',
    'mmf_om4',
    'mmf_om5',
    'copper',
    'dac',
    'aoc',
    'free_space',
    'other'
);

-- Wavelength band or multiplexing grid.
CREATE TYPE wavelength_band AS ENUM (
    'O',
    'E',
    'S',
    'C',
    'L',
    'U',
    'visible',
    'cwdm',
    'dwdm',
    'lwdm',
    'swdm',
    'other'
);

-- Role a vendor plays in the market.
CREATE TYPE vendor_type AS ENUM (
    'oem',
    'compatible',
    'distributor',
    'manufacturer',
    'marketplace',
    'refurbished'
);

-- Currency codes accepted for price rows.
CREATE TYPE price_currency AS ENUM (
    'USD',
    'EUR',
    'GBP',
    'CNY',
    'JPY',
    'KRW',
    'TWD',
    'THB',
    'INR',
    'CAD',
    'AUD'
);

-- Phase of a technology on the hype cycle.
CREATE TYPE hype_phase AS ENUM (
    'innovation_trigger',
    'peak_inflated',
    'trough_disillusionment',
    'slope_enlightenment',
    'plateau_productivity',
    'decline'
);

-- State of a crawl job.
CREATE TYPE crawl_status AS ENUM (
    'pending',
    'running',
    'success',
    'failed',
    'rate_limited'
);

-- Kind of stored media / document.
CREATE TYPE media_type AS ENUM (
    'image',
    'datasheet',
    'manual',
    'diagram',
    'video',
    'certificate'
);
-- ============================================================
-- CORE TABLES
-- ============================================================
-- Standards (IEEE, SFF, ITU-T, OIF, etc.)
-- One row per standard, identified by a unique name.
CREATE TABLE standards (
    id            SERIAL       PRIMARY KEY,
    name          VARCHAR(100) NOT NULL UNIQUE,
    body          VARCHAR(50)  NOT NULL,              -- issuing body: IEEE, SNIA/SFF, ITU-T, OIF, MSA
    version       VARCHAR(50),
    year          INTEGER,
    url           TEXT,
    description   TEXT,
    superseded_by INTEGER      REFERENCES standards (id), -- self-reference to another standards row
    created_at    TIMESTAMPTZ  DEFAULT NOW(),
    updated_at    TIMESTAMPTZ  DEFAULT NOW()
);
-- Form Factors
-- One row per module form factor, identified by a unique short name and
-- optionally linked to the standard that defines it.
CREATE TABLE form_factors (
    id             SERIAL         PRIMARY KEY,
    name           VARCHAR(50)    NOT NULL UNIQUE,
    full_name      VARCHAR(200),
    standard_id    INTEGER        REFERENCES standards (id),
    lanes          INTEGER,                          -- number of electrical lanes
    max_data_rate  NUMERIC(10,2),
    data_rate_unit data_rate_unit DEFAULT 'Gbps',    -- unit for max_data_rate
    width_mm       NUMERIC(6,2),
    height_mm      NUMERIC(6,2),
    depth_mm       NUMERIC(6,2),
    power_max_w    NUMERIC(6,2),
    generation     INTEGER,                          -- used by the hype cycle
    release_year   INTEGER,
    eol_year       INTEGER,
    description    TEXT,
    image_url      TEXT,
    created_at     TIMESTAMPTZ    DEFAULT NOW(),
    updated_at     TIMESTAMPTZ    DEFAULT NOW()
);
-- Vendors / Manufacturers / Sellers
-- One row per company; slug is the unique key, name itself is not unique.
-- The scrape_* columns hold per-vendor crawl configuration.
CREATE TABLE vendors (
    id              SERIAL       PRIMARY KEY,
    name            VARCHAR(200) NOT NULL,
    slug            VARCHAR(200) NOT NULL UNIQUE,
    vendor_type     vendor_type  NOT NULL DEFAULT 'compatible',
    website         TEXT,
    logo_url        TEXT,
    country         VARCHAR(3),                      -- ISO 3166-1 alpha-3 code
    founded_year    INTEGER,
    description     TEXT,
    is_oem          BOOLEAN      DEFAULT FALSE,      -- e.g. Cisco, Juniper, Arista
    is_factory      BOOLEAN      DEFAULT FALSE,      -- e.g. Hisense, Innolight
    aliases         TEXT[],                          -- alternative names
    scrape_url      TEXT,                            -- base URL of the catalog
    scrape_enabled  BOOLEAN      DEFAULT FALSE,
    scrape_interval INTEGER      DEFAULT 86400,      -- in seconds
    last_scraped_at TIMESTAMPTZ,
    created_at      TIMESTAMPTZ  DEFAULT NOW(),
    updated_at      TIMESTAMPTZ  DEFAULT NOW()
);
-- Core Transceiver Table
-- One row per (part_number, vendor) pair; that pair is enforced as unique by
-- the table-level constraint at the end of the definition.
CREATE TABLE transceivers (
    id                  SERIAL             PRIMARY KEY,
    part_number         VARCHAR(200)       NOT NULL,
    vendor_id           INTEGER            NOT NULL REFERENCES vendors (id),
    form_factor_id      INTEGER            REFERENCES form_factors (id),

    -- Classification
    name                VARCHAR(500),
    description         TEXT,
    category            VARCHAR(100),                      -- SFP, SFP+, QSFP28, QSFP-DD, OSFP, etc.
    subcategory         VARCHAR(100),                      -- SR, LR, ER, ZR, BiDi, CWDM, DWDM, DAC, AOC

    -- Performance
    data_rate           NUMERIC(10,2),
    data_rate_unit      data_rate_unit     DEFAULT 'Gbps',
    max_reach           NUMERIC(10,2),
    reach_unit          reach_unit         DEFAULT 'km',

    -- Optical
    wavelength_nm       NUMERIC(8,2),                      -- transmit (TX) wavelength
    wavelength_rx       NUMERIC(8,2),                      -- receive (RX) wavelength, for BiDi
    wavelengths         NUMERIC(8,2)[],                    -- CWDM/DWDM channel list
    wavelength_band     wavelength_band,
    tx_power_min        NUMERIC(6,2),                      -- dBm
    tx_power_max        NUMERIC(6,2),
    rx_sensitivity      NUMERIC(6,2),                      -- dBm
    link_budget_db      NUMERIC(6,2),

    -- Physical
    connector           connector_type,
    fiber_type          fiber_type,
    duplex              BOOLEAN            DEFAULT TRUE,
    breakout            VARCHAR(50),                       -- e.g. "4x25G", "8x50G"

    -- Environmental
    temp_range          temperature_range  DEFAULT 'commercial',
    temp_min_c          NUMERIC(5,1),
    temp_max_c          NUMERIC(5,1),
    power_consumption_w NUMERIC(6,2),

    -- Monitoring
    dom_support         dom_type           DEFAULT 'none',

    -- OEM Cross-Reference
    oem_part_number     VARCHAR(200),                      -- part number of the original OEM module
    oem_vendor_id       INTEGER            REFERENCES vendors (id),

    -- Status
    status              transceiver_status DEFAULT 'active',
    release_date        DATE,
    eol_date            DATE,

    -- Media
    image_url           TEXT,
    datasheet_url       TEXT,
    product_url         TEXT,

    -- Metadata
    tags                TEXT[],
    raw_specs           JSONB,                             -- scraped data as originally captured
    source              VARCHAR(100),                      -- origin of this record
    source_url          TEXT,
    last_verified       TIMESTAMPTZ,
    created_at          TIMESTAMPTZ        DEFAULT NOW(),
    updated_at          TIMESTAMPTZ        DEFAULT NOW(),

    UNIQUE (part_number, vendor_id)
);
-- ============================================================
-- PRICING (TimescaleDB hypertable)
-- ============================================================
-- Price observations over time. Each row is one observed offer for a
-- transceiver at a vendor; the table has no primary key of its own.
CREATE TABLE prices (
    time           TIMESTAMPTZ    NOT NULL,
    transceiver_id INTEGER        NOT NULL REFERENCES transceivers (id),
    vendor_id      INTEGER        NOT NULL REFERENCES vendors (id),
    price          NUMERIC(12,4)  NOT NULL,
    currency       price_currency DEFAULT 'USD',
    price_usd      NUMERIC(12,4),                    -- price normalized to USD
    quantity_min   INTEGER        DEFAULT 1,
    quantity_max   INTEGER,
    in_stock       BOOLEAN,
    stock_quantity INTEGER,
    lead_time_days INTEGER,
    condition      VARCHAR(20)    DEFAULT 'new',     -- new, refurbished, used
    url            TEXT,
    source         VARCHAR(100),
    created_at     TIMESTAMPTZ    DEFAULT NOW()
);

-- Convert prices into a TimescaleDB hypertable keyed on the time column.
SELECT create_hypertable(
    'prices',
    'time',
    if_not_exists => TRUE
);
-- ============================================================
-- COMPATIBILITY
-- ============================================================
-- Switch/Router models
-- One row per (vendor, model) pair, with a port count per cage type.
CREATE TABLE network_devices (
    id              SERIAL       PRIMARY KEY,
    vendor_id       INTEGER      NOT NULL REFERENCES vendors (id),
    model           VARCHAR(200) NOT NULL,
    series          VARCHAR(100),                    -- e.g. Catalyst 9300, EX4400
    device_type     VARCHAR(50),                     -- switch, router, firewall, olt, media_converter

    -- Port counts per form factor; all default to 0
    ports_sfp       INTEGER      DEFAULT 0,
    ports_sfp_plus  INTEGER      DEFAULT 0,
    ports_sfp28     INTEGER      DEFAULT 0,
    ports_qsfp_plus INTEGER      DEFAULT 0,
    ports_qsfp28    INTEGER      DEFAULT 0,
    ports_qsfp_dd   INTEGER      DEFAULT 0,
    ports_osfp      INTEGER      DEFAULT 0,
    ports_cfp       INTEGER      DEFAULT 0,
    ports_rj45      INTEGER      DEFAULT 0,

    max_throughput  VARCHAR(50),
    release_year    INTEGER,
    eol_date        DATE,
    status          VARCHAR(20)  DEFAULT 'active',
    image_url       TEXT,
    product_url     TEXT,
    manual_url      TEXT,
    raw_specs       JSONB,
    source          VARCHAR(100),
    created_at      TIMESTAMPTZ  DEFAULT NOW(),
    updated_at      TIMESTAMPTZ  DEFAULT NOW(),

    UNIQUE (vendor_id, model)
);
-- Compatibility matrix
-- Links a transceiver to a network device; at most one row per pair.
CREATE TABLE compatibility (
    id             SERIAL       PRIMARY KEY,
    transceiver_id INTEGER      NOT NULL REFERENCES transceivers (id),
    device_id      INTEGER      NOT NULL REFERENCES network_devices (id),
    verified       BOOLEAN      DEFAULT FALSE,       -- vendor-verified or community-tested
    verified_by    VARCHAR(100),                     -- vendor, community, lab
    firmware_min   VARCHAR(50),
    firmware_max   VARCHAR(50),
    notes          TEXT,
    source         VARCHAR(100),                     -- cisco_tmg, juniper_hct, community, etc.
    source_url     TEXT,
    verified_at    TIMESTAMPTZ,
    created_at     TIMESTAMPTZ  DEFAULT NOW(),
    updated_at     TIMESTAMPTZ  DEFAULT NOW(),

    UNIQUE (transceiver_id, device_id)
);
-- ============================================================
-- KNOWLEDGE BASE
-- ============================================================
-- FAQ / knowledge-base articles, addressed by a unique slug.
-- related_transceivers and related_devices are plain integer arrays; no
-- foreign key is declared on their elements.
CREATE TABLE faq_articles (
    id                   SERIAL       PRIMARY KEY,
    title                VARCHAR(500) NOT NULL,
    slug                 VARCHAR(500) NOT NULL UNIQUE,
    content              TEXT         NOT NULL,
    summary              TEXT,
    category             VARCHAR(100),
    tags                 TEXT[],
    related_transceivers INTEGER[],
    related_devices      INTEGER[],
    view_count           INTEGER      DEFAULT 0,
    helpful_count        INTEGER      DEFAULT 0,
    source               VARCHAR(100),
    source_url           TEXT,
    embedding_id         VARCHAR(100),               -- ID of the Qdrant point
    published            BOOLEAN      DEFAULT TRUE,
    created_at           TIMESTAMPTZ  DEFAULT NOW(),
    updated_at           TIMESTAMPTZ  DEFAULT NOW()
);
-- ============================================================
-- HYPE CYCLE ENGINE
-- ============================================================
-- Hype-cycle model state per technology: model parameters, the current and
-- predicted phases, and the input signals stored alongside them.
CREATE TABLE hype_cycles (
    id                SERIAL        PRIMARY KEY,
    technology        VARCHAR(200)  NOT NULL,        -- e.g. "QSFP-DD 400G", "Silicon Photonics"
    form_factor_id    INTEGER       REFERENCES form_factors (id),

    -- Bass Model Parameters
    bass_p            NUMERIC(10,6),                 -- coefficient of innovation
    bass_q            NUMERIC(10,6),                 -- coefficient of imitation
    bass_m            BIGINT,                        -- market potential

    current_phase     hype_phase,
    phase_started     DATE,
    predicted_peak    DATE,
    predicted_trough  DATE,
    predicted_plateau DATE,

    -- Signals
    adoption_units    BIGINT,
    market_size_usd   BIGINT,
    search_trend      NUMERIC(5,2),                  -- Google Trends, 0-100
    patent_count      INTEGER,
    paper_count       INTEGER,
    news_sentiment    NUMERIC(5,2),                  -- -1.0 to 1.0
    confidence        NUMERIC(5,2),                  -- model confidence, 0-1
    data_points       JSONB,                         -- time series data
    model_output      JSONB,                         -- full model results

    created_at        TIMESTAMPTZ   DEFAULT NOW(),
    updated_at        TIMESTAMPTZ   DEFAULT NOW()
);
-- ============================================================
-- MEDIA / DOCUMENTS
-- ============================================================
-- Images and documents. A row can point at a transceiver, a device and/or a
-- vendor; all three references are nullable.
CREATE TABLE media (
    id              SERIAL       PRIMARY KEY,
    transceiver_id  INTEGER      REFERENCES transceivers (id),
    device_id       INTEGER      REFERENCES network_devices (id),
    vendor_id       INTEGER      REFERENCES vendors (id),
    media_type      media_type   NOT NULL,
    title           VARCHAR(500),
    url             TEXT         NOT NULL,           -- URL the file was originally found at
    r2_key          VARCHAR(500),                    -- Cloudflare R2 key
    r2_url          TEXT,                            -- public R2 URL
    mime_type       VARCHAR(100),
    file_size_bytes BIGINT,
    width_px        INTEGER,
    height_px       INTEGER,
    ocr_text        TEXT,                            -- text extracted by Docling
    embedding_id    VARCHAR(100),                    -- ID of the Qdrant point
    created_at      TIMESTAMPTZ  DEFAULT NOW()
);
-- ============================================================
-- CRAWL TRACKING
-- ============================================================
-- One row per crawler run, with its status, counters and timing.
CREATE TABLE crawl_jobs (
    id             SERIAL       PRIMARY KEY,
    crawler        VARCHAR(100) NOT NULL,            -- crawler name: fscom, cisco_tmg, ebay, etc.
    status         crawl_status DEFAULT 'pending',

    -- Counters; all start at 0
    urls_total     INTEGER      DEFAULT 0,
    urls_processed INTEGER      DEFAULT 0,
    urls_failed    INTEGER      DEFAULT 0,
    items_found    INTEGER      DEFAULT 0,
    items_new      INTEGER      DEFAULT 0,
    items_updated  INTEGER      DEFAULT 0,

    error_message  TEXT,
    duration_ms    INTEGER,
    started_at     TIMESTAMPTZ,
    finished_at    TIMESTAMPTZ,
    created_at     TIMESTAMPTZ  DEFAULT NOW()
);
-- Individual errors recorded during crawling, optionally tied to a crawl job.
CREATE TABLE crawl_errors (
    id            SERIAL       PRIMARY KEY,
    job_id        INTEGER      REFERENCES crawl_jobs (id),
    url           TEXT,
    error_code    VARCHAR(20),
    error_message TEXT,
    retry_count   INTEGER      DEFAULT 0,
    created_at    TIMESTAMPTZ  DEFAULT NOW()
);
-- ============================================================
-- NEWS / BLOG
-- ============================================================
-- Scraped news and blog articles; url is the unique key.
CREATE TABLE news_articles (
    id                     SERIAL       PRIMARY KEY,
    title                  VARCHAR(500) NOT NULL,
    url                    TEXT         NOT NULL UNIQUE,
    source                 VARCHAR(100),
    author                 VARCHAR(200),
    content                TEXT,
    summary                TEXT,
    tags                   TEXT[],
    mentioned_technologies TEXT[],
    sentiment              NUMERIC(5,2),
    published_at           TIMESTAMPTZ,
    scraped_at             TIMESTAMPTZ  DEFAULT NOW(),
    created_at             TIMESTAMPTZ  DEFAULT NOW()
);
-- ============================================================
-- INDEXES
-- ============================================================
-- Secondary indexes for lookups, fuzzy search and foreign-key columns.
-- The gin_trgm_ops indexes rely on the pg_trgm extension.
-- PostgreSQL does not index the referencing side of a foreign key on its own,
-- so FK columns that are not the leading column of a UNIQUE constraint need an
-- explicit index here.

-- Transceivers
CREATE INDEX idx_transceivers_vendor ON transceivers(vendor_id);
CREATE INDEX idx_transceivers_form_factor ON transceivers(form_factor_id);
CREATE INDEX idx_transceivers_category ON transceivers(category);
CREATE INDEX idx_transceivers_data_rate ON transceivers(data_rate);
CREATE INDEX idx_transceivers_wavelength ON transceivers(wavelength_nm);
CREATE INDEX idx_transceivers_status ON transceivers(status);
-- Trigram indexes for fuzzy matching on part number and name
CREATE INDEX idx_transceivers_part_number_gin ON transceivers USING gin(part_number gin_trgm_ops);
CREATE INDEX idx_transceivers_name_gin ON transceivers USING gin(name gin_trgm_ops);
CREATE INDEX idx_transceivers_tags ON transceivers USING gin(tags);
-- Partial index: only rows that carry an OEM cross-reference
CREATE INDEX idx_transceivers_oem ON transceivers(oem_part_number) WHERE oem_part_number IS NOT NULL;
-- Prices
-- Newest-first per transceiver / per vendor
CREATE INDEX idx_prices_transceiver ON prices(transceiver_id, time DESC);
CREATE INDEX idx_prices_vendor ON prices(vendor_id, time DESC);
-- Compatibility
CREATE INDEX idx_compat_transceiver ON compatibility(transceiver_id);
CREATE INDEX idx_compat_device ON compatibility(device_id);
-- Devices
CREATE INDEX idx_devices_vendor ON network_devices(vendor_id);
CREATE INDEX idx_devices_model_gin ON network_devices USING gin(model gin_trgm_ops);
-- FAQ
CREATE INDEX idx_faq_tags ON faq_articles USING gin(tags);
CREATE INDEX idx_faq_content_gin ON faq_articles USING gin(content gin_trgm_ops);
-- Media
CREATE INDEX idx_media_transceiver ON media(transceiver_id);
-- media rows may reference a device or a vendor instead of a transceiver;
-- index those FK columns as well so lookups and parent deletes avoid a full scan
CREATE INDEX idx_media_device ON media(device_id);
CREATE INDEX idx_media_vendor ON media(vendor_id);
CREATE INDEX idx_media_type ON media(media_type);
-- Crawl
CREATE INDEX idx_crawl_jobs_status ON crawl_jobs(status);
CREATE INDEX idx_crawl_jobs_crawler ON crawl_jobs(crawler);
-- FK from crawl_errors to crawl_jobs: supports "errors for job" lookups and
-- the referential check when a crawl_jobs row is deleted
CREATE INDEX idx_crawl_errors_job ON crawl_errors(job_id);