-- Project overview:
--   * PostgreSQL 17 + TimescaleDB schema with 12 tables
--   * 48 standards (IEEE, SFF, ITU-T, OIF, MSA)
--   * 33 form factors (SFP through OSFP-XD/CPO)
--   * 85+ vendors (OEM, compatible, manufacturers, marketplaces)
--   * 80+ seed transceivers (1G-1.6T, CWDM, BiDi, DAC, AOC, FC, PON)
--   * 60+ network devices (Cisco, Juniper, Arista, HPE, Dell, etc.)
--   * Crawler framework with fs.com and eBay crawlers
--   * REST API (15 endpoints) on port 3200
--   * MCP server (12 tools) on port 3201
--   * PM2 ecosystem for production deployment on Erik (.82)
--
-- File info: SQL, 455 lines, 15 KiB
-- TIP Foundation Schema
-- PostgreSQL 17 + TimescaleDB

-- Extensions
-- Every statement uses IF NOT EXISTS, so running this section against a
-- database that already has the extensions only raises a notice.

-- TimescaleDB supplies create_hypertable(), which this file calls on the
-- prices table. It is created first so that call cannot fail on a database
-- where the extension has not been installed yet.
CREATE EXTENSION IF NOT EXISTS timescaledb;

-- NOTE(review): no uuid-ossp function is referenced in the part of the file
-- reviewed here; kept because other code may depend on it.
CREATE EXTENSION IF NOT EXISTS "uuid-ossp";

-- Fuzzy text search; provides the gin_trgm_ops operator class used by the
-- trigram indexes in this file.
CREATE EXTENSION IF NOT EXISTS "pg_trgm";

-- GIN index support.
CREATE EXTENSION IF NOT EXISTS "btree_gin";

-- ============================================================
-- ENUMS
-- ============================================================
-- Enumerated types used by the table definitions in this file.
-- Label order is significant: PostgreSQL compares and sorts enum values
-- by the order in which the labels are declared.

CREATE TYPE transceiver_status AS ENUM (
    'active',
    'eol',
    'pre_release',
    'nrnd',
    'unknown'
);

CREATE TYPE data_rate_unit AS ENUM (
    'Mbps',
    'Gbps',
    'Tbps'
);

CREATE TYPE reach_unit AS ENUM (
    'm',
    'km'
);

CREATE TYPE temperature_range AS ENUM (
    'commercial',
    'extended',
    'industrial'
);

CREATE TYPE dom_type AS ENUM (
    'none',
    'ddm',
    'ddmi',
    'cmis',
    'sff8472',
    'sff8636'
);

CREATE TYPE connector_type AS ENUM (
    'LC',
    'SC',
    'MPO-12',
    'MPO-16',
    'MPO-24',
    'CS',
    'SN',
    'FC',
    'ST',
    'MTRJ',
    'E2000',
    'copper_rj45',
    'cx4',
    'dac_passive',
    'dac_active',
    'aoc',
    'none',
    'other'
);

CREATE TYPE fiber_type AS ENUM (
    'smf',
    'mmf_om1',
    'mmf_om2',
    'mmf_om3',
    'mmf_om4',
    'mmf_om5',
    'copper',
    'dac',
    'aoc',
    'free_space',
    'other'
);

CREATE TYPE wavelength_band AS ENUM (
    'O',
    'E',
    'S',
    'C',
    'L',
    'U',
    'visible',
    'cwdm',
    'dwdm',
    'lwdm',
    'swdm',
    'other'
);

CREATE TYPE vendor_type AS ENUM (
    'oem',
    'compatible',
    'distributor',
    'manufacturer',
    'marketplace',
    'refurbished'
);

CREATE TYPE price_currency AS ENUM (
    'USD',
    'EUR',
    'GBP',
    'CNY',
    'JPY',
    'KRW',
    'TWD',
    'THB',
    'INR',
    'CAD',
    'AUD'
);

CREATE TYPE hype_phase AS ENUM (
    'innovation_trigger',
    'peak_inflated',
    'trough_disillusionment',
    'slope_enlightenment',
    'plateau_productivity',
    'decline'
);

CREATE TYPE crawl_status AS ENUM (
    'pending',
    'running',
    'success',
    'failed',
    'rate_limited'
);

CREATE TYPE media_type AS ENUM (
    'image',
    'datasheet',
    'manual',
    'diagram',
    'video',
    'certificate'
);

-- ============================================================
-- CORE TABLES
-- ============================================================

-- standards: one row per published standard (IEEE, SFF, ITU-T, OIF, etc.).
-- A row may point at the standard that replaced it via superseded_by.
CREATE TABLE standards (
    id             SERIAL       PRIMARY KEY,
    name           VARCHAR(100) UNIQUE NOT NULL,
    body           VARCHAR(50)  NOT NULL,           -- issuing body: IEEE, SNIA/SFF, ITU-T, OIF, MSA
    version        VARCHAR(50),
    year           INTEGER,
    url            TEXT,
    description    TEXT,
    superseded_by  INTEGER      REFERENCES standards (id),  -- self-reference
    created_at     TIMESTAMPTZ  DEFAULT NOW(),
    updated_at     TIMESTAMPTZ  DEFAULT NOW()
);

-- form_factors: physical module form factors, optionally linked to the
-- standard that defines them.
CREATE TABLE form_factors (
    id              SERIAL          PRIMARY KEY,
    name            VARCHAR(50)     UNIQUE NOT NULL,
    full_name       VARCHAR(200),
    standard_id     INTEGER         REFERENCES standards (id),
    lanes           INTEGER,                          -- number of electrical lanes
    max_data_rate   NUMERIC(10,2),                    -- expressed in data_rate_unit
    data_rate_unit  data_rate_unit  DEFAULT 'Gbps',
    width_mm        NUMERIC(6,2),
    height_mm       NUMERIC(6,2),
    depth_mm        NUMERIC(6,2),
    power_max_w     NUMERIC(6,2),
    generation      INTEGER,                          -- used for the hype cycle
    release_year    INTEGER,
    eol_year        INTEGER,
    description     TEXT,
    image_url       TEXT,
    created_at      TIMESTAMPTZ     DEFAULT NOW(),
    updated_at      TIMESTAMPTZ     DEFAULT NOW()
);

-- vendors: manufacturers, OEMs and sellers, plus the per-vendor settings
-- used when scraping their catalogs.
CREATE TABLE vendors (
    id               SERIAL        PRIMARY KEY,
    name             VARCHAR(200)  NOT NULL,
    slug             VARCHAR(200)  UNIQUE NOT NULL,
    vendor_type      vendor_type   DEFAULT 'compatible' NOT NULL,
    website          TEXT,
    logo_url         TEXT,
    country          VARCHAR(3),                    -- ISO 3166-1 alpha-3 code
    founded_year     INTEGER,
    description      TEXT,
    is_oem           BOOLEAN       DEFAULT FALSE,   -- e.g. Cisco, Juniper, Arista
    is_factory       BOOLEAN       DEFAULT FALSE,   -- e.g. Hisense, Innolight
    aliases          TEXT[],                        -- alternative names

    -- Scraping configuration
    scrape_url       TEXT,                          -- base URL of the catalog
    scrape_enabled   BOOLEAN       DEFAULT FALSE,
    scrape_interval  INTEGER       DEFAULT 86400,   -- in seconds
    last_scraped_at  TIMESTAMPTZ,

    created_at       TIMESTAMPTZ   DEFAULT NOW(),
    updated_at       TIMESTAMPTZ   DEFAULT NOW()
);

-- transceivers: the central catalog table. One row per (part_number, vendor)
-- pair, carrying classification, performance, optical, physical and
-- environmental attributes together with provenance metadata.
CREATE TABLE transceivers (
    id                   SERIAL              PRIMARY KEY,
    part_number          VARCHAR(200)        NOT NULL,
    vendor_id            INTEGER             NOT NULL REFERENCES vendors (id),
    form_factor_id       INTEGER             REFERENCES form_factors (id),

    -- Classification
    name                 VARCHAR(500),
    description          TEXT,
    category             VARCHAR(100),       -- SFP, SFP+, QSFP28, QSFP-DD, OSFP, etc.
    subcategory          VARCHAR(100),       -- SR, LR, ER, ZR, BiDi, CWDM, DWDM, DAC, AOC

    -- Performance
    data_rate            NUMERIC(10,2),      -- expressed in data_rate_unit
    data_rate_unit       data_rate_unit      DEFAULT 'Gbps',
    max_reach            NUMERIC(10,2),      -- expressed in reach_unit
    reach_unit           reach_unit          DEFAULT 'km',

    -- Optical
    wavelength_nm        NUMERIC(8,2),       -- TX wavelength
    wavelength_rx        NUMERIC(8,2),       -- RX wavelength (BiDi)
    wavelengths          NUMERIC(8,2)[],     -- CWDM/DWDM channels
    wavelength_band      wavelength_band,
    tx_power_min         NUMERIC(6,2),       -- dBm
    tx_power_max         NUMERIC(6,2),
    rx_sensitivity       NUMERIC(6,2),       -- dBm
    link_budget_db       NUMERIC(6,2),

    -- Physical
    connector            connector_type,
    fiber_type           fiber_type,
    duplex               BOOLEAN             DEFAULT TRUE,
    breakout             VARCHAR(50),        -- e.g. "4x25G", "8x50G"

    -- Environmental
    temp_range           temperature_range   DEFAULT 'commercial',
    temp_min_c           NUMERIC(5,1),
    temp_max_c           NUMERIC(5,1),
    power_consumption_w  NUMERIC(6,2),

    -- Monitoring
    dom_support          dom_type            DEFAULT 'none',

    -- OEM cross-reference
    oem_part_number      VARCHAR(200),       -- original OEM part number
    oem_vendor_id        INTEGER             REFERENCES vendors (id),

    -- Status
    status               transceiver_status  DEFAULT 'active',
    release_date         DATE,
    eol_date             DATE,

    -- Media
    image_url            TEXT,
    datasheet_url        TEXT,
    product_url          TEXT,

    -- Metadata
    tags                 TEXT[],
    raw_specs            JSONB,              -- original scraped data
    source               VARCHAR(100),       -- where this data came from
    source_url           TEXT,
    last_verified        TIMESTAMPTZ,

    created_at           TIMESTAMPTZ         DEFAULT NOW(),
    updated_at           TIMESTAMPTZ         DEFAULT NOW(),

    -- A vendor may list a given part number only once.
    UNIQUE (part_number, vendor_id)
);

-- ============================================================
-- PRICING (TimescaleDB hypertable)
-- ============================================================

-- prices: time-stamped price observations for a transceiver at a vendor.
-- The table has no primary key; rows are addressed by time plus the
-- transceiver and vendor references.
CREATE TABLE prices (
    time            TIMESTAMPTZ     NOT NULL,
    transceiver_id  INTEGER         NOT NULL REFERENCES transceivers (id),
    vendor_id       INTEGER         NOT NULL REFERENCES vendors (id),

    price           NUMERIC(12,4)   NOT NULL,        -- amount in "currency"
    currency        price_currency  DEFAULT 'USD',
    price_usd       NUMERIC(12,4),                   -- normalized to USD

    quantity_min    INTEGER         DEFAULT 1,
    quantity_max    INTEGER,
    in_stock        BOOLEAN,
    stock_quantity  INTEGER,
    lead_time_days  INTEGER,

    condition       VARCHAR(20)     DEFAULT 'new',   -- new, refurbished, used
    url             TEXT,
    source          VARCHAR(100),

    created_at      TIMESTAMPTZ     DEFAULT NOW()
);

-- Turn prices into a TimescaleDB hypertable keyed on the "time" column.
-- if_not_exists makes the call a no-op when prices is already a hypertable.
SELECT create_hypertable(
    'prices',
    'time',
    if_not_exists => TRUE
);

-- ============================================================
-- COMPATIBILITY
-- ============================================================

-- network_devices: switch/router models, with a port count per cage type.
CREATE TABLE network_devices (
    id               SERIAL        PRIMARY KEY,
    vendor_id        INTEGER       NOT NULL REFERENCES vendors (id),
    model            VARCHAR(200)  NOT NULL,
    series           VARCHAR(100),                 -- Catalyst 9300, EX4400, etc.
    device_type      VARCHAR(50),                  -- switch, router, firewall, olt, media_converter

    -- Port counts by type; each defaults to 0
    ports_sfp        INTEGER       DEFAULT 0,
    ports_sfp_plus   INTEGER       DEFAULT 0,
    ports_sfp28      INTEGER       DEFAULT 0,
    ports_qsfp_plus  INTEGER       DEFAULT 0,
    ports_qsfp28     INTEGER       DEFAULT 0,
    ports_qsfp_dd    INTEGER       DEFAULT 0,
    ports_osfp       INTEGER       DEFAULT 0,
    ports_cfp        INTEGER       DEFAULT 0,
    ports_rj45       INTEGER       DEFAULT 0,

    max_throughput   VARCHAR(50),
    release_year     INTEGER,
    eol_date         DATE,
    status           VARCHAR(20)   DEFAULT 'active',

    image_url        TEXT,
    product_url      TEXT,
    manual_url       TEXT,

    raw_specs        JSONB,
    source           VARCHAR(100),

    created_at       TIMESTAMPTZ   DEFAULT NOW(),
    updated_at       TIMESTAMPTZ   DEFAULT NOW(),

    -- A model name is unique within one vendor.
    UNIQUE (vendor_id, model)
);

-- compatibility: matrix linking transceivers to the network devices they
-- work in, at most one row per (transceiver, device) pair.
CREATE TABLE compatibility (
    id              SERIAL        PRIMARY KEY,
    transceiver_id  INTEGER       NOT NULL REFERENCES transceivers (id),
    device_id       INTEGER       NOT NULL REFERENCES network_devices (id),

    verified        BOOLEAN       DEFAULT FALSE,   -- vendor-verified or community-tested
    verified_by     VARCHAR(100),                  -- vendor, community, lab
    firmware_min    VARCHAR(50),
    firmware_max    VARCHAR(50),
    notes           TEXT,

    source          VARCHAR(100),                  -- cisco_tmg, juniper_hct, community, etc.
    source_url      TEXT,
    verified_at     TIMESTAMPTZ,

    created_at      TIMESTAMPTZ   DEFAULT NOW(),
    updated_at      TIMESTAMPTZ   DEFAULT NOW(),

    UNIQUE (transceiver_id, device_id)
);

-- ============================================================
-- KNOWLEDGE BASE
-- ============================================================

-- faq_articles: knowledge-base articles addressed by a unique slug.
CREATE TABLE faq_articles (
    id                    SERIAL        PRIMARY KEY,
    title                 VARCHAR(500)  NOT NULL,
    slug                  VARCHAR(500)  UNIQUE NOT NULL,
    content               TEXT          NOT NULL,
    summary               TEXT,
    category              VARCHAR(100),
    tags                  TEXT[],

    -- Plain integer arrays: no foreign key is declared on their elements.
    related_transceivers  INTEGER[],
    related_devices       INTEGER[],

    view_count            INTEGER       DEFAULT 0,
    helpful_count         INTEGER       DEFAULT 0,

    source                VARCHAR(100),
    source_url            TEXT,
    embedding_id          VARCHAR(100),            -- Qdrant point ID

    published             BOOLEAN       DEFAULT TRUE,
    created_at            TIMESTAMPTZ   DEFAULT NOW(),
    updated_at            TIMESTAMPTZ   DEFAULT NOW()
);

-- ============================================================
-- HYPE CYCLE ENGINE
-- ============================================================

-- hype_cycles: per-technology model parameters, phase predictions and the
-- input signals behind them.
CREATE TABLE hype_cycles (
    id                 SERIAL         PRIMARY KEY,
    technology         VARCHAR(200)   NOT NULL,    -- e.g. "QSFP-DD 400G", "Silicon Photonics"
    form_factor_id     INTEGER        REFERENCES form_factors (id),

    -- Bass model parameters
    bass_p             NUMERIC(10,6),              -- innovation coefficient
    bass_q             NUMERIC(10,6),              -- imitation coefficient
    bass_m             BIGINT,                     -- market potential

    -- Phase tracking
    current_phase      hype_phase,
    phase_started      DATE,
    predicted_peak     DATE,
    predicted_trough   DATE,
    predicted_plateau  DATE,

    -- Signals
    adoption_units     BIGINT,
    market_size_usd    BIGINT,
    search_trend       NUMERIC(5,2),               -- Google Trends 0-100
    patent_count       INTEGER,
    paper_count        INTEGER,
    news_sentiment     NUMERIC(5,2),               -- -1.0 to 1.0

    confidence         NUMERIC(5,2),               -- model confidence 0-1

    data_points        JSONB,                      -- time series data
    model_output       JSONB,                      -- full model results

    created_at         TIMESTAMPTZ    DEFAULT NOW(),
    updated_at         TIMESTAMPTZ    DEFAULT NOW()
);

-- ============================================================
-- MEDIA / DOCUMENTS
-- ============================================================

-- media: images and documents. The three owner references (transceiver,
-- device, vendor) are all nullable.
CREATE TABLE media (
    id               SERIAL        PRIMARY KEY,
    transceiver_id   INTEGER       REFERENCES transceivers (id),
    device_id        INTEGER       REFERENCES network_devices (id),
    vendor_id        INTEGER       REFERENCES vendors (id),

    media_type       media_type    NOT NULL,
    title            VARCHAR(500),
    url              TEXT          NOT NULL,       -- original URL
    r2_key           VARCHAR(500),                 -- Cloudflare R2 key
    r2_url           TEXT,                         -- R2 public URL

    mime_type        VARCHAR(100),
    file_size_bytes  BIGINT,
    width_px         INTEGER,
    height_px        INTEGER,

    ocr_text         TEXT,                         -- extracted text (Docling)
    embedding_id     VARCHAR(100),                 -- Qdrant point ID

    created_at       TIMESTAMPTZ   DEFAULT NOW()
);

-- ============================================================
-- CRAWL TRACKING
-- ============================================================

-- crawl_jobs: one row per crawler run, with progress counters and timing.
CREATE TABLE crawl_jobs (
    id              SERIAL        PRIMARY KEY,
    crawler         VARCHAR(100)  NOT NULL,        -- fscom, cisco_tmg, ebay, etc.
    status          crawl_status  DEFAULT 'pending',

    -- Counters; each starts at 0
    urls_total      INTEGER       DEFAULT 0,
    urls_processed  INTEGER       DEFAULT 0,
    urls_failed     INTEGER       DEFAULT 0,
    items_found     INTEGER       DEFAULT 0,
    items_new       INTEGER       DEFAULT 0,
    items_updated   INTEGER       DEFAULT 0,

    error_message   TEXT,
    duration_ms     INTEGER,

    started_at      TIMESTAMPTZ,
    finished_at     TIMESTAMPTZ,
    created_at      TIMESTAMPTZ   DEFAULT NOW()
);

-- crawl_errors: individual failures, optionally tied to the crawl job that
-- produced them (job_id is nullable).
CREATE TABLE crawl_errors (
    id             SERIAL       PRIMARY KEY,
    job_id         INTEGER      REFERENCES crawl_jobs (id),
    url            TEXT,
    error_code     VARCHAR(20),
    error_message  TEXT,
    retry_count    INTEGER      DEFAULT 0,
    created_at     TIMESTAMPTZ  DEFAULT NOW()
);

-- ============================================================
-- NEWS / BLOG
-- ============================================================

-- news_articles: scraped articles, de-duplicated by their unique URL.
CREATE TABLE news_articles (
    id                      SERIAL        PRIMARY KEY,
    title                   VARCHAR(500)  NOT NULL,
    url                     TEXT          UNIQUE NOT NULL,
    source                  VARCHAR(100),
    author                  VARCHAR(200),

    content                 TEXT,
    summary                 TEXT,

    tags                    TEXT[],
    mentioned_technologies  TEXT[],
    sentiment               NUMERIC(5,2),

    published_at            TIMESTAMPTZ,
    scraped_at              TIMESTAMPTZ   DEFAULT NOW(),
    created_at              TIMESTAMPTZ   DEFAULT NOW()
);

-- ============================================================
-- INDEXES
-- ============================================================
-- Secondary indexes. Those using gin_trgm_ops are trigram indexes and
-- depend on the pg_trgm extension.

-- Transceivers: foreign keys and common filter columns
CREATE INDEX idx_transceivers_vendor
    ON transceivers (vendor_id);
CREATE INDEX idx_transceivers_form_factor
    ON transceivers (form_factor_id);
CREATE INDEX idx_transceivers_category
    ON transceivers (category);
CREATE INDEX idx_transceivers_data_rate
    ON transceivers (data_rate);
CREATE INDEX idx_transceivers_wavelength
    ON transceivers (wavelength_nm);
CREATE INDEX idx_transceivers_status
    ON transceivers (status);

-- Transceivers: trigram search on part number and name, GIN on the tags array
CREATE INDEX idx_transceivers_part_number_gin
    ON transceivers USING GIN (part_number gin_trgm_ops);
CREATE INDEX idx_transceivers_name_gin
    ON transceivers USING GIN (name gin_trgm_ops);
CREATE INDEX idx_transceivers_tags
    ON transceivers USING GIN (tags);

-- Transceivers: partial index covering only rows that carry an OEM part number
CREATE INDEX idx_transceivers_oem
    ON transceivers (oem_part_number)
    WHERE oem_part_number IS NOT NULL;

-- Prices: newest-first lookups per transceiver and per vendor
CREATE INDEX idx_prices_transceiver
    ON prices (transceiver_id, time DESC);
CREATE INDEX idx_prices_vendor
    ON prices (vendor_id, time DESC);

-- Compatibility
-- NOTE(review): UNIQUE (transceiver_id, device_id) on compatibility already
-- yields an index that leads with transceiver_id, so idx_compat_transceiver
-- may be redundant - confirm before dropping.
CREATE INDEX idx_compat_transceiver
    ON compatibility (transceiver_id);
CREATE INDEX idx_compat_device
    ON compatibility (device_id);

-- Devices
-- NOTE(review): UNIQUE (vendor_id, model) on network_devices already yields
-- an index that leads with vendor_id, so idx_devices_vendor may be
-- redundant - confirm before dropping.
CREATE INDEX idx_devices_vendor
    ON network_devices (vendor_id);
CREATE INDEX idx_devices_model_gin
    ON network_devices USING GIN (model gin_trgm_ops);

-- FAQ
CREATE INDEX idx_faq_tags
    ON faq_articles USING GIN (tags);
CREATE INDEX idx_faq_content_gin
    ON faq_articles USING GIN (content gin_trgm_ops);

-- Media
CREATE INDEX idx_media_transceiver
    ON media (transceiver_id);
CREATE INDEX idx_media_type
    ON media (media_type);

-- Crawl
CREATE INDEX idx_crawl_jobs_status
    ON crawl_jobs (status);
CREATE INDEX idx_crawl_jobs_crawler
    ON crawl_jobs (crawler);
|