-- TIP Foundation Schema
-- PostgreSQL 17 + TimescaleDB

-- Enable extensions
-- timescaledb supplies create_hypertable(), which the PRICING section of this
-- script calls; enabling it here keeps the script self-contained instead of
-- relying on the extension having been created out of band.
CREATE EXTENSION IF NOT EXISTS "timescaledb";
CREATE EXTENSION IF NOT EXISTS "uuid-ossp";
CREATE EXTENSION IF NOT EXISTS "pg_trgm";    -- fuzzy text search
CREATE EXTENSION IF NOT EXISTS "btree_gin";  -- GIN index support

-- ============================================================
-- ENUMS
-- ============================================================
-- NOTE: CREATE TYPE has no IF NOT EXISTS form, so unlike the extension
-- statements above this section fails if the types already exist.

CREATE TYPE transceiver_status AS ENUM (
    'active', 'eol', 'pre_release', 'nrnd', 'unknown'
);

CREATE TYPE data_rate_unit AS ENUM ('Mbps', 'Gbps', 'Tbps');

CREATE TYPE reach_unit AS ENUM ('m', 'km');

CREATE TYPE temperature_range AS ENUM ('commercial', 'extended', 'industrial');

CREATE TYPE dom_type AS ENUM (
    'none', 'ddm', 'ddmi', 'cmis', 'sff8472', 'sff8636'
);

CREATE TYPE connector_type AS ENUM (
    'LC', 'SC', 'MPO-12', 'MPO-16', 'MPO-24', 'CS', 'SN', 'FC', 'ST',
    'MTRJ', 'E2000', 'copper_rj45', 'cx4', 'dac_passive', 'dac_active',
    'aoc', 'none', 'other'
);

CREATE TYPE fiber_type AS ENUM (
    'smf', 'mmf_om1', 'mmf_om2', 'mmf_om3', 'mmf_om4', 'mmf_om5',
    'copper', 'dac', 'aoc', 'free_space', 'other'
);

CREATE TYPE wavelength_band AS ENUM (
    'O', 'E', 'S', 'C', 'L', 'U', 'visible',
    'cwdm', 'dwdm', 'lwdm', 'swdm', 'other'
);

CREATE TYPE vendor_type AS ENUM (
    'oem', 'compatible', 'distributor', 'manufacturer',
    'marketplace', 'refurbished'
);

CREATE TYPE price_currency AS ENUM (
    'USD', 'EUR', 'GBP', 'CNY', 'JPY', 'KRW', 'TWD', 'THB', 'INR', 'CAD', 'AUD'
);

CREATE TYPE hype_phase AS ENUM (
    'innovation_trigger', 'peak_inflated', 'trough_disillusionment',
    'slope_enlightenment', 'plateau_productivity', 'decline'
);

CREATE TYPE crawl_status AS ENUM (
    'pending', 'running', 'success', 'failed', 'rate_limited'
);

CREATE TYPE media_type AS ENUM (
    'image', 'datasheet', 'manual', 'diagram', 'video', 'certificate'
);

-- ============================================================
-- CORE TABLES
-- ============================================================

-- Standards (IEEE, SFF, ITU-T, OIF, etc.)
CREATE TABLE standards (
    id              SERIAL PRIMARY KEY,
    name            VARCHAR(100) NOT NULL UNIQUE,
    body            VARCHAR(50) NOT NULL,              -- IEEE, SNIA/SFF, ITU-T, OIF, MSA
    version         VARCHAR(50),
    year            INT,
    url             TEXT,
    description     TEXT,
    superseded_by   INT REFERENCES standards(id),      -- self-reference to another standards row
    created_at      TIMESTAMPTZ DEFAULT NOW(),
    -- NOTE(review): DEFAULT NOW() only applies on INSERT; no trigger in this
    -- script refreshes updated_at on UPDATE (same for every table below).
    updated_at      TIMESTAMPTZ DEFAULT NOW()
);

-- Form Factors
CREATE TABLE form_factors (
    id              SERIAL PRIMARY KEY,
    name            VARCHAR(50) NOT NULL UNIQUE,
    full_name       VARCHAR(200),
    standard_id     INT REFERENCES standards(id),
    lanes           INT,                               -- electrical lanes
    max_data_rate   DECIMAL(10,2),
    data_rate_unit  data_rate_unit DEFAULT 'Gbps',
    width_mm        DECIMAL(6,2),
    height_mm       DECIMAL(6,2),
    depth_mm        DECIMAL(6,2),
    power_max_w     DECIMAL(6,2),
    generation      INT,                               -- for hype cycle
    release_year    INT,
    eol_year        INT,
    description     TEXT,
    image_url       TEXT,
    created_at      TIMESTAMPTZ DEFAULT NOW(),
    updated_at      TIMESTAMPTZ DEFAULT NOW()
);

-- Vendors / Manufacturers / Sellers
CREATE TABLE vendors (
    id              SERIAL PRIMARY KEY,
    name            VARCHAR(200) NOT NULL,
    slug            VARCHAR(200) NOT NULL UNIQUE,
    vendor_type     vendor_type NOT NULL DEFAULT 'compatible',
    website         TEXT,
    logo_url        TEXT,
    country         VARCHAR(3),                        -- ISO 3166-1 alpha-3
    founded_year    INT,
    description     TEXT,
    is_oem          BOOLEAN DEFAULT FALSE,             -- Cisco, Juniper, Arista, etc.
    is_factory      BOOLEAN DEFAULT FALSE,             -- Hisense, Innolight, etc.
    aliases         TEXT[],                            -- alternative names
    scrape_url      TEXT,                              -- catalog base URL
    scrape_enabled  BOOLEAN DEFAULT FALSE,
    scrape_interval INT DEFAULT 86400,                 -- seconds
    last_scraped_at TIMESTAMPTZ,
    created_at      TIMESTAMPTZ DEFAULT NOW(),
    updated_at      TIMESTAMPTZ DEFAULT NOW()
);

-- Core Transceiver Table
CREATE TABLE transceivers (
    id                  SERIAL PRIMARY KEY,
    part_number         VARCHAR(200) NOT NULL,
    vendor_id           INT NOT NULL REFERENCES vendors(id),
    form_factor_id      INT REFERENCES form_factors(id),

    -- Classification
    name                VARCHAR(500),
    description         TEXT,
    category            VARCHAR(100),                  -- SFP, SFP+, QSFP28, QSFP-DD, OSFP, etc.
    subcategory         VARCHAR(100),                  -- SR, LR, ER, ZR, BiDi, CWDM, DWDM, DAC, AOC

    -- Performance
    data_rate           DECIMAL(10,2),
    data_rate_unit      data_rate_unit DEFAULT 'Gbps',
    max_reach           DECIMAL(10,2),
    reach_unit          reach_unit DEFAULT 'km',

    -- Optical
    wavelength_nm       DECIMAL(8,2),                  -- TX wavelength
    wavelength_rx       DECIMAL(8,2),                  -- RX wavelength (BiDi)
    wavelengths         DECIMAL(8,2)[],                -- CWDM/DWDM channels
    wavelength_band     wavelength_band,
    tx_power_min        DECIMAL(6,2),                  -- dBm
    tx_power_max        DECIMAL(6,2),
    rx_sensitivity      DECIMAL(6,2),                  -- dBm
    link_budget_db      DECIMAL(6,2),

    -- Physical
    connector           connector_type,
    fiber_type          fiber_type,
    duplex              BOOLEAN DEFAULT TRUE,
    breakout            VARCHAR(50),                   -- e.g. "4x25G", "8x50G"

    -- Environmental
    temp_range          temperature_range DEFAULT 'commercial',
    temp_min_c          DECIMAL(5,1),
    temp_max_c          DECIMAL(5,1),
    power_consumption_w DECIMAL(6,2),

    -- Monitoring
    dom_support         dom_type DEFAULT 'none',

    -- OEM Cross-Reference
    oem_part_number     VARCHAR(200),                  -- original OEM part number
    oem_vendor_id       INT REFERENCES vendors(id),

    -- Status
    status              transceiver_status DEFAULT 'active',
    release_date        DATE,
    eol_date            DATE,

    -- Media
    image_url           TEXT,
    datasheet_url       TEXT,
    product_url         TEXT,

    -- Metadata
    tags                TEXT[],
    raw_specs           JSONB,                         -- original scraped data
    source              VARCHAR(100),                  -- where this data came from
    source_url          TEXT,
    last_verified       TIMESTAMPTZ,
    created_at          TIMESTAMPTZ DEFAULT NOW(),
    updated_at          TIMESTAMPTZ DEFAULT NOW(),

    -- A part number is unique per vendor, not globally.
    UNIQUE (part_number, vendor_id)
);

-- ============================================================
-- PRICING (TimescaleDB hypertable)
-- ============================================================

-- Price observations over time. No surrogate key: rows are identified by
-- their observation timestamp plus the transceiver/vendor they refer to.
CREATE TABLE prices (
    time            TIMESTAMPTZ NOT NULL,
    transceiver_id  INT NOT NULL REFERENCES transceivers(id),
    vendor_id       INT NOT NULL REFERENCES vendors(id),
    price           DECIMAL(12,4) NOT NULL,
    currency        price_currency DEFAULT 'USD',
    price_usd       DECIMAL(12,4),                     -- normalized to USD
    quantity_min    INT DEFAULT 1,
    quantity_max    INT,
    in_stock        BOOLEAN,
    stock_quantity  INT,
    lead_time_days  INT,
    condition       VARCHAR(20) DEFAULT 'new',         -- new, refurbished, used
    url             TEXT,
    source          VARCHAR(100),
    created_at      TIMESTAMPTZ DEFAULT NOW()
);

-- Make prices a TimescaleDB hypertable partitioned on "time".
-- if_not_exists => TRUE: do not raise if prices is already a hypertable.
SELECT create_hypertable('prices', 'time', if_not_exists => TRUE);

-- ============================================================
-- COMPATIBILITY
-- ============================================================

-- Switch/Router models
CREATE TABLE network_devices (
    id              SERIAL PRIMARY KEY,
    vendor_id       INT NOT NULL REFERENCES vendors(id),
    model           VARCHAR(200) NOT NULL,
    series          VARCHAR(100),                      -- Catalyst 9300, EX4400, etc.
    device_type     VARCHAR(50),                       -- switch, router, firewall, olt, media_converter

    -- Port counts per cage type
    ports_sfp       INT DEFAULT 0,
    ports_sfp_plus  INT DEFAULT 0,
    ports_sfp28     INT DEFAULT 0,
    ports_qsfp_plus INT DEFAULT 0,
    ports_qsfp28    INT DEFAULT 0,
    ports_qsfp_dd   INT DEFAULT 0,
    ports_osfp      INT DEFAULT 0,
    ports_cfp       INT DEFAULT 0,
    ports_rj45      INT DEFAULT 0,

    max_throughput  VARCHAR(50),
    release_year    INT,
    eol_date        DATE,
    status          VARCHAR(20) DEFAULT 'active',
    image_url       TEXT,
    product_url     TEXT,
    manual_url      TEXT,
    raw_specs       JSONB,
    source          VARCHAR(100),
    created_at      TIMESTAMPTZ DEFAULT NOW(),
    updated_at      TIMESTAMPTZ DEFAULT NOW(),

    UNIQUE (vendor_id, model)
);

-- Compatibility matrix (one row per transceiver/device pair)
CREATE TABLE compatibility (
    id              SERIAL PRIMARY KEY,
    transceiver_id  INT NOT NULL REFERENCES transceivers(id),
    device_id       INT NOT NULL REFERENCES network_devices(id),
    verified        BOOLEAN DEFAULT FALSE,             -- vendor-verified or community-tested
    verified_by     VARCHAR(100),                      -- vendor, community, lab
    firmware_min    VARCHAR(50),
    firmware_max    VARCHAR(50),
    notes           TEXT,
    source          VARCHAR(100),                      -- cisco_tmg, juniper_hct, community, etc.
    source_url      TEXT,
    verified_at     TIMESTAMPTZ,
    created_at      TIMESTAMPTZ DEFAULT NOW(),
    updated_at      TIMESTAMPTZ DEFAULT NOW(),

    UNIQUE (transceiver_id, device_id)
);

-- ============================================================
-- KNOWLEDGE BASE
-- ============================================================

CREATE TABLE faq_articles (
    id                      SERIAL PRIMARY KEY,
    title                   VARCHAR(500) NOT NULL,
    slug                    VARCHAR(500) NOT NULL UNIQUE,
    content                 TEXT NOT NULL,
    summary                 TEXT,
    category                VARCHAR(100),
    tags                    TEXT[],
    -- Plain integer arrays: no foreign-key constraint is declared on these.
    -- NOTE(review): presumably transceivers.id / network_devices.id values -- confirm.
    related_transceivers    INT[],
    related_devices         INT[],
    view_count              INT DEFAULT 0,
    helpful_count           INT DEFAULT 0,
    source                  VARCHAR(100),
    source_url              TEXT,
    embedding_id            VARCHAR(100),              -- Qdrant point ID
    published               BOOLEAN DEFAULT TRUE,
    created_at              TIMESTAMPTZ DEFAULT NOW(),
    updated_at              TIMESTAMPTZ DEFAULT NOW()
);

-- ============================================================
-- HYPE CYCLE ENGINE
-- ============================================================

CREATE TABLE hype_cycles (
    id                  SERIAL PRIMARY KEY,
    technology          VARCHAR(200) NOT NULL,         -- e.g. "QSFP-DD 400G", "Silicon Photonics"
    form_factor_id      INT REFERENCES form_factors(id),

    -- Bass Model Parameters
    bass_p              DECIMAL(10,6),                 -- innovation coefficient
    bass_q              DECIMAL(10,6),                 -- imitation coefficient
    bass_m              BIGINT,                        -- market potential

    current_phase       hype_phase,
    phase_started       DATE,
    predicted_peak      DATE,
    predicted_trough    DATE,
    predicted_plateau   DATE,

    -- Signals (the documented ranges below are not enforced by CHECK constraints)
    adoption_units      BIGINT,
    market_size_usd     BIGINT,
    search_trend        DECIMAL(5,2),                  -- Google Trends 0-100
    patent_count        INT,
    paper_count         INT,
    news_sentiment      DECIMAL(5,2),                  -- -1.0 to 1.0
    confidence          DECIMAL(5,2),                  -- model confidence 0-1

    data_points         JSONB,                         -- time series data
    model_output        JSONB,                         -- full model results
    created_at          TIMESTAMPTZ DEFAULT NOW(),
    updated_at          TIMESTAMPTZ DEFAULT NOW()
);

-- ============================================================
-- MEDIA / DOCUMENTS
-- ============================================================

-- All three owner references are nullable, so a row may be attached to a
-- transceiver, a device, a vendor, any combination of them, or none.
CREATE TABLE media (
    id              SERIAL PRIMARY KEY,
    transceiver_id  INT REFERENCES transceivers(id),
    device_id       INT REFERENCES network_devices(id),
    vendor_id       INT REFERENCES vendors(id),
    media_type      media_type NOT NULL,
    title           VARCHAR(500),
    url             TEXT NOT NULL,                     -- original URL
    r2_key          VARCHAR(500),                      -- Cloudflare R2 key
    r2_url          TEXT,                              -- R2 public URL
    mime_type       VARCHAR(100),
    file_size_bytes BIGINT,
    width_px        INT,
    height_px       INT,
    ocr_text        TEXT,                              -- extracted text (Docling)
    embedding_id    VARCHAR(100),                      -- Qdrant point ID
    created_at      TIMESTAMPTZ DEFAULT NOW()
);

-- ============================================================
-- CRAWL TRACKING
-- ============================================================

CREATE TABLE crawl_jobs (
    id              SERIAL PRIMARY KEY,
    crawler         VARCHAR(100) NOT NULL,             -- fscom, cisco_tmg, ebay, etc.
    status          crawl_status DEFAULT 'pending',
    urls_total      INT DEFAULT 0,
    urls_processed  INT DEFAULT 0,
    urls_failed     INT DEFAULT 0,
    items_found     INT DEFAULT 0,
    items_new       INT DEFAULT 0,
    items_updated   INT DEFAULT 0,
    error_message   TEXT,
    duration_ms     INT,
    started_at      TIMESTAMPTZ,
    finished_at     TIMESTAMPTZ,
    created_at      TIMESTAMPTZ DEFAULT NOW()
);

CREATE TABLE crawl_errors (
    id              SERIAL PRIMARY KEY,
    job_id          INT REFERENCES crawl_jobs(id),
    url             TEXT,
    error_code      VARCHAR(20),
    error_message   TEXT,
    retry_count     INT DEFAULT 0,
    created_at      TIMESTAMPTZ DEFAULT NOW()
);

-- ============================================================
-- NEWS / BLOG
-- ============================================================

CREATE TABLE news_articles (
    id                      SERIAL PRIMARY KEY,
    title                   VARCHAR(500) NOT NULL,
    url                     TEXT NOT NULL UNIQUE,
    source                  VARCHAR(100),
    author                  VARCHAR(200),
    content                 TEXT,
    summary                 TEXT,
    tags                    TEXT[],
    mentioned_technologies  TEXT[],
    sentiment               DECIMAL(5,2),
    published_at            TIMESTAMPTZ,
    scraped_at              TIMESTAMPTZ DEFAULT NOW(),
    created_at              TIMESTAMPTZ DEFAULT NOW()
);

-- ============================================================
-- INDEXES
-- ============================================================
-- PostgreSQL does not create an index on the referencing side of a foreign
-- key automatically, so FK columns used for joins / parent-row deletes are
-- indexed explicitly here. gin_trgm_ops indexes require the pg_trgm extension.

-- Transceivers
CREATE INDEX idx_transceivers_vendor          ON transceivers (vendor_id);
CREATE INDEX idx_transceivers_form_factor     ON transceivers (form_factor_id);
CREATE INDEX idx_transceivers_category        ON transceivers (category);
CREATE INDEX idx_transceivers_data_rate       ON transceivers (data_rate);
CREATE INDEX idx_transceivers_wavelength      ON transceivers (wavelength_nm);
CREATE INDEX idx_transceivers_status          ON transceivers (status);
CREATE INDEX idx_transceivers_part_number_gin ON transceivers USING gin (part_number gin_trgm_ops);
CREATE INDEX idx_transceivers_name_gin        ON transceivers USING gin (name gin_trgm_ops);
CREATE INDEX idx_transceivers_tags            ON transceivers USING gin (tags);
-- Partial index: only rows that actually carry an OEM cross-reference.
CREATE INDEX idx_transceivers_oem             ON transceivers (oem_part_number)
    WHERE oem_part_number IS NOT NULL;
-- Second FK to vendors; was previously unindexed.
CREATE INDEX idx_transceivers_oem_vendor      ON transceivers (oem_vendor_id);

-- Prices
CREATE INDEX idx_prices_transceiver ON prices (transceiver_id, time DESC);
CREATE INDEX idx_prices_vendor      ON prices (vendor_id, time DESC);

-- Compatibility
CREATE INDEX idx_compat_transceiver ON compatibility (transceiver_id);
CREATE INDEX idx_compat_device      ON compatibility (device_id);

-- Devices
CREATE INDEX idx_devices_vendor    ON network_devices (vendor_id);
CREATE INDEX idx_devices_model_gin ON network_devices USING gin (model gin_trgm_ops);

-- FAQ
CREATE INDEX idx_faq_tags        ON faq_articles USING gin (tags);
CREATE INDEX idx_faq_content_gin ON faq_articles USING gin (content gin_trgm_ops);

-- Media
CREATE INDEX idx_media_transceiver ON media (transceiver_id);
CREATE INDEX idx_media_device      ON media (device_id);   -- FK, previously unindexed
CREATE INDEX idx_media_vendor      ON media (vendor_id);   -- FK, previously unindexed
CREATE INDEX idx_media_type        ON media (media_type);

-- Crawl
CREATE INDEX idx_crawl_jobs_status  ON crawl_jobs (status);
CREATE INDEX idx_crawl_jobs_crawler ON crawl_jobs (crawler);
CREATE INDEX idx_crawl_errors_job   ON crawl_errors (job_id);  -- FK, previously unindexed