transceiver-db/sql/002-core-tables.sql
Rene Fichtmueller b43bdd3060 feat: TIP Phase 0+1 — monorepo, DB schema, API, scraper engine
Phase 0 - Foundation:
- Restructure into npm workspace monorepo (packages/core, api, scraper)
- PostgreSQL 17 + TimescaleDB schema (15 tables incl. hypertables)
- Docker Compose for local dev (PostgreSQL on 5433 + Qdrant)
- Express 5 API on port 3200 with 6 routes
- Seed script to migrate 159 transceivers + 42 standards from npm package
- Erik server setup script + PM2 ecosystem config

Phase 1 - Scraper Engine:
- Crawlee + Playwright framework with pg-boss scheduler
- FS.com scraper (PlaywrightCrawler, anti-bot workaround)
- Optcore.net scraper (WP REST API enumeration + PlaywrightCrawler)
  - Uses /wp-json/wp/v2/product to get 2000+ product URLs
  - Playwright renders individual product pages for price extraction
- Cisco TMG Matrix scraper (compatibility data)
- News RSS aggregator (optics.org, SPIE, Network World, Nature Photonics)
  - Keyword relevance scoring for transceiver/fiber topics
  - xml2js with malformed XML sanitization
- SHA-256 content hashing for change detection (skip unchanged records)
- pg-boss v10 with explicit queue creation before scheduling
2026-03-27 16:27:31 +13:00

470 lines
18 KiB
PL/PgSQL

-- TIP: Transceiver Intelligence Platform
-- Migration 002: Core Tables
-- ============================================================
-- VENDORS (manufacturers, distributors, resellers, OEMs)
-- ============================================================
-- One row per vendor. `name` and `slug` are each mandatory and unique, and
-- `type` must be one of the five values listed in its CHECK constraint.
CREATE TABLE IF NOT EXISTS vendors (
    -- Identity
    id                 UUID         PRIMARY KEY DEFAULT gen_random_uuid(),
    name               TEXT         NOT NULL UNIQUE,
    slug               TEXT         NOT NULL UNIQUE,
    type               TEXT         NOT NULL
        CHECK (type IN ('manufacturer','distributor','oem','reseller','compatible')),

    -- Company profile (every column below is optional)
    headquarters       TEXT,
    country            TEXT,
    website            TEXT,
    shop_url           TEXT,
    api_available      BOOLEAN      DEFAULT FALSE,
    api_endpoint       TEXT,
    logo_r2_key        TEXT,
    founded_year       INTEGER,
    revenue_usd        BIGINT,
    employee_count     INTEGER,
    market_position    TEXT,
    specialties        TEXT[]       DEFAULT '{}',

    -- Scrape-related columns
    scrape_config      JSONB        DEFAULT '{}',
    last_scraped       TIMESTAMPTZ,

    -- Flags and free-form lists; the array columns default to an empty array
    is_competitor      BOOLEAN      DEFAULT FALSE,
    is_factory         BOOLEAN      DEFAULT FALSE,
    factory_locations  TEXT[]       DEFAULT '{}',
    certifications     TEXT[]       DEFAULT '{}',
    strengths          TEXT[]       DEFAULT '{}',
    weaknesses         TEXT[]       DEFAULT '{}',

    -- Row timestamps (default to the insert time)
    created_at         TIMESTAMPTZ  DEFAULT NOW(),
    updated_at         TIMESTAMPTZ  DEFAULT NOW()
);
-- ============================================================
-- STANDARDS (IEEE, OIF, MSA)
-- ============================================================
-- One row per standard, keyed by a generated UUID and uniquely named.
-- `body` and `status` are restricted to the values in their CHECK lists;
-- `status` defaults to 'ratified'.
CREATE TABLE IF NOT EXISTS standards (
    -- Identity
    id                UUID         PRIMARY KEY DEFAULT gen_random_uuid(),
    name              TEXT         NOT NULL UNIQUE,
    ieee_reference    TEXT,
    body              TEXT
        CHECK (body IN ('IEEE','OIF','MSA','de_facto','proprietary')),

    -- Link characteristics
    speed             TEXT,
    speed_gbps        NUMERIC,
    lanes             INTEGER,
    lane_rate         TEXT,
    lane_rate_gbps    NUMERIC,
    modulation        TEXT,
    fiber_type        TEXT,
    wavelength        TEXT,
    max_reach_meters  INTEGER,
    max_reach_label   TEXT,
    connector         TEXT,
    fec_required      BOOLEAN      DEFAULT FALSE,
    form_factors      TEXT[]       DEFAULT '{}',

    -- Timeline and status
    year_draft        INTEGER,
    year_ratified     INTEGER,
    year_revised      INTEGER,
    status            TEXT         DEFAULT 'ratified'
        CHECK (status IN ('draft','ratified','revised','superseded')),
    superseded_by     TEXT,        -- plain text; not a foreign key

    -- Miscellaneous
    member_count      INTEGER,
    notes             TEXT,
    url               TEXT,
    created_at        TIMESTAMPTZ  DEFAULT NOW()
);
-- ============================================================
-- TRANSCEIVERS
-- ============================================================
-- One row per transceiver, keyed by a generated UUID with a unique `slug`.
-- Optionally linked to `vendors` and `standards` through nullable foreign keys.
-- `form_factor`, `speed`, `speed_gbps` and `reach_meters` are mandatory.
-- `search_vector` and `updated_at` are maintained by the
-- transceivers_search_update trigger defined later in this migration.
CREATE TABLE IF NOT EXISTS transceivers (
    -- Identity and references
    id                   UUID         PRIMARY KEY DEFAULT gen_random_uuid(),
    slug                 TEXT         NOT NULL UNIQUE,
    vendor_id            UUID         REFERENCES vendors(id),
    part_number          TEXT,
    standard_name        TEXT,
    standard_id          UUID         REFERENCES standards(id),
    ieee_reference       TEXT,

    -- Core link characteristics
    form_factor          TEXT         NOT NULL,
    speed                TEXT         NOT NULL,
    speed_gbps           NUMERIC      NOT NULL,
    lanes                INTEGER,
    lane_rate            TEXT,
    lane_rate_gbps       NUMERIC,
    modulation           TEXT,
    reach_meters         INTEGER      NOT NULL,
    reach_label          TEXT,
    fiber_type           TEXT,
    wavelengths          TEXT,
    connector            TEXT,
    power_consumption_w  NUMERIC,
    temp_range           TEXT         DEFAULT 'COM'
        CHECK (temp_range IN ('COM','IND')),
    category             TEXT,
    dom_support          BOOLEAN      DEFAULT TRUE,
    digital_diagnostics  TEXT,

    -- CWDM/DWDM
    -- The IN list must not contain NULL: for any unlisted value the predicate
    -- would evaluate to NULL rather than FALSE, and a CHECK only rejects
    -- FALSE, so nothing would ever be rejected. A NULL wdm_type is still
    -- accepted because the column itself is nullable.
    wdm_type             TEXT
        CHECK (wdm_type IN ('CWDM','DWDM')),
    channel_count        INTEGER,
    channel_spacing_ghz  NUMERIC,
    tunable              BOOLEAN      DEFAULT FALSE,
    itu_grid             TEXT,

    -- Coherent
    coherent             BOOLEAN      DEFAULT FALSE,
    baud_rate_gbaud      NUMERIC,
    fec_type             TEXT,
    dsp_vendor           TEXT,

    -- Lifecycle
    year_introduced      INTEGER,
    year_mainstream      INTEGER,
    year_peak            INTEGER,
    year_decline         INTEGER,
    market_status        TEXT         DEFAULT 'Mainstream'
        CHECK (market_status IN ('Mainstream','Growth','Emerging','Legacy','EOL')),
    hype_cycle_phase     TEXT,
    generation           TEXT,

    -- Pricing
    price_tier           TEXT
        CHECK (price_tier IN ('Budget','Standard','Premium')),
    msrp_usd             NUMERIC,
    street_price_usd     NUMERIC,

    -- Technical
    optical_budget_db    NUMERIC,
    tx_power_min_dbm     NUMERIC,
    tx_power_max_dbm     NUMERIC,
    rx_sensitivity_dbm   NUMERIC,

    -- Breakout
    breakout_capable     BOOLEAN      DEFAULT FALSE,
    breakout_to          TEXT,

    -- Storage
    datasheet_r2_key     TEXT,
    image_r2_key         TEXT,

    -- Meta
    use_case             TEXT,
    tags                 TEXT[]       DEFAULT '{}',
    vendor_compat        JSONB        DEFAULT '[]',
    notes                TEXT,

    -- Search vector (auto-populated by trigger)
    search_vector        TSVECTOR,
    created_at           TIMESTAMPTZ  DEFAULT NOW(),
    updated_at           TIMESTAMPTZ  DEFAULT NOW()
);
-- ============================================================
-- SWITCHES
-- ============================================================
-- One row per switch model. `model` is mandatory and the pair
-- (vendor_id, model) is unique. `search_vector` and `updated_at` are
-- maintained by the switches_search_update trigger defined later in this
-- migration.
CREATE TABLE IF NOT EXISTS switches (
    -- Identity and classification
    id                       UUID         PRIMARY KEY DEFAULT gen_random_uuid(),
    vendor_id                UUID         REFERENCES vendors(id),
    model                    TEXT         NOT NULL,
    series                   TEXT,
    category                 TEXT
        CHECK (category IN ('DataCenter','Campus','Edge','Core','SP','Industrial')),
    layer                    TEXT
        CHECK (layer IN ('L2','L3','L2/L3')),
    managed                  BOOLEAN      DEFAULT TRUE,

    -- Port layout
    ports_config             JSONB        DEFAULT '{}',
    total_ports              INTEGER,
    uplink_speed_gbps        NUMERIC,
    max_speed_gbps           NUMERIC,

    -- Performance figures
    switching_capacity_tbps  NUMERIC,
    forwarding_rate_mpps     NUMERIC,
    latency_ns               NUMERIC,
    buffer_mb                NUMERIC,

    -- ASIC details
    asic_vendor              TEXT,
    asic_model               TEXT,
    asic_generation          TEXT,

    -- Feature flags (all default to FALSE; poe_support is text, default 'None')
    poe_support              TEXT         DEFAULT 'None',
    stacking_support         BOOLEAN      DEFAULT FALSE,
    vxlan_support            BOOLEAN      DEFAULT FALSE,
    evpn_support             BOOLEAN      DEFAULT FALSE,
    bgp_support              BOOLEAN      DEFAULT FALSE,
    mpls_support             BOOLEAN      DEFAULT FALSE,
    openconfig_support       BOOLEAN      DEFAULT FALSE,
    sonic_compatible         BOOLEAN      DEFAULT FALSE,
    macsec_support           BOOLEAN      DEFAULT FALSE,

    -- Lifecycle dates and status
    release_date             DATE,
    eos_date                 DATE,
    eol_date                 DATE,
    last_support_date        DATE,
    lifecycle_status         TEXT         DEFAULT 'Active'
        CHECK (lifecycle_status IN ('Active','EoS_Announced','EoL','Legacy')),
    successor_model          TEXT,

    -- Physical properties
    rack_units               NUMERIC,
    max_power_w              NUMERIC,
    typical_power_w          NUMERIC,
    weight_kg                NUMERIC,
    airflow                  TEXT,

    -- Pricing
    msrp_usd                 NUMERIC,
    street_price_usd         NUMERIC,

    -- Documentation references
    manual_r2_key            TEXT,
    datasheet_r2_key         TEXT,
    config_guide_r2_key      TEXT,
    compatibility_list_url   TEXT,

    -- Meta
    tags                     TEXT[]       DEFAULT '{}',
    search_vector            TSVECTOR,
    created_at               TIMESTAMPTZ  DEFAULT NOW(),
    updated_at               TIMESTAMPTZ  DEFAULT NOW(),

    UNIQUE (vendor_id, model)
);
-- ============================================================
-- COMPATIBILITY (Switch <-> Transceiver)
-- ============================================================
-- Link table between `switches` and `transceivers`. Deleting either parent
-- row removes its compatibility rows (ON DELETE CASCADE). At most one row is
-- allowed per (switch_id, transceiver_id) pair.
-- NOTE(review): both foreign-key columns are nullable, and by default UNIQUE
-- treats NULLs as distinct, so rows with a NULL side are not deduplicated.
-- Confirm whether these columns should be NOT NULL.
CREATE TABLE IF NOT EXISTS compatibility (
    id                   UUID         PRIMARY KEY DEFAULT gen_random_uuid(),

    -- The two sides of the relationship
    switch_id            UUID         REFERENCES switches(id)     ON DELETE CASCADE,
    transceiver_id       UUID         REFERENCES transceivers(id) ON DELETE CASCADE,

    -- Verification details
    verified_by          TEXT,
    verification_date    DATE,
    verification_method  TEXT
        CHECK (verification_method IN ('tested','vendor_matrix','datasheet','community')),
    status               TEXT         DEFAULT 'compatible'
        CHECK (status IN ('compatible','incompatible','partial','unknown')),

    -- Free-form details
    notes                TEXT,
    firmware_min         TEXT,
    known_issues         TEXT,
    source_url           TEXT,
    created_at           TIMESTAMPTZ  DEFAULT NOW(),

    UNIQUE (switch_id, transceiver_id)
);
-- ============================================================
-- BREAKOUTS
-- ============================================================
-- One row per breakout option, identified by a unique `slug`. The source and
-- target standards are stored as mandatory plain text (no foreign keys), and
-- `cable_type`, when set, must be 'Passive' or 'Active'.
CREATE TABLE IF NOT EXISTS breakouts (
    id              UUID         PRIMARY KEY DEFAULT gen_random_uuid(),
    slug            TEXT         NOT NULL UNIQUE,
    from_standard   TEXT         NOT NULL,
    to_standard     TEXT         NOT NULL,
    form_factor     TEXT,
    description     TEXT,
    cable_type      TEXT
        CHECK (cable_type IN ('Passive','Active')),
    max_length      TEXT,
    speed_per_lane  TEXT,
    created_at      TIMESTAMPTZ  DEFAULT NOW()
);
-- ============================================================
-- TEMPLATES (FlexBox Coding + Switch Config)
-- ============================================================
-- Stored templates of two kinds, selected by `type`
-- ('flexbox_coding' or 'switch_config'). `name` and `template_content` are
-- mandatory; `variables` defaults to an empty JSON object.
CREATE TABLE IF NOT EXISTS templates (
    id                UUID         PRIMARY KEY DEFAULT gen_random_uuid(),
    type              TEXT         NOT NULL
        CHECK (type IN ('flexbox_coding','switch_config')),
    name              TEXT         NOT NULL,
    description       TEXT,

    -- Optional descriptive columns
    switch_vendor     TEXT,
    switch_series     TEXT,
    transceiver_type  TEXT,
    speed_gbps        NUMERIC,
    technology        TEXT,

    -- Template payload
    template_content  TEXT         NOT NULL,
    variables         JSONB        DEFAULT '{}',
    tags              TEXT[]       DEFAULT '{}',

    created_at        TIMESTAMPTZ  DEFAULT NOW(),
    updated_at        TIMESTAMPTZ  DEFAULT NOW()
);
-- ============================================================
-- DOCUMENTS (PDFs in R2)
-- ============================================================
-- Document metadata. Each row names an owning entity through the pair
-- (entity_type, entity_id); `entity_id` carries no foreign key, so the
-- database does not enforce that the target exists. `r2_key` is mandatory.
CREATE TABLE IF NOT EXISTS documents (
    id               UUID         PRIMARY KEY DEFAULT gen_random_uuid(),

    -- Owner and document kind
    entity_type      TEXT         NOT NULL
        CHECK (entity_type IN ('transceiver','switch','vendor','standard')),
    entity_id        UUID,
    doc_type         TEXT         NOT NULL
        CHECK (doc_type IN ('manual','datasheet','config_guide','compatibility_list','faq','whitepaper')),

    -- File details
    title            TEXT,
    filename         TEXT,
    r2_key           TEXT         NOT NULL,
    source_url       TEXT,
    file_size_bytes  BIGINT,
    page_count       INTEGER,

    -- OCR processing state; new rows start as 'pending'
    ocr_status       TEXT         DEFAULT 'pending'
        CHECK (ocr_status IN ('pending','processing','completed','failed')),
    ocr_text         TEXT,
    language         TEXT         DEFAULT 'en',

    -- Change tracking
    content_hash     TEXT,
    last_checked     TIMESTAMPTZ,
    created_at       TIMESTAMPTZ  DEFAULT NOW(),
    updated_at       TIMESTAMPTZ  DEFAULT NOW()
);
-- ============================================================
-- KNOWLEDGE BASE (FAQs, Troubleshooting)
-- ============================================================
-- Question/answer entries. `category`, `question` and `answer` are mandatory.
-- `search_vector` and `updated_at` are maintained by the kb_search_update
-- trigger defined later in this migration.
CREATE TABLE IF NOT EXISTS knowledge_base (
    id                       UUID         PRIMARY KEY DEFAULT gen_random_uuid(),

    -- Classification
    category                 TEXT         NOT NULL
        CHECK (category IN ('troubleshooting','faq','best_practice','known_issue','compatibility_tip')),
    subcategory              TEXT,

    -- Content
    question                 TEXT         NOT NULL,
    answer                   TEXT         NOT NULL,
    source_vendor            TEXT,
    source_url               TEXT,

    -- Applicability filters (plain text arrays, empty by default)
    applies_to_form_factors  TEXT[]       DEFAULT '{}',
    applies_to_speeds        TEXT[]       DEFAULT '{}',
    applies_to_vendors       TEXT[]       DEFAULT '{}',

    -- Triage
    severity                 TEXT
        CHECK (severity IN ('critical','high','medium','low','info')),
    resolution_steps         JSONB,
    last_verified            TIMESTAMPTZ,
    helpful_count            INTEGER      DEFAULT 0,

    -- Meta
    tags                     TEXT[]       DEFAULT '{}',
    search_vector            TSVECTOR,
    created_at               TIMESTAMPTZ  DEFAULT NOW(),
    updated_at               TIMESTAMPTZ  DEFAULT NOW()
);
-- ============================================================
-- FACTORIES
-- ============================================================
-- One row per factory site, optionally linked to a row in `vendors`.
-- `name` and `country` are mandatory; `factory_type`, when set, must be one
-- of the four values in its CHECK list.
CREATE TABLE IF NOT EXISTS factories (
    id                    UUID         PRIMARY KEY DEFAULT gen_random_uuid(),
    vendor_id             UUID         REFERENCES vendors(id),

    -- Site
    name                  TEXT         NOT NULL,
    country               TEXT         NOT NULL,
    city                  TEXT,
    factory_type          TEXT
        CHECK (factory_type IN ('manufacturing','assembly','r_and_d','headquarters')),

    -- Output and staffing
    products              TEXT[]       DEFAULT '{}',
    capacity_units_month  INTEGER,
    employee_count        INTEGER,
    certifications        TEXT[]       DEFAULT '{}',

    -- Expansion
    expansion_planned     BOOLEAN      DEFAULT FALSE,
    expansion_details     TEXT,

    -- Provenance and timestamps
    source_url            TEXT,
    last_verified         TIMESTAMPTZ,
    created_at            TIMESTAMPTZ  DEFAULT NOW(),
    updated_at            TIMESTAMPTZ  DEFAULT NOW()
);
-- ============================================================
-- NEWS ARTICLES
-- ============================================================
-- One row per news article. `source_url` is mandatory and unique, so the same
-- URL cannot be stored twice. `search_vector` is maintained by the
-- news_search_update trigger defined later in this migration.
CREATE TABLE IF NOT EXISTS news_articles (
    id                   UUID         PRIMARY KEY DEFAULT gen_random_uuid(),

    -- Article identity
    title                TEXT         NOT NULL,
    source               TEXT         NOT NULL,
    source_url           TEXT         NOT NULL UNIQUE,
    published_at         TIMESTAMPTZ,
    author               TEXT,

    -- Content
    summary              TEXT,
    full_text            TEXT,
    category             TEXT
        CHECK (category IN ('product_launch','market_report','standard','m_and_a','factory','event')),
    event                TEXT,

    -- Mentions (plain text arrays, empty by default)
    mentioned_vendors    TEXT[]       DEFAULT '{}',
    mentioned_products   TEXT[]       DEFAULT '{}',
    mentioned_standards  TEXT[]       DEFAULT '{}',

    -- Scores and change detection
    sentiment_score      NUMERIC,
    relevance_score      NUMERIC,
    content_hash         TEXT,

    -- Meta
    tags                 TEXT[]       DEFAULT '{}',
    search_vector        TSVECTOR,
    created_at           TIMESTAMPTZ  DEFAULT NOW()
);
-- ============================================================
-- BLOG DRAFTS
-- ============================================================
-- Blog drafts. Only `title` is mandatory; `topic`, `target_audience` and
-- `status` are each limited to the values in their CHECK lists, and `status`
-- starts as 'draft'.
CREATE TABLE IF NOT EXISTS blog_drafts (
    id               UUID         PRIMARY KEY DEFAULT gen_random_uuid(),
    title            TEXT         NOT NULL,

    -- Classification
    topic            TEXT
        CHECK (topic IN ('hype_cycle','price_trend','new_product','comparison','tutorial')),
    target_audience  TEXT
        CHECK (target_audience IN ('sales','technical','customer','seo')),

    -- Content
    outline          JSONB,
    draft_content    TEXT,
    data_sources     JSONB,

    -- Workflow
    status           TEXT         DEFAULT 'draft'
        CHECK (status IN ('draft','review','approved','published')),
    generated_by     TEXT,
    word_count       INTEGER,
    seo_keywords     TEXT[]       DEFAULT '{}',

    created_at       TIMESTAMPTZ  DEFAULT NOW(),
    updated_at       TIMESTAMPTZ  DEFAULT NOW()
);
-- ============================================================
-- TRIGGERS: Auto-update search_vector
-- ============================================================
-- Transceiver search vector
-- Row-level BEFORE trigger function for `transceivers`. Rebuilds
-- NEW.search_vector with the 'english' text search configuration, weighted:
--   A: standard_name, form_factor, speed
--   B: use_case, category
--   C: wavelengths, modulation, generation
--   D: tags (array elements joined with single spaces)
-- NULL inputs are treated as empty strings. The function also sets
-- NEW.updated_at to NOW() on every insert and update, replacing any value
-- supplied by the caller.
CREATE OR REPLACE FUNCTION transceivers_search_vector_update() RETURNS trigger AS $$
BEGIN
    NEW.search_vector :=
        setweight(to_tsvector('english', COALESCE(NEW.standard_name, '')), 'A') ||
        setweight(to_tsvector('english', COALESCE(NEW.form_factor, '')), 'A') ||
        setweight(to_tsvector('english', COALESCE(NEW.speed, '')), 'A') ||
        setweight(to_tsvector('english', COALESCE(NEW.use_case, '')), 'B') ||
        setweight(to_tsvector('english', COALESCE(NEW.category, '')), 'B') ||
        setweight(to_tsvector('english', COALESCE(NEW.wavelengths, '')), 'C') ||
        setweight(to_tsvector('english', COALESCE(NEW.modulation, '')), 'C') ||
        setweight(to_tsvector('english', COALESCE(NEW.generation, '')), 'C') ||
        setweight(to_tsvector('english', COALESCE(array_to_string(NEW.tags, ' '), '')), 'D');
    NEW.updated_at := NOW();
    RETURN NEW;
END;
$$ LANGUAGE plpgsql;

-- CREATE TRIGGER has no IF NOT EXISTS form, so drop any earlier definition
-- first. This keeps the migration re-runnable instead of failing with
-- "trigger already exists" on the second run.
DROP TRIGGER IF EXISTS transceivers_search_update ON transceivers;
CREATE TRIGGER transceivers_search_update
    BEFORE INSERT OR UPDATE ON transceivers
    FOR EACH ROW EXECUTE FUNCTION transceivers_search_vector_update();
-- Switch search vector
-- Row-level BEFORE trigger function for `switches`. Rebuilds
-- NEW.search_vector with the 'english' text search configuration, weighted:
--   A: model, series
--   B: category
--   C: asic_vendor, asic_model
--   D: tags (array elements joined with single spaces)
-- NULL inputs are treated as empty strings. The function also sets
-- NEW.updated_at to NOW() on every insert and update, replacing any value
-- supplied by the caller.
CREATE OR REPLACE FUNCTION switches_search_vector_update() RETURNS trigger AS $$
BEGIN
    NEW.search_vector :=
        setweight(to_tsvector('english', COALESCE(NEW.model, '')), 'A') ||
        setweight(to_tsvector('english', COALESCE(NEW.series, '')), 'A') ||
        setweight(to_tsvector('english', COALESCE(NEW.category, '')), 'B') ||
        setweight(to_tsvector('english', COALESCE(NEW.asic_vendor, '')), 'C') ||
        setweight(to_tsvector('english', COALESCE(NEW.asic_model, '')), 'C') ||
        setweight(to_tsvector('english', COALESCE(array_to_string(NEW.tags, ' '), '')), 'D');
    NEW.updated_at := NOW();
    RETURN NEW;
END;
$$ LANGUAGE plpgsql;

-- CREATE TRIGGER has no IF NOT EXISTS form, so drop any earlier definition
-- first. This keeps the migration re-runnable instead of failing with
-- "trigger already exists" on the second run.
DROP TRIGGER IF EXISTS switches_search_update ON switches;
CREATE TRIGGER switches_search_update
    BEFORE INSERT OR UPDATE ON switches
    FOR EACH ROW EXECUTE FUNCTION switches_search_vector_update();
-- Knowledge base search vector
-- Row-level BEFORE trigger function for `knowledge_base`. Rebuilds
-- NEW.search_vector with the 'english' text search configuration, weighted:
--   A: question
--   B: answer
--   C: category
--   D: tags (array elements joined with single spaces)
-- NULL inputs are treated as empty strings. The function also sets
-- NEW.updated_at to NOW() on every insert and update, replacing any value
-- supplied by the caller.
CREATE OR REPLACE FUNCTION kb_search_vector_update() RETURNS trigger AS $$
BEGIN
    NEW.search_vector :=
        setweight(to_tsvector('english', COALESCE(NEW.question, '')), 'A') ||
        setweight(to_tsvector('english', COALESCE(NEW.answer, '')), 'B') ||
        setweight(to_tsvector('english', COALESCE(NEW.category, '')), 'C') ||
        setweight(to_tsvector('english', COALESCE(array_to_string(NEW.tags, ' '), '')), 'D');
    NEW.updated_at := NOW();
    RETURN NEW;
END;
$$ LANGUAGE plpgsql;

-- CREATE TRIGGER has no IF NOT EXISTS form, so drop any earlier definition
-- first. This keeps the migration re-runnable instead of failing with
-- "trigger already exists" on the second run.
DROP TRIGGER IF EXISTS kb_search_update ON knowledge_base;
CREATE TRIGGER kb_search_update
    BEFORE INSERT OR UPDATE ON knowledge_base
    FOR EACH ROW EXECUTE FUNCTION kb_search_vector_update();
-- News search vector
-- Row-level BEFORE trigger function for `news_articles`. Rebuilds
-- NEW.search_vector with the 'english' text search configuration, weighted:
--   A: title
--   B: summary
--   C: source
--   D: tags (array elements joined with single spaces)
-- NULL inputs are treated as empty strings. This function does not set any
-- timestamp column.
CREATE OR REPLACE FUNCTION news_search_vector_update() RETURNS trigger AS $$
BEGIN
    NEW.search_vector :=
        setweight(to_tsvector('english', COALESCE(NEW.title, '')), 'A') ||
        setweight(to_tsvector('english', COALESCE(NEW.summary, '')), 'B') ||
        setweight(to_tsvector('english', COALESCE(NEW.source, '')), 'C') ||
        setweight(to_tsvector('english', COALESCE(array_to_string(NEW.tags, ' '), '')), 'D');
    RETURN NEW;
END;
$$ LANGUAGE plpgsql;

-- CREATE TRIGGER has no IF NOT EXISTS form, so drop any earlier definition
-- first. This keeps the migration re-runnable instead of failing with
-- "trigger already exists" on the second run.
DROP TRIGGER IF EXISTS news_search_update ON news_articles;
CREATE TRIGGER news_search_update
    BEFORE INSERT OR UPDATE ON news_articles
    FOR EACH ROW EXECUTE FUNCTION news_search_vector_update();