Phase 0 - Foundation: - Restructure into npm workspace monorepo (packages/core, api, scraper) - PostgreSQL 17 + TimescaleDB schema (15 tables incl. hypertables) - Docker Compose for local dev (PostgreSQL on 5433 + Qdrant) - Express 5 API on port 3200 with 6 routes - Seed script to migrate 159 transceivers + 42 standards from npm package - Erik server setup script + PM2 ecosystem config Phase 1 - Scraper Engine: - Crawlee + Playwright framework with pg-boss scheduler - FS.com scraper (PlaywrightCrawler, anti-bot workaround) - Optcore.net scraper (WP REST API enumeration + PlaywrightCrawler) - Uses /wp-json/wp/v2/product to get 2000+ product URLs - Playwright renders individual product pages for price extraction - Cisco TMG Matrix scraper (compatibility data) - News RSS aggregator (optics.org, SPIE, Network World, Nature Photonics) - Keyword relevance scoring for transceiver/fiber topics - xml2js with malformed XML sanitization - SHA-256 content hashing for change detection (skip unchanged records) - pg-boss v10 with explicit queue creation before scheduling
470 lines
18 KiB
PL/PgSQL
470 lines
18 KiB
PL/PgSQL
-- TIP: Transceiver Intelligence Platform
|
|
-- Migration 002: Core Tables
|
|
|
|
-- ============================================================
|
|
-- VENDORS (manufacturers, distributors, resellers, OEMs)
|
|
-- ============================================================
|
|
-- Vendor master data, one row per company.
-- `name` and `slug` are each unique; `type` must be one of the five values
-- listed in its CHECK constraint. All other attributes are optional.
CREATE TABLE IF NOT EXISTS vendors (
    -- Identity
    id                UUID        PRIMARY KEY DEFAULT gen_random_uuid(),
    name              TEXT        NOT NULL UNIQUE,
    slug              TEXT        NOT NULL UNIQUE,
    type              TEXT        NOT NULL
        CHECK (type IN ('manufacturer', 'distributor', 'oem', 'reseller', 'compatible')),

    -- Location and web presence
    headquarters      TEXT,
    country           TEXT,
    website           TEXT,
    shop_url          TEXT,
    api_available     BOOLEAN     DEFAULT FALSE,
    api_endpoint      TEXT,
    logo_r2_key       TEXT,       -- presumably an object-storage (R2) key; confirm against the uploader

    -- Company profile
    founded_year      INTEGER,
    revenue_usd       BIGINT,
    employee_count    INTEGER,
    market_position   TEXT,
    specialties       TEXT[]      DEFAULT '{}',

    -- Scraping state
    scrape_config     JSONB       DEFAULT '{}',
    last_scraped      TIMESTAMPTZ,

    -- Classification flags and free-form lists (all default to FALSE / empty array)
    is_competitor     BOOLEAN     DEFAULT FALSE,
    is_factory        BOOLEAN     DEFAULT FALSE,
    factory_locations TEXT[]      DEFAULT '{}',
    certifications    TEXT[]      DEFAULT '{}',
    strengths         TEXT[]      DEFAULT '{}',
    weaknesses        TEXT[]      DEFAULT '{}',

    -- Audit timestamps. No trigger in this migration maintains updated_at for
    -- this table, so writers must set it themselves.
    created_at        TIMESTAMPTZ DEFAULT NOW(),
    updated_at        TIMESTAMPTZ DEFAULT NOW()
);
|
|
|
|
-- ============================================================
|
|
-- STANDARDS (IEEE, OIF, MSA)
|
|
-- ============================================================
|
|
-- Interface standards, one row per uniquely named standard.
-- `body` and `status` are constrained to the value lists in their CHECKs;
-- `status` defaults to 'ratified'.
CREATE TABLE IF NOT EXISTS standards (
    -- Identity
    id               UUID        PRIMARY KEY DEFAULT gen_random_uuid(),
    name             TEXT        NOT NULL UNIQUE,
    ieee_reference   TEXT,
    body             TEXT
        CHECK (body IN ('IEEE', 'OIF', 'MSA', 'de_facto', 'proprietary')),

    -- Rates: each value is kept as a display string plus a numeric twin
    speed            TEXT,
    speed_gbps       NUMERIC,
    lanes            INTEGER,
    lane_rate        TEXT,
    lane_rate_gbps   NUMERIC,
    modulation       TEXT,

    -- Medium
    fiber_type       TEXT,
    wavelength       TEXT,
    max_reach_meters INTEGER,
    max_reach_label  TEXT,
    connector        TEXT,
    fec_required     BOOLEAN     DEFAULT FALSE,
    form_factors     TEXT[]      DEFAULT '{}',

    -- Lifecycle
    year_draft       INTEGER,
    year_ratified    INTEGER,
    year_revised     INTEGER,
    status           TEXT        DEFAULT 'ratified'
        CHECK (status IN ('draft', 'ratified', 'revised', 'superseded')),
    superseded_by    TEXT,       -- free text, not a foreign key

    -- Meta
    member_count     INTEGER,
    notes            TEXT,
    url              TEXT,
    created_at       TIMESTAMPTZ DEFAULT NOW()
);
|
|
|
|
-- ============================================================
|
|
-- TRANSCEIVERS
|
|
-- ============================================================
|
|
-- Transceiver catalog, one row per uniquely slugged module.
-- Optionally linked to a vendor and to a standard via foreign keys.
-- form_factor, speed, speed_gbps and reach_meters are mandatory.
CREATE TABLE IF NOT EXISTS transceivers (
    -- Identity and references
    id                  UUID        PRIMARY KEY DEFAULT gen_random_uuid(),
    slug                TEXT        NOT NULL UNIQUE,
    vendor_id           UUID        REFERENCES vendors(id),
    part_number         TEXT,
    standard_name       TEXT,
    standard_id         UUID        REFERENCES standards(id),
    ieee_reference      TEXT,

    -- Core optical/electrical characteristics
    form_factor         TEXT        NOT NULL,
    speed               TEXT        NOT NULL,
    speed_gbps          NUMERIC     NOT NULL,
    lanes               INTEGER,
    lane_rate           TEXT,
    lane_rate_gbps      NUMERIC,
    modulation          TEXT,
    reach_meters        INTEGER     NOT NULL,
    reach_label         TEXT,
    fiber_type          TEXT,
    wavelengths         TEXT,
    connector           TEXT,
    power_consumption_w NUMERIC,
    temp_range          TEXT        DEFAULT 'COM'
        CHECK (temp_range IN ('COM', 'IND')),
    category            TEXT,
    dom_support         BOOLEAN     DEFAULT TRUE,
    digital_diagnostics TEXT,

    -- CWDM/DWDM
    -- The value list must not contain NULL: `x IN (..., NULL)` evaluates to
    -- NULL (not FALSE) for a non-matching x, and a CHECK constraint only
    -- rejects FALSE, so a NULL in the list would let any string through.
    -- The column itself stays nullable, so NULL is still accepted.
    wdm_type            TEXT
        CHECK (wdm_type IN ('CWDM', 'DWDM')),
    channel_count       INTEGER,
    channel_spacing_ghz NUMERIC,
    tunable             BOOLEAN     DEFAULT FALSE,
    itu_grid            TEXT,

    -- Coherent
    coherent            BOOLEAN     DEFAULT FALSE,
    baud_rate_gbaud     NUMERIC,
    fec_type            TEXT,
    dsp_vendor          TEXT,

    -- Lifecycle
    year_introduced     INTEGER,
    year_mainstream     INTEGER,
    year_peak           INTEGER,
    year_decline        INTEGER,
    market_status       TEXT        DEFAULT 'Mainstream'
        CHECK (market_status IN ('Mainstream', 'Growth', 'Emerging', 'Legacy', 'EOL')),
    hype_cycle_phase    TEXT,
    generation          TEXT,

    -- Pricing
    price_tier          TEXT
        CHECK (price_tier IN ('Budget', 'Standard', 'Premium')),
    msrp_usd            NUMERIC,
    street_price_usd    NUMERIC,

    -- Technical
    optical_budget_db   NUMERIC,
    tx_power_min_dbm    NUMERIC,
    tx_power_max_dbm    NUMERIC,
    rx_sensitivity_dbm  NUMERIC,

    -- Breakout
    breakout_capable    BOOLEAN     DEFAULT FALSE,
    breakout_to         TEXT,

    -- Storage
    datasheet_r2_key    TEXT,
    image_r2_key        TEXT,

    -- Meta
    use_case            TEXT,
    tags                TEXT[]      DEFAULT '{}',
    vendor_compat       JSONB       DEFAULT '[]',
    notes               TEXT,

    -- Search vector (auto-populated by trigger)
    search_vector       TSVECTOR,

    created_at          TIMESTAMPTZ DEFAULT NOW(),
    updated_at          TIMESTAMPTZ DEFAULT NOW()
);
|
|
|
|
-- ============================================================
|
|
-- SWITCHES
|
|
-- ============================================================
|
|
-- Switch catalog, one row per (vendor_id, model) pair.
-- NOTE(review): vendor_id is nullable and PostgreSQL treats NULLs as distinct
-- in a UNIQUE constraint by default, so rows without a vendor are not
-- deduplicated by model. Confirm this is intended.
CREATE TABLE IF NOT EXISTS switches (
    -- Identity and classification
    id                      UUID        PRIMARY KEY DEFAULT gen_random_uuid(),
    vendor_id               UUID        REFERENCES vendors(id),
    model                   TEXT        NOT NULL,
    series                  TEXT,
    category                TEXT
        CHECK (category IN ('DataCenter', 'Campus', 'Edge', 'Core', 'SP', 'Industrial')),
    layer                   TEXT
        CHECK (layer IN ('L2', 'L3', 'L2/L3')),
    managed                 BOOLEAN     DEFAULT TRUE,

    -- Ports
    ports_config            JSONB       DEFAULT '{}',
    total_ports             INTEGER,
    uplink_speed_gbps       NUMERIC,
    max_speed_gbps          NUMERIC,

    -- Performance
    switching_capacity_tbps NUMERIC,
    forwarding_rate_mpps    NUMERIC,
    latency_ns              NUMERIC,
    buffer_mb               NUMERIC,

    -- ASIC
    asic_vendor             TEXT,
    asic_model              TEXT,
    asic_generation         TEXT,

    -- Features (poe_support is free text defaulting to 'None'; the rest are flags)
    poe_support             TEXT        DEFAULT 'None',
    stacking_support        BOOLEAN     DEFAULT FALSE,
    vxlan_support           BOOLEAN     DEFAULT FALSE,
    evpn_support            BOOLEAN     DEFAULT FALSE,
    bgp_support             BOOLEAN     DEFAULT FALSE,
    mpls_support            BOOLEAN     DEFAULT FALSE,
    openconfig_support      BOOLEAN     DEFAULT FALSE,
    sonic_compatible        BOOLEAN     DEFAULT FALSE,
    macsec_support          BOOLEAN     DEFAULT FALSE,

    -- Lifecycle
    release_date            DATE,
    eos_date                DATE,
    eol_date                DATE,
    last_support_date       DATE,
    lifecycle_status        TEXT        DEFAULT 'Active'
        CHECK (lifecycle_status IN ('Active', 'EoS_Announced', 'EoL', 'Legacy')),
    successor_model         TEXT,

    -- Physical
    rack_units              NUMERIC,
    max_power_w             NUMERIC,
    typical_power_w         NUMERIC,
    weight_kg               NUMERIC,
    airflow                 TEXT,

    -- Pricing
    msrp_usd                NUMERIC,
    street_price_usd        NUMERIC,

    -- Documentation
    manual_r2_key           TEXT,
    datasheet_r2_key        TEXT,
    config_guide_r2_key     TEXT,
    compatibility_list_url  TEXT,

    -- Meta
    tags                    TEXT[]      DEFAULT '{}',
    search_vector           TSVECTOR,
    created_at              TIMESTAMPTZ DEFAULT NOW(),
    updated_at              TIMESTAMPTZ DEFAULT NOW(),

    UNIQUE (vendor_id, model)
);
|
|
|
|
-- ============================================================
|
|
-- COMPATIBILITY (Switch <-> Transceiver)
|
|
-- ============================================================
|
|
-- Junction table recording whether a given transceiver works in a given switch.
-- At most one row per (switch_id, transceiver_id) pair; rows are removed
-- automatically when either referenced switch or transceiver is deleted.
CREATE TABLE IF NOT EXISTS compatibility (
    id                  UUID        PRIMARY KEY DEFAULT gen_random_uuid(),

    -- Both sides of the pair are mandatory. Without NOT NULL a row could
    -- reference no switch or no transceiver, and because NULLs are distinct in
    -- a UNIQUE constraint such rows would also escape the pair-uniqueness rule.
    switch_id           UUID        NOT NULL REFERENCES switches(id) ON DELETE CASCADE,
    transceiver_id      UUID        NOT NULL REFERENCES transceivers(id) ON DELETE CASCADE,

    -- Provenance of the compatibility statement
    verified_by         TEXT,
    verification_date   DATE,
    verification_method TEXT
        CHECK (verification_method IN ('tested', 'vendor_matrix', 'datasheet', 'community')),

    -- Result
    status              TEXT        DEFAULT 'compatible'
        CHECK (status IN ('compatible', 'incompatible', 'partial', 'unknown')),
    notes               TEXT,
    firmware_min        TEXT,
    known_issues        TEXT,
    source_url          TEXT,
    created_at          TIMESTAMPTZ DEFAULT NOW(),

    UNIQUE (switch_id, transceiver_id)
);
|
|
|
|
-- ============================================================
|
|
-- BREAKOUTS
|
|
-- ============================================================
|
|
-- Breakout definitions, one row per unique slug, describing a split from one
-- standard into another. Standards are stored as free text, not foreign keys.
CREATE TABLE IF NOT EXISTS breakouts (
    id             UUID        PRIMARY KEY DEFAULT gen_random_uuid(),
    slug           TEXT        NOT NULL UNIQUE,
    from_standard  TEXT        NOT NULL,
    to_standard    TEXT        NOT NULL,
    form_factor    TEXT,
    description    TEXT,
    cable_type     TEXT
        CHECK (cable_type IN ('Passive', 'Active')),
    max_length     TEXT,       -- stored as text; no unit is enforced by the schema
    speed_per_lane TEXT,
    created_at     TIMESTAMPTZ DEFAULT NOW()
);
|
|
|
|
-- ============================================================
|
|
-- TEMPLATES (FlexBox Coding + Switch Config)
|
|
-- ============================================================
|
|
-- Text templates of two kinds: 'flexbox_coding' and 'switch_config'.
-- `template_content` holds the template body; `variables` is a JSONB document
-- that defaults to an empty object.
CREATE TABLE IF NOT EXISTS templates (
    id               UUID        PRIMARY KEY DEFAULT gen_random_uuid(),
    type             TEXT        NOT NULL
        CHECK (type IN ('flexbox_coding', 'switch_config')),
    name             TEXT        NOT NULL,
    description      TEXT,

    -- Applicability filters (free text, not foreign keys)
    switch_vendor    TEXT,
    switch_series    TEXT,
    transceiver_type TEXT,
    speed_gbps       NUMERIC,
    technology       TEXT,

    -- Payload
    template_content TEXT        NOT NULL,
    variables        JSONB       DEFAULT '{}',
    tags             TEXT[]      DEFAULT '{}',

    created_at       TIMESTAMPTZ DEFAULT NOW(),
    updated_at       TIMESTAMPTZ DEFAULT NOW()
);
|
|
|
|
-- ============================================================
|
|
-- DOCUMENTS (PDFs in R2)
|
|
-- ============================================================
|
|
-- Stored documents attached to an entity of one of four kinds.
-- (entity_type, entity_id) is a polymorphic reference: entity_id has no
-- foreign key, so the database does not enforce that the target row exists.
CREATE TABLE IF NOT EXISTS documents (
    id              UUID        PRIMARY KEY DEFAULT gen_random_uuid(),

    -- Owner
    entity_type     TEXT        NOT NULL
        CHECK (entity_type IN ('transceiver', 'switch', 'vendor', 'standard')),
    entity_id       UUID,

    -- Document description
    doc_type        TEXT        NOT NULL
        CHECK (doc_type IN ('manual', 'datasheet', 'config_guide', 'compatibility_list', 'faq', 'whitepaper')),
    title           TEXT,
    filename        TEXT,
    r2_key          TEXT        NOT NULL,
    source_url      TEXT,
    file_size_bytes BIGINT,
    page_count      INTEGER,

    -- OCR processing state; new rows start as 'pending'
    ocr_status      TEXT        DEFAULT 'pending'
        CHECK (ocr_status IN ('pending', 'processing', 'completed', 'failed')),
    ocr_text        TEXT,
    language        TEXT        DEFAULT 'en',

    -- Change tracking
    content_hash    TEXT,
    last_checked    TIMESTAMPTZ,
    created_at      TIMESTAMPTZ DEFAULT NOW(),
    updated_at      TIMESTAMPTZ DEFAULT NOW()
);
|
|
|
|
-- ============================================================
|
|
-- KNOWLEDGE BASE (FAQs, Troubleshooting)
|
|
-- ============================================================
|
|
-- Question/answer knowledge entries. `category` is mandatory and restricted to
-- five values; `severity` is optional and restricted to five levels.
CREATE TABLE IF NOT EXISTS knowledge_base (
    id                      UUID        PRIMARY KEY DEFAULT gen_random_uuid(),

    -- Classification
    category                TEXT        NOT NULL
        CHECK (category IN ('troubleshooting', 'faq', 'best_practice', 'known_issue', 'compatibility_tip')),
    subcategory             TEXT,

    -- Content
    question                TEXT        NOT NULL,
    answer                  TEXT        NOT NULL,
    source_vendor           TEXT,
    source_url              TEXT,

    -- Scope (free-text arrays, empty by default)
    applies_to_form_factors TEXT[]      DEFAULT '{}',
    applies_to_speeds       TEXT[]      DEFAULT '{}',
    applies_to_vendors      TEXT[]      DEFAULT '{}',

    severity                TEXT
        CHECK (severity IN ('critical', 'high', 'medium', 'low', 'info')),
    resolution_steps        JSONB,
    last_verified           TIMESTAMPTZ,
    helpful_count           INTEGER     DEFAULT 0,

    -- Meta
    tags                    TEXT[]      DEFAULT '{}',
    search_vector           TSVECTOR,
    created_at              TIMESTAMPTZ DEFAULT NOW(),
    updated_at              TIMESTAMPTZ DEFAULT NOW()
);
|
|
|
|
-- ============================================================
|
|
-- FACTORIES
|
|
-- ============================================================
|
|
-- Physical sites, optionally linked to a vendor row.
-- `name` and `country` are mandatory; `factory_type` is limited to four values.
CREATE TABLE IF NOT EXISTS factories (
    id                   UUID        PRIMARY KEY DEFAULT gen_random_uuid(),
    vendor_id            UUID        REFERENCES vendors(id),

    -- Site
    name                 TEXT        NOT NULL,
    country              TEXT        NOT NULL,
    city                 TEXT,
    factory_type         TEXT
        CHECK (factory_type IN ('manufacturing', 'assembly', 'r_and_d', 'headquarters')),

    -- Capability
    products             TEXT[]      DEFAULT '{}',
    capacity_units_month INTEGER,
    employee_count       INTEGER,
    certifications       TEXT[]      DEFAULT '{}',
    expansion_planned    BOOLEAN     DEFAULT FALSE,
    expansion_details    TEXT,

    -- Provenance and audit
    source_url           TEXT,
    last_verified        TIMESTAMPTZ,
    created_at           TIMESTAMPTZ DEFAULT NOW(),
    updated_at           TIMESTAMPTZ DEFAULT NOW()
);
|
|
|
|
-- ============================================================
|
|
-- NEWS ARTICLES
|
|
-- ============================================================
|
|
-- Collected news items, deduplicated by `source_url` (unique).
-- This table has created_at only; there is no updated_at column.
CREATE TABLE IF NOT EXISTS news_articles (
    id                  UUID        PRIMARY KEY DEFAULT gen_random_uuid(),

    -- Article
    title               TEXT        NOT NULL,
    source              TEXT        NOT NULL,
    source_url          TEXT        NOT NULL UNIQUE,
    published_at        TIMESTAMPTZ,
    author              TEXT,
    summary             TEXT,
    full_text           TEXT,

    -- Classification
    category            TEXT
        CHECK (category IN ('product_launch', 'market_report', 'standard', 'm_and_a', 'factory', 'event')),
    event               TEXT,

    -- Entities mentioned (free-text arrays, empty by default)
    mentioned_vendors   TEXT[]      DEFAULT '{}',
    mentioned_products  TEXT[]      DEFAULT '{}',
    mentioned_standards TEXT[]      DEFAULT '{}',

    -- Scoring; the schema enforces no range on either score
    sentiment_score     NUMERIC,
    relevance_score     NUMERIC,

    -- Meta
    content_hash        TEXT,
    tags                TEXT[]      DEFAULT '{}',
    search_vector       TSVECTOR,
    created_at          TIMESTAMPTZ DEFAULT NOW()
);
|
|
|
|
-- ============================================================
|
|
-- BLOG DRAFTS
|
|
-- ============================================================
|
|
-- Blog post drafts moving through the workflow states
-- 'draft' -> 'review' -> 'approved' -> 'published' (the order is implied by the
-- value list only; the schema does not enforce transitions).
CREATE TABLE IF NOT EXISTS blog_drafts (
    id              UUID        PRIMARY KEY DEFAULT gen_random_uuid(),
    title           TEXT        NOT NULL,

    -- Editorial targeting
    topic           TEXT
        CHECK (topic IN ('hype_cycle', 'price_trend', 'new_product', 'comparison', 'tutorial')),
    target_audience TEXT
        CHECK (target_audience IN ('sales', 'technical', 'customer', 'seo')),

    -- Content
    outline         JSONB,
    draft_content   TEXT,
    data_sources    JSONB,

    -- Workflow
    status          TEXT        DEFAULT 'draft'
        CHECK (status IN ('draft', 'review', 'approved', 'published')),
    generated_by    TEXT,
    word_count      INTEGER,
    seo_keywords    TEXT[]      DEFAULT '{}',

    created_at      TIMESTAMPTZ DEFAULT NOW(),
    updated_at      TIMESTAMPTZ DEFAULT NOW()
);
|
|
|
|
-- ============================================================
|
|
-- TRIGGERS: Auto-update search_vector
|
|
-- ============================================================
|
|
|
|
-- Transceiver search vector
|
|
-- Row-level trigger function for transceivers.
-- Rebuilds NEW.search_vector from the row's text columns using the 'english'
-- text-search configuration, then sets NEW.updated_at to NOW().
--   weight A: standard_name, form_factor, speed
--   weight B: use_case, category
--   weight C: wavelengths, modulation, generation
--   weight D: tags (array elements joined with a single space)
-- NULL columns contribute an empty string. Returns the modified NEW row.
CREATE OR REPLACE FUNCTION transceivers_search_vector_update()
RETURNS trigger
LANGUAGE plpgsql
AS $fn$
DECLARE
    doc TSVECTOR;
BEGIN
    -- Appended in a fixed order: concatenation offsets lexeme positions.
    doc := setweight(to_tsvector('english', COALESCE(NEW.standard_name, '')), 'A');
    doc := doc || setweight(to_tsvector('english', COALESCE(NEW.form_factor, '')), 'A');
    doc := doc || setweight(to_tsvector('english', COALESCE(NEW.speed, '')), 'A');
    doc := doc || setweight(to_tsvector('english', COALESCE(NEW.use_case, '')), 'B');
    doc := doc || setweight(to_tsvector('english', COALESCE(NEW.category, '')), 'B');
    doc := doc || setweight(to_tsvector('english', COALESCE(NEW.wavelengths, '')), 'C');
    doc := doc || setweight(to_tsvector('english', COALESCE(NEW.modulation, '')), 'C');
    doc := doc || setweight(to_tsvector('english', COALESCE(NEW.generation, '')), 'C');
    doc := doc || setweight(to_tsvector('english', COALESCE(array_to_string(NEW.tags, ' '), '')), 'D');

    NEW.search_vector := doc;
    NEW.updated_at := NOW();
    RETURN NEW;
END;
$fn$;
|
|
|
|
-- Attach the search-vector function to transceivers so it runs before every
-- row insert or update.
-- Plain CREATE TRIGGER fails if the trigger already exists, whereas the tables
-- (IF NOT EXISTS) and functions (OR REPLACE) in this migration can be re-run.
-- Dropping any previous definition first makes this statement re-runnable too.
DROP TRIGGER IF EXISTS transceivers_search_update ON transceivers;
CREATE TRIGGER transceivers_search_update
    BEFORE INSERT OR UPDATE ON transceivers
    FOR EACH ROW
    EXECUTE FUNCTION transceivers_search_vector_update();
|
|
|
|
-- Switch search vector
|
|
-- Row-level trigger function for switches.
-- Rebuilds NEW.search_vector using the 'english' text-search configuration,
-- then sets NEW.updated_at to NOW().
--   weight A: model, series
--   weight B: category
--   weight C: asic_vendor, asic_model
--   weight D: tags (array elements joined with a single space)
-- NULL columns contribute an empty string. Returns the modified NEW row.
CREATE OR REPLACE FUNCTION switches_search_vector_update()
RETURNS trigger
LANGUAGE plpgsql
AS $fn$
DECLARE
    doc TSVECTOR;
BEGIN
    -- Appended in a fixed order: concatenation offsets lexeme positions.
    doc := setweight(to_tsvector('english', COALESCE(NEW.model, '')), 'A');
    doc := doc || setweight(to_tsvector('english', COALESCE(NEW.series, '')), 'A');
    doc := doc || setweight(to_tsvector('english', COALESCE(NEW.category, '')), 'B');
    doc := doc || setweight(to_tsvector('english', COALESCE(NEW.asic_vendor, '')), 'C');
    doc := doc || setweight(to_tsvector('english', COALESCE(NEW.asic_model, '')), 'C');
    doc := doc || setweight(to_tsvector('english', COALESCE(array_to_string(NEW.tags, ' '), '')), 'D');

    NEW.search_vector := doc;
    NEW.updated_at := NOW();
    RETURN NEW;
END;
$fn$;
|
|
|
|
-- Attach the search-vector function to switches so it runs before every row
-- insert or update.
-- Plain CREATE TRIGGER fails if the trigger already exists, whereas the tables
-- (IF NOT EXISTS) and functions (OR REPLACE) in this migration can be re-run.
-- Dropping any previous definition first makes this statement re-runnable too.
DROP TRIGGER IF EXISTS switches_search_update ON switches;
CREATE TRIGGER switches_search_update
    BEFORE INSERT OR UPDATE ON switches
    FOR EACH ROW
    EXECUTE FUNCTION switches_search_vector_update();
|
|
|
|
-- Knowledge base search vector
|
|
-- Row-level trigger function for knowledge_base.
-- Rebuilds NEW.search_vector using the 'english' text-search configuration,
-- then sets NEW.updated_at to NOW().
--   weight A: question
--   weight B: answer
--   weight C: category
--   weight D: tags (array elements joined with a single space)
-- NULL columns contribute an empty string. Returns the modified NEW row.
CREATE OR REPLACE FUNCTION kb_search_vector_update()
RETURNS trigger
LANGUAGE plpgsql
AS $fn$
DECLARE
    doc TSVECTOR;
BEGIN
    -- Appended in a fixed order: concatenation offsets lexeme positions.
    doc := setweight(to_tsvector('english', COALESCE(NEW.question, '')), 'A');
    doc := doc || setweight(to_tsvector('english', COALESCE(NEW.answer, '')), 'B');
    doc := doc || setweight(to_tsvector('english', COALESCE(NEW.category, '')), 'C');
    doc := doc || setweight(to_tsvector('english', COALESCE(array_to_string(NEW.tags, ' '), '')), 'D');

    NEW.search_vector := doc;
    NEW.updated_at := NOW();
    RETURN NEW;
END;
$fn$;
|
|
|
|
-- Attach the search-vector function to knowledge_base so it runs before every
-- row insert or update.
-- Plain CREATE TRIGGER fails if the trigger already exists, whereas the tables
-- (IF NOT EXISTS) and functions (OR REPLACE) in this migration can be re-run.
-- Dropping any previous definition first makes this statement re-runnable too.
DROP TRIGGER IF EXISTS kb_search_update ON knowledge_base;
CREATE TRIGGER kb_search_update
    BEFORE INSERT OR UPDATE ON knowledge_base
    FOR EACH ROW
    EXECUTE FUNCTION kb_search_vector_update();
|
|
|
|
-- News search vector
|
|
-- Row-level trigger function for news_articles.
-- Rebuilds NEW.search_vector using the 'english' text-search configuration.
--   weight A: title
--   weight B: summary
--   weight C: source
--   weight D: tags (array elements joined with a single space)
-- NULL columns contribute an empty string. Unlike the other search-vector
-- functions in this file, it does not assign updated_at. Returns the modified
-- NEW row.
CREATE OR REPLACE FUNCTION news_search_vector_update()
RETURNS trigger
LANGUAGE plpgsql
AS $fn$
DECLARE
    doc TSVECTOR;
BEGIN
    -- Appended in a fixed order: concatenation offsets lexeme positions.
    doc := setweight(to_tsvector('english', COALESCE(NEW.title, '')), 'A');
    doc := doc || setweight(to_tsvector('english', COALESCE(NEW.summary, '')), 'B');
    doc := doc || setweight(to_tsvector('english', COALESCE(NEW.source, '')), 'C');
    doc := doc || setweight(to_tsvector('english', COALESCE(array_to_string(NEW.tags, ' '), '')), 'D');

    NEW.search_vector := doc;
    RETURN NEW;
END;
$fn$;
|
|
|
|
-- Attach the search-vector function to news_articles so it runs before every
-- row insert or update.
-- Plain CREATE TRIGGER fails if the trigger already exists, whereas the tables
-- (IF NOT EXISTS) and functions (OR REPLACE) in this migration can be re-run.
-- Dropping any previous definition first makes this statement re-runnable too.
DROP TRIGGER IF EXISTS news_search_update ON news_articles;
CREATE TRIGGER news_search_update
    BEFORE INSERT OR UPDATE ON news_articles
    FOR EACH ROW
    EXECUTE FUNCTION news_search_vector_update();
|