-- Phase 0 - Foundation:
--   - Restructure into npm workspace monorepo (packages/core, api, scraper)
--   - PostgreSQL 17 + TimescaleDB schema (15 tables incl. hypertables)
--   - Docker Compose for local dev (PostgreSQL on 5433 + Qdrant)
--   - Express 5 API on port 3200 with 6 routes
--   - Seed script to migrate 159 transceivers + 42 standards from npm package
--   - Erik server setup script + PM2 ecosystem config
--
-- Phase 1 - Scraper Engine:
--   - Crawlee + Playwright framework with pg-boss scheduler
--   - FS.com scraper (PlaywrightCrawler, anti-bot workaround)
--   - Optcore.net scraper (WP REST API enumeration + PlaywrightCrawler)
--       - Uses /wp-json/wp/v2/product to get 2000+ product URLs
--       - Playwright renders individual product pages for price extraction
--   - Cisco TMG Matrix scraper (compatibility data)
--   - News RSS aggregator (optics.org, SPIE, Network World, Nature Photonics)
--       - Keyword relevance scoring for transceiver/fiber topics
--       - xml2js with malformed XML sanitization
--   - SHA-256 content hashing for change detection (skip unchanged records)
--   - pg-boss v10 with explicit queue creation before scheduling
-- File info: 76 lines, 4.6 KiB, SQL
-- TIP: Transceiver Intelligence Platform
-- Migration 004: Indexes

-- ============================================================
-- FULL-TEXT SEARCH INDEXES (GIN on tsvector)
-- ============================================================
-- One GIN index per searchable table, each on that table's search_vector
-- column. IF NOT EXISTS keeps the migration safe to re-run.
-- NOTE(review): the search_vector columns are created outside this file;
-- confirm they are tsvector, as the section title states.
CREATE INDEX IF NOT EXISTS idx_transceivers_search ON transceivers USING GIN(search_vector);
CREATE INDEX IF NOT EXISTS idx_switches_search ON switches USING GIN(search_vector);
CREATE INDEX IF NOT EXISTS idx_kb_search ON knowledge_base USING GIN(search_vector);
CREATE INDEX IF NOT EXISTS idx_news_search ON news_articles USING GIN(search_vector);

-- ============================================================
-- ARRAY INDEXES (GIN on text[])
-- ============================================================
-- GIN indexes on array-valued columns; GIN's built-in array operator class
-- serves the containment/overlap operators (@>, <@, &&).
-- NOTE(review): column types are declared outside this file; confirm each
-- is text[] as the section title states.
CREATE INDEX IF NOT EXISTS idx_transceivers_tags ON transceivers USING GIN(tags);
CREATE INDEX IF NOT EXISTS idx_switches_tags ON switches USING GIN(tags);
CREATE INDEX IF NOT EXISTS idx_vendors_specialties ON vendors USING GIN(specialties);
CREATE INDEX IF NOT EXISTS idx_standards_form_factors ON standards USING GIN(form_factors);
CREATE INDEX IF NOT EXISTS idx_kb_form_factors ON knowledge_base USING GIN(applies_to_form_factors);
CREATE INDEX IF NOT EXISTS idx_kb_speeds ON knowledge_base USING GIN(applies_to_speeds);
CREATE INDEX IF NOT EXISTS idx_news_vendors ON news_articles USING GIN(mentioned_vendors);

-- ============================================================
-- JSONB INDEXES
-- ============================================================
-- No operator class is named, so these use GIN's default for jsonb
-- (jsonb_ops), which supports key-existence (?, ?|, ?&) and containment (@>).
CREATE INDEX IF NOT EXISTS idx_transceivers_vendor_compat ON transceivers USING GIN(vendor_compat);
CREATE INDEX IF NOT EXISTS idx_switches_ports ON switches USING GIN(ports_config);

-- ============================================================
-- B-TREE INDEXES (lookups, sorting)
-- ============================================================
-- No USING clause is given, so PostgreSQL builds its default index type
-- (B-tree) for every index in this section.

-- transceivers: one single-column index per filterable attribute.
CREATE INDEX IF NOT EXISTS idx_transceivers_form_factor ON transceivers(form_factor);
CREATE INDEX IF NOT EXISTS idx_transceivers_speed_gbps ON transceivers(speed_gbps);
CREATE INDEX IF NOT EXISTS idx_transceivers_category ON transceivers(category);
CREATE INDEX IF NOT EXISTS idx_transceivers_market_status ON transceivers(market_status);
CREATE INDEX IF NOT EXISTS idx_transceivers_reach ON transceivers(reach_meters);
CREATE INDEX IF NOT EXISTS idx_transceivers_fiber_type ON transceivers(fiber_type);
CREATE INDEX IF NOT EXISTS idx_transceivers_wdm_type ON transceivers(wdm_type);
CREATE INDEX IF NOT EXISTS idx_transceivers_coherent ON transceivers(coherent);

-- switches: single-column B-tree indexes (default index method).
CREATE INDEX IF NOT EXISTS idx_switches_vendor ON switches(vendor_id);
CREATE INDEX IF NOT EXISTS idx_switches_category ON switches(category);
CREATE INDEX IF NOT EXISTS idx_switches_lifecycle ON switches(lifecycle_status);
CREATE INDEX IF NOT EXISTS idx_switches_max_speed ON switches(max_speed_gbps);

-- compatibility: one index per side of the switch/transceiver pairing, so
-- rows can be found starting from either id.
CREATE INDEX IF NOT EXISTS idx_compatibility_switch ON compatibility(switch_id);
CREATE INDEX IF NOT EXISTS idx_compatibility_transceiver ON compatibility(transceiver_id);

-- documents: composite index on (entity_type, entity_id), plus a separate
-- index on ocr_status.
CREATE INDEX IF NOT EXISTS idx_documents_entity ON documents(entity_type, entity_id);
CREATE INDEX IF NOT EXISTS idx_documents_ocr_status ON documents(ocr_status);

-- knowledge_base: single-column B-tree indexes on category and severity.
CREATE INDEX IF NOT EXISTS idx_kb_category ON knowledge_base(category);
CREATE INDEX IF NOT EXISTS idx_kb_severity ON knowledge_base(severity);

-- news_articles: published_at is stored in descending order in its index
-- (presumably for newest-first listings - confirm against the API queries).
CREATE INDEX IF NOT EXISTS idx_news_published ON news_articles(published_at DESC);
CREATE INDEX IF NOT EXISTS idx_news_category ON news_articles(category);
CREATE INDEX IF NOT EXISTS idx_news_event ON news_articles(event);

-- vendors: lookup by type, plus a partial index that contains only the rows
-- where is_competitor = TRUE (all other rows are left out of the index).
CREATE INDEX IF NOT EXISTS idx_vendors_type ON vendors(type);
CREATE INDEX IF NOT EXISTS idx_vendors_competitor ON vendors(is_competitor) WHERE is_competitor = TRUE;

-- ============================================================
-- TRIGRAM INDEXES (fuzzy search on names)
-- ============================================================
-- The gin_trgm_ops operator class is provided by the pg_trgm extension and
-- does not exist until that extension is installed. The guard below is a
-- no-op (NOTICE only) when an earlier migration has already installed it.
CREATE EXTENSION IF NOT EXISTS pg_trgm;

-- Trigram GIN indexes on name-like columns; they serve LIKE / ILIKE and
-- similarity operators.
CREATE INDEX IF NOT EXISTS idx_vendors_name_trgm ON vendors USING GIN(name gin_trgm_ops);
CREATE INDEX IF NOT EXISTS idx_switches_model_trgm ON switches USING GIN(model gin_trgm_ops);
CREATE INDEX IF NOT EXISTS idx_standards_name_trgm ON standards USING GIN(name gin_trgm_ops);

-- ============================================================
-- TIMESERIES INDEXES
-- ============================================================
-- Composite indexes: the leading column(s) select a series and the trailing
-- "time DESC" key keeps that series' newest observations first in the index.
-- NOTE(review): these tables are presumably TimescaleDB hypertables created
-- in an earlier migration - confirm; this file only adds the indexes.
CREATE INDEX IF NOT EXISTS idx_price_transceiver ON price_observations(transceiver_id, time DESC);
CREATE INDEX IF NOT EXISTS idx_price_source ON price_observations(source_vendor_id, time DESC);
CREATE INDEX IF NOT EXISTS idx_stock_transceiver ON stock_observations(transceiver_id, time DESC);
CREATE INDEX IF NOT EXISTS idx_metrics_technology ON market_metrics(technology, metric_type, time DESC);