-- Phase 0 - Foundation:
--   - Restructure into npm workspace monorepo (packages/core, api, scraper)
--   - PostgreSQL 17 + TimescaleDB schema (15 tables incl. hypertables)
--   - Docker Compose for local dev (PostgreSQL on 5433 + Qdrant)
--   - Express 5 API on port 3200 with 6 routes
--   - Seed script to migrate 159 transceivers + 42 standards from npm package
--   - Erik server setup script + PM2 ecosystem config
--
-- Phase 1 - Scraper Engine:
--   - Crawlee + Playwright framework with pg-boss scheduler
--   - FS.com scraper (PlaywrightCrawler, anti-bot workaround)
--   - Optcore.net scraper (WP REST API enumeration + PlaywrightCrawler)
--     - Uses /wp-json/wp/v2/product to get 2000+ product URLs
--     - Playwright renders individual product pages for price extraction
--   - Cisco TMG Matrix scraper (compatibility data)
--   - News RSS aggregator (optics.org, SPIE, Network World, Nature Photonics)
--     - Keyword relevance scoring for transceiver/fiber topics
--     - xml2js with malformed XML sanitization
--   - SHA-256 content hashing for change detection (skip unchanged records)
--   - pg-boss v10 with explicit queue creation before scheduling
--
-- File: 123 lines, 4.2 KiB, SQL
-- TIP: Transceiver Intelligence Platform
|
|
-- Migration 003: TimescaleDB Hypertables
|
|
|
|
-- ============================================================
-- PRICE OBSERVATIONS (Real-time competitor pricing)
-- ============================================================
-- One row per price observation for a transceiver at a source vendor.
-- NOTE(review): transceiver_id / source_vendor_id have no FOREIGN KEY here;
-- presumably they point at tables created by earlier migrations - confirm.
CREATE TABLE IF NOT EXISTS price_observations (
    time                TIMESTAMPTZ NOT NULL,      -- observation timestamp
    transceiver_id      UUID        NOT NULL,
    source_vendor_id    UUID        NOT NULL,
    price               NUMERIC     NOT NULL,      -- presumably denominated in "currency" - confirm
    currency            TEXT        DEFAULT 'USD', -- nullable; 'USD' when the column is omitted
    -- Optional; when present it must be one of the five listed states.
    stock_level         TEXT
        CONSTRAINT price_observations_stock_level_check
        CHECK (stock_level IN ('in_stock','low_stock','out_of_stock','on_request','discontinued')),
    quantity_available  INTEGER,
    lead_time_days      INTEGER,
    min_order_qty       INTEGER,
    url                 TEXT,
    content_hash        TEXT                       -- presumably the scraper's change-detection hash - confirm
);
|
|
|
|
-- Turn price_observations into a TimescaleDB hypertable that is
-- range-partitioned on its "time" column. if_not_exists => TRUE lets the
-- statement be re-run when the table is already a hypertable.
SELECT create_hypertable(
    'price_observations',
    by_range('time'),
    if_not_exists => TRUE
);
|
|
|
|
-- ============================================================
-- STOCK OBSERVATIONS (Separate stock tracking)
-- ============================================================
-- One row per stock observation for a transceiver at a source vendor.
-- NOTE(review): transceiver_id / source_vendor_id have no FOREIGN KEY here;
-- presumably they point at tables created by earlier migrations - confirm.
CREATE TABLE IF NOT EXISTS stock_observations (
    time                TIMESTAMPTZ NOT NULL,   -- observation timestamp
    transceiver_id      UUID        NOT NULL,
    source_vendor_id    UUID        NOT NULL,
    in_stock            BOOLEAN     NOT NULL,   -- required availability flag
    quantity_available  INTEGER,                -- optional
    lead_time_days      INTEGER,                -- optional
    content_hash        TEXT                    -- presumably the scraper's change-detection hash - confirm
);
|
|
|
|
-- Turn stock_observations into a TimescaleDB hypertable that is
-- range-partitioned on its "time" column. if_not_exists => TRUE lets the
-- statement be re-run when the table is already a hypertable.
SELECT create_hypertable(
    'stock_observations',
    by_range('time'),
    if_not_exists => TRUE
);
|
|
|
|
-- ============================================================
-- MARKET METRICS (Hype Cycle input data)
-- ============================================================
-- One row per (time, technology, metric_type) measurement.
-- NOTE(review): the unit of "value" presumably depends on metric_type
-- (e.g. *_usd vs. counts/shares) - confirm with the writers of this table.
CREATE TABLE IF NOT EXISTS market_metrics (
    time         TIMESTAMPTZ NOT NULL,
    technology   TEXT        NOT NULL,
    -- Closed set of metric kinds; extending it requires changing this CHECK.
    metric_type  TEXT        NOT NULL
        CONSTRAINT market_metrics_metric_type_check
        CHECK (metric_type IN (
            'vendor_count',
            'shipment_share',
            'asp_decline_rate',
            'media_hype_index',
            'patent_filings',
            'port_shipments',
            'revenue_usd',
            'asp_usd'
        )),
    value        NUMERIC     NOT NULL,
    source       TEXT,       -- optional
    notes        TEXT        -- optional
);
|
|
|
|
-- Turn market_metrics into a TimescaleDB hypertable that is
-- range-partitioned on its "time" column. if_not_exists => TRUE lets the
-- statement be re-run when the table is already a hypertable.
SELECT create_hypertable(
    'market_metrics',
    by_range('time'),
    if_not_exists => TRUE
);
|
|
|
|
-- ============================================================
|
|
-- CONTINUOUS AGGREGATES
|
|
-- ============================================================
|
|
|
|
-- Daily price aggregates
-- Continuous aggregate over price_observations: one row per
-- (1-day bucket, transceiver_id, source_vendor_id) carrying the average,
-- minimum and maximum price, the stock_level of the most recent observation
-- in the bucket, and the number of observations.
-- Created WITH NO DATA, so this statement itself materializes nothing.
-- NOTE(review): "currency" is not part of the grouping, so prices stored in
-- different currencies for the same transceiver/vendor would be averaged
-- together - confirm that each vendor reports a single currency.
CREATE MATERIALIZED VIEW IF NOT EXISTS price_daily
WITH (timescaledb.continuous) AS
    SELECT
        time_bucket(INTERVAL '1 day', time) AS bucket,
        transceiver_id,
        source_vendor_id,
        AVG(price)              AS avg_price,
        MIN(price)              AS min_price,
        MAX(price)              AS max_price,
        last(stock_level, time) AS latest_stock,   -- stock_level at the greatest "time" in the bucket
        COUNT(*)                AS observation_count
    FROM price_observations
    GROUP BY
        bucket,
        transceiver_id,
        source_vendor_id
WITH NO DATA;
|
|
|
|
-- Weekly price aggregates
-- Continuous aggregate over price_observations: one row per
-- (7-day bucket, transceiver_id, source_vendor_id) carrying the average,
-- minimum and maximum price and the number of observations.
-- Created WITH NO DATA, so this statement itself materializes nothing.
-- NOTE(review): "currency" is not part of the grouping, so prices stored in
-- different currencies for the same transceiver/vendor would be averaged
-- together - confirm that each vendor reports a single currency.
CREATE MATERIALIZED VIEW IF NOT EXISTS price_weekly
WITH (timescaledb.continuous) AS
    SELECT
        time_bucket(INTERVAL '7 days', time) AS bucket,
        transceiver_id,
        source_vendor_id,
        AVG(price) AS avg_price,
        MIN(price) AS min_price,
        MAX(price) AS max_price,
        COUNT(*)   AS observation_count
    FROM price_observations
    GROUP BY
        bucket,
        transceiver_id,
        source_vendor_id
WITH NO DATA;
|
|
|
|
-- ============================================================
|
|
-- RETENTION POLICIES
|
|
-- ============================================================
|
|
|
|
-- Raw price data: keep 90 days
-- Registers a TimescaleDB retention policy that drops price_observations
-- chunks older than 90 days; if_not_exists => TRUE makes re-running safe.
SELECT add_retention_policy(
    'price_observations',
    drop_after    => INTERVAL '90 days',
    if_not_exists => TRUE
);
|
|
|
|
-- Raw stock data: keep 90 days
-- Registers a TimescaleDB retention policy that drops stock_observations
-- chunks older than 90 days; if_not_exists => TRUE makes re-running safe.
SELECT add_retention_policy(
    'stock_observations',
    drop_after    => INTERVAL '90 days',
    if_not_exists => TRUE
);
|
|
|
|
-- Market metrics: intended to be kept for 10 years (small volume).
-- No retention policy is configured, so rows are currently kept indefinitely.
|
|
|
|
-- ============================================================
|
|
-- REFRESH POLICIES for continuous aggregates
|
|
-- ============================================================
|
|
-- price_daily refresh: once an hour, re-materialize the window that starts
-- 3 days before the job runs and ends 1 hour before it.
SELECT add_continuous_aggregate_policy(
    'price_daily',
    start_offset      => INTERVAL '3 days',
    end_offset        => INTERVAL '1 hour',
    schedule_interval => INTERVAL '1 hour',
    if_not_exists     => TRUE
);
|
|
|
|
-- price_weekly refresh: once a day, re-materialize the window that starts
-- 30 days before the job runs and ends 7 days before it.
-- NOTE(review): with end_offset = 7 days, observations from the most recent
-- 7 days fall outside this policy's refresh window - confirm that lag is
-- acceptable for consumers of price_weekly.
SELECT add_continuous_aggregate_policy(
    'price_weekly',
    start_offset      => INTERVAL '30 days',
    end_offset        => INTERVAL '7 days',
    schedule_interval => INTERVAL '1 day',
    if_not_exists     => TRUE
);
|