transceiver-db/sql/003-timeseries.sql
Rene Fichtmueller b43bdd3060 feat: TIP Phase 0+1 — monorepo, DB schema, API, scraper engine
Phase 0 - Foundation:
- Restructure into npm workspace monorepo (packages/core, api, scraper)
- PostgreSQL 17 + TimescaleDB schema (15 tables incl. hypertables)
- Docker Compose for local dev (PostgreSQL on 5433 + Qdrant)
- Express 5 API on port 3200 with 6 routes
- Seed script to migrate 159 transceivers + 42 standards from npm package
- Erik server setup script + PM2 ecosystem config

Phase 1 - Scraper Engine:
- Crawlee + Playwright framework with pg-boss scheduler
- FS.com scraper (PlaywrightCrawler, anti-bot workaround)
- Optcore.net scraper (WP REST API enumeration + PlaywrightCrawler)
  - Uses /wp-json/wp/v2/product to get 2000+ product URLs
  - Playwright renders individual product pages for price extraction
- Cisco TMG Matrix scraper (compatibility data)
- News RSS aggregator (optics.org, SPIE, Network World, Nature Photonics)
  - Keyword relevance scoring for transceiver/fiber topics
  - xml2js with malformed XML sanitization
- SHA-256 content hashing for change detection (skip unchanged records)
- pg-boss v10 with explicit queue creation before scheduling
2026-03-27 16:27:31 +13:00

123 lines
4.2 KiB
SQL

-- TIP: Transceiver Intelligence Platform
-- Migration 003: TimescaleDB Hypertables
-- ============================================================
-- PRICE OBSERVATIONS (Real-time competitor pricing)
-- ============================================================
-- One row per observed price of a transceiver at a source vendor.
-- NOTE(review): transceiver_id / source_vendor_id have no FOREIGN KEY here;
-- presumably they reference tables created by an earlier migration -- confirm.
CREATE TABLE IF NOT EXISTS price_observations (
    "time"             TIMESTAMP WITH TIME ZONE NOT NULL,  -- hypertable partitioning column
    transceiver_id     UUID    NOT NULL,
    source_vendor_id   UUID    NOT NULL,
    price              NUMERIC NOT NULL,                   -- presumably denominated in "currency"
    currency           TEXT    DEFAULT 'USD',              -- nullable; 'USD' only when the column is omitted
    -- NULL is accepted (column is nullable); any non-NULL value must be in the list.
    stock_level        TEXT
        CONSTRAINT price_observations_stock_level_check
        CHECK (stock_level IN (
            'in_stock', 'low_stock', 'out_of_stock', 'on_request', 'discontinued'
        )),
    quantity_available INTEGER,
    lead_time_days     INTEGER,
    min_order_qty      INTEGER,
    url                TEXT,
    content_hash       TEXT                                -- presumably the scraper's change-detection hash; verify
);

-- Turn the table into a TimescaleDB hypertable range-partitioned on "time".
-- if_not_exists keeps a re-run of this migration from raising an error.
SELECT create_hypertable(
    'price_observations',
    by_range('time'),
    if_not_exists => TRUE
);
-- ============================================================
-- STOCK OBSERVATIONS (Separate stock tracking)
-- ============================================================
-- One row per observed stock state of a transceiver at a source vendor,
-- recorded independently of price_observations.
-- NOTE(review): transceiver_id / source_vendor_id have no FOREIGN KEY here;
-- presumably they reference tables created by an earlier migration -- confirm.
CREATE TABLE IF NOT EXISTS stock_observations (
    "time"             TIMESTAMP WITH TIME ZONE NOT NULL,  -- hypertable partitioning column
    transceiver_id     UUID    NOT NULL,
    source_vendor_id   UUID    NOT NULL,
    in_stock           BOOLEAN NOT NULL,
    quantity_available INTEGER,                            -- optional
    lead_time_days     INTEGER,                            -- optional
    content_hash       TEXT                                -- presumably the scraper's change-detection hash; verify
);

-- Turn the table into a TimescaleDB hypertable range-partitioned on "time".
-- if_not_exists keeps a re-run of this migration from raising an error.
SELECT create_hypertable(
    'stock_observations',
    by_range('time'),
    if_not_exists => TRUE
);
-- ============================================================
-- MARKET METRICS (Hype Cycle input data)
-- ============================================================
-- One row per (time, technology, metric_type) measurement.
-- metric_type is restricted to the fixed list below; the meaning and unit of
-- "value" depend on metric_type and are not enforced by the schema.
CREATE TABLE IF NOT EXISTS market_metrics (
    "time"      TIMESTAMP WITH TIME ZONE NOT NULL,  -- hypertable partitioning column
    technology  TEXT    NOT NULL,
    metric_type TEXT    NOT NULL
        CONSTRAINT market_metrics_metric_type_check
        CHECK (metric_type IN (
            'vendor_count',
            'shipment_share',
            'asp_decline_rate',
            'media_hype_index',
            'patent_filings',
            'port_shipments',
            'revenue_usd',
            'asp_usd'
        )),
    value       NUMERIC NOT NULL,
    source      TEXT,                               -- optional provenance
    notes       TEXT                                -- optional free text
);

-- Turn the table into a TimescaleDB hypertable range-partitioned on "time".
-- if_not_exists keeps a re-run of this migration from raising an error.
SELECT create_hypertable(
    'market_metrics',
    by_range('time'),
    if_not_exists => TRUE
);
-- ============================================================
-- CONTINUOUS AGGREGATES
-- ============================================================
-- Daily price aggregates.
-- Continuous aggregate over price_observations with one row per
-- (1-day bucket, transceiver, source vendor, currency).
--
-- currency is part of the grouping key so that AVG/MIN/MAX never blend
-- prices quoted in different currencies into a single figure. It is appended
-- as the last output column so existing column positions are unchanged.
-- Rows whose currency is NULL form their own group.
--
-- latest_stock is the stock_level of the newest observation in the bucket.
-- WITH NO DATA: nothing is materialized at creation; rows appear only once a
-- refresh (manual or via a refresh policy) has run.
-- NOTE(review): recent TimescaleDB releases may default continuous aggregates
-- to materialized-only, in which case the bucket still in progress is not
-- visible until refreshed -- confirm whether real-time aggregation is wanted.
CREATE MATERIALIZED VIEW IF NOT EXISTS price_daily
WITH (timescaledb.continuous) AS
SELECT
    time_bucket('1 day', time) AS bucket,
    transceiver_id,
    source_vendor_id,
    AVG(price)                 AS avg_price,
    MIN(price)                 AS min_price,
    MAX(price)                 AS max_price,
    last(stock_level, time)    AS latest_stock,
    COUNT(*)                   AS observation_count,
    currency
FROM price_observations
GROUP BY bucket, transceiver_id, source_vendor_id, currency
WITH NO DATA;
-- Weekly price aggregates.
-- Continuous aggregate over price_observations with one row per
-- (7-day bucket, transceiver, source vendor, currency).
--
-- currency is part of the grouping key so that AVG/MIN/MAX never blend
-- prices quoted in different currencies into a single figure. It is appended
-- as the last output column so existing column positions are unchanged.
-- Rows whose currency is NULL form their own group.
--
-- WITH NO DATA: nothing is materialized at creation; rows appear only once a
-- refresh (manual or via a refresh policy) has run.
-- NOTE(review): recent TimescaleDB releases may default continuous aggregates
-- to materialized-only, in which case buckets not yet refreshed are not
-- visible -- confirm whether real-time aggregation is wanted.
CREATE MATERIALIZED VIEW IF NOT EXISTS price_weekly
WITH (timescaledb.continuous) AS
SELECT
    time_bucket('7 days', time) AS bucket,
    transceiver_id,
    source_vendor_id,
    AVG(price)                  AS avg_price,
    MIN(price)                  AS min_price,
    MAX(price)                  AS max_price,
    COUNT(*)                    AS observation_count,
    currency
FROM price_observations
GROUP BY bucket, transceiver_id, source_vendor_id, currency
WITH NO DATA;
-- ============================================================
-- RETENTION POLICIES
-- ============================================================
-- Raw observation tables: register a policy that drops data older than 90 days.
-- if_not_exists keeps a re-run of this migration from raising an error when
-- the policy is already registered.
-- NOTE(review): the refresh policies in this file cover only the last 3 days
-- (price_daily) and 30 days (price_weekly), well inside the 90-day horizon,
-- so already-materialized aggregate buckets should survive the raw-data drop
-- -- confirm against the TimescaleDB retention documentation.

-- Raw price data: keep 90 days
SELECT add_retention_policy(
    'price_observations',
    INTERVAL '90 days',
    if_not_exists => TRUE
);

-- Raw stock data: keep 90 days
SELECT add_retention_policy(
    'stock_observations',
    INTERVAL '90 days',
    if_not_exists => TRUE
);
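-- Market metrics: low volume, intended to be kept long-term (about 10 years).
-- No retention policy is registered in this migration, so rows are kept
-- indefinitely unless a policy is added later.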
-- ============================================================
-- REFRESH POLICIES for continuous aggregates
-- ============================================================
-- price_daily: scheduled every hour; each run refreshes the window from
-- 3 days ago up to 1 hour ago.
SELECT add_continuous_aggregate_policy(
    'price_daily',
    schedule_interval => INTERVAL '1 hour',
    start_offset      => INTERVAL '3 days',
    end_offset        => INTERVAL '1 hour',
    if_not_exists     => TRUE
);

-- price_weekly: scheduled once a day; each run refreshes the window from
-- 30 days ago up to 7 days ago, so the most recent 7 days are never
-- materialized by this policy.
SELECT add_continuous_aggregate_policy(
    'price_weekly',
    schedule_interval => INTERVAL '1 day',
    start_offset      => INTERVAL '30 days',
    end_offset        => INTERVAL '7 days',
    if_not_exists     => TRUE
);