transceiver-db/sql/022-prediction-signals.sql
Rene Fichtmueller 370c1d8801 feat: 6 prediction signal scrapers + forecast engine
New scrapers (all registered in pg-boss, 50 total jobs):
  - sec-edgar.ts       : SEC EDGAR XBRL API — hyperscaler CapEx from 10-Q/10-K
  - github-signals.ts  : GitHub Search/Stats API — tech adoption metrics weekly
  - ebay-velocity.ts   : eBay completed listings — sold count + price distribution
  - ai-clusters.ts     : RSS feeds (6 sources) — AI cluster & DC announcements
  - distributor-leads.ts : Mouser, Digi-Key, RS Components — lead time + stock
  - standards-tracker.ts : IEEE 802.3, OIF, IETF — draft/ballot/published status

New utilities:
  - forecast-engine.ts : Weighted signal aggregator → demand_index + price_direction
    6 signal types, 4 horizons (3/9/12/18 months), 5 technologies tracked

New DB tables (migration 022):
  hyperscaler_capex, distributor_lead_times, github_tech_signals,
  marketplace_velocity, ai_cluster_announcements, standards_activity,
  forecast_signals

Schedules:
  - EDGAR: weekly Mon 06:00
  - GitHub: weekly Sun 05:00
  - eBay velocity: every 12h
  - AI clusters: every 4h (news-speed)
  - Distributor leads: daily 03:30
  - Standards: weekly Wed 04:00
  - Forecast engine: daily 08:00 (after all nightly scrapers)
2026-04-02 02:02:44 +02:00

132 lines
7.1 KiB
SQL

-- ─────────────────────────────────────────────────────────────────────────────
-- 022 — Prediction Signals Data Model
-- New tables for demand/supply/adoption signals that feed the forecast engine
-- ─────────────────────────────────────────────────────────────────────────────
-- Quarterly capital expenditure per hyperscaler, taken from SEC EDGAR XBRL filings.
-- A company may have at most one row per period end date.
CREATE TABLE IF NOT EXISTS hyperscaler_capex (
    id                     UUID           PRIMARY KEY DEFAULT gen_random_uuid(),
    company                TEXT           NOT NULL,   -- e.g. 'amazon', 'microsoft', 'google', 'meta'
    period_label           TEXT           NOT NULL,   -- display label, e.g. 'Q1 2025'
    period_end             DATE           NOT NULL,
    capex_usd_millions     NUMERIC(12,1),             -- total CapEx, in USD millions
    dc_capex_est_millions  NUMERIC(12,1),             -- estimated DC/cloud portion, in millions
    yoy_growth_pct         NUMERIC(7,2),              -- year-over-year growth, in percent
    source_url             TEXT,
    filing_type            TEXT,                      -- e.g. '10-Q', '10-K'
    created_at             TIMESTAMPTZ    NOT NULL DEFAULT NOW(),

    UNIQUE (company, period_end)
);
-- Snapshots of distributor availability: stock level, lead time and price per part.
-- No uniqueness is enforced; each scrape appends new rows stamped with scraped_at.
CREATE TABLE IF NOT EXISTS distributor_lead_times (
    id               UUID           PRIMARY KEY DEFAULT gen_random_uuid(),
    distributor      TEXT           NOT NULL,   -- e.g. 'mouser', 'digikey', 'rs'
    form_factor      TEXT,                      -- e.g. 'QSFP28', 'QSFP-DD'
    speed_label      TEXT,                      -- e.g. '100G', '400G'
    part_number      TEXT,
    product_name     TEXT,
    in_stock         BOOLEAN,
    stock_qty        INTEGER,
    lead_time_weeks  INTEGER,
    price_usd        NUMERIC(10,2),
    product_url      TEXT,
    scraped_at       TIMESTAMPTZ    NOT NULL DEFAULT NOW()
);

-- Supports "most recent snapshots for a form factor" lookups.
CREATE INDEX IF NOT EXISTS idx_dist_leads_form_factor
    ON distributor_lead_times (form_factor, scraped_at DESC);
-- Weekly GitHub adoption metrics, one value per technology / metric / repo context / week.
-- NOTE(review): repo_context is nullable, and a PostgreSQL UNIQUE constraint treats
-- NULLs as distinct, so rows with a NULL repo_context are never deduplicated by the
-- constraint below. If NULL is not a meaningful value, consider NOT NULL DEFAULT 'all'
-- (or UNIQUE NULLS NOT DISTINCT on PostgreSQL 15+) — confirm what the scraper writes.
CREATE TABLE IF NOT EXISTS github_tech_signals (
    id            UUID         PRIMARY KEY DEFAULT gen_random_uuid(),
    technology    TEXT         NOT NULL,   -- e.g. '400G', '800G', 'ZR', 'CPO', 'silicon-photonics', 'CMIS'
    metric        TEXT         NOT NULL,   -- e.g. 'repo_count', 'commit_count', 'issue_count', 'stars_total'
    repo_context  TEXT,                    -- e.g. 'sonic-net/SONiC', 'openconfig', 'all'
    value         INTEGER      NOT NULL,
    week_start    DATE         NOT NULL,
    created_at    TIMESTAMPTZ  NOT NULL DEFAULT NOW(),

    UNIQUE (technology, metric, repo_context, week_start)
);
-- Marketplace sales velocity per search keyword: sold/active listing counts and
-- the sold-price distribution. Each scrape appends rows stamped with scraped_at.
CREATE TABLE IF NOT EXISTS marketplace_velocity (
    id               UUID           PRIMARY KEY DEFAULT gen_random_uuid(),
    marketplace      TEXT           NOT NULL,   -- e.g. 'ebay', 'aliexpress'
    keyword          TEXT           NOT NULL,
    form_factor      TEXT,
    speed_label      TEXT,
    sold_count_30d   INTEGER,                   -- listings sold during the last 30 days
    active_listings  INTEGER,                   -- listings active at scrape time
    avg_sold_price   NUMERIC(10,2),
    min_price        NUMERIC(10,2),
    max_price        NUMERIC(10,2),
    currency         TEXT           DEFAULT 'USD',
    scraped_at       TIMESTAMPTZ    NOT NULL DEFAULT NOW()
);

-- Supports "most recent snapshots for a keyword" lookups.
CREATE INDEX IF NOT EXISTS idx_velocity_keyword
    ON marketplace_velocity (keyword, scraped_at DESC);
-- Announcements of AI clusters and hyperscale data-centre builds.
-- content_hash is UNIQUE, so a non-NULL hash can be stored only once.
CREATE TABLE IF NOT EXISTS ai_cluster_announcements (
    id                      UUID           PRIMARY KEY DEFAULT gen_random_uuid(),
    company                 TEXT           NOT NULL,
    title                   TEXT           NOT NULL,
    summary                 TEXT,
    announced_date          DATE,
    scale_mw                NUMERIC(10,1),             -- announced power, in MW
    scale_servers           INTEGER,
    network_speed           TEXT,                      -- e.g. '400G', '800G', 'IB-NDR'
    estimated_transceivers  INTEGER,                   -- rough estimate only
    deployment_date         DATE,                      -- expected go-live date
    location                TEXT,
    source_url              TEXT           NOT NULL,
    source_name             TEXT,
    content_hash            TEXT           UNIQUE,
    created_at              TIMESTAMPTZ    NOT NULL DEFAULT NOW()
);

-- Supports newest-first listing by announcement date.
CREATE INDEX IF NOT EXISTS idx_ai_clusters_date
    ON ai_cluster_announcements (announced_date DESC);
-- Status tracker for networking standards; one row per (standard_body, standard_name).
CREATE TABLE IF NOT EXISTS standards_activity (
    id              UUID         PRIMARY KEY DEFAULT gen_random_uuid(),
    standard_body   TEXT         NOT NULL,   -- e.g. 'ieee', 'ietf', 'oif', 'msa', 'snia'
    standard_name   TEXT         NOT NULL,   -- e.g. '802.3df', 'CMIS 5.2', 'OIF-400ZR'
    technology      TEXT,                    -- e.g. '400G', '800G', 'ZR', 'CPO'
    current_status  TEXT,                    -- e.g. 'in-progress', 'ballot', 'approved', 'published'
    draft_version   TEXT,
    ballot_date     DATE,
    approval_date   DATE,
    source_url      TEXT,
    notes           TEXT,
    last_checked    TIMESTAMPTZ,             -- no default: stays NULL until explicitly set
    created_at      TIMESTAMPTZ  NOT NULL DEFAULT NOW(),

    UNIQUE (standard_body, standard_name)
);
-- Aggregated forecast signals (computed by the forecast engine).
-- Each row is one forecast for a technology at a given horizon, stamped with computed_at.
CREATE TABLE IF NOT EXISTS forecast_signals (
    id                UUID           PRIMARY KEY DEFAULT gen_random_uuid(),
    technology        TEXT           NOT NULL,   -- e.g. '100G-QSFP28', '400G-QSFP-DD', '800G-OSFP'
    horizon_months    INTEGER        NOT NULL,   -- e.g. 3, 9, 12, 18
    demand_index      NUMERIC(5,2),              -- composite demand score, 0-100
    price_direction   TEXT,                      -- 'rising', 'falling', 'stable'
    price_delta_pct   NUMERIC(7,2),              -- estimated price change, in percent
    confidence        NUMERIC(3,2),              -- 0.0-1.0
    signal_count      INTEGER,                   -- number of signals that fed this forecast
    signal_breakdown  JSONB,                     -- breakdown by signal type
    -- Last column: it must not be followed by a comma, otherwise the statement
    -- fails to parse and the table is never created.
    computed_at       TIMESTAMPTZ    NOT NULL DEFAULT NOW()
);

-- Supports "latest forecasts for a technology" lookups.
CREATE INDEX IF NOT EXISTS idx_forecast_tech
    ON forecast_signals (technology, computed_at DESC);

-- At most one forecast per technology / horizon / UTC calendar day.
-- date_trunc() applied directly to a TIMESTAMPTZ depends on the session TimeZone
-- setting and is therefore not IMMUTABLE, which PostgreSQL requires of every
-- function in an index expression. Converting to UTC first yields a plain
-- TIMESTAMP, for which date_trunc() is immutable.
-- NOTE(review): an ON CONFLICT clause that infers this index must spell the
-- expression the same way — confirm against the forecast engine's upsert.
CREATE UNIQUE INDEX IF NOT EXISTS idx_forecast_unique
    ON forecast_signals (technology, horizon_months, date_trunc('day', computed_at AT TIME ZONE 'UTC'));
-- Seed the standards tracker with a baseline set of known standards.
-- Existing (standard_body, standard_name) rows are left untouched, so re-running
-- this statement never overwrites or refreshes a row that is already present.
-- NOTE(review): the statuses and notes below are hard-coded snapshots; confirm
-- they are still current (e.g. the 802.3df and 802.3dk rows) before relying on them.
INSERT INTO standards_activity (
    standard_body,
    standard_name,
    technology,
    current_status,
    source_url,
    notes
)
VALUES
    -- standard_body = 'ieee'
    ('ieee', '802.3bs-400GbE', '400G', 'published',
     'https://standards.ieee.org/ieee/802.3bs/5950/',
     'Published 2017, baseline 400G standard'),
    ('ieee', '802.3cd-100G/200G/400G', '400G', 'published',
     'https://standards.ieee.org/ieee/802.3cd/6635/',
     'Published 2018'),
    ('ieee', '802.3df-800GbE', '800G', 'in-progress',
     'https://www.ieee802.org/3/df/',
     'Draft in progress, expected 2025'),
    ('ieee', '802.3dk-1.6T', '1.6T', 'in-progress',
     'https://www.ieee802.org/3/dk/',
     'Very early stage'),
    -- standard_body = 'oif'
    ('oif', 'OIF-400ZR', '400G-ZR', 'published',
     'https://www.oiforum.com/technical-work/hot-topics/400zr-2/',
     'Published 2020'),
    ('oif', 'OIF-CMIS-5.2', 'CMIS', 'published',
     'https://www.oiforum.com/technical-work/hot-topics/cmis/',
     'CMIS 5.2 published 2023'),
    ('oif', 'OIF-800ZR', '800G-ZR', 'in-progress',
     'https://www.oiforum.com/technical-work/hot-topics/',
     'In development'),
    -- standard_body = 'msa'
    ('msa', '400G-FR4', '400G', 'published',
     'https://www.400g.com/',
     '400G FR4 MSA complete'),
    ('msa', '800G-XDR8', '800G', 'in-progress',
     'https://www.800g.info/',
     'Specification in progress')
ON CONFLICT (standard_body, standard_name) DO NOTHING;