New scrapers (all registered in pg-boss, 50 total jobs):
- sec-edgar.ts : SEC EDGAR XBRL API — hyperscaler CapEx from 10-Q/10-K
- github-signals.ts : GitHub Search/Stats API — tech adoption metrics weekly
- ebay-velocity.ts : eBay completed listings — sold count + price distribution
- ai-clusters.ts : RSS feeds (6 sources) — AI cluster & DC announcements
- distributor-leads.ts : Mouser, Digi-Key, RS Components — lead time + stock
- standards-tracker.ts : IEEE 802.3, OIF, IETF — draft/ballot/published status
New utilities:
- forecast-engine.ts : Weighted signal aggregator → demand_index + price_direction
6 signal types, 4 horizons (3/9/12/18 months), 5 technologies tracked
New DB tables (migration 022):
hyperscaler_capex, distributor_lead_times, github_tech_signals,
marketplace_velocity, ai_cluster_announcements, standards_activity,
forecast_signals
Schedules:
- EDGAR: weekly Mon 06:00
- GitHub: weekly Sun 05:00
- eBay velocity: every 12h
- AI clusters: every 4h (news-speed)
- Distributor leads: daily 03:30
- Standards: weekly Wed 04:00
- Forecast engine: daily 08:00 (after all nightly scrapers)
132 lines
7.1 KiB
SQL
132 lines
7.1 KiB
SQL
-- ─────────────────────────────────────────────────────────────────────────────
-- 022 — Prediction Signals Data Model
-- New tables for demand/supply/adoption signals that feed the forecast engine
-- ─────────────────────────────────────────────────────────────────────────────
|
|
|
|
-- Hyperscaler quarterly CapEx (from SEC EDGAR XBRL).
-- At most one row per company and period end date (see the UNIQUE constraint).
CREATE TABLE IF NOT EXISTS hyperscaler_capex (
    id                    UUID          DEFAULT gen_random_uuid() PRIMARY KEY,

    -- Company slug: 'amazon', 'microsoft', 'google', 'meta'
    company               TEXT          NOT NULL,

    -- Human-readable fiscal period, e.g. 'Q1 2025'
    period_label          TEXT          NOT NULL,
    period_end            DATE          NOT NULL,

    -- Total CapEx, in USD millions
    capex_usd_millions    NUMERIC(12,1),

    -- Estimated data-center / cloud share of CapEx, in USD millions
    dc_capex_est_millions NUMERIC(12,1),

    -- Year-over-year growth, in percent
    yoy_growth_pct        NUMERIC(7,2),

    source_url            TEXT,

    -- Filing form: '10-Q' or '10-K'
    filing_type           TEXT,

    created_at            TIMESTAMPTZ   DEFAULT NOW() NOT NULL,

    UNIQUE (company, period_end)
);
|
|
|
|
-- Distributor lead times and stock levels.
-- Append-only style snapshot table: there is no uniqueness constraint, each
-- row is stamped with scraped_at.
CREATE TABLE IF NOT EXISTS distributor_lead_times (
    id              UUID          DEFAULT gen_random_uuid() PRIMARY KEY,

    -- Distributor slug: 'mouser', 'digikey', 'rs'
    distributor     TEXT          NOT NULL,

    -- Transceiver form factor, e.g. 'QSFP28', 'QSFP-DD'
    form_factor     TEXT,

    -- Line rate label, e.g. '100G', '400G'
    speed_label     TEXT,

    part_number     TEXT,
    product_name    TEXT,
    in_stock        BOOLEAN,
    stock_qty       INTEGER,
    lead_time_weeks INTEGER,
    price_usd       NUMERIC(10,2),
    product_url     TEXT,
    scraped_at      TIMESTAMPTZ   DEFAULT NOW() NOT NULL
);

-- Supports "latest snapshots for a form factor" lookups (newest first).
CREATE INDEX IF NOT EXISTS idx_dist_leads_form_factor
    ON distributor_lead_times (form_factor, scraped_at DESC);
|
|
|
|
-- GitHub technology adoption signals (weekly snapshots).
-- One value per (technology, metric, repo_context, week_start).
--
-- NOTE(review): repo_context is nullable, and PostgreSQL treats NULLs as
-- distinct in UNIQUE constraints by default, so rows with a NULL repo_context
-- are never deduplicated by the constraint below. Confirm writers always pass
-- a value (e.g. the 'all' sentinel) before relying on it for upserts.
CREATE TABLE IF NOT EXISTS github_tech_signals (
    id           UUID          DEFAULT gen_random_uuid() PRIMARY KEY,

    -- Tracked technology: '400G', '800G', 'ZR', 'CPO', 'silicon-photonics', 'CMIS'
    technology   TEXT          NOT NULL,

    -- Metric name: 'repo_count', 'commit_count', 'issue_count', 'stars_total'
    metric       TEXT          NOT NULL,

    -- Scope of the measurement: 'sonic-net/SONiC', 'openconfig', 'all'
    repo_context TEXT,

    value        INTEGER       NOT NULL,
    week_start   DATE          NOT NULL,
    created_at   TIMESTAMPTZ   DEFAULT NOW() NOT NULL,

    UNIQUE (technology, metric, repo_context, week_start)
);
|
|
|
|
-- Marketplace velocity snapshots (sold listings count + price statistics).
-- Rows are keyed only by the surrogate id; each scrape adds a new row stamped
-- with scraped_at.
CREATE TABLE IF NOT EXISTS marketplace_velocity (
    id              UUID          DEFAULT gen_random_uuid() PRIMARY KEY,

    -- Marketplace slug: 'ebay', 'aliexpress'
    marketplace     TEXT          NOT NULL,

    -- Search keyword the figures were collected for
    keyword         TEXT          NOT NULL,

    form_factor     TEXT,
    speed_label     TEXT,

    -- Listings sold in the last 30 days
    sold_count_30d  INTEGER,

    -- Currently active listings
    active_listings INTEGER,

    avg_sold_price  NUMERIC(10,2),
    min_price       NUMERIC(10,2),
    max_price       NUMERIC(10,2),

    -- Currency code of the price columns; defaults to 'USD'
    currency        TEXT          DEFAULT 'USD',

    scraped_at      TIMESTAMPTZ   DEFAULT NOW() NOT NULL
);

-- Supports "latest snapshots for a keyword" lookups (newest first).
CREATE INDEX IF NOT EXISTS idx_velocity_keyword
    ON marketplace_velocity (keyword, scraped_at DESC);
|
|
|
|
-- AI cluster / hyperscale DC announcements.
-- content_hash is UNIQUE, so a given non-NULL hash can be stored only once.
CREATE TABLE IF NOT EXISTS ai_cluster_announcements (
    id                     UUID          DEFAULT gen_random_uuid() PRIMARY KEY,
    company                TEXT          NOT NULL,
    title                  TEXT          NOT NULL,
    summary                TEXT,
    announced_date         DATE,

    -- Announced power, in MW
    scale_mw               NUMERIC(10,1),

    scale_servers          INTEGER,

    -- Fabric speed label, e.g. '400G', '800G', 'IB-NDR'
    network_speed          TEXT,

    -- Rough estimate of transceiver count
    estimated_transceivers INTEGER,

    -- Expected go-live date
    deployment_date        DATE,

    location               TEXT,
    source_url             TEXT          NOT NULL,
    source_name            TEXT,
    content_hash           TEXT          UNIQUE,
    created_at             TIMESTAMPTZ   DEFAULT NOW() NOT NULL
);

-- Supports newest-first listing by announcement date.
CREATE INDEX IF NOT EXISTS idx_ai_clusters_date
    ON ai_cluster_announcements (announced_date DESC);
|
|
|
|
-- Network standards status tracker.
-- One row per (standard_body, standard_name).
CREATE TABLE IF NOT EXISTS standards_activity (
    id             UUID          DEFAULT gen_random_uuid() PRIMARY KEY,

    -- Standards organisation slug: 'ieee', 'ietf', 'oif', 'msa', 'snia'
    standard_body  TEXT          NOT NULL,

    -- Standard identifier, e.g. '802.3df', 'CMIS 5.2', 'OIF-400ZR'
    standard_name  TEXT          NOT NULL,

    -- Related technology, e.g. '400G', '800G', 'ZR', 'CPO'
    technology     TEXT,

    -- Lifecycle state: 'in-progress', 'ballot', 'approved', 'published'
    current_status TEXT,

    draft_version  TEXT,
    ballot_date    DATE,
    approval_date  DATE,
    source_url     TEXT,
    notes          TEXT,
    last_checked   TIMESTAMPTZ,
    created_at     TIMESTAMPTZ   DEFAULT NOW() NOT NULL,

    UNIQUE (standard_body, standard_name)
);
|
|
|
|
-- Aggregated forecast signals (computed by forecast engine).
-- One forecast per technology, horizon and UTC calendar day; see
-- idx_forecast_unique below.
CREATE TABLE IF NOT EXISTS forecast_signals (
    id               UUID          PRIMARY KEY DEFAULT gen_random_uuid(),

    -- Technology key, e.g. '100G-QSFP28', '400G-QSFP-DD', '800G-OSFP'
    technology       TEXT          NOT NULL,

    -- Forecast horizon in months: 3, 9, 12, 18
    horizon_months   INTEGER       NOT NULL,

    -- Composite demand score, 0-100
    demand_index     NUMERIC(5,2),

    -- 'rising', 'falling', 'stable'
    price_direction  TEXT,

    -- Estimated price change, in percent
    price_delta_pct  NUMERIC(7,2),

    -- Confidence, 0.0-1.0
    confidence       NUMERIC(3,2),

    -- How many signals fed this forecast
    signal_count     INTEGER,

    -- Breakdown by signal type
    signal_breakdown JSONB,

    -- Last column: no trailing comma (a trailing comma before the closing
    -- parenthesis is a syntax error and aborts the whole migration).
    computed_at      TIMESTAMPTZ   NOT NULL DEFAULT NOW()
);

-- Supports "latest forecasts for a technology" lookups (newest first).
CREATE INDEX IF NOT EXISTS idx_forecast_tech
    ON forecast_signals (technology, computed_at DESC);

-- Enforces at most one forecast per (technology, horizon, day).
-- date_trunc() on a TIMESTAMPTZ depends on the session TimeZone setting and is
-- therefore only STABLE; PostgreSQL rejects non-IMMUTABLE functions in index
-- expressions. Converting to a fixed zone first yields a plain TIMESTAMP, for
-- which date_trunc() is IMMUTABLE. The day boundary is consequently UTC.
-- NOTE(review): any ON CONFLICT clause that targets this index must spell the
-- expression the same way — confirm against the forecast engine's upsert.
CREATE UNIQUE INDEX IF NOT EXISTS idx_forecast_unique
    ON forecast_signals (
        technology,
        horizon_months,
        date_trunc('day', computed_at AT TIME ZONE 'UTC')
    );
|
|
|
|
-- Seed known standards status.
-- Safe to re-run: rows whose (standard_body, standard_name) already exist are
-- left untouched by ON CONFLICT ... DO NOTHING.
-- NOTE(review): the '802.3dk-1.6T' row may be mislabelled (the IEEE 1.6T effort
-- is commonly associated with P802.3dj) — verify against the IEEE 802.3 site
-- before relying on it; data is left unchanged here.
INSERT INTO standards_activity
    (standard_body, standard_name, technology, current_status, source_url, notes)
VALUES
    -- IEEE 802.3
    (
        'ieee', '802.3bs-400GbE', '400G', 'published',
        'https://standards.ieee.org/ieee/802.3bs/5950/',
        'Published 2017, baseline 400G standard'
    ),
    (
        'ieee', '802.3cd-100G/200G/400G', '400G', 'published',
        'https://standards.ieee.org/ieee/802.3cd/6635/',
        'Published 2018'
    ),
    (
        'ieee', '802.3df-800GbE', '800G', 'in-progress',
        'https://www.ieee802.org/3/df/',
        'Draft in progress, expected 2025'
    ),
    (
        'ieee', '802.3dk-1.6T', '1.6T', 'in-progress',
        'https://www.ieee802.org/3/dk/',
        'Very early stage'
    ),
    -- OIF
    (
        'oif', 'OIF-400ZR', '400G-ZR', 'published',
        'https://www.oiforum.com/technical-work/hot-topics/400zr-2/',
        'Published 2020'
    ),
    (
        'oif', 'OIF-CMIS-5.2', 'CMIS', 'published',
        'https://www.oiforum.com/technical-work/hot-topics/cmis/',
        'CMIS 5.2 published 2023'
    ),
    (
        'oif', 'OIF-800ZR', '800G-ZR', 'in-progress',
        'https://www.oiforum.com/technical-work/hot-topics/',
        'In development'
    ),
    -- Multi-source agreements
    (
        'msa', '400G-FR4', '400G', 'published',
        'https://www.400g.com/',
        '400G FR4 MSA complete'
    ),
    (
        'msa', '800G-XDR8', '800G', 'in-progress',
        'https://www.800g.info/',
        'Specification in progress'
    )
ON CONFLICT (standard_body, standard_name) DO NOTHING;
|