/**
 * SEC EDGAR Hyperscaler CapEx Scraper
 *
 * Uses the SEC EDGAR XBRL API (free, no auth) to extract quarterly CapEx
 * from Amazon, Microsoft, Google/Alphabet, and Meta 10-Q/10-K filings.
 *
 * XBRL concept: us-gaap/PaymentsToAcquirePropertyPlantAndEquipment
 * API: https://data.sec.gov/api/xbrl/companyfacts/CIK{cik}.json
 *
 * Hyperscaler DC CapEx is the strongest 6-12 month leading indicator
 * for 400G/800G transceiver demand.
 */

import { pool } from "../utils/db";
import { logger } from "../utils/logger";

/** Static description of one tracked company. */
interface CompanyConfig {
  /** Display name used in log messages. */
  name: string;
  /** Zero-padded 10-digit SEC Central Index Key. */
  cik: string;
  /** Fraction of total CapEx attributed to data-centre spend (estimate). */
  dcPct: number;
}

const COMPANIES: Record<string, CompanyConfig> = {
  amazon: { name: "Amazon (AWS)", cik: "0001018724", dcPct: 0.65 }, // ~65% of CapEx is AWS infra
  microsoft: { name: "Microsoft Azure", cik: "0000789019", dcPct: 0.55 },
  alphabet: { name: "Google Cloud", cik: "0001652044", dcPct: 0.60 },
  meta: { name: "Meta AI/DC", cik: "0001326801", dcPct: 0.85 }, // almost all meta capex is DC
};

const EDGAR_BASE = "https://data.sec.gov/api/xbrl/companyfacts";

const HEADERS = {
  "User-Agent": "TIP-DataCollector/1.0 contact@context-x.org",
  Accept: "application/json",
};

/** Pause inserted before every EDGAR request (respect SEC rate limit: 10 req/sec). */
const REQUEST_DELAY_MS = 800;

/** How many years back (by period end date) facts are kept. */
const LOOKBACK_YEARS = 3;

/** One reported fact from the `units.USD` array of a companyfacts concept. */
interface XbrlUnit {
  end: string; // ISO date
  val: number; // value in USD
  form: string; // '10-Q' or '10-K'
  filed: string;
  frame?: string; // 'CY2024Q1' etc
  accn: string; // accession number
  fp?: string; // Q1, Q2, Q3, FY
  fy?: number; // fiscal year
}

/** Subset of the companyfacts response that this scraper reads. */
interface CompanyFactsResponse {
  facts?: {
    "us-gaap"?: {
      PaymentsToAcquirePropertyPlantAndEquipment?: {
        units?: { USD?: XbrlUnit[] };
      };
    };
  };
}

/**
 * Fetches every USD fact reported for
 * `us-gaap/PaymentsToAcquirePropertyPlantAndEquipment` by one company.
 *
 * @param cik Zero-padded CIK, interpolated into the companyfacts URL.
 * @returns The `units.USD` array, or an empty array when the concept (or any
 *   level of the path leading to it) is missing from the response.
 * @throws Error when the HTTP response status is not OK.
 */
async function fetchCapexData(cik: string): Promise<XbrlUnit[]> {
  const url = `${EDGAR_BASE}/CIK${cik}.json`;
  const res = await fetch(url, { headers: HEADERS });
  if (!res.ok) throw new Error(`EDGAR fetch failed for CIK ${cik}: ${res.status}`);

  const data = (await res.json()) as CompanyFactsResponse;
  const usd = data.facts?.["us-gaap"]?.PaymentsToAcquirePropertyPlantAndEquipment?.units?.USD;
  // Guard against an unexpected payload shape instead of failing later in .filter().
  return Array.isArray(usd) ? usd : [];
}

/**
 * Builds a human-readable period label for a fact.
 *
 * Preference order:
 *   1. the `frame` field ("CY2024Q1" -> "Q1 2024", "CY2024" -> "FY 2024");
 *   2. `fp` + `fy` ("Q2 2024", "FY 2024");
 *   3. the first seven characters of `end` ("YYYY-MM").
 */
function labelFromFrame(unit: XbrlUnit): string {
  if (unit.frame) {
    const m = unit.frame.match(/CY(\d{4})(Q\d)?/);
    if (m) return m[2] ? `${m[2]} ${m[1]}` : `FY ${m[1]}`;
  }
  // NOTE(review): fp/fy may describe the filing rather than the fact's own
  // period (comparative figures) — confirm against the EDGAR docs.
  if (unit.fp && unit.fy) return `${unit.fp} ${unit.fy}`;
  return unit.end.substring(0, 7); // YYYY-MM
}

/**
 * Keeps 10-Q/10-K facts whose period ends on or after `cutoff` and collapses
 * them to one fact per period end date, preferring the most recently filed.
 *
 * @returns Facts ordered by period end date, newest first.
 */
function selectLatestPerPeriod(units: readonly XbrlUnit[], cutoff: Date): XbrlUnit[] {
  const recent = units
    .filter(u => (u.form === "10-Q" || u.form === "10-K") && new Date(u.end) >= cutoff)
    .sort((a, b) => {
      const byEnd = new Date(b.end).getTime() - new Date(a.end).getTime();
      if (byEnd !== 0) return byEnd;
      // Same period: newest filing first, so the first-seen entry below is
      // the most recently filed one.
      return new Date(b.filed).getTime() - new Date(a.filed).getTime();
    });

  const seen = new Map<string, XbrlUnit>();
  for (const u of recent) {
    if (!seen.has(u.end)) seen.set(u.end, u);
  }
  return [...seen.values()];
}

/** Resolves after `ms` milliseconds. */
function sleep(ms: number): Promise<void> {
  return new Promise(resolve => setTimeout(resolve, ms));
}

/**
 * Scrapes CapEx facts for every company in `COMPANIES` and upserts them into
 * `hyperscaler_capex`, then backfills `yoy_growth_pct` for that company.
 *
 * A failure for one company is logged and does not stop the remaining
 * companies from being processed.
 *
 * NOTE(review): values are stored exactly as reported. Cash-flow facts in
 * 10-Q filings may be year-to-date rather than single-quarter — confirm
 * before treating `capex_usd_millions` as a per-quarter figure.
 */
export async function scrapeSecEdgar(): Promise<void> {
  logger.info("SEC EDGAR CapEx scraper starting");
  let inserted = 0;

  // Filter window: filings whose period ended within the last 3 years.
  const cutoff = new Date();
  cutoff.setFullYear(cutoff.getFullYear() - LOOKBACK_YEARS);

  for (const [key, company] of Object.entries(COMPANIES)) {
    try {
      logger.info(`Fetching EDGAR data for ${company.name}`);
      await sleep(REQUEST_DELAY_MS);

      const units = await fetchCapexData(company.cik);
      if (!units.length) {
        logger.warn(`No XBRL data for ${company.name}`);
        continue;
      }

      const periods = selectLatestPerPeriod(units, cutoff);

      for (const unit of periods) {
        const capexM = unit.val / 1_000_000; // convert to millions
        const dcCapexM = Math.round(capexM * company.dcPct * 10) / 10;
        const periodLabel = labelFromFrame(unit);
        const sourceUrl = `https://www.sec.gov/cgi-bin/browse-edgar?action=getcompany&CIK=${company.cik}&type=${unit.form}&dateb=&owner=include&count=40`;

        await pool.query(
          `
          INSERT INTO hyperscaler_capex
            (company, period_label, period_end, capex_usd_millions, dc_capex_est_millions, source_url, filing_type)
          VALUES ($1, $2, $3, $4, $5, $6, $7)
          ON CONFLICT (company, period_end) DO UPDATE SET
            capex_usd_millions = EXCLUDED.capex_usd_millions,
            dc_capex_est_millions = EXCLUDED.dc_capex_est_millions,
            period_label = EXCLUDED.period_label,
            filing_type = EXCLUDED.filing_type
          `,
          [key, periodLabel, unit.end, Math.round(capexM * 10) / 10, dcCapexM, sourceUrl, unit.form],
        );
        inserted++;
      }

      // Fill in YoY growth for every row of this company that has none yet
      // and has a row exactly one year earlier to compare against.
      await pool.query(
        `
        UPDATE hyperscaler_capex h1
        SET yoy_growth_pct = ROUND(
          (h1.capex_usd_millions - h2.capex_usd_millions) / NULLIF(h2.capex_usd_millions, 0) * 100, 1
        )
        FROM hyperscaler_capex h2
        WHERE h1.company = $1 AND h2.company = $1
          AND h2.period_end = h1.period_end - INTERVAL '1 year'
          AND h1.yoy_growth_pct IS NULL
        `,
        [key],
      );

      logger.info(`${company.name}: ${periods.length} periods upserted`);
    } catch (err) {
      logger.error(`EDGAR scraper failed for ${company.name}`, { err });
    }
  }

  logger.info(`SEC EDGAR scraper done — ${inserted} records`);
}