New scrapers (all registered in pg-boss, 50 total jobs):
- sec-edgar.ts : SEC EDGAR XBRL API — hyperscaler CapEx from 10-Q/10-K
- github-signals.ts : GitHub Search/Stats API — tech adoption metrics weekly
- ebay-velocity.ts : eBay completed listings — sold count + price distribution
- ai-clusters.ts : RSS feeds (6 sources) — AI cluster & DC announcements
- distributor-leads.ts : Mouser, Digi-Key, RS Components — lead time + stock
- standards-tracker.ts : IEEE 802.3, OIF, IETF — draft/ballot/published status
New utilities:
- forecast-engine.ts : Weighted signal aggregator → demand_index + price_direction
6 signal types, 4 horizons (3/9/12/18 months), 5 technologies tracked
New DB tables (migration 022):
hyperscaler_capex, distributor_lead_times, github_tech_signals,
marketplace_velocity, ai_cluster_announcements, standards_activity,
forecast_signals
Schedules:
- EDGAR: weekly Mon 06:00
- GitHub: weekly Sun 05:00
- eBay velocity: every 12h
- AI clusters: every 4h (news-speed)
- Distributor leads: daily 03:30
- Standards: weekly Wed 04:00
- Forecast engine: daily 08:00 (after all nightly scrapers)
135 lines
5.0 KiB
TypeScript
135 lines
5.0 KiB
TypeScript
/**
 * SEC EDGAR Hyperscaler CapEx Scraper
 *
 * Uses the SEC EDGAR XBRL API (free, no auth) to extract quarterly CapEx
 * from Amazon, Microsoft, Google/Alphabet, and Meta 10-Q/10-K filings.
 *
 * XBRL concept: us-gaap/PaymentsToAcquirePropertyPlantAndEquipment
 * API: https://data.sec.gov/api/xbrl/companyfacts/CIK{cik}.json
 *
 * Hyperscaler DC CapEx is the strongest 6-12 month leading indicator
 * for 400G/800G transceiver demand.
 */
|
|
|
|
import { pool } from "../utils/db";
|
|
import { logger } from "../utils/logger";
|
|
|
|
/**
 * Companies whose filings are scraped, keyed by the short identifier that is
 * stored in the `company` column.
 *
 * - `name`  : display name used in log messages
 * - `cik`   : SEC Central Index Key, already zero-padded to 10 digits
 * - `dcPct` : estimated fraction of total CapEx attributed to data-center
 *             infrastructure (multiplied with reported CapEx downstream)
 */
const COMPANIES: Record<string, { name: string; cik: string; dcPct: number }> = {
  // ~65% of CapEx is AWS infra
  amazon: {
    name: "Amazon (AWS)",
    cik: "0001018724",
    dcPct: 0.65,
  },
  microsoft: {
    name: "Microsoft Azure",
    cik: "0000789019",
    dcPct: 0.55,
  },
  alphabet: {
    name: "Google Cloud",
    cik: "0001652044",
    dcPct: 0.60,
  },
  // almost all Meta CapEx is DC
  meta: {
    name: "Meta AI/DC",
    cik: "0001326801",
    dcPct: 0.85,
  },
};
|
|
|
|
/** Base URL of the EDGAR XBRL "company facts" endpoint; `/CIK{cik}.json` is appended per company. */
const EDGAR_BASE = "https://data.sec.gov/api/xbrl/companyfacts";

/**
 * Request headers sent with every EDGAR call: a User-Agent naming this
 * collector with a contact address, and a JSON Accept header.
 */
const HEADERS = {
  "User-Agent": "TIP-DataCollector/1.0 contact@context-x.org",
  "Accept": "application/json",
};
|
|
|
|
/** One reported fact (a single value for a single period) from the EDGAR XBRL payload. */
interface XbrlUnit {
  /** Period end as an ISO date. */
  end: string;
  /** Reported value in USD. */
  val: number;
  /** Filing form type, e.g. '10-Q' or '10-K'. */
  form: string;
  /** Date the filing was submitted. */
  filed: string;
  /** Calendar frame label such as 'CY2024Q1', when present. */
  frame?: string;
  /** Accession number of the filing. */
  accn: string;
  /** Fiscal period: Q1, Q2, Q3 or FY. */
  fp?: string;
  /** Fiscal year. */
  fy?: number;
}
|
|
|
|
/**
 * Downloads the EDGAR company-facts document for one company and returns the
 * USD facts reported under us-gaap/PaymentsToAcquirePropertyPlantAndEquipment.
 *
 * @param cik SEC Central Index Key, inserted verbatim into the URL.
 * @returns The USD fact list, or an empty array when the concept (or any
 *          level above it) is absent from the response.
 * @throws Error when the HTTP response status is not OK.
 */
async function fetchCapexData(cik: string): Promise<XbrlUnit[]> {
  // Only the slice of the company-facts document that this scraper reads.
  type CompanyFacts = {
    facts: {
      "us-gaap"?: {
        PaymentsToAcquirePropertyPlantAndEquipment?: {
          units: { USD: XbrlUnit[] };
        };
      };
    };
  };

  const response = await fetch(`${EDGAR_BASE}/CIK${cik}.json`, { headers: HEADERS });
  if (!response.ok) {
    throw new Error(`EDGAR fetch failed for CIK ${cik}: ${response.status}`);
  }

  const payload = (await response.json()) as CompanyFacts;
  const capexConcept = payload.facts?.["us-gaap"]?.PaymentsToAcquirePropertyPlantAndEquipment;
  return capexConcept?.units?.USD ?? [];
}
|
|
|
|
/**
 * Builds a human-readable period label for a fact.
 *
 * Preference order:
 *   1. `frame` containing `CY<year>` with an optional `Q<n>` suffix:
 *      "CY2024Q1" gives "Q1 2024", "CY2024" gives "FY 2024".
 *   2. `fp` and `fy` when both are set: "<fp> <fy>", e.g. "Q2 2024".
 *   3. The first seven characters of `end` (YYYY-MM).
 */
function labelFromFrame(unit: XbrlUnit): string {
  const frameMatch = unit.frame ? /CY(\d{4})(Q\d)?/.exec(unit.frame) : null;
  if (frameMatch) {
    const [, year, quarter] = frameMatch;
    return quarter ? `${quarter} ${year}` : `FY ${year}`;
  }

  if (unit.fp && unit.fy) {
    return `${unit.fp} ${unit.fy}`;
  }

  return unit.end.slice(0, 7); // YYYY-MM
}
|
|
|
|
/**
 * Reduces raw XBRL facts to a single 10-Q/10-K fact per period end date.
 *
 * Facts from other form types, facts whose period ends before `cutoff`, and
 * facts with an unparseable `end` are dropped. When several facts share a
 * period end, the one with the latest `filed` date wins; ties keep the first
 * one encountered. The result is ordered newest period first.
 *
 * NOTE(review): facts that share a period end may cover different durations
 * (e.g. a quarter vs. year-to-date in a 10-Q cash-flow statement); this helper
 * does not distinguish them — confirm against the EDGAR payload.
 */
function pickLatestCapexFacts(units: XbrlUnit[], cutoff: Date): XbrlUnit[] {
  const cutoffMs = cutoff.getTime();
  const byPeriodEnd = new Map<string, XbrlUnit>();

  for (const unit of units) {
    const isPeriodicReport = unit.form === "10-Q" || unit.form === "10-K";
    // The negated >= also rejects NaN from an unparseable end date.
    if (!isPeriodicReport || !(Date.parse(unit.end) >= cutoffMs)) continue;

    const kept = byPeriodEnd.get(unit.end);
    if (!kept || Date.parse(unit.filed) > Date.parse(kept.filed)) {
      byPeriodEnd.set(unit.end, unit);
    }
  }

  return [...byPeriodEnd.values()].sort(
    (a, b) => Date.parse(b.end) - Date.parse(a.end),
  );
}

/**
 * Scrapes CapEx facts from SEC EDGAR for every entry in COMPANIES and upserts
 * them into `hyperscaler_capex`, then fills in year-over-year growth.
 *
 * For each company: waits 800 ms, fetches the XBRL facts, keeps one 10-Q/10-K
 * fact per period end within the last 3 years, and upserts one row per period
 * keyed on (company, period_end). A failure for one company is logged and does
 * not stop the remaining companies.
 *
 * @returns Resolves when all companies have been processed; never rejects for
 *          per-company failures.
 */
export async function scrapeSecEdgar(): Promise<void> {
  logger.info("SEC EDGAR CapEx scraper starting");
  let inserted = 0;

  // Same 3-year window for every company, so compute it once.
  const cutoff = new Date();
  cutoff.setFullYear(cutoff.getFullYear() - 3);

  for (const [key, company] of Object.entries(COMPANIES)) {
    try {
      logger.info(`Fetching EDGAR data for ${company.name}`);
      await new Promise(r => setTimeout(r, 800)); // respect SEC rate limit: 10 req/sec

      const units = await fetchCapexData(company.cik);
      if (!units.length) {
        logger.warn(`No XBRL data for ${company.name}`);
        continue;
      }

      // One fact per period end; the most recently filed fact wins.
      const periods = pickLatestCapexFacts(units, cutoff);

      for (const unit of periods) {
        const capexM = unit.val / 1_000_000; // convert to millions
        // Both stored figures are rounded to one decimal place.
        const dcCapexM = Math.round(capexM * company.dcPct * 10) / 10;
        const periodLabel = labelFromFrame(unit);
        const sourceUrl = `https://www.sec.gov/cgi-bin/browse-edgar?action=getcompany&CIK=${company.cik}&type=${unit.form}&dateb=&owner=include&count=40`;

        // source_url embeds the form type, so it is refreshed together with
        // filing_type when an existing row is updated.
        await pool.query(`
          INSERT INTO hyperscaler_capex
            (company, period_label, period_end, capex_usd_millions, dc_capex_est_millions, source_url, filing_type)
          VALUES ($1, $2, $3, $4, $5, $6, $7)
          ON CONFLICT (company, period_end) DO UPDATE SET
            capex_usd_millions = EXCLUDED.capex_usd_millions,
            dc_capex_est_millions = EXCLUDED.dc_capex_est_millions,
            period_label = EXCLUDED.period_label,
            source_url = EXCLUDED.source_url,
            filing_type = EXCLUDED.filing_type
        `, [key, periodLabel, unit.end, Math.round(capexM * 10) / 10, dcCapexM, sourceUrl, unit.form]);
        inserted++;
      }

      // Fill year-over-year growth for this company's rows that have none yet
      // and for which a row exists with a period_end exactly one year earlier.
      // NOTE(review): because of the IS NULL filter, a row whose CapEx is later
      // revised by the upsert above keeps its earlier growth figure — confirm
      // whether that is intended.
      await pool.query(`
        UPDATE hyperscaler_capex h1
        SET yoy_growth_pct = ROUND(
          (h1.capex_usd_millions - h2.capex_usd_millions) / NULLIF(h2.capex_usd_millions, 0) * 100, 1
        )
        FROM hyperscaler_capex h2
        WHERE h1.company = $1
          AND h2.company = $1
          AND h2.period_end = h1.period_end - INTERVAL '1 year'
          AND h1.yoy_growth_pct IS NULL
      `, [key]);

      logger.info(`${company.name}: ${periods.length} periods upserted`);
    } catch (err) {
      // Best-effort per company: log and move on to the next one.
      logger.error(`EDGAR scraper failed for ${company.name}`, { err });
    }
  }

  logger.info(`SEC EDGAR scraper done — ${inserted} records`);
}
|