New scrapers (all registered in pg-boss, 50 total jobs):
- sec-edgar.ts : SEC EDGAR XBRL API — hyperscaler CapEx from 10-Q/10-K
- github-signals.ts : GitHub Search/Stats API — tech adoption metrics weekly
- ebay-velocity.ts : eBay completed listings — sold count + price distribution
- ai-clusters.ts : RSS feeds (6 sources) — AI cluster & DC announcements
- distributor-leads.ts : Mouser, Digi-Key, RS Components — lead time + stock
- standards-tracker.ts : IEEE 802.3, OIF, IETF — draft/ballot/published status
New utilities:
- forecast-engine.ts : Weighted signal aggregator → demand_index + price_direction
6 signal types, 4 horizons (3/9/12/18 months), 5 technologies tracked
New DB tables (migration 022):
hyperscaler_capex, distributor_lead_times, github_tech_signals,
marketplace_velocity, ai_cluster_announcements, standards_activity,
forecast_signals
Schedules:
- EDGAR: weekly Mon 06:00
- GitHub: weekly Sun 05:00
- eBay velocity: every 12h
- AI clusters: every 4h (news-speed)
- Distributor leads: daily 03:30
- Standards: weekly Wed 04:00
- Forecast engine: daily 08:00 (after all nightly scrapers)
standards-tracker.ts — 200 lines, 7.1 KiB, TypeScript
/**
 * Network Standards & Draft Activity Tracker
 *
 * Monitors the status of key networking standards that directly
 * affect transceiver form factor adoption timelines:
 *
 *   - IEEE 802.3 (Ethernet PHY standards)
 *   - IETF (network protocols)
 *   - OIF (Optical Internetworking Forum — MSA agreements)
 *   - CMIS (Common Management Interface Specification)
 *
 * Standard status is a 12-24 month leading indicator:
 *   "Draft 3.0 approved" → ~18 months to mass-market products
 *   "Published"          → OEMs start shipping within 12 months
 *   "Ballot closed"      → final spec in ~3 months
 */
|
|
|
|
import * as cheerio from "cheerio";
|
|
import { pool } from "../utils/db";
|
|
import { logger } from "../utils/logger";
|
|
|
|
/**
 * Default request headers sent with every scrape request in this module.
 *
 * The User-Agent names the collector and carries a contact address.
 * Accept asks for HTML; callers that talk to a JSON API spread this object
 * and override `Accept`.
 */
const HEADERS = {
  "User-Agent": "TIP-DataCollector/1.0 contact@context-x.org",
  "Accept": "text/html,application/xhtml+xml",
};
|
|
|
|
/**
 * One scraped standard / draft, in the shape written to the
 * `standards_activity` table by `scrapeStandardsTracker`.
 *
 * `(standard_body, standard_name)` is the upsert conflict key, so the pair
 * must identify a standard stably across runs.
 */
interface StandardStatus {
  /** Issuing organisation; the scrapers in this file emit "ieee", "oif" or "ietf". */
  standard_body: string;
  /** Project / document name as scraped (scrapers truncate to 100 characters). */
  standard_name: string;
  /** Technology bucket derived from the name, e.g. "400G", "800G", "CMIS", "optical". */
  technology: string;
  /** Lifecycle state; the scrapers emit "in-progress", "ballot", "published" or "cancelled". */
  current_status: string;
  /** Draft revision, or null when not known (none of the scrapers here populate it). */
  draft_version: string | null;
  /** Approval date, or null when not known (none of the scrapers here populate it). */
  approval_date: string | null;
  /** Page or API document the record was derived from. */
  source_url: string;
  /** Free-text context (raw status text or document title), or null. */
  notes: string | null;
}
|
|
|
|
// ─── IEEE 802.3 ────────────────────────────────────────────────────────────
|
|
|
|
/**
 * Scrapes the IEEE 802.3 index page and returns one record per table row
 * that looks like a high-speed Ethernet project.
 *
 * A row qualifies when its first cell mentions a tracked line rate
 * (100G … 1.6T/1600G) or one of the tracked 802.3 project suffixes.
 * The second cell is treated as the status text.
 *
 * @returns Parsed records; empty when no row matches.
 * @throws Error when the HTTP response is not OK; fetch/timeout errors propagate.
 */
async function scrapeIeee802(): Promise<StandardStatus[]> {
  const url = "https://www.ieee802.org/3/";
  const res = await fetch(url, { headers: HEADERS, signal: AbortSignal.timeout(20000) });
  if (!res.ok) throw new Error(`IEEE ${res.status}`);
  const $ = cheerio.load(await res.text());

  // Hoisted so the patterns are not rebuilt for every table row.
  const speedPattern = /\b(100G|200G|400G|800G|1\.6T|1600G)\b/i;
  const projectPattern = /802\.3(bs|cd|cu|ck|df|dk|dj|dl)\b/i;

  const results: StandardStatus[] = [];

  // IEEE 802.3 project table
  for (const row of $("table tr").toArray()) {
    const cells = $(row).find("td, th");
    if (cells.length < 2) continue;

    const nameCell = cells.eq(0);
    const taskName = nameCell.text().trim();
    const statusText = cells.eq(1).text().trim();
    const href = nameCell.find("a").first().attr("href");

    // Filter for high-speed Ethernet projects
    const speed = speedPattern.exec(taskName)?.[1];
    if (!speed && !projectPattern.test(taskName)) continue;

    // Normalise the label so "400g"/"400G" and "1600G"/"1.6T" land in the
    // same technology bucket; rows matched only by project suffix have no
    // explicit rate and fall back to "high-speed".
    let tech = "high-speed";
    if (speed) {
      const upper = speed.toUpperCase();
      tech = upper === "1600G" ? "1.6T" : upper;
    }

    let status = "in-progress";
    if (/published|approved/i.test(statusText)) status = "published";
    else if (/ballot/i.test(statusText)) status = "ballot";
    else if (/withdrawn|cancelled/i.test(statusText)) status = "cancelled";

    // Resolve the href against the index page instead of concatenating, so
    // absolute, root-relative and "../" links all yield a valid URL. Anything
    // unparseable or non-HTTP (mailto:, javascript:) falls back to the index.
    let sourceUrl = url;
    if (href) {
      try {
        const resolved = new URL(href, url);
        if (resolved.protocol === "http:" || resolved.protocol === "https:") {
          sourceUrl = resolved.href;
        }
      } catch {
        // keep the index page URL
      }
    }

    results.push({
      standard_body: "ieee",
      standard_name: taskName.substring(0, 100),
      technology: tech,
      current_status: status,
      draft_version: null,
      approval_date: null,
      source_url: sourceUrl,
      notes: statusText.substring(0, 200),
    });
  }

  return results;
}
|
|
|
|
// ─── OIF ──────────────────────────────────────────────────────────────────
|
|
|
|
/**
 * Scrapes the OIF "hot topics" page and returns one record per h2/h3
 * heading that mentions a tracked optical technology (400ZR, 800ZR, CMIS,
 * 400G, 800G, CPO or an "OIF-" document id).
 *
 * Every record is reported as "in-progress"; the page is not parsed for
 * a finer status.
 *
 * @returns Parsed records; empty when no heading matches.
 * @throws Error when the HTTP response is not OK; fetch/timeout errors propagate.
 */
async function scrapeOif(): Promise<StandardStatus[]> {
  const url = "https://www.oiforum.com/technical-work/hot-topics/";
  const res = await fetch(url, { headers: HEADERS, signal: AbortSignal.timeout(20000) });
  if (!res.ok) throw new Error(`OIF ${res.status}`);
  const $ = cheerio.load(await res.text());

  const topicPattern = /\b(400ZR|800ZR|CMIS|400G|800G|CPO|OIF-)\S*/i;
  const results: StandardStatus[] = [];

  for (const el of $(".entry-content h2, .entry-content h3").toArray()) {
    const heading = $(el).text().trim();
    if (!heading) continue;
    if (!topicPattern.test(heading)) continue;

    // Classify case-insensitively, matching the case-insensitive filter
    // above; otherwise a heading such as "400zr" passes the filter but is
    // bucketed as plain "400G".
    const upper = heading.toUpperCase();
    let tech: string;
    if (upper.includes("ZR")) {
      tech = upper.includes("800") ? "800G-ZR" : "400G-ZR";
    } else if (upper.includes("CMIS")) {
      tech = "CMIS";
    } else if (upper.includes("800")) {
      tech = "800G";
    } else if (upper.includes("400")) {
      tech = "400G";
    } else {
      tech = "optical";
    }

    // Resolve the heading's link against the page URL so relative hrefs are
    // stored as usable absolute URLs; fall back to the page itself when the
    // heading has no link or the href is unparseable / non-HTTP.
    const href = $(el).find("a").first().attr("href");
    let sourceUrl = url;
    if (href) {
      try {
        const resolved = new URL(href, url);
        if (resolved.protocol === "http:" || resolved.protocol === "https:") {
          sourceUrl = resolved.href;
        }
      } catch {
        // keep the page URL
      }
    }

    results.push({
      standard_body: "oif",
      standard_name: heading.substring(0, 100),
      technology: tech,
      current_status: "in-progress", // OIF IA are usually in-progress or published
      draft_version: null,
      approval_date: null,
      source_url: sourceUrl,
      notes: null,
    });
  }

  return results;
}
|
|
|
|
// ─── IETF Datatracker ─────────────────────────────────────────────────────
|
|
|
|
/**
 * Queries the IETF Datatracker REST API for up to 20 drafts whose name
 * contains "optical" and maps each to a StandardStatus record.
 *
 * A document with a non-empty `std_level` is reported as "published",
 * anything else as "in-progress".
 *
 * @returns Parsed records; empty when the API returns no objects.
 * @throws Error when the HTTP response is not OK or the payload's `objects`
 *         field is not an array; fetch/timeout/JSON errors propagate.
 */
async function scrapeIetf(): Promise<StandardStatus[]> {
  // IETF has a proper REST API
  const url = "https://datatracker.ietf.org/api/v1/doc/document/?type=draft&name__contains=optical&limit=20&offset=0";
  const res = await fetch(url, {
    headers: { ...HEADERS, Accept: "application/json" },
    signal: AbortSignal.timeout(15000),
  });
  if (!res.ok) throw new Error(`IETF API ${res.status}`);

  // The payload is external input: type it loosely and check what is used.
  const data = (await res.json()) as {
    objects?: Array<{
      name?: string | null;
      title?: string | null;
      abstract?: string | null;
      std_level?: string | null;
      stream?: string | null;
    }> | null;
  } | null;

  const objects = data?.objects ?? [];
  if (!Array.isArray(objects)) {
    throw new Error("IETF API: unexpected response shape");
  }

  const results: StandardStatus[] = [];

  for (const doc of objects) {
    // The name is half of the upsert key and part of the URL; a record
    // without one cannot be stored meaningfully.
    const name = doc?.name;
    if (typeof name !== "string" || name === "") continue;

    // A missing title must not abort the whole batch.
    const title = typeof doc.title === "string" ? doc.title : "";

    let tech: string;
    if (name.includes("400g")) {
      tech = "400G";
    } else if (name.includes("800g")) {
      tech = "800G";
    } else {
      const token = title.match(/\b(400G|800G|ZR|coherent)\b/i)?.[1];
      if (!token) {
        tech = "optical";
      } else if (token.toLowerCase() === "coherent") {
        tech = "coherent";
      } else {
        // Normalise casing so "zr"/"ZR" and "400g"/"400G" share one bucket.
        tech = token.toUpperCase();
      }
    }

    results.push({
      standard_body: "ietf",
      standard_name: name.substring(0, 100),
      technology: tech,
      current_status: doc.std_level ? "published" : "in-progress",
      draft_version: null,
      approval_date: null,
      source_url: `https://datatracker.ietf.org/doc/${name}/`,
      // null (rather than "") lets the upsert's COALESCE keep earlier notes.
      notes: title ? title.substring(0, 200) : null,
    });
  }

  return results;
}
|
|
|
|
/**
 * Runs every standards scraper in turn (IEEE 802.3, OIF, IETF) and upserts
 * the results into `standards_activity`.
 *
 * Sources are processed sequentially with a 2-second pause before each one.
 * A failure in one source (fetch, parse or insert) is logged as a warning
 * and does not stop the remaining sources; records of that source already
 * written stay written.
 *
 * On conflict with an existing (standard_body, standard_name) row only
 * current_status, technology, notes and last_checked are refreshed; notes
 * keep their previous value when the new value is NULL.
 */
export async function scrapeStandardsTracker(): Promise<void> {
  logger.info("Standards tracker starting");
  let updated = 0;

  const upsertSql = `
    INSERT INTO standards_activity
      (standard_body, standard_name, technology, current_status,
       draft_version, approval_date, source_url, notes, last_checked)
    VALUES ($1,$2,$3,$4,$5,$6,$7,$8,NOW())
    ON CONFLICT (standard_body, standard_name) DO UPDATE SET
      current_status = EXCLUDED.current_status,
      technology = EXCLUDED.technology,
      notes = COALESCE(EXCLUDED.notes, standards_activity.notes),
      last_checked = NOW()
  `;

  const scrapers: Array<{ name: string; fn: () => Promise<StandardStatus[]> }> = [
    { name: "IEEE 802.3", fn: scrapeIeee802 },
    { name: "OIF", fn: scrapeOif },
    { name: "IETF", fn: scrapeIetf },
  ];

  for (const source of scrapers) {
    try {
      // Fixed pause before each source.
      await new Promise<void>(resolve => setTimeout(resolve, 2000));
      logger.info(`Checking ${source.name}`);
      const standards = await source.fn();

      for (const std of standards) {
        await pool.query(upsertSql, [
          std.standard_body,
          std.standard_name,
          std.technology,
          std.current_status,
          std.draft_version,
          std.approval_date,
          std.source_url,
          std.notes,
        ]);
        updated++;
      }
      logger.info(`${source.name}: ${standards.length} standards checked`);
    } catch (err) {
      // Best effort: one broken source must not block the others.
      logger.warn(`Standards scraper failed: ${source.name}`, { err });
    }
  }

  logger.info(`Standards tracker done — ${updated} records updated`);
}
|