diff --git a/scripts/pi-scraper-setup.sh b/scripts/pi-scraper-setup.sh
index 1575fe4..3985caa 100644
--- a/scripts/pi-scraper-setup.sh
+++ b/scripts/pi-scraper-setup.sh
@@ -187,7 +187,7 @@ PIEOF
 # ── 7. WireGuard (connects to Erik 10.10.0.1 for DB access) ─────────────────
 WG_PRIVKEY="${WG_PRIVKEY:-}"
 ERIK_PUBKEY="nrh8xiPzUWwLDK4y6+Cu0V3ne56zobIHKtxMGb7BKQo="
-ERIK_ENDPOINT="217.154.82.179:51820"
+ERIK_ENDPOINT="82.165.222.127:51820"
 WG_ADDR="${WG_ADDR:-10.10.0.9}" # override per Pi: WG_ADDR=10.10.0.6
 
 if [ -n "$WG_PRIVKEY" ]; then
@@ -233,6 +233,89 @@ PI_NAME="$PI_NAME" pm2 start \
   || pm2 restart tip-pi-scraper
 pm2 save
 
+
+# ── 9. Optional: SOCKS5 Proxy Agent (Starlink bandwidth contribution) ────────
+# Allows Erik scraper to route requests THROUGH this Pi's internet connection.
+# Especially useful when Pi is on Starlink: different IP range, bypasses IONOS
+# IP-based rate limiting on target vendor sites.
+#
+# Starlink notes:
+#   - CG-NAT: cannot accept direct incoming TCP from internet
+#   - WireGuard tunnel already bypasses this (Pi connects OUT to Erik)
+#   - SOCKS5 listens on WireGuard IP (10.10.0.x), not public interface
+#   - Erik routes selected scraper jobs through: ALL_PROXY=socks5://10.10.0.x:1080
+#
+# To enable: run with PROXY_AGENT=1 WG_ADDR=10.10.0.6 bash pi-scraper-setup.sh
+# Requires WG_PRIVKEY: the proxy binds to the WireGuard address only.
+# PROXY_READY becomes 1 only once danted is installed, configured and
+# restarted successfully; the final summary line is printed based on it.
+PROXY_AGENT="${PROXY_AGENT:-0}"
+PROXY_PORT="${PROXY_PORT:-1080}"
+PROXY_READY=0
+
+if [ "$PROXY_AGENT" = "1" ] && [ -z "$WG_PRIVKEY" ]; then
+  echo "WARN: PROXY_AGENT=1 ignored: WG_PRIVKEY is not set, so there is no WireGuard address to bind to" >&2
+fi
+
+if [ "$PROXY_AGENT" = "1" ] && [ -n "$WG_PRIVKEY" ]; then
+  echo ""
+  echo "── Installing SOCKS5 Proxy Agent ────────────────────────────────────"
+
+  # First IPv4 address on wg0; fall back to the configured WG_ADDR when the
+  # interface reports none.
+  WG_IP=$(ip addr show wg0 2>/dev/null | awk '/inet /{print $2}' | cut -d/ -f1 | head -1)
+  if [ -z "$WG_IP" ]; then
+    WG_IP="$WG_ADDR"
+  fi
+
+  # Detect Starlink interface (usually eth0 or wlan0 — the WAN interface)
+  OUTIF=$(ip route get 8.8.8.8 2>/dev/null | awk '{for(i=1;i<=NF;i++) if($i=="dev") print $(i+1)}' | head -1)
+
+  # Install dante-server (lightweight SOCKS5 for Linux). A failed install or
+  # an undetected WAN interface skips the proxy instead of writing a broken
+  # config.
+  if ! sudo apt-get install -y dante-server > /dev/null; then
+    echo "WARN: dante-server install failed; SOCKS5 proxy not configured" >&2
+  elif [ -z "$OUTIF" ]; then
+    echo "WARN: no default-route interface detected; SOCKS5 proxy not configured" >&2
+  else
+    # Write the config as root via tee: no predictable temp file in /tmp.
+    sudo tee /etc/danted.conf > /dev/null << DANTEEOF
+logoutput: syslog
+
+internal: $WG_IP port = $PROXY_PORT
+external: $OUTIF
+
+socksmethod: none
+clientmethod: none
+
+client pass {
+  from: 10.10.0.0/24 to: 0.0.0.0/0
+  log: error
+}
+
+socks pass {
+  from: 10.10.0.0/24 to: 0.0.0.0/0
+  protocol: tcp
+  log: error
+}
+DANTEEOF
+
+    sudo systemctl enable danted
+    if sudo systemctl restart danted; then
+      PROXY_READY=1
+      echo "SOCKS5 proxy listening on $WG_IP:$PROXY_PORT (WireGuard-only, no public exposure)"
+      echo "Use from Erik: ALL_PROXY=socks5://$WG_IP:$PROXY_PORT curl https://example.com"
+      echo "Starlink interface: $OUTIF"
+      echo ""
+      echo "To use for scraper jobs, set in Erik ecosystem.config.js:"
+      echo "  ALL_PROXY: 'socks5://$WG_IP:$PROXY_PORT' # for jobs that need Starlink IP"
+    else
+      echo "WARN: danted failed to start; SOCKS5 proxy not available" >&2
+    fi
+  fi
+fi
+
 echo ""
 echo "✅ TIP Pi Scraper ($PI_NAME) is running"
 echo " pm2 logs tip-pi-scraper — view logs"
@@ -240,3 +323,6 @@ echo " pm2 status — check status"
 echo ""
 echo "DB target: $DB_HOST:$DB_PORT/$DB_NAME"
 echo "Jobs: ${#PI_QUEUES[@]} lightweight scrapers, all day every day"
+if [ "$PROXY_READY" = "1" ]; then
+  echo "SOCKS5 proxy: socks5://$WG_IP:$PROXY_PORT (Starlink bandwidth via WireGuard)"
+fi
diff --git a/sql/033-800g-standards-enrichment.sql b/sql/033-800g-standards-enrichment.sql
new file mode 100644
index 0000000..93cc250
--- /dev/null
+++ b/sql/033-800g-standards-enrichment.sql
@@ -0,0 +1,271 @@
+/**
+ * Migration 033 — 800G Standards Enrichment
+ *
+ * Deep technical enrichment of all 800G IEEE/OIF standards with:
+ *   - Precise physical layer specs (lane rate, modulation, wavelength, power)
+ *   - Electrical interface details (PAM4 baud rate, FEC type)
+ *   - Optical power budgets (Tx OMA, Rx sensitivity, ER, TDECQ)
+ *   - Connector and fiber requirements
+ *   - Management interface (CMIS version)
+ *   - Practical deployment context (reach, use cases, market adoption)
+ *
+ * Sources:
+ *   - IEEE 802.3df-2024 (standard text, clause mapping)
+ *   - OIF 800ZR Implementation Agreement 2023
+ *   - 
NADDOD OSFP 800G-SR8 in-depth analysis (2024)
+ *   - Comnen OSFP-800G-SR8 Product Datasheet
+ *   - Acacia/Cisco, Coherent, VIAVI 800G field guides
+ */
+
+-- ============================================================
+-- 800GBASE-SR8 (IEEE 802.3df-2024, Clause 167)
+-- ============================================================
+UPDATE standards SET
+  ieee_reference = 'IEEE 802.3df-2024 Clause 167',
+  body = 'IEEE',
+  speed = '800G',
+  speed_gbps = 800,
+  lanes = 8,
+  lane_rate = '53.125 GBd PAM4 (106.25 Gbps raw per lane)',
+  lane_rate_gbps = 106.25,
+  modulation = 'PAM4',
+  fiber_type = 'MMF OM3 / OM4',
+  wavelength = '850 nm (8x VCSEL)',
+  max_reach_meters = 100,
+  max_reach_label = '60m OM3 / 100m OM4',
+  connector = 'MPO-16 APC (primary) or 2× MPO-12 APC',
+  fec_required = true,
+  form_factors = ARRAY['OSFP', 'QSFP-DD800'],
+  year_draft = 2022,
+  year_ratified = 2024,
+  status = 'ratified',
+  url = 'https://standards.ieee.org/ieee/802.3df/7849/',
+  notes = '800G short-reach multi-mode standard (SR8). Physical layer: 8 lanes × 53.125 GBd PAM4 = 800G aggregate. Modulation: PAM4 at 53.125 Gbaud → 2 bits/symbol → 106.25 Gbps raw/lane; after 802.3 RS-FEC (KP4) overhead net payload = 100 Gbps/lane.
+
+Optical specs (per lane, per IEEE 802.3df Clause 167):
+  Tx (VCSEL, 850 nm multimode):
+    OMA_outer (max): 2.3 dBm
+    OMA_outer (min): −0.5 dBm (OM4 reach)
+    Extinction Ratio (ER): ≥ 4.4 dB
+    TDECQ: ≤ 2.5 dB
+    Spectral width (RMS): ≤ 0.6 nm
+  Rx (PIN photodetector):
+    Sensitivity (OMA): max(−4.6 dBm, TDECQ − 6.4 dB)
+    Stressed Rx sensitivity: −2 dBm
+    Overload OMA: +2.3 dBm
+  Link budget: ~7 dB for connectors and fiber loss at 850 nm/OM4
+
+FEC: IEEE 802.3 KP4 RS-FEC (544,514) mandatory.
+  Pre-FEC BER: ≤ 2.4 × 10⁻⁴ (at EOM)
+  Post-FEC BER: ≤ 1 × 10⁻¹² (uncorrected)
+
+Fiber plant:
+  OM3: max 60 m (850 nm bandwidth−length product 2000 MHz·km)
+  OM4: max 100 m (4700 MHz·km)
+  OM5: max 100 m (same channel, but supports future WDM)
+  Note: OM3 and OM4 are backward-compatible; ensure every connector uses APC polish
+
+Connectors:
+  Primary: MPO-16 APC (16-fiber, all 8 Tx + 8 Rx lanes on one plug)
+  Alternative: 2× MPO-12 APC (first 12-fiber: lanes 1–6, second: lanes 7–8 + spare)
+  QSA/breakout: not defined in standard; vendor-specific MPO-16 to MPO-8×1 breakout cables exist
+
+Management: CMIS 5.2 (Common Management Interface Specification)
+  Module-level: voltage, temperature, 8× Tx/Rx power per lane, 8× Tx/Rx bias per lane
+  DOM: Tx OMA, Rx power, Tx bias, module temp, Vcc (3.135–3.465 V)
+  Module state machine: dataPathInit → dataPathDeinit → moduleLowPwr
+
+Power:
+  Typical: 12–15 W (OSFP form factor)
+  Max (OSFP module power budget): ≤ 16 W
+  Vcc: 3.135–3.465 V (3.3 V nominal)
+  Thermal: 0–70 °C case temperature (commercial); −5–85 °C industrial variants
+
+Form factor details:
+  OSFP (Octal Small Form-factor Pluggable): 22.58 mm × 107.8 mm, ≤ 16 W thermal envelope; defined by OSFP MSA; preferred for 800G SR8 due to thermal headroom
+  QSFP-DD800 (Quad Small Form-factor Double Density 800G): backward-compatible cage with QSFP-DD; ≤ 14 W in standard cage; limited by face-plate density vs. OSFP
+
+Market / deployment:
+  First samples: mid-2023; volume production: late 2023 / Q1 2024
+  Primary use: hyperscale AI/ML cluster interconnect (400G→800G upgrade), intra-DC spine-leaf 800G, HPC fat-tree topologies
+  Typical switch ASICs: Broadcom Tomahawk 5 (51.2 Tbps), Marvell Teralynx 10, Nvidia Spectrum-4
+  Competitors on same reach: 800GBASE-DR8 for longer SMF reach (500m), 800GBASE-LR4 for campus SMF
+  Installed base: fastest-growing 800G form factor as of 2024; >60% of 800G datacenter deployments use SR8 (NADDOD 2024)
+
+Vendor ecosystem (module manufacturers as of 2024):
+  Innolight, HG Genuine (Hisense), Lumentum, Fabrinet/Eoptolink, Accelink, Comnen, Naddod, Source Photonics
+  Switch OEMs: Cisco (Nexus 9000 800G), Arista (7800R4), Juniper (PTX10003), Broadcom dev kit
+
+Key differences vs. 400GBASE-SR4.2:
+  400GBASE-SR4.2 = 4 lanes at 26.5625 GBd PAM4 → 2× MPO-12 (WDM 850+910 nm BiDi)
+  800GBASE-SR8 = 8 lanes at 53.125 GBd PAM4 → MPO-16 (all 850 nm, non-WDM)
+  Fibers used: 400G-SR4.2 uses 8 fibers (4 Tx+4 Rx BiDi), SR8 uses 16 fibers (8 Tx + 8 Rx)'
+WHERE name = '800GBASE-SR8';
+
+-- ============================================================
+-- 800GBASE-DR8 (IEEE 802.3df-2024, Clause 124)
+-- ============================================================
+UPDATE standards SET
+  ieee_reference = 'IEEE 802.3df-2024 Clause 124',
+  body = 'IEEE',
+  speed = '800G',
+  speed_gbps = 800,
+  lanes = 8,
+  lane_rate = '53.125 GBd PAM4 (106.25 Gbps raw per lane)',
+  lane_rate_gbps = 106.25,
+  modulation = 'PAM4',
+  fiber_type = 'SMF OS2',
+  wavelength = '1310 nm (8× direct detect, CWDM4-like spacing not used — all 1310 nm, parallel fiber)',
+  max_reach_meters = 500,
+  max_reach_label = '500m SMF',
+  connector = 'MPO-16 APC or 2× MPO-12 APC (16 fibers: 8 Tx + 8 Rx)',
+  fec_required = true,
+  form_factors = ARRAY['OSFP', 'QSFP-DD800'],
+  year_draft = 2022,
+  year_ratified = 2024,
+  status = 'ratified',
+  url = 'https://standards.ieee.org/ieee/802.3df/7849/',
+  notes = '800G single-mode parallel fiber, 500m reach (DR8). Physical layer: 8 lanes × 53.125 GBd PAM4, all at 1310 nm via DFB or EML, 8 parallel SMF fiber pairs (8 Tx + 8 Rx = 16 fibers, on MPO-16 or 2× MPO-12).
+
+Optical specs (per lane, per IEEE 802.3df Clause 124):
+  Tx (EML / DFB, 1310 nm SMF):
+    Launch power: −1.0 to +3.4 dBm OMA
+    Extinction Ratio: ≥ 6.0 dB
+    TDECQ: ≤ 3.5 dB
+    Spectral width: ≤ 1.0 nm (−20 dB)
+  Rx (APD or PIN, 1310 nm):
+    Sensitivity (OMA min): −9.0 dBm (at pre-FEC BER 2.4×10⁻⁴)
+    Overload OMA: +3.4 dBm
+  Loss budget: 6 dB (connector + fiber: ~0.4 dB/km × 0.5 km = 0.2 dB fiber + ~5.8 dB margin)
+
+FEC: KP4 RS-FEC mandatory (same as SR8).
+Fiber: OS2 (G.652D), 8 parallel SMF pairs, MPO-16 or 2× MPO-12 connector (16 active fibers).
+Reach: 500 m — bridging ToR-to-ToR and spine-leaf across campus/metro DC floors.
+Power: ≤ 15 W typical (OSFP); EML lasers consume more than VCSELs but enable 500m.
+CMIS: 5.2
+
+Use case: building-to-building data center interconnect, long-run intra-campus 800G, large-floor AI cluster pod-to-pod.
+Deployment note: most 800G-DR8 deployments use pre-polarity-set MPO-12 trunk cables; verify polarity Method A vs B before deployment.'
+WHERE name = '800GBASE-DR8';
+
+-- ============================================================
+-- 800GBASE-LR4 (IEEE 802.3df-2024, Clause 153) — 2km SMF WDM — NOTE(review): 4×200G PMDs appear to belong to P802.3dj, not 802.3df-2024, and "LR" at 2 km looks like FR4; confirm reference, reach and status before merging
+-- ============================================================
+UPDATE standards SET
+  ieee_reference = 'IEEE 802.3df-2024 Clause 153',
+  body = 'IEEE',
+  speed = '800G',
+  speed_gbps = 800,
+  lanes = 4,
+  lane_rate = '106.25 GBd PAM4 per lane on 4 WDM wavelengths',
+  lane_rate_gbps = 212.5,
+  modulation = 'PAM4',
+  fiber_type = 'SMF OS2',
+  wavelength = '1270 / 1290 / 1310 / 1330 nm (CWDM4 λ1–λ4, 20 nm spacing)',
+  max_reach_meters = 2000,
+  max_reach_label = '2 km SMF',
+  connector = 'LC duplex (duplex SMF, WDM MUX/DEMUX on-board)',
+  fec_required = true,
+  form_factors = ARRAY['OSFP', 'QSFP-DD800'],
+  year_draft = 2022,
+  year_ratified = 2024,
+  status = 'ratified',
+  url = 'https://standards.ieee.org/ieee/802.3df/7849/',
+  notes = '800G 2 km SMF WDM (LR4). Uses 4 wavelengths (1270/1290/1310/1330 nm CWDM4 grid) each carrying 200 Gbps (1× lane at 106.25 GBd PAM4 = 2 bits/symbol = 212.5 Gbps raw, FEC overhead → 200 Gbps net). Single duplex LC fiber pair.
+
+Physical layer:
+  Lanes: 4 WDM × 106.25 GBd PAM4 = 4 × 212.5 Gbps raw ≈ 800 Gbps net (with KP4 FEC)
+  Modulation: PAM4 (4-level pulse amplitude)
+  Tx sources: EML (Electro-absorption Modulated Laser) at CWDM4 spacing
+  Wavelengths: λ1=1270 nm, λ2=1290 nm, λ3=1310 nm, λ4=1330 nm
+
+Optical specs (per IEEE 802.3df Clause 153):
+  Tx launch power (per channel): −4.5 to +4.0 dBm
+  Tx ER: ≥ 6.5 dB
+  Rx sensitivity: −14.0 dBm (per channel, at pre-FEC BER 2.4×10⁻⁴)
+  Power budget per channel: 6.3 dB
+  Total insertion loss (2 km OS2): ~0.8 dB (fiber) + connectors
+
+Management: CMIS 5.2, 4-channel per-lane DOM.
+Connector: LC duplex (single fiber pair, full-duplex via WDM; no special polarity management).
+Power: ≤ 16 W (OSFP); EML + TEC (thermoelectric cooler) add significant power.
+FEC: KP4 mandatory.
+
+Use case: inter-building campus 800G, long-haul DWDM feeding 800G lambda, data center colocation interconnect.
+vs. 800GBASE-DR8: LR4 uses 1 fiber pair (LC) vs. DR8 uses 16 fibers (MPO-16 or 2× MPO-12). LR4 for longer reach with existing duplex SMF plant.'
+WHERE name = '800GBASE-LR4';
+
+-- ============================================================
+-- 800G-ZR (OIF-800ZR-01.0, 2023) — 80–120 km amplified coherent DWDM
+-- ============================================================
+UPDATE standards SET
+  ieee_reference = 'OIF-800ZR-01.0 (2023)',
+  body = 'OIF',
+  speed = '800G',
+  speed_gbps = 800,
+  lanes = 1,
+  lane_rate = '~118 GBd DP-16QAM (single carrier)',
+  lane_rate_gbps = 800, -- net client rate of the single carrier (the baud rate lives in lane_rate); NOTE(review): confirm whether this column expects the raw line rate instead
+  modulation = 'DP-16QAM (Dual Polarization 16-QAM)',
+  fiber_type = 'SMF G.652/G.654',
+  wavelength = 'C-band DWDM (191.7–196.1 THz, 150 GHz channel spacing)',
+  max_reach_meters = 120000,
+  max_reach_label = '80–120 km amplified C-band DWDM (single span; longer reach needs vendor ZR+ modes)',
+  connector = 'LC duplex',
+  fec_required = true,
+  form_factors = ARRAY['OSFP', 'QSFP-DD800'],
+  year_draft = 2022,
+  year_ratified = 2023,
+  status = 'ratified',
+  url = 'https://www.oiforum.com/technical-work/hot-topics/800zr/',
+  notes = '800G coherent pluggable DWDM (the OIF ZR application targets amplified 80–120 km DCI links; longer metro/long-haul reach needs vendor ZR+ modes). OIF-800ZR-01.0 (2023) defines pluggable coherent transceiver in OSFP or QSFP-DD800 form factor.
+
+Physical layer:
+  Modulation: DP-16QAM (Dual Polarization, 16 states = 4 bits per symbol per polarization × 2 pol = 8 bits per symbol)
+  Symbol rate: ~118 GBd (target; actual may vary by vendor)
+  Net data rate: ~118 GBd × 8 bits/symbol ≈ 945 Gbps line rate; minus FEC and framing overhead = ~800 Gbps client payload
+  Carrier frequency: C-band, 150 GHz channel spacing (a ~118 GBd carrier does not fit a 100 GHz or narrower slot)
+  Compatible: C-band ROADM, EDFA-amplified terrestrial links
+
+FEC: Soft-decision FEC (SD-FEC), typically oFEC (OpenROADM) or proprietary concatenated FEC
+  Pre-FEC BER target: ~2×10⁻² (much higher than intensity-modulated standards)
+  Post-FEC BER: ≤ 1×10⁻¹⁵
+  Note: coherent DSP with SD-FEC enables operation at very high pre-FEC BER
+
+Optical specs:
+  Tx launch power: ~0 dBm to +3 dBm (depends on span OSNR budget)
+  Receiver: coherent intradyne receiver with LO laser; Rx sensitivity down to ~−25 dBm OSNR-limited
+  CD tolerance: ≥ 40,000 ps/nm (electronic dispersion compensation)
+  PMD tolerance: ≥ 30 ps mean DGD
+  OSNR required: ~21 dB at 0.1 nm (Nyquist-limited, per vendor DSP quality)
+
+Management: CMIS 5.2 (mandatory for OIF-800ZR)
+  Coherent-specific registers: OSNR, Tx/Rx frequency, pre/post-FEC BER, CD, PMD, SOP rotation rate
+  Tunable Tx: ±50 GHz or full C-band tuning in 0.01 GHz steps (vendor dependent)
+
+Power: 20–24 W typical (OSFP with full coherent DSP + TEC); highest power class in pluggable ecosystem.
+Heat: requires active cooling in faceplate; chassis airflow must support 800ZR thermal profile.
+
+DCO license: REQUIRED per 100G capacity on most vendor platforms. Billed at ~$400–600/100G NRE (Cisco, Juniper, Nokia). Third-party DCO: standard-compliant, but some vendors block or require TAC override.
+
+vs. 400G-ZR (OIF-400ZR-02.0):
+  400G-ZR: DP-16QAM at 60 GBd → 400 Gbps, same reach
+  800G-ZR: DP-16QAM at ~118 GBd → 800 Gbps (~2× the baud rate, requires higher OSNR)
+  Deployment: 800ZR runs on same fiber plant as 400ZR but may require higher-gain amplifiers or lower-noise EDFA for same span count
+
+Market: first 800ZR samples appeared late 2023; volume deployments in 2024–2025 for hyperscale DCI and SP long-haul. Key vendors: Cisco, Acacia, Ciena, Coherent Corp., Lumentum, HG Genuine.'
+WHERE name = '800G-ZR (OIF)';
+
+-- ============================================================
+-- Update search vectors for 800G standards (no-op self-assignment; presumably relies on a row-level UPDATE trigger to rebuild the vector — confirm the trigger exists)
+-- ============================================================
+UPDATE standards SET notes = notes
+WHERE speed_gbps = 800;
+
+-- ============================================================
+-- Report
+-- ============================================================
+SELECT name, ieee_reference, speed_gbps, lanes, modulation, max_reach_label, connector
+FROM standards
+WHERE speed_gbps >= 800
+ORDER BY name;