- Migration 033: comprehensive technical update for all 4x 800G standards
- 800GBASE-SR8: full optical specs (Tx OMA 2.3 dBm, Rx sens. -4.6 dBm, KP4 FEC,
MPO-16 APC, CMIS 5.2, ≤16W, 60m OM3/100m OM4, VCSEL 850nm, 53.125 GBd PAM4)
- 800GBASE-DR8: 500m SMF, EML 1310nm, 8x parallel fiber, MPO-12, -9dBm sensitivity
- 800GBASE-LR4: 2km CWDM4 WDM (1270/1290/1310/1330nm), 4x 106.25 GBd PAM4, LC duplex
- 800G-ZR (OIF-800ZR-01.0): DP-16QAM 96 GBd, 1000km EDFA, SD-FEC, 20-24W, DCO license
- Pi scraper: add optional SOCKS5 proxy via dante-server on WireGuard IP
- Enables Starlink bandwidth contribution (PROXY_AGENT=1 flag)
- Scraper routes selected jobs through Pi SOCKS5 for different IP range
310 lines
12 KiB
Bash
310 lines
12 KiB
Bash
#!/bin/bash
# ─────────────────────────────────────────────────────────────────────────────
# TIP Pi Scraper Setup — run this ONCE on each Raspberry Pi
#
# Usage (from the Pi itself or via SSH once you have access):
#   curl -sL https://gitea.context-x.org/rene/transceiver-db/raw/branch/main/scripts/pi-scraper-setup.sh | bash
#
# Or copy & run manually:
#   bash pi-scraper-setup.sh
#
# What this does:
#   1. Installs Node.js 22 + tsx + pm2
#   2. Clones the TIP scraper package
#   3. Installs dependencies (no Playwright — Pi runs fetch-only scrapers)
#   4. Creates .env pointing to Erik's PostgreSQL via WireGuard
#   5. Starts pm2 with the Pi-specific scheduler (lightweight scrapers only)
#   6. Optionally configures the WireGuard tunnel to Erik (when WG_PRIVKEY is set)
#   7. Optionally installs a SOCKS5 proxy agent on the WireGuard IP (PROXY_AGENT=1)
# ─────────────────────────────────────────────────────────────────────────────

# Stop at the first command that exits non-zero.
set -o errexit

# ── Settings ─────────────────────────────────────────────────────────────────
# Each of the following keeps a value already present in the environment and
# otherwise falls back to the default shown, e.g.:  PI_NAME=pi2 bash setup.sh
: "${PI_NAME:=pi-scraper}"
: "${DB_HOST:=10.10.0.1}"          # Erik WireGuard IP
: "${DB_PORT:=5433}"
: "${DB_USER:=tip}"
: "${DB_PASS:=***REDACTED***}"
: "${DB_NAME:=transceiver_db}"

# Fixed locations (not overridable from the environment).
GITEA='http://192.168.178.196:3000/rene/transceiver-db.git'
INSTALL_DIR='/opt/tip-scraper'

echo "=== TIP Pi Scraper Setup: $PI_NAME ==="

# ── 1. Node.js 22 ────────────────────────────────────────────────────────────
# Install Node.js 22 from NodeSource unless a `node` with major version >= 20
# is already on PATH.
#
# The major version is compared numerically. Comparing the "vN.x.y" strings
# lexicographically misorders versions: "v8"/"v9" sort after "v20" (so an old
# Node would be kept), and "v100" sorts before it.
NODE_MAJOR=0
if command -v node &>/dev/null; then
  # "v22.3.0" -> "22"; anything that does not parse is treated as too old below.
  NODE_MAJOR="$(node --version 2>/dev/null | sed -E 's/^v([0-9]+).*/\1/')"
fi

if ! [[ "$NODE_MAJOR" =~ ^[0-9]+$ ]] || [ "$NODE_MAJOR" -lt 20 ]; then
  echo "Installing Node.js 22..."
  curl -fsSL https://deb.nodesource.com/setup_22.x | sudo bash -
  sudo apt-get install -y nodejs
fi
echo "Node: $(node --version)"

# ── 2. Global tools ───────────────────────────────────────────────────────────
# tsx and pm2 are installed globally: first attempt via sudo (errors hidden),
# and if that fails, a plain `npm install -g` as the current user.
if ! sudo npm install -g tsx pm2 2>/dev/null; then
  npm install -g tsx pm2
fi

# The last line printed by `pm2 startup` is executed as root to register the
# systemd unit for the current user. A failure here is tolerated.
pm2_boot_cmd="$(pm2 startup systemd -u "$USER" --hp "$HOME" | tail -1)"
printf '%s\n' "$pm2_boot_cmd" | sudo bash || true

# ── 3. Clone / update repo ───────────────────────────────────────────────────
# Leaves the working directory at $INSTALL_DIR.
#
# - An existing checkout is recognised by its .git directory (a bare
#   `-d "$INSTALL_DIR"` test would run `git pull` inside any leftover folder).
# - $INSTALL_DIR lives under /opt, which a regular user normally cannot write
#   to, so the directory is created with sudo and handed to the current user
#   before cloning. Later steps write .env and index-pi.ts into it without sudo.
if [ -d "$INSTALL_DIR/.git" ]; then
  echo "Updating existing repo..."
  git -C "$INSTALL_DIR" pull
else
  echo "Cloning from Gitea..."
  if [ ! -d "$INSTALL_DIR" ]; then
    sudo mkdir -p "$INSTALL_DIR"
    sudo chown "$(id -un):$(id -gn)" "$INSTALL_DIR"
  fi
  # git refuses to clone into a non-empty directory, which stops the script
  # (set -e) with git's own error message instead of a confusing pull failure.
  git clone "$GITEA" "$INSTALL_DIR"
fi
cd "$INSTALL_DIR"

# ── 4. Install deps (scraper package only, skip Playwright) ──────────────────
# Runs relative to the current directory (the repo root at this point).
# npm lifecycle scripts are disabled: --ignore-scripts skips the playwright
# browser download.
cd ./packages/scraper
npm install --ignore-scripts
echo "Dependencies installed"

# ── 5. .env file ─────────────────────────────────────────────────────────────
# Writes the runtime configuration to $INSTALL_DIR/.env.
#
# The file contains the database password, so it must not be readable by other
# local users:
#   - umask 077 (inside a subshell, so the rest of the script keeps its umask)
#     makes a newly created file owner-only;
#   - chmod 600 additionally tightens a file left over from an earlier run.
(
  umask 077
  cat > "$INSTALL_DIR/.env" <<EOF
POSTGRES_HOST=$DB_HOST
POSTGRES_PORT=$DB_PORT
POSTGRES_USER=$DB_USER
POSTGRES_PASSWORD=$DB_PASS
POSTGRES_DB=$DB_NAME
CRAWLEE_STORAGE_DIR=/tmp/tip-crawlee
NODE_ENV=production
PI_NODE=true
EOF
)
chmod 600 "$INSTALL_DIR/.env"
echo ".env written"

# ── 6. Pi-specific scheduler index ───────────────────────────────────────────
# The Pi runs only fetch/cheerio scrapers — no Playwright.
#
# Generates packages/scraper/src/index-pi.ts. The heredoc delimiter is quoted
# ('PIEOF'), so nothing inside (backticks, ${...}) is expanded by the shell.
#
# Changes relative to the previous generated file:
#   - POSTGRES_USER / POSTGRES_PASSWORD are percent-encoded before being placed
#     in the connection URI, so characters like "@", ":" or "/" cannot break it.
#   - Worker handlers log a failure and then rethrow it. Previously the error
#     was swallowed, so pg-boss saw every job as successful and the configured
#     retryLimit / retryDelay could never take effect.
#   - The two parallel queue->module and queue->function maps are one table.
cat > "$INSTALL_DIR/packages/scraper/src/index-pi.ts" <<'PIEOF'
/**
 * Pi Scraper Index — lightweight fetch/cheerio only
 * No Playwright, no eBay enricher, no heavy compute
 * Runs 24/7 on Raspberry Pi nodes
 */
import { config } from "dotenv";
import { join } from "path";
// The .env file sits three directories above this file (packages/scraper/src).
config({ path: join(__dirname, "..", "..", "..", ".env") });

import PgBoss from "pg-boss";

/** Label used in log output; falls back to "pi" when PI_NAME is unset. */
const piName = process.env.PI_NAME || "pi";

/**
 * Builds the PostgreSQL connection URI from the POSTGRES_* variables.
 *
 * User and password are percent-encoded: a raw "@", ":", "/" or "#" inside a
 * credential would otherwise change how the URI is parsed.
 */
function buildConnectionString(): string {
  const user = encodeURIComponent(process.env.POSTGRES_USER ?? "");
  const password = encodeURIComponent(process.env.POSTGRES_PASSWORD ?? "");
  const host = process.env.POSTGRES_HOST;
  const port = process.env.POSTGRES_PORT || "5433";
  const database = process.env.POSTGRES_DB;
  return `postgres://${user}:${password}@${host}:${port}/${database}`;
}

/** Every queue this node creates and serves. */
const PI_QUEUES = [
  "scrape:pricing:fluxlight",
  "scrape:pricing:gbics",
  "scrape:pricing:optcore",
  "scrape:pricing:champion-one",
  "scrape:pricing:sfpcables",
  "scrape:pricing:blueoptics",
  "scrape:pricing:fiber24",
  "scrape:pricing:tscom",
  "scrape:pricing:skylane",
  "scrape:pricing:ascentoptics",
  "scrape:pricing:gaotek",
  "scrape:catalog:smartoptics",
  "scrape:catalog:hubersuhner",
  "scrape:news",
  "scrape:market-intel",
];

/**
 * Queue name -> module to import at startup and the name of the scraper
 * function it exports. "scrape:market-intel" is not listed here because its
 * module is imported lazily inside its own handler (see main()).
 */
const PI_SCRAPERS: Record<string, { modPath: string; fnName: string }> = {
  "scrape:pricing:fluxlight": { modPath: "./scrapers/fluxlight", fnName: "scrapeFluxlight" },
  "scrape:pricing:gbics": { modPath: "./scrapers/gbics", fnName: "scrapeGbics" },
  "scrape:pricing:optcore": { modPath: "./scrapers/optcore", fnName: "scrapeOptcore" },
  "scrape:pricing:champion-one": { modPath: "./scrapers/champion-one", fnName: "scrapeChampionOne" },
  "scrape:pricing:sfpcables": { modPath: "./scrapers/sfpcables", fnName: "scrapeSfpCables" },
  "scrape:pricing:blueoptics": { modPath: "./scrapers/blueoptics", fnName: "scrapeBlueOptics" },
  "scrape:pricing:fiber24": { modPath: "./scrapers/fiber24", fnName: "scrapeFiber24" },
  "scrape:pricing:tscom": { modPath: "./scrapers/tscom", fnName: "scrapeTsCom" },
  "scrape:pricing:skylane": { modPath: "./scrapers/skylane", fnName: "scrapeSkylane" },
  "scrape:pricing:ascentoptics": { modPath: "./scrapers/ascentoptics", fnName: "scrapeAscentOptics" },
  "scrape:pricing:gaotek": { modPath: "./scrapers/gaotek", fnName: "scrapeGaoTek" },
  "scrape:catalog:smartoptics": { modPath: "./scrapers/smartoptics", fnName: "scrapeSmartOptics" },
  "scrape:catalog:hubersuhner": { modPath: "./scrapers/hubersuhner", fnName: "scrapeHuberSuhner" },
  "scrape:news": { modPath: "./scrapers/news", fnName: "scrapeNews" },
};

/** Starts pg-boss, creates the queues and registers one worker per queue. */
async function main(): Promise<void> {
  console.log(`\n=== TIP Pi Scraper (${piName}) ===\n`);

  const boss = new PgBoss({
    connectionString: buildConnectionString(),
    retryLimit: 2,
    retryDelay: 60,
    expireInSeconds: 3600,
    monitorStateIntervalSeconds: 60,
  });

  boss.on("error", (e) => console.error("pg-boss error:", e));
  await boss.start();

  for (const q of PI_QUEUES) {
    // Errors from createQueue are ignored, as before.
    // NOTE(review): presumably meant to tolerate "queue already exists" — confirm.
    await boss.createQueue(q).catch(() => {});
  }

  // Register workers for all Pi-safe scrapers
  for (const [queue, { modPath, fnName }] of Object.entries(PI_SCRAPERS)) {
    const mod = await import(modPath);
    const fn = mod[fnName];
    if (!fn) {
      console.warn(`No function ${fnName} in ${modPath}`);
      continue;
    }
    await boss.work(queue, async () => {
      console.log(`[${new Date().toISOString()}] [${piName}] Running: ${queue}`);
      try {
        await fn();
      } catch (e) {
        console.error(`[${queue}] failed:`, String(e).slice(0, 200));
        // Rethrow so pg-boss records the failure and can apply retryLimit.
        throw e;
      }
    });
  }

  // Market intel worker (module is imported on each run, inside the handler)
  await boss.work("scrape:market-intel", async () => {
    console.log(`[${new Date().toISOString()}] Running: Market intelligence`);
    const { scrapeMarketIntelligence } = await import("./scrapers/market-intelligence");
    try {
      await scrapeMarketIntelligence();
    } catch (e) {
      console.error("market-intel failed:", String(e).slice(0, 200));
      // Rethrow so pg-boss records the failure and can apply retryLimit.
      throw e;
    }
  });

  console.log(`Pi worker registered for ${PI_QUEUES.length} queues\nWaiting for jobs...\n`);

  // Stop pg-boss before exiting on either termination signal.
  for (const signal of ["SIGTERM", "SIGINT"] as const) {
    process.on(signal, async () => {
      await boss.stop();
      process.exit(0);
    });
  }
}

main().catch((e) => {
  console.error("Fatal:", e);
  process.exit(1);
});
PIEOF

# ── 7. WireGuard (connects to Erik 10.10.0.1 for DB access) ─────────────────
# Runs only when WG_PRIVKEY is provided. Writes /etc/wireguard/wg0.conf,
# (re)starts wg0 and enables it at boot.
#
# Fixes relative to the previous version:
#   - The fallback PostUp route used $ERIK_ENDPOINT, which includes ":51820";
#     `ip route add` needs a bare address, and the resulting error was hidden
#     by `|| true`. The port is now stripped, and the route is only emitted
#     when a default gateway was actually found.
#   - The config (containing the private key) was first written to
#     /tmp/wg0.conf with default permissions. It is now written straight to
#     its final location, which is created with mode 600 beforehand.
WG_PRIVKEY="${WG_PRIVKEY:-}"
ERIK_PUBKEY="nrh8xiPzUWwLDK4y6+Cu0V3ne56zobIHKtxMGb7BKQo="
ERIK_ENDPOINT="82.165.222.127:51820"
WG_ADDR="${WG_ADDR:-10.10.0.9}" # override per Pi: WG_ADDR=10.10.0.6

if [ -n "$WG_PRIVKEY" ]; then
  # Best-effort install; a real failure surfaces at `wg-quick up` below.
  sudo apt-get install -y wireguard-tools 2>/dev/null | tail -1 || true

  # Detect primary outgoing interface
  OUTIF=$(ip route get 8.8.8.8 2>/dev/null | awk '{for(i=1;i<=NF;i++) if($i=="dev") print $(i+1)}' | head -1)
  POSTUPCMD=""
  if [ -n "$OUTIF" ] && ! ping -c1 -W2 8.8.8.8 &>/dev/null; then
    # Fallback route for WG traffic if default interface has no internet
    GW=$(ip route | awk '/default/{print $3; exit}')
    ERIK_HOST="${ERIK_ENDPOINT%:*}" # endpoint without the ":port" suffix
    if [ -n "$GW" ]; then
      POSTUPCMD="PostUp = ip route add $ERIK_HOST via $GW dev $OUTIF 2>/dev/null || true"
    fi
  fi

  # Create the target file owner-only first, then fill it via stdin so the
  # private key never appears in a world-readable file or in process arguments.
  sudo install -d -m 700 /etc/wireguard
  sudo install -m 600 /dev/null /etc/wireguard/wg0.conf
  sudo tee /etc/wireguard/wg0.conf >/dev/null <<WGEOF
[Interface]
PrivateKey = $WG_PRIVKEY
Address = $WG_ADDR/24
$POSTUPCMD

[Peer]
PublicKey = $ERIK_PUBKEY
Endpoint = $ERIK_ENDPOINT
AllowedIPs = 10.10.0.1/32
PersistentKeepalive = 25
WGEOF

  # Bring down a tunnel left over from an earlier run (ignored if none exists).
  sudo wg-quick down wg0 2>/dev/null || true
  sudo wg-quick up wg0
  sudo systemctl enable wg-quick@wg0
  echo "WireGuard: $(sudo wg show wg0 | grep 'latest handshake' || echo 'starting...')"
else
  echo "WireGuard: skipped (set WG_PRIVKEY and WG_ADDR to enable)"
fi

# ── 8. PM2 process ───────────────────────────────────────────────────────────
# Starts index-pi.ts under pm2 using tsx as the interpreter, then saves the
# process list.
#
# If `pm2 start` fails (e.g. the process is already registered from an earlier
# run), the existing process is restarted instead. The restart passes PI_NAME
# and --update-env so a changed PI_NAME is picked up; a plain `pm2 restart`
# keeps the environment from the original start.
cd "$INSTALL_DIR"
if ! PI_NAME="$PI_NAME" pm2 start \
    --name "tip-pi-scraper" \
    --interpreter "$(command -v tsx)" \
    --cwd "$INSTALL_DIR" \
    packages/scraper/src/index-pi.ts; then
  PI_NAME="$PI_NAME" pm2 restart tip-pi-scraper --update-env
fi

pm2 save

# ── 9. Optional: SOCKS5 Proxy Agent (Starlink bandwidth contribution) ────────
# Allows Erik scraper to route requests THROUGH this Pi's internet connection.
# Especially useful when Pi is on Starlink: different IP range, bypasses IONOS
# IP-based rate limiting on target vendor sites.
#
# Starlink notes:
#   - CG-NAT: cannot accept direct incoming TCP from internet
#   - WireGuard tunnel already bypasses this (Pi connects OUT to Erik)
#   - SOCKS5 listens on WireGuard IP (10.10.0.x), not public interface
#   - Erik routes selected scraper jobs through: ALL_PROXY=socks5://10.10.0.x:1080
#
# To enable: run with PROXY_AGENT=1 WG_ADDR=10.10.0.6 bash pi-scraper-setup.sh
# (WG_PRIVKEY must be set as well; without it the proxy is skipped with a notice.)
#
# Fixes relative to the previous version:
#   - PROXY_AGENT=1 without WG_PRIVKEY used to be skipped silently.
#   - An undetectable outgoing interface produced an empty `external:` line;
#     the script now stops with an explicit error before touching the config.
#   - Only the first IPv4 address of wg0 is used for `internal:`.
#   - The config is written directly to /etc/danted.conf instead of being
#     staged in a predictable /tmp path.
PROXY_AGENT="${PROXY_AGENT:-0}"
PROXY_PORT="${PROXY_PORT:-1080}"

if [ "$PROXY_AGENT" = "1" ] && [ -z "${WG_PRIVKEY:-}" ]; then
  echo "SOCKS5 proxy: skipped (PROXY_AGENT=1 also requires WG_PRIVKEY; the proxy binds to the WireGuard IP)" >&2
elif [ "$PROXY_AGENT" = "1" ]; then
  echo ""
  echo "── Installing SOCKS5 Proxy Agent ────────────────────────────────────"

  # Install dante-server (lightweight SOCKS5 for Linux)
  sudo apt-get install -y dante-server 2>/dev/null | tail -1 || true

  # Address to listen on: the first IPv4 address of wg0, else the configured one.
  WG_IP=$(ip addr show wg0 2>/dev/null | awk '/inet /{print $2}' | cut -d/ -f1 | head -1)
  if [ -z "$WG_IP" ]; then
    WG_IP="$WG_ADDR"
  fi

  # Detect Starlink interface (usually eth0 or wlan0 — the WAN interface)
  OUTIF=$(ip route get 8.8.8.8 2>/dev/null | awk '{for(i=1;i<=NF;i++) if($i=="dev") print $(i+1)}' | head -1)
  if [ -z "$OUTIF" ]; then
    echo "ERROR: could not detect the outgoing network interface; /etc/danted.conf not written" >&2
    exit 1
  fi

  sudo tee /etc/danted.conf >/dev/null <<DANTEEOF
logoutput: syslog

internal: $WG_IP port = $PROXY_PORT
external: $OUTIF

socksmethod: none
clientmethod: none

client pass {
    from: 10.10.0.0/24 to: 0.0.0.0/0
    log: error
}

socks pass {
    from: 10.10.0.0/24 to: 0.0.0.0/0
    protocol: tcp
    log: error
}
DANTEEOF

  sudo systemctl enable danted
  sudo systemctl restart danted

  echo "SOCKS5 proxy listening on $WG_IP:$PROXY_PORT (WireGuard-only, no public exposure)"
  echo "Use from Erik: ALL_PROXY=socks5://$WG_IP:$PROXY_PORT curl https://example.com"
  echo "Starlink interface: $OUTIF"
  echo ""
  echo "To use for scraper jobs, set in Erik ecosystem.config.js:"
  echo " ALL_PROXY: 'socks5://$WG_IP:$PROXY_PORT' # for jobs that need Starlink IP"
fi

# ── Summary ──────────────────────────────────────────────────────────────────
# PI_QUEUES is a TypeScript constant inside the generated index-pi.ts, not a
# bash array, so ${#PI_QUEUES[@]} always expanded to 0. The number of queues is
# counted from the generated file instead: the "scrape:..." entries between
# `const PI_QUEUES = [` and the closing `]`. `|| true` keeps a zero count
# (grep exit status 1) from aborting the script under `set -e`.
PI_INDEX_FILE="$INSTALL_DIR/packages/scraper/src/index-pi.ts"
QUEUE_COUNT=$(sed -n '/^const PI_QUEUES = \[/,/^\]/p' "$PI_INDEX_FILE" 2>/dev/null | grep -c '"scrape:' || true)

echo ""
echo "✅ TIP Pi Scraper ($PI_NAME) is running"
echo " pm2 logs tip-pi-scraper — view logs"
echo " pm2 status — check status"
echo ""
echo "DB target: $DB_HOST:$DB_PORT/$DB_NAME"
echo "Jobs: ${QUEUE_COUNT:-0} lightweight scrapers, all day every day"

# The proxy is only installed when PROXY_AGENT=1 AND WG_PRIVKEY is set, so the
# line is printed under the same condition. The address shown is the one the
# proxy was bound to (WG_IP) when available, otherwise the configured WG_ADDR.
if [ "${PROXY_AGENT:-0}" = "1" ] && [ -n "${WG_PRIVKEY:-}" ]; then
  echo "SOCKS5 proxy: socks5://${WG_IP:-$WG_ADDR}:${PROXY_PORT:-1080} (Starlink bandwidth via WireGuard)"
fi