#!/bin/bash
# ─────────────────────────────────────────────────────────────────────────────
# TIP Pi Scraper Setup — run this ONCE on each Raspberry Pi
#
# Usage (from the Pi itself or via SSH once you have access):
#   curl -sL https://gitea.context-x.org/rene/transceiver-db/raw/branch/main/scripts/pi-scraper-setup.sh | bash
#
# Or copy & run manually:
#   bash pi-scraper-setup.sh
#
# What this does:
#   1. Installs Node.js 22 + tsx + pm2
#   2. Clones the TIP scraper package
#   3. Installs dependencies (no Playwright — Pi runs fetch-only scrapers)
#   4. Creates .env pointing to Erik's PostgreSQL via WireGuard
#   5. Starts pm2 with the Pi-specific scheduler (lightweight scrapers only)
#
# Environment overrides (all optional):
#   PI_NAME       name of this node, shown in logs      (default: pi-scraper)
#   DB_HOST/DB_PORT/DB_USER/DB_PASS/DB_NAME             PostgreSQL target
#   GITEA         git URL of the repository to clone
#   WG_PRIVKEY    WireGuard private key; WireGuard is skipped when empty
#   WG_ADDR       WireGuard address of this Pi          (default: 10.10.0.9)
#   PROXY_AGENT   set to 1 to install the SOCKS5 proxy  (needs WG_PRIVKEY)
#   PROXY_PORT    SOCKS5 listen port                    (default: 1080)
# ─────────────────────────────────────────────────────────────────────────────
set -euo pipefail

PI_NAME="${PI_NAME:-pi-scraper}"   # override with PI_NAME=pi2 bash setup.sh
DB_HOST="${DB_HOST:-10.10.0.1}"    # Erik WireGuard IP
DB_PORT="${DB_PORT:-5433}"
DB_USER="${DB_USER:-tip}"
# NOTE(review): a production password is hard-coded as the default here. It is
# kept so existing invocations keep working, but it should be passed in via
# DB_PASS and the value rotated.
DB_PASS="${DB_PASS:-tip_prod_2026}"
DB_NAME="${DB_NAME:-transceiver_db}"
GITEA="${GITEA:-http://192.168.178.196:3000/rene/transceiver-db.git}"
INSTALL_DIR="/opt/tip-scraper"
INDEX_PI="$INSTALL_DIR/packages/scraper/src/index-pi.ts"
RUN_USER="${USER:-$(id -un)}"      # USER can be unset in non-login shells

# Print a message to stderr and abort.
die() {
  printf 'ERROR: %s\n' "$*" >&2
  exit 1
}

# Temp files (they may hold secrets) are removed on every exit path.
TMP_FILES=()
cleanup() {
  if (( ${#TMP_FILES[@]} > 0 )); then
    rm -f -- "${TMP_FILES[@]}"
  fi
  return 0
}
trap cleanup EXIT

# Print the name of the interface used for outbound traffic (empty if none).
detect_out_interface() {
  ip route get 8.8.8.8 2>/dev/null \
    | awk '{for(i=1;i<=NF;i++) if($i=="dev") print $(i+1)}' \
    | head -1 || true
}

echo "=== TIP Pi Scraper Setup: $PI_NAME ==="

# ── 1. Node.js 22 ────────────────────────────────────────────────────────────
# Compare the MAJOR version numerically. A string comparison against "v20"
# would accept v9.x and reject v100.x.
NODE_MAJOR=0
if command -v node &>/dev/null; then
  NODE_VERSION="$(node --version 2>/dev/null || true)"
  NODE_VERSION="${NODE_VERSION#v}"
  NODE_MAJOR="${NODE_VERSION%%.*}"
  [[ "$NODE_MAJOR" =~ ^[0-9]+$ ]] || NODE_MAJOR=0
fi
if (( NODE_MAJOR < 20 )); then
  echo "Installing Node.js 22..."
  curl -fsSL https://deb.nodesource.com/setup_22.x | sudo bash -
  sudo apt-get install -y nodejs
fi
echo "Node: $(node --version)"

# ── 2. Global tools ───────────────────────────────────────────────────────────
sudo npm install -g tsx pm2 2>/dev/null || npm install -g tsx pm2
# `pm2 startup` prints the command that registers the systemd unit as its last
# line; run it. Best-effort: a failure here must not abort the setup.
pm2 startup systemd -u "$RUN_USER" --hp "$HOME" | tail -1 | sudo bash || true

# ── 3. Clone / update repo ───────────────────────────────────────────────────
command -v git &>/dev/null || sudo apt-get install -y git
if [[ -d "$INSTALL_DIR/.git" ]]; then
  echo "Updating existing repo..."
  git -C "$INSTALL_DIR" pull
else
  echo "Cloning from Gitea..."
  if [[ ! -d "$INSTALL_DIR" ]]; then
    # /opt is normally root-owned: create the target with sudo and hand it to
    # the invoking user so git/npm/pm2 can run unprivileged.
    sudo mkdir -p "$INSTALL_DIR"
    sudo chown "$RUN_USER" "$INSTALL_DIR"
  fi
  git clone "$GITEA" "$INSTALL_DIR"
fi

# ── 4. Install deps (scraper package only, skip Playwright) ──────────────────
cd "$INSTALL_DIR/packages/scraper" || die "missing $INSTALL_DIR/packages/scraper"
npm install --ignore-scripts   # --ignore-scripts skips playwright browser download
echo "Dependencies installed"

# ── 5. .env file ─────────────────────────────────────────────────────────────
# NOTE(review): the body of this heredoc was lost in the copy under review. The
# keys below are exactly the variables index-pi.ts reads; confirm against the
# canonical script.
(
  umask 077   # the file contains the DB password
  cat > "$INSTALL_DIR/.env" <<EOF
POSTGRES_HOST=$DB_HOST
POSTGRES_PORT=$DB_PORT
POSTGRES_USER=$DB_USER
POSTGRES_PASSWORD=$DB_PASS
POSTGRES_DB=$DB_NAME
PI_NAME=$PI_NAME
EOF
)
chmod 600 "$INSTALL_DIR/.env"   # also tighten a pre-existing file

# ── 6. Pi-specific worker entry point (index-pi.ts) ──────────────────────────
# Quoted delimiter: the TypeScript below is written verbatim, nothing expands.
cat > "$INDEX_PI" <<'PIEOF'
/**
 * Pi Scraper Index — lightweight fetch/cheerio only
 * No Playwright, no eBay enricher, no heavy compute
 * Runs 24/7 on Raspberry Pi nodes
 */
import { config } from "dotenv";
import { join } from "path";
config({ path: join(__dirname, "..", "..", "..", ".env") });

import PgBoss from "pg-boss";

// Credentials are URL-encoded so characters such as "@" or ":" in the
// password cannot corrupt the connection URI.
const dbUser = encodeURIComponent(process.env.POSTGRES_USER ?? "");
const dbPass = encodeURIComponent(process.env.POSTGRES_PASSWORD ?? "");
const connectionString = `postgres://${dbUser}:${dbPass}@${process.env.POSTGRES_HOST}:${process.env.POSTGRES_PORT || "5433"}/${process.env.POSTGRES_DB}`;

const PI_QUEUES = [
  "scrape:pricing:fluxlight",
  "scrape:pricing:gbics",
  "scrape:pricing:optcore",
  "scrape:pricing:champion-one",
  "scrape:pricing:sfpcables",
  "scrape:pricing:blueoptics",
  "scrape:pricing:fiber24",
  "scrape:pricing:tscom",
  "scrape:pricing:skylane",
  "scrape:pricing:ascentoptics",
  "scrape:pricing:gaotek",
  "scrape:catalog:smartoptics",
  "scrape:catalog:hubersuhner",
  "scrape:news",
  "scrape:market-intel",
];

async function main() {
  console.log(`\n=== TIP Pi Scraper (${process.env.PI_NAME || "pi"}) ===\n`);

  const boss = new PgBoss({
    connectionString,
    retryLimit: 2,
    retryDelay: 60,
    expireInSeconds: 3600,
    monitorStateIntervalSeconds: 60,
  });
  boss.on("error", (e) => console.error("pg-boss error:", e));
  await boss.start();

  for (const q of PI_QUEUES) {
    await boss.createQueue(q).catch(() => {});
  }

  // Register workers for all Pi-safe scrapers
  const mods: Record<string, string> = {
    "scrape:pricing:fluxlight": "./scrapers/fluxlight",
    "scrape:pricing:gbics": "./scrapers/gbics",
    "scrape:pricing:optcore": "./scrapers/optcore",
    "scrape:pricing:champion-one": "./scrapers/champion-one",
    "scrape:pricing:sfpcables": "./scrapers/sfpcables",
    "scrape:pricing:blueoptics": "./scrapers/blueoptics",
    "scrape:pricing:fiber24": "./scrapers/fiber24",
    "scrape:pricing:tscom": "./scrapers/tscom",
    "scrape:pricing:skylane": "./scrapers/skylane",
    "scrape:pricing:ascentoptics": "./scrapers/ascentoptics",
    "scrape:pricing:gaotek": "./scrapers/gaotek",
    "scrape:catalog:smartoptics": "./scrapers/smartoptics",
    "scrape:catalog:hubersuhner": "./scrapers/hubersuhner",
    "scrape:news": "./scrapers/news",
  };

  const fnNames: Record<string, string> = {
    "scrape:pricing:fluxlight": "scrapeFluxlight",
    "scrape:pricing:gbics": "scrapeGbics",
    "scrape:pricing:optcore": "scrapeOptcore",
    "scrape:pricing:champion-one": "scrapeChampionOne",
    "scrape:pricing:sfpcables": "scrapeSfpCables",
    "scrape:pricing:blueoptics": "scrapeBlueOptics",
    "scrape:pricing:fiber24": "scrapeFiber24",
    "scrape:pricing:tscom": "scrapeTsCom",
    "scrape:pricing:skylane": "scrapeSkylane",
    "scrape:pricing:ascentoptics": "scrapeAscentOptics",
    "scrape:pricing:gaotek": "scrapeGaoTek",
    "scrape:catalog:smartoptics": "scrapeSmartOptics",
    "scrape:catalog:hubersuhner": "scrapeHuberSuhner",
    "scrape:news": "scrapeNews",
  };

  for (const [queue, modPath] of Object.entries(mods)) {
    const mod = await import(modPath);
    const fn = mod[fnNames[queue]];
    if (!fn) {
      console.warn(`No function ${fnNames[queue]} in ${modPath}`);
      continue;
    }
    await boss.work(queue, async () => {
      console.log(`[${new Date().toISOString()}] [${process.env.PI_NAME || "pi"}] Running: ${queue}`);
      try {
        await fn();
      } catch (e) {
        console.error(`[${queue}] failed:`, String(e).slice(0, 200));
      }
    });
  }

  // Market intel worker
  await boss.work("scrape:market-intel", async () => {
    console.log(`[${new Date().toISOString()}] Running: Market intelligence`);
    const { scrapeMarketIntelligence } = await import("./scrapers/market-intelligence");
    try {
      await scrapeMarketIntelligence();
    } catch (e) {
      console.error("market-intel failed:", String(e).slice(0, 200));
    }
  });

  console.log(`Pi worker registered for ${PI_QUEUES.length} queues\nWaiting for jobs...\n`);

  process.on("SIGTERM", async () => { await boss.stop(); process.exit(0); });
  process.on("SIGINT", async () => { await boss.stop(); process.exit(0); });
}

main().catch((e) => { console.error("Fatal:", e); process.exit(1); });
PIEOF

# Number of queues the worker serves, counted from the PI_QUEUES entries of the
# file just written (lines consisting only of a quoted "scrape:…" name and a
# trailing comma). The list lives in TypeScript, not in a shell array.
PI_QUEUE_COUNT="$(grep -cE '^[[:space:]]*"scrape:[^"]*",[[:space:]]*$' "$INDEX_PI" || true)"

# ── 7. WireGuard (connects to Erik 10.10.0.1 for DB access) ─────────────────
WG_PRIVKEY="${WG_PRIVKEY:-}"
ERIK_PUBKEY="nrh8xiPzUWwLDK4y6+Cu0V3ne56zobIHKtxMGb7BKQo="
ERIK_ENDPOINT="82.165.222.127:51820"
WG_ADDR="${WG_ADDR:-10.10.0.9}"   # override per Pi: WG_ADDR=10.10.0.6

if [[ -n "$WG_PRIVKEY" ]]; then
  # Install output is trimmed to its last line; a failure is caught just below.
  sudo apt-get install -y wireguard-tools 2>/dev/null | tail -1 || true
  command -v wg-quick &>/dev/null || die "wireguard-tools is not installed (wg-quick missing)"

  # Detect primary outgoing interface
  OUTIF="$(detect_out_interface)"
  POSTUPCMD=""
  if [[ -n "$OUTIF" ]] && ! ping -c1 -W2 8.8.8.8 &>/dev/null; then
    # Fallback route for WG traffic if default interface has no internet
    GW="$(ip route | awk '/default/{print $3; exit}' || true)"
    if [[ -n "$GW" ]]; then
      # `ip route add` takes a host address: strip the ":port" of the endpoint.
      POSTUPCMD="PostUp = ip route add ${ERIK_ENDPOINT%:*} via $GW dev $OUTIF 2>/dev/null || true"
    fi
  fi

  # The config holds the private key: build it in a private temp file (mktemp
  # creates it mode 600) rather than at a predictable /tmp path.
  WG_TMP="$(mktemp)" || die "mktemp failed"
  TMP_FILES+=("$WG_TMP")
  # NOTE(review): the body of this config and the commands up to
  # `wg-quick down` were lost in the copy under review. AllowedIPs matches the
  # 10.10.0.0/24 range used by the SOCKS5 rules; the /24 prefix and
  # PersistentKeepalive are assumptions — confirm against the canonical script.
  cat > "$WG_TMP" <<WGEOF
[Interface]
PrivateKey = $WG_PRIVKEY
Address = $WG_ADDR/24
$POSTUPCMD

[Peer]
PublicKey = $ERIK_PUBKEY
Endpoint = $ERIK_ENDPOINT
AllowedIPs = 10.10.0.0/24
PersistentKeepalive = 25
WGEOF
  sudo install -D -m 600 -o root -g root "$WG_TMP" /etc/wireguard/wg0.conf

  # Tear down a previous tunnel first (absent on first run, hence best-effort).
  sudo wg-quick down wg0 2>/dev/null || true
  sudo wg-quick up wg0
  sudo systemctl enable wg-quick@wg0
  echo "WireGuard: $(sudo wg show wg0 | grep 'latest handshake' || echo 'starting...')"
else
  echo "WireGuard: skipped (set WG_PRIVKEY and WG_ADDR to enable)"
fi

# ── 8. PM2 process ───────────────────────────────────────────────────────────
cd "$INSTALL_DIR" || die "cannot cd to $INSTALL_DIR"
TSX_BIN="$(command -v tsx)" || die "tsx not found in PATH"
if ! PI_NAME="$PI_NAME" pm2 start \
    --name "tip-pi-scraper" \
    --interpreter "$TSX_BIN" \
    --cwd "$INSTALL_DIR" \
    packages/scraper/src/index-pi.ts; then
  # Already registered (re-run of this script): restart it instead, and let
  # pm2 pick up a changed PI_NAME.
  PI_NAME="$PI_NAME" pm2 restart tip-pi-scraper --update-env
fi
pm2 save

# ── 9. Optional: SOCKS5 Proxy Agent (Starlink bandwidth contribution) ────────
# Allows Erik scraper to route requests THROUGH this Pi's internet connection.
# Especially useful when Pi is on Starlink: different IP range, bypasses IONOS
# IP-based rate limiting on target vendor sites.
#
# Starlink notes:
#   - CG-NAT: cannot accept direct incoming TCP from internet
#   - WireGuard tunnel already bypasses this (Pi connects OUT to Erik)
#   - SOCKS5 listens on WireGuard IP (10.10.0.x), not public interface
#   - Erik routes selected scraper jobs through: ALL_PROXY=socks5://10.10.0.x:1080
#
# To enable: run with PROXY_AGENT=1 WG_ADDR=10.10.0.6 bash pi-scraper-setup.sh
PROXY_AGENT="${PROXY_AGENT:-0}"
PROXY_PORT="${PROXY_PORT:-1080}"
PROXY_INSTALLED=0
WG_IP="$WG_ADDR"

if [[ "$PROXY_AGENT" == "1" && -n "$WG_PRIVKEY" ]]; then
  echo ""
  echo "── Installing SOCKS5 Proxy Agent ────────────────────────────────────"

  # Install dante-server (lightweight SOCKS5 for Linux)
  sudo apt-get install -y dante-server 2>/dev/null | tail -1 || true

  # Prefer the address actually configured on wg0; fall back to WG_ADDR.
  WG_IP="$(ip addr show wg0 2>/dev/null | awk '/inet /{print $2}' | cut -d/ -f1 || true)"
  if [[ -z "$WG_IP" ]]; then
    WG_IP="$WG_ADDR"
  fi

  # Detect Starlink interface (usually eth0 or wlan0 — the WAN interface)
  OUTIF="$(detect_out_interface)"

  if [[ -z "$OUTIF" ]]; then
    # Without an external interface the generated danted.conf would be invalid.
    echo "SOCKS5 proxy: skipped (no outbound interface detected)" >&2
  else
    DANTE_TMP="$(mktemp)" || die "mktemp failed"
    TMP_FILES+=("$DANTE_TMP")
    cat > "$DANTE_TMP" <<DANTEEOF
logoutput: syslog
internal: $WG_IP port = $PROXY_PORT
external: $OUTIF
socksmethod: none
clientmethod: none
client pass {
  from: 10.10.0.0/24 to: 0.0.0.0/0
  log: error
}
socks pass {
  from: 10.10.0.0/24 to: 0.0.0.0/0
  protocol: tcp
  log: error
}
DANTEEOF
    sudo install -m 644 -o root -g root "$DANTE_TMP" /etc/danted.conf
    sudo systemctl enable danted
    sudo systemctl restart danted
    PROXY_INSTALLED=1

    echo "SOCKS5 proxy listening on $WG_IP:$PROXY_PORT (WireGuard-only, no public exposure)"
    echo "Use from Erik: ALL_PROXY=socks5://$WG_IP:$PROXY_PORT curl https://example.com"
    echo "Starlink interface: $OUTIF"
    echo ""
    echo "To use for scraper jobs, set in Erik ecosystem.config.js:"
    echo "  ALL_PROXY: 'socks5://$WG_IP:$PROXY_PORT'  # for jobs that need Starlink IP"
  fi
fi

echo ""
echo "✅ TIP Pi Scraper ($PI_NAME) is running"
echo "   pm2 logs tip-pi-scraper   — view logs"
echo "   pm2 status                — check status"
echo ""
echo "DB target: $DB_HOST:$DB_PORT/$DB_NAME"
echo "Jobs: ${PI_QUEUE_COUNT:-0} lightweight scrapers, all day every day"
# Only advertise the proxy when it was actually installed above.
if [[ "$PROXY_INSTALLED" == "1" ]]; then
  echo "SOCKS5 proxy: socks5://$WG_ADDR:$PROXY_PORT (Starlink bandwidth via WireGuard)"
fi