Adds /tmp/tip-fs-scraper.lock PID file to prevent launchd from running a second instance while the previous one is still active (e.g. 2am schedule fires, runs past 10am when launchd fires again). Without this, concurrent instances caused rmSync(storage-fs-phase1) in one instance to delete the Crawlee storage dir while the other was still using it, resulting in ENOENT crashes.
90 lines
2.9 KiB
Bash
Executable File
90 lines
2.9 KiB
Bash
Executable File
#!/bin/bash
# FS.com Scraper — Mac-side runner
# Runs from this Mac (residential IP) so that requests to FS.com are not blocked.
# Opens SSH tunnel to Erik's DB → runs scraper → closes tunnel.
#
# Schedule: launchd at 02:00, 10:00, 18:00 daily
# Log:      ~/Library/Logs/tip-fs-scraper.log

# Strict mode: abort on a failing command (-e), on use of an unset
# variable (-u), and when any stage of a pipeline fails (pipefail).
set -euo pipefail

# File that log() appends every message to.
LOG="$HOME/Library/Logs/tip-fs-scraper.log"
# Repository checkout the scraper is started from (the path contains a space,
# so every use must stay quoted).
REPO="/Users/renefichtmueller/Desktop/Claude Code/github-repos/transceiver-db"
# Homebrew Node.js binaries. NOTE: NODE is not referenced anywhere else in
# this script; only NPX is used to launch the scraper.
NODE="/opt/homebrew/bin/node"
NPX="/opt/homebrew/bin/npx"
# PID file consulted by open_tunnel and removed by close_tunnel.
TUNNEL_PID_FILE="/tmp/tip-db-tunnel.pid"
# Port used on both ends of the SSH forward; also exported as POSTGRES_PORT.
DB_LOCAL_PORT=5433

# Print a timestamped message to stdout and append the same line to $LOG.
# Arguments: the message words; they are joined with single spaces.
log() {
  local stamp
  stamp=$(date '+%Y-%m-%d %H:%M:%S')
  echo "[$stamp] $*" | tee -a "$LOG"
}

# ── Open SSH tunnel if not already running ────────────────────────────────────
#######################################
# Start the SSH port-forward to the database host, unless the PID recorded in
# TUNNEL_PID_FILE belongs to a process that is still alive.
# Globals:   TUNNEL_PID_FILE (read), DB_LOCAL_PORT (read)
# Outputs:   progress messages via log()
# Returns:   0 if a tunnel is already running or was started;
#            the exit status of ssh if it could not be started.
#######################################
open_tunnel() {
  local pid=""
  local rc=0

  if [ -f "$TUNNEL_PID_FILE" ]; then
    # Tolerate a file that vanishes or is unreadable instead of aborting
    # the whole script under `set -e`.
    pid=$(cat "$TUNNEL_PID_FILE" 2>/dev/null || true)
    # Anything that is not a plain number cannot be a PID; treat as stale.
    case "$pid" in
      '' | *[!0-9]*) pid="" ;;
    esac
    if [ -n "$pid" ] && kill -0 "$pid" 2>/dev/null; then
      log "Tunnel already running (PID $pid)"
      return 0
    fi
  fi

  log "Opening SSH tunnel → Erik PostgreSQL on port $DB_LOCAL_PORT…"
  # -f makes ssh fork into the background, so its PID is not captured here
  # and nothing in this script writes TUNNEL_PID_FILE; close_tunnel finds the
  # process with pkill instead.
  # Keep "-N -f -L <port>:localhost:<port>" contiguous and in this order:
  # close_tunnel's pkill pattern matches exactly that text.
  # ExitOnForwardFailure=yes makes ssh fail instead of silently continuing
  # when the local port cannot be bound.
  # `erik` is resolved by ssh (presumably a Host alias in ~/.ssh/config).
  ssh -N -f -L "${DB_LOCAL_PORT}:localhost:${DB_LOCAL_PORT}" \
    -o ExitOnForwardFailure=yes erik || rc=$?
  if [ "$rc" -ne 0 ]; then
    log "ERROR: could not open SSH tunnel (ssh exit status $rc)"
    return "$rc"
  fi

  log "Tunnel opened"
  sleep 2 # Give the tunnel a moment to establish
}

#######################################
# Stop the background SSH tunnel and remove its PID file.
# Globals:   DB_LOCAL_PORT (read), TUNNEL_PID_FILE (read)
# Outputs:   a progress message via log()
# Returns:   0, provided logging and the PID-file removal succeed;
#            "no matching process" from pkill is not an error.
#######################################
close_tunnel() {
  # Must stay identical to the leading arguments open_tunnel passes to ssh,
  # because pkill -f matches against the full command line.
  local pattern="ssh -N -f -L ${DB_LOCAL_PORT}:localhost:${DB_LOCAL_PORT}"

  log "Closing SSH tunnel…"
  # Best effort on purpose: pkill exits non-zero when nothing matched, which
  # must not abort the script under `set -e`.
  pkill -f "$pattern" 2>/dev/null || true
  rm -f "$TUNNEL_PID_FILE"
}

# ── Main ──────────────────────────────────────────────────────────────────────
# Flow: take the run lock → make sure the DB tunnel is up → export DB settings
# → run the scraper → clean up (tunnel + lock) on any exit path.

LOCK_FILE="/tmp/tip-fs-scraper.lock"
# Becomes 1 once this run has started its own tunnel; cleanup_on_exit closes
# the tunnel only in that case, so a tunnel that was already up is left alone.
OPENED_TUNNEL=0

# Single EXIT handler for the whole script. Bash keeps only one handler per
# signal, so installing a second `trap … EXIT` later would replace the
# lock-file cleanup instead of adding to it.
cleanup_on_exit() {
  if [ "$OPENED_TUNNEL" -eq 1 ]; then
    # Never let a tunnel-shutdown problem prevent the lock from being removed.
    close_tunnel || true
  fi
  # Removed last so that a new run cannot start while the tunnel is closing.
  rm -f "$LOCK_FILE"
}

mkdir -p "$(dirname "$LOG")"

# Prevent simultaneous runs (e.g. launchd fires during a still-running instance)
if [ -f "$LOCK_FILE" ]; then
  LOCK_PID=$(cat "$LOCK_FILE" 2>/dev/null || echo "")
  if [ -n "$LOCK_PID" ] && kill -0 "$LOCK_PID" 2>/dev/null; then
    # Written straight to the log file rather than through log(), so nothing
    # is printed to stdout for a skipped run.
    echo "[$(date '+%Y-%m-%d %H:%M:%S')] Already running (PID $LOCK_PID) — skipping" >> "$LOG"
    exit 0
  fi
  # The recorded process is gone: the lock is stale.
  rm -f "$LOCK_FILE"
fi
echo "$$" > "$LOCK_FILE"
trap cleanup_on_exit EXIT

log "=== FS.com Mac Scraper starting ==="

# Only open (and later close) a tunnel if none is running yet.
if ! pgrep -f "ssh -N.*${DB_LOCAL_PORT}:localhost" >/dev/null 2>&1; then
  open_tunnel
  OPENED_TUNNEL=1
fi

if ! cd "$REPO"; then
  log "ERROR: cannot cd to $REPO"
  exit 1
fi

export POSTGRES_HOST=localhost
export POSTGRES_PORT=$DB_LOCAL_PORT
export POSTGRES_DB=transceiver_db
export POSTGRES_USER=tip
export NODE_ENV=production

# Load local credentials from ~/.tip/.env (not committed to git).
# Must contain: export POSTGRES_PASSWORD=<value>
if [ -f "$HOME/.tip/.env" ]; then
  # shellcheck disable=SC1091
  source "$HOME/.tip/.env"
else
  log "WARNING: ~/.tip/.env not found — POSTGRES_PASSWORD may be unset"
fi

log "Running fs-com scraper via tsx…"
# With pipefail the pipeline reports the scraper's failure even though tee
# succeeds; capture it so the failure is recorded in the log before exiting
# with the same status.
scraper_rc=0
"$NPX" tsx packages/scraper/src/scrapers/fs-com.ts 2>&1 | tee -a "$LOG" || scraper_rc=$?
if [ "$scraper_rc" -ne 0 ]; then
  log "ERROR: scraper exited with status $scraper_rc"
  exit "$scraper_rc"
fi

log "=== FS.com Mac Scraper complete ==="