Phase 0 - Foundation: - Restructure into npm workspace monorepo (packages/core, api, scraper) - PostgreSQL 17 + TimescaleDB schema (15 tables incl. hypertables) - Docker Compose for local dev (PostgreSQL on 5433 + Qdrant) - Express 5 API on port 3200 with 6 routes - Seed script to migrate 159 transceivers + 42 standards from npm package - Erik server setup script + PM2 ecosystem config Phase 1 - Scraper Engine: - Crawlee + Playwright framework with pg-boss scheduler - FS.com scraper (PlaywrightCrawler, anti-bot workaround) - Optcore.net scraper (WP REST API enumeration + PlaywrightCrawler) - Uses /wp-json/wp/v2/product to get 2000+ product URLs - Playwright renders individual product pages for price extraction - Cisco TMG Matrix scraper (compatibility data) - News RSS aggregator (optics.org, SPIE, Network World, Nature Photonics) - Keyword relevance scoring for transceiver/fiber topics - xml2js with malformed XML sanitization - SHA-256 content hashing for change detection (skip unchanged records) - pg-boss v10 with explicit queue creation before scheduling
74 lines
1.9 KiB
Bash
Executable File
74 lines
1.9 KiB
Bash
Executable File
#!/bin/bash
#
# TIP: Setup script for Erik server (.82)
#
# Provisions the host in three steps: PostgreSQL 17 with TimescaleDB and
# pgvector (including the application database), Qdrant in Docker, and the
# /opt/tip application directory.
#
# Run as root or with sudo.
#
# Environment:
#   POSTGRES_PASSWORD  password for the `tip` database role (optional; the
#                      script falls back to a built-in default when unset)

# Abort on the first failing command, on any use of an unset variable, and
# when any stage of a pipeline fails.
set -euo pipefail

echo "=== TIP: Erik Server Setup ==="
echo ""

# 1. PostgreSQL 17 + TimescaleDB
echo "--- Installing PostgreSQL 17 + TimescaleDB ---"
apt-get update
# curl (and CA certificates for HTTPS) are required to fetch the repository
# key below and are not guaranteed to exist on a minimal install.
apt-get install -y gnupg2 lsb-release curl ca-certificates

# TimescaleDB repo
# NOTE(review): this script originally assumed the repo also provides the
# PostgreSQL 17 server packages and postgresql-17-pgvector -- confirm they
# resolve on this host; otherwise the PGDG apt repository must be added first.
#
# The codename is resolved in its own assignment so that a failing
# lsb_release aborts the script (under `set -e`) instead of silently writing
# a sources entry with an empty suite.
distro_codename=$(lsb_release -cs)
echo "deb https://packagecloud.io/timescale/timescaledb/debian/ ${distro_codename} main" \
  > /etc/apt/sources.list.d/timescaledb.list
# --batch --yes lets gpg overwrite an existing keyring without prompting, so
# the script can be re-run.
curl -fsSL https://packagecloud.io/timescale/timescaledb/gpgkey \
  | gpg --batch --yes --dearmor -o /etc/apt/trusted.gpg.d/timescaledb.gpg

apt-get update
apt-get install -y timescaledb-2-postgresql-17

# Enable TimescaleDB
timescaledb-tune --quiet --yes

# pgvector
apt-get install -y postgresql-17-pgvector

# Restart so the configuration written by timescaledb-tune takes effect, then
# make sure the service starts on boot.
systemctl restart postgresql
systemctl enable postgresql

# Create DB and user
#
# The `tip` role password comes from POSTGRES_PASSWORD. The historical
# built-in default is kept for backward compatibility, but using it is
# flagged on stderr because a password committed to the repo is not a secret.
if [[ -z "${POSTGRES_PASSWORD:-}" ]]; then
  echo "WARNING: POSTGRES_PASSWORD is not set; using the built-in default password for role 'tip'." >&2
fi
tip_db_password=${POSTGRES_PASSWORD:-tip_prod_2026}
# Double every embedded single quote so the value stays a valid SQL string
# literal when it is expanded into the here-document below.
tip_db_password_sql=${tip_db_password//\'/\'\'}

# ON_ERROR_STOP=1 makes psql exit non-zero on the first SQL error, so the
# "ready" message below is only printed when every statement succeeded.
# The role and database are created through \gexec only when they do not
# exist yet, which makes this section safe to re-run. An existing role's
# password is intentionally left untouched.
# The password travels on psql's stdin rather than on a command line.
sudo -u postgres psql -v ON_ERROR_STOP=1 <<SQL
SELECT format('CREATE USER tip WITH PASSWORD %L', '${tip_db_password_sql}')
WHERE NOT EXISTS (SELECT 1 FROM pg_roles WHERE rolname = 'tip') \gexec
SELECT 'CREATE DATABASE transceiver_db OWNER tip'
WHERE NOT EXISTS (SELECT 1 FROM pg_database WHERE datname = 'transceiver_db') \gexec
GRANT ALL PRIVILEGES ON DATABASE transceiver_db TO tip;
\c transceiver_db
CREATE EXTENSION IF NOT EXISTS timescaledb;
CREATE EXTENSION IF NOT EXISTS vector;
CREATE EXTENSION IF NOT EXISTS pg_trgm;
CREATE EXTENSION IF NOT EXISTS "uuid-ossp";
SQL

echo "PostgreSQL 17 + TimescaleDB + pgvector ready."

# 2. Qdrant
echo ""
echo "--- Installing Qdrant (Docker) ---"
docker pull qdrant/qdrant:latest

# Host directory that is bind-mounted as Qdrant's storage.
mkdir -p /opt/tip/qdrant

# NOTE(review): the -p flags below publish 6333/6334 without a bind address;
# confirm these ports are firewalled or that Qdrant access is otherwise
# restricted on this host.
if docker container inspect tip-qdrant >/dev/null 2>&1; then
  # A container with this name already exists (earlier run of this script);
  # `docker run --name tip-qdrant` would fail, so just make sure it is up.
  # It keeps the image it was created from, even if a newer one was pulled.
  docker start tip-qdrant
else
  docker run -d \
    --name tip-qdrant \
    --restart unless-stopped \
    -p 6333:6333 \
    -p 6334:6334 \
    -v /opt/tip/qdrant:/qdrant/storage \
    qdrant/qdrant:latest
fi

echo "Qdrant running on port 6333."

# 3. App directory
echo ""
echo "--- Setting up app directory ---"
mkdir -p /opt/tip
cd /opt/tip

# Closing summary: the remaining steps are manual and are only printed here,
# not executed. The quoted delimiter keeps the text literal (no expansion).
cat <<'EOF'
=== Setup complete ===

Next steps:
 1. Clone repo: git clone <repo> /opt/tip/app
 2. cd /opt/tip/app && npm install
 3. cp .env.example .env && edit .env
 4. npm run migrate
 5. npm run seed
 6. pm2 start ecosystem.config.js
EOF