Rene Fichtmueller 48218a553d feat: nightly scraper window 00-08 + NAS Fearghas sync + procurement demo data
- All scrapers now run nightly 00:00-08:00 (staggered, every day)
- NAS sync module: rsync JSON exports + weekly pg_dump to Fearghas via WireGuard
- 07:45 daily: price_observations, switches, transceivers, signals, issues exported as JSON
- Migration 021: 200 ABC classifications, 150 reorder signals, 300 stock snapshots demo data
- 9 market intelligence entries (LightReading, FierceTelecom, Farnell, Mouser, EU TED, Arista)
- 6 lifecycle events (ZR, 800G OSFP, 100G DR4 price floor, SFP-10G-SR EOL)
2026-04-01 23:07:26 +02:00

288 lines
13 KiB
TypeScript

/**
* NAS Sync — Fearghas
*
* Exports TIP data nightly to the home NAS "Fearghas" via rsync over SSH.
* Erik reaches Fearghas through the WireGuard tunnel (192.168.178.x).
*
* Environment variables (all optional — the values shown are the built-in defaults):
* NAS_HOST = 192.168.178.50 (Fearghas IP on home network)
* NAS_USER = tip (NAS user with write access to share)
* NAS_SSH_KEY = /root/.ssh/id_ed25519 (SSH key for rsync)
* NAS_PATH = /volume1/tip-data (Synology: /volume1/<share>)
*
* Directory layout on NAS:
*   /volume1/tip-data/
*     exports/
*       YYYY-MM-DD/
*         price_observations.json
*         switches.json
*         transceivers.json
*         market_intelligence.json
*         reorder_signals.json
*         product_issues.json
*         abc_classification.json
*         competitor_alerts.json
*         changelog.md               ← only when CHANGELOG_PENDING.md is found
*     raw-cache/                     ← raw HTML from Crawler LLM (future)
*     db-backups/                    ← pg_dump, Sundays and the 1st of each month
*       transceiver_db_YYYY-MM-DD.sql.gz
*     logs/
*/
import { exec } from "child_process";
import { promisify } from "util";
import { writeFile, mkdir, rm } from "fs/promises";
import { join } from "path";
import { tmpdir } from "os";
import { db as pool } from "./db";
import { logger } from "./logger";
/** Promise-returning wrapper around `child_process.exec`. */
const execAsync = promisify(exec);

/**
 * Reads an environment variable, falling back when it is unset or empty
 * (`||` semantics: an empty string counts as "not configured").
 */
const envOr = (key: string, fallback: string): string => process.env[key] || fallback;

// NAS connection settings.
const NAS_HOST = envOr("NAS_HOST", "192.168.178.50");
const NAS_USER = envOr("NAS_USER", "tip");
const NAS_SSH_KEY = envOr("NAS_SSH_KEY", "/root/.ssh/id_ed25519");
const NAS_PATH = envOr("NAS_PATH", "/volume1/tip-data");

// Database connection settings used for pg_dump.
const DB_HOST = envOr("DB_HOST", "localhost");
const DB_PORT = envOr("DB_PORT", "5433");
const DB_USER = envOr("DB_USER", "tip");
const DB_NAME = envOr("DB_NAME", "transceiver_db");
// DB_PASS wins over PGPASSWORD; an empty string means no password is configured.
const DB_PASS = envOr("DB_PASS", envOr("PGPASSWORD", ""));
// ─────────────────────────────────────────────────────────────────────────────
// Check NAS reachability via WireGuard
// ─────────────────────────────────────────────────────────────────────────────
/**
 * Probes the NAS by running a trivial `echo ok` over SSH.
 *
 * The probe is non-interactive (`BatchMode=yes`), gives the connection five
 * seconds to come up and is abandoned after eight seconds overall.
 *
 * @returns `true` when the SSH command succeeded, `false` on any failure.
 */
export async function checkNasReachable(): Promise<boolean> {
  const probeCommand = [
    "ssh",
    "-i",
    NAS_SSH_KEY,
    "-o ConnectTimeout=5",
    "-o BatchMode=yes",
    "-o StrictHostKeyChecking=no",
    `${NAS_USER}@${NAS_HOST}`,
    '"echo ok"',
  ].join(" ");

  let reachable = true;
  try {
    await execAsync(probeCommand, { timeout: 8000 });
  } catch {
    // Any failure (timeout, auth, network) simply means "not reachable".
    reachable = false;
  }
  return reachable;
}
// ─────────────────────────────────────────────────────────────────────────────
// Export all TIP data tables to JSON files
// ─────────────────────────────────────────────────────────────────────────────
/**
 * Runs every export query and writes each result set to
 * `<exportDir>/<name>.json`, then copies the pending changelog if it exists.
 *
 * A failing query or write is logged as a warning and skipped; the remaining
 * exports still run.
 *
 * @param exportDir Existing directory that receives the files.
 * @param date      Date stamp (YYYY-MM-DD) recorded inside every JSON file.
 * @returns Paths of all files that were written successfully.
 */
async function exportDataToJson(exportDir: string, date: string): Promise<string[]> {
  // NOTE: the SQL continuation lines are intentionally kept flush-left so the
  // query text stays exactly as it was.
  const tableExports: Array<{ name: string; query: string }> = [
    {
      name: "price_observations",
      query: `SELECT po.time, t.slug, t.form_factor, t.speed_gbps, v.name AS vendor,
po.price, po.currency, po.condition, po.marketplace, po.stock_level, po.url
FROM price_observations po
JOIN transceivers t ON po.transceiver_id = t.id
JOIN vendors v ON po.source_vendor_id = v.id
WHERE po.time > NOW() - INTERVAL '24 hours'
ORDER BY po.time DESC`,
    },
    {
      name: "transceivers",
      query: `SELECT t.slug, t.form_factor, t.speed, t.speed_gbps, t.reach_label,
t.fiber_type, t.wavelengths, t.connector, t.power_consumption_w,
v.name AS vendor, v.type AS vendor_type,
t.price_verified_eur, t.fully_verified,
t.image_url, t.product_page_url, t.part_number,
t.created_at, t.updated_at
FROM transceivers t JOIN vendors v ON t.vendor_id = v.id
ORDER BY t.speed_gbps DESC, t.form_factor, t.reach_label`,
    },
    {
      name: "switches",
      query: `SELECT sw.model, sw.series, sw.category, sw.layer,
v.name AS vendor,
sw.max_speed_gbps, sw.total_ports, sw.ports_config,
sw.switching_capacity_tbps, sw.macsec_support,
sw.vxlan_support, sw.evpn_support, sw.bgp_support,
sw.lifecycle_status, sw.features, sw.description,
sw.ebay_refurb_price_usd, sw.image_url, sw.product_page_url
FROM switches sw JOIN vendors v ON sw.vendor_id = v.id
ORDER BY sw.max_speed_gbps DESC, sw.model`,
    },
    {
      name: "market_intelligence",
      query: `SELECT source_name, source_url, title, description, category,
impact_level, buy_signal_effect, confidence, published_date, created_at
FROM market_intelligence
ORDER BY created_at DESC
LIMIT 500`,
    },
    {
      name: "reorder_signals",
      query: `SELECT t.slug, t.form_factor, t.speed_gbps,
rs.signal, rs.signal_strength, rs.reasons,
rs.current_asp_eur, rs.price_trend_30d,
rs.computed_at
FROM reorder_signals rs
JOIN transceivers t ON rs.transceiver_id = t.id
ORDER BY rs.signal_strength DESC`,
    },
    {
      name: "product_issues",
      query: `SELECT pi.product_model, pi.source_type, pi.source_name, pi.source_url,
pi.title, pi.summary, pi.severity, pi.issue_tags,
pi.affected_firmware, pi.fix_firmware,
pi.date_reported, pi.is_resolved
FROM product_issues pi
ORDER BY pi.severity, pi.date_reported DESC`,
    },
    {
      name: "abc_classification",
      query: `SELECT t.slug, t.form_factor, t.speed_gbps,
abc.abc_class, abc.score, abc.obs_90d,
abc.compat_count, abc.vendor_count, abc.classified_at
FROM abc_classification abc
JOIN transceivers t ON abc.transceiver_id = t.id
ORDER BY abc.score DESC`,
    },
    {
      name: "competitor_alerts",
      query: `SELECT ca.alert_type, ca.severity,
v.name AS vendor, ca.product_name, ca.form_factor, ca.speed_gbps,
ca.old_price, ca.new_price, ca.currency, ca.price_delta_pct,
ca.url, ca.detected_at
FROM competitor_alerts ca
JOIN vendors v ON ca.vendor_id = v.id
WHERE ca.detected_at > NOW() - INTERVAL '7 days'
ORDER BY ca.detected_at DESC`,
    },
  ];

  const written: string[] = [];

  // One table at a time; a failure only affects that table's file.
  for (const { name, query } of tableExports) {
    try {
      const { rows } = await pool.query(query);
      const target = join(exportDir, `${name}.json`);
      const payload = {
        exported_at: new Date().toISOString(),
        date,
        table: name,
        count: rows.length,
        data: rows,
      };
      await writeFile(target, JSON.stringify(payload, null, 2));
      written.push(target);
      logger.info(`Exported ${rows.length} rows → ${name}.json`);
    } catch (err) {
      logger.warn(`Export failed for ${name}`, { err });
    }
  }

  // Best-effort copy of the pending changelog, looked up two levels above
  // the current working directory.
  const changelogSource = join(process.cwd(), "..", "..", "CHANGELOG_PENDING.md");
  try {
    const fsPromises = await import("fs/promises");
    const contents = await fsPromises.readFile(changelogSource, "utf-8");
    const changelogTarget = join(exportDir, "changelog.md");
    await writeFile(changelogTarget, contents);
    written.push(changelogTarget);
  } catch {
    // The changelog may not exist relative to this working directory — skip it.
  }

  return written;
}
// ─────────────────────────────────────────────────────────────────────────────
// DB Backup via pg_dump
// ─────────────────────────────────────────────────────────────────────────────
/**
 * Dumps the database into a gzip-compressed SQL file inside `backupDir`.
 *
 * pg_dump compresses and writes the file itself (`-Z 9 -f …`) rather than
 * being piped into `gzip`: with a shell pipeline the exit status is that of
 * the last command, so a failed dump would still be reported as a success.
 * The password is handed over through the child's environment instead of the
 * command line, which keeps it out of the process list and avoids quoting
 * problems with characters such as `"`, `$` or backticks.
 *
 * @param backupDir Existing local directory that receives the dump.
 * @param date      Date stamp (YYYY-MM-DD) embedded in the file name.
 * @returns Path of the created backup, or `null` when the dump failed
 *          (any partially written file is removed).
 */
async function createDbBackup(backupDir: string, date: string): Promise<string | null> {
  const backupFile = join(backupDir, `transceiver_db_${date}.sql.gz`);

  // --no-password: fail fast instead of waiting for a prompt nobody answers.
  const cmd =
    `pg_dump -h ${DB_HOST} -p ${DB_PORT} -U ${DB_USER} --no-password ` +
    `-Z 9 -f "${backupFile}" ${DB_NAME}`;

  // Only override PGPASSWORD when a password is actually configured.
  const env = DB_PASS ? { ...process.env, PGPASSWORD: DB_PASS } : process.env;

  try {
    await execAsync(cmd, { timeout: 300000, env }); // 5 min timeout
    logger.info(`DB backup created: ${backupFile}`);
    return backupFile;
  } catch (err) {
    logger.error("DB backup failed", { err });
    // Do not leave a truncated dump behind where it could be synced as if valid.
    await rm(backupFile, { force: true }).catch(() => {});
    return null;
  }
}
// ─────────────────────────────────────────────────────────────────────────────
// Sync local export dir to NAS via rsync over SSH
// ─────────────────────────────────────────────────────────────────────────────
/**
 * Copies the contents of a local directory to `<NAS_PATH>/<nasSubPath>/` on
 * the NAS using rsync over SSH.
 *
 * @param localDir   Local directory whose contents are transferred.
 * @param nasSubPath Sub-directory below `NAS_PATH` that receives the files.
 * @returns `true` when rsync exited successfully, `false` otherwise
 *          (the error is logged).
 */
async function rsyncToNas(localDir: string, nasSubPath: string): Promise<boolean> {
  const remoteShell = `ssh -i ${NAS_SSH_KEY} -o StrictHostKeyChecking=no -o ConnectTimeout=15`;
  const destination = `${NAS_USER}@${NAS_HOST}:${NAS_PATH}/${nasSubPath}/`;
  // Trailing slash on the source: sync the directory's contents, not the directory itself.
  const command = `rsync -avz --timeout=60 --rsh="${remoteShell}" ${localDir}/ ${destination}`;

  try {
    const result = await execAsync(command, { timeout: 120000 });
    const lineCount = result.stdout.split("\n").length;
    logger.info("rsync to NAS complete", { lines: lineCount });
    return true;
  } catch (err) {
    logger.error("rsync to NAS failed", { err });
    return false;
  }
}
// ─────────────────────────────────────────────────────────────────────────────
// Main export + sync job — called nightly at 07:30
// ─────────────────────────────────────────────────────────────────────────────
/**
 * Nightly job: exports the data tables as JSON, optionally creates a database
 * backup, and pushes both to the NAS.
 *
 * Steps:
 *  1. Skip everything when the NAS cannot be reached.
 *  2. Write the JSON exports into a temporary directory.
 *  3. On Sundays and on the 1st of a month, create a database backup.
 *  4. rsync the exports and, if one was produced, the backup.
 *  5. Always remove the temporary directory afterwards.
 *
 * The date stamp and the backup-day decision both derive from the same
 * instant in UTC, so the backup file name always matches the day that
 * triggered it. Failed transfers are logged as errors rather than being
 * reported as a completed sync.
 *
 * @throws When the temporary directories cannot be created.
 */
export async function runNightlyNasSync(): Promise<void> {
  const now = new Date();
  const date = now.toISOString().slice(0, 10); // YYYY-MM-DD (UTC)
  const tmpBase = join(tmpdir(), `tip-nas-export-${date}`);
  logger.info(`NAS sync starting for ${date}`);

  // Check NAS is reachable
  if (!(await checkNasReachable())) {
    logger.warn(`NAS ${NAS_HOST} not reachable via WireGuard — skipping sync`);
    return;
  }

  try {
    // 1. Create temp dirs
    const exportDir = join(tmpBase, "exports", date);
    const backupDir = join(tmpBase, "db-backups");
    await mkdir(exportDir, { recursive: true });
    await mkdir(backupDir, { recursive: true });

    // 2. Export data tables
    await exportDataToJson(exportDir, date);

    // 3. DB backup (every Sunday + 1st of month), same clock as `date`
    const isBackupDay = now.getUTCDay() === 0 || now.getUTCDate() === 1;
    const backupFile = isBackupDay ? await createDbBackup(backupDir, date) : null;

    // 4. Sync exports to NAS
    const exportsSynced = await rsyncToNas(join(tmpBase, "exports"), "exports");

    // 5. Sync the backup only if one was actually produced
    let backupSynced = true;
    if (isBackupDay) {
      backupSynced = backupFile !== null && (await rsyncToNas(backupDir, "db-backups"));
    }

    if (exportsSynced && backupSynced) {
      logger.info(`NAS sync complete for ${date}`);
    } else {
      logger.error(`NAS sync finished with errors for ${date}`, {
        exportsSynced,
        backupSynced,
        backupCreated: backupFile !== null,
        isBackupDay,
      });
    }
  } finally {
    // Cleanup temp files
    await rm(tmpBase, { recursive: true, force: true }).catch(() => {});
  }
}
// ─────────────────────────────────────────────────────────────────────────────
// Setup NAS directory structure (run once)
// ─────────────────────────────────────────────────────────────────────────────
/**
 * One-time setup: creates the top-level directories below `NAS_PATH` on the
 * NAS via `ssh … mkdir -p`.
 *
 * Directories are created one after another; a failure is logged as a
 * warning and does not stop the remaining directories from being created.
 */
export async function setupNasDirectories(): Promise<void> {
  const sshTarget = `${NAS_USER}@${NAS_HOST}`;

  for (const dir of ["exports", "db-backups", "raw-cache", "logs"]) {
    const remoteDir = `${NAS_PATH}/${dir}`;
    const mkdirCommand = `ssh -i ${NAS_SSH_KEY} -o StrictHostKeyChecking=no ${sshTarget} "mkdir -p ${remoteDir}"`;
    try {
      await execAsync(mkdirCommand, { timeout: 15000 });
      logger.info(`NAS dir created: ${remoteDir}`);
    } catch (err) {
      logger.warn(`Failed to create NAS dir ${dir}`, { err });
    }
  }
}