- All scrapers now run nightly 00:00-08:00 (staggered, every day) - NAS sync module: rsync JSON exports + weekly pg_dump to Fearghas via WireGuard - 07:45 daily: price_observations, switches, transceivers, signals, issues exported as JSON - Migration 021: 200 ABC classifications, 150 reorder signals, 300 stock snapshots demo data - 9 market intelligence entries (LightReading, FierceTelecom, Farnell, Mouser, EU TED, Arista) - 6 lifecycle events (ZR, 800G OSFP, 100G DR4 price floor, SFP-10G-SR EOL)
288 lines
13 KiB
TypeScript
288 lines
13 KiB
TypeScript
/**
|
|
* NAS Sync — Fearghas
|
|
*
|
|
* Exports TIP data nightly to the home NAS "Fearghas" via rsync over SSH.
|
|
* Erik reaches Fearghas through the WireGuard tunnel (192.168.178.x).
|
|
*
|
|
* Required environment variables:
|
|
* NAS_HOST = 192.168.178.50 (Fearghas IP on home network)
|
|
* NAS_USER = tip (NAS user with write access to share)
|
|
* NAS_SSH_KEY = /root/.ssh/id_ed25519 (SSH key for rsync)
|
|
* NAS_PATH = /volume1/tip-data (Synology: /volume1/<share>)
|
|
*
|
|
* Directory layout on NAS:
|
|
* /volume1/tip-data/
|
|
* exports/
|
|
* YYYY-MM-DD/
|
|
* price_observations.json
|
|
* switches.json
|
|
* transceivers.json
|
|
* market_intelligence.json
|
|
* reorder_signals.json
|
|
 *         product_issues.json
 *         abc_classification.json
 *         competitor_alerts.json
|
|
* changelog.md
|
|
* raw-cache/ ← raw HTML from Crawler LLM (future)
|
|
 *     db-backups/      ← pg_dump on Sundays and on the 1st of each month
 *       transceiver_db_YYYY-MM-DD.sql.gz
|
|
*/
|
|
|
|
import { exec, execFile } from "child_process";
import { promisify } from "util";
import { writeFile, mkdir, rm, readFile } from "fs/promises";
import { join } from "path";
import { tmpdir } from "os";
import { db as pool } from "./db";
import { logger } from "./logger";
|
|
|
|
/** Promise-returning wrapper around `child_process.exec`. */
const execAsync = promisify(exec);

/**
 * Reads an environment variable, substituting `fallback` when the variable
 * is unset or set to the empty string.
 */
const envOr = (name: string, fallback: string): string => process.env[name] || fallback;

// ── NAS target (SSH / rsync) ────────────────────────────────────────────────
const NAS_HOST = envOr("NAS_HOST", "192.168.178.50");
const NAS_USER = envOr("NAS_USER", "tip");
const NAS_SSH_KEY = envOr("NAS_SSH_KEY", "/root/.ssh/id_ed25519");
const NAS_PATH = envOr("NAS_PATH", "/volume1/tip-data");

// ── PostgreSQL connection settings ──────────────────────────────────────────
const DB_HOST = envOr("DB_HOST", "localhost");
const DB_PORT = envOr("DB_PORT", "5433");
const DB_USER = envOr("DB_USER", "tip");
const DB_NAME = envOr("DB_NAME", "transceiver_db");
// DB_PASS takes precedence; PGPASSWORD is the secondary source, then empty.
const DB_PASS = envOr("DB_PASS", envOr("PGPASSWORD", ""));
|
|
|
|
// ─────────────────────────────────────────────────────────────────────────────
|
|
// Check NAS reachability via WireGuard
|
|
// ─────────────────────────────────────────────────────────────────────────────
|
|
/**
 * Probes the NAS by running `echo ok` over a non-interactive SSH session.
 *
 * @returns `true` when the SSH command exits successfully within the
 *          time limits, `false` on any failure (the error is discarded).
 */
export async function checkNasReachable(): Promise<boolean> {
  // BatchMode=yes makes ssh fail instead of prompting for input.
  // NOTE(review): StrictHostKeyChecking=no skips host-key verification —
  // confirm this is acceptable for the tunnel this runs over.
  const probeCmd = `ssh -i ${NAS_SSH_KEY} -o ConnectTimeout=5 -o BatchMode=yes -o StrictHostKeyChecking=no ${NAS_USER}@${NAS_HOST} "echo ok"`;

  let reachable = true;
  try {
    // The 8 s process timeout is a backstop on top of ssh's own 5 s ConnectTimeout.
    await execAsync(probeCmd, { timeout: 8000 });
  } catch {
    reachable = false;
  }
  return reachable;
}
|
|
|
|
// ─────────────────────────────────────────────────────────────────────────────
|
|
// Export all TIP data tables to JSON files
|
|
// ─────────────────────────────────────────────────────────────────────────────
|
|
/**
 * Runs a fixed set of SQL queries and writes each result set to
 * `<exportDir>/<name>.json`, then copies `CHANGELOG_PENDING.md` (resolved two
 * levels above the current working directory) to `<exportDir>/changelog.md`.
 *
 * Each export is best-effort: a failing query or write is logged as a warning
 * and the remaining exports still run.
 *
 * @param exportDir Existing directory that receives the files.
 * @param date      Date label (YYYY-MM-DD) embedded in every JSON envelope.
 * @returns Absolute paths of the files that were actually written.
 */
async function exportDataToJson(exportDir: string, date: string): Promise<string[]> {
  const files: string[] = [];

  // Named `tableExports` rather than `exports` so it cannot be confused with
  // (or shadow) the CommonJS module `exports` object.
  const tableExports: ReadonlyArray<{ name: string; query: string }> = [
    {
      // Only observations from the last 24 hours.
      name: "price_observations",
      query: `SELECT po.time, t.slug, t.form_factor, t.speed_gbps, v.name AS vendor,
                po.price, po.currency, po.condition, po.marketplace, po.stock_level, po.url
              FROM price_observations po
              JOIN transceivers t ON po.transceiver_id = t.id
              JOIN vendors v ON po.source_vendor_id = v.id
              WHERE po.time > NOW() - INTERVAL '24 hours'
              ORDER BY po.time DESC`,
    },
    {
      name: "transceivers",
      query: `SELECT t.slug, t.form_factor, t.speed, t.speed_gbps, t.reach_label,
                t.fiber_type, t.wavelengths, t.connector, t.power_consumption_w,
                v.name AS vendor, v.type AS vendor_type,
                t.price_verified_eur, t.fully_verified,
                t.image_url, t.product_page_url, t.part_number,
                t.created_at, t.updated_at
              FROM transceivers t JOIN vendors v ON t.vendor_id = v.id
              ORDER BY t.speed_gbps DESC, t.form_factor, t.reach_label`,
    },
    {
      name: "switches",
      query: `SELECT sw.model, sw.series, sw.category, sw.layer,
                v.name AS vendor,
                sw.max_speed_gbps, sw.total_ports, sw.ports_config,
                sw.switching_capacity_tbps, sw.macsec_support,
                sw.vxlan_support, sw.evpn_support, sw.bgp_support,
                sw.lifecycle_status, sw.features, sw.description,
                sw.ebay_refurb_price_usd, sw.image_url, sw.product_page_url
              FROM switches sw JOIN vendors v ON sw.vendor_id = v.id
              ORDER BY sw.max_speed_gbps DESC, sw.model`,
    },
    {
      // Capped at the 500 most recently created entries.
      name: "market_intelligence",
      query: `SELECT source_name, source_url, title, description, category,
                impact_level, buy_signal_effect, confidence, published_date, created_at
              FROM market_intelligence
              ORDER BY created_at DESC
              LIMIT 500`,
    },
    {
      name: "reorder_signals",
      query: `SELECT t.slug, t.form_factor, t.speed_gbps,
                rs.signal, rs.signal_strength, rs.reasons,
                rs.current_asp_eur, rs.price_trend_30d,
                rs.computed_at
              FROM reorder_signals rs
              JOIN transceivers t ON rs.transceiver_id = t.id
              ORDER BY rs.signal_strength DESC`,
    },
    {
      name: "product_issues",
      query: `SELECT pi.product_model, pi.source_type, pi.source_name, pi.source_url,
                pi.title, pi.summary, pi.severity, pi.issue_tags,
                pi.affected_firmware, pi.fix_firmware,
                pi.date_reported, pi.is_resolved
              FROM product_issues pi
              ORDER BY pi.severity, pi.date_reported DESC`,
    },
    {
      name: "abc_classification",
      query: `SELECT t.slug, t.form_factor, t.speed_gbps,
                abc.abc_class, abc.score, abc.obs_90d,
                abc.compat_count, abc.vendor_count, abc.classified_at
              FROM abc_classification abc
              JOIN transceivers t ON abc.transceiver_id = t.id
              ORDER BY abc.score DESC`,
    },
    {
      // Only alerts detected within the last 7 days.
      name: "competitor_alerts",
      query: `SELECT ca.alert_type, ca.severity,
                v.name AS vendor, ca.product_name, ca.form_factor, ca.speed_gbps,
                ca.old_price, ca.new_price, ca.currency, ca.price_delta_pct,
                ca.url, ca.detected_at
              FROM competitor_alerts ca
              JOIN vendors v ON ca.vendor_id = v.id
              WHERE ca.detected_at > NOW() - INTERVAL '7 days'
              ORDER BY ca.detected_at DESC`,
    },
  ];

  // Exports run one after another; a failure in one does not stop the rest.
  for (const exp of tableExports) {
    try {
      const result = await pool.query(exp.query);
      const filePath = join(exportDir, `${exp.name}.json`);
      const envelope = {
        exported_at: new Date().toISOString(),
        date,
        table: exp.name,
        count: result.rows.length,
        data: result.rows,
      };
      await writeFile(filePath, JSON.stringify(envelope, null, 2));
      files.push(filePath);
      logger.info(`Exported ${result.rows.length} rows → ${exp.name}.json`);
    } catch (err) {
      logger.warn(`Export failed for ${exp.name}`, { err });
    }
  }

  // Also export the pending changelog, if it exists relative to the cwd.
  const changelogSrc = join(process.cwd(), "..", "..", "CHANGELOG_PENDING.md");
  try {
    const changelog = await readFile(changelogSrc, "utf-8");
    const clPath = join(exportDir, "changelog.md");
    await writeFile(clPath, changelog);
    files.push(clPath);
  } catch (err: unknown) {
    // A missing changelog is expected in some working directories and stays
    // silent; any other failure (permissions, failed write) is surfaced.
    const code = err instanceof Error ? (err as NodeJS.ErrnoException).code : undefined;
    if (code !== "ENOENT") {
      logger.warn("Changelog export failed", { err });
    }
  }

  return files;
}
|
|
|
|
// ─────────────────────────────────────────────────────────────────────────────
|
|
// DB Backup via pg_dump
|
|
// ─────────────────────────────────────────────────────────────────────────────
|
|
/**
 * Dumps the database to `<backupDir>/transceiver_db_<date>.sql.gz`.
 *
 * pg_dump is invoked directly (no shell) and compresses the plain-text dump
 * itself, so its exit status is what decides success. The previous
 * `pg_dump | gzip > file` pipeline reported gzip's exit status, which let a
 * failed dump pass as a successful (empty or truncated) backup.
 *
 * @param backupDir Existing directory that receives the archive.
 * @param date      Date label (YYYY-MM-DD) used in the file name.
 * @returns Path of the archive, or `null` when the dump failed.
 */
async function createDbBackup(backupDir: string, date: string): Promise<string | null> {
  const backupFile = join(backupDir, `transceiver_db_${date}.sql.gz`);
  const pgDumpArgs = [
    "-h", DB_HOST,
    "-p", DB_PORT,
    "-U", DB_USER,
    "--compress=9", // gzip-compresses plain-format output at the maximum level
    `--file=${backupFile}`,
    DB_NAME,
  ];

  try {
    await promisify(execFile)("pg_dump", pgDumpArgs, {
      timeout: 300000, // 5 min timeout
      // The password travels via the child's environment rather than the
      // command line: no shell quoting issues and it is not in the argv.
      env: { ...process.env, PGPASSWORD: DB_PASS },
    });
    logger.info(`DB backup created: ${backupFile}`);
    return backupFile;
  } catch (err) {
    logger.error("DB backup failed", { err });
    // Do not leave a partial archive behind that could be mistaken for a backup.
    await rm(backupFile, { force: true }).catch(() => {});
    return null;
  }
}
|
|
|
|
// ─────────────────────────────────────────────────────────────────────────────
|
|
// Sync local export dir to NAS via rsync over SSH
|
|
// ─────────────────────────────────────────────────────────────────────────────
|
|
/**
 * Copies the contents of a local directory into `<NAS_PATH>/<nasSubPath>/`
 * on the NAS using rsync over SSH.
 *
 * @param localDir   Local directory whose contents are sent (a trailing slash
 *                   is appended, so the directory itself is not nested).
 * @param nasSubPath Sub-directory below `NAS_PATH` on the NAS.
 * @returns `true` when rsync exits successfully, `false` otherwise (the
 *          error is logged, not thrown).
 */
async function rsyncToNas(localDir: string, nasSubPath: string): Promise<boolean> {
  const remoteShell = `--rsh="ssh -i ${NAS_SSH_KEY} -o StrictHostKeyChecking=no -o ConnectTimeout=15"`;
  const source = `${localDir}/`;
  const destination = `${NAS_USER}@${NAS_HOST}:${NAS_PATH}/${nasSubPath}/`;
  const rsyncCmd = `rsync -avz --timeout=60 ${remoteShell} ${source} ${destination}`;

  try {
    // rsync's own --timeout=60 is in seconds; the 120000 here is the
    // process timeout in milliseconds.
    const { stdout } = await execAsync(rsyncCmd, { timeout: 120000 });
    const outputLines = stdout.split("\n").length;
    logger.info("rsync to NAS complete", { lines: outputLines });
    return true;
  } catch (err) {
    logger.error("rsync to NAS failed", { err });
    return false;
  }
}
|
|
|
|
// ─────────────────────────────────────────────────────────────────────────────
|
|
// Main export + sync job — called nightly at 07:30
|
|
// ─────────────────────────────────────────────────────────────────────────────
|
|
/**
 * Nightly job: exports data to JSON, optionally creates a database backup,
 * and rsyncs both to the NAS.
 *
 * Steps:
 *  1. Skip everything (with a warning) when the NAS is not reachable.
 *  2. Export the data tables into a temporary directory.
 *  3. On Sundays and on the 1st of the month, create a database backup.
 *  4. rsync the exports, and the backup if one was actually created.
 *  5. Always remove the temporary directory.
 *
 * Failures of individual steps are collected and reported in a single error
 * log entry instead of being followed by a "complete" message. Errors while
 * creating the temporary directories propagate to the caller.
 */
export async function runNightlyNasSync(): Promise<void> {
  // One timestamp drives both the date label and the backup-day decision,
  // and both are evaluated in UTC so they can never disagree.
  const now = new Date();
  const date = now.toISOString().slice(0, 10); // YYYY-MM-DD
  const tmpBase = join(tmpdir(), `tip-nas-export-${date}`);

  logger.info(`NAS sync starting for ${date}`);

  // Check NAS is reachable
  const reachable = await checkNasReachable();
  if (!reachable) {
    logger.warn(`NAS ${NAS_HOST} not reachable via WireGuard — skipping sync`);
    return;
  }

  try {
    // 1. Create temp dirs
    const exportsRoot = join(tmpBase, "exports");
    const exportDir = join(exportsRoot, date);
    const backupDir = join(tmpBase, "db-backups");
    await mkdir(exportDir, { recursive: true });
    await mkdir(backupDir, { recursive: true });

    const failures: string[] = [];

    // 2. Export data tables
    const exportedFiles = await exportDataToJson(exportDir, date);
    if (exportedFiles.length === 0) {
      failures.push("data export (no files written)");
    }

    // 3. DB backup (every Sunday + 1st of month)
    const isBackupDay = now.getUTCDay() === 0 || now.getUTCDate() === 1;
    const backupFile = isBackupDay ? await createDbBackup(backupDir, date) : null;
    if (isBackupDay && backupFile === null) {
      failures.push("database backup");
    }

    // 4. Sync exports to NAS
    if (!(await rsyncToNas(exportsRoot, "exports"))) {
      failures.push("rsync of exports");
    }

    // 5. Sync backups only if one was actually created
    if (backupFile !== null && !(await rsyncToNas(backupDir, "db-backups"))) {
      failures.push("rsync of db-backups");
    }

    if (failures.length === 0) {
      logger.info(`NAS sync complete for ${date}`);
    } else {
      logger.error(`NAS sync finished with failures for ${date}`, { failures });
    }
  } finally {
    // Cleanup temp files
    await rm(tmpBase, { recursive: true, force: true }).catch(() => {});
  }
}
|
|
|
|
// ─────────────────────────────────────────────────────────────────────────────
|
|
// Setup NAS directory structure (run once)
|
|
// ─────────────────────────────────────────────────────────────────────────────
|
|
/**
 * One-time setup: creates the top-level directories below `NAS_PATH` on the
 * NAS by running `mkdir -p` over SSH, one directory per SSH call.
 *
 * A failure for one directory is logged as a warning and does not prevent
 * the remaining directories from being attempted.
 */
export async function setupNasDirectories(): Promise<void> {
  const sshPrefix = `ssh -i ${NAS_SSH_KEY} -o StrictHostKeyChecking=no ${NAS_USER}@${NAS_HOST}`;

  for (const dir of ["exports", "db-backups", "raw-cache", "logs"]) {
    const remotePath = `${NAS_PATH}/${dir}`;
    try {
      await execAsync(`${sshPrefix} "mkdir -p ${remotePath}"`, { timeout: 15000 });
      logger.info(`NAS dir created: ${remotePath}`);
    } catch (err) {
      logger.warn(`Failed to create NAS dir ${dir}`, { err });
    }
  }
}
|