feat: download datasheets + manuals to Fearghas NAS in nightly sync
- downloadDocuments(): fetches PDFs from product_documents and documents tables
using curl, organises into switches/ transceivers/ whitepapers/ other/ subdirs
- Integrated into runNightlyNasSync() — runs after JSON exports
- rsync incremental — only new/changed files transferred
- NAS dir structure: /volume1/tip-data/datasheets/{switches,transceivers,whitepapers,other}
- max-filesize 50MB guard per file
This commit is contained in:
parent
5abe6397c4
commit
c156e8d9f6
@ -28,7 +28,7 @@
|
|||||||
|
|
||||||
import { exec } from "child_process";
|
import { exec } from "child_process";
|
||||||
import { promisify } from "util";
|
import { promisify } from "util";
|
||||||
import { writeFile, mkdir, rm } from "fs/promises";
|
import { writeFile, mkdir, rm, access } from "fs/promises";
|
||||||
import { join } from "path";
|
import { join } from "path";
|
||||||
import { tmpdir } from "os";
|
import { tmpdir } from "os";
|
||||||
import { db as pool } from "./db";
|
import { db as pool } from "./db";
|
||||||
@ -253,10 +253,17 @@ export async function runNightlyNasSync(): Promise<void> {
|
|||||||
await createDbBackup(backupDir, date);
|
await createDbBackup(backupDir, date);
|
||||||
}
|
}
|
||||||
|
|
||||||
// 4. Sync exports to NAS
|
// 4. Download datasheets / manuals / whitepapers
|
||||||
|
const datasheetDir = join(tmpBase, "datasheets");
|
||||||
|
await downloadDocuments(datasheetDir);
|
||||||
|
|
||||||
|
// 5. Sync exports to NAS
|
||||||
await rsyncToNas(join(tmpBase, "exports"), "exports");
|
await rsyncToNas(join(tmpBase, "exports"), "exports");
|
||||||
|
|
||||||
// 5. Sync backups if created
|
// 6. Sync datasheets to NAS (incremental — only new files)
|
||||||
|
await rsyncToNas(datasheetDir, "datasheets");
|
||||||
|
|
||||||
|
// 7. Sync backups if created
|
||||||
if (isBackupDay) {
|
if (isBackupDay) {
|
||||||
await rsyncToNas(backupDir, "db-backups");
|
await rsyncToNas(backupDir, "db-backups");
|
||||||
}
|
}
|
||||||
@ -268,11 +275,86 @@ export async function runNightlyNasSync(): Promise<void> {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// ─────────────────────────────────────────────────────────────────────────────
|
||||||
|
// Download datasheets, manuals & documents to local dir for NAS sync
|
||||||
|
// ─────────────────────────────────────────────────────────────────────────────
|
||||||
|
/**
 * Downloads every document referenced by the `product_documents` and
 * `documents` tables into `destDir`, sorted into `switches/`, `transceivers/`,
 * `whitepapers/` and `other/` subdirectories.
 *
 * Each file is fetched with curl (60 s transfer limit, 50 MiB size limit).
 * A failed download is logged and skipped; it never aborts the whole run.
 *
 * @param destDir Local directory that receives the files; created if missing.
 * @returns Number of files that were successfully downloaded in this call.
 */
async function downloadDocuments(destDir: string): Promise<number> {
  await mkdir(destDir, { recursive: true });
  const subdirs: DocumentSubdir[] = ["switches", "transceivers", "whitepapers", "other"];
  for (const name of subdirs) {
    await mkdir(join(destDir, name), { recursive: true });
  }

  let downloaded = 0;

  // 1. product_documents (switch + transceiver datasheets, manuals)
  const pdRows = await pool.query<{
    id: string; doc_type: string; title: string;
    url: string; switch_id: string | null; transceiver_id: string | null;
  }>(`
    SELECT id, doc_type, title,
           COALESCE(download_url, source_url) AS url,
           switch_id, transceiver_id
    FROM product_documents
    WHERE COALESCE(download_url, source_url) IS NOT NULL
    ORDER BY created_at DESC
  `);

  // 2. documents table (whitepapers, research)
  const docsRows = await pool.query<{
    id: string; doc_type: string; title: string; url: string;
  }>(`
    SELECT id, doc_type, title, source_url AS url
    FROM documents
    WHERE source_url IS NOT NULL
    ORDER BY created_at DESC
  `);

  // Rows from `documents` have no product link, so they can only land in
  // whitepapers/ or other/.
  const allDocs = [
    ...pdRows.rows,
    ...docsRows.rows.map(r => ({ ...r, switch_id: null, transceiver_id: null })),
  ];

  // Every download is stored with a .pdf extension, regardless of the URL.
  const ext = ".pdf";

  for (const doc of allDocs) {
    if (!doc.url) continue;

    const subdir = pickDocumentSubdir(doc);
    const safeName = (doc.title || "doc").replace(/[^a-z0-9_\-\.]/gi, "_").slice(0, 80);
    const filePath = join(destDir, subdir, `${safeName}${ext}`);

    // Skip if the target file already exists (e.g. already downloaded in this run).
    // NOTE(review): two documents whose titles sanitise to the same name share
    // one path, so only the first of them is fetched — confirm this is intended.
    try { await access(filePath); continue; } catch { /* not cached, download */ }

    // The URL comes from the database, so it is passed as a single quoted shell
    // word and via --url: shell metacharacters in it are never interpreted and a
    // leading "-" cannot be read as a curl option.
    // --fail makes HTTP 4xx/5xx responses an error instead of saving the
    // server's error page as a .pdf.
    const command = [
      "curl", "-sL", "--fail",
      "--max-time", "60",
      "--max-filesize", "52428800",
      "-A", quoteShellArg("TIP-DataCollector/1.0"),
      "-o", quoteShellArg(filePath),
      "--url", quoteShellArg(doc.url),
    ].join(" ");

    try {
      await execAsync(command, { timeout: 65000 });
      downloaded++;
      logger.info(`Downloaded: ${subdir}/${safeName}${ext}`);
    } catch (err) {
      // A timed-out or oversized transfer can leave a truncated file behind;
      // remove it so it is neither synced to the NAS nor mistaken for a cached copy.
      await rm(filePath, { force: true }).catch(() => undefined);
      logger.warn(`Failed to download doc: ${doc.title}`, { url: doc.url, err });
    }
  }

  logger.info(`Documents downloaded: ${downloaded}/${allDocs.length}`);
  return downloaded;
}

/** Folder names, relative to the destination directory, that documents are filed under. */
type DocumentSubdir = "switches" | "transceivers" | "whitepapers" | "other";

/** Picks the folder for a document: a switch or transceiver link takes precedence over doc_type. */
function pickDocumentSubdir(doc: {
  doc_type: string;
  switch_id: string | null;
  transceiver_id: string | null;
}): DocumentSubdir {
  if (doc.switch_id) return "switches";
  if (doc.transceiver_id) return "transceivers";
  if (doc.doc_type === "whitepaper") return "whitepapers";
  return "other";
}

/**
 * Quotes a value as one literal word for a POSIX shell: the value is wrapped in
 * single quotes and each embedded single quote is rewritten as '\''.
 * NOTE(review): assumes execAsync runs commands through a POSIX sh — confirm.
 */
function quoteShellArg(value: string): string {
  return "'" + value.replace(/'/g, "'\\''") + "'";
}
|
||||||
|
|
||||||
// ─────────────────────────────────────────────────────────────────────────────
|
// ─────────────────────────────────────────────────────────────────────────────
|
||||||
// Setup NAS directory structure (run once)
|
// Setup NAS directory structure (run once)
|
||||||
// ─────────────────────────────────────────────────────────────────────────────
|
// ─────────────────────────────────────────────────────────────────────────────
|
||||||
export async function setupNasDirectories(): Promise<void> {
|
export async function setupNasDirectories(): Promise<void> {
|
||||||
const dirs = ["exports", "db-backups", "raw-cache", "logs"];
|
const dirs = [
|
||||||
|
"exports", "db-backups", "raw-cache", "logs",
|
||||||
|
"datasheets", "datasheets/switches", "datasheets/transceivers",
|
||||||
|
"datasheets/whitepapers", "datasheets/other",
|
||||||
|
];
|
||||||
for (const dir of dirs) {
|
for (const dir of dirs) {
|
||||||
try {
|
try {
|
||||||
await execAsync(
|
await execAsync(
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user