/**
 * Product Asset Utilities — Download images, datasheets, manuals
 *
 * Handles downloading product assets from vendor websites,
 * storing them locally, and updating the database.
 */

import { pool } from "./db";
import { createHash } from "crypto";
import { writeFile, mkdir } from "fs/promises";
import { join, basename, extname } from "path";
import { existsSync } from "fs";

/** Root directory for downloaded assets; overridable through the ASSETS_DIR environment variable. */
const ASSETS_DIR = process.env.ASSETS_DIR || join(__dirname, "..", "..", "..", "..", "assets");
const IMAGES_DIR = join(ASSETS_DIR, "images");
const DATASHEETS_DIR = join(ASSETS_DIR, "datasheets");
const MANUALS_DIR = join(ASSETS_DIR, "manuals");

/** User-Agent header sent with every asset download request. */
const DOWNLOAD_USER_AGENT = "TIP-Scraper/1.0 (Transceiver Intelligence Platform)";

/** Per-request abort timeout for asset downloads, in milliseconds. */
const DOWNLOAD_TIMEOUT_MS = 30_000;

/** Longest error-message prefix printed when a download fails. */
const MAX_LOGGED_ERROR_LENGTH = 80;

/**
 * Create `dir` (including missing parents) unless it already exists.
 *
 * @param dir - Directory path to create.
 */
async function ensureDir(dir: string): Promise<void> {
  if (!existsSync(dir)) {
    // `recursive: true` also makes the call succeed if the directory appears
    // between the existence check and the mkdir.
    await mkdir(dir, { recursive: true });
  }
}

/**
 * Short content fingerprint: the first 16 hex characters of the SHA-256 digest.
 *
 * @param data - Raw bytes to fingerprint.
 * @returns A 16-character lowercase hex string.
 */
function contentHash(data: Buffer): string {
  return createHash("sha256").update(data).digest("hex").slice(0, 16);
}

/**
 * Lowercase `name`, replace every run of characters outside `[a-z0-9.-]` with a
 * single hyphen, and strip one leading and one trailing hyphen.
 *
 * The result may be an empty string when `name` contains no allowed characters.
 *
 * @param name - Arbitrary label (vendor, model, document type, ...).
 * @returns The sanitized name.
 */
function sanitizeFilename(name: string): string {
  return name
    .toLowerCase()
    .replace(/[^a-z0-9.-]+/g, "-")
    .replace(/^-|-$/g, "");
}

/** Outcome of a successful {@link downloadAsset} call. */
export interface AssetDownloadResult {
  /** Path the asset was written to. */
  localPath: string;
  /** Truncated SHA-256 hex digest of the downloaded bytes (16 characters). */
  hash: string;
  /** Number of bytes downloaded. */
  sizeBytes: number;
}

/**
 * Download a file from URL and save locally.
 * Returns null if download fails (non-fatal).
 *
 * The file is named `<sanitized prefix><extension>`, where the extension is
 * taken from the URL path (`.bin` when the path has none). If the prefix
 * sanitizes to an empty string, the content hash is used as the name instead.
 *
 * @param url - Absolute URL of the asset.
 * @param destDir - Directory to write into; created when missing.
 * @param filenamePrefix - Label the local filename is derived from.
 * @returns Path, hash and size of the stored file, or `null` on any failure
 *   (invalid URL, network error, timeout, non-2xx status, filesystem error).
 */
export async function downloadAsset(
  url: string,
  destDir: string,
  filenamePrefix: string
): Promise<AssetDownloadResult | null> {
  try {
    await ensureDir(destDir);

    const ext = extname(new URL(url).pathname) || ".bin";

    const response = await fetch(url, {
      headers: {
        "User-Agent": DOWNLOAD_USER_AGENT,
        "Accept": "*/*",
      },
      signal: AbortSignal.timeout(DOWNLOAD_TIMEOUT_MS),
    });

    if (!response.ok) {
      console.log(` [SKIP] ${url} → HTTP ${response.status}`);
      // Discard the unread body so the underlying connection can be released.
      await response.body?.cancel().catch(() => undefined);
      return null;
    }

    const buffer = Buffer.from(await response.arrayBuffer());
    const hash = contentHash(buffer);

    // An empty stem would yield a hidden file such as ".pdf" that every
    // similarly-named asset overwrites; fall back to the content hash.
    const stem = sanitizeFilename(filenamePrefix) || hash;
    const localPath = join(destDir, `${stem}${ext}`);

    await writeFile(localPath, buffer);

    return { localPath, hash, sizeBytes: buffer.length };
  } catch (err: unknown) {
    // Anything can be thrown; only Error instances are known to carry `.message`.
    const message = err instanceof Error ? err.message : String(err);
    console.log(` [FAIL] ${url} → ${message.slice(0, MAX_LOGGED_ERROR_LENGTH)}`);
    return null;
  }
}

/**
 * Download product image and update switches table.
 *
 * @param switchId - Value matched against `switches.id`.
 * @param imageUrl - Remote image URL; also stored in `image_url`.
 * @param vendor - Vendor name, used for the per-vendor sub-directory.
 * @param model - Model name, used as the local filename prefix.
 * @returns `true` when the image was stored and the row update was issued,
 *   `false` when the download failed.
 */
export async function downloadSwitchImage(
  switchId: string,
  imageUrl: string,
  vendor: string,
  model: string
): Promise<boolean> {
  const vendorDir = join(IMAGES_DIR, "switches", sanitizeFilename(vendor));
  const result = await downloadAsset(imageUrl, vendorDir, model);
  if (!result) return false;

  await pool.query(
    `UPDATE switches
        SET image_url = $2,
            image_local_path = $3,
            assets_scraped_at = NOW()
      WHERE id = $1`,
    [switchId, imageUrl, result.localPath]
  );
  return true;
}

/**
 * Download datasheet PDF and create product_documents entry.
 *
 * Updates the datasheet columns on the switch row, then inserts a
 * `product_documents` row with `doc_type = 'datasheet'`. When a row with the
 * same `content_hash` already exists, only its `downloaded_at` is refreshed.
 *
 * @param switchId - Value matched against `switches.id`.
 * @param vendorId - Stored as `product_documents.vendor_id`.
 * @param datasheetUrl - Remote datasheet URL.
 * @param title - Document title stored with the record.
 * @param vendor - Vendor name, used for the per-vendor sub-directory.
 * @param model - Model name, used as the local filename prefix.
 * @returns `true` when the datasheet was stored and both statements were issued,
 *   `false` when the download failed.
 */
export async function downloadSwitchDatasheet(
  switchId: string,
  vendorId: string,
  datasheetUrl: string,
  title: string,
  vendor: string,
  model: string
): Promise<boolean> {
  const vendorDir = join(DATASHEETS_DIR, "switches", sanitizeFilename(vendor));
  const result = await downloadAsset(datasheetUrl, vendorDir, model);
  if (!result) return false;

  // Update switch record
  await pool.query(
    `UPDATE switches
        SET datasheet_url = $2,
            datasheet_local_path = $3,
            assets_scraped_at = NOW()
      WHERE id = $1`,
    [switchId, datasheetUrl, result.localPath]
  );

  // Create document record (upsert by content_hash)
  await pool.query(
    `INSERT INTO product_documents
       (switch_id, vendor_id, doc_type, title, source_url, local_path, file_size_bytes, content_hash)
     VALUES ($1, $2, 'datasheet', $3, $4, $5, $6, $7)
     ON CONFLICT (content_hash) DO UPDATE SET downloaded_at = NOW()`,
    [switchId, vendorId, title, datasheetUrl, result.localPath, result.sizeBytes, result.hash]
  );
  return true;
}

/**
 * Download manual/guide PDF and create product_documents entry.
 *
 * The local filename is built from the model and the document type. When a
 * `product_documents` row with the same `content_hash` already exists, only its
 * `downloaded_at` is refreshed.
 *
 * @param switchId - Stored as `product_documents.switch_id`.
 * @param vendorId - Stored as `product_documents.vendor_id`.
 * @param manualUrl - Remote document URL.
 * @param title - Document title stored with the record.
 * @param docType - Stored as `doc_type` and appended to the local filename.
 * @param vendor - Vendor name, used for the per-vendor sub-directory.
 * @param model - Model name, used as the first part of the local filename.
 * @returns `true` when the document was stored and the insert was issued,
 *   `false` when the download failed.
 */
export async function downloadSwitchManual(
  switchId: string,
  vendorId: string,
  manualUrl: string,
  title: string,
  docType: string,
  vendor: string,
  model: string
): Promise<boolean> {
  const vendorDir = join(MANUALS_DIR, "switches", sanitizeFilename(vendor));
  const filename = `${sanitizeFilename(model)}-${sanitizeFilename(docType)}`;
  const result = await downloadAsset(manualUrl, vendorDir, filename);
  if (!result) return false;

  await pool.query(
    `INSERT INTO product_documents
       (switch_id, vendor_id, doc_type, title, source_url, local_path, file_size_bytes, content_hash)
     VALUES ($1, $2, $3, $4, $5, $6, $7, $8)
     ON CONFLICT (content_hash) DO UPDATE SET downloaded_at = NOW()`,
    [switchId, vendorId, docType, title, manualUrl, result.localPath, result.sizeBytes, result.hash]
  );
  return true;
}

/**
 * Update switch product_page_url without downloading.
 *
 * @param switchId - Value matched against `switches.id`.
 * @param url - Product page URL to store.
 */
export async function setSwitchProductPage(switchId: string, url: string): Promise<void> {
  await pool.query(
    `UPDATE switches
        SET product_page_url = $2,
            assets_scraped_at = NOW()
      WHERE id = $1`,
    [switchId, url]
  );
}

/**
 * Update vendor documentation portal URLs.
 *
 * Fields that are omitted (or empty) are passed as NULL, and the statement's
 * COALESCE then keeps the value already stored in that column.
 *
 * @param vendorId - Value matched against `vendors.id`.
 * @param urls - New URL values; any subset may be supplied.
 */
export async function setVendorDocUrls(
  vendorId: string,
  urls: {
    docsPortal?: string;
    datasheetLibrary?: string;
    imageCdn?: string;
    supportPortal?: string;
  }
): Promise<void> {
  await pool.query(
    `UPDATE vendors
        SET docs_portal_url = COALESCE($2, docs_portal_url),
            datasheet_library_url = COALESCE($3, datasheet_library_url),
            image_cdn_base = COALESCE($4, image_cdn_base),
            support_portal_url = COALESCE($5, support_portal_url),
            updated_at = NOW()
      WHERE id = $1`,
    [
      vendorId,
      // `||` (not `??`) so that an empty string also means "leave unchanged".
      urls.docsPortal || null,
      urls.datasheetLibrary || null,
      urls.imageCdn || null,
      urls.supportPortal || null,
    ]
  );
}