183 lines
5.6 KiB
TypeScript

/**
* Product Asset Utilities — Download images, datasheets, manuals
*
* Handles downloading product assets from vendor websites,
* storing them locally, and updating the database.
*/
import { pool } from "./db";
import { createHash } from "crypto";
import { writeFile, mkdir } from "fs/promises";
import { join, basename, extname } from "path";
import { existsSync } from "fs";
// Root of the local asset store: a non-empty ASSETS_DIR environment variable
// takes precedence; otherwise the "assets" directory four levels above this
// module's directory is used.
const ASSETS_DIR =
  process.env.ASSETS_DIR || join(__dirname, "../../../..", "assets");

// One subdirectory per asset category.
const IMAGES_DIR = join(ASSETS_DIR, "images");
const DATASHEETS_DIR = join(ASSETS_DIR, "datasheets");
const MANUALS_DIR = join(ASSETS_DIR, "manuals");
/**
 * Make sure `dir` exists, creating it (and any missing parents) when it does not.
 * Does nothing when something already exists at that path.
 */
async function ensureDir(dir: string): Promise<void> {
  if (existsSync(dir)) {
    return;
  }
  await mkdir(dir, { recursive: true });
}
/**
 * Fingerprint a buffer: the first 16 hex characters of its SHA-256 digest.
 */
function contentHash(data: Buffer): string {
  const hasher = createHash("sha256");
  hasher.update(data);
  const hexDigest = hasher.digest("hex");
  return hexDigest.substring(0, 16);
}
/**
 * Turn an arbitrary label (vendor, model, document type) into a
 * filesystem-friendly slug.
 *
 * The input is lower-cased, every run of characters outside `a-z`, `0-9`,
 * `.` and `-` is replaced by a single hyphen, and all leading and trailing
 * hyphens are removed.
 *
 * Note: dots are preserved, so "." and ".." are returned unchanged, and an
 * input with no allowed characters yields an empty string.
 *
 * @param name - Raw label to convert.
 * @returns The slug; may be empty.
 */
function sanitizeFilename(name: string): string {
  return name
    .toLowerCase()
    .replace(/[^a-z0-9.-]+/g, "-")
    // Trim whole hyphen runs at the edges: a literal "-" next to replaced
    // characters (e.g. "-- Model") produces more than one edge hyphen.
    .replace(/^-+|-+$/g, "");
}
/**
 * Outcome of a successful `downloadAsset` call.
 */
export interface AssetDownloadResult {
/** Path of the file written to disk (destination directory joined with the generated filename). */
localPath: string;
/** First 16 hex characters of the SHA-256 digest of the downloaded bytes. */
hash: string;
/** Number of bytes downloaded and written. */
sizeBytes: number;
}
/**
 * Download a file from a URL and save it under `destDir`.
 *
 * The file is named `<sanitized filenamePrefix><ext>`, where `<ext>` is the
 * extension of the URL's path (".bin" when the path has none). An existing
 * file with the same name is overwritten.
 *
 * Failures are non-fatal: an invalid URL, a network error, the 30-second
 * timeout, a non-2xx response, or a filesystem error is logged and reported
 * as `null` instead of being thrown.
 *
 * @param url - Absolute URL of the asset to fetch.
 * @param destDir - Directory to store the file in; created if missing.
 * @param filenamePrefix - Base name for the stored file; sanitized before use.
 * @returns The stored path, content hash and byte size, or `null` on failure.
 */
export async function downloadAsset(
  url: string,
  destDir: string,
  filenamePrefix: string
): Promise<AssetDownloadResult | null> {
  try {
    await ensureDir(destDir);

    const ext = extname(new URL(url).pathname) || ".bin";
    const filename = `${sanitizeFilename(filenamePrefix)}${ext}`;
    const localPath = join(destDir, filename);

    const response = await fetch(url, {
      headers: {
        "User-Agent": "TIP-Scraper/1.0 (Transceiver Intelligence Platform)",
        "Accept": "*/*",
      },
      signal: AbortSignal.timeout(30_000),
    });
    if (!response.ok) {
      console.log(` [SKIP] ${url} → HTTP ${response.status}`);
      return null;
    }

    const buffer = Buffer.from(await response.arrayBuffer());
    const hash = contentHash(buffer);
    await writeFile(localPath, buffer);
    return { localPath, hash, sizeBytes: buffer.length };
  } catch (err: unknown) {
    // Anything can be thrown, not only Error instances; narrowing here keeps
    // the handler itself from throwing and breaking the "returns null" contract.
    const reason = err instanceof Error ? err.message : String(err);
    console.log(` [FAIL] ${url} → ${reason.slice(0, 80)}`);
    return null;
  }
}
/**
 * Fetch a switch's product image and record it on the `switches` row.
 *
 * The image is stored under `<IMAGES_DIR>/switches/<sanitized vendor>/` with
 * the model as filename prefix. When the download fails the database is left
 * untouched and `false` is returned.
 *
 * @returns `true` once the image is saved and the row has been updated.
 */
export async function downloadSwitchImage(
  switchId: string,
  imageUrl: string,
  vendor: string,
  model: string
): Promise<boolean> {
  const targetDir = join(IMAGES_DIR, "switches", sanitizeFilename(vendor));
  const downloaded = await downloadAsset(imageUrl, targetDir, model);

  if (downloaded === null) {
    return false;
  }

  const updateSql = `UPDATE switches SET image_url = $2, image_local_path = $3, assets_scraped_at = NOW() WHERE id = $1`;
  await pool.query(updateSql, [switchId, imageUrl, downloaded.localPath]);
  return true;
}
/**
 * Fetch a switch's datasheet, record it on the `switches` row, and register
 * it in `product_documents`.
 *
 * The file is stored under `<DATASHEETS_DIR>/switches/<sanitized vendor>/`
 * with the model as filename prefix. The document row is inserted with
 * doc_type 'datasheet'; if a row with the same content hash already exists,
 * only its `downloaded_at` is refreshed. When the download fails nothing is
 * written to the database and `false` is returned.
 *
 * @returns `true` once both statements have completed.
 */
export async function downloadSwitchDatasheet(
  switchId: string,
  vendorId: string,
  datasheetUrl: string,
  title: string,
  vendor: string,
  model: string
): Promise<boolean> {
  const targetDir = join(DATASHEETS_DIR, "switches", sanitizeFilename(vendor));
  const downloaded = await downloadAsset(datasheetUrl, targetDir, model);

  if (downloaded === null) {
    return false;
  }

  const { localPath, sizeBytes, hash } = downloaded;

  // Point the switch row at the freshly stored datasheet.
  const switchParams = [switchId, datasheetUrl, localPath];
  await pool.query(
    `UPDATE switches SET datasheet_url = $2, datasheet_local_path = $3, assets_scraped_at = NOW() WHERE id = $1`,
    switchParams
  );

  // Register the document; a content_hash conflict only bumps downloaded_at.
  const documentParams = [switchId, vendorId, title, datasheetUrl, localPath, sizeBytes, hash];
  await pool.query(
    `INSERT INTO product_documents (switch_id, vendor_id, doc_type, title, source_url, local_path, file_size_bytes, content_hash)
VALUES ($1, $2, 'datasheet', $3, $4, $5, $6, $7)
ON CONFLICT (content_hash) DO UPDATE SET downloaded_at = NOW()`,
    documentParams
  );

  return true;
}
/**
 * Fetch a manual or guide for a switch and register it in `product_documents`.
 *
 * The file is stored under `<MANUALS_DIR>/switches/<sanitized vendor>/` with
 * the filename prefix `<sanitized model>-<sanitized docType>`. If a document
 * row with the same content hash already exists, only its `downloaded_at` is
 * refreshed. When the download fails nothing is written to the database and
 * `false` is returned.
 *
 * @returns `true` once the document row has been written.
 */
export async function downloadSwitchManual(
  switchId: string,
  vendorId: string,
  manualUrl: string,
  title: string,
  docType: string,
  vendor: string,
  model: string
): Promise<boolean> {
  const targetDir = join(MANUALS_DIR, "switches", sanitizeFilename(vendor));
  const modelSlug = sanitizeFilename(model);
  const typeSlug = sanitizeFilename(docType);
  const downloaded = await downloadAsset(manualUrl, targetDir, `${modelSlug}-${typeSlug}`);

  if (downloaded === null) {
    return false;
  }

  const { localPath, sizeBytes, hash } = downloaded;
  const documentParams = [switchId, vendorId, docType, title, manualUrl, localPath, sizeBytes, hash];
  await pool.query(
    `INSERT INTO product_documents (switch_id, vendor_id, doc_type, title, source_url, local_path, file_size_bytes, content_hash)
VALUES ($1, $2, $3, $4, $5, $6, $7, $8)
ON CONFLICT (content_hash) DO UPDATE SET downloaded_at = NOW()`,
    documentParams
  );

  return true;
}
/**
 * Store a switch's product page URL and stamp `assets_scraped_at`; nothing
 * is downloaded.
 */
export async function setSwitchProductPage(switchId: string, url: string): Promise<void> {
  const updateSql = `UPDATE switches SET product_page_url = $2, assets_scraped_at = NOW() WHERE id = $1`;
  const params = [switchId, url];
  await pool.query(updateSql, params);
}
/**
 * Update a vendor's documentation-related URLs.
 *
 * Any URL that is omitted or empty is sent as NULL, which makes the
 * statement's COALESCE keep the value already stored in that column.
 * `updated_at` is refreshed on every call.
 */
export async function setVendorDocUrls(
  vendorId: string,
  urls: { docsPortal?: string; datasheetLibrary?: string; imageCdn?: string; supportPortal?: string }
): Promise<void> {
  const { docsPortal, datasheetLibrary, imageCdn, supportPortal } = urls;
  const params = [
    vendorId,
    docsPortal || null,
    datasheetLibrary || null,
    imageCdn || null,
    supportPortal || null,
  ];

  await pool.query(
    `UPDATE vendors SET
docs_portal_url = COALESCE($2, docs_portal_url),
datasheet_library_url = COALESCE($3, datasheet_library_url),
image_cdn_base = COALESCE($4, image_cdn_base),
support_portal_url = COALESCE($5, support_portal_url),
updated_at = NOW()
WHERE id = $1`,
    params
  );
}