183 lines
5.6 KiB
TypeScript
183 lines
5.6 KiB
TypeScript
/**
 * Product Asset Utilities — Download images, datasheets, manuals
 *
 * Handles downloading product assets from vendor websites,
 * storing them locally, and updating the database.
 */
|
|
import { pool } from "./db";
|
|
import { createHash } from "crypto";
|
|
import { writeFile, mkdir } from "fs/promises";
|
|
import { join, basename, extname } from "path";
|
|
import { existsSync } from "fs";
|
|
|
|
// Root folder for every downloaded asset. The ASSETS_DIR environment variable
// takes precedence; otherwise an "assets" folder four levels above this module
// is used.
const ASSETS_DIR =
  process.env.ASSETS_DIR || join(__dirname, "../../../..", "assets");

// One subfolder per asset category.
const IMAGES_DIR = join(ASSETS_DIR, "images");
const DATASHEETS_DIR = join(ASSETS_DIR, "datasheets");
const MANUALS_DIR = join(ASSETS_DIR, "manuals");
|
|
|
|
/**
 * Ensure that `dir` exists, creating it and any missing parent directories.
 *
 * `mkdir` with `recursive: true` resolves without error when the directory
 * is already present, so no prior existence check is needed. Skipping the
 * synchronous probe also avoids blocking the event loop and removes the
 * check-then-create race between concurrent downloads.
 *
 * @param dir - Directory path to create.
 * @throws If the directory cannot be created (for example, when a path
 *   component already exists as a regular file).
 */
async function ensureDir(dir: string): Promise<void> {
  await mkdir(dir, { recursive: true });
}
|
|
|
|
/**
 * Compute a short fingerprint for a buffer: the first 16 hexadecimal
 * characters of its SHA-256 digest.
 *
 * @param data - Bytes to fingerprint.
 * @returns A 16-character lowercase hex string.
 */
function contentHash(data: Buffer): string {
  const hasher = createHash("sha256");
  hasher.update(data);
  const hexDigest = hasher.digest("hex");
  return hexDigest.substring(0, 16);
}
|
|
|
|
/**
 * Turn an arbitrary label (vendor name, model, document type) into a string
 * that is safe to use as a single file or directory name.
 *
 * The input is lowercased, every run of characters outside `a-z`, `0-9`,
 * `.` and `-` is replaced by one `-`, and one leading and one trailing `-`
 * are removed.
 *
 * A result that is empty or consists only of dots (`""`, `"."`, `".."`, ...)
 * is replaced by the placeholder `"unnamed"`. Callers pass the result to
 * `join()` as a path segment, where such values would collapse into or
 * escape the intended directory, or produce an extension-only file name.
 *
 * @param name - Raw label to sanitize.
 * @returns A non-empty name containing only `a-z`, `0-9`, `.` and `-`.
 */
function sanitizeFilename(name: string): string {
  const cleaned = name
    .toLowerCase()
    .replace(/[^a-z0-9.-]+/g, "-")
    .replace(/^-|-$/g, "");

  // Empty and dots-only names are not usable as a path segment.
  return /^\.*$/.test(cleaned) ? "unnamed" : cleaned;
}
|
|
|
|
/** Outcome of a successful asset download. */
export interface AssetDownloadResult {
  /** Path of the file that was written to disk. */
  localPath: string;
  /** Short content fingerprint of the downloaded bytes. */
  hash: string;
  /** Number of bytes downloaded. */
  sizeBytes: number;
}
|
|
|
|
/**
 * Download a file from `url` and save it under `destDir`.
 *
 * The stored file name is the sanitized `filenamePrefix` followed by the
 * extension found in the URL path (".bin" when the path has none). The
 * request is aborted after 30 seconds.
 *
 * Failures are non-fatal: an invalid URL, a non-OK HTTP status, a timeout or
 * a file-system error is logged and reported as `null` instead of thrown.
 *
 * @param url - Absolute URL of the asset to fetch.
 * @param destDir - Directory to store the file in; created if missing.
 * @param filenamePrefix - Base name for the stored file (sanitized before use).
 * @returns Path, content hash and size of the stored file, or `null` when
 *   the download failed.
 */
export async function downloadAsset(
  url: string,
  destDir: string,
  filenamePrefix: string
): Promise<AssetDownloadResult | null> {
  try {
    await ensureDir(destDir);
    const ext = extname(new URL(url).pathname) || ".bin";
    const filename = `${sanitizeFilename(filenamePrefix)}${ext}`;
    const localPath = join(destDir, filename);

    const response = await fetch(url, {
      headers: {
        "User-Agent": "TIP-Scraper/1.0 (Transceiver Intelligence Platform)",
        "Accept": "*/*",
      },
      signal: AbortSignal.timeout(30_000),
    });

    if (!response.ok) {
      console.log(` [SKIP] ${url} → HTTP ${response.status}`);
      // The body will never be read; cancel it so the connection is released
      // promptly instead of waiting for garbage collection.
      await response.body?.cancel();
      return null;
    }

    const buffer = Buffer.from(await response.arrayBuffer());
    const hash = contentHash(buffer);

    await writeFile(localPath, buffer);

    return { localPath, hash, sizeBytes: buffer.length };
  } catch (err) {
    // Anything can be thrown, not only Error instances; narrow before reading
    // `.message` so this handler can never throw itself.
    const message = err instanceof Error ? err.message : String(err);
    console.log(` [FAIL] ${url} → ${message.slice(0, 80)}`);
    return null;
  }
}
|
|
|
|
/**
 * Fetch a switch's product image and record it on the `switches` row.
 *
 * The image is stored under the images directory in a per-vendor folder,
 * named after the model. On success the row's `image_url`,
 * `image_local_path` and `assets_scraped_at` columns are updated.
 *
 * @returns `false` when the download failed (nothing is written to the
 *   database), `true` once the row has been updated. Database errors are
 *   not caught here.
 */
export async function downloadSwitchImage(
  switchId: string,
  imageUrl: string,
  vendor: string,
  model: string
): Promise<boolean> {
  const targetDir = join(IMAGES_DIR, "switches", sanitizeFilename(vendor));
  const downloaded = await downloadAsset(imageUrl, targetDir, model);
  if (downloaded === null) {
    return false;
  }

  const updateSql = `UPDATE switches SET image_url = $2, image_local_path = $3, assets_scraped_at = NOW() WHERE id = $1`;
  await pool.query(updateSql, [switchId, imageUrl, downloaded.localPath]);
  return true;
}
|
|
|
|
/**
 * Fetch a switch's datasheet, record it on the `switches` row and register
 * it in `product_documents` with doc type 'datasheet'.
 *
 * The file is stored under the datasheets directory in a per-vendor folder,
 * named after the model. The two statements run one after the other, each
 * as its own query.
 *
 * @returns `false` when the download failed (nothing is written to the
 *   database), `true` once both statements have run. Database errors are
 *   not caught here.
 */
export async function downloadSwitchDatasheet(
  switchId: string,
  vendorId: string,
  datasheetUrl: string,
  title: string,
  vendor: string,
  model: string
): Promise<boolean> {
  const targetDir = join(DATASHEETS_DIR, "switches", sanitizeFilename(vendor));
  const downloaded = await downloadAsset(datasheetUrl, targetDir, model);
  if (downloaded === null) {
    return false;
  }
  const { localPath, sizeBytes, hash } = downloaded;

  // Point the switch row at the datasheet that was just stored.
  const updateSql = `UPDATE switches SET datasheet_url = $2, datasheet_local_path = $3, assets_scraped_at = NOW() WHERE id = $1`;
  await pool.query(updateSql, [switchId, datasheetUrl, localPath]);

  // Register the document; when a row with the same content_hash already
  // exists, only its downloaded_at timestamp is refreshed.
  const insertSql = [
    "INSERT INTO product_documents (switch_id, vendor_id, doc_type, title, source_url, local_path, file_size_bytes, content_hash)",
    "VALUES ($1, $2, 'datasheet', $3, $4, $5, $6, $7)",
    "ON CONFLICT (content_hash) DO UPDATE SET downloaded_at = NOW()",
  ].join("\n");
  await pool.query(insertSql, [
    switchId,
    vendorId,
    title,
    datasheetUrl,
    localPath,
    sizeBytes,
    hash,
  ]);
  return true;
}
|
|
|
|
/**
 * Fetch a manual or guide for a switch and register it in
 * `product_documents` under the given `docType`.
 *
 * The file is stored under the manuals directory in a per-vendor folder,
 * named "<model>-<docType>" (both parts sanitized). The `switches` row is
 * not modified.
 *
 * @returns `false` when the download failed (nothing is written to the
 *   database), `true` once the document row has been inserted or refreshed.
 *   Database errors are not caught here.
 */
export async function downloadSwitchManual(
  switchId: string,
  vendorId: string,
  manualUrl: string,
  title: string,
  docType: string,
  vendor: string,
  model: string
): Promise<boolean> {
  const targetDir = join(MANUALS_DIR, "switches", sanitizeFilename(vendor));
  const namePrefix = [sanitizeFilename(model), sanitizeFilename(docType)].join("-");
  const downloaded = await downloadAsset(manualUrl, targetDir, namePrefix);
  if (downloaded === null) {
    return false;
  }

  // When a row with the same content_hash already exists, only its
  // downloaded_at timestamp is refreshed.
  const insertSql = [
    "INSERT INTO product_documents (switch_id, vendor_id, doc_type, title, source_url, local_path, file_size_bytes, content_hash)",
    "VALUES ($1, $2, $3, $4, $5, $6, $7, $8)",
    "ON CONFLICT (content_hash) DO UPDATE SET downloaded_at = NOW()",
  ].join("\n");
  await pool.query(insertSql, [
    switchId,
    vendorId,
    docType,
    title,
    manualUrl,
    downloaded.localPath,
    downloaded.sizeBytes,
    downloaded.hash,
  ]);
  return true;
}
|
|
|
|
/**
 * Store a switch's product page URL without downloading anything.
 * Also stamps `assets_scraped_at` with the current time.
 *
 * @param switchId - `id` of the `switches` row to update.
 * @param url - Product page URL to record.
 */
export async function setSwitchProductPage(switchId: string, url: string): Promise<void> {
  const updateSql = `UPDATE switches SET product_page_url = $2, assets_scraped_at = NOW() WHERE id = $1`;
  await pool.query(updateSql, [switchId, url]);
}
|
|
|
|
/**
 * Update a vendor's documentation-related URLs.
 *
 * Only the URLs that are supplied are changed: a missing or empty value is
 * sent as NULL, and COALESCE then keeps the column's current value.
 * `updated_at` is always refreshed.
 *
 * @param vendorId - `id` of the `vendors` row to update.
 * @param urls - New URL values; omit a field to leave its column untouched.
 */
export async function setVendorDocUrls(
  vendorId: string,
  urls: { docsPortal?: string; datasheetLibrary?: string; imageCdn?: string; supportPortal?: string }
): Promise<void> {
  const { docsPortal, datasheetLibrary, imageCdn, supportPortal } = urls;

  const updateSql = [
    "UPDATE vendors SET",
    "docs_portal_url = COALESCE($2, docs_portal_url),",
    "datasheet_library_url = COALESCE($3, datasheet_library_url),",
    "image_cdn_base = COALESCE($4, image_cdn_base),",
    "support_portal_url = COALESCE($5, support_portal_url),",
    "updated_at = NOW()",
    "WHERE id = $1",
  ].join("\n");

  await pool.query(updateSql, [
    vendorId,
    docsPortal || null,
    datasheetLibrary || null,
    imageCdn || null,
    supportPortal || null,
  ]);
}
|