- Add public /api/price-comparison API (summary, top-50, per-SKU detail)
— no auth required, 3 Express routes, DISTINCT ON latest-price logic
- Add '💲 Price Comparison' dashboard tab: stat cards, form-factor
breakdown, top-50 SKU table (clickable rows → SKU detail), per-vendor
price + stock + spread% lookup panel
- Add Eoptolink OEM catalog scraper (93 product-solution pages,
part-number regex EOLO-*/EOLQ-* etc., no prices, seeds transceivers
table as manufacturer entries)
- Register scrape:catalog:eoptolink in scheduler: schedule every 4h
(40 */4 * * *), lazy-import worker, added to known-jobs array
238 lines
10 KiB
TypeScript
238 lines
10 KiB
TypeScript
/**
|
||
* Eoptolink Manufacturer Catalog Scraper
|
||
*
|
||
* Source: www.eoptolink.com — One of China's top-3 optical transceiver OEMs.
|
||
* (Finisar competitor, supplies tier-1 cloud hyperscalers)
|
||
* Target: Discover all product families + part numbers, seed transceivers table
|
||
* as manufacturer=Eoptolink entries.
|
||
*
|
||
* Strategy:
|
||
* Phase 1: Fetch homepage → extract all /product-solutions/* category URLs (≈90)
|
||
* Phase 2: Fetch each category page → parse product name + Eoptolink part numbers
|
||
* (e.g. EOLO-168HG-10-XDX, EOLQ-128HG-02-PX; see pnRegex in parseProductPage)
|
||
*
|
||
* Note: Eoptolink does NOT publish retail prices (B2B OEM manufacturer).
|
||
* This scraper adds manufacturer catalog entries — no price_observations.
|
||
*
|
||
* Rate limit: 1 req/2s — polite crawl of OEM's website.
|
||
*/
|
||
|
||
import { pool, findOrCreateScrapedTransceiver, ensureVendor } from "../utils/db";
|
||
|
||
/** Site origin; relative product paths are appended to it to build page URLs. */
const BASE = "https://www.eoptolink.com";

/** Browser-like request headers attached to every page fetch. */
const HEADERS = {
  "User-Agent":
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0.0.0 Safari/537.36",
  "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
  "Accept-Language": "en-US,en;q=0.9",
};

/** Pause inserted before each product-page request, in milliseconds. */
const DELAY_MS = 2_000;
|
||
|
||
/**
 * Returns a promise that settles once a timer of `ms` milliseconds fires.
 *
 * @param ms Delay in milliseconds handed to `setTimeout`.
 */
function sleep(ms: number): Promise<void> {
  return new Promise<void>((resolve) => {
    setTimeout(resolve, ms);
  });
}
|
||
|
||
/**
 * Downloads `url` with the shared request headers and returns the body as text.
 *
 * The request is aborted after 20 seconds.
 *
 * @param url Absolute URL to fetch.
 * @returns The response body decoded as text.
 * @throws Error when the response status is not OK, or when the fetch itself fails/times out.
 */
async function fetchHtml(url: string): Promise<string> {
  const timeoutSignal = AbortSignal.timeout(20_000);
  const response = await fetch(url, { headers: HEADERS, signal: timeoutSignal });
  if (response.ok) {
    return response.text();
  }
  throw new Error(`HTTP ${response.status} for ${url}`);
}
|
||
|
||
// ── Classification helpers ──────────────────────────────────────────────────
|
||
|
||
/** Whole-number Gbps tiers recognised by speedFromSlug, checked highest first. */
const SPEED_TIERS_GBPS = [800, 400, 200, 100, 50, 40, 32, 25, 16, 10, 8, 4, 1] as const;

/**
 * One precompiled matcher per tier.
 *
 * The `(?<!\d\.)` guard rejects a tier token that is really the fractional part
 * of a decimal rate: without it "1.25G" matches the 25G tier, because "." counts
 * as a word boundary.
 */
const SPEED_TIER_PATTERNS = SPEED_TIERS_GBPS.map((gbps) => ({
  gbps,
  pattern: new RegExp(`(?<!\\d\\.)\\b${gbps}g\\b`, "i"),
}));

/**
 * Derives the line-rate tier from free text (URL slug and/or page title).
 *
 * Matching is case-insensitive and token-based ("100g" never matches the 10G
 * tier). When several tiers appear in the text, the highest one wins.
 * Decimal rates such as "1.25G" or "2.5G" have no tier of their own and are
 * reported as unknown rather than being misread as 25G.
 *
 * @param slug Text to inspect.
 * @returns The tier label and its value in Gbps, or
 *          `{ speed: "Unknown", speedGbps: 0 }` when no tier is recognised.
 */
function speedFromSlug(slug: string): { speed: string; speedGbps: number } {
  // 1.6T is the only terabit tier; it accepts both "1.6t" and "16t".
  if (/\b1\.?6t\b/i.test(slug)) return { speed: "1.6T", speedGbps: 1600 };

  for (const { gbps, pattern } of SPEED_TIER_PATTERNS) {
    if (pattern.test(slug)) return { speed: `${gbps}G`, speedGbps: gbps };
  }
  return { speed: "Unknown", speedGbps: 0 };
}
|
||
|
||
/**
 * Ordered form-factor rules, applied to upper-cased text; the first match wins.
 * More specific names come before the names they contain (QSFP-DD800 before
 * QSFP-DD, SFP28 before SFP+, CFP4/CFP2 before CFP).
 */
const FORM_FACTOR_RULES: ReadonlyArray<readonly [RegExp, string]> = [
  [/\bOSFP\b/, "OSFP"],
  [/\bQSFP.?DD800\b/, "QSFP-DD800"],
  [/\bQSFP.?DD\b/, "QSFP-DD"],
  [/\bQSFP56\b/, "QSFP56"],
  [/\bQSFP112\b/, "QSFP112"],
  [/\bQSFP28\b/, "QSFP28"],
  // Accept "QSFP+", "QSFP PLUS" and the hyphenated slug form "QSFP-PLUS".
  [/\bQSFP(?:\+|[ -]PLUS\b)/, "QSFP+"],
  [/\bSFP56.DD\b/, "SFP56-DD"],
  [/\bSFP56\b/, "SFP56"],
  [/\bSFP28\b/, "SFP28"],
  // Anchored at a word boundary so it cannot match inside "QSFP-PLUS".
  [/\bSFP(?:\+|[ -]PLUS\b)/, "SFP+"],
  [/\bXFP\b/, "XFP"],
  [/\bCFP4\b/, "CFP4"],
  [/\bCFP2\b/, "CFP2"],
  [/\bCFP\b/, "CFP"],
];

/**
 * Derives the transceiver form factor from free text (page title and/or slug).
 *
 * @param text Text to inspect; matching is case-insensitive.
 * @returns The first form factor whose rule matches, or "SFP" when none does.
 */
function formFactorFromText(text: string): string {
  const upper = text.toUpperCase();
  for (const [pattern, formFactor] of FORM_FACTOR_RULES) {
    if (pattern.test(upper)) return formFactor;
  }
  // Plain "SFP" and unrecognised text both fall back to "SFP".
  return "SFP";
}
|
||
|
||
/**
 * Guesses the fiber type from free text (page title and/or slug).
 *
 * Text is classified as multimode when it mentions "multimode" (also written
 * "multi-mode" / "multi mode"), "mmf", "sr", or an OM1–OM5 grade. Everything
 * else is reported as single-mode, so single-mode keywords need no explicit test.
 *
 * NOTE(review): "sr" is matched as a bare substring, so it also fires inside
 * longer words; kept as-is to preserve existing classifications.
 *
 * @param text Text to inspect; matching is case-insensitive.
 * @returns "MMF" or "SMF".
 */
function fiberFromText(text: string): string {
  const lower = text.toLowerCase();
  // `.?` tolerates one separator, so "multi-mode" and "multi mode" also match.
  if (/multi.?mode|mmf|sr|om[1-5]/.test(lower)) return "MMF";
  return "SMF"; // OEM products default to SMF
}
|
||
|
||
/**
 * Matches one whole alphanumeric token that names a PON product:
 * "pon" with an optional family prefix (g, e, ge, xg, xgs, ng), an optional
 * leading rate such as "10g", and an optional trailing digit ("ngpon2").
 */
const PON_TOKEN = /^(?:\d{1,3}g)?(?:xgs|xg|ge|ng|g|e)?pon\d?$/;

/**
 * Derives the product category from free text (page title and/or slug).
 *
 * Rules are checked in order: Coherent, DWDM, CWDM, AOC, DAC, PON. The PON
 * rule works on whole tokens, so words that merely contain "pon"
 * ("components", "response") no longer classify a page as PON.
 *
 * @param text Text to inspect; matching is case-insensitive.
 * @returns The first matching category, or "DataCenter" when none matches.
 */
function categoryFromText(text: string): string {
  const lower = text.toLowerCase();
  if (/coherent|zr|dpsk/.test(lower)) return "Coherent";
  if (/dwdm/.test(lower)) return "DWDM";
  if (/cwdm/.test(lower)) return "CWDM";
  if (/aoc/.test(lower)) return "AOC";
  if (/dac/.test(lower)) return "DAC";

  // Split on anything that is not a letter or digit so "xgs-pon" yields "pon".
  const tokens = lower.split(/[^a-z0-9]+/);
  if (tokens.some((token) => PON_TOKEN.test(token))) return "PON";

  return "DataCenter";
}
|
||
|
||
// ── Phase 1: Discover product solution URLs ──────────────────────────────────
|
||
|
||
/**
 * Fetches the Eoptolink homepage and collects the site-relative
 * `/product-solutions/...` paths linked from it.
 *
 * Only double-quoted `href` attributes are considered; paths stop at the first
 * `#` or `?`. Duplicates are removed, keeping first-seen order.
 *
 * @returns De-duplicated relative paths, after the exclusions below.
 * @throws Whatever `fetchHtml` throws when the homepage cannot be fetched.
 */
async function fetchProductSolutionUrls(): Promise<string[]> {
  console.log(` Fetching Eoptolink homepage for product solution links...`);
  const html = await fetchHtml(`${BASE}/`);

  // Capture the path directly instead of re-matching each hit: the former
  // second, case-sensitive match silently dropped links written as HREF="...".
  const hrefPattern = /href="(\/product-solutions\/[^"#?]+)"/gi;
  const uniquePaths = new Set<string>();
  for (const match of html.matchAll(hrefPattern)) {
    const path = match[1];
    if (path) uniquePaths.add(path);
  }

  // Skip OSA (optical sub-assemblies) and "other" entries — no transceiver catalog.
  // The bare /400g/, /800g/ and /product-solutions/ paths are excluded as well
  // (presumably landing/index pages rather than product pages — TODO confirm).
  const filtered = [...uniquePaths].filter(
    (path) =>
      !path.includes("/osa/") &&
      !path.includes("/other/") &&
      !path.endsWith("/400g/") &&
      !path.endsWith("/800g/") &&
      !path.endsWith("/product-solutions/")
  );

  console.log(` Found ${filtered.length} product solution pages`);
  return filtered;
}
|
||
|
||
// ── Phase 2: Parse product detail page ──────────────────────────────────────
|
||
|
||
/** Data extracted from one product-solution page by parseProductPage. */
interface EoptolinkProduct {
  // ── Where the data came from ──
  /** The page URL/path that was passed to the parser. */
  pageUrl: string;
  /** Page title text with any "| suffix" removed. */
  pageTitle: string;

  // ── What was found on the page ──
  /** Distinct part numbers matched in the page HTML; may be empty. */
  partNumbers: string[];

  // ── Classification derived from title + URL slug ──
  /** Speed label such as "400G", or "Unknown". */
  speed: string;
  /** Speed in Gbps; 0 when the speed could not be determined. */
  speedGbps: number;
  /** Form factor label such as "QSFP-DD" or "SFP+". */
  formFactor: string;
  /** Fiber type label: "MMF" or "SMF". */
  fiberType: string;
  /** Category label such as "Coherent", "PON" or "DataCenter". */
  category: string;
}
|
||
|
||
/**
 * Extracts the product title, part numbers and derived classification from
 * one product-solution page.
 *
 * @param html    Raw HTML of the page.
 * @param pageUrl URL or site-relative path of the page; its last two path
 *                segments feed the classification helpers alongside the title.
 * @returns The parsed product, or `null` when no usable title (3+ characters)
 *          is found. `partNumbers` may be empty.
 */
function parseProductPage(html: string, pageUrl: string): EoptolinkProduct | null {
  // Prefer <title>; fall back to the first <h1> with 5–80 characters of text.
  const titleMatch = html.match(/<title>([^<]+)/i) ?? html.match(/<h1[^>]*>([^<]{5,80})</i);

  // Drop everything from the first "|" onwards, even when the remainder spans
  // several lines.
  const pageTitle = (titleMatch?.[1] ?? "").replace(/\s*\|[\s\S]*$/, "").trim();
  if (pageTitle.length < 3) return null;

  // Eoptolink part numbers: format like EOLO-168HG-10-XDX, EOLQ-128HG-02-PX.
  // The leading \b stops matches from starting inside a longer word
  // ("IEEE-802.3" used to yield the bogus part number "EEE-802").
  const pnRegex = /\bE[A-Z]{2,5}-\d{2,3}[A-Z0-9]{1,3}(?:-\d{1,3})?(?:-[A-Z0-9]{1,6})*/g;
  const partNumbers = [...new Set(Array.from(html.matchAll(pnRegex), (m) => m[0]))];

  // Last two non-empty path segments. Filtering empties keeps a trailing "/"
  // from consuming one of the two slots and discarding the parent segment.
  const slug = pageUrl.split("/").filter(Boolean).slice(-2).join("-");

  const { speed, speedGbps } = speedFromSlug(slug + " " + pageTitle);
  const classificationText = pageTitle + " " + slug;
  const formFactor = formFactorFromText(classificationText);
  const fiberType = fiberFromText(classificationText);
  const category = categoryFromText(classificationText);

  return { pageTitle, partNumbers, speed, speedGbps, formFactor, fiberType, category, pageUrl };
}
|
||
|
||
// ── Main ────────────────────────────────────────────────────────────────────
|
||
|
||
/** Returns a printable message for any thrown value, not only Error instances. */
function describeScrapeError(err: unknown): string {
  return err instanceof Error ? err.message : String(err);
}

/**
 * Crawls the Eoptolink product-solution pages and seeds one transceiver row
 * per discovered part number.
 *
 * Steps:
 *  1. Ensure the "Eoptolink" vendor exists.
 *  2. Collect product-solution paths from the homepage. If that fails, the
 *     error is logged and the function returns without throwing.
 *  3. For each path: wait DELAY_MS, fetch and parse the page, then seed up to
 *     10 part numbers — or the page title when the page lists none. Pages that
 *     cannot be parsed or have no recognisable speed are skipped.
 *
 * Per-page fetch/parse failures and per-row database failures are counted and
 * reported in the summary; neither aborts the crawl.
 */
export async function scrapeEoptolink(): Promise<void> {
  console.log("=== Eoptolink Manufacturer Catalog Scraper ===\n");

  const vendorId = await ensureVendor(
    "Eoptolink",
    "manufacturer",
    "https://www.eoptolink.com",
    "https://www.eoptolink.com/product-solutions/"
  );
  console.log(` Vendor ID: ${vendorId}`);

  // Phase 1: Collect product solution URLs
  let productUrls: string[];
  try {
    productUrls = await fetchProductSolutionUrls();
  } catch (err: unknown) {
    console.error(` Homepage fetch failed: ${describeScrapeError(err)}`);
    return;
  }

  console.log(`\n[Phase 2] Fetching ${productUrls.length} product detail pages...\n`);

  let added = 0;
  let skipped = 0;
  let errors = 0;
  let dbErrors = 0;

  for (const relPath of productUrls) {
    await sleep(DELAY_MS);
    const url = `${BASE}${relPath}`;
    try {
      const html = await fetchHtml(url);
      const product = parseProductPage(html, relPath);
      if (!product || product.speedGbps === 0) {
        skipped++;
        continue;
      }

      // Seed one row per part number (max 10 per product family page);
      // fall back to the page title when the page lists no part numbers.
      const namesToSeed: string[] =
        product.partNumbers.length > 0 ? product.partNumbers.slice(0, 10) : [product.pageTitle];

      for (const partNumber of namesToSeed) {
        try {
          await findOrCreateScrapedTransceiver({
            partNumber: partNumber.slice(0, 80),
            vendorId,
            formFactor: product.formFactor,
            speedGbps: product.speedGbps,
            speed: product.speed,
            fiberType: product.fiberType,
            category: product.category,
          });
          added++;
        } catch (dbErr: unknown) {
          // Duplicate/constraint errors are expected on re-runs, so keep going,
          // but count them and surface the first few so real failures are visible.
          dbErrors++;
          if (dbErrors <= 5) {
            console.warn(` ✗ DB error ${partNumber}: ${describeScrapeError(dbErr).slice(0, 60)}`);
          }
        }
      }

      console.log(
        ` ✓ ${product.pageTitle.padEnd(45)} ff=${product.formFactor.padEnd(8)} speed=${product.speed.padEnd(5)} pn=${product.partNumbers.length}`
      );
    } catch (err: unknown) {
      errors++;
      // Only the first 10 page errors are printed to keep the log readable.
      if (errors <= 10) console.warn(` ✗ Error ${relPath}: ${describeScrapeError(err).slice(0, 60)}`);
    }
  }

  console.log(`\n=== Eoptolink Catalog Scraper Complete ===`);
  console.log(` Pages processed: ${productUrls.length - errors}`);
  console.log(` Transceivers seeded: ${added}`);
  console.log(` Skipped (no speed): ${skipped}`);
  console.log(` Errors: ${errors}`);
  console.log(` DB errors: ${dbErrors}`);
}
|
||
|
||
// ── CLI ────────────────────────────────────────────────────────────────────
|
||
|
||
// Run the scraper when this file is executed directly (not when imported).
if (require.main === module) {
  void (async () => {
    let exitCode = 0;

    try {
      await scrapeEoptolink();
    } catch (err: unknown) {
      console.error("Fatal:", err);
      exitCode = 1;
    }

    // Close the pool exactly once and wait for it to finish before exiting.
    // Previously process.exit(1) ran without awaiting pool.end(), and a
    // failing pool.end() on the success path led to a second pool.end() call.
    try {
      await pool.end();
    } catch (err: unknown) {
      console.error("Fatal:", err);
      exitCode = 1;
    }

    if (exitCode !== 0) process.exit(exitCode);
  })();
}
|