Rene Fichtmueller 1c8dec52c9 feat: Price Comparison dashboard + Eoptolink OEM scraper
- Add public /api/price-comparison API (summary, top-50, per-SKU detail)
  — no auth required, 3 Express routes, DISTINCT ON latest-price logic
- Add '💲 Price Comparison' dashboard tab: stat cards, form-factor
  breakdown, top-50 SKU table (clickable rows → SKU detail), per-vendor
  price + stock + spread% lookup panel
- Add Eoptolink OEM catalog scraper (93 product-solution pages,
  part-number regex EOLO-*/EOLQ-* etc., no prices, seeds transceivers
  table as manufacturer entries)
- Register scrape:catalog:eoptolink in scheduler: schedule every 4h
  (40 */4 * * *), lazy-import worker, added to known-jobs array
2026-04-18 01:02:08 +02:00

238 lines
10 KiB
TypeScript
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

/**
* Eoptolink Manufacturer Catalog Scraper
*
* Source: www.eoptolink.com — One of China's top-3 optical transceiver OEMs.
* (Finisar competitor, supplies tier-1 cloud hyperscalers)
* Target: Discover all product families + part numbers, seed transceivers table
* as manufacturer=Eoptolink entries.
*
* Strategy:
* Phase 1: Fetch homepage → extract all /product-solutions/* category URLs (≈90)
* Phase 2: Fetch each category page → parse product name + Eoptolink part numbers
* (format: E[A-Z]{2,5}-\d{2,3}[A-Z0-9]{1,3}, then optional -digits and -alphanumeric groups, e.g. EOLQ-128HG-02-PX)
*
* Note: Eoptolink does NOT publish retail prices (B2B OEM manufacturer).
* This scraper adds manufacturer catalog entries — no price_observations.
*
* Rate limit: 1 req/2s — polite crawl of OEM's website.
*/
import { pool, findOrCreateScrapedTransceiver, ensureVendor } from "../utils/db";
// Origin prepended to every path this scraper requests (homepage and product pages).
const BASE = "https://www.eoptolink.com";
// Request headers sent with every fetch; the User-Agent is a desktop Chrome string.
const HEADERS = {
"User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0.0.0 Safari/537.36",
Accept: "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
"Accept-Language": "en-US,en;q=0.9",
};
// Pause (milliseconds) taken before each product-page request to keep the crawl polite.
const DELAY_MS = 2000;
/**
 * Returns a promise that settles (with no value) once `ms` milliseconds have elapsed.
 *
 * @param ms - Delay in milliseconds, handed straight to `setTimeout`.
 */
function sleep(ms: number): Promise<void> {
  return new Promise<void>((resolve) => {
    setTimeout(resolve, ms);
  });
}
/**
 * Downloads `url` with the scraper's shared headers and returns the response body as text.
 *
 * The request is aborted after 20 seconds.
 *
 * @param url - Absolute URL to request.
 * @returns The response body.
 * @throws Error with the HTTP status and URL when the response status is not OK.
 */
async function fetchHtml(url: string): Promise<string> {
  const response = await fetch(url, {
    headers: HEADERS,
    signal: AbortSignal.timeout(20_000),
  });
  if (response.ok) {
    return response.text();
  }
  throw new Error(`HTTP ${response.status} for ${url}`);
}
// ── Classification helpers ──────────────────────────────────────────────────
/**
 * Derives the line rate from a URL slug or title fragment.
 *
 * Rules are tried from fastest to slowest and the first whole-token match wins
 * (each pattern is wrapped in `\b`, so "2x400g" does not count as "400g").
 *
 * @param slug - Free text such as "100g-qsfp28 100G QSFP28 LR4".
 * @returns The display label and its value in Gbps, or `{ speed: "Unknown", speedGbps: 0 }`
 *          when no known rate token is present.
 */
function speedFromSlug(slug: string): { speed: string; speedGbps: number } {
  const tiers: ReadonlyArray<readonly [RegExp, string, number]> = [
    [/\b1\.?6t\b/i, "1.6T", 1600],
    [/\b800g\b/i, "800G", 800],
    [/\b400g\b/i, "400G", 400],
    [/\b200g\b/i, "200G", 200],
    [/\b100g\b/i, "100G", 100],
    [/\b50g\b/i, "50G", 50],
    [/\b40g\b/i, "40G", 40],
    [/\b32g\b/i, "32G", 32],
    [/\b25g\b/i, "25G", 25],
    [/\b16g\b/i, "16G", 16],
    [/\b10g\b/i, "10G", 10],
    [/\b8g\b/i, "8G", 8],
    [/\b4g\b/i, "4G", 4],
    [/\b1g\b/i, "1G", 1],
  ];

  for (const [pattern, speed, speedGbps] of tiers) {
    if (pattern.test(slug)) {
      return { speed, speedGbps };
    }
  }
  return { speed: "Unknown", speedGbps: 0 };
}
/**
 * Derives the transceiver form factor from a page title and/or URL slug.
 *
 * The text is upper-cased and checked against an ordered rule list; more specific
 * names come before the names they contain (e.g. QSFP-DD800 before QSFP-DD).
 * "Plus" variants accept "+", "-PLUS" and " PLUS", and are anchored at a word
 * boundary so that "QSFP-PLUS" is reported as "QSFP+" rather than "SFP+".
 *
 * @param text - Free text such as "40G QSFP+ SR4 qsfp-plus".
 * @returns The matched form-factor label, or "SFP" when nothing more specific matches.
 */
function formFactorFromText(text: string): string {
  const upper = text.toUpperCase();
  const rules: ReadonlyArray<readonly [RegExp, string]> = [
    [/\bOSFP\b/, "OSFP"],
    [/\bQSFP.?DD800\b/, "QSFP-DD800"],
    [/\bQSFP.?DD\b/, "QSFP-DD"],
    [/\bQSFP56\b/, "QSFP56"],
    [/\bQSFP112\b/, "QSFP112"],
    [/\bQSFP28\b/, "QSFP28"],
    [/\bQSFP(?:\+|[- ]PLUS\b)/, "QSFP+"],
    [/\bSFP56.DD\b/, "SFP56-DD"],
    [/\bSFP56\b/, "SFP56"],
    [/\bSFP28\b/, "SFP28"],
    [/\bSFP(?:\+|[- ]PLUS\b)/, "SFP+"],
    [/\bXFP\b/, "XFP"],
    [/\bCFP4\b/, "CFP4"],
    [/\bCFP2\b/, "CFP2"],
    [/\bCFP\b/, "CFP"],
  ];

  for (const [pattern, formFactor] of rules) {
    if (pattern.test(upper)) {
      return formFactor;
    }
  }
  // Plain "SFP" and unrecognised text both fall back to SFP.
  return "SFP";
}
/**
 * Guesses the fiber type from a page title and/or URL slug.
 *
 * Only multimode hints change the outcome: any occurrence (substring match,
 * case-insensitive) of "multimode", "mmf", "sr" or "om1".."om5" yields "MMF".
 * Everything else, including explicit single-mode keywords such as LR/ER/ZR/CWDM/DWDM,
 * is reported as "SMF", so no separate single-mode check is needed.
 *
 * @param text - Free text describing the product.
 * @returns "MMF" or "SMF".
 */
function fiberFromText(text: string): string {
  const lowered = text.toLowerCase();
  const multimodeHint = /multimode|mmf|sr|om[1-5]/;
  return multimodeHint.test(lowered) ? "MMF" : "SMF";
}
/**
 * Assigns a catalog category based on keywords found in a title and/or slug.
 *
 * The lower-cased text is tested against the rules in order; patterns are plain
 * substring matches, so the first keyword found anywhere in the text decides.
 *
 * @param text - Free text describing the product.
 * @returns One of "Coherent", "DWDM", "CWDM", "AOC", "DAC", "PON", or "DataCenter"
 *          when no keyword is present.
 */
function categoryFromText(text: string): string {
  const lowered = text.toLowerCase();
  const keywordRules: ReadonlyArray<readonly [RegExp, string]> = [
    [/coherent|zr|dpsk/, "Coherent"],
    [/dwdm/, "DWDM"],
    [/cwdm/, "CWDM"],
    [/aoc/, "AOC"],
    [/dac/, "DAC"],
    [/pon|gpon|gepon/, "PON"],
  ];

  const hit = keywordRules.find(([pattern]) => pattern.test(lowered));
  return hit === undefined ? "DataCenter" : hit[1];
}
// ── Phase 1: Discover product solution URLs ──────────────────────────────────
/**
 * Pulls the distinct `/product-solutions/...` paths out of a page's HTML.
 *
 * Only double-quoted `href` attributes are recognised; the path stops at the first
 * `"`, `#` or `?`. The attribute name is matched case-insensitively and the path is
 * taken from the same match, so `HREF="..."` links are kept as well.
 *
 * Paths containing `/osa/` or `/other/`, and paths ending in `/400g/`, `/800g/` or
 * `/product-solutions/`, are dropped (presumably sub-assembly, miscellaneous and
 * index pages without a transceiver catalog — verify against the live site).
 */
function extractProductSolutionPaths(html: string): string[] {
  const hrefPattern = /href="(\/product-solutions\/[^"#?]+)"/gi;
  const seen = new Set<string>();
  for (const match of html.matchAll(hrefPattern)) {
    seen.add(match[1]);
  }

  const excludedFragments = ["/osa/", "/other/"];
  const excludedEndings = ["/400g/", "/800g/", "/product-solutions/"];
  return [...seen].filter(
    (path) =>
      !excludedFragments.some((fragment) => path.includes(fragment)) &&
      !excludedEndings.some((ending) => path.endsWith(ending))
  );
}

/**
 * Phase 1: fetches the Eoptolink homepage and returns the relative paths of the
 * product-solution pages worth crawling.
 *
 * @returns Site-relative paths (each starting with `/product-solutions/`), de-duplicated.
 * @throws Whatever `fetchHtml` throws when the homepage cannot be retrieved.
 */
async function fetchProductSolutionUrls(): Promise<string[]> {
  console.log(` Fetching Eoptolink homepage for product solution links...`);
  const html = await fetchHtml(`${BASE}/`);
  const filtered = extractProductSolutionPaths(html);
  console.log(` Found ${filtered.length} product solution pages`);
  return filtered;
}
// ── Phase 2: Parse product detail page ──────────────────────────────────────
/** Result of parsing one product-solution page. */
interface EoptolinkProduct {
// Text of the page's <title> (or first <h1> as fallback), cut at the first "|" and trimmed.
pageTitle: string;
// Distinct part-number strings found anywhere in the page HTML; may be empty.
partNumbers: string[];
// Speed label such as "100G", or "Unknown" when no rate token was recognised.
speed: string;
// Numeric value paired with `speed`; 0 when the speed is "Unknown".
speedGbps: number;
// Form-factor label such as "QSFP28"; "SFP" is also the fallback value.
formFactor: string;
// "MMF" or "SMF".
fiberType: string;
// Category label such as "Coherent", "DWDM" or "DataCenter".
category: string;
// The URL/path value that was handed to the parser for this page.
pageUrl: string;
}
/**
 * Phase 2: extracts the title, part numbers and classification from one product page.
 *
 * @param html - Raw HTML of the product-solution page.
 * @param pageUrl - URL or site-relative path of the page; its last two non-empty
 *                  path segments are used as extra classification text.
 * @returns The parsed product, or `null` when no usable title (at least 3 characters)
 *          could be found.
 */
function parseProductPage(html: string, pageUrl: string): EoptolinkProduct | null {
  // Title: prefer <title>, fall back to the first <h1>; drop any " | Site name" suffix.
  const titleMatch = html.match(/<title>([^<]+)/i) || html.match(/<h1[^>]*>([^<]{5,80})</i);
  const pageTitle = (titleMatch?.[1] ?? "").replace(/\s*\|.*$/, "").replace(/[|]+[^|]*$/, "").trim();
  if (!pageTitle || pageTitle.length < 3) return null;

  // Eoptolink part numbers look like EOLO-168HG-10-XDX or EOLQ-128HG-02-PX.
  // The negative lookbehind stops the pattern from matching the tail of a longer
  // token (e.g. "EEE-802" inside "IEEE-802.3" or "ERIES-100G" inside "SERIES-100G").
  const pnRegex = /(?<![A-Za-z0-9])E[A-Z]{2,5}-\d{2,3}[A-Z0-9]{1,3}(?:-\d{1,3})?(?:-[A-Z0-9]{1,6})*/g;
  const partNumbers = [...new Set(Array.from(html.matchAll(pnRegex), (m) => m[0]))];

  // Drop empty segments first so a trailing "/" does not push the parent segment
  // (which often carries the speed, e.g. "100g") out of the two-segment window.
  const slug = pageUrl.split("/").filter(Boolean).slice(-2).join("-");

  const { speed, speedGbps } = speedFromSlug(slug + " " + pageTitle);
  const classificationText = pageTitle + " " + slug;
  const formFactor = formFactorFromText(classificationText);
  const fiberType = fiberFromText(classificationText);
  const category = categoryFromText(classificationText);

  return { pageTitle, partNumbers, speed, speedGbps, formFactor, fiberType, category, pageUrl };
}
// ── Main ────────────────────────────────────────────────────────────────────
/** Returns a printable message for any thrown value, not only `Error` instances. */
function describeScrapeError(err: unknown): string {
  return err instanceof Error ? err.message : String(err);
}

/**
 * Crawls the Eoptolink product-solution pages and seeds catalog entries for them.
 *
 * Flow: ensure the vendor row exists, collect product page paths from the homepage,
 * then fetch each page (pausing `DELAY_MS` before every request), parse it and call
 * `findOrCreateScrapedTransceiver` once per part number (at most 10 per page), or once
 * with the page title when the page lists no part numbers. Pages whose speed cannot be
 * determined are skipped.
 *
 * Failures are best-effort: a homepage failure is logged and ends the run without
 * throwing; page and database failures are counted, the first 10 of each are logged,
 * and the crawl continues.
 */
export async function scrapeEoptolink(): Promise<void> {
  console.log("=== Eoptolink Manufacturer Catalog Scraper ===\n");
  const vendorId = await ensureVendor(
    "Eoptolink",
    "manufacturer",
    "https://www.eoptolink.com",
    "https://www.eoptolink.com/product-solutions/"
  );
  console.log(` Vendor ID: ${vendorId}`);

  // Phase 1: Collect product solution URLs
  let productUrls: string[];
  try {
    productUrls = await fetchProductSolutionUrls();
  } catch (err: unknown) {
    console.error(` Homepage fetch failed: ${describeScrapeError(err)}`);
    return;
  }

  console.log(`\n[Phase 2] Fetching ${productUrls.length} product detail pages...\n`);
  const maxLoggedErrors = 10;
  let added = 0;
  let skipped = 0;
  let errors = 0;
  let dbErrors = 0;

  for (const relPath of productUrls) {
    await sleep(DELAY_MS);
    const url = `${BASE}${relPath}`;
    try {
      const html = await fetchHtml(url);
      const product = parseProductPage(html, relPath);
      if (!product || product.speedGbps === 0) {
        skipped++;
        continue;
      }

      // Seed one row per part number (max 10 per product family page);
      // fall back to the page title only when the page lists no part numbers.
      const namesToSeed: string[] = product.partNumbers.length > 0
        ? product.partNumbers.slice(0, 10)
        : [product.pageTitle];

      for (const partNumber of namesToSeed) {
        try {
          await findOrCreateScrapedTransceiver({
            partNumber: partNumber.slice(0, 80),
            vendorId,
            formFactor: product.formFactor,
            speedGbps: product.speedGbps,
            speed: product.speed,
            fiberType: product.fiberType,
            category: product.category,
          });
          added++;
        } catch (dbErr: unknown) {
          // Duplicate/constraint errors may be expected on re-runs, so keep going,
          // but surface the failure instead of hiding it entirely.
          dbErrors++;
          if (dbErrors <= maxLoggedErrors) {
            console.warn(` ✗ DB error ${partNumber.slice(0, 80)}: ${describeScrapeError(dbErr).slice(0, 60)}`);
          }
        }
      }

      console.log(
        `${product.pageTitle.padEnd(45)} ff=${product.formFactor.padEnd(8)} speed=${product.speed.padEnd(5)} pn=${product.partNumbers.length}`
      );
    } catch (err: unknown) {
      errors++;
      if (errors <= maxLoggedErrors) {
        console.warn(` ✗ Error ${relPath}: ${describeScrapeError(err).slice(0, 60)}`);
      }
    }
  }

  console.log(`\n=== Eoptolink Catalog Scraper Complete ===`);
  console.log(` Pages processed: ${productUrls.length - errors}`);
  console.log(` Transceivers seeded: ${added}`);
  console.log(` Skipped (no speed): ${skipped}`);
  console.log(` Errors: ${errors}`);
  console.log(` DB errors: ${dbErrors}`);
}
// ── CLI ────────────────────────────────────────────────────────────────────
// Run the scraper when this file is executed directly (not when it is imported).
if (require.main === module) {
  void (async () => {
    try {
      await scrapeEoptolink();
      // Normal completion: close the DB pool so the process can exit on its own.
      await pool.end();
    } catch (err: unknown) {
      console.error("Fatal:", err);
      // Pool shutdown is started but not awaited before exiting with a failure code.
      void pool.end();
      process.exit(1);
    }
  })();
}