257 lines
9.8 KiB
TypeScript
257 lines
9.8 KiB
TypeScript
/**
 * Ascent Optics Scraper — Chinese OEM transceiver manufacturer
 *
 * ascentoptics.com — the product catalog is loaded via a JSON API endpoint.
 * Products are served via /product-list?is_render=1&category_id=CID
 * (an HTML table embedded in the JSON response). No retail pricing — "Get Quote" model.
 * Category IDs are discovered from data-cid attributes on sub-category pages.
 *
 * Rate limited: 1 request per 2 seconds.
 */
|
|
import { pool, findOrCreateScrapedTransceiver, ensureVendor } from "../utils/db";
|
|
import * as cheerio from "cheerio";
|
|
|
|
/** Site origin; every request path and relative link in this file is appended to it. */
const BASE = "https://ascentoptics.com";

/**
 * Headers shared by all requests. They describe a desktop Chrome browser issuing
 * an XMLHttpRequest from the transceiver landing page; individual fetches spread
 * this object and override single entries (Referer, X-Requested-With).
 */
const HEADERS = {
  "User-Agent":
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
  "Accept": "text/html,application/xhtml+xml,application/json,*/*;q=0.8",
  "Accept-Language": "en-US,en;q=0.9",
  "X-Requested-With": "XMLHttpRequest",
  "Referer": "https://ascentoptics.com/optical-transceivers/",
};
|
|
|
|
// Known transceiver categories, one row per sub-category page:
//   [slug, categoryId, formFactor, speedGbps]
// A categoryId of 0 means the ID is not known up front and has to be looked up
// from the sub-category page at run time.
// DAC/AOC/DCO/LPO excluded — transceivers only
const CATEGORY_TABLE: [string, number, string, number][] = [
  ["/10g-sfp/", 33, "SFP+", 10],
  ["/10g-xfp/", 34, "XFP", 10],
  ["/25g-sfp28/", 22, "SFP28", 25],
  ["/40g-qsfp/", 20, "QSFP+", 40],
  ["/100g-qsfp28/", 15, "QSFP28", 100],
  ["/100g-sfp112/", 0, "SFP112", 100],
  ["/200g-qsfp56/", 3, "QSFP56", 200],
  ["/200g-qsfp28-dd/", 4, "QSFP-DD", 200],
  ["/400g-qsfp56-dd/", 5, "QSFP-DD", 400],
  ["/400g-osfp/", 6, "OSFP", 400],
  ["/400g-qsfp112/", 7, "QSFP112", 400],
  ["/800g-osfp/", 9, "OSFP", 800],
  ["/800g-qsfp-dd800-200g-per-line/", 121, "QSFP-DD", 800],
  ["/800g-qsfp112-dd/", 0, "QSFP112", 800],
  ["/50g-sfp56/", 0, "SFP56", 50],
  ["/16g-sfp/", 0, "SFP", 16],
];

// Expanded into objects; the speed label is always the Gbps figure followed by "G".
const CATEGORIES = CATEGORY_TABLE.map(([slug, categoryId, formFactor, speedGbps]) => ({
  slug,
  categoryId,
  formFactor,
  speed: `${speedGbps}G`,
  speedGbps,
}));
|
|
|
|
/** One catalog row extracted from a category's product table. */
interface Product {
  /** Text of the part-number cell, trimmed. */
  partNumber: string;
  /** Description cell text, or the part number when the description is empty. */
  name: string;
  /** Absolute product link, or the category page when the row has no link. */
  url: string;
  /** Form factor, copied from the category the row was found in. */
  formFactor: string;
  /** Speed label (e.g. "10G"), copied from the category. */
  speed: string;
  /** Speed in Gbps, copied from the category. */
  speedGbps: number;

  /** Absolute image URL; absent when the row has no usable image. */
  imageUrl?: string;
  /** Reach label such as "10km"; absent when no reach could be detected. */
  reachLabel?: string;
  /** Reach in meters matching reachLabel. */
  reachMeters?: number;
  /** Fiber type guessed from the row text ("SMF", "MMF" or "Copper"). */
  fiberType?: string;
  /** Wavelength digits in nm taken from the row text; empty string when none found. */
  wavelength?: string;
}
|
|
|
|
/** Returns a promise that settles (with no value) once `ms` milliseconds have elapsed. */
function sleep(ms: number): Promise<void> {
  return new Promise<void>((done) => {
    setTimeout(done, ms);
  });
}
|
|
|
|
/**
 * Literal distances recognised in product text, in priority order
 * (kilometres before metres, longest first).
 */
const REACH_DISTANCES: ReadonlyArray<[number, "KM" | "M"]> = [
  [120, "KM"], [80, "KM"], [70, "KM"], [60, "KM"], [40, "KM"],
  [20, "KM"], [10, "KM"], [5, "KM"], [2, "KM"],
  [550, "M"], [500, "M"], [300, "M"], [220, "M"], [100, "M"],
];

/**
 * Ordered [pattern, label, meters] rules; the first pattern that matches wins.
 * All patterns are written for upper-cased input.
 */
const REACH_PATTERNS: ReadonlyArray<[RegExp, string, number]> = [
  ...REACH_DISTANCES.map(([value, unit]): [RegExp, string, number] => [
    // (?<![\w.]) is a word boundary that additionally rejects a preceding ".",
    // so a fractional figure such as "1.5KM" is not misread as "5KM".
    new RegExp(`(?<![\\w.])${value}\\s*${unit}\\b`),
    `${value}${unit.toLowerCase()}`,
    unit === "KM" ? value * 1000 : value,
  ]),
  // Reach-class codes, used only when no literal distance is present.
  // \d* / \d+ accept lane-count variants (LR4, ER4, DR4, FR8, SR4, ...), which
  // a plain \bXX\b never matches because the digit is a word character.
  [/\bLR\d*\b/, "10km", 10000],
  [/\bER\d*\b/, "40km", 40000],
  [/\bZR\d*\b/, "80km", 80000],
  // Multi-lane short-reach optics (SR4/SR8/SR10) are nominally 100 m parts,
  // unlike single-lane SR, which keeps the 300 m figure.
  [/\bSR\d+\b/, "100m", 100],
  [/\bSR\b/, "300m", 300],
  [/\bDR\d*\b/, "500m", 500],
  [/\bFR\d*\b/, "2km", 2000],
];

/**
 * Infers a transceiver's nominal reach from free text such as a part number
 * plus description.
 *
 * An explicit distance ("10km", "300 m") takes precedence; otherwise a
 * reach-class code (SR, LR4, DR4, ...) is mapped to a nominal distance.
 * Matching is case-insensitive.
 *
 * @param text Text to inspect.
 * @returns The reach label and its value in meters, or `undefined` when
 *          nothing in the text indicates a reach.
 */
function detectReach(text: string): { label: string; meters: number } | undefined {
  const upper = text.toUpperCase();
  for (const [regex, label, meters] of REACH_PATTERNS) {
    if (regex.test(upper)) return { label, meters };
  }
  return undefined;
}
|
|
|
|
/**
 * Guesses the transmission medium from free text (part number + description).
 *
 * Checks run in order: single-mode indicators, then multi-mode indicators,
 * then copper indicators. Text with no indicator at all is reported as "SMF".
 *
 * The two-letter codes (LX/LR/ER/ZR, SX/SR) must not be adjacent to another
 * letter. Lookarounds are used instead of `[^a-z]xx[^a-z]` so a code at the
 * very start or end of the text still matches: "10GBASE-SR" is MMF, not the
 * SMF default.
 *
 * @param text Text to inspect; matching is case-insensitive.
 * @returns "SMF", "MMF" or "Copper".
 */
function detectFiber(text: string): string {
  const singleMode = /single.?mode|smf|(?<![a-z])(?:lx|lr|er|zr)(?![a-z])|bidi|cwdm|dwdm/i;
  const multiMode = /multi.?mode|mmf|(?<![a-z])(?:sx|sr)(?![a-z])/i;
  const copper = /copper|dac|twinax|rj45|base-t/i;

  if (singleMode.test(text)) return "SMF";
  if (multiMode.test(text)) return "MMF";
  if (copper.test(text)) return "Copper";
  // Nothing recognisable: fall back to single-mode.
  return "SMF";
}
|
|
|
|
/**
 * Extracts the first wavelength mentioned in `text`: a run of three or four
 * digits followed, after optional whitespace, by "nm" (any letter case).
 *
 * @returns The digits only, or an empty string when no wavelength is present.
 */
function detectWavelength(text: string): string {
  const found = /(\d{3,4})\s*nm/i.exec(text);
  return found?.[1] ?? "";
}
|
|
|
|
/**
 * Looks up a category ID from the first `data-cid` attribute on a
 * sub-category page (fallback for entries whose categoryId is 0).
 *
 * Best-effort: this never throws. Every failure (non-2xx response, network
 * error or timeout, attribute not present) yields 0, and the reason is logged
 * so a skipped category can be diagnosed.
 *
 * @param slug Category path appended to BASE, e.g. "/50g-sfp56/".
 * @returns The numeric category ID, or 0 when it could not be determined.
 */
async function fetchCategoryId(slug: string): Promise<number> {
  try {
    const resp = await fetch(BASE + slug, {
      // The shared headers mark requests as XHR; that marker is blanked for
      // this page fetch.
      headers: { ...HEADERS, "X-Requested-With": "" },
      signal: AbortSignal.timeout(15000),
    });
    if (!resp.ok) {
      console.warn(` Category page ${slug} returned HTTP ${resp.status}`);
      return 0;
    }

    const html = await resp.text();
    // Accept either quote style around the attribute value.
    const digits = /data-cid=["'](\d+)["']/.exec(html)?.[1];
    if (digits === undefined) {
      console.warn(` Category page ${slug} has no data-cid attribute`);
      return 0;
    }

    const id = Number.parseInt(digits, 10);
    return Number.isSafeInteger(id) ? id : 0;
  } catch (err) {
    const reason = err instanceof Error ? err.message : String(err);
    console.warn(` Category page ${slug} could not be fetched: ${reason}`);
    return 0;
  }
}
|
|
|
|
/**
 * Fetches the product table HTML for one category via the JSON API.
 *
 * The endpoint responds with JSON whose `product_table_list` member carries
 * the table markup as a string.
 *
 * @param categoryId Numeric category ID sent as `category_id`.
 * @param slug       Category path; used only to build the Referer header.
 * @returns The table HTML, or an empty string when the response carries no
 *          `product_table_list`.
 * @throws Error on a non-2xx response, on a body that is not valid JSON
 *         (raised by `resp.json()`), or when the payload does not have the
 *         expected shape. Network failures and the 30 s timeout reject as
 *         raised by `fetch`.
 */
async function fetchProductTable(categoryId: number, slug: string): Promise<string> {
  const endpoint = `${BASE}/product-list?is_render=1&category_id=${categoryId}`;
  const resp = await fetch(endpoint, {
    headers: { ...HEADERS, Referer: BASE + slug },
    signal: AbortSignal.timeout(30000),
  });
  if (!resp.ok) throw new Error(`HTTP ${resp.status} for /product-list?category_id=${categoryId}`);

  // Validate the payload instead of asserting its type: a non-string value
  // must not be handed to the HTML parser as if it were markup.
  const payload: unknown = await resp.json();
  if (typeof payload !== "object" || payload === null) {
    throw new Error(`Unexpected payload for /product-list?category_id=${categoryId}`);
  }

  const table = (payload as { product_table_list?: unknown }).product_table_list;
  if (table === undefined || table === null) return "";
  if (typeof table !== "string") {
    throw new Error(`product_table_list is not a string for /product-list?category_id=${categoryId}`);
  }
  return table;
}
|
|
|
|
/**
 * Resolves a link or image reference from the table against the site origin.
 * Handles absolute, root-relative, path-relative and protocol-relative forms.
 *
 * @returns The absolute URL, or undefined when the reference cannot be parsed
 *          or is not an http(s) URL (e.g. `javascript:` or `data:`).
 */
function resolveSiteUrl(ref: string): string | undefined {
  try {
    const resolved = new URL(ref.trim(), `${BASE}/`);
    const isWeb = resolved.protocol === "http:" || resolved.protocol === "https:";
    return isWeb ? resolved.href : undefined;
  } catch {
    return undefined;
  }
}

/**
 * True when the text of a part-number cell is a column header, a navigation or
 * category label, or a bare form-factor name rather than a product part number.
 */
function isNonProductLabel(rawPart: string): boolean {
  if (!rawPart || rawPart.length < 3 || /part\s*no|description/i.test(rawPart)) return true;
  if (rawPart.length > 80) return true;
  return (
    rawPart.startsWith("--") ||
    /^(Distance|Optical-Transceivers|Coherent-Transceivers|Fiber-Channel-Transceivers|LPO-Transceivers|Liquid-Cooling-Transceivers|PON-Transceivers|XGSPON)$/i.test(rawPart) ||
    /Transceivers$/i.test(rawPart) ||
    /^(QSFP112|QSFP112-DD|QSFP-DD800|QSFP28-DD|QSFP56-DD|SFP-DD|SFP112|SFP56-DD|OSFP224|OSFP-XD)$/i.test(rawPart)
  );
}

/**
 * Parses the HTML table returned by the /product-list API into products.
 *
 * Rows with fewer than three `<td>` cells, and rows whose part-number cell is
 * a header or label, are skipped. Form factor and speed come from `cat`;
 * reach, fiber type and wavelength are inferred from the part number and
 * description text.
 *
 * @param tableHtml Table markup as returned by the API.
 * @param cat       Category the table belongs to.
 * @returns One Product per accepted row, in document order.
 */
function parseProductTable(
  tableHtml: string,
  cat: typeof CATEGORIES[number],
): Product[] {
  const $ = cheerio.load(tableHtml);
  const products: Product[] = [];

  $("tr").each((_i, row) => {
    const cells = $(row).find("td");
    if (cells.length < 3) return;

    // Column layout: [image] [part_number] [description] [data_rate] [distance] [wavelength] [connector] [datasheet] [quote]
    // NOTE(review): the distance and wavelength columns are not read; reach and
    // wavelength are inferred from part number + description only. Confirm
    // whether those columns would be a more reliable source.
    const partNumberCell = $(cells[1]);
    const descCell = $(cells[2]);

    const rawPart = partNumberCell.text().trim();
    const desc = descCell.text().trim();

    // Skip header rows and non-product rows
    if (isNonProductLabel(rawPart)) return;

    // Prefer the link in the part-number cell, then the description cell;
    // fall back to the category page when the row has no usable link.
    const href =
      partNumberCell.find("a[href]").first().attr("href") ??
      descCell.find("a[href]").first().attr("href");
    const url = (href ? resolveSiteUrl(href) : undefined) ?? BASE + cat.slug;

    const combined = `${rawPart} ${desc}`;
    const reach = detectReach(combined);

    // `||` on purpose: an empty src falls through to data-src.
    const firstImg = $(cells[0]).find("img").first();
    const rawImg = firstImg.attr("src") || firstImg.attr("data-src");
    // Ignore generic site artwork that is not a product photo.
    const imageUrl =
      rawImg && !/(logo|placeholder|default|no-image|icon|sprite)/i.test(rawImg)
        ? resolveSiteUrl(rawImg)
        : undefined;

    products.push({
      partNumber: rawPart,
      name: desc || rawPart,
      url,
      imageUrl,
      formFactor: cat.formFactor,
      speed: cat.speed,
      speedGbps: cat.speedGbps,
      reachLabel: reach?.label,
      reachMeters: reach?.meters,
      fiberType: detectFiber(combined),
      wavelength: detectWavelength(combined),
    });
  });

  return products;
}
|
|
|
|
/**
 * Scrapes the Ascent Optics transceiver catalog and stores every product found.
 *
 * For each entry in CATEGORIES: resolve the category ID if it is unknown (0),
 * fetch and parse the category's product table, and pass each product to
 * findOrCreateScrapedTransceiver. A failing category or product is logged and
 * skipped so one bad page does not abort the run. Requests are spaced two
 * seconds apart, matching the rate limit stated in the file header.
 *
 * @returns Resolves when all categories have been processed. Rejects only if
 *          ensureVendor rejects; later failures are caught and logged.
 */
export async function scrapeAscentOptics(): Promise<void> {
  // Minimum pause after every HTTP request (1 request per 2 seconds).
  const requestDelayMs = 2000;
  // Thrown values are not guaranteed to be Error instances.
  const messageOf = (err: unknown): string => (err instanceof Error ? err.message : String(err));

  console.log("=== Ascent Optics Scraper Starting ===\n");

  const vendorId = await ensureVendor(
    "Ascent Optics",
    "compatible",
    "https://ascentoptics.com",
    BASE + "/optical-transceivers/",
  );

  let totalProducts = 0;

  for (const cat of CATEGORIES) {
    let cid = cat.categoryId;

    // Resolve unknown category IDs dynamically
    if (cid === 0) {
      cid = await fetchCategoryId(cat.slug);
      // The lookup is itself a request: pause before the next one, whether
      // that is this category's product list or the next category.
      await sleep(requestDelayMs);
      if (cid === 0) {
        console.log(` Skipping ${cat.slug} — category ID not found`);
        continue;
      }
    }

    console.log(`\n--- ${cat.formFactor} (${cat.speed}) [cid=${cid}] ---`);

    try {
      const tableHtml = await fetchProductTable(cid, cat.slug);
      const products = parseProductTable(tableHtml, cat);
      console.log(` Found ${products.length} products`);

      for (const product of products) {
        try {
          await findOrCreateScrapedTransceiver({
            partNumber: product.partNumber,
            vendorId,
            productUrl: product.url,
            formFactor: product.formFactor,
            speedGbps: product.speedGbps,
            speed: product.speed,
            reachMeters: product.reachMeters,
            reachLabel: product.reachLabel,
            fiberType: product.fiberType,
            wavelengths: product.wavelength,
            category: "DataCenter",
            imageUrl: product.imageUrl,
          });
          // Counts only products that were stored without error.
          totalProducts++;
        } catch (err) {
          console.warn(` Error: ${messageOf(err).slice(0, 80)}`);
        }
      }
    } catch (err) {
      console.error(` Category failed: ${messageOf(err)}`);
    }

    await sleep(requestDelayMs);
  }

  console.log(`\n=== Ascent Optics Complete: ${totalProducts} products (catalog only — no pricing) ===`);
}
|
|
|
|
// CLI entry point: run the scraper when this file is executed directly.
if (require.main === module) {
  void scrapeAscentOptics()
    .catch((err: unknown) => {
      console.error("Fatal:", err);
      // Set the exit code instead of calling process.exit() so the pool can
      // still be closed below before the process ends.
      process.exitCode = 1;
    })
    // Close the pool exactly once, on success and on failure alike.
    .finally(() => pool.end())
    .catch((err: unknown) => {
      console.error("Failed to close pool:", err);
      process.exitCode = 1;
    });
}
|