FiberMall: - Correct /store-XXXXX-name.htm category URLs (was /c/xxx/ → HTTP 404) - Parser: split on new_proList_mainListLi, price from data-price on currency_price span — fix 0.00 false-match from SKU variant items - Also scrape SKU brand variant links from .sku_item divs - Result: 3,410 prices now in DB (was 0) Flexoptix: - Fix extractPrice regex for EUR thousand-separator format (2,921.60 EUR was parsed as 2 EUR) - Add OSFP224 / 1.6T search queries (4 new, form factor was missing) - Fix O.138HG2.C.05 stale price 3009.60→2921.60 EUR Schema: competitor_verified + competitor_verified_at columns added via ALTER TABLE (were referenced in code but missing in DB) CHANGELOG: added 6 entries for 2026-04-12
601 lines
25 KiB
TypeScript
601 lines
25 KiB
TypeScript
/**
|
|
* Flexoptix Product Catalog Scraper
|
|
*
|
|
* Scrapes flexoptix.net product catalog for transceiver specs and pricing.
|
|
* This is our own data — no restrictions.
|
|
*
|
|
* Strategy: Use the Magento search/suggest AJAX API which returns JSON
|
|
* with product names, URLs, prices, and SKUs. We query by form factor
|
|
* keywords to enumerate the full catalog.
|
|
*
|
|
* Rate limited: 1 req/sec.
|
|
*/
|
|
import { pool, findOrCreateScrapedTransceiver, ensureVendor, upsertPriceObservation } from "../utils/db";
|
|
import { contentHash } from "../utils/hash";
|
|
|
|
/** Origin of the Flexoptix shop; relative product URLs are prefixed with it. */
const BASE = "https://www.flexoptix.net";

/** Search/suggest AJAX endpoint queried by `searchProducts`. */
const SEARCH_URL = BASE + "/en/search/ajax/suggest/";

/** Headers attached to every HTTP request this scraper sends. */
const HEADERS = {
  "User-Agent": "Mozilla/5.0 (compatible; TIP-Bot/1.0; internal-flexoptix)",
  "Accept": "application/json, text/html",
};
|
|
|
|
/**
 * One search term together with the form factor / speed that is assumed for a
 * hit whose product name does not reveal its own.
 */
interface SearchQuery {
  query: string;
  formFactor: string;
  speed: string;
  speedGbps: number;
}

/**
 * Search terms that cover the full transceiver catalog, written as rows of
 * `[formFactor, speed, speedGbps, queries]`. Consecutive terms sharing the same
 * defaults are grouped into one row; row order and the order inside each row
 * define the order in which the searches are issued.
 */
const QUERY_GROUPS: ReadonlyArray<readonly [string, string, number, readonly string[]]> = [
  // By form factor
  ["SFP", "1G", 1, ["SFP 1G", "SFP BiDi", "SFP CWDM", "SFP DWDM", "SFP copper"]],
  ["SFP+", "10G", 10, [
    "SFP+ 10G", "SFP+ SR", "SFP+ LR", "SFP+ ER", "SFP+ ZR",
    "SFP+ BiDi", "SFP+ CWDM", "SFP+ DWDM", "SFP+ DAC", "SFP+ AOC",
  ]],
  ["SFP28", "25G", 25, ["25G SFP28", "SFP28 SR", "SFP28 LR", "SFP28 DWDM", "SFP28 DAC", "SFP28 AOC"]],
  ["QSFP+", "40G", 40, ["QSFP+ 40G", "QSFP+ SR4", "QSFP+ LR4", "QSFP+ DAC", "QSFP+ AOC"]],
  ["QSFP28", "100G", 100, [
    "QSFP28 100G", "QSFP28 SR4", "QSFP28 LR4", "QSFP28 ER4",
    "QSFP28 CWDM4", "QSFP28 PSM4", "QSFP28 DAC", "QSFP28 AOC",
  ]],
  ["QSFP56", "200G", 200, ["QSFP56 200G"]],
  ["QSFP-DD", "400G", 400, [
    "QSFP-DD 400G", "QSFP-DD DR4", "QSFP-DD FR4", "QSFP-DD LR4", "QSFP-DD SR4", "QSFP-DD ZR",
  ]],
  ["QSFP-DD800", "800G", 800, ["QSFP-DD800 800G"]],
  ["OSFP", "400G", 400, ["OSFP 400G", "OSFP SR4", "OSFP DR4", "OSFP FR4", "OSFP LR4", "OSFP ZR"]],
  ["OSFP", "800G", 800, ["OSFP 800G"]],
  ["OSFP224", "1.6T", 1600, ["OSFP224 1.6T", "OSFP224", "1.6T DR4", "1.6T transceiver"]],

  // Additional granular queries for maximum coverage
  ["SFP+", "10G", 10, ["SFP+ copper", "SFP+ 10GBASE-T", "SFP+ tunable"]],
  ["SFP", "1G", 1, ["SFP RJ45"]],
  ["SFP", "100M", 0.1, ["SFP 100M", "SFP 100BASE"]],
  ["QSFP28", "100G", 100, ["QSFP28 CWDM", "QSFP28 ZR4", "QSFP28 FR", "QSFP28 DR", "QSFP28 copper"]],
  ["QSFP-DD", "400G", 400, ["QSFP-DD DAC", "QSFP-DD AOC"]],
  ["QSFP-DD", "200G", 200, ["QSFP-DD 200G"]],
  ["OSFP", "400G", 400, ["OSFP DAC", "OSFP AOC"]],
  ["XFP", "10G", 10, ["XFP 10G", "XFP SR", "XFP LR"]],
  ["CFP2", "100G", 100, ["CFP2 100G"]],
  ["CFP2-DCO", "200G", 200, ["CFP2 DCO"]],
  ["GBIC", "1G", 1, ["GBIC 1G"]],
  ["SFP56", "50G", 50, ["SFP56 50G"]],
  ["QSFP56", "200G", 200, ["QSFP56 DAC"]],
  ["QSFP112", "400G", 400, ["QSFP112 400G"]],
  ["OSFP112", "800G", 800, ["OSFP112 800G"]],
  ["SFP+", "10G", 10, ["direct attach cable", "active optical cable"]],
  ["QSFP28", "100G", 100, ["breakout cable", "MTP MPO cable"]],

  // Speed-specific reach variants
  ["SFP+", "10G", 10, ["10G 80km", "10G 40km", "10G 20km"]],
  ["QSFP28", "100G", 100, ["100G 80km", "100G 40km"]],
  ["QSFP-DD", "400G", 400, ["400G 80km", "400G 120km", "400G 2km"]],
  ["OSFP", "800G", 800, ["800G 2km", "800G 10km"]],

  // Vendor-specific coding searches
  ["SFP+", "10G", 10, ["Cisco compatible", "Juniper compatible"]],
  ["QSFP28", "100G", 100, ["Arista compatible"]],
  ["SFP+", "10G", 10, ["Nokia compatible", "Huawei compatible"]],

  // Generic searches to catch stragglers
  ["SFP+", "10G", 10, ["transceiver SR", "transceiver LR", "transceiver ER", "transceiver ZR"]],
  ["SFP", "1G", 1, ["transceiver BiDi"]],
  ["QSFP-DD", "400G", 400, ["coherent 400ZR", "coherent ZR+"]],
  ["CFP2-DCO", "100G", 100, ["coherent 100G"]],
  ["SFP+", "10G", 10, ["DWDM tunable"]],
  ["SFP", "1G", 1, ["WDM multiplexer", "media converter"]],
];

/** Flat list of search queries, expanded from `QUERY_GROUPS` in declaration order. */
const SEARCH_QUERIES: SearchQuery[] = QUERY_GROUPS.flatMap(
  ([formFactor, speed, speedGbps, queries]) =>
    queries.map((query) => ({ query, formFactor, speed, speedGbps })),
);
|
|
|
|
/** A catalog entry collected by the scraper before it is written to the database. */
interface Product {
  // ── Identity ──
  /** Product title as returned by the shop. */
  name: string;
  /** SKU when the source provides one, otherwise a stand-in derived from the name. */
  partNumber: string;
  /** Product page URL. */
  url: string;

  // ── Classification ──
  /** Form factor, e.g. "SFP+" or "QSFP28". */
  formFactor: string;
  /** Speed label, e.g. "10G". */
  speed: string;
  /** Line rate in Gbit/s. */
  speedGbps: number;

  // ── Pricing (absent when no usable price was found) ──
  price?: number;
  currency?: string;

  // ── Optical details derived from the product name ──
  reachLabel?: string;
  reachMeters?: number;
  fiberType?: string;
  wavelength?: string;

  /** Product image URL, when one is available. */
  imageUrl?: string;
}
|
|
|
|
/**
 * Returns a promise that resolves once a `setTimeout` of `ms` milliseconds fires.
 * Used to pace outgoing requests.
 */
function sleep(ms: number): Promise<void> {
  return new Promise<void>((done) => {
    setTimeout(done, ms);
  });
}
|
|
|
|
/**
 * Explicit distances looked for first, in priority order (longest first).
 * The metre patterns match a lowercase "m" only: in this catalog "100M" denotes
 * the 100 Mbit/s line rate, not a 100 m reach.
 */
const REACH_DISTANCE_PATTERNS: ReadonlyArray<readonly [RegExp, string, number]> = [
  [/\b120\s*km\b/i, "120km", 120000],
  [/\b80\s*km\b/i, "80km", 80000],
  [/\b40\s*km\b/i, "40km", 40000],
  [/\b20\s*km\b/i, "20km", 20000],
  [/\b10\s*km\b/i, "10km", 10000],
  [/\b2\s*km\b/i, "2km", 2000],
  [/\b500\s*m\b/, "500m", 500],
  [/\b300\s*m\b/, "300m", 300],
  [/\b100\s*m\b/, "100m", 100],
];

/** Any other explicit kilometre figure (e.g. "70km", "1.4 km"). */
const REACH_ANY_KM_PATTERN = /\b(\d+(?:\.\d+)?)\s*km\b/i;

/**
 * Reach implied by an optics-type keyword. Only consulted when the name carries
 * no explicit distance. Case-sensitive on purpose so ordinary words do not match.
 */
const REACH_KEYWORD_PATTERNS: ReadonlyArray<readonly [RegExp, string, number]> = [
  [/\bLR4\b/, "10km", 10000],
  [/\bLR\b/, "10km", 10000],
  [/\bER4?\b/, "40km", 40000],
  [/\bZR4?\b/, "80km", 80000],
  [/\bSR4?\b/, "100m", 100],
  [/\bDR4?\b/, "500m", 500],
  [/\bFR4?\b/, "2km", 2000],
  [/\bCWDM4\b/i, "2km", 2000],
  [/\bPSM4\b/i, "500m", 500],
];

/**
 * Derives the optical reach from a product name.
 *
 * Resolution order:
 *  1. a well-known explicit distance (table above, longest first),
 *  2. any other explicit "<n> km" figure,
 *  3. the nominal reach of an optics keyword (LR, ER, ZR, SR, ...).
 *
 * @param text Product name or description.
 * @returns The reach label and its value in metres, or `undefined` when the
 *          text gives no hint.
 */
function detectReach(text: string): { label: string; meters: number } | undefined {
  for (const [regex, label, meters] of REACH_DISTANCE_PATTERNS) {
    if (regex.test(text)) return { label, meters };
  }

  // An explicit distance printed in the name is more trustworthy than a keyword guess.
  const km = REACH_ANY_KM_PATTERN.exec(text);
  if (km) {
    const value = parseFloat(km[1] ?? "");
    if (Number.isFinite(value) && value > 0) {
      return { label: `${value}km`, meters: Math.round(value * 1000) };
    }
  }

  for (const [regex, label, meters] of REACH_KEYWORD_PATTERNS) {
    if (regex.test(text)) return { label, meters };
  }
  return undefined;
}
|
|
|
|
/**
 * Classifies the transmission medium from a product name.
 *
 * Short optics codes (LX/LR/ER/ZR, SX/SR) are matched as standalone tokens via
 * lookarounds, so they are recognised at the very start or end of the text as
 * well ("SFP+ LR"), and still not inside ordinary words ("transceiver").
 *
 * @param text Product name or description.
 * @returns "SMF", "MMF", "Copper", "AOC", or "" when nothing matches. The
 *          checks run in that order and the first hit wins.
 */
function detectFiber(text: string): string {
  if (/single.?mode|smf|(?<![a-z])(?:lx|lr|er|zr)(?![a-z])|bidi|cwdm|dwdm/i.test(text)) return "SMF";
  if (/multi.?mode|mmf|(?<![a-z])(?:sx|sr)(?![a-z])/i.test(text)) return "MMF";
  if (/copper|dac|twinax|rj45|base-t/i.test(text)) return "Copper";
  if (/aoc|active optical/i.test(text)) return "AOC";
  return "";
}
|
|
|
|
/**
 * Pulls the first wavelength figure (3–4 digits followed by "nm") out of a
 * product name.
 *
 * @returns The digits without the unit, or "" when no wavelength is present.
 */
function detectWavelength(text: string): string {
  const found = /(\d{3,4})\s*nm/i.exec(text);
  return found?.[1] ?? "";
}
|
|
|
|
/**
 * Infers the transceiver form factor from a product name.
 *
 * Checks run from most to least specific so that e.g. "QSFP28" is never
 * reported as "SFP28" and "CFP2-DCO" is never reduced to "CFP2". "CFP2-DCO" and
 * "GBIC" are included because the query tables in this file use them as form
 * factors; without a rule here they could only ever come from the default.
 *
 * @param name      Product name.
 * @param defaultFF Form factor to fall back on when the name gives no hint.
 * @returns The detected form factor, or `defaultFF`.
 */
function inferFormFactor(name: string, defaultFF: string): string {
  const lower = name.toLowerCase();
  const mentionsQsfp = lower.includes("qsfp");

  if (lower.includes("osfp224")) return "OSFP224";
  if (lower.includes("osfp112")) return "OSFP112";
  // A name mentioning both OSFP and QSFP is left to the QSFP checks below.
  if (lower.includes("osfp") && !mentionsQsfp) return "OSFP";
  if (lower.includes("qsfp-dd800")) return "QSFP-DD800";
  if (lower.includes("qsfp-dd")) return "QSFP-DD";
  if (lower.includes("qsfp112")) return "QSFP112";
  if (lower.includes("qsfp56")) return "QSFP56";
  if (lower.includes("qsfp28")) return "QSFP28";
  if (lower.includes("qsfp+") || lower.includes("qsfp plus")) return "QSFP+";
  if (lower.includes("sfp56")) return "SFP56";
  if (lower.includes("sfp28")) return "SFP28";
  if (lower.includes("sfp+") || lower.includes("sfp plus")) return "SFP+";
  // Must precede the plain CFP2 check, which would otherwise swallow it.
  if (/cfp2[-\s]?dco/.test(lower)) return "CFP2-DCO";
  if (lower.includes("cfp2")) return "CFP2";
  if (lower.includes("xfp")) return "XFP";
  if (lower.includes("gbic")) return "GBIC";
  if (/\bsfp\b/.test(lower) && !mentionsQsfp) return "SFP";
  return defaultFF;
}
|
|
|
|
/** Gbit/s tiers recognised in product names, highest first so the fastest rate wins. */
const SPEED_TIERS_GBPS = [800, 400, 200, 100, 50, 40, 25, 10, 1] as const;

/**
 * Name patterns and the line rate (Gbit/s) each one implies, in priority order.
 *
 * The "G" may be followed by a common suffix (GBASE, GbE, Gbps, Gb, GE) so that
 * spellings such as "10GBASE-T" or "100GBASE-LR4" are recognised; "100GHz"
 * (DWDM channel spacing) is deliberately not matched. Built once at module load.
 */
const SPEED_PATTERNS: ReadonlyArray<readonly [RegExp, number]> = [
  [/\b1\.6\s*T(?:bps|b)?\b/i, 1600],
  ...SPEED_TIERS_GBPS.map(
    (gbps) => [new RegExp(`\\b${gbps}\\s*G(?:BASE|bE|bps|b|E)?\\b`, "i"), gbps] as const,
  ),
  // IEEE names without a "G": 1000BASE-x is 1 Gbit/s, 100BASE-x is 100 Mbit/s.
  [/\b1000\s*BASE/i, 1],
  [/\b100\s*BASE/i, 0.1],
];

/**
 * Infers the line rate from a product name.
 *
 * @param name        Product name.
 * @param defaultGbps Rate to fall back on when the name gives no hint.
 * @returns The rate in Gbit/s of the first matching pattern, or `defaultGbps`.
 */
function inferSpeed(name: string, defaultGbps: number): number {
  for (const [regex, gbps] of SPEED_PATTERNS) {
    if (regex.test(name)) return gbps;
  }
  return defaultGbps;
}
|
|
|
|
/**
 * Formats a line rate given in Gbit/s as a short label.
 *
 * - 1000 and above → terabit label ("1.6T")
 * - between 0 and 1 → megabit label ("100M"), matching the "100M" label this
 *   file's query table uses for the 0.1 Gbit/s tier (previously "0.1G")
 * - everything else → gigabit label ("10G")
 */
function speedLabel(gbps: number): string {
  if (gbps >= 1000) return `${gbps / 1000}T`;
  // Math.round guards against floating-point noise such as 0.1 * 1000.
  if (gbps > 0 && gbps < 1) return `${Math.round(gbps * 1000)}M`;
  return `${gbps}G`;
}
|
|
|
|
/** One hit returned by `searchProducts`. */
interface SearchResult {
  /** Product title shown in the search result. */
  title: string;
  /** Product page URL as found in the response. */
  url: string;
  /** Price as text, when one was found; still to be parsed into a number. */
  price?: string;
  /** SKU, when the response carries one. */
  sku?: string;
}
|
|
|
|
/**
 * Converts a numeric fragment made of digits, dots and commas into a plain
 * decimal string: "2,921.60" → "2921.60", "2.921,60" → "2921.60",
 * "39,64" → "39.64", "2,921" → "2921".
 *
 * @returns The normalised string, or `undefined` when the fragment is not a number.
 */
function normalizePriceText(raw: string): string | undefined {
  // Captured fragments can carry stray punctuation ("39.64." at a sentence end).
  const digits = raw.trim().replace(/^[.,]+|[.,]+$/g, "");
  if (!/^\d[\d.,]*$/.test(digits)) return undefined;

  const lastComma = digits.lastIndexOf(",");
  const lastDot = digits.lastIndexOf(".");
  let plain = digits;

  if (lastComma >= 0 && lastDot >= 0) {
    // Both marks present: the right-most one is the decimal mark.
    if (lastComma > lastDot) {
      plain = digits.split(".").join("").replace(",", ".");
    } else {
      plain = digits.split(",").join("");
    }
  } else if (lastComma >= 0) {
    // Commas only: groups of three are thousands ("2,921"); otherwise decimal ("39,64").
    plain = /^\d{1,3}(?:,\d{3})+$/.test(digits)
      ? digits.split(",").join("")
      : digits.replace(",", ".");
  } else if (lastDot !== digits.indexOf(".")) {
    // Several dots: all but the last are grouping marks.
    plain = `${digits.slice(0, lastDot).split(".").join("")}.${digits.slice(lastDot + 1)}`;
  }

  return Number.isFinite(parseFloat(plain)) ? plain : undefined;
}

/**
 * Extracts a price from a search-response field, which may be price HTML
 * (data-price-amount="39.64"), text such as "2,921.60 EUR", or a bare number.
 *
 * @returns The price as a plain decimal string, or `undefined` when none is found.
 */
function extractPrice(priceField: unknown): string | undefined {
  if (!priceField) return undefined;
  const s = String(priceField);

  // Prefer the machine-readable attribute when it is present.
  const attrMatch = s.match(/data-price-amount="([\d.]+)"/);
  if (attrMatch?.[1]) return attrMatch[1];

  // Price text like "2,921.60 EUR" or "39.64 EUR" (thousands separators allowed).
  const textMatch = s.match(/(\d[\d.,]*)\s*(?:EUR|€)/i);
  if (textMatch?.[1]) {
    const normalized = normalizePriceText(textMatch[1]);
    if (normalized !== undefined) return normalized;
  }

  // Bare number (strip thousands-separator commas first).
  const num = parseFloat(s.replace(/,/g, ""));
  if (!isNaN(num) && num > 0) return String(num);
  return undefined;
}

/**
 * Turns one untyped JSON item into a SearchResult.
 *
 * @returns `undefined` unless the item has both a title (`title` or `name`) and
 *          a URL (`url` or `product_url`) as non-empty strings.
 */
function toSearchResult(item: unknown): SearchResult | undefined {
  if (typeof item !== "object" || item === null) return undefined;
  const rec = item as Record<string, unknown>;

  const title = rec.title || rec.name;
  const url = rec.url || rec.product_url;
  if (typeof title !== "string" || typeof url !== "string") return undefined;

  const sku =
    typeof rec.sku === "string" ? rec.sku
    : typeof rec.sku === "number" ? String(rec.sku)
    : undefined;

  return { title, url, price: extractPrice(rec.price), sku };
}

/**
 * Collects product hits from a parsed JSON response. Accepted shapes: a plain
 * array, an object with a `products` array, or an object whose array-valued
 * properties contain product-like items.
 */
function collectJsonResults(data: unknown): SearchResult[] {
  let candidates: unknown[] = [];

  if (Array.isArray(data)) {
    candidates = data;
  } else if (typeof data === "object" && data !== null) {
    const rec = data as Record<string, unknown>;
    candidates = Array.isArray(rec.products)
      ? rec.products
      : Object.values(rec).flatMap((val) => (Array.isArray(val) ? val : []));
  }

  const results: SearchResult[] = [];
  for (const candidate of candidates) {
    const hit = toSearchResult(candidate);
    if (hit) results.push(hit);
  }
  return results;
}

/**
 * Fallback for non-JSON responses: scans the HTML for links to ".html" pages and
 * looks for a EUR price within the 500 characters that follow each link.
 */
function parseHtmlResults(html: string): SearchResult[] {
  const results: SearchResult[] = [];
  const linkRegex = /href="([^"]*\.html)"[^>]*>([^<]{3,})<\/a>/gi;

  let match: RegExpExecArray | null;
  while ((match = linkRegex.exec(html)) !== null) {
    const pUrl = match[1] ?? "";
    const title = (match[2] ?? "").trim();
    if (title.length < 5) continue;

    // Look for a price near this link.
    const context = html.slice(match.index, match.index + 500);
    const priceMatch =
      context.match(/(?:€|EUR)\s*([\d.,]+)/i) || context.match(/([\d.,]+)\s*(?:€|EUR)/i);

    results.push({
      title,
      url: pUrl.startsWith("http") ? pUrl : BASE + pUrl,
      // Normalise separators properly; replacing only the first "," with "."
      // turned "2,921.60" into "2.921.60", which parses as 2.921.
      price: priceMatch?.[1] ? normalizePriceText(priceMatch[1]) : undefined,
    });
  }
  return results;
}

/**
 * Queries the shop's search/suggest endpoint for `query` and returns the hits.
 *
 * The body is parsed as JSON when possible. Only a JSON *syntax* failure
 * triggers the HTML fallback, so an unexpected JSON shape yields fewer results
 * rather than a confusing re-parse of JSON text as HTML.
 *
 * @param query Search term.
 * @returns The product hits; an empty array when the response status is not OK.
 * @throws Whatever `fetch` rejects with (network error or the 15 s timeout).
 */
async function searchProducts(query: string): Promise<SearchResult[]> {
  const url = `${SEARCH_URL}?q=${encodeURIComponent(query)}`;
  const resp = await fetch(url, { headers: HEADERS, signal: AbortSignal.timeout(15000) });
  if (!resp.ok) {
    console.warn(` Search "${query}" returned HTTP ${resp.status}`);
    return [];
  }

  const text = await resp.text();

  let data: unknown;
  try {
    data = JSON.parse(text);
  } catch {
    // Not JSON — parse as HTML
    return parseHtmlResults(text);
  }
  return collectJsonResults(data);
}
|
|
|
|
/** Prices at or above this value (or not above zero) are treated as parse errors and dropped. */
const MAX_PLAUSIBLE_PRICE = 100000;

/** Items requested per GraphQL page. */
const GRAPHQL_PAGE_SIZE = 20;

/** Safety stop: never request more than this many pages for one GraphQL search term. */
const GRAPHQL_MAX_PAGES = 50;

/** Name fragments that mark non-transceiver products (trays, tools, accessories). */
const ACCESSORY_KEYWORDS = [
  "tray", "tool", "loopback", "cleaning", "sticker",
  "flexbox", "adapter", "attenuator", "coupler",
];

/** GraphQL search terms with the form factor / speed assumed when a name gives no hint. */
const GRAPHQL_QUERIES = [
  { search: "1G SFP", defaultFF: "SFP", defaultGbps: 1 },
  { search: "SFP LX", defaultFF: "SFP", defaultGbps: 1 },
  { search: "SFP SX", defaultFF: "SFP", defaultGbps: 1 },
  { search: "SFP ZX", defaultFF: "SFP", defaultGbps: 1 },
  { search: "SFP+", defaultFF: "SFP+", defaultGbps: 10 },
  { search: "SFP28", defaultFF: "SFP28", defaultGbps: 25 },
  { search: "QSFP+", defaultFF: "QSFP+", defaultGbps: 40 },
  { search: "QSFP28", defaultFF: "QSFP28", defaultGbps: 100 },
  { search: "QSFP-DD", defaultFF: "QSFP-DD", defaultGbps: 400 },
  { search: "QSFP56", defaultFF: "QSFP56", defaultGbps: 200 },
  { search: "OSFP", defaultFF: "OSFP", defaultGbps: 400 },
  { search: "XFP", defaultFF: "XFP", defaultGbps: 10 },
  { search: "CFP2", defaultFF: "CFP2", defaultGbps: 100 },
  { search: "GBIC", defaultFF: "GBIC", defaultGbps: 1 },
  { search: "SFP56", defaultFF: "SFP56", defaultGbps: 50 },
  { search: "DAC", defaultFF: "SFP+", defaultGbps: 10 },
  { search: "AOC", defaultFF: "SFP+", defaultGbps: 10 },
  { search: "AEC", defaultFF: "OSFP", defaultGbps: 800 },
  { search: "breakout", defaultFF: "QSFP28", defaultGbps: 100 },
  { search: "BiDi", defaultFF: "SFP", defaultGbps: 1 },
  { search: "CWDM", defaultFF: "SFP", defaultGbps: 1 },
  { search: "DWDM", defaultFF: "SFP", defaultGbps: 1 },
  { search: "coherent", defaultFF: "QSFP-DD", defaultGbps: 400 },
  { search: "800G", defaultFF: "OSFP", defaultGbps: 800 },
  // Same default as the Phase 1 "1.6T" queries, which use OSFP224.
  { search: "1.6T", defaultFF: "OSFP224", defaultGbps: 1600 },
];

/** Fields requested for each product by the GraphQL query. */
interface GraphqlProductItem {
  name?: string;
  sku?: string;
  url_key?: string;
  small_image?: { url?: string };
  price_range?: {
    minimum_price?: {
      final_price?: { value?: number; currency?: string };
    };
  };
}

/** Envelope of the GraphQL `products` response. */
interface GraphqlProductsResponse {
  data?: {
    products?: {
      total_count: number;
      items: Array<GraphqlProductItem | null>;
    };
  };
}

/** Readable message for anything that may have been thrown. */
function describeScrapeError(err: unknown): string {
  return err instanceof Error ? err.message : String(err);
}

/** Resolves a possibly relative product URL against the shop origin. */
function absoluteProductUrl(url: string): string {
  return url.startsWith("http") ? url : BASE + url;
}

/**
 * Strips the vendor-compatibility suffix (e.g. ":Sx", ":Ci", ":Ju") from a SKU.
 * The part before ":" is the canonical FLEXOPTIX part number.
 */
function canonicalSku(sku: string): string {
  const cut = sku.indexOf(":");
  return cut >= 0 ? sku.slice(0, cut) : sku;
}

/**
 * Parses a price string into a number. The last "." or "," is taken as the
 * decimal mark and any earlier ones as grouping marks, so "2921.60", "39,64",
 * "2,921.60", "2.921,60" and "2.921.60" all parse to the intended amount.
 */
function parsePriceAmount(text: string): number | undefined {
  const raw = text.trim();
  const cut = Math.max(raw.lastIndexOf("."), raw.lastIndexOf(","));
  const plain =
    cut < 0 ? raw : `${raw.slice(0, cut).replace(/[.,]/g, "")}.${raw.slice(cut + 1)}`;
  const amount = parseFloat(plain);
  return Number.isFinite(amount) ? amount : undefined;
}

/** Returns `value` when it is a positive amount below MAX_PLAUSIBLE_PRICE, else `undefined`. */
function plausiblePrice(value: number | undefined): number | undefined {
  return typeof value === "number" && value > 0 && value < MAX_PLAUSIBLE_PRICE ? value : undefined;
}

/**
 * Phase 1: runs every entry of SEARCH_QUERIES through `searchProducts` and adds
 * unseen products to `catalog`, keyed by absolute product URL.
 *
 * @param catalog         Product map to fill.
 * @param placeholderKeys Receives the keys of products whose part number had to
 *                        be derived from the name because no SKU was returned.
 */
async function harvestSearchSuggestions(
  catalog: Map<string, Product>,
  placeholderKeys: Set<string>,
): Promise<void> {
  for (const sq of SEARCH_QUERIES) {
    console.log(` Searching: "${sq.query}"`);

    try {
      const results = await searchProducts(sq.query);
      let newCount = 0;

      for (const r of results) {
        // Skip non-product results
        if (!r.url || !r.title) continue;

        // Key on the absolute URL so Phase 2 (which builds absolute URLs)
        // recognises the same product instead of adding a duplicate.
        const url = absoluteProductUrl(r.url);
        if (catalog.has(url)) continue;

        const name = r.title;
        const gbps = inferSpeed(name, sq.speedGbps);
        const reach = detectReach(name);
        const price = plausiblePrice(r.price ? parsePriceAmount(r.price) : undefined);

        const sku = r.sku ? canonicalSku(r.sku) : "";
        if (!sku) placeholderKeys.add(url);

        catalog.set(url, {
          name,
          partNumber: sku || name.replace(/\s+/g, "-").slice(0, 80),
          url,
          price,
          // Only claim a currency when a usable price was kept.
          currency: price !== undefined ? "EUR" : undefined,
          formFactor: inferFormFactor(name, sq.formFactor),
          speed: speedLabel(gbps),
          speedGbps: gbps,
          reachLabel: reach?.label,
          reachMeters: reach?.meters,
          fiberType: detectFiber(name),
          wavelength: detectWavelength(name),
        });
        newCount++;
      }

      if (newCount > 0) console.log(` +${newCount} new (${results.length} results)`);
    } catch (err) {
      console.warn(` Search failed: ${describeScrapeError(err).slice(0, 60)}`);
    }

    // Rate limit: one search request per second.
    await sleep(1000);
  }
}

/**
 * Phase 2: pages through the GraphQL `products` search for every entry of
 * GRAPHQL_QUERIES. New products are added to `catalog`; products already found
 * in Phase 1 are enriched with price, image and the canonical part number.
 *
 * @param catalog         Product map shared with Phase 1, keyed by absolute URL.
 * @param placeholderKeys Keys whose part number is a name-derived stand-in; such
 *                        part numbers are always replaced by the GraphQL SKU.
 */
async function harvestGraphqlCatalog(
  catalog: Map<string, Product>,
  placeholderKeys: Set<string>,
): Promise<void> {
  const graphqlUrl = `${BASE}/graphql`;

  for (const gq of GRAPHQL_QUERIES) {
    let page = 1;
    let totalFetched = 0;

    while (true) {
      try {
        const query = `{
  products(search: "${gq.search}", pageSize: ${GRAPHQL_PAGE_SIZE}, currentPage: ${page}) {
    total_count
    items {
      name
      sku
      url_key
      small_image { url }
      price_range {
        minimum_price {
          final_price { value currency }
        }
      }
    }
  }
}`;

        const resp = await fetch(graphqlUrl, {
          method: "POST",
          headers: { "Content-Type": "application/json", ...HEADERS },
          body: JSON.stringify({ query }),
          signal: AbortSignal.timeout(20000),
        });

        if (!resp.ok) {
          console.warn(` GraphQL "${gq.search}" p${page} returned HTTP ${resp.status}`);
          break;
        }

        const payload = (await resp.json()) as GraphqlProductsResponse;
        const products = payload.data?.products;
        if (!products || !Array.isArray(products.items) || products.items.length === 0) break;

        let newCount = 0;
        for (const item of products.items) {
          if (!item || !item.name || !item.sku || !item.url_key) continue;

          // Skip non-transceiver products (trays, tools, accessories)
          const lower = item.name.toLowerCase();
          if (ACCESSORY_KEYWORDS.some((word) => lower.includes(word))) continue;

          const url = `${BASE}/en/${item.url_key}.html`;
          const finalPrice = item.price_range?.minimum_price?.final_price;
          const validPrice = plausiblePrice(finalPrice?.value);

          const rawImg = item.small_image?.url;
          const imageUrl = rawImg && !rawImg.includes("placeholder") ? rawImg : undefined;

          const baseSku = canonicalSku(item.sku);

          // Already added by Phase 1: enrich with GraphQL price/image/SKU.
          const existing = catalog.get(url);
          if (existing) {
            if (!existing.price && validPrice) {
              existing.price = validPrice;
              if (!existing.currency) existing.currency = finalPrice?.currency || "EUR";
            }
            if (!existing.imageUrl && imageUrl) existing.imageUrl = imageUrl;
            // A name-derived stand-in is usually longer than the real SKU, so a
            // pure length comparison would never replace it; replace it explicitly.
            if (
              placeholderKeys.has(url) ||
              !existing.partNumber ||
              existing.partNumber.length < baseSku.length
            ) {
              existing.partNumber = baseSku;
              placeholderKeys.delete(url);
            }
            continue;
          }

          const gbps = inferSpeed(item.name, gq.defaultGbps);
          const reach = detectReach(item.name);

          catalog.set(url, {
            name: item.name,
            partNumber: baseSku,
            url,
            price: validPrice,
            currency: finalPrice?.currency || "EUR",
            formFactor: inferFormFactor(item.name, gq.defaultFF),
            speed: speedLabel(gbps),
            speedGbps: gbps,
            reachLabel: reach?.label,
            reachMeters: reach?.meters,
            fiberType: detectFiber(item.name),
            wavelength: detectWavelength(item.name),
            imageUrl,
          });
          newCount++;
        }

        totalFetched += products.items.length;
        if (newCount > 0) console.log(` GraphQL "${gq.search}" p${page}: +${newCount} new (${products.items.length} items, ${products.total_count} total)`);

        // Stop once every reported item has been fetched, or at the page cap.
        if (totalFetched >= products.total_count || page >= GRAPHQL_MAX_PAGES) break;
        page++;
        await sleep(800);
      } catch (err) {
        console.warn(` GraphQL "${gq.search}" p${page} failed: ${describeScrapeError(err).slice(0, 60)}`);
        break;
      }
    }

    await sleep(500);
  }
}

/**
 * Writes every product to the database and records a price observation for
 * those that have a price. A failure on one product is logged and does not stop
 * the remaining writes.
 *
 * @returns How many price observations `upsertPriceObservation` reported as updated.
 */
async function persistProducts(
  vendorId: Awaited<ReturnType<typeof ensureVendor>>,
  products: Iterable<Product>,
): Promise<number> {
  let priceUpdates = 0;

  for (const product of products) {
    try {
      const txId = await findOrCreateScrapedTransceiver({
        partNumber: product.partNumber,
        vendorId,
        formFactor: product.formFactor,
        speedGbps: product.speedGbps,
        speed: product.speed,
        reachMeters: product.reachMeters,
        reachLabel: product.reachLabel,
        fiberType: product.fiberType,
        wavelengths: product.wavelength,
        category: "DataCenter",
        imageUrl: product.imageUrl,
      });

      if (product.price && product.price > 0) {
        const hash = contentHash({ price: product.price, part: product.partNumber });
        const updated = await upsertPriceObservation({
          transceiverId: txId,
          sourceVendorId: vendorId,
          price: product.price,
          currency: product.currency || "EUR",
          stockLevel: "in_stock",
          url: product.url,
          contentHash: hash,
        });
        if (updated) priceUpdates++;
      }
    } catch (err) {
      console.warn(` DB error: ${describeScrapeError(err).slice(0, 80)}`);
    }
  }

  return priceUpdates;
}

/**
 * Scrapes the Flexoptix catalog and stores products and prices in the database.
 *
 * Steps:
 *  1. Phase 1 — keyword searches against the search/suggest endpoint.
 *  2. Phase 2 — paged GraphQL enumeration, which adds further products and
 *     fills in price, image and canonical SKU for Phase 1 hits.
 *  3. Every collected product is written to the database.
 *
 * Search and per-product database failures are logged and skipped. A failure of
 * `ensureVendor` rejects the returned promise.
 */
export async function scrapeFlexoptixCatalog(): Promise<void> {
  console.log("=== Flexoptix Catalog Scraper Starting ===\n");

  const vendorId = await ensureVendor("Flexoptix", "reseller", "https://www.flexoptix.net", "https://www.flexoptix.net/en/");

  const allProducts = new Map<string, Product>();
  const placeholderKeys = new Set<string>();

  await harvestSearchSuggestions(allProducts, placeholderKeys);

  // ── Phase 2: GraphQL full catalog enumeration ──
  console.log("\n--- Phase 2: GraphQL Catalog Enumeration ---\n");
  await harvestGraphqlCatalog(allProducts, placeholderKeys);

  console.log(`\nTotal unique products after GraphQL: ${allProducts.size}`);
  console.log("Writing to database...\n");

  const priceUpdates = await persistProducts(vendorId, allProducts.values());

  console.log(`\n=== Flexoptix Catalog Complete: ${allProducts.size} products, ${priceUpdates} prices ===`);
}
|
|
|
|
// Run the scraper when this file is executed directly rather than imported.
if (require.main === module) {
  // On failure: report, ask the pool to close, and exit with a non-zero status.
  const abortRun = (err: unknown): void => {
    console.error("Fatal:", err);
    pool.end();
    process.exit(1);
  };

  scrapeFlexoptixCatalog()
    .then(() => pool.end())
    .catch(abortRun);
}
|