transceiver-db/packages/scraper/src/robots/flexoptix-api-sync.ts
Rene Fichtmueller a20094755d feat(scraper): Flexoptix REST API sync robot + scheduler integration
Replaces the GraphQL/search-based Flexoptix scraper with a proper
Magento 2 REST API integration that delivers authoritative SKUs,
prices, stock levels and compatibility data.

New files:
- packages/scraper/src/robots/flexoptix-api-sync.ts
  Self-contained robot: auth → paginated fetch → normalize → DB write.
  Reads FLEXOPTIX_API_BASE_URL / _USERNAME / _PASSWORD from env.
  Returns { fetched, normalized, skipped, priceWrites, stockWrites }.
  No file intermediary — in-memory pipeline.

- scripts/import-flexoptix-catalog.ts
  One-shot CLI importer for the Pulso-generated JSONL (Codex handover).

- docs/FLEXOPTIX_CATALOG_IMPORT.md
  Runbook for manual import + per-SKU specifications enrichment.

Scheduler changes:
- Added sync:flexoptix-catalog queue + work() handler
- Scheduled every 2h at 0 */2 * * * (same cadence as legacy job)
- scrape:pricing:flexoptix kept as legacy GraphQL fallback

Also includes Codex-generated additions from this sprint:
- audiocodes-oem scraper, seed-batch35/36/37, db.ts improvements,
  sql/102 verification reconcile, README + package.json updates
2026-05-13 16:36:33 +02:00

532 lines
20 KiB
TypeScript

/**
* Flexoptix API Sync Robot
*
* Fetches the full Flexoptix product catalog via their Magento 2 REST API,
* normalizes each product, and writes price + stock observations to TIP DB.
*
* Replaces the GraphQL/search-based scrapeFlexoptixCatalog() — the REST API
* provides authoritative SKUs, prices, stock levels, and compatibility data.
*
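* Required env vars:
* FLEXOPTIX_API_BASE_URL e.g. https://www.flexoptix.net
* FLEXOPTIX_API_USERNAME Magento customer API username (not needed when FLEXOPTIX_API_TOKEN is set)
* FLEXOPTIX_API_PASSWORD Magento customer API password (not needed when FLEXOPTIX_API_TOKEN is set)
*
* Optional:
* FLEXOPTIX_API_TOKEN Bearer token (skips username/password login)
* FLEXOPTIX_API_KEY Sent as the x-api-key request header when set
* FLEXOPTIX_API_AUTH_PATH Token endpoint path, defaults to /rest/V1/integration/customer/token
* FLEXOPTIX_API_PRODUCTS_PATH Product list endpoint path, defaults to /rest/V2/flexoptix/products
* FLEXOPTIX_API_CURRENCY Defaults to EUR
* FLEXOPTIX_API_LIMIT Page size, defaults to 500
* FLEXOPTIX_API_TIMEOUT_MS Request timeout in milliseconds, defaults to 30000
* FLEXOPTIX_API_COMPATIBILITIES 1/0, defaults to 1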
*/
import {
ensureVendor,
findOrCreateScrapedTransceiver,
upsertPriceObservation,
upsertStockObservation,
} from "../utils/db";
import { contentHash } from "../utils/hash";
// ── Types ──────────────────────────────────────────────────────────────────
/** Loosely-typed JSON object: string keys whose values must be narrowed before use. */
type JsonRecord = Record<string, unknown>;
/**
 * Normalized, in-memory representation of one catalog row as produced by
 * normalizeProduct(). Values that the API row did not supply and that could not
 * be inferred are null rather than omitted.
 */
interface CatalogProduct {
// Fixed tag identifying where the record came from.
source: "flexoptix-shop-api";
// Timestamp string supplied by the caller of normalizeProduct() (the sync passes an ISO-8601 string).
fetchedAt: string;
// SKU and title are mandatory: normalizeProduct() returns null when either is missing.
sku: string;
title: string;
url: string | null;
price: {
amount: number | null;
currency: string | null;
// "missing" when no amount could be read from the row, otherwise "api".
source: "api" | "missing";
fetchedAt: string;
};
stock: {
status: string | null;
quantity: number | null;
// "missing" only when both status and quantity are null.
source: "api" | "missing";
};
optics: {
formFactor: string | null;
// Line rate in Gbit/s.
speedGbps: number | null;
// Reach in metres.
reachM: number | null;
// Wavelength in nanometres.
wavelengthNm: number | null;
connector: string | null;
fiberType: string | null;
protocol: string | null;
coding: string | null;
// Flags come from an explicit row attribute when present; otherwise they are
// true when the title/attributes mention the technology and null when unknown.
bidi: boolean | null;
dwdm: boolean | null;
cwdm: boolean | null;
};
// One entry per compatibility record in the row that names a vendor.
compatibility: Array<{
vendor: string;
platform: string | null;
coding: string | null;
source: "api";
}>;
}
/** Counters returned by syncFlexoptixCatalog() for one sync run. */
export interface FlexoptixSyncResult {
// Number of raw rows returned by the API across all pages.
fetched: number;
// Number of rows that normalizeProduct() accepted (had both SKU and title).
normalized: number;
// Normalized products rejected by canImportProduct(); rows dropped during normalization are not included.
skipped: number;
// Number of imported products for which the price upsert reported a write.
priceWrites: number;
// Number of imported products for which the stock upsert reported a write.
stockWrites: number;
}
// ── Generic helpers ────────────────────────────────────────────────────────
/**
 * Type guard for plain JSON objects.
 *
 * @param value - Any value.
 * @returns true for non-null objects that are not arrays.
 */
function isRecord(value: unknown): value is JsonRecord {
  if (value === null || Array.isArray(value)) return false;
  return typeof value === "object";
}
/**
 * Flattens nested objects into a single-level record keyed by dot-separated paths.
 *
 * Only plain objects are descended into; arrays and primitives are stored
 * unchanged as leaves under the path that leads to them.
 *
 * @param value - Value to flatten.
 * @param prefix - Path accumulated so far ("" at the root).
 * @param output - Accumulator that receives the leaves; it is mutated and returned.
 * @returns The accumulator.
 */
function flatten(value: unknown, prefix = "", output: JsonRecord = {}): JsonRecord {
  // Arrays and primitives are leaves (isRecord() is false for both).
  if (!isRecord(value)) {
    output[prefix] = value;
    return output;
  }
  Object.entries(value).forEach(([key, child]) => {
    const path = prefix ? `${prefix}.${key}` : key;
    if (isRecord(child)) {
      flatten(child, path, output);
      return;
    }
    output[path] = child;
  });
  return output;
}
/**
 * Builds a case- and separator-insensitive lookup table for a row.
 *
 * The row is flattened to dot-separated paths, then every path is lower-cased
 * and stripped of underscores, whitespace and hyphens (dots are kept).
 * When two paths collapse to the same key, the later one wins.
 *
 * @param record - Raw row.
 * @returns Record keyed by the canonical path.
 */
function flatLookup(record: JsonRecord): JsonRecord {
  const lookup: JsonRecord = {};
  for (const [path, leaf] of Object.entries(flatten(record))) {
    const canonical = path.toLowerCase().replace(/[_\s-]+/g, "");
    lookup[canonical] = leaf;
  }
  return lookup;
}
/**
 * Returns the first non-blank value stored under any of the candidate keys.
 *
 * Candidates are canonicalised the same way as the lookup keys (lower-case,
 * no underscores/whitespace/hyphens). Two passes are made:
 *   1. exact key match, in candidate order;
 *   2. plain suffix match, in lookup-entry order. This is a raw `endsWith`,
 *      so "price" also matches a key such as "specialprice".
 * undefined, null and "" count as blank.
 *
 * @param flat - Lookup table with canonical keys.
 * @param keys - Candidate key names, most preferred first.
 * @returns The matched value, or null when nothing matched.
 */
function pick(flat: JsonRecord, keys: string[]): unknown {
  const isBlank = (candidate: unknown): boolean =>
    candidate === undefined || candidate === null || candidate === "";
  const wanted = keys.map(name => name.toLowerCase().replace(/[_\s-]+/g, ""));

  const exact = wanted.find(name => !isBlank(flat[name]));
  if (exact !== undefined) return flat[exact];

  const bySuffix = Object.entries(flat).find(
    ([flatKey, candidate]) => !isBlank(candidate) && wanted.some(name => flatKey.endsWith(name)),
  );
  return bySuffix ? bySuffix[1] : null;
}
/**
 * Coerces a scalar to a string.
 *
 * @param value - Any value.
 * @returns The trimmed string (null when empty after trimming), the String()
 *          form of a number or boolean, or null for every other type.
 */
function asString(value: unknown): string | null {
  switch (typeof value) {
    case "string": {
      const trimmed = value.trim();
      return trimmed === "" ? null : trimmed;
    }
    case "number":
    case "boolean":
      return String(value);
    default:
      return null;
  }
}
/**
 * Parses a number from a JSON scalar, tolerating currency symbols, units and
 * both English ("1,234.56") and German ("1.234,56") digit grouping.
 *
 * Rules for strings, applied after removing everything except digits, ",", "."
 * and "-":
 *   - no digit left            -> null (previously "on request" parsed as 0,
 *                                 because Number("") is 0);
 *   - both "," and "." present -> the right-most one is the decimal mark,
 *                                 the other is a grouping separator;
 *   - a single ","             -> decimal comma ("12,5" -> 12.5);
 *   - several "," or several "." -> grouping separators ("1,234,567").
 * A single "." is always read as a decimal point.
 *
 * @param value - Any value.
 * @returns A finite number, or null when the value is not numeric.
 */
function asNumber(value: unknown): number | null {
  if (typeof value === "number") return Number.isFinite(value) ? value : null;
  if (typeof value !== "string") return null;

  const stripped = value.replace(/[^\d,.-]/g, "");
  // Guard against Number("") === 0 for text without any digit.
  if (!/\d/.test(stripped)) return null;

  const lastComma = stripped.lastIndexOf(",");
  const lastDot = stripped.lastIndexOf(".");
  let normalized = stripped;
  if (lastComma !== -1 && lastDot !== -1) {
    normalized = lastComma > lastDot
      ? stripped.replace(/\./g, "").replace(",", ".")
      : stripped.replace(/,/g, "");
  } else if (lastComma !== -1) {
    normalized = stripped.indexOf(",") === lastComma
      ? stripped.replace(",", ".")
      : stripped.replace(/,/g, "");
  } else if (lastDot !== -1 && stripped.indexOf(".") !== lastDot) {
    normalized = stripped.replace(/\./g, "");
  }

  const parsed = Number(normalized);
  return Number.isFinite(parsed) ? parsed : null;
}
/**
 * Coerces a JSON scalar to a boolean.
 *
 * Accepts real booleans, the numbers 1 and 0, and the case-insensitive strings
 * "true"/"yes"/"ja"/"1" and "false"/"no"/"nein"/"0". Numeric 1/0 is accepted
 * so that it is handled the same way as the string forms "1"/"0".
 *
 * @param value - Any value.
 * @returns true/false when the value is a recognised flag, otherwise null.
 */
function asBoolean(value: unknown): boolean | null {
  if (typeof value === "boolean") return value;
  if (typeof value === "number") {
    if (value === 1) return true;
    if (value === 0) return false;
    return null;
  }
  if (typeof value !== "string") return null;
  const normalized = value.trim().toLowerCase();
  if (["true", "yes", "ja", "1"].includes(normalized)) return true;
  if (["false", "no", "nein", "0"].includes(normalized)) return false;
  return null;
}
// ── Inference helpers ──────────────────────────────────────────────────────
/**
 * Derives BiDi / DWDM / CWDM flags from free text.
 *
 * All non-empty inputs are joined and lower-cased before matching. A flag is
 * true when its keyword pattern matches and null otherwise; this function
 * never returns false because absence of a keyword proves nothing.
 *
 * @param values - Text fragments (title, protocol, ...); null entries are ignored.
 * @returns The three flags.
 */
function inferBoolFlags(
  ...values: Array<string | null>
): { bidi: boolean | null; dwdm: boolean | null; cwdm: boolean | null } {
  const haystack = values.filter(Boolean).join(" ").toLowerCase();
  const mentions = (pattern: RegExp): true | null => (pattern.test(haystack) ? true : null);
  const bidi = mentions(/\bbidi\b|bi-directional|bidirectional|simplex/);
  const dwdm = mentions(/\bdwdm\b|itu channel|itu grid/);
  const cwdm = mentions(/\bcwdm\b/);
  return { bidi, dwdm, cwdm };
}
/**
 * Extracts a reach in metres from an attribute value or a product title.
 *
 * Accepted inputs:
 *   - a finite number, or a string that is nothing but a number: taken as metres;
 *   - text containing "<n> km" (also "kilometer(s)"/"kilometre(s)"): n * 1000;
 *   - text containing "<n> m" (also "meter(s)"/"metre(s)"): n.
 * The metre unit must end at a word boundary so that "10 Mbps" or "100 MHz"
 * are not read as distances.
 *
 * Free text without a distance unit yields null. The previous implementation
 * fell back to asNumber() on the whole string, which glues every digit in the
 * text together (e.g. a title without a reach produced a bogus metre value),
 * and turned an unparseable capture into a reach of 0.
 *
 * @param value - Attribute value or title.
 * @returns Reach in whole metres, or null when none can be determined.
 */
function parseReachMeters(value: unknown): number | null {
  if (typeof value === "number") return asNumber(value);
  const text = asString(value)?.toLowerCase();
  if (!text) return null;

  const km = text.match(/(\d[\d.,]*)\s*(?:km|kilomet(?:er|re)s?)/);
  if (km) {
    const kilometres = asNumber(km[1]);
    if (kilometres !== null) return Math.round(kilometres * 1000);
  }

  const m = text.match(/(\d[\d.,]*)\s*(?:m|met(?:er|re)s?)\b/);
  if (m) {
    const metres = asNumber(m[1]);
    if (metres !== null) return Math.round(metres);
  }

  // A bare numeric string (no unit) is interpreted as metres.
  return /^\d[\d.,]*$/.test(text) ? asNumber(text) : null;
}
/**
 * Extracts a line rate in Gbit/s from an attribute value or a product title.
 *
 * Resolution order:
 *   1. a finite number, or a string that is nothing but a number: taken as Gbit/s;
 *   2. "<n> G..." (G, Gb, Gbps, Gbit, 10GBASE-...): n;
 *   3. IEEE-style "<n>BASE" without a G (1000BASE-T, 100BASE-FX): n Mbit/s;
 *   4. "<n> M..." (M, Mb, Mbps, Mbit): n / 1000. This last rule cannot tell
 *      "100M" (speed) from "100m" (reach), so it is only a last resort.
 *
 * The previous implementation ran asNumber() on the raw value first. For free
 * text that concatenates every digit in the string ("SFP+ 10G LR 1310nm 10km"
 * became 10131010), so the unit-aware patterns were almost never reached.
 *
 * @param value - Attribute value or title.
 * @returns Speed in Gbit/s, or null when none can be determined.
 */
function parseSpeedGbps(value: unknown): number | null {
  if (typeof value === "number") return asNumber(value);
  const text = asString(value)?.toLowerCase();
  if (!text) return null;

  // Only trust a direct numeric parse when the string is purely numeric.
  if (/^\d[\d.,]*$/.test(text)) return asNumber(text);

  const gbps = text.match(/(\d[\d.,]*)\s*(g|gb|gbps|gbit)/);
  if (gbps) return asNumber(gbps[1]);

  // NNNBASE-x without a "G" denotes Mbit/s (10GBASE-x is caught above).
  const base = text.match(/(?<![\d.,])(\d+)base/);
  if (base) {
    const mb = asNumber(base[1]);
    if (mb !== null) return mb / 1000;
  }

  const mbps = text.match(/(\d[\d.,]*)\s*(m|mb|mbps|mbit)/);
  if (mbps) {
    const mb = asNumber(mbps[1]);
    return mb === null ? null : mb / 1000;
  }
  return null;
}
/**
 * Finds a known transceiver form factor mentioned in free text.
 *
 * The candidate list is ordered so that longer, more specific names are tried
 * before the names they contain (e.g. "QSFP-DD800" before "QSFP-DD" before
 * "QSFP" before "SFP"); the first substring hit wins.
 *
 * @param values - Text fragments; null entries are ignored.
 * @returns The matching form factor in upper case, or null.
 */
function inferFormFactor(...values: Array<string | null>): string | null {
  const haystack = values.filter(Boolean).join(" ").toUpperCase();
  const candidates = [
    "QSFP-DD800", "QSFP-DD", "QSFP112", "QSFP56", "QSFP28", "QSFP+", "QSFP",
    "OSFP224", "OSFP112", "OSFP",
    "SFP56", "SFP28", "SFP+", "SFP",
    "XFP", "CFP4", "CFP2", "CFP", "GBIC",
  ];
  for (const candidate of candidates) {
    if (haystack.includes(candidate)) return candidate;
  }
  return null;
}
/**
 * Guesses the connector type from free text.
 *
 * Rules are evaluated in order against the upper-cased, joined input and the
 * first match wins: LC duplex, LC simplex, MPO/MTP, RJ45 (also triggered by
 * the word "COPPER"), SC.
 *
 * @param values - Text fragments; null entries are ignored.
 * @returns A connector label, or null when no rule matches.
 */
function inferConnector(...values: Array<string | null>): string | null {
  const haystack = values.filter(Boolean).join(" ").toUpperCase();
  const rules: Array<{ label: string; matches: (text: string) => boolean }> = [
    { label: "LC-Duplex", matches: text => text.includes("LC-DUPLEX") || text.includes("LC DUPLEX") },
    { label: "LC-Simplex", matches: text => text.includes("LC SIMPLEX") },
    { label: "MPO/MTP", matches: text => /\bMPO\b|\bMTP\b/.test(text) },
    { label: "RJ45", matches: text => /\bRJ45\b|COPPER/.test(text) },
    { label: "SC", matches: text => /\bSC\b/.test(text) },
  ];
  const hit = rules.find(rule => rule.matches(haystack));
  return hit ? hit.label : null;
}
/**
 * Guesses the medium (multimode fibre, singlemode fibre or copper) from free text.
 *
 * Patterns are tried in the order MMF, SMF, Copper against the lower-cased,
 * joined input; the first match wins.
 *
 * @param values - Text fragments; null entries are ignored.
 * @returns "MMF", "SMF", "Copper", or null when nothing matches.
 */
function inferFiberType(...values: Array<string | null>): string | null {
  const haystack = values.filter(Boolean).join(" ").toLowerCase();
  const rules: Array<[RegExp, string]> = [
    [/multimode|\bmmf\b|om[1-5]/, "MMF"],
    [/singlemode|single-mode|\bsmf\b|os2/, "SMF"],
    [/copper|rj45|dac/, "Copper"],
  ];
  for (const [pattern, label] of rules) {
    if (pattern.test(haystack)) return label;
  }
  return null;
}
/**
 * Extracts the first wavelength ("<number> nm") mentioned in free text.
 *
 * The number must have 3-4 integer digits and may carry a decimal fraction, so
 * DWDM channel wavelengths such as "1550.12nm" are returned in full. The
 * match may not start in the middle of a longer number: previously
 * "1550.120nm" yielded 120 and "15500nm" yielded 5500.
 *
 * @param values - Text fragments; null entries are ignored.
 * @returns Wavelength in nanometres, or null when none is found.
 */
function inferWavelengthNm(...values: Array<string | null>): number | null {
  const text = values.filter(Boolean).join(" ");
  // (?<![\d.]) keeps the match from starting inside a longer number or its fraction.
  const raw = text.match(/(?<![\d.])(\d{3,4}(?:\.\d+)?)\s*nm/i)?.[1];
  if (raw === undefined) return null;
  const nm = Number(raw);
  return Number.isFinite(nm) ? nm : null;
}
// ── Normalization ──────────────────────────────────────────────────────────
/**
 * Reads the vendor-compatibility list attached to a raw row.
 *
 * The list is taken from the first of `compatibility`, `compatibilities` or
 * `vendorCompatibility` that is not null/undefined. Anything that is not an
 * array yields an empty result; non-object entries and entries without a
 * vendor name are dropped.
 *
 * @param row - Raw product row.
 * @returns One entry per usable compatibility record, in source order.
 */
function extractCompatibility(row: JsonRecord): CatalogProduct["compatibility"] {
  const collected: CatalogProduct["compatibility"] = [];
  const rawCompat = row.compatibility ?? row.compatibilities ?? row.vendorCompatibility;
  if (!Array.isArray(rawCompat)) return collected;

  for (const entry of rawCompat) {
    if (!isRecord(entry)) continue;
    const flat = flatLookup(entry);
    const vendor = asString(pick(flat, ["vendor", "manufacturer", "brand", "systemVendor"]));
    if (!vendor) continue;
    const platform = asString(pick(flat, ["platform", "device", "switch", "model", "series"]));
    const coding = asString(pick(flat, ["coding", "code", "eeprom", "originalPartNumber"]));
    collected.push({ vendor, platform, coding, source: "api" });
  }
  return collected;
}
/**
 * Converts one raw API row into a CatalogProduct.
 *
 * Every attribute is looked up under several candidate key names (see pick()).
 * Optical attributes that the row does not provide are inferred from the title.
 *
 * Changes compared with the previous version:
 *   - an empty FLEXOPTIX_API_CURRENCY now falls back to "EUR" instead of
 *     producing an empty currency (which suppressed the price write later);
 *   - speed and reach fall back to the title not only when the attribute is
 *     absent but also when it is present yet unparseable.
 *
 * @param row - Raw product row from the API.
 * @param fetchedAt - Timestamp recorded on the product and on its price.
 * @returns The normalized product, or null when the row has no SKU or no title.
 */
function normalizeProduct(row: JsonRecord, fetchedAt: string): CatalogProduct | null {
  const flat = flatLookup(row);
  const title = asString(pick(flat, ["title", "name", "productName", "label"]));
  const sku = asString(pick(flat, ["sku", "articleNumber", "partNumber", "productCode"]));
  if (!sku || !title) return null;

  const url = asString(pick(flat, ["url", "productUrl", "canonicalUrl", "link"]));

  // Price: a currency is only assumed when there actually is an amount.
  const amount = asNumber(pick(flat, ["price", "priceNet", "netPrice", "grossPrice", "amount"]));
  const defaultCurrency = process.env["FLEXOPTIX_API_CURRENCY"]?.trim() || "EUR";
  const currency = asString(pick(flat, ["currency", "priceCurrency", "currencyCode"]))
    ?? (amount === null ? null : defaultCurrency);

  // Stock: derive a status from the quantity when the row has no explicit status.
  const quantity = asNumber(pick(flat, ["stock", "stockQuantity", "quantity", "availableQuantity"]));
  const status = asString(pick(flat, ["stockStatus", "availability", "deliveryStatus"]))
    ?? (quantity === null ? null : quantity > 0 ? "in_stock" : "out_of_stock");

  // Optics: explicit attribute first, title inference second.
  const formFactor = asString(pick(flat, ["formFactor", "form", "moduleType", "transceiverType"]))
    ?? inferFormFactor(title);
  const protocol = asString(pick(flat, ["protocol", "standard", "ethernetStandard"]));
  const connector = asString(pick(flat, ["connector", "interface", "mediaConnector"])) ?? inferConnector(title);
  const fiberType = asString(pick(flat, ["fiberType", "fiber", "mode"])) ?? inferFiberType(title);
  const coding = asString(pick(flat, ["coding", "vendorCoding"]));
  const wavelength = asNumber(pick(flat, ["wavelengthNm", "wavelength", "lambda"]))
    ?? inferWavelengthNm(title);
  const speedGbps = parseSpeedGbps(pick(flat, ["speedGbps", "speed", "rate", "dataRate"]))
    ?? parseSpeedGbps(title);
  const reachM = parseReachMeters(pick(flat, ["reachM", "reach", "distance", "transmissionDistance"]))
    ?? parseReachMeters(title);
  const flags = inferBoolFlags(title, protocol, formFactor, fiberType);

  return {
    source: "flexoptix-shop-api",
    fetchedAt,
    sku,
    title,
    url,
    price: {
      amount,
      currency,
      source: amount === null ? "missing" : "api",
      fetchedAt,
    },
    stock: {
      status,
      quantity,
      source: status === null && quantity === null ? "missing" : "api",
    },
    optics: {
      formFactor,
      speedGbps,
      reachM,
      wavelengthNm: wavelength,
      connector,
      fiberType,
      protocol,
      coding,
      bidi: asBoolean(pick(flat, ["bidi", "bidirectional"])) ?? flags.bidi,
      dwdm: asBoolean(pick(flat, ["dwdm"])) ?? flags.dwdm,
      cwdm: asBoolean(pick(flat, ["cwdm"])) ?? flags.cwdm,
    },
    compatibility: extractCompatibility(row),
  };
}
// ── Import helpers ─────────────────────────────────────────────────────────
/**
 * Decides whether a normalized product carries enough data to be written to the DB.
 *
 * A product qualifies when it has a non-empty SKU, title and form factor and
 * both speed and reach are known (not null).
 *
 * @param product - Normalized product.
 * @returns true when the product can be imported.
 */
function canImportProduct(product: CatalogProduct): boolean {
  const { sku, title, optics } = product;
  if (!sku || !title || !optics.formFactor) return false;
  return optics.speedGbps !== null && optics.reachM !== null;
}
/**
 * Formats a reach for display.
 *
 * @param reachM - Reach in metres, or null.
 * @returns "<n>km" for whole kilometres of 1000 m and above, "<n>m" otherwise,
 *          or undefined when the reach is unknown.
 */
function reachLabel(reachM: number | null): string | undefined {
  if (reachM === null) return undefined;
  const isWholeKilometres = reachM >= 1000 && reachM % 1000 === 0;
  return isWholeKilometres ? `${reachM / 1000}km` : `${reachM}m`;
}
/**
 * Formats a line rate for display.
 *
 * @param speedGbps - Speed in Gbit/s, or null.
 * @returns "<n>T" from 1000 Gbit/s upwards, "<n>G" below that, or undefined
 *          when the speed is unknown.
 */
function speedLabel(speedGbps: number | null): string | undefined {
  if (speedGbps === null) return undefined;
  const isTerabit = speedGbps >= 1000;
  return isTerabit ? `${speedGbps / 1000}T` : `${speedGbps}G`;
}
/**
 * Assigns a product category from keywords in the title and protocol.
 *
 * Rules are checked in order against the lower-cased text; the first match
 * wins: DAC, AOC, Coherent. Everything else is "DataCenter".
 *
 * @param product - Normalized product.
 * @returns One of "DAC", "AOC", "Coherent" or "DataCenter".
 */
function categoryFor(product: CatalogProduct): string {
  const haystack = `${product.title} ${product.optics.protocol ?? ""}`.toLowerCase();
  const rules: Array<[RegExp, string]> = [
    [/\bdac\b|direct attach|copper/, "DAC"],
    [/\baoc\b|active optical/, "AOC"],
    [/coherent|zr|dco/, "Coherent"],
  ];
  for (const [pattern, category] of rules) {
    if (pattern.test(haystack)) return category;
  }
  return "DataCenter";
}
/**
 * Persists one normalized product.
 *
 * Steps, in order:
 *   1. find or create the transceiver row for the SKU;
 *   2. upsert a price observation, but only when both an amount and a
 *      currency are known;
 *   3. upsert a stock observation (always).
 *
 * @param product - Normalized product that passed canImportProduct().
 * @param vendorId - ID of the vendor the observations are attributed to.
 * @returns The values returned by the price and stock upserts; priceWritten
 *          is false when step 2 was skipped.
 */
async function importProduct(
  product: CatalogProduct,
  vendorId: string,
): Promise<{ priceWritten: boolean; stockWritten: boolean }> {
  const { sku, source, price, stock, optics } = product;
  // DB helpers take undefined, not null, for absent values.
  const productUrl = product.url ?? undefined;
  const stockLevel = stock.status ?? "unknown";
  const quantityAvailable = stock.quantity ?? undefined;

  const transceiverId = await findOrCreateScrapedTransceiver({
    partNumber: sku,
    vendorId,
    productUrl,
    formFactor: optics.formFactor ?? undefined,
    speedGbps: optics.speedGbps ?? undefined,
    speed: speedLabel(optics.speedGbps),
    reachMeters: optics.reachM ?? undefined,
    reachLabel: reachLabel(optics.reachM),
    fiberType: optics.fiberType ?? undefined,
    wavelengths: optics.wavelengthNm === null ? undefined : `${optics.wavelengthNm}nm`,
    category: categoryFor(product),
  });

  const priceWritten = price.amount !== null && price.currency
    ? await upsertPriceObservation({
      transceiverId,
      sourceVendorId: vendorId,
      price: price.amount,
      currency: price.currency,
      stockLevel,
      quantityAvailable,
      url: productUrl,
      contentHash: contentHash({
        source,
        sku,
        price: price.amount,
        currency: price.currency,
        fetchedAt: price.fetchedAt,
      }),
    })
    : false;

  const stockWritten = await upsertStockObservation({
    transceiverId,
    sourceVendorId: vendorId,
    stockLevel,
    quantityAvailable,
    priceNet: price.amount ?? undefined,
    productUrl,
    priceCurrency: price.currency ?? undefined,
    // 2 when an exact quantity is known, 1 when only a status is available.
    stockConfidence: stock.quantity === null ? 1 : 2,
  });

  return { priceWritten, stockWritten };
}
// ── API client ─────────────────────────────────────────────────────────────
/**
 * Reads and validates the connection settings from the environment.
 *
 * Values are trimmed; a variable that is unset, empty or whitespace-only is
 * reported as null. Previously an empty variable came back as "", so an empty
 * FLEXOPTIX_API_TOKEN next to valid credentials was handed to callers as a
 * (useless) token instead of null.
 *
 * @returns Base URL plus either a token or a username/password pair.
 * @throws Error when FLEXOPTIX_API_BASE_URL is missing, or when neither a
 *         token nor a complete username/password pair is configured.
 */
function validateEnv(): { baseUrl: string; username: string | null; password: string | null; token: string | null } {
  const read = (name: string): string | null => process.env[name]?.trim() || null;

  const baseUrl = read("FLEXOPTIX_API_BASE_URL");
  if (!baseUrl) {
    throw new Error("FLEXOPTIX_API_BASE_URL is required for Flexoptix API sync");
  }
  const token = read("FLEXOPTIX_API_TOKEN");
  const username = read("FLEXOPTIX_API_USERNAME");
  const password = read("FLEXOPTIX_API_PASSWORD");
  if (!token && (!username || !password)) {
    throw new Error("FLEXOPTIX_API_TOKEN or FLEXOPTIX_API_USERNAME+PASSWORD required");
  }
  return { baseUrl, username, password, token };
}
/**
 * Joins the configured base URL and an endpoint path.
 *
 * The path is always resolved relative to the base, so a path prefix on the
 * base URL (e.g. "https://host/shop") is preserved. Previously a leading "/"
 * on the path made the URL constructor discard that prefix, which defeated the
 * trailing-slash normalisation of the base. For a base URL without a path the
 * result is unchanged.
 *
 * @param baseUrl - Base URL, with or without trailing slash.
 * @param path - Endpoint path, with or without leading slash.
 * @returns The combined URL.
 * @throws TypeError when the result is not a valid URL.
 */
function buildUrl(baseUrl: string, path: string): URL {
  const base = baseUrl.endsWith("/") ? baseUrl : `${baseUrl}/`;
  const relativePath = path.replace(/^\/+/, "");
  return new URL(relativePath, base);
}
/**
 * Performs an HTTP request and returns the decoded JSON body.
 *
 * The request is aborted when it takes longer than `timeoutMs`. The body is
 * awaited inside the `try` block so the timeout also covers downloading and
 * parsing the body; returning the un-awaited promise would run `finally`
 * (and cancel the timer) as soon as the response headers arrive.
 *
 * @param url - Request URL.
 * @param init - fetch() options; `signal` is overridden by the timeout signal.
 * @param timeoutMs - Timeout in milliseconds; non-finite or non-positive
 *                    values fall back to 30000.
 * @returns The parsed JSON payload (unvalidated).
 * @throws Error on a non-2xx status; fetch/abort/JSON errors propagate unchanged.
 */
async function fetchJson(url: URL, init: RequestInit, timeoutMs: number): Promise<unknown> {
  const effectiveTimeout = Number.isFinite(timeoutMs) && timeoutMs > 0 ? timeoutMs : 30_000;
  const controller = new AbortController();
  const timer = setTimeout(() => controller.abort(), effectiveTimeout);
  try {
    const response = await fetch(url, { ...init, signal: controller.signal });
    if (!response.ok) {
      // Query string is deliberately left out of the message.
      throw new Error(`HTTP ${response.status} from ${url.origin}${url.pathname}`);
    }
    const payload: unknown = await response.json();
    return payload;
  } finally {
    clearTimeout(timer);
  }
}
/**
 * Logs in with username/password and returns the bearer token.
 *
 * The credentials are POSTed as JSON to FLEXOPTIX_API_AUTH_PATH (default
 * "/rest/V1/integration/customer/token"); the response body is expected to be
 * a JSON string containing the token. An empty FLEXOPTIX_API_AUTH_PATH now
 * falls back to the default instead of resolving to the bare base URL.
 *
 * @param baseUrl - API base URL.
 * @param username - Login name.
 * @param password - Login password.
 * @param timeoutMs - Request timeout in milliseconds.
 * @returns The bearer token.
 * @throws Error when the response is not a string or is an empty string;
 *         request errors propagate from fetchJson().
 */
async function getBearerToken(baseUrl: string, username: string, password: string, timeoutMs: number): Promise<string> {
  const authPath = process.env["FLEXOPTIX_API_AUTH_PATH"]?.trim() || "/rest/V1/integration/customer/token";
  const url = buildUrl(baseUrl, authPath);
  const payload = await fetchJson(url, {
    method: "POST",
    headers: { accept: "application/json", "content-type": "application/json" },
    body: JSON.stringify({ username, password }),
  }, timeoutMs);
  if (typeof payload !== "string") {
    throw new Error("Flexoptix auth response was not a string token");
  }
  if (payload.trim() === "") {
    throw new Error("Flexoptix auth response was an empty token");
  }
  return payload;
}
/**
 * Downloads the complete product list, page by page.
 *
 * Configuration is read from the environment (empty values fall back to the
 * defaults): FLEXOPTIX_API_PRODUCTS_PATH, FLEXOPTIX_API_COMPATIBILITIES,
 * FLEXOPTIX_API_LIMIT (must be a positive integer) and FLEXOPTIX_API_CURRENCY.
 *
 * Paging stops when
 *   - a page is empty or shorter than the page size (normal end), or
 *   - a page is byte-for-byte identical to the previous one, or
 *   - the hard page cap is reached.
 * The last two are safety nets: without them a server that ignores or clamps
 * the `page` parameter would keep returning full pages and the loop would
 * never terminate.
 *
 * @param baseUrl - API base URL.
 * @param headers - Request headers (authorization etc.).
 * @param timeoutMs - Per-request timeout in milliseconds.
 * @returns All rows of all pages, in server order.
 * @throws Whatever fetchJson() throws for a failed page request.
 */
async function fetchAllProducts(baseUrl: string, headers: Record<string, string>, timeoutMs: number): Promise<JsonRecord[]> {
  const MAX_PAGES = 1000;
  const DEFAULT_LIMIT = 500;

  const productPath = process.env["FLEXOPTIX_API_PRODUCTS_PATH"]?.trim() || "/rest/V2/flexoptix/products";
  const compatibilities = process.env["FLEXOPTIX_API_COMPATIBILITIES"]?.trim() || "1";
  const currency = process.env["FLEXOPTIX_API_CURRENCY"]?.trim() || "EUR";
  const requestedLimit = parseInt(process.env["FLEXOPTIX_API_LIMIT"]?.trim() ?? "", 10);
  const limit = Number.isFinite(requestedLimit) && requestedLimit > 0 ? requestedLimit : DEFAULT_LIMIT;

  const allRows: JsonRecord[] = [];
  let previousPage: string | null = null;
  for (let page = 1; ; page++) {
    if (page > MAX_PAGES) {
      console.warn(`[${new Date().toISOString()}] Flexoptix API pagination stopped after ${MAX_PAGES} pages`);
      break;
    }
    const url = buildUrl(baseUrl, productPath);
    url.searchParams.set("currency", currency);
    url.searchParams.set("page", String(page));
    url.searchParams.set("limit", String(limit));
    if (compatibilities === "1") url.searchParams.set("compatibilities", "1");
    // The "specifications" parameter is intentionally not sent for the bulk pull
    // (specifications=1 causes HTTP 503 on the full catalog).
    const payload = await fetchJson(url, { headers }, timeoutMs);
    const rows = extractRows(payload);
    if (rows.length === 0) break;

    // A repeated page means the server is not advancing; stop instead of looping.
    const currentPage = JSON.stringify(rows);
    if (currentPage === previousPage) break;
    previousPage = currentPage;

    for (const row of rows) allRows.push(row);
    if (rows.length < limit) break;
  }
  return allRows;
}
/**
 * Pulls the list of product rows out of an API response of unknown shape.
 *
 * - An array payload is used directly.
 * - An object payload is searched for the first of the wrapper keys
 *   "products", "items", "data", "results", "rows" that holds an array.
 * - An object without such a key is treated as a single row.
 * - Anything else yields no rows.
 * Non-object list entries are discarded.
 *
 * @param payload - Decoded JSON response.
 * @returns The product rows.
 */
function extractRows(payload: unknown): JsonRecord[] {
  if (Array.isArray(payload)) return payload.filter(isRecord);
  if (!isRecord(payload)) return [];

  const envelope: JsonRecord = payload;
  const list = ["products", "items", "data", "results", "rows"]
    .map(key => envelope[key])
    .find(Array.isArray);
  return list ? list.filter(isRecord) : [envelope];
}
// ── Main export ────────────────────────────────────────────────────────────
/**
 * Runs one complete catalog sync: authenticate, fetch every page, normalize
 * the rows and write price/stock observations.
 *
 * Products are imported one after another; a failure for a single product is
 * logged (message truncated to 100 characters) and does not abort the run.
 *
 * Authentication: a configured, non-empty FLEXOPTIX_API_TOKEN is used as-is,
 * otherwise a token is obtained with username/password. An empty token is now
 * treated as "not configured"; previously `??` accepted it and the sync sent
 * an empty "Bearer " header instead of logging in.
 *
 * @returns Counters describing the run.
 * @throws Error when the environment is incomplete, authentication fails, a
 *         page request fails, or the vendor row cannot be ensured.
 */
export async function syncFlexoptixCatalog(): Promise<FlexoptixSyncResult> {
  const { baseUrl, username, password, token } = validateEnv();
  const timeoutMs = parseInt(process.env["FLEXOPTIX_API_TIMEOUT_MS"]?.trim() || "30000", 10);
  console.log(`[${new Date().toISOString()}] Flexoptix API sync starting`);

  let bearerToken = token;
  if (!bearerToken) {
    // validateEnv() already enforces this; the guard narrows the types without `!`.
    if (!username || !password) {
      throw new Error("FLEXOPTIX_API_TOKEN or FLEXOPTIX_API_USERNAME+PASSWORD required");
    }
    bearerToken = await getBearerToken(baseUrl, username, password, timeoutMs);
  }
  const headers: Record<string, string> = {
    accept: "application/json",
    authorization: `Bearer ${bearerToken}`,
  };
  const apiKey = process.env["FLEXOPTIX_API_KEY"]?.trim();
  if (apiKey) headers["x-api-key"] = apiKey;

  // One timestamp for the whole run so all observations share it.
  const fetchedAt = new Date().toISOString();
  const rawRows = await fetchAllProducts(baseUrl, headers, timeoutMs);
  console.log(`[${new Date().toISOString()}] Fetched ${rawRows.length} rows from Flexoptix API`);

  const products = rawRows
    .map(row => normalizeProduct(row, fetchedAt))
    .filter((p): p is CatalogProduct => p !== null);
  const importable = products.filter(canImportProduct);
  const skipped = products.length - importable.length;
  console.log(`[${new Date().toISOString()}] Normalized: ${products.length} | importable: ${importable.length} | skipped: ${skipped}`);

  const vendorId = await ensureVendor("Flexoptix", "compatible", "https://www.flexoptix.net", "https://www.flexoptix.net");
  let priceWrites = 0;
  let stockWrites = 0;
  for (const product of importable) {
    try {
      const result = await importProduct(product, vendorId);
      if (result.priceWritten) priceWrites++;
      if (result.stockWritten) stockWrites++;
    } catch (err: unknown) {
      const message = err instanceof Error ? err.message : String(err);
      console.warn(`[${new Date().toISOString()}] Flexoptix import error (${product.sku}): ${message.slice(0, 100)}`);
    }
  }

  console.log(`[${new Date().toISOString()}] Flexoptix API sync complete: ${importable.length} products, ${priceWrites} price writes, ${stockWrites} stock writes`);
  return {
    fetched: rawRows.length,
    normalized: products.length,
    skipped,
    priceWrites,
    stockWrites,
  };
}