transceiver-db/packages/scraper/src/utils/resolve-price-availability.ts
2026-05-10 01:42:24 +02:00

197 lines
8.5 KiB
TypeScript

import { pool, recordVerificationEvidence } from "./db";
/**
 * Product categories that the resolver never touches.
 *
 * Passed as the `$1` parameter of the candidate query in `main`, which drops
 * every row whose category appears in this list.
 */
const EXCLUDED_CATEGORIES: string[] = [
  "NonTransceiver",
  "Accessory",
  "Adapter / Converter",
  "Switch / Media Converter",
  "Switch / Network Infrastructure",
  "NIC / Adapter",
  "Mux / Passive Optical",
  "Product Family",
  "Loopback / Test Module",
];
/**
 * Exact vendor name -> reason code explaining why that vendor has no public
 * price.
 *
 * Keys are compared case-sensitively against the full vendor name;
 * `getNoPublicPriceReason` consults this table before any regex pattern.
 */
const NO_PUBLIC_PRICE_VENDOR_POLICIES: Record<string, string> = {
  "ADTRAN": "oem_catalog_quote_only",
  "Alcatel-Lucent Enterprise": "oem_catalog_quote_only",
  "Allied Telesis": "oem_catalog_quote_only",
  "Arista Networks": "oem_catalog_quote_only",
  "Ascent Optics": "manufacturer_catalog_no_public_checkout_price",
  "Black Box": "enterprise_catalog_no_public_checkout_price",
  "Brocade": "oem_catalog_quote_only",
  "Calix": "oem_catalog_quote_only",
  "Cambium Networks": "oem_catalog_quote_only",
  "Check Point": "oem_catalog_quote_only",
  "Ciena": "oem_catalog_quote_only",
  "Cisco Systems": "oem_catalog_quote_only",
  "Coherent Corp": "manufacturer_catalog_no_public_checkout_price",
  "Dell Technologies": "oem_catalog_quote_only",
  "D-LINK": "oem_catalog_quote_only",
  "DZS": "oem_catalog_quote_only",
  "Eoptolink": "manufacturer_catalog_no_public_checkout_price",
  "Ericsson": "oem_catalog_quote_only",
  "EXFO": "test_equipment_vendor_quote_only",
  "Extreme Networks": "oem_catalog_quote_only",
  "F5 Networks": "oem_catalog_quote_only",
  "Fortinet": "oem_catalog_quote_only",
  "GAO Tek": "quote_request_only_no_public_checkout_price",
  "HPE Aruba Networking": "oem_catalog_quote_only",
  "Huawei": "oem_catalog_quote_only",
  "Infinera": "oem_catalog_quote_only",
  "Intel": "oem_catalog_quote_only",
  "II-VI / Coherent": "manufacturer_catalog_no_public_checkout_price",
  "Juniper Networks": "oem_catalog_quote_only",
  "Keysight Technologies": "test_equipment_vendor_quote_only",
  "LANCOM Systems": "oem_catalog_quote_only",
  "Lumentum": "manufacturer_catalog_no_public_checkout_price",
  "Marvell": "component_vendor_no_public_checkout_price",
  "MikroTik": "oem_catalog_quote_only",
  "Netgear": "oem_catalog_quote_only",
  "NVIDIA Mellanox": "oem_catalog_quote_only",
  "NVIDIA Networking": "oem_catalog_quote_only",
  "Nokia": "oem_catalog_quote_only",
  "Palo Alto Networks": "oem_catalog_quote_only",
  "Ribbon Communications": "oem_catalog_quote_only",
  "Ruckus Networks (CommScope)": "oem_catalog_quote_only",
  "SmartOptics": "manufacturer_catalog_no_public_checkout_price",
  "Solarflare": "oem_catalog_quote_only",
  "Spirent Communications": "test_equipment_vendor_quote_only",
  "Stordis": "oem_catalog_quote_only",
  "T&S Communication": "quote_request_only_or_price_zero_no_public_checkout_price",
  "Turbolink": "manufacturer_catalog_no_public_checkout_price",
  "Ubiquiti Networks": "oem_catalog_quote_only",
  "Viavi Solutions": "test_equipment_vendor_quote_only",
  "Zyxel": "oem_catalog_quote_only",
  "ZTE": "oem_catalog_quote_only",
};
/**
 * Case-insensitive vendor-name patterns paired with the reason code to use
 * when a name matches.
 *
 * Order matters: `getNoPublicPriceReason` walks this list from top to bottom
 * and returns the reason of the first pattern that matches.
 */
const NO_PUBLIC_PRICE_VENDOR_PATTERNS: [RegExp, string][] = [
  [
    /\b(cisco|juniper|arista|nokia|huawei|hpe|hewlett|dell|extreme|brocade|foundry|ruckus|commscope|commScope|arris|aruba|viptela|pure storage|hitachi vantara|ibm storage)\b/i,
    "oem_catalog_quote_only",
  ],
  [
    /\b(check point|palo alto|fortinet|sonicwall|sophos|watchguard|barracuda|hillstone)\b/i,
    "security_vendor_catalog_quote_only",
  ],
  [
    /\b(hirschmann|siemens|ruggedcom|antaira|etherwan|red lion|korenix|perle|transition networks|rad data|moxa|omron|abb|beckhoff|belden|ge grid|phoenix contact)\b/i,
    "industrial_networking_catalog_no_public_checkout_price",
  ],
  [
    /\b(packetlight|harmonic|casa systems|fujitsu|fiberhome|nec|h3c|dlink|d-link|tp-link|netgear|zyxel|ubiquiti|lancom|mikrotik|senao|engenius|telco systems|tejas|teleste|packetfront|westermo|supermicro|coriant|sycamore|dragonwave|radiflow|datang|vecima|cradlepoint|ruijie|utstarcom|edgewater|grass valley|pica8|centec|ceragon|clearfield|cumulus networks|samsung networks|telrad|siklu|drivenets|pluribus|viasat|haivision|mavenir|audiocodes)\b/i,
    "oem_catalog_quote_only",
  ],
  [
    /\b(anritsu|keysight|viavi|exfo|ixia|spirent|tektronix|rohde|netscout|teledyne lecroy)\b/i,
    "test_equipment_vendor_quote_only",
  ],
  [
    /\b(intel|marvell|qlogic|chelsio|solarflare|emulex|lanner|kontron|advantech|source photonics|innolight|o-net|emcore|applied optoelectronics|cimc|sumitomo|lumentum|coherent|reflex photonics|ofs|corning|hisense broadband|oplink|accelink|neophotonics|skylane optics)\b/i,
    "component_vendor_no_public_checkout_price",
  ],
  [
    /\b(adva|ekinops|infinera|ciena|ribbon|nortel|3com|avaya|comnet|comtrend|evertz|ip infusion|stordis|calix|dzs|adtran|allied telesis|eci telecom|zte access|netapp|isol?an|curtiss-wright|l3harris|sierra wireless|black box|ericsson radio)\b/i,
    "oem_catalog_quote_only",
  ],
  [
    /\b(google cloud|microsoft azure|amazon web services|meta)\b/i,
    "hyperscaler_no_public_checkout_price",
  ],
  [
    /\b(rockwell automation|schneider electric|schweitzer engineering)\b/i,
    "industrial_networking_catalog_no_public_checkout_price",
  ],
  [
    /\b(sonic|vmware|versa networks)\b/i,
    "software_platform_no_public_transceiver_price",
  ],
];
/**
 * Vendor-name patterns for sellers that do publish prices.
 *
 * `getNoPublicPriceReason` checks these before the no-public-price patterns,
 * so a match here makes it return `undefined` without consulting them.
 */
const PUBLIC_PRICE_VENDOR_PATTERNS: RegExp[] = [
  /\b(10gtek|sfpcables|shopfiber24|gbics|fs\.com|flexoptix|naddod|qsfptek|atgbics|fiber24)\b/i,
];
/**
 * Resolves the reason code explaining why a vendor has no public price.
 *
 * Lookup order:
 * 1. Exact, case-sensitive match in `NO_PUBLIC_PRICE_VENDOR_POLICIES`.
 * 2. If the name matches any `PUBLIC_PRICE_VENDOR_PATTERNS` entry, the vendor
 *    is treated as having public prices and `undefined` is returned.
 * 3. First matching entry of `NO_PUBLIC_PRICE_VENDOR_PATTERNS`.
 *
 * @param vendorName Vendor name to classify.
 * @returns The reason code, or `undefined` when no no-public-price policy applies.
 */
function getNoPublicPriceReason(vendorName: string): string | undefined {
  // Only own keys count. A bare `table[vendorName]` lookup on a plain object
  // also resolves inherited members such as "constructor" or "toString",
  // which are truthy and would be returned as a bogus (non-string) reason.
  const exactReason = Object.prototype.hasOwnProperty.call(NO_PUBLIC_PRICE_VENDOR_POLICIES, vendorName)
    ? NO_PUBLIC_PRICE_VENDOR_POLICIES[vendorName]
    : undefined;
  if (exactReason) return exactReason;

  // Known public-price sellers short-circuit before the broader patterns.
  if (PUBLIC_PRICE_VENDOR_PATTERNS.some((pattern) => pattern.test(vendorName))) return undefined;

  const matched = NO_PUBLIC_PRICE_VENDOR_PATTERNS.find(([pattern]) => pattern.test(vendorName));
  return matched ? matched[1] : undefined;
}
/** One row returned by the candidate query in `main`. */
type Candidate = {
// Transceiver id (`t.id`); used as the target of the UPDATE in `main`.
id: string;
// Vendor name (`v.name`); drives the policy lookup.
vendorName: string;
// Part number (`t.part_number`); only used for logging and evidence.
partNumber: string;
// Product page URL (`t.product_page_url`); null when the row has none.
productUrl: string | null;
// Selected as COALESCE(t.price_status, 'unknown'); not read by the visible code.
priceStatus: string | null;
};
/**
 * Marks transceivers as `no_public_price` when their vendor is known not to
 * publish a public price.
 *
 * Configuration is read from the environment:
 * - `PRICE_AVAILABILITY_VENDOR`: optional comma-separated list of exact vendor
 *   names to restrict the run to.
 * - `PRICE_AVAILABILITY_LIMIT`: maximum number of candidate rows to load
 *   (default 1000).
 * - `PRICE_AVAILABILITY_APPLY`: `"1"` writes changes; anything else is a dry
 *   run that only logs what would be written.
 *
 * @throws {RangeError} If `PRICE_AVAILABILITY_LIMIT` does not parse to a
 *   non-negative integer.
 */
async function main(): Promise<void> {
  const vendorFilter = (process.env["PRICE_AVAILABILITY_VENDOR"] || "").trim();

  // Validate the limit up front: an unparsable or negative value would
  // otherwise reach the database as `LIMIT NaN` / a negative LIMIT and fail
  // there with an error that does not mention the environment variable.
  const rawLimit = process.env["PRICE_AVAILABILITY_LIMIT"] || "1000";
  const limit = parseInt(rawLimit, 10);
  if (!Number.isSafeInteger(limit) || limit < 0) {
    throw new RangeError(
      `PRICE_AVAILABILITY_LIMIT must be a non-negative integer, got ${JSON.stringify(rawLimit)}`,
    );
  }

  const apply = process.env["PRICE_AVAILABILITY_APPLY"] === "1";
  const vendorNames = vendorFilter
    ? vendorFilter.split(",").map((v) => v.trim()).filter(Boolean)
    : [];

  // Placeholders: $1 = excluded categories, $2 = row limit,
  // $3 = vendor names (bound only when a vendor filter was given).
  const vendorWhere = vendorNames.length > 0 ? "AND v.name = ANY($3)" : "";
  const params: unknown[] = [EXCLUDED_CATEGORIES, limit];
  if (vendorNames.length > 0) params.push(vendorNames);

  // NOTE(review): LIMIT is applied before the vendor-policy check below, so
  // rows that end up skipped still consume the limit — confirm this is intended.
  const result = await pool.query<Candidate>(
    `SELECT t.id,
v.name AS "vendorName",
t.part_number AS "partNumber",
t.product_page_url AS "productUrl",
COALESCE(t.price_status, 'unknown') AS "priceStatus"
FROM transceivers t
JOIN vendors v ON v.id = t.vendor_id
WHERE COALESCE(t.category, '') <> ALL($1)
AND COALESCE(t.data_confidence, 'unknown') != 'garbage'
AND COALESCE(t.price_verified, false) = false
AND COALESCE(t.price_status, 'needs_research') IN ('unknown', 'needs_research', 'ambiguous')
${vendorWhere}
ORDER BY v.name, t.part_number
LIMIT $2`,
    params,
  );

  let noPublicPrice = 0;
  let skipped = 0;
  console.log("=== Price availability resolver ===", { vendorNames, limit, apply, count: result.rows.length });

  for (const row of result.rows) {
    // 10Gtek rows without a product URL get a dedicated reason; every other
    // row goes through the vendor policy tables.
    const reason = row.vendorName === "10Gtek" && !row.productUrl
      ? "not_listed_in_public_sfpcables_retail_catalog_after_full_crawl"
      : getNoPublicPriceReason(row.vendorName);

    if (!reason) {
      skipped++;
      continue;
    }

    if (!apply) {
      console.log("dry-run price availability", {
        vendor: row.vendorName,
        partNumber: row.partNumber,
        outcome: "no_public_price",
        reason,
      });
      noPublicPrice++;
      continue;
    }

    // COALESCE keeps the first `price_unavailable_verified_at` timestamp if
    // one is already set.
    await pool.query(
      `UPDATE transceivers
SET price_status = 'no_public_price',
price_status_updated_at = NOW(),
price_unavailable_verified_at = COALESCE(price_unavailable_verified_at, NOW()),
price_unavailable_reason = $2,
updated_at = NOW()
WHERE id = $1`,
      [row.id, reason],
    );
    // NOTE(review): the status update and the evidence record are separate
    // awaited calls; whether they share a transaction is not visible here.
    await recordVerificationEvidence({
      transceiverId: row.id,
      verificationType: "price_unavailable",
      sourceUrl: row.productUrl || undefined,
      evidenceValue: {
        outcome: "no_public_price",
        reason,
        vendor: row.vendorName,
        partNumber: row.partNumber,
      },
      robotName: "verify:price-availability",
      confidence: 0.9,
    });
    noPublicPrice++;
  }

  console.log("Price availability resolver complete", { noPublicPrice, skipped, apply });
}
// CLI entry point: run the resolver only when this file is executed directly,
// not when it is imported by another module.
if (require.main === module) {
  const runFromCli = async (): Promise<void> => {
    try {
      await main();
      await pool.end();
    } catch (err) {
      console.error("Fatal:", err);
      // Start closing the pool, then exit immediately with a failure code;
      // the close is deliberately not awaited.
      pool.end();
      process.exit(1);
    }
  };
  void runFromCli();
}