feat: resolve OEM price status and part details
This commit is contained in:
parent
5819eb5eb0
commit
b58f7cee41
@ -17,6 +17,10 @@ healthRouter.get("/", async (_req: Request, res: Response) => {
|
|||||||
const verStats = await pool.query(`
|
const verStats = await pool.query(`
|
||||||
SELECT
|
SELECT
|
||||||
COUNT(*) FILTER (WHERE price_verified) AS price_verified,
|
COUNT(*) FILTER (WHERE price_verified) AS price_verified,
|
||||||
|
COUNT(*) FILTER (WHERE price_status = 'public_price') AS price_public_price,
|
||||||
|
COUNT(*) FILTER (WHERE price_status = 'no_public_price') AS price_no_public_price,
|
||||||
|
COUNT(*) FILTER (WHERE price_status = 'ambiguous') AS price_ambiguous,
|
||||||
|
COUNT(*) FILTER (WHERE price_status IN ('unknown', 'needs_research')) AS price_needs_research,
|
||||||
COUNT(*) FILTER (WHERE image_verified) AS image_verified,
|
COUNT(*) FILTER (WHERE image_verified) AS image_verified,
|
||||||
COUNT(*) FILTER (WHERE details_verified) AS details_verified,
|
COUNT(*) FILTER (WHERE details_verified) AS details_verified,
|
||||||
COUNT(*) FILTER (WHERE fully_verified) AS fully_verified,
|
COUNT(*) FILTER (WHERE fully_verified) AS fully_verified,
|
||||||
@ -101,6 +105,12 @@ healthRouter.get("/", async (_req: Request, res: Response) => {
|
|||||||
},
|
},
|
||||||
verification: {
|
verification: {
|
||||||
price_verified: Number(v.price_verified || 0),
|
price_verified: Number(v.price_verified || 0),
|
||||||
|
price_status: {
|
||||||
|
public_price: Number(v.price_public_price || 0),
|
||||||
|
no_public_price: Number(v.price_no_public_price || 0),
|
||||||
|
ambiguous: Number(v.price_ambiguous || 0),
|
||||||
|
needs_research: Number(v.price_needs_research || 0),
|
||||||
|
},
|
||||||
image_verified: Number(v.image_verified || 0),
|
image_verified: Number(v.image_verified || 0),
|
||||||
details_verified: Number(v.details_verified || 0),
|
details_verified: Number(v.details_verified || 0),
|
||||||
fully_verified: Number(v.fully_verified || 0),
|
fully_verified: Number(v.fully_verified || 0),
|
||||||
|
|||||||
@ -19,8 +19,10 @@
|
|||||||
"verify:catalog:details": "tsx src/utils/verify-catalog-details.ts",
|
"verify:catalog:details": "tsx src/utils/verify-catalog-details.ts",
|
||||||
"verify:quarantine:non-transceivers": "tsx src/utils/quarantine-non-transceivers.ts",
|
"verify:quarantine:non-transceivers": "tsx src/utils/quarantine-non-transceivers.ts",
|
||||||
"verify:product-page-assets": "tsx src/utils/verify-product-page-assets.ts",
|
"verify:product-page-assets": "tsx src/utils/verify-product-page-assets.ts",
|
||||||
|
"verify:part-number-details": "tsx src/utils/verify-part-number-details.ts",
|
||||||
"verify:normalize:product-urls": "tsx src/utils/normalize-product-urls.ts",
|
"verify:normalize:product-urls": "tsx src/utils/normalize-product-urls.ts",
|
||||||
"verify:fs:sku-aliases": "tsx src/utils/quarantine-fs-sku-aliases.ts",
|
"verify:fs:sku-aliases": "tsx src/utils/quarantine-fs-sku-aliases.ts",
|
||||||
|
"verify:price-availability": "tsx src/utils/resolve-price-availability.ts",
|
||||||
"verify:no-valid-competitor": "tsx src/utils/resolve-no-valid-competitor.ts",
|
"verify:no-valid-competitor": "tsx src/utils/resolve-no-valid-competitor.ts",
|
||||||
"verify:open-competitor-status": "tsx src/utils/resolve-open-competitor-status.ts",
|
"verify:open-competitor-status": "tsx src/utils/resolve-open-competitor-status.ts",
|
||||||
"scrape:cisco": "tsx src/scrapers/cisco-tmg.ts",
|
"scrape:cisco": "tsx src/scrapers/cisco-tmg.ts",
|
||||||
|
|||||||
@ -21,7 +21,7 @@ export const db = pool;
|
|||||||
|
|
||||||
export async function recordVerificationEvidence(params: {
|
export async function recordVerificationEvidence(params: {
|
||||||
transceiverId: string;
|
transceiverId: string;
|
||||||
verificationType: "price" | "image" | "details" | "competitor_match" | "competitor_no_match" | "competitor_ambiguous" | "artifact_quarantine";
|
verificationType: "price" | "price_unavailable" | "image" | "details" | "competitor_match" | "competitor_no_match" | "competitor_ambiguous" | "artifact_quarantine";
|
||||||
sourceUrl?: string;
|
sourceUrl?: string;
|
||||||
sourceVendorId?: string;
|
sourceVendorId?: string;
|
||||||
evidenceValue?: Record<string, unknown>;
|
evidenceValue?: Record<string, unknown>;
|
||||||
|
|||||||
194
packages/scraper/src/utils/resolve-price-availability.ts
Normal file
194
packages/scraper/src/utils/resolve-price-availability.ts
Normal file
@ -0,0 +1,194 @@
|
|||||||
|
import { pool, recordVerificationEvidence } from "./db";
|
||||||
|
|
||||||
|
/** Catalog categories that the candidate query excludes from price-availability resolution. */
const EXCLUDED_CATEGORIES = [
  "NonTransceiver",
  "Accessory",
  "Adapter / Converter",
  "Switch / Media Converter",
  "Switch / Network Infrastructure",
  "NIC / Adapter",
  "Mux / Passive Optical",
  "Product Family",
  "Loopback / Test Module",
];

/**
 * Exact vendor name -> reason code for vendors classified as having no public price.
 * Checked before any pattern, so an entry here always wins.
 */
const NO_PUBLIC_PRICE_VENDOR_POLICIES: Record<string, string> = {
  "ADTRAN": "oem_catalog_quote_only",
  "Alcatel-Lucent Enterprise": "oem_catalog_quote_only",
  "Allied Telesis": "oem_catalog_quote_only",
  "Arista Networks": "oem_catalog_quote_only",
  "Ascent Optics": "manufacturer_catalog_no_public_checkout_price",
  "Black Box": "enterprise_catalog_no_public_checkout_price",
  "Brocade": "oem_catalog_quote_only",
  "Calix": "oem_catalog_quote_only",
  "Cambium Networks": "oem_catalog_quote_only",
  "Check Point": "oem_catalog_quote_only",
  "Ciena": "oem_catalog_quote_only",
  "Cisco Systems": "oem_catalog_quote_only",
  "Coherent Corp": "manufacturer_catalog_no_public_checkout_price",
  "Dell Technologies": "oem_catalog_quote_only",
  "D-LINK": "oem_catalog_quote_only",
  "DZS": "oem_catalog_quote_only",
  "Eoptolink": "manufacturer_catalog_no_public_checkout_price",
  "Ericsson": "oem_catalog_quote_only",
  "EXFO": "test_equipment_vendor_quote_only",
  "Extreme Networks": "oem_catalog_quote_only",
  "F5 Networks": "oem_catalog_quote_only",
  "Fortinet": "oem_catalog_quote_only",
  "GAO Tek": "quote_request_only_no_public_checkout_price",
  "HPE Aruba Networking": "oem_catalog_quote_only",
  "Huawei": "oem_catalog_quote_only",
  "Infinera": "oem_catalog_quote_only",
  "Intel": "oem_catalog_quote_only",
  "II-VI / Coherent": "manufacturer_catalog_no_public_checkout_price",
  "Juniper Networks": "oem_catalog_quote_only",
  "Keysight Technologies": "test_equipment_vendor_quote_only",
  "LANCOM Systems": "oem_catalog_quote_only",
  "Lumentum": "manufacturer_catalog_no_public_checkout_price",
  "Marvell": "component_vendor_no_public_checkout_price",
  "MikroTik": "oem_catalog_quote_only",
  "Netgear": "oem_catalog_quote_only",
  "NVIDIA Mellanox": "oem_catalog_quote_only",
  "NVIDIA Networking": "oem_catalog_quote_only",
  "Nokia": "oem_catalog_quote_only",
  "Palo Alto Networks": "oem_catalog_quote_only",
  "Ribbon Communications": "oem_catalog_quote_only",
  "Ruckus Networks (CommScope)": "oem_catalog_quote_only",
  "SmartOptics": "manufacturer_catalog_no_public_checkout_price",
  "Solarflare": "oem_catalog_quote_only",
  "Spirent Communications": "test_equipment_vendor_quote_only",
  "Stordis": "oem_catalog_quote_only",
  "T&S Communication": "quote_request_only_or_price_zero_no_public_checkout_price",
  "Turbolink": "manufacturer_catalog_no_public_checkout_price",
  "Ubiquiti Networks": "oem_catalog_quote_only",
  "Viavi Solutions": "test_equipment_vendor_quote_only",
  "Zyxel": "oem_catalog_quote_only",
  "ZTE": "oem_catalog_quote_only",
};

/**
 * Case-insensitive whole-word patterns paired with the reason code they imply.
 * Evaluated in order; the first matching pattern decides the reason.
 */
const NO_PUBLIC_PRICE_VENDOR_PATTERNS: Array<[RegExp, string]> = [
  [/\b(cisco|juniper|arista|nokia|huawei|hpe|hewlett|dell|extreme|brocade|foundry|ruckus|commscope|arris|aruba|viptela|pure storage|hitachi vantara|ibm storage)\b/i, "oem_catalog_quote_only"],
  [/\b(check point|palo alto|fortinet|sonicwall|sophos|watchguard|barracuda|hillstone)\b/i, "security_vendor_catalog_quote_only"],
  [/\b(hirschmann|siemens|ruggedcom|antaira|etherwan|red lion|korenix|perle|transition networks|rad data|moxa|omron|abb|beckhoff|belden|ge grid|phoenix contact)\b/i, "industrial_networking_catalog_no_public_checkout_price"],
  [/\b(packetlight|harmonic|casa systems|fujitsu|fiberhome|nec|h3c|dlink|d-link|tp-link|netgear|zyxel|ubiquiti|lancom|mikrotik|senao|engenius|telco systems|tejas|teleste|packetfront|westermo|supermicro|coriant|sycamore|dragonwave|radiflow|datang|vecima|cradlepoint|ruijie|utstarcom|edgewater|grass valley|pica8|centec|ceragon|clearfield|cumulus networks|samsung networks|telrad|siklu|drivenets|pluribus|viasat|haivision|mavenir|audiocodes)\b/i, "oem_catalog_quote_only"],
  [/\b(anritsu|keysight|viavi|exfo|ixia|spirent|tektronix|rohde|netscout|teledyne lecroy)\b/i, "test_equipment_vendor_quote_only"],
  [/\b(intel|marvell|qlogic|chelsio|solarflare|emulex|lanner|kontron|advantech|source photonics|innolight|o-net|emcore|applied optoelectronics|cimc|sumitomo|lumentum|coherent|reflex photonics|ofs|corning|hisense broadband|oplink|accelink|neophotonics|skylane optics)\b/i, "component_vendor_no_public_checkout_price"],
  [/\b(adva|ekinops|infinera|ciena|ribbon|nortel|3com|avaya|comnet|comtrend|evertz|ip infusion|stordis|calix|dzs|adtran|allied telesis|eci telecom|zte access|netapp|isol?an|curtiss-wright|l3harris|sierra wireless|black box|ericsson radio)\b/i, "oem_catalog_quote_only"],
  [/\b(google cloud|microsoft azure|amazon web services|meta)\b/i, "hyperscaler_no_public_checkout_price"],
  [/\b(rockwell automation|schneider electric|schweitzer engineering)\b/i, "industrial_networking_catalog_no_public_checkout_price"],
  [/\b(sonic|vmware|versa networks)\b/i, "software_platform_no_public_transceiver_price"],
];

/** Vendors matching any of these patterns are never classified by the pattern list above. */
const PUBLIC_PRICE_VENDOR_PATTERNS = [
  /\b(10gtek|sfpcables|shopfiber24|gbics|fs\.com|flexoptix|naddod|qsfptek|atgbics|fiber24)\b/i,
];

/**
 * Resolve the "no public price" reason code for a vendor.
 *
 * Lookup order:
 *  1. exact name in NO_PUBLIC_PRICE_VENDOR_POLICIES,
 *  2. PUBLIC_PRICE_VENDOR_PATTERNS (a match means "not classified here"),
 *  3. first matching entry of NO_PUBLIC_PRICE_VENDOR_PATTERNS.
 *
 * @param vendorName Vendor name to classify.
 * @returns The reason code, or `undefined` when the vendor is not classified as no-public-price.
 */
function getNoPublicPriceReason(vendorName: string): string | undefined {
  // Own-property check: a bare index lookup would also resolve inherited keys
  // such as "constructor" or "toString" and return a truthy non-string value.
  if (Object.prototype.hasOwnProperty.call(NO_PUBLIC_PRICE_VENDOR_POLICIES, vendorName)) {
    return NO_PUBLIC_PRICE_VENDOR_POLICIES[vendorName];
  }

  if (PUBLIC_PRICE_VENDOR_PATTERNS.some((regex) => regex.test(vendorName))) return undefined;

  const matched = NO_PUBLIC_PRICE_VENDOR_PATTERNS.find(([regex]) => regex.test(vendorName));
  return matched?.[1];
}
|
||||||
|
|
||||||
|
/** Row shape returned by the candidate query in `main`. */
type Candidate = {
  id: string;
  vendorName: string;
  partNumber: string;
  productUrl: string | null;
  priceStatus: string | null;
};

/**
 * Price availability resolver.
 *
 * Selects transceivers whose price is unverified and whose price status is still
 * open (unknown / needs_research / ambiguous), and classifies those whose vendor
 * resolves to a no-public-price reason via `getNoPublicPriceReason`.
 *
 * Environment:
 *  - PRICE_AVAILABILITY_VENDOR: optional comma-separated list of exact vendor names.
 *  - PRICE_AVAILABILITY_LIMIT:  maximum number of rows to inspect (default 1000).
 *  - PRICE_AVAILABILITY_APPLY:  "1" to write changes; anything else is a dry run that only logs.
 *
 * @throws Error when PRICE_AVAILABILITY_LIMIT is not a non-negative integer.
 */
async function main(): Promise<void> {
  const vendorFilter = (process.env["PRICE_AVAILABILITY_VENDOR"] || "").trim();
  const rawLimit = process.env["PRICE_AVAILABILITY_LIMIT"] || "1000";
  const limit = parseInt(rawLimit, 10);
  // Fail early with a readable message instead of sending NaN to the SQL LIMIT clause.
  if (!Number.isInteger(limit) || limit < 0) {
    throw new Error(`PRICE_AVAILABILITY_LIMIT must be a non-negative integer, got "${rawLimit}"`);
  }
  const apply = process.env["PRICE_AVAILABILITY_APPLY"] === "1";
  const vendorNames = vendorFilter
    ? vendorFilter.split(",").map((v) => v.trim()).filter(Boolean)
    : [];

  // $3 is only bound when a vendor filter is present.
  const vendorWhere = vendorNames.length > 0 ? "AND v.name = ANY($3)" : "";
  const params: unknown[] = [EXCLUDED_CATEGORIES, limit];
  if (vendorNames.length > 0) params.push(vendorNames);

  const result = await pool.query<Candidate>(
    `SELECT t.id,
            v.name AS "vendorName",
            t.part_number AS "partNumber",
            t.product_page_url AS "productUrl",
            COALESCE(t.price_status, 'unknown') AS "priceStatus"
     FROM transceivers t
     JOIN vendors v ON v.id = t.vendor_id
     WHERE COALESCE(t.category, '') <> ALL($1)
       AND COALESCE(t.data_confidence, 'unknown') != 'garbage'
       AND COALESCE(t.price_verified, false) = false
       AND COALESCE(t.price_status, 'needs_research') IN ('unknown', 'needs_research', 'ambiguous')
       ${vendorWhere}
     ORDER BY v.name, t.part_number
     LIMIT $2`,
    params,
  );

  let noPublicPrice = 0;
  let skipped = 0;

  console.log("=== Price availability resolver ===", { vendorNames, limit, apply, count: result.rows.length });

  for (const row of result.rows) {
    const reason = getNoPublicPriceReason(row.vendorName);
    if (!reason) {
      // Vendor is not classified as no-public-price; the row keeps its current status.
      skipped++;
      continue;
    }

    if (!apply) {
      console.log("dry-run price availability", {
        vendor: row.vendorName,
        partNumber: row.partNumber,
        outcome: "no_public_price",
        reason,
      });
      noPublicPrice++;
      continue;
    }

    // price_unavailable_verified_at keeps its first value if the row was resolved before.
    await pool.query(
      `UPDATE transceivers
       SET price_status = 'no_public_price',
           price_status_updated_at = NOW(),
           price_unavailable_verified_at = COALESCE(price_unavailable_verified_at, NOW()),
           price_unavailable_reason = $2,
           updated_at = NOW()
       WHERE id = $1`,
      [row.id, reason],
    );

    await recordVerificationEvidence({
      transceiverId: row.id,
      verificationType: "price_unavailable",
      sourceUrl: row.productUrl || undefined,
      evidenceValue: {
        outcome: "no_public_price",
        reason,
        vendor: row.vendorName,
        partNumber: row.partNumber,
      },
      robotName: "verify:price-availability",
      confidence: 0.9,
    });
    noPublicPrice++;
  }

  console.log("Price availability resolver complete", { noPublicPrice, skipped, apply });
}

// CLI entry point: run only when this file is executed directly.
if (require.main === module) {
  main()
    .then(() => pool.end())
    .catch((err) => {
      console.error("Fatal:", err);
      pool.end();
      process.exit(1);
    });
}
|
||||||
160
packages/scraper/src/utils/verify-part-number-details.ts
Normal file
160
packages/scraper/src/utils/verify-part-number-details.ts
Normal file
@ -0,0 +1,160 @@
|
|||||||
|
import { pool, markDetailsVerified, recordVerificationEvidence } from "./db";
|
||||||
|
|
||||||
|
/** Catalog categories that the candidate query excludes from part-number detail verification. */
const EXCLUDED_CATEGORIES = [
  "NonTransceiver", "Accessory", "Adapter / Converter",
  "Switch / Media Converter", "Switch / Network Infrastructure", "NIC / Adapter",
  "Mux / Passive Optical", "Product Family", "Loopback / Test Module",
];

/** Row shape returned by the candidate query in `main`. */
type Candidate = {
  // identity
  id: string;
  vendorName: string;
  partNumber: string;
  productUrl: string | null;
  // detail columns already stored on the row
  formFactor: string | null;
  speedGbps: string | number | null;
  reachLabel: string | null;
  fiberType: string | null;
};
|
||||||
|
|
||||||
|
/**
 * Ordered part-number speed rules: [pattern, speed label, speed in Gbps, rule name].
 * The first matching rule wins, so higher rates are listed before the lower rates
 * whose digits they contain. The lowercase `d` in the decimal rules is the
 * decimal-point marker inserted by `inferSpeed`.
 */
const PART_NUMBER_SPEED_RULES: Array<[RegExp, string, number, string]> = [
  [/\b(800G|800GE|800GBE)\b|JCO800|8X100G/, "800G", 800, "800G token"],
  [/\b(400G|400GE|400GBE|400ZR)\b|JCO400|4X100G/, "400G", 400, "400G token"],
  [/\b(200G|200GE|200GBE)\b|2X100G/, "200G", 200, "200G token"],
  [/\b(128GFC)\b/, "128G FC", 128, "128GFC token"],
  [/\b(100G|100GE|100GBE|CGE)\b|100GBASE|4X25G|2X50G|10X10G/, "100G", 100, "100G token"],
  [/\b(50G|50GE|50GBE)\b/, "50G", 50, "50G token"],
  [/\b(40G|40GE|40GBE)\b|40GBASE/, "40G", 40, "40G token"],
  [/\b(32GFC)\b/, "32G FC", 32, "32GFC token"],
  [/\b(25G|25GE|25GBE)\b|25GBASE/, "25G", 25, "25G token"],
  [/\b(16GFC)\b/, "16G FC", 16, "16GFC token"],
  [/\b(10G|10GE|10GBE)\b|10GBASE|SFP10G|XFP-10GE/, "10G", 10, "10G token"],
  [/\b(2[d-]?5G|2[d-]?5GE|2[d-]?5GBE)\b/, "2.5G", 2.5, "2.5G token"],
  // 1.25G line rate is Gigabit Ethernet; reported as 1G.
  [/\b(1d25G|1d25GE|1d25GBE)\b/, "1G", 1, "1.25G token"],
  [/\b(1G|1GE|1000BASE)\b|CTP-SFP-1GE|SFP-GE|GLC-(SX|LX|LH|ZX|EX|BX|TE|T)\b/, "1G", 1, "1G token"],
];

/**
 * Infer the nominal speed of a transceiver from tokens in its part number.
 *
 * The part number is upper-cased and matched in two forms: as written, and with
 * every run of separators collapsed to a single "-".
 *
 * A decimal point between two digits is replaced by a lowercase `d` marker before
 * matching. `\b` treats "." as a word boundary, so without the marker the
 * fractional part of a decimal rate is read as its own token ("1.25G" matched
 * the 25G rule, "4.25G" likewise). The input is upper-cased first, so the marker
 * cannot collide with real part-number text.
 *
 * @param partNumber Vendor part number, any case.
 * @returns The speed label, speed in Gbps and matching rule name, or `null` when no rule matches.
 */
function inferSpeed(partNumber: string): { speed: string; speedGbps: number; rule: string } | null {
  const pn = partNumber.toUpperCase();
  // "1.25G" -> "1d25G": the marker is a word character, so the number stays one token.
  const masked = pn.replace(/(\d)\.(?=\d)/g, "$1d");
  // Collapse every other separator run (spaces, "_", "/", "+", ...) into a single "-".
  const compact = masked.replace(/[^A-Z0-9d]+/g, "-");

  for (const [regex, speed, speedGbps, rule] of PART_NUMBER_SPEED_RULES) {
    if (regex.test(compact) || regex.test(masked)) {
      return { speed, speedGbps, rule };
    }
  }
  return null;
}
|
||||||
|
|
||||||
|
/**
 * Part-number details verifier.
 *
 * Selects transceivers that already have part number, form factor, reach and fiber
 * type but either lack a speed or are not yet details-verified, infers the speed
 * from the part number with `inferSpeed`, and then:
 *  - writes the inferred speed when the stored speed is missing or 0,
 *  - calls `markDetailsVerified` and records `details` evidence for the row.
 *
 * Rows whose stored non-zero speed disagrees with the inferred speed are left
 * untouched and counted as `conflicts`: marking them verified would attach
 * evidence that contradicts the stored value.
 *
 * Environment:
 *  - PART_DETAILS_VENDOR: optional comma-separated list of exact vendor names.
 *  - PART_DETAILS_LIMIT:  maximum number of rows to inspect (default 1000).
 *  - PART_DETAILS_APPLY:  "1" to write changes; anything else is a dry run that only logs.
 *
 * @throws Error when PART_DETAILS_LIMIT is not a non-negative integer.
 */
async function main(): Promise<void> {
  const vendorFilter = (process.env["PART_DETAILS_VENDOR"] || "").trim();
  const rawLimit = process.env["PART_DETAILS_LIMIT"] || "1000";
  const limit = parseInt(rawLimit, 10);
  // Fail early with a readable message instead of sending NaN to the SQL LIMIT clause.
  if (!Number.isInteger(limit) || limit < 0) {
    throw new Error(`PART_DETAILS_LIMIT must be a non-negative integer, got "${rawLimit}"`);
  }
  const apply = process.env["PART_DETAILS_APPLY"] === "1";
  const vendorNames = vendorFilter.split(",").map((v) => v.trim()).filter(Boolean);

  // $3 is only bound when a vendor filter is present.
  const vendorWhere = vendorNames.length > 0 ? "AND v.name = ANY($3)" : "";
  const params: unknown[] = [EXCLUDED_CATEGORIES, limit];
  if (vendorNames.length > 0) params.push(vendorNames);

  const result = await pool.query<Candidate>(
    `SELECT t.id,
            v.name AS "vendorName",
            t.part_number AS "partNumber",
            t.product_page_url AS "productUrl",
            t.form_factor AS "formFactor",
            t.speed_gbps AS "speedGbps",
            t.reach_label AS "reachLabel",
            t.fiber_type AS "fiberType"
     FROM transceivers t
     JOIN vendors v ON v.id = t.vendor_id
     WHERE COALESCE(t.category, '') <> ALL($1)
       AND COALESCE(t.data_confidence, 'unknown') != 'garbage'
       AND COALESCE(t.part_number, '') != ''
       AND COALESCE(t.form_factor, '') != ''
       AND COALESCE(t.reach_label, '') != ''
       AND COALESCE(t.fiber_type, '') != ''
       AND (t.speed_gbps IS NULL OR t.speed_gbps = 0 OR COALESCE(t.details_verified, false) = false)
       ${vendorWhere}
     ORDER BY v.name, t.part_number
     LIMIT $2`,
    params,
  );

  let candidates = 0;
  let updated = 0;
  let details = 0;
  let skipped = 0;
  let conflicts = 0;

  console.log("=== Part-number details verifier ===", { vendorNames, limit, apply, count: result.rows.length });

  for (const row of result.rows) {
    const inferred = inferSpeed(row.partNumber);
    if (!inferred) {
      skipped++;
      continue;
    }

    // NULL and 0 both mean "no speed stored"; Number() never yields null.
    const currentSpeed = Number(row.speedGbps || 0);
    const shouldUpdateSpeed = currentSpeed === 0;

    // A stored speed that contradicts the inference needs review, not verification.
    if (!shouldUpdateSpeed && currentSpeed !== inferred.speedGbps) {
      conflicts++;
      console.warn("part details speed conflict", {
        vendor: row.vendorName,
        partNumber: row.partNumber,
        currentSpeed,
        inferred,
      });
      continue;
    }

    candidates++;

    if (!apply) {
      console.log("dry-run part details", {
        vendor: row.vendorName,
        partNumber: row.partNumber,
        currentSpeed,
        inferred,
        canMarkDetails: Boolean(row.formFactor && row.reachLabel && row.fiberType),
      });
      continue;
    }

    if (shouldUpdateSpeed) {
      await pool.query(
        `UPDATE transceivers
         SET speed = $2,
             speed_gbps = $3,
             updated_at = NOW()
         WHERE id = $1`,
        [row.id, inferred.speed, inferred.speedGbps],
      );
      updated++;
    }

    const marked = await markDetailsVerified({
      transceiverId: row.id,
      sourceUrl: row.productUrl || undefined,
    });
    if (marked) details++;

    await recordVerificationEvidence({
      transceiverId: row.id,
      verificationType: "details",
      sourceUrl: row.productUrl || undefined,
      evidenceValue: {
        rule: "part_number_speed_inference",
        partNumber: row.partNumber,
        speed: inferred.speed,
        speedGbps: inferred.speedGbps,
        inferenceRule: inferred.rule,
      },
      robotName: "verify:part-number-details",
      confidence: 0.95,
    });
  }

  console.log("Part-number details verifier complete", { candidates, updated, details, skipped, conflicts, apply });
}

// CLI entry point: run only when this file is executed directly.
if (require.main === module) {
  main()
    .then(() => pool.end())
    .catch((err) => {
      console.error("Fatal:", err);
      pool.end();
      process.exit(1);
    });
}
|
||||||
@ -156,9 +156,9 @@ function inferDetails(text: string): InferredDetails {
|
|||||||
[/\b32\s*gfc\b/i, "32G FC", 32],
|
[/\b32\s*gfc\b/i, "32G FC", 32],
|
||||||
[/\b16\s*gfc\b/i, "16G FC", 16],
|
[/\b16\s*gfc\b/i, "16G FC", 16],
|
||||||
[/\b10\s*g(?:base)?\b/i, "10G", 10],
|
[/\b10\s*g(?:base)?\b/i, "10G", 10],
|
||||||
[/\b1\.?25\s*g(?:base)?\b|\b1000base\b|\b1\s*g(?:base)?\b/i, "1G", 1],
|
[/\b1[\s.-]?25\s*g(?:b(?:ase)?|bps)?\b|\b1000base\b|\b1\s*g(?:base)?\b/i, "1G", 1],
|
||||||
[/(^|[^.\d-])25\s*g(?:base)?\b/i, "25G", 25],
|
[/(^|[^.\d-])25\s*g(?:base)?\b/i, "25G", 25],
|
||||||
[/\b2[.-]?5\s*g(?:base)?\b/i, "2.5G", 2.5],
|
[/\b2[\s.-]?5\s*g(?:base)?\b/i, "2.5G", 2.5],
|
||||||
];
|
];
|
||||||
for (const [regex, speed, speedGbps] of speedPatterns) {
|
for (const [regex, speed, speedGbps] of speedPatterns) {
|
||||||
if (regex.test(text)) {
|
if (regex.test(text)) {
|
||||||
|
|||||||
73
sql/105-price-status-and-unavailable-evidence.sql
Normal file
73
sql/105-price-status-and-unavailable-evidence.sql
Normal file
@ -0,0 +1,73 @@
|
|||||||
|
-- Migration 105: Explicit price availability state
--
-- Public price verification remains strict: price_verified requires a real
-- source-backed price observation. Quote-only/OEM/no-public-price cases are
-- represented separately so TIP can close research loops without inventing
-- prices.
--
-- Every statement is written to be safe to re-run.

ALTER TABLE transceivers
  ADD COLUMN IF NOT EXISTS price_status VARCHAR(32) NOT NULL DEFAULT 'unknown',
  ADD COLUMN IF NOT EXISTS price_status_updated_at TIMESTAMPTZ,
  ADD COLUMN IF NOT EXISTS price_unavailable_verified_at TIMESTAMPTZ,
  ADD COLUMN IF NOT EXISTS price_unavailable_reason TEXT;

-- Recreate the CHECK constraint. DROP ... IF EXISTS is scoped to this table,
-- unlike a lookup in pg_constraint by name alone, and matches how the evidence
-- table constraint is replaced below.
ALTER TABLE transceivers
  DROP CONSTRAINT IF EXISTS transceivers_price_status_check;

ALTER TABLE transceivers
  ADD CONSTRAINT transceivers_price_status_check
  CHECK (price_status IN (
    'unknown',
    'public_price',
    'no_public_price',
    'needs_research',
    'ambiguous'
  ));

-- Backfill:
--   * rows with a verified price become 'public_price';
--   * unverified rows still at 'unknown' (or NULL, if the column pre-existed as
--     nullable) become 'needs_research'.
-- The WHERE clause only selects rows whose status actually changes, so a re-run
-- does not rewrite rows that are already correct.
UPDATE transceivers
SET price_status = CASE
      WHEN price_verified = true THEN 'public_price'
      ELSE 'needs_research'
    END,
    price_status_updated_at = COALESCE(price_status_updated_at, NOW())
WHERE (price_verified = true AND price_status IS DISTINCT FROM 'public_price')
   OR (price_verified IS NOT TRUE AND COALESCE(price_status, 'unknown') = 'unknown');

CREATE INDEX IF NOT EXISTS idx_transceivers_price_status
  ON transceivers (price_status);

-- Partial index over rows resolved as having no public price.
CREATE INDEX IF NOT EXISTS idx_transceivers_no_public_price
  ON transceivers (price_unavailable_verified_at)
  WHERE price_status = 'no_public_price';

-- Extend the allowed evidence types with 'price_unavailable'.
ALTER TABLE transceiver_verification_evidence
  DROP CONSTRAINT IF EXISTS transceiver_verification_evidence_verification_type_check;

ALTER TABLE transceiver_verification_evidence
  ADD CONSTRAINT transceiver_verification_evidence_verification_type_check
  CHECK (
    verification_type::text = ANY (
      ARRAY[
        'price',
        'price_unavailable',
        'image',
        'details',
        'competitor_match',
        'competitor_no_match',
        'competitor_ambiguous',
        'artifact_quarantine'
      ]
    )
  );

COMMENT ON COLUMN transceivers.price_status IS
  'Resolution state for price evidence: public_price, no_public_price, needs_research, ambiguous, unknown. price_verified remains true only for real public price observations.';
|
||||||
@ -1,9 +1,50 @@
|
|||||||
# Current TIP Sync State
|
# Current TIP Sync State
|
||||||
|
|
||||||
Updated: 2026-05-09 23:09 UTC
|
Updated: 2026-05-09 23:15 UTC
|
||||||
|
|
||||||
## Newest Work
|
## Newest Work
|
||||||
|
|
||||||
|
- TIP continuation on 2026-05-10 local / 2026-05-09 UTC:
|
||||||
|
- added `verify:part-number-details`
|
||||||
|
- deterministic part-number speed inference for rows where form factor/reach/fiber already exist but `speed_gbps=0`
|
||||||
|
- dry-run caught Cisco `GLC-FE-*` as Fast Ethernet trap; rule hardened before apply
|
||||||
|
- live apply:
|
||||||
|
- Juniper Networks: `375` speed updates/details verified
|
||||||
|
- Cisco Systems: `176` speed updates/details verified
|
||||||
|
- evidence count `verify:part-number-details`: `551`
|
||||||
|
- health detail count moved to `16913`
|
||||||
|
- added migration `sql/105-price-status-and-unavailable-evidence.sql`
|
||||||
|
- new `transceivers.price_status`
|
||||||
|
- new `price_unavailable` evidence type
|
||||||
|
- strict rule remains: `price_verified` only means a real public price observation exists
|
||||||
|
- added `verify:price-availability`
|
||||||
|
- resolves quote-only/OEM/manufacturer/test-equipment/hyperscaler vendors to `price_status=no_public_price`
|
||||||
|
- writes `price_unavailable` evidence, does not fabricate price rows
|
||||||
|
- preserves real retail/source-discovery cases as `needs_research`
|
||||||
|
- Health API now exposes price status buckets
|
||||||
|
- live price-status result:
|
||||||
|
- `public_price=11414`
|
||||||
|
- `no_public_price=5595`
|
||||||
|
- `needs_research=186`
|
||||||
|
- `ambiguous=0`
|
||||||
|
- remaining price research is now limited to real retail/source-discovery vendors:
|
||||||
|
- `10Gtek=126`
|
||||||
|
- `SFPcables=31`
|
||||||
|
- `ShopFiber24=24`
|
||||||
|
- `ATGBICS=3`
|
||||||
|
- `Vcelink=2`
|
||||||
|
- SFPcables search tests for 10Gtek part numbers did not return reliable direct hits; remaining 10Gtek work is source/alias discovery, not no-public-price classification
|
||||||
|
- live health:
|
||||||
|
- active products: `17195`
|
||||||
|
- price verified: `11414`
|
||||||
|
- image verified: `12104`
|
||||||
|
- details verified: `16913`
|
||||||
|
- fully verified: `10505`
|
||||||
|
- competitor status: `matched=10775`, `no_valid_match=74`, `ambiguous=556`, `needs_research=5790`
|
||||||
|
- TIPLLM training pool updated with:
|
||||||
|
- part-number details verifier rules
|
||||||
|
- price_status/no-public-price model
|
||||||
|
|
||||||
- MAGATAMA all-lane RunPod training block started on 2026-05-09:
|
- MAGATAMA all-lane RunPod training block started on 2026-05-09:
|
||||||
- user requested all trainable LLM lanes via RunPod
|
- user requested all trainable LLM lanes via RunPod
|
||||||
- lanes in scope:
|
- lanes in scope:
|
||||||
|
|||||||
@ -0,0 +1,108 @@
|
|||||||
|
# 2026-05-10 — TIP Price Status + OEM Detail Resolution
|
||||||
|
|
||||||
|
## Part-Number Details Verifier
|
||||||
|
|
||||||
|
Added `packages/scraper/src/utils/verify-part-number-details.ts`.
|
||||||
|
|
||||||
|
Purpose:
|
||||||
|
|
||||||
|
- close detail gaps where form factor, reach and fiber are already present but `speed_gbps=0`
|
||||||
|
- use conservative part-number inference only
|
||||||
|
- write canonical `details` evidence with robot `verify:part-number-details`
|
||||||
|
|
||||||
|
Safety catch:
|
||||||
|
|
||||||
|
- Cisco `GLC-FE-*` appeared in dry-run
|
||||||
|
- these are Fast Ethernet, so the generic `GLC-* => 1G` rule was rejected
|
||||||
|
- final rule only infers 1G for safe patterns such as `GLC-SX`, `GLC-LX`, `GLC-LH`, `GLC-ZX`, `GLC-EX`, `GLC-BX`, `GLC-T`, `GLC-TE`
|
||||||
|
|
||||||
|
Live apply:
|
||||||
|
|
||||||
|
- Juniper Networks:
|
||||||
|
- `375` candidates updated
|
||||||
|
- `375` details verified
|
||||||
|
- Cisco Systems:
|
||||||
|
- `176` candidates updated
|
||||||
|
- `176` details verified
|
||||||
|
- evidence:
|
||||||
|
- `details | verify:part-number-details | 551`
|
||||||
|
|
||||||
|
Result:
|
||||||
|
|
||||||
|
- `details_verified` increased to `16913`
|
||||||
|
- Juniper detail gaps dropped from `173` to `43`
|
||||||
|
- Cisco detail gaps dropped from `146` to `80`
|
||||||
|
|
||||||
|
## Price Status Model
|
||||||
|
|
||||||
|
Added migration `sql/105-price-status-and-unavailable-evidence.sql`.
|
||||||
|
|
||||||
|
New fields:
|
||||||
|
|
||||||
|
- `transceivers.price_status`
|
||||||
|
- `transceivers.price_status_updated_at`
|
||||||
|
- `transceivers.price_unavailable_verified_at`
|
||||||
|
- `transceivers.price_unavailable_reason`
|
||||||
|
|
||||||
|
Allowed states:
|
||||||
|
|
||||||
|
- `public_price`
|
||||||
|
- `no_public_price`
|
||||||
|
- `needs_research`
|
||||||
|
- `ambiguous`
|
||||||
|
- `unknown`
|
||||||
|
|
||||||
|
Important semantic rule:
|
||||||
|
|
||||||
|
- `price_verified=true` still requires a real public price observation
|
||||||
|
- quote-only/OEM/no-checkout cases use `price_status=no_public_price`
|
||||||
|
- no price rows are fabricated
|
||||||
|
|
||||||
|
Added `price_unavailable` evidence type.
|
||||||
|
|
||||||
|
Added `packages/scraper/src/utils/resolve-price-availability.ts`.
|
||||||
|
|
||||||
|
Live apply:
|
||||||
|
|
||||||
|
- resolved quote-only/OEM/manufacturer/test-equipment/hyperscaler vendors to `no_public_price`
|
||||||
|
- wrote `5361` `price_unavailable` evidence rows
|
||||||
|
- preserved real retail/source-discovery vendors as `needs_research`
|
||||||
|
|
||||||
|
Health API now exposes:
|
||||||
|
|
||||||
|
- `price_status.public_price`
|
||||||
|
- `price_status.no_public_price`
|
||||||
|
- `price_status.ambiguous`
|
||||||
|
- `price_status.needs_research`
|
||||||
|
|
||||||
|
Live health after this pass:
|
||||||
|
|
||||||
|
- active products: `17195`
|
||||||
|
- price verified: `11414`
|
||||||
|
- price status:
|
||||||
|
- `public_price=11414`
|
||||||
|
- `no_public_price=5595`
|
||||||
|
- `needs_research=186`
|
||||||
|
- `ambiguous=0`
|
||||||
|
- image verified: `12104`
|
||||||
|
- details verified: `16913`
|
||||||
|
- fully verified: `10505`
|
||||||
|
- competitor status:
|
||||||
|
- `matched=10775`
|
||||||
|
- `no_valid_match=74`
|
||||||
|
- `ambiguous=556`
|
||||||
|
- `needs_research=5790`
|
||||||
|
|
||||||
|
Remaining price research:
|
||||||
|
|
||||||
|
- `10Gtek=126`
|
||||||
|
- `SFPcables=31`
|
||||||
|
- `ShopFiber24=24`
|
||||||
|
- `ATGBICS=3`
|
||||||
|
- `Vcelink=2`
|
||||||
|
|
||||||
|
Interpretation:
|
||||||
|
|
||||||
|
- remaining price work is now real retail/source discovery
|
||||||
|
- SFPcables search tests for 10Gtek part numbers did not return reliable direct hits
|
||||||
|
- 10Gtek requires a source/alias discovery robot rather than no-public-price classification
|
||||||
@ -49,3 +49,5 @@
|
|||||||
{"id":"tip-20260509-gbics-ts-artifact-quarantine-001","source":"codex-tip-verification-2026-05-09","kind":"sft-jsonl","messages":[{"role":"system","content":"You are TIP_LLM. Keep active transceiver counts honest by quarantining vendor navigation, category, PDF, cable, and infrastructure artifacts."},{"role":"user","content":"What did the GBICS and T&S cleanup teach the crawler robots?"},{"role":"assistant","content":"GBICS rows such as category labels, patch leads, active optical cables, direct-attach cables, Choose By pages, cart pages, vendor pages, NVIDIA/ConnectX/Cisco/WatchGuard/HP pages, network adapters, media converters, AOC/DAC and MTP/MPO cable pages are not active transceiver SKUs. T&S rows with non-/products URLs, PDFs, generic fiber-optic-transceivers family URLs, Product-Brochure names, or broad 100G/200G/400G-Transceiver family labels are artifacts. Quarantine these outside the active verification base, clear verification flags, and write `artifact_quarantine` evidence before recalculating health."}]}
|
{"id":"tip-20260509-gbics-ts-artifact-quarantine-001","source":"codex-tip-verification-2026-05-09","kind":"sft-jsonl","messages":[{"role":"system","content":"You are TIP_LLM. Keep active transceiver counts honest by quarantining vendor navigation, category, PDF, cable, and infrastructure artifacts."},{"role":"user","content":"What did the GBICS and T&S cleanup teach the crawler robots?"},{"role":"assistant","content":"GBICS rows such as category labels, patch leads, active optical cables, direct-attach cables, Choose By pages, cart pages, vendor pages, NVIDIA/ConnectX/Cisco/WatchGuard/HP pages, network adapters, media converters, AOC/DAC and MTP/MPO cable pages are not active transceiver SKUs. T&S rows with non-/products URLs, PDFs, generic fiber-optic-transceivers family URLs, Product-Brochure names, or broad 100G/200G/400G-Transceiver family labels are artifacts. Quarantine these outside the active verification base, clear verification flags, and write `artifact_quarantine` evidence before recalculating health."}]}
|
||||||
{"id":"tip-20260509-product-page-asset-verifier-001","source":"codex-tip-verification-2026-05-09","kind":"sft-jsonl","messages":[{"role":"system","content":"You are TIP_LLM. Design deterministic product-page verifiers that improve evidence without broad crawling or invented values."},{"role":"user","content":"How should TIP safely backfill images and details from existing vendor product URLs?"},{"role":"assistant","content":"Use a targeted product-page asset verifier. Select active rows with an existing product_page_url and missing image/details; fetch one page per row with low concurrency; extract only likely product images from og:image, twitter:image, Magento product image IDs, or filtered product image URLs; infer details only from part number, product URL, and page title, not from full body navigation. Always dry-run first to catch navigation pollution such as T&S 800G menu links or 1.25G being misread as 25G. Apply through `markImageVerified` and `markDetailsVerified` so evidence is recorded."}]}
|
{"id":"tip-20260509-product-page-asset-verifier-001","source":"codex-tip-verification-2026-05-09","kind":"sft-jsonl","messages":[{"role":"system","content":"You are TIP_LLM. Design deterministic product-page verifiers that improve evidence without broad crawling or invented values."},{"role":"user","content":"How should TIP safely backfill images and details from existing vendor product URLs?"},{"role":"assistant","content":"Use a targeted product-page asset verifier. Select active rows with an existing product_page_url and missing image/details; fetch one page per row with low concurrency; extract only likely product images from og:image, twitter:image, Magento product image IDs, or filtered product image URLs; infer details only from part number, product URL, and page title, not from full body navigation. Always dry-run first to catch navigation pollution such as T&S 800G menu links or 1.25G being misread as 25G. Apply through `markImageVerified` and `markDetailsVerified` so evidence is recorded."}]}
|
||||||
{"id":"tip-20260509-quote-only-tscom-pricing-001","source":"codex-tip-verification-2026-05-09","kind":"sft-jsonl","messages":[{"role":"system","content":"You are TIP_LLM. Public-price verification requires a real public price."},{"role":"user","content":"T&S Communication product pages have valid images and details but expose no real public prices, only no price or price 0.00. What should TIP do?"},{"role":"assistant","content":"Do not mark price_verified. Treat T&S as image/detail-verifiable but public-price-missing until a real price observation exists. `price: 0.00`, quote/request flows, missing amount fields, or SEO filenames containing `price` are not price evidence. Keep image_verified/details_verified true when source-backed, keep price_verified false, and document the vendor as quote-only/public-price-unavailable rather than fabricating a market price."}]}
|
{"id":"tip-20260509-quote-only-tscom-pricing-001","source":"codex-tip-verification-2026-05-09","kind":"sft-jsonl","messages":[{"role":"system","content":"You are TIP_LLM. Public-price verification requires a real public price."},{"role":"user","content":"T&S Communication product pages have valid images and details but expose no real public prices, only no price or price 0.00. What should TIP do?"},{"role":"assistant","content":"Do not mark price_verified. Treat T&S as image/detail-verifiable but public-price-missing until a real price observation exists. `price: 0.00`, quote/request flows, missing amount fields, or SEO filenames containing `price` are not price evidence. Keep image_verified/details_verified true when source-backed, keep price_verified false, and document the vendor as quote-only/public-price-unavailable rather than fabricating a market price."}]}
|
||||||
|
{"id":"tip-20260510-part-number-details-verifier-001","source":"codex-tip-verification-2026-05-10","kind":"sft-jsonl","messages":[{"role":"system","content":"You are TIP_LLM. Use deterministic part-number inference only for conservative, auditable detail repair."},{"role":"user","content":"How should TIP close OEM detail gaps when form factor, reach and fiber are known but speed_gbps is zero?"},{"role":"assistant","content":"Use a part-number details verifier. Infer speed only from unambiguous tokens such as 800G, 400G, 100GBASE, 40GBASE, SFP10G, XFP-10GE, 4X25G, 2X50G, 10X10G, CTP-SFP-1GE, and known FC tokens. Dry-run first and exclude traps such as Cisco GLC-FE Fast Ethernet; do not treat all GLC-* as 1G. Update speed/speed_gbps, then mark details through the canonical evidence path with robot_name `verify:part-number-details`."}]}
|
||||||
|
{"id":"tip-20260510-price-status-model-001","source":"codex-tip-verification-2026-05-10","kind":"sft-jsonl","messages":[{"role":"system","content":"You are TIP_LLM. Separate real public prices from resolved no-public-price states."},{"role":"user","content":"How should TIP handle OEM, component, hyperscaler, test-equipment, and quote-only vendors with no public checkout price?"},{"role":"assistant","content":"Do not set `price_verified` unless there is a real public price observation. Add or use `price_status`: `public_price` for true price observations, `no_public_price` for source/policy-backed quote-only or OEM catalog cases, `needs_research` for real retail/source discovery work, and `ambiguous` for uncertain cases. Write `price_unavailable` evidence with the vendor, part number, reason, and source URL if available. Keep retail vendors such as 10Gtek, SFPcables, ShopFiber24, ATGBICS, Vcelink, FS.com and Flexoptix out of no-public-price policies because they should have real prices."}]}
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user