feat: resolve OEM price status and part details
This commit is contained in:
parent
5819eb5eb0
commit
b58f7cee41
@ -17,6 +17,10 @@ healthRouter.get("/", async (_req: Request, res: Response) => {
|
||||
const verStats = await pool.query(`
|
||||
SELECT
|
||||
COUNT(*) FILTER (WHERE price_verified) AS price_verified,
|
||||
COUNT(*) FILTER (WHERE price_status = 'public_price') AS price_public_price,
|
||||
COUNT(*) FILTER (WHERE price_status = 'no_public_price') AS price_no_public_price,
|
||||
COUNT(*) FILTER (WHERE price_status = 'ambiguous') AS price_ambiguous,
|
||||
COUNT(*) FILTER (WHERE price_status IN ('unknown', 'needs_research')) AS price_needs_research,
|
||||
COUNT(*) FILTER (WHERE image_verified) AS image_verified,
|
||||
COUNT(*) FILTER (WHERE details_verified) AS details_verified,
|
||||
COUNT(*) FILTER (WHERE fully_verified) AS fully_verified,
|
||||
@ -101,6 +105,12 @@ healthRouter.get("/", async (_req: Request, res: Response) => {
|
||||
},
|
||||
verification: {
|
||||
price_verified: Number(v.price_verified || 0),
|
||||
price_status: {
|
||||
public_price: Number(v.price_public_price || 0),
|
||||
no_public_price: Number(v.price_no_public_price || 0),
|
||||
ambiguous: Number(v.price_ambiguous || 0),
|
||||
needs_research: Number(v.price_needs_research || 0),
|
||||
},
|
||||
image_verified: Number(v.image_verified || 0),
|
||||
details_verified: Number(v.details_verified || 0),
|
||||
fully_verified: Number(v.fully_verified || 0),
|
||||
|
||||
@ -19,8 +19,10 @@
|
||||
"verify:catalog:details": "tsx src/utils/verify-catalog-details.ts",
|
||||
"verify:quarantine:non-transceivers": "tsx src/utils/quarantine-non-transceivers.ts",
|
||||
"verify:product-page-assets": "tsx src/utils/verify-product-page-assets.ts",
|
||||
"verify:part-number-details": "tsx src/utils/verify-part-number-details.ts",
|
||||
"verify:normalize:product-urls": "tsx src/utils/normalize-product-urls.ts",
|
||||
"verify:fs:sku-aliases": "tsx src/utils/quarantine-fs-sku-aliases.ts",
|
||||
"verify:price-availability": "tsx src/utils/resolve-price-availability.ts",
|
||||
"verify:no-valid-competitor": "tsx src/utils/resolve-no-valid-competitor.ts",
|
||||
"verify:open-competitor-status": "tsx src/utils/resolve-open-competitor-status.ts",
|
||||
"scrape:cisco": "tsx src/scrapers/cisco-tmg.ts",
|
||||
|
||||
@ -21,7 +21,7 @@ export const db = pool;
|
||||
|
||||
export async function recordVerificationEvidence(params: {
|
||||
transceiverId: string;
|
||||
verificationType: "price" | "image" | "details" | "competitor_match" | "competitor_no_match" | "competitor_ambiguous" | "artifact_quarantine";
|
||||
verificationType: "price" | "price_unavailable" | "image" | "details" | "competitor_match" | "competitor_no_match" | "competitor_ambiguous" | "artifact_quarantine";
|
||||
sourceUrl?: string;
|
||||
sourceVendorId?: string;
|
||||
evidenceValue?: Record<string, unknown>;
|
||||
|
||||
194
packages/scraper/src/utils/resolve-price-availability.ts
Normal file
194
packages/scraper/src/utils/resolve-price-availability.ts
Normal file
@ -0,0 +1,194 @@
|
||||
import { pool, recordVerificationEvidence } from "./db";
|
||||
|
||||
/**
 * Catalog categories that are left out of price-availability resolution.
 *
 * The candidate query in `main` passes this list as `$1` and keeps only rows
 * whose `category` is not one of these values.
 */
const EXCLUDED_CATEGORIES: readonly string[] = [
  "NonTransceiver",
  "Accessory",
  "Adapter / Converter",
  "Switch / Media Converter",
  "Switch / Network Infrastructure",
  "NIC / Adapter",
  "Mux / Passive Optical",
  "Product Family",
  "Loopback / Test Module",
];
|
||||
|
||||
/**
 * Exact vendor name -> reason code for vendors treated as having no public
 * checkout price. Keys are matched case-sensitively against `vendors.name`
 * and take precedence over every regex table below.
 */
const NO_PUBLIC_PRICE_VENDOR_POLICIES: Record<string, string> = {
  "ADTRAN": "oem_catalog_quote_only",
  "Alcatel-Lucent Enterprise": "oem_catalog_quote_only",
  "Allied Telesis": "oem_catalog_quote_only",
  "Arista Networks": "oem_catalog_quote_only",
  "Ascent Optics": "manufacturer_catalog_no_public_checkout_price",
  "Black Box": "enterprise_catalog_no_public_checkout_price",
  "Brocade": "oem_catalog_quote_only",
  "Calix": "oem_catalog_quote_only",
  "Cambium Networks": "oem_catalog_quote_only",
  "Check Point": "oem_catalog_quote_only",
  "Ciena": "oem_catalog_quote_only",
  "Cisco Systems": "oem_catalog_quote_only",
  "Coherent Corp": "manufacturer_catalog_no_public_checkout_price",
  "Dell Technologies": "oem_catalog_quote_only",
  "D-LINK": "oem_catalog_quote_only",
  "DZS": "oem_catalog_quote_only",
  "Eoptolink": "manufacturer_catalog_no_public_checkout_price",
  "Ericsson": "oem_catalog_quote_only",
  "EXFO": "test_equipment_vendor_quote_only",
  "Extreme Networks": "oem_catalog_quote_only",
  "F5 Networks": "oem_catalog_quote_only",
  "Fortinet": "oem_catalog_quote_only",
  "GAO Tek": "quote_request_only_no_public_checkout_price",
  "HPE Aruba Networking": "oem_catalog_quote_only",
  "Huawei": "oem_catalog_quote_only",
  "Infinera": "oem_catalog_quote_only",
  "Intel": "oem_catalog_quote_only",
  "II-VI / Coherent": "manufacturer_catalog_no_public_checkout_price",
  "Juniper Networks": "oem_catalog_quote_only",
  "Keysight Technologies": "test_equipment_vendor_quote_only",
  "LANCOM Systems": "oem_catalog_quote_only",
  "Lumentum": "manufacturer_catalog_no_public_checkout_price",
  "Marvell": "component_vendor_no_public_checkout_price",
  "MikroTik": "oem_catalog_quote_only",
  "Netgear": "oem_catalog_quote_only",
  "NVIDIA Mellanox": "oem_catalog_quote_only",
  "NVIDIA Networking": "oem_catalog_quote_only",
  "Nokia": "oem_catalog_quote_only",
  "Palo Alto Networks": "oem_catalog_quote_only",
  "Ribbon Communications": "oem_catalog_quote_only",
  "Ruckus Networks (CommScope)": "oem_catalog_quote_only",
  "SmartOptics": "manufacturer_catalog_no_public_checkout_price",
  "Solarflare": "oem_catalog_quote_only",
  "Spirent Communications": "test_equipment_vendor_quote_only",
  "Stordis": "oem_catalog_quote_only",
  "T&S Communication": "quote_request_only_or_price_zero_no_public_checkout_price",
  "Turbolink": "manufacturer_catalog_no_public_checkout_price",
  "Ubiquiti Networks": "oem_catalog_quote_only",
  "Viavi Solutions": "test_equipment_vendor_quote_only",
  "Zyxel": "oem_catalog_quote_only",
  "ZTE": "oem_catalog_quote_only",
};

/**
 * Fallback classification for vendor names without an exact policy entry.
 * Entries are tried in order and the first matching pattern supplies the
 * reason code. All patterns are case-insensitive and word-bounded.
 */
const NO_PUBLIC_PRICE_VENDOR_PATTERNS: Array<[RegExp, string]> = [
  [/\b(cisco|juniper|arista|nokia|huawei|hpe|hewlett|dell|extreme|brocade|foundry|ruckus|commscope|arris|aruba|viptela|pure storage|hitachi vantara|ibm storage)\b/i, "oem_catalog_quote_only"],
  [/\b(check point|palo alto|fortinet|sonicwall|sophos|watchguard|barracuda|hillstone)\b/i, "security_vendor_catalog_quote_only"],
  [/\b(hirschmann|siemens|ruggedcom|antaira|etherwan|red lion|korenix|perle|transition networks|rad data|moxa|omron|abb|beckhoff|belden|ge grid|phoenix contact)\b/i, "industrial_networking_catalog_no_public_checkout_price"],
  [/\b(packetlight|harmonic|casa systems|fujitsu|fiberhome|nec|h3c|dlink|d-link|tp-link|netgear|zyxel|ubiquiti|lancom|mikrotik|senao|engenius|telco systems|tejas|teleste|packetfront|westermo|supermicro|coriant|sycamore|dragonwave|radiflow|datang|vecima|cradlepoint|ruijie|utstarcom|edgewater|grass valley|pica8|centec|ceragon|clearfield|cumulus networks|samsung networks|telrad|siklu|drivenets|pluribus|viasat|haivision|mavenir|audiocodes)\b/i, "oem_catalog_quote_only"],
  [/\b(anritsu|keysight|viavi|exfo|ixia|spirent|tektronix|rohde|netscout|teledyne lecroy)\b/i, "test_equipment_vendor_quote_only"],
  [/\b(intel|marvell|qlogic|chelsio|solarflare|emulex|lanner|kontron|advantech|source photonics|innolight|o-net|emcore|applied optoelectronics|cimc|sumitomo|lumentum|coherent|reflex photonics|ofs|corning|hisense broadband|oplink|accelink|neophotonics|skylane optics)\b/i, "component_vendor_no_public_checkout_price"],
  [/\b(adva|ekinops|infinera|ciena|ribbon|nortel|3com|avaya|comnet|comtrend|evertz|ip infusion|stordis|calix|dzs|adtran|allied telesis|eci telecom|zte access|netapp|isol?an|curtiss-wright|l3harris|sierra wireless|black box|ericsson radio)\b/i, "oem_catalog_quote_only"],
  [/\b(google cloud|microsoft azure|amazon web services|meta)\b/i, "hyperscaler_no_public_checkout_price"],
  [/\b(rockwell automation|schneider electric|schweitzer engineering)\b/i, "industrial_networking_catalog_no_public_checkout_price"],
  [/\b(sonic|vmware|versa networks)\b/i, "software_platform_no_public_transceiver_price"],
];

/**
 * Vendor names matching one of these patterns are never classified through
 * the regex fallback above; `getNoPublicPriceReason` returns `undefined` for
 * them unless they have an exact policy entry.
 */
const PUBLIC_PRICE_VENDOR_PATTERNS = [
  /\b(10gtek|sfpcables|shopfiber24|gbics|fs\.com|flexoptix|naddod|qsfptek|atgbics|fiber24)\b/i,
];

/**
 * Resolve the "no public price" reason code for a vendor.
 *
 * Resolution order:
 *   1. exact entry in `NO_PUBLIC_PRICE_VENDOR_POLICIES`
 *   2. a `PUBLIC_PRICE_VENDOR_PATTERNS` match short-circuits to `undefined`
 *   3. first matching entry of `NO_PUBLIC_PRICE_VENDOR_PATTERNS`
 *
 * @param vendorName Vendor name exactly as stored (no trimming or case folding is applied).
 * @returns The reason code, or `undefined` when the vendor is not classified.
 */
function getNoPublicPriceReason(vendorName: string): string | undefined {
  // Own-property check: a plain index lookup would also resolve inherited
  // Object.prototype members ("constructor", "toString", ...) and hand back
  // a function as the "reason".
  if (Object.prototype.hasOwnProperty.call(NO_PUBLIC_PRICE_VENDOR_POLICIES, vendorName)) {
    return NO_PUBLIC_PRICE_VENDOR_POLICIES[vendorName];
  }

  if (PUBLIC_PRICE_VENDOR_PATTERNS.some((regex) => regex.test(vendorName))) {
    return undefined;
  }

  for (const [regex, reason] of NO_PUBLIC_PRICE_VENDOR_PATTERNS) {
    if (regex.test(vendorName)) return reason;
  }
  return undefined;
}
|
||||
|
||||
/** Row shape produced by the candidate query in `main`. */
type Candidate = {
  id: string;
  vendorName: string;
  partNumber: string;
  productUrl: string | null;
  priceStatus: string | null;
};

/**
 * Resolve unresolved price states to `no_public_price` for vendors that
 * `getNoPublicPriceReason` classifies.
 *
 * Environment:
 *   - `PRICE_AVAILABILITY_VENDOR`: optional comma-separated exact vendor names.
 *   - `PRICE_AVAILABILITY_LIMIT`: maximum candidate rows (default 1000).
 *   - `PRICE_AVAILABILITY_APPLY`: must be exactly "1" to write; anything else is a dry run.
 *
 * In apply mode each classified row is updated and a `price_unavailable`
 * evidence record is written. Vendors without a reason are counted as skipped.
 *
 * @throws Error when `PRICE_AVAILABILITY_LIMIT` is not a non-negative integer.
 */
async function main(): Promise<void> {
  const vendorFilter = (process.env["PRICE_AVAILABILITY_VENDOR"] || "").trim();
  const rawLimit = process.env["PRICE_AVAILABILITY_LIMIT"] || "1000";
  const limit = parseInt(rawLimit, 10);
  // Fail before touching the database: NaN or a negative value would
  // otherwise only surface as an opaque SQL error from `LIMIT $2`.
  if (!Number.isFinite(limit) || limit < 0) {
    throw new Error(`PRICE_AVAILABILITY_LIMIT must be a non-negative integer, got "${rawLimit}"`);
  }
  const apply = process.env["PRICE_AVAILABILITY_APPLY"] === "1";
  const vendorNames = vendorFilter
    ? vendorFilter.split(",").map((v) => v.trim()).filter(Boolean)
    : [];

  // $3 only exists when a vendor filter was supplied.
  const vendorWhere = vendorNames.length > 0 ? "AND v.name = ANY($3)" : "";
  const params: unknown[] = [EXCLUDED_CATEGORIES, limit];
  if (vendorNames.length > 0) params.push(vendorNames);

  const result = await pool.query<Candidate>(
    `SELECT t.id,
            v.name AS "vendorName",
            t.part_number AS "partNumber",
            t.product_page_url AS "productUrl",
            COALESCE(t.price_status, 'unknown') AS "priceStatus"
       FROM transceivers t
       JOIN vendors v ON v.id = t.vendor_id
      WHERE COALESCE(t.category, '') <> ALL($1)
        AND COALESCE(t.data_confidence, 'unknown') != 'garbage'
        AND COALESCE(t.price_verified, false) = false
        AND COALESCE(t.price_status, 'needs_research') IN ('unknown', 'needs_research', 'ambiguous')
        ${vendorWhere}
      ORDER BY v.name, t.part_number
      LIMIT $2`,
    params,
  );

  let noPublicPrice = 0;
  let skipped = 0;

  console.log("=== Price availability resolver ===", { vendorNames, limit, apply, count: result.rows.length });

  for (const row of result.rows) {
    const reason = getNoPublicPriceReason(row.vendorName);
    if (!reason) {
      skipped++;
      continue;
    }

    if (!apply) {
      console.log("dry-run price availability", {
        vendor: row.vendorName,
        partNumber: row.partNumber,
        outcome: "no_public_price",
        reason,
      });
      noPublicPrice++;
      continue;
    }

    // Re-check price_verified in the UPDATE itself so a row that gained a
    // real price observation after the SELECT is not flipped to
    // no_public_price.
    const updated = await pool.query(
      `UPDATE transceivers
          SET price_status = 'no_public_price',
              price_status_updated_at = NOW(),
              price_unavailable_verified_at = COALESCE(price_unavailable_verified_at, NOW()),
              price_unavailable_reason = $2,
              updated_at = NOW()
        WHERE id = $1
          AND COALESCE(price_verified, false) = false`,
      [row.id, reason],
    );

    // Nothing changed (row verified or removed meanwhile): record no evidence.
    if (!updated.rowCount) {
      skipped++;
      continue;
    }

    await recordVerificationEvidence({
      transceiverId: row.id,
      verificationType: "price_unavailable",
      sourceUrl: row.productUrl || undefined,
      evidenceValue: {
        outcome: "no_public_price",
        reason,
        vendor: row.vendorName,
        partNumber: row.partNumber,
      },
      robotName: "verify:price-availability",
      confidence: 0.9,
    });
    noPublicPrice++;
  }

  console.log("Price availability resolver complete", { noPublicPrice, skipped, apply });
}
|
||||
|
||||
// Run only when executed directly, not when imported.
if (require.main === module) {
  main()
    .catch((err: unknown) => {
      console.error("Fatal:", err);
      // Set the exit code instead of calling process.exit() so the pool
      // shutdown below can actually complete.
      process.exitCode = 1;
    })
    // Close the pool exactly once, on both the success and the failure path.
    .then(() => pool.end())
    .catch((err: unknown) => {
      console.error("Failed to close database pool:", err);
      process.exitCode = 1;
    });
}
|
||||
160
packages/scraper/src/utils/verify-part-number-details.ts
Normal file
160
packages/scraper/src/utils/verify-part-number-details.ts
Normal file
@ -0,0 +1,160 @@
|
||||
import { pool, markDetailsVerified, recordVerificationEvidence } from "./db";
|
||||
|
||||
/**
 * Catalog categories that are left out of part-number detail verification.
 *
 * The candidate query in `main` passes this list as `$1` and keeps only rows
 * whose `category` is not one of these values.
 */
const EXCLUDED_CATEGORIES: readonly string[] = [
  "NonTransceiver",
  "Accessory",
  "Adapter / Converter",
  "Switch / Media Converter",
  "Switch / Network Infrastructure",
  "NIC / Adapter",
  "Mux / Passive Optical",
  "Product Family",
  "Loopback / Test Module",
];

/** Row shape produced by the candidate query in `main`. */
type Candidate = {
  id: string;
  vendorName: string;
  partNumber: string;
  productUrl: string | null;
  formFactor: string | null;
  // Typed as string | number because `main` normalises it with Number();
  // presumably the driver may return numeric columns as strings — confirm.
  speedGbps: string | number | null;
  reachLabel: string | null;
  fiberType: string | null;
};
|
||||
|
||||
/** One speed-inference rule; rules are evaluated in table order. */
type SpeedRule = {
  pattern: RegExp;
  speed: string;
  speedGbps: number;
  rule: string;
  /**
   * When true the pattern targets a decimal rate token (e.g. `2.5G`) and is
   * tested against the raw upper-cased part number. All other rules are
   * tested against text from which decimal tokens have been removed.
   */
  decimal?: boolean;
};

/** Matches a decimal number plus any alphanumerics glued to it, e.g. `1.25G`. */
const DECIMAL_TOKEN = /\d+\.\d+[A-Z0-9]*/g;

/**
 * Ordered from highest to lowest speed so that breakout-style tokens such as
 * `8X100G` or `4X25G` hit the aggregate rate before the per-lane rate.
 */
const SPEED_RULES: readonly SpeedRule[] = [
  { pattern: /\b(800G|800GE|800GBE)\b|JCO800|8X100G/, speed: "800G", speedGbps: 800, rule: "800G token" },
  { pattern: /\b(400G|400GE|400GBE|400ZR)\b|JCO400|4X100G/, speed: "400G", speedGbps: 400, rule: "400G token" },
  { pattern: /\b(200G|200GE|200GBE)\b|2X100G/, speed: "200G", speedGbps: 200, rule: "200G token" },
  { pattern: /\b(128GFC)\b/, speed: "128G FC", speedGbps: 128, rule: "128GFC token" },
  { pattern: /\b(100G|100GE|100GBE|CGE)\b|100GBASE|4X25G|2X50G|10X10G/, speed: "100G", speedGbps: 100, rule: "100G token" },
  { pattern: /\b(50G|50GE|50GBE)\b/, speed: "50G", speedGbps: 50, rule: "50G token" },
  { pattern: /\b(40G|40GE|40GBE)\b|40GBASE/, speed: "40G", speedGbps: 40, rule: "40G token" },
  { pattern: /\b(32GFC)\b/, speed: "32G FC", speedGbps: 32, rule: "32GFC token" },
  { pattern: /\b(25G|25GE|25GBE)\b|25GBASE/, speed: "25G", speedGbps: 25, rule: "25G token" },
  { pattern: /\b(16GFC)\b/, speed: "16G FC", speedGbps: 16, rule: "16GFC token" },
  { pattern: /\b(10G|10GE|10GBE)\b|10GBASE|SFP10G|XFP-10GE/, speed: "10G", speedGbps: 10, rule: "10G token" },
  // Dotted form: must not be glued to a preceding letter, digit or dot.
  { pattern: /(?:^|[^A-Z0-9.])2\.5G(?:E|BE)?(?![A-Z0-9])/, speed: "2.5G", speedGbps: 2.5, rule: "2.5G token", decimal: true },
  // Non-dotted spellings such as `2-5G`.
  { pattern: /\b(2[.-]?5G|2[.-]?5GE|2[.-]?5GBE)\b/, speed: "2.5G", speedGbps: 2.5, rule: "2.5G token" },
  // `1.25G` is treated as 1G, never as 25G.
  { pattern: /(?:^|[^A-Z0-9.])1\.25G(?:E|BE)?(?![A-Z0-9])/, speed: "1G", speedGbps: 1, rule: "1.25G token", decimal: true },
  { pattern: /\b(1G|1GE|1000BASE)\b|CTP-SFP-1GE|SFP-GE|GLC-(SX|LX|LH|ZX|EX|BX|TE|T)\b/, speed: "1G", speedGbps: 1, rule: "1G token" },
];

/**
 * Infer a transceiver's nominal speed from tokens in its part number.
 *
 * Matching is case-insensitive (the input is upper-cased). Decimal tokens
 * such as `1.25G` are removed before the integer rules run, so their
 * fractional tail cannot be misread as `25G`/`10G`/...; the two supported
 * decimal rates have their own rules. Integer rules are tried on both the
 * remaining text and a copy with every run of non-alphanumerics collapsed to
 * `-`, which lets `_`-separated part numbers satisfy the `\b` anchors.
 *
 * @param partNumber Vendor part number, any case.
 * @returns The display speed, numeric Gbps and the rule label that matched,
 *          or `null` when no rule applies.
 */
function inferSpeed(partNumber: string): { speed: string; speedGbps: number; rule: string } | null {
  const pn = partNumber.toUpperCase();
  const withoutDecimals = pn.replace(DECIMAL_TOKEN, " ");
  const compact = withoutDecimals.replace(/[^A-Z0-9]+/g, "-");

  for (const { pattern, speed, speedGbps, rule, decimal } of SPEED_RULES) {
    const matched = decimal
      ? pattern.test(pn)
      : pattern.test(compact) || pattern.test(withoutDecimals);
    if (matched) {
      return { speed, speedGbps, rule };
    }
  }
  return null;
}
|
||||
|
||||
/**
 * Fill in missing speeds and mark details verified using part-number
 * inference.
 *
 * Environment:
 *   - `PART_DETAILS_VENDOR`: optional comma-separated exact vendor names.
 *   - `PART_DETAILS_LIMIT`: maximum candidate rows (default 1000).
 *   - `PART_DETAILS_APPLY`: must be exactly "1" to write; anything else is a dry run.
 *
 * Candidates already have form factor, reach label and fiber type, and either
 * lack a speed or are not yet details-verified. Rows with no inferable speed
 * are skipped. Rows whose stored non-zero speed disagrees with the inferred
 * speed are left untouched and counted as conflicts.
 *
 * @throws Error when `PART_DETAILS_LIMIT` is not a non-negative integer.
 */
async function main(): Promise<void> {
  const vendorFilter = (process.env["PART_DETAILS_VENDOR"] || "").trim();
  const rawLimit = process.env["PART_DETAILS_LIMIT"] || "1000";
  const limit = parseInt(rawLimit, 10);
  // Fail before touching the database: NaN or a negative value would
  // otherwise only surface as an opaque SQL error from `LIMIT $2`.
  if (!Number.isFinite(limit) || limit < 0) {
    throw new Error(`PART_DETAILS_LIMIT must be a non-negative integer, got "${rawLimit}"`);
  }
  const apply = process.env["PART_DETAILS_APPLY"] === "1";
  const vendorNames = vendorFilter.split(",").map((v) => v.trim()).filter(Boolean);

  // $3 only exists when a vendor filter was supplied.
  const vendorWhere = vendorNames.length > 0 ? "AND v.name = ANY($3)" : "";
  const params: unknown[] = [EXCLUDED_CATEGORIES, limit];
  if (vendorNames.length > 0) params.push(vendorNames);

  const result = await pool.query<Candidate>(
    `SELECT t.id,
            v.name AS "vendorName",
            t.part_number AS "partNumber",
            t.product_page_url AS "productUrl",
            t.form_factor AS "formFactor",
            t.speed_gbps AS "speedGbps",
            t.reach_label AS "reachLabel",
            t.fiber_type AS "fiberType"
       FROM transceivers t
       JOIN vendors v ON v.id = t.vendor_id
      WHERE COALESCE(t.category, '') <> ALL($1)
        AND COALESCE(t.data_confidence, 'unknown') != 'garbage'
        AND COALESCE(t.part_number, '') != ''
        AND COALESCE(t.form_factor, '') != ''
        AND COALESCE(t.reach_label, '') != ''
        AND COALESCE(t.fiber_type, '') != ''
        AND (t.speed_gbps IS NULL OR t.speed_gbps = 0 OR COALESCE(t.details_verified, false) = false)
        ${vendorWhere}
      ORDER BY v.name, t.part_number
      LIMIT $2`,
    params,
  );

  let candidates = 0;
  let updated = 0;
  let details = 0;
  let skipped = 0;
  let conflicts = 0;

  console.log("=== Part-number details verifier ===", { vendorNames, limit, apply, count: result.rows.length });

  for (const row of result.rows) {
    const inferred = inferSpeed(row.partNumber);
    if (!inferred) {
      skipped++;
      continue;
    }

    const currentSpeed = Number(row.speedGbps || 0);
    const hasStoredSpeed = Number.isFinite(currentSpeed) && currentSpeed > 0;

    // A stored speed that contradicts the part-number inference must not be
    // stamped as verified with evidence naming a different speed.
    if (hasStoredSpeed && currentSpeed !== inferred.speedGbps) {
      conflicts++;
      console.warn("part details speed conflict", {
        vendor: row.vendorName,
        partNumber: row.partNumber,
        currentSpeed,
        inferred,
      });
      continue;
    }

    candidates++;
    const shouldUpdateSpeed = !hasStoredSpeed;

    if (!apply) {
      console.log("dry-run part details", {
        vendor: row.vendorName,
        partNumber: row.partNumber,
        currentSpeed,
        inferred,
        canMarkDetails: Boolean(row.formFactor && row.reachLabel && row.fiberType),
      });
      continue;
    }

    if (shouldUpdateSpeed) {
      await pool.query(
        `UPDATE transceivers
            SET speed = $2,
                speed_gbps = $3,
                updated_at = NOW()
          WHERE id = $1`,
        [row.id, inferred.speed, inferred.speedGbps],
      );
      updated++;
    }

    const marked = await markDetailsVerified({
      transceiverId: row.id,
      sourceUrl: row.productUrl || undefined,
    });
    if (marked) details++;

    await recordVerificationEvidence({
      transceiverId: row.id,
      verificationType: "details",
      sourceUrl: row.productUrl || undefined,
      evidenceValue: {
        rule: "part_number_speed_inference",
        partNumber: row.partNumber,
        speed: inferred.speed,
        speedGbps: inferred.speedGbps,
        inferenceRule: inferred.rule,
      },
      robotName: "verify:part-number-details",
      confidence: 0.95,
    });
  }

  console.log("Part-number details verifier complete", { candidates, updated, details, skipped, conflicts, apply });
}
|
||||
|
||||
// Run only when executed directly, not when imported.
if (require.main === module) {
  main()
    .catch((err: unknown) => {
      console.error("Fatal:", err);
      // Set the exit code instead of calling process.exit() so the pool
      // shutdown below can actually complete.
      process.exitCode = 1;
    })
    // Close the pool exactly once, on both the success and the failure path.
    .then(() => pool.end())
    .catch((err: unknown) => {
      console.error("Failed to close database pool:", err);
      process.exitCode = 1;
    });
}
|
||||
@ -156,9 +156,9 @@ function inferDetails(text: string): InferredDetails {
|
||||
[/\b32\s*gfc\b/i, "32G FC", 32],
|
||||
[/\b16\s*gfc\b/i, "16G FC", 16],
|
||||
[/\b10\s*g(?:base)?\b/i, "10G", 10],
|
||||
[/\b1\.?25\s*g(?:base)?\b|\b1000base\b|\b1\s*g(?:base)?\b/i, "1G", 1],
|
||||
[/\b1[\s.-]?25\s*g(?:b(?:ase)?|bps)?\b|\b1000base\b|\b1\s*g(?:base)?\b/i, "1G", 1],
|
||||
[/(^|[^.\d-])25\s*g(?:base)?\b/i, "25G", 25],
|
||||
[/\b2[.-]?5\s*g(?:base)?\b/i, "2.5G", 2.5],
|
||||
[/\b2[\s.-]?5\s*g(?:base)?\b/i, "2.5G", 2.5],
|
||||
];
|
||||
for (const [regex, speed, speedGbps] of speedPatterns) {
|
||||
if (regex.test(text)) {
|
||||
|
||||
73
sql/105-price-status-and-unavailable-evidence.sql
Normal file
73
sql/105-price-status-and-unavailable-evidence.sql
Normal file
@ -0,0 +1,73 @@
|
||||
-- Migration 105: Explicit price availability state
|
||||
--
|
||||
-- Public price verification remains strict: price_verified requires a real
|
||||
-- source-backed price observation. Quote-only/OEM/no-public-price cases are
|
||||
-- represented separately so TIP can close research loops without inventing
|
||||
-- prices.
|
||||
|
||||
-- New price-availability columns. IF NOT EXISTS keeps the statement safe to re-run.
ALTER TABLE transceivers
  ADD COLUMN IF NOT EXISTS price_status VARCHAR(32) NOT NULL DEFAULT 'unknown',
  ADD COLUMN IF NOT EXISTS price_status_updated_at TIMESTAMPTZ,
  ADD COLUMN IF NOT EXISTS price_unavailable_verified_at TIMESTAMPTZ,
  ADD COLUMN IF NOT EXISTS price_unavailable_reason TEXT;

-- Recreate the CHECK constraint. DROP ... IF EXISTS is scoped to this table,
-- so a same-named constraint on another table cannot break the migration.
ALTER TABLE transceivers
  DROP CONSTRAINT IF EXISTS transceivers_price_status_check;

ALTER TABLE transceivers
  ADD CONSTRAINT transceivers_price_status_check
  CHECK (price_status IN (
    'unknown',
    'public_price',
    'no_public_price',
    'needs_research',
    'ambiguous'
  ));

-- Backfill:
--   * verified prices become 'public_price'
--   * NULL / 'unknown' become 'needs_research'
-- Rows already at 'public_price' are skipped so a re-run does not rewrite them.
UPDATE transceivers
   SET price_status = CASE
         WHEN price_verified = true THEN 'public_price'
         WHEN price_status IS NULL OR price_status = 'unknown' THEN 'needs_research'
         ELSE price_status
       END,
       price_status_updated_at = COALESCE(price_status_updated_at, NOW())
 WHERE price_status IS NULL
    OR price_status = 'unknown'
    OR (price_verified = true AND price_status <> 'public_price');

CREATE INDEX IF NOT EXISTS idx_transceivers_price_status
  ON transceivers (price_status);

-- Partial index covering only rows resolved as no_public_price.
CREATE INDEX IF NOT EXISTS idx_transceivers_no_public_price
  ON transceivers (price_unavailable_verified_at)
  WHERE price_status = 'no_public_price';

-- Extend the allowed evidence types with 'price_unavailable'.
ALTER TABLE transceiver_verification_evidence
  DROP CONSTRAINT IF EXISTS transceiver_verification_evidence_verification_type_check;

ALTER TABLE transceiver_verification_evidence
  ADD CONSTRAINT transceiver_verification_evidence_verification_type_check
  CHECK (
    verification_type::text = ANY (
      ARRAY[
        'price',
        'price_unavailable',
        'image',
        'details',
        'competitor_match',
        'competitor_no_match',
        'competitor_ambiguous',
        'artifact_quarantine'
      ]
    )
  );

COMMENT ON COLUMN transceivers.price_status IS
  'Resolution state for price evidence: public_price, no_public_price, needs_research, ambiguous, unknown. price_verified remains true only for real public price observations.';
|
||||
@ -1,9 +1,50 @@
|
||||
# Current TIP Sync State
|
||||
|
||||
Updated: 2026-05-09 23:09 UTC
|
||||
Updated: 2026-05-09 23:15 UTC
|
||||
|
||||
## Newest Work
|
||||
|
||||
- TIP continuation on 2026-05-10 local / 2026-05-09 UTC:
|
||||
- added `verify:part-number-details`
|
||||
- deterministic part-number speed inference for rows where form factor/reach/fiber already exist but `speed_gbps=0`
|
||||
- dry-run caught Cisco `GLC-FE-*` as Fast Ethernet trap; rule hardened before apply
|
||||
- live apply:
|
||||
- Juniper Networks: `375` speed updates/details verified
|
||||
- Cisco Systems: `176` speed updates/details verified
|
||||
- evidence count `verify:part-number-details`: `551`
|
||||
- health detail count moved to `16913`
|
||||
- added migration `sql/105-price-status-and-unavailable-evidence.sql`
|
||||
- new `transceivers.price_status`
|
||||
- new `price_unavailable` evidence type
|
||||
- strict rule remains: `price_verified` only means a real public price observation exists
|
||||
- added `verify:price-availability`
|
||||
- resolves quote-only/OEM/manufacturer/test-equipment/hyperscaler vendors to `price_status=no_public_price`
|
||||
- writes `price_unavailable` evidence, does not fabricate price rows
|
||||
- preserves real retail/source-discovery cases as `needs_research`
|
||||
- Health API now exposes price status buckets
|
||||
- live price-status result:
|
||||
- `public_price=11414`
|
||||
- `no_public_price=5595`
|
||||
- `needs_research=186`
|
||||
- `ambiguous=0`
|
||||
- remaining price research is now limited to real retail/source-discovery vendors:
|
||||
- `10Gtek=126`
|
||||
- `SFPcables=31`
|
||||
- `ShopFiber24=24`
|
||||
- `ATGBICS=3`
|
||||
- `Vcelink=2`
|
||||
- Searching SFPcables for 10Gtek part numbers did not return reliable direct hits, so the remaining 10Gtek work is source/alias discovery rather than no-public-price classification
|
||||
- live health:
|
||||
- active products: `17195`
|
||||
- price verified: `11414`
|
||||
- image verified: `12104`
|
||||
- details verified: `16913`
|
||||
- fully verified: `10505`
|
||||
- competitor status: `matched=10775`, `no_valid_match=74`, `ambiguous=556`, `needs_research=5790`
|
||||
- TIPLLM training pool updated with:
|
||||
- part-number details verifier rules
|
||||
- price_status/no-public-price model
|
||||
|
||||
- MAGATAMA all-lane RunPod training block started on 2026-05-09:
|
||||
- user requested all trainable LLM lanes via RunPod
|
||||
- lanes in scope:
|
||||
|
||||
@ -0,0 +1,108 @@
|
||||
# 2026-05-10 — TIP Price Status + OEM Detail Resolution
|
||||
|
||||
## Part-Number Details Verifier
|
||||
|
||||
Added `packages/scraper/src/utils/verify-part-number-details.ts`.
|
||||
|
||||
Purpose:
|
||||
|
||||
- close detail gaps where form factor, reach and fiber are already present but `speed_gbps=0`
|
||||
- use conservative part-number inference only
|
||||
- write canonical `details` evidence with robot `verify:part-number-details`
|
||||
|
||||
Safety catch:
|
||||
|
||||
- Cisco `GLC-FE-*` appeared in dry-run
|
||||
- these are Fast Ethernet (100 Mb/s) modules, not Gigabit, so the generic `GLC-* => 1G` rule was rejected
|
||||
- final rule only infers 1G for safe patterns such as `GLC-SX`, `GLC-LX`, `GLC-LH`, `GLC-ZX`, `GLC-EX`, `GLC-BX`, `GLC-T`, `GLC-TE`
|
||||
|
||||
Live apply:
|
||||
|
||||
- Juniper Networks:
|
||||
- `375` candidates updated
|
||||
- `375` details verified
|
||||
- Cisco Systems:
|
||||
- `176` candidates updated
|
||||
- `176` details verified
|
||||
- evidence:
|
||||
- `details | verify:part-number-details | 551`
|
||||
|
||||
Result:
|
||||
|
||||
- `details_verified` increased to `16913`
|
||||
- Juniper detail gaps dropped from `173` to `43`
|
||||
- Cisco detail gaps dropped from `146` to `80`
|
||||
|
||||
## Price Status Model
|
||||
|
||||
Added migration `sql/105-price-status-and-unavailable-evidence.sql`.
|
||||
|
||||
New fields:
|
||||
|
||||
- `transceivers.price_status`
|
||||
- `transceivers.price_status_updated_at`
|
||||
- `transceivers.price_unavailable_verified_at`
|
||||
- `transceivers.price_unavailable_reason`
|
||||
|
||||
Allowed states:
|
||||
|
||||
- `public_price`
|
||||
- `no_public_price`
|
||||
- `needs_research`
|
||||
- `ambiguous`
|
||||
- `unknown`
|
||||
|
||||
Important semantic rule:
|
||||
|
||||
- `price_verified=true` still requires a real public price observation
|
||||
- quote-only/OEM/no-checkout cases use `price_status=no_public_price`
|
||||
- no price rows are fabricated
|
||||
|
||||
Added `price_unavailable` evidence type.
|
||||
|
||||
Added `packages/scraper/src/utils/resolve-price-availability.ts`.
|
||||
|
||||
Live apply:
|
||||
|
||||
- resolved quote-only/OEM/manufacturer/test-equipment/hyperscaler vendors to `no_public_price`
|
||||
- wrote `5361` `price_unavailable` evidence rows
|
||||
- preserved real retail/source-discovery vendors as `needs_research`
|
||||
|
||||
Health API now exposes:
|
||||
|
||||
- `price_status.public_price`
|
||||
- `price_status.no_public_price`
|
||||
- `price_status.ambiguous`
|
||||
- `price_status.needs_research`
|
||||
|
||||
Live health after this pass:
|
||||
|
||||
- active products: `17195`
|
||||
- price verified: `11414`
|
||||
- price status:
|
||||
- `public_price=11414`
|
||||
- `no_public_price=5595`
|
||||
- `needs_research=186`
|
||||
- `ambiguous=0`
|
||||
- image verified: `12104`
|
||||
- details verified: `16913`
|
||||
- fully verified: `10505`
|
||||
- competitor status:
|
||||
- `matched=10775`
|
||||
- `no_valid_match=74`
|
||||
- `ambiguous=556`
|
||||
- `needs_research=5790`
|
||||
|
||||
Remaining price research:
|
||||
|
||||
- `10Gtek=126`
|
||||
- `SFPcables=31`
|
||||
- `ShopFiber24=24`
|
||||
- `ATGBICS=3`
|
||||
- `Vcelink=2`
|
||||
|
||||
Interpretation:
|
||||
|
||||
- remaining price work is now real retail/source discovery
|
||||
- SFPcables search tests for 10Gtek part numbers did not return reliable direct hits
|
||||
- 10Gtek requires a source/alias discovery robot rather than no-public-price classification
|
||||
@ -49,3 +49,5 @@
|
||||
{"id":"tip-20260509-gbics-ts-artifact-quarantine-001","source":"codex-tip-verification-2026-05-09","kind":"sft-jsonl","messages":[{"role":"system","content":"You are TIP_LLM. Keep active transceiver counts honest by quarantining vendor navigation, category, PDF, cable, and infrastructure artifacts."},{"role":"user","content":"What did the GBICS and T&S cleanup teach the crawler robots?"},{"role":"assistant","content":"GBICS rows such as category labels, patch leads, active optical cables, direct-attach cables, Choose By pages, cart pages, vendor pages, NVIDIA/ConnectX/Cisco/WatchGuard/HP pages, network adapters, media converters, AOC/DAC and MTP/MPO cable pages are not active transceiver SKUs. T&S rows with non-/products URLs, PDFs, generic fiber-optic-transceivers family URLs, Product-Brochure names, or broad 100G/200G/400G-Transceiver family labels are artifacts. Quarantine these outside the active verification base, clear verification flags, and write `artifact_quarantine` evidence before recalculating health."}]}
|
||||
{"id":"tip-20260509-product-page-asset-verifier-001","source":"codex-tip-verification-2026-05-09","kind":"sft-jsonl","messages":[{"role":"system","content":"You are TIP_LLM. Design deterministic product-page verifiers that improve evidence without broad crawling or invented values."},{"role":"user","content":"How should TIP safely backfill images and details from existing vendor product URLs?"},{"role":"assistant","content":"Use a targeted product-page asset verifier. Select active rows with an existing product_page_url and missing image/details; fetch one page per row with low concurrency; extract only likely product images from og:image, twitter:image, Magento product image IDs, or filtered product image URLs; infer details only from part number, product URL, and page title, not from full body navigation. Always dry-run first to catch navigation pollution such as T&S 800G menu links or 1.25G being misread as 25G. Apply through `markImageVerified` and `markDetailsVerified` so evidence is recorded."}]}
|
||||
{"id":"tip-20260509-quote-only-tscom-pricing-001","source":"codex-tip-verification-2026-05-09","kind":"sft-jsonl","messages":[{"role":"system","content":"You are TIP_LLM. Public-price verification requires a real public price."},{"role":"user","content":"T&S Communication product pages have valid images and details but expose no real public prices, only no price or price 0.00. What should TIP do?"},{"role":"assistant","content":"Do not mark price_verified. Treat T&S as image/detail-verifiable but public-price-missing until a real price observation exists. `price: 0.00`, quote/request flows, missing amount fields, or SEO filenames containing `price` are not price evidence. Keep image_verified/details_verified true when source-backed, keep price_verified false, and document the vendor as quote-only/public-price-unavailable rather than fabricating a market price."}]}
|
||||
{"id":"tip-20260510-part-number-details-verifier-001","source":"codex-tip-verification-2026-05-10","kind":"sft-jsonl","messages":[{"role":"system","content":"You are TIP_LLM. Use deterministic part-number inference only for conservative, auditable detail repair."},{"role":"user","content":"How should TIP close OEM detail gaps when form factor, reach and fiber are known but speed_gbps is zero?"},{"role":"assistant","content":"Use a part-number details verifier. Infer speed only from unambiguous tokens such as 800G, 400G, 100GBASE, 40GBASE, SFP10G, XFP-10GE, 4X25G, 2X50G, 10X10G, CTP-SFP-1GE, and known FC tokens. Dry-run first and exclude traps such as Cisco GLC-FE Fast Ethernet; do not treat all GLC-* as 1G. Update speed/speed_gbps, then mark details through the canonical evidence path with robot_name `verify:part-number-details`."}]}
|
||||
{"id":"tip-20260510-price-status-model-001","source":"codex-tip-verification-2026-05-10","kind":"sft-jsonl","messages":[{"role":"system","content":"You are TIP_LLM. Separate real public prices from resolved no-public-price states."},{"role":"user","content":"How should TIP handle OEM, component, hyperscaler, test-equipment, and quote-only vendors with no public checkout price?"},{"role":"assistant","content":"Do not set `price_verified` unless there is a real public price observation. Add or use `price_status`: `public_price` for true price observations, `no_public_price` for source/policy-backed quote-only or OEM catalog cases, `needs_research` for real retail/source discovery work, and `ambiguous` for uncertain cases. Write `price_unavailable` evidence with the vendor, part number, reason, and source URL if available. Keep retail vendors such as 10Gtek, SFPcables, ShopFiber24, ATGBICS, Vcelink, FS.com and Flexoptix out of no-public-price policies because they should have real prices."}]}
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user