feat: resolve OEM price status and part details

This commit is contained in:
Rene Fichtmueller 2026-05-10 01:16:49 +02:00
parent 5819eb5eb0
commit b58f7cee41
10 changed files with 594 additions and 4 deletions

View File

@ -17,6 +17,10 @@ healthRouter.get("/", async (_req: Request, res: Response) => {
const verStats = await pool.query(` const verStats = await pool.query(`
SELECT SELECT
COUNT(*) FILTER (WHERE price_verified) AS price_verified, COUNT(*) FILTER (WHERE price_verified) AS price_verified,
COUNT(*) FILTER (WHERE price_status = 'public_price') AS price_public_price,
COUNT(*) FILTER (WHERE price_status = 'no_public_price') AS price_no_public_price,
COUNT(*) FILTER (WHERE price_status = 'ambiguous') AS price_ambiguous,
COUNT(*) FILTER (WHERE price_status IN ('unknown', 'needs_research')) AS price_needs_research,
COUNT(*) FILTER (WHERE image_verified) AS image_verified, COUNT(*) FILTER (WHERE image_verified) AS image_verified,
COUNT(*) FILTER (WHERE details_verified) AS details_verified, COUNT(*) FILTER (WHERE details_verified) AS details_verified,
COUNT(*) FILTER (WHERE fully_verified) AS fully_verified, COUNT(*) FILTER (WHERE fully_verified) AS fully_verified,
@ -101,6 +105,12 @@ healthRouter.get("/", async (_req: Request, res: Response) => {
}, },
verification: { verification: {
price_verified: Number(v.price_verified || 0), price_verified: Number(v.price_verified || 0),
price_status: {
public_price: Number(v.price_public_price || 0),
no_public_price: Number(v.price_no_public_price || 0),
ambiguous: Number(v.price_ambiguous || 0),
needs_research: Number(v.price_needs_research || 0),
},
image_verified: Number(v.image_verified || 0), image_verified: Number(v.image_verified || 0),
details_verified: Number(v.details_verified || 0), details_verified: Number(v.details_verified || 0),
fully_verified: Number(v.fully_verified || 0), fully_verified: Number(v.fully_verified || 0),

View File

@ -19,8 +19,10 @@
"verify:catalog:details": "tsx src/utils/verify-catalog-details.ts", "verify:catalog:details": "tsx src/utils/verify-catalog-details.ts",
"verify:quarantine:non-transceivers": "tsx src/utils/quarantine-non-transceivers.ts", "verify:quarantine:non-transceivers": "tsx src/utils/quarantine-non-transceivers.ts",
"verify:product-page-assets": "tsx src/utils/verify-product-page-assets.ts", "verify:product-page-assets": "tsx src/utils/verify-product-page-assets.ts",
"verify:part-number-details": "tsx src/utils/verify-part-number-details.ts",
"verify:normalize:product-urls": "tsx src/utils/normalize-product-urls.ts", "verify:normalize:product-urls": "tsx src/utils/normalize-product-urls.ts",
"verify:fs:sku-aliases": "tsx src/utils/quarantine-fs-sku-aliases.ts", "verify:fs:sku-aliases": "tsx src/utils/quarantine-fs-sku-aliases.ts",
"verify:price-availability": "tsx src/utils/resolve-price-availability.ts",
"verify:no-valid-competitor": "tsx src/utils/resolve-no-valid-competitor.ts", "verify:no-valid-competitor": "tsx src/utils/resolve-no-valid-competitor.ts",
"verify:open-competitor-status": "tsx src/utils/resolve-open-competitor-status.ts", "verify:open-competitor-status": "tsx src/utils/resolve-open-competitor-status.ts",
"scrape:cisco": "tsx src/scrapers/cisco-tmg.ts", "scrape:cisco": "tsx src/scrapers/cisco-tmg.ts",

View File

@ -21,7 +21,7 @@ export const db = pool;
export async function recordVerificationEvidence(params: { export async function recordVerificationEvidence(params: {
transceiverId: string; transceiverId: string;
verificationType: "price" | "image" | "details" | "competitor_match" | "competitor_no_match" | "competitor_ambiguous" | "artifact_quarantine"; verificationType: "price" | "price_unavailable" | "image" | "details" | "competitor_match" | "competitor_no_match" | "competitor_ambiguous" | "artifact_quarantine";
sourceUrl?: string; sourceUrl?: string;
sourceVendorId?: string; sourceVendorId?: string;
evidenceValue?: Record<string, unknown>; evidenceValue?: Record<string, unknown>;

View File

@ -0,0 +1,194 @@
import { pool, recordVerificationEvidence } from "./db";
/**
 * Category labels that are left out of price-availability resolution.
 * The list is passed as the first query parameter and rows whose `category`
 * equals any entry are filtered out by the candidate query (`<> ALL($1)`).
 */
const EXCLUDED_CATEGORIES = [
"NonTransceiver",
"Accessory",
"Adapter / Converter",
"Switch / Media Converter",
"Switch / Network Infrastructure",
"NIC / Adapter",
"Mux / Passive Optical",
"Product Family",
"Loopback / Test Module",
];
/**
 * Exact vendor name -> reason code for vendors treated as having no public
 * checkout price. Keys are matched case-sensitively against the vendor name.
 */
const NO_PUBLIC_PRICE_VENDOR_POLICIES: Record<string, string> = {
  "ADTRAN": "oem_catalog_quote_only",
  "Alcatel-Lucent Enterprise": "oem_catalog_quote_only",
  "Allied Telesis": "oem_catalog_quote_only",
  "Arista Networks": "oem_catalog_quote_only",
  "Ascent Optics": "manufacturer_catalog_no_public_checkout_price",
  "Black Box": "enterprise_catalog_no_public_checkout_price",
  "Brocade": "oem_catalog_quote_only",
  "Calix": "oem_catalog_quote_only",
  "Cambium Networks": "oem_catalog_quote_only",
  "Check Point": "oem_catalog_quote_only",
  "Ciena": "oem_catalog_quote_only",
  "Cisco Systems": "oem_catalog_quote_only",
  "Coherent Corp": "manufacturer_catalog_no_public_checkout_price",
  "Dell Technologies": "oem_catalog_quote_only",
  "D-LINK": "oem_catalog_quote_only",
  "DZS": "oem_catalog_quote_only",
  "Eoptolink": "manufacturer_catalog_no_public_checkout_price",
  "Ericsson": "oem_catalog_quote_only",
  "EXFO": "test_equipment_vendor_quote_only",
  "Extreme Networks": "oem_catalog_quote_only",
  "F5 Networks": "oem_catalog_quote_only",
  "Fortinet": "oem_catalog_quote_only",
  "GAO Tek": "quote_request_only_no_public_checkout_price",
  "HPE Aruba Networking": "oem_catalog_quote_only",
  "Huawei": "oem_catalog_quote_only",
  "Infinera": "oem_catalog_quote_only",
  "Intel": "oem_catalog_quote_only",
  "II-VI / Coherent": "manufacturer_catalog_no_public_checkout_price",
  "Juniper Networks": "oem_catalog_quote_only",
  "Keysight Technologies": "test_equipment_vendor_quote_only",
  "LANCOM Systems": "oem_catalog_quote_only",
  "Lumentum": "manufacturer_catalog_no_public_checkout_price",
  "Marvell": "component_vendor_no_public_checkout_price",
  "MikroTik": "oem_catalog_quote_only",
  "Netgear": "oem_catalog_quote_only",
  "NVIDIA Mellanox": "oem_catalog_quote_only",
  "NVIDIA Networking": "oem_catalog_quote_only",
  "Nokia": "oem_catalog_quote_only",
  "Palo Alto Networks": "oem_catalog_quote_only",
  "Ribbon Communications": "oem_catalog_quote_only",
  "Ruckus Networks (CommScope)": "oem_catalog_quote_only",
  "SmartOptics": "manufacturer_catalog_no_public_checkout_price",
  "Solarflare": "oem_catalog_quote_only",
  "Spirent Communications": "test_equipment_vendor_quote_only",
  "Stordis": "oem_catalog_quote_only",
  "T&S Communication": "quote_request_only_or_price_zero_no_public_checkout_price",
  "Turbolink": "manufacturer_catalog_no_public_checkout_price",
  "Ubiquiti Networks": "oem_catalog_quote_only",
  "Viavi Solutions": "test_equipment_vendor_quote_only",
  "Zyxel": "oem_catalog_quote_only",
  "ZTE": "oem_catalog_quote_only",
};

/**
 * Fallback classification for vendor names that are not listed verbatim in
 * NO_PUBLIC_PRICE_VENDOR_POLICIES. Patterns are tried in order and the first
 * one that matches supplies the reason code.
 */
const NO_PUBLIC_PRICE_VENDOR_PATTERNS: Array<[RegExp, string]> = [
  [/\b(cisco|juniper|arista|nokia|huawei|hpe|hewlett|dell|extreme|brocade|foundry|ruckus|commscope|commScope|arris|aruba|viptela|pure storage|hitachi vantara|ibm storage)\b/i, "oem_catalog_quote_only"],
  [/\b(check point|palo alto|fortinet|sonicwall|sophos|watchguard|barracuda|hillstone)\b/i, "security_vendor_catalog_quote_only"],
  [/\b(hirschmann|siemens|ruggedcom|antaira|etherwan|red lion|korenix|perle|transition networks|rad data|moxa|omron|abb|beckhoff|belden|ge grid|phoenix contact)\b/i, "industrial_networking_catalog_no_public_checkout_price"],
  [/\b(packetlight|harmonic|casa systems|fujitsu|fiberhome|nec|h3c|dlink|d-link|tp-link|netgear|zyxel|ubiquiti|lancom|mikrotik|senao|engenius|telco systems|tejas|teleste|packetfront|westermo|supermicro|coriant|sycamore|dragonwave|radiflow|datang|vecima|cradlepoint|ruijie|utstarcom|edgewater|grass valley|pica8|centec|ceragon|clearfield|cumulus networks|samsung networks|telrad|siklu|drivenets|pluribus|viasat|haivision|mavenir|audiocodes)\b/i, "oem_catalog_quote_only"],
  [/\b(anritsu|keysight|viavi|exfo|ixia|spirent|tektronix|rohde|netscout|teledyne lecroy)\b/i, "test_equipment_vendor_quote_only"],
  [/\b(intel|marvell|qlogic|chelsio|solarflare|emulex|lanner|kontron|advantech|source photonics|innolight|o-net|emcore|applied optoelectronics|cimc|sumitomo|lumentum|coherent|reflex photonics|ofs|corning|hisense broadband|oplink|accelink|neophotonics|skylane optics)\b/i, "component_vendor_no_public_checkout_price"],
  [/\b(adva|ekinops|infinera|ciena|ribbon|nortel|3com|avaya|comnet|comtrend|evertz|ip infusion|stordis|calix|dzs|adtran|allied telesis|eci telecom|zte access|netapp|isol?an|curtiss-wright|l3harris|sierra wireless|black box|ericsson radio)\b/i, "oem_catalog_quote_only"],
  [/\b(google cloud|microsoft azure|amazon web services|meta)\b/i, "hyperscaler_no_public_checkout_price"],
  [/\b(rockwell automation|schneider electric|schweitzer engineering)\b/i, "industrial_networking_catalog_no_public_checkout_price"],
  [/\b(sonic|vmware|versa networks)\b/i, "software_platform_no_public_transceiver_price"],
];

/**
 * Vendor-name patterns that short-circuit the fallback patterns above: a name
 * matching one of these is never classified by NO_PUBLIC_PRICE_VENDOR_PATTERNS.
 */
const PUBLIC_PRICE_VENDOR_PATTERNS = [
  /\b(10gtek|sfpcables|shopfiber24|gbics|fs\.com|flexoptix|naddod|qsfptek|atgbics|fiber24)\b/i,
];

/**
 * Resolve the "no public price" reason code for a vendor.
 *
 * Resolution order:
 *   1. exact name in NO_PUBLIC_PRICE_VENDOR_POLICIES;
 *   2. a match in PUBLIC_PRICE_VENDOR_PATTERNS yields `undefined`;
 *   3. first matching entry of NO_PUBLIC_PRICE_VENDOR_PATTERNS.
 *
 * @param vendorName Vendor name to classify.
 * @returns The reason code, or `undefined` when the vendor is not classified
 *          as having no public price.
 */
function getNoPublicPriceReason(vendorName: string): string | undefined {
  // Own-property check: plain bracket indexing would also resolve inherited
  // Object.prototype members ("constructor", "toString", ...) and return a
  // function where a reason string is expected.
  if (Object.prototype.hasOwnProperty.call(NO_PUBLIC_PRICE_VENDOR_POLICIES, vendorName)) {
    return NO_PUBLIC_PRICE_VENDOR_POLICIES[vendorName];
  }
  if (PUBLIC_PRICE_VENDOR_PATTERNS.some((regex) => regex.test(vendorName))) return undefined;
  for (const [regex, reason] of NO_PUBLIC_PRICE_VENDOR_PATTERNS) {
    if (regex.test(vendorName)) return reason;
  }
  return undefined;
}
/** One row returned by the candidate query in `main`. */
type Candidate = {
// transceivers.id
id: string;
// vendors.name of the joined vendor
vendorName: string;
// transceivers.part_number
partNumber: string;
// transceivers.product_page_url
productUrl: string | null;
// transceivers.price_status; the query wraps it in COALESCE(..., 'unknown')
priceStatus: string | null;
};
/**
 * Resolve price availability for transceivers whose price is neither verified
 * nor already resolved.
 *
 * Environment:
 *   - PRICE_AVAILABILITY_VENDOR: optional comma-separated list of exact vendor names.
 *   - PRICE_AVAILABILITY_LIMIT:  maximum number of candidate rows to fetch (default 1000).
 *   - PRICE_AVAILABILITY_APPLY:  "1" writes changes; anything else is a dry run.
 *
 * For every candidate whose vendor has a no-public-price reason, the row is set
 * to `price_status = 'no_public_price'` and a `price_unavailable` evidence
 * record is written. Candidates without a reason are counted as skipped.
 *
 * @throws Error when PRICE_AVAILABILITY_LIMIT is not a non-negative integer.
 */
async function main(): Promise<void> {
  const vendorFilter = (process.env["PRICE_AVAILABILITY_VENDOR"] || "").trim();

  // Validate up front: bare parseInt would pass NaN or a negative number on to
  // the LIMIT clause and would silently truncate values such as "10abc".
  const rawLimit = (process.env["PRICE_AVAILABILITY_LIMIT"] || "1000").trim();
  if (!/^\d+$/.test(rawLimit)) {
    throw new Error(`PRICE_AVAILABILITY_LIMIT must be a non-negative integer, got "${rawLimit}"`);
  }
  const limit = Number(rawLimit);

  const apply = process.env["PRICE_AVAILABILITY_APPLY"] === "1";
  const vendorNames = vendorFilter
    .split(",")
    .map((v) => v.trim())
    .filter(Boolean);

  const params: unknown[] = [EXCLUDED_CATEGORIES, limit];
  let vendorWhere = "";
  if (vendorNames.length > 0) {
    params.push(vendorNames);
    // Placeholder index follows the parameter position instead of being hard-coded.
    vendorWhere = `AND v.name = ANY($${params.length})`;
  }

  // NOTE(review): LIMIT is applied in SQL before vendors are classified below,
  // so rows of vendors without a reason count against the limit on every run.
  const result = await pool.query<Candidate>(
    `SELECT t.id,
            v.name AS "vendorName",
            t.part_number AS "partNumber",
            t.product_page_url AS "productUrl",
            COALESCE(t.price_status, 'unknown') AS "priceStatus"
       FROM transceivers t
       JOIN vendors v ON v.id = t.vendor_id
      WHERE COALESCE(t.category, '') <> ALL($1)
        AND COALESCE(t.data_confidence, 'unknown') != 'garbage'
        AND COALESCE(t.price_verified, false) = false
        AND COALESCE(t.price_status, 'needs_research') IN ('unknown', 'needs_research', 'ambiguous')
        ${vendorWhere}
      ORDER BY v.name, t.part_number
      LIMIT $2`,
    params,
  );

  let noPublicPrice = 0;
  let skipped = 0;
  console.log("=== Price availability resolver ===", { vendorNames, limit, apply, count: result.rows.length });

  for (const row of result.rows) {
    const reason = getNoPublicPriceReason(row.vendorName);
    if (!reason) {
      skipped++;
      continue;
    }

    if (!apply) {
      console.log("dry-run price availability", {
        vendor: row.vendorName,
        partNumber: row.partNumber,
        outcome: "no_public_price",
        reason,
      });
      noPublicPrice++;
      continue;
    }

    // price_unavailable_verified_at keeps its first value; the reason is overwritten.
    await pool.query(
      `UPDATE transceivers
          SET price_status = 'no_public_price',
              price_status_updated_at = NOW(),
              price_unavailable_verified_at = COALESCE(price_unavailable_verified_at, NOW()),
              price_unavailable_reason = $2,
              updated_at = NOW()
        WHERE id = $1`,
      [row.id, reason],
    );
    await recordVerificationEvidence({
      transceiverId: row.id,
      verificationType: "price_unavailable",
      sourceUrl: row.productUrl || undefined,
      evidenceValue: {
        outcome: "no_public_price",
        reason,
        vendor: row.vendorName,
        partNumber: row.partNumber,
      },
      robotName: "verify:price-availability",
      confidence: 0.9,
    });
    noPublicPrice++;
  }

  console.log("Price availability resolver complete", { noPublicPrice, skipped, apply });
}
// Run the resolver only when this file is executed directly, not when imported.
if (require.main === module) {
  // Failure path: report the error, start closing the pool, exit non-zero.
  const exitWithFailure = (err: unknown): void => {
    console.error("Fatal:", err);
    pool.end();
    process.exit(1);
  };
  // Success path: close the pool once the run has finished.
  const closePool = () => pool.end();

  main().then(closePool).catch(exitWithFailure);
}

View File

@ -0,0 +1,160 @@
import { pool, markDetailsVerified, recordVerificationEvidence } from "./db";
/**
 * Category labels that are left out of part-number detail verification.
 * The list is passed as the first query parameter and rows whose `category`
 * equals any entry are filtered out by the candidate query (`<> ALL($1)`).
 */
const EXCLUDED_CATEGORIES = [
"NonTransceiver",
"Accessory",
"Adapter / Converter",
"Switch / Media Converter",
"Switch / Network Infrastructure",
"NIC / Adapter",
"Mux / Passive Optical",
"Product Family",
"Loopback / Test Module",
];
/** One row returned by the candidate query in `main`. */
type Candidate = {
// transceivers.id
id: string;
// vendors.name of the joined vendor
vendorName: string;
// transceivers.part_number (the query requires it to be non-empty)
partNumber: string;
// transceivers.product_page_url
productUrl: string | null;
// transceivers.form_factor (the query requires it to be non-empty)
formFactor: string | null;
// transceivers.speed_gbps; typed as string or number, `main` converts it with Number()
speedGbps: string | number | null;
// transceivers.reach_label (the query requires it to be non-empty)
reachLabel: string | null;
// transceivers.fiber_type (the query requires it to be non-empty)
fiberType: string | null;
};
/**
 * Regex prefix that rejects a match starting directly after "<digit>." so the
 * fractional part of a decimal rate is never read as an integer rate
 * (the "25G" inside "1.25G" or "4.25G", the "1G" inside "2.1G").
 */
const DECIMAL_TAIL_GUARD = "(?<![0-9]\\.)";

/** Speed tokens in priority order: [pattern, speed label, speed in Gbps, rule name]. */
const RAW_PART_NUMBER_SPEED_RULES: Array<[RegExp, string, number, string]> = [
  [/\b(800G|800GE|800GBE)\b|JCO800|8X100G/, "800G", 800, "800G token"],
  [/\b(400G|400GE|400GBE|400ZR)\b|JCO400|4X100G/, "400G", 400, "400G token"],
  [/\b(200G|200GE|200GBE)\b|2X100G/, "200G", 200, "200G token"],
  [/\b(128GFC)\b/, "128G FC", 128, "128GFC token"],
  [/\b(100G|100GE|100GBE|CGE)\b|100GBASE|4X25G|2X50G|10X10G/, "100G", 100, "100G token"],
  [/\b(50G|50GE|50GBE)\b/, "50G", 50, "50G token"],
  [/\b(40G|40GE|40GBE)\b|40GBASE/, "40G", 40, "40G token"],
  [/\b(32GFC)\b/, "32G FC", 32, "32GFC token"],
  [/\b(25G|25GE|25GBE)\b|25GBASE/, "25G", 25, "25G token"],
  [/\b(16GFC)\b/, "16G FC", 16, "16GFC token"],
  [/\b(10G|10GE|10GBE)\b|10GBASE|SFP10G|XFP-10GE/, "10G", 10, "10G token"],
  [/\b(2[.-]?5G|2[.-]?5GE|2[.-]?5GBE)\b/, "2.5G", 2.5, "2.5G token"],
  // 1.25G is the Gigabit Ethernet line rate; mapped to 1G like the product-page verifier does.
  [/\b1\.25G/, "1G", 1, "1.25G token"],
  [/\b(1G|1GE|1000BASE)\b|CTP-SFP-1GE|SFP-GE|GLC-(SX|LX|LH|ZX|EX|BX|TE|T)\b/, "1G", 1, "1G token"],
];

/** The rules above with DECIMAL_TAIL_GUARD applied; built once at module load. */
const PART_NUMBER_SPEED_RULES: Array<[RegExp, string, number, string]> = RAW_PART_NUMBER_SPEED_RULES.map(
  ([pattern, speed, speedGbps, rule]): [RegExp, string, number, string] => [
    new RegExp(`${DECIMAL_TAIL_GUARD}(?:${pattern.source})`),
    speed,
    speedGbps,
    rule,
  ],
);

/**
 * Infer the line rate of a transceiver from tokens in its part number.
 *
 * The part number is upper-cased and tested in two forms: as-is, and with
 * separator runs collapsed to "-" (so "SFP_10G_SR" still exposes a "10G"
 * token). A "." between two digits is kept in the collapsed form so decimal
 * rates stay recognisable. Rules are tried from the highest speed down and the
 * first hit wins.
 *
 * A token that directly follows "<digit>." is ignored. This is deliberately
 * conservative: a part number such as "X6.10G" is skipped rather than guessed.
 *
 * @param partNumber Vendor part number.
 * @returns The speed label, speed in Gbps and the name of the rule that
 *          matched, or `null` when no rule applies.
 */
function inferSpeed(partNumber: string): { speed: string; speedGbps: number; rule: string } | null {
  const pn = partNumber.toUpperCase();
  // Replace runs of separators with "-"; a dot is a separator unless it sits between digits.
  const compact = pn.replace(/(?:[^A-Z0-9.]|(?<![0-9])\.|\.(?![0-9]))+/g, "-");

  for (const [regex, speed, speedGbps, rule] of PART_NUMBER_SPEED_RULES) {
    if (regex.test(compact) || regex.test(pn)) {
      return { speed, speedGbps, rule };
    }
  }
  return null;
}
/**
 * Fill in missing speeds and mark details as verified for transceivers whose
 * form factor, reach and fiber type are already present.
 *
 * Environment:
 *   - PART_DETAILS_VENDOR: optional comma-separated list of exact vendor names.
 *   - PART_DETAILS_LIMIT:  maximum number of candidate rows to fetch (default 1000).
 *   - PART_DETAILS_APPLY:  "1" writes changes; anything else is a dry run.
 *
 * Per candidate row:
 *   - no speed can be inferred from the part number -> counted as skipped;
 *   - a non-zero stored speed differs from the inferred speed -> counted as a
 *     conflict and left untouched, so a contradicted row is never marked verified;
 *   - otherwise the speed is written when it was missing, the row is marked
 *     details-verified and a `details` evidence record is written.
 *
 * @throws Error when PART_DETAILS_LIMIT is not a non-negative integer.
 */
async function main(): Promise<void> {
  const vendorFilter = (process.env["PART_DETAILS_VENDOR"] || "").trim();

  // Validate up front: bare parseInt would pass NaN or a negative number on to
  // the LIMIT clause and would silently truncate values such as "10abc".
  const rawLimit = (process.env["PART_DETAILS_LIMIT"] || "1000").trim();
  if (!/^\d+$/.test(rawLimit)) {
    throw new Error(`PART_DETAILS_LIMIT must be a non-negative integer, got "${rawLimit}"`);
  }
  const limit = Number(rawLimit);

  const apply = process.env["PART_DETAILS_APPLY"] === "1";
  const vendorNames = vendorFilter.split(",").map((v) => v.trim()).filter(Boolean);

  const params: unknown[] = [EXCLUDED_CATEGORIES, limit];
  let vendorWhere = "";
  if (vendorNames.length > 0) {
    params.push(vendorNames);
    // Placeholder index follows the parameter position instead of being hard-coded.
    vendorWhere = `AND v.name = ANY($${params.length})`;
  }

  const result = await pool.query<Candidate>(
    `SELECT t.id,
            v.name AS "vendorName",
            t.part_number AS "partNumber",
            t.product_page_url AS "productUrl",
            t.form_factor AS "formFactor",
            t.speed_gbps AS "speedGbps",
            t.reach_label AS "reachLabel",
            t.fiber_type AS "fiberType"
       FROM transceivers t
       JOIN vendors v ON v.id = t.vendor_id
      WHERE COALESCE(t.category, '') <> ALL($1)
        AND COALESCE(t.data_confidence, 'unknown') != 'garbage'
        AND COALESCE(t.part_number, '') != ''
        AND COALESCE(t.form_factor, '') != ''
        AND COALESCE(t.reach_label, '') != ''
        AND COALESCE(t.fiber_type, '') != ''
        AND (t.speed_gbps IS NULL OR t.speed_gbps = 0 OR COALESCE(t.details_verified, false) = false)
        ${vendorWhere}
      ORDER BY v.name, t.part_number
      LIMIT $2`,
    params,
  );

  let candidates = 0;
  let updated = 0;
  let details = 0;
  let skipped = 0;
  let conflicts = 0;
  console.log("=== Part-number details verifier ===", { vendorNames, limit, apply, count: result.rows.length });

  for (const row of result.rows) {
    const inferred = inferSpeed(row.partNumber);
    if (!inferred) {
      skipped++;
      continue;
    }

    // NULL, 0 and unparseable values all count as "no stored speed".
    const currentSpeed = Number(row.speedGbps || 0);
    const hasStoredSpeed = Number.isFinite(currentSpeed) && currentSpeed !== 0;

    // A stored speed that contradicts the part number is not evidence of
    // correct details; leave the row for manual review instead of verifying it.
    if (hasStoredSpeed && currentSpeed !== inferred.speedGbps) {
      conflicts++;
      console.warn("part details speed conflict, row left unverified", {
        vendor: row.vendorName,
        partNumber: row.partNumber,
        currentSpeed,
        inferred,
      });
      continue;
    }

    candidates++;

    if (!apply) {
      console.log("dry-run part details", {
        vendor: row.vendorName,
        partNumber: row.partNumber,
        currentSpeed,
        inferred,
        canMarkDetails: Boolean(row.formFactor && row.reachLabel && row.fiberType),
      });
      continue;
    }

    if (!hasStoredSpeed) {
      await pool.query(
        `UPDATE transceivers
            SET speed = $2,
                speed_gbps = $3,
                updated_at = NOW()
          WHERE id = $1`,
        [row.id, inferred.speed, inferred.speedGbps],
      );
      updated++;
    }

    const marked = await markDetailsVerified({
      transceiverId: row.id,
      sourceUrl: row.productUrl || undefined,
    });
    if (marked) details++;

    await recordVerificationEvidence({
      transceiverId: row.id,
      verificationType: "details",
      sourceUrl: row.productUrl || undefined,
      evidenceValue: {
        rule: "part_number_speed_inference",
        partNumber: row.partNumber,
        speed: inferred.speed,
        speedGbps: inferred.speedGbps,
        inferenceRule: inferred.rule,
      },
      robotName: "verify:part-number-details",
      confidence: 0.95,
    });
  }

  console.log("Part-number details verifier complete", { candidates, updated, details, skipped, conflicts, apply });
}
// Run the verifier only when this file is executed directly, not when imported.
if (require.main === module) {
  // Failure path: report the error, start closing the pool, exit non-zero.
  const exitWithFailure = (err: unknown): void => {
    console.error("Fatal:", err);
    pool.end();
    process.exit(1);
  };
  // Success path: close the pool once the run has finished.
  const closePool = () => pool.end();

  main().then(closePool).catch(exitWithFailure);
}

View File

@ -156,9 +156,9 @@ function inferDetails(text: string): InferredDetails {
[/\b32\s*gfc\b/i, "32G FC", 32], [/\b32\s*gfc\b/i, "32G FC", 32],
[/\b16\s*gfc\b/i, "16G FC", 16], [/\b16\s*gfc\b/i, "16G FC", 16],
[/\b10\s*g(?:base)?\b/i, "10G", 10], [/\b10\s*g(?:base)?\b/i, "10G", 10],
[/\b1\.?25\s*g(?:base)?\b|\b1000base\b|\b1\s*g(?:base)?\b/i, "1G", 1], [/\b1[\s.-]?25\s*g(?:b(?:ase)?|bps)?\b|\b1000base\b|\b1\s*g(?:base)?\b/i, "1G", 1],
[/(^|[^.\d-])25\s*g(?:base)?\b/i, "25G", 25], [/(^|[^.\d-])25\s*g(?:base)?\b/i, "25G", 25],
[/\b2[.-]?5\s*g(?:base)?\b/i, "2.5G", 2.5], [/\b2[\s.-]?5\s*g(?:base)?\b/i, "2.5G", 2.5],
]; ];
for (const [regex, speed, speedGbps] of speedPatterns) { for (const [regex, speed, speedGbps] of speedPatterns) {
if (regex.test(text)) { if (regex.test(text)) {

View File

@ -0,0 +1,73 @@
-- Migration 105: Explicit price availability state
--
-- Public price verification remains strict: price_verified requires a real
-- source-backed price observation. Quote-only/OEM/no-public-price cases are
-- represented separately so TIP can close research loops without inventing
-- prices.
-- Add the price-availability columns. IF NOT EXISTS keeps the statement
-- re-runnable. price_status is NOT NULL and defaults to 'unknown'; the other
-- three columns are nullable.
ALTER TABLE transceivers
ADD COLUMN IF NOT EXISTS price_status VARCHAR(32) NOT NULL DEFAULT 'unknown',
ADD COLUMN IF NOT EXISTS price_status_updated_at TIMESTAMPTZ,
ADD COLUMN IF NOT EXISTS price_unavailable_verified_at TIMESTAMPTZ,
ADD COLUMN IF NOT EXISTS price_unavailable_reason TEXT;
-- (Re)create the CHECK constraint that restricts price_status to the known
-- states. DROP CONSTRAINT IF EXISTS is scoped to this table, unlike a lookup in
-- pg_constraint by name alone, which can also match a same-named constraint on
-- another table or schema. Both actions run in one ALTER TABLE statement, so
-- the swap is atomic.
ALTER TABLE transceivers
  DROP CONSTRAINT IF EXISTS transceivers_price_status_check,
  ADD CONSTRAINT transceivers_price_status_check
    CHECK (price_status IN (
      'unknown',
      'public_price',
      'no_public_price',
      'needs_research',
      'ambiguous'
    ));
-- Backfill price_status for existing rows:
--   * price_verified = true        -> 'public_price'
--   * otherwise status 'unknown'   -> 'needs_research'
--   * anything else keeps its current status
-- price_status_updated_at is stamped only when it is still NULL.
-- The "price_status IS NULL" branch of the WHERE clause is defensive: the
-- column is declared NOT NULL above.
UPDATE transceivers
SET price_status = CASE
WHEN price_verified = true THEN 'public_price'
WHEN price_status = 'unknown' THEN 'needs_research'
ELSE price_status
END,
price_status_updated_at = COALESCE(price_status_updated_at, NOW())
WHERE price_status IS NULL
OR price_status = 'unknown'
OR price_verified = true;
-- Index on price_status for lookups and counts by status.
CREATE INDEX IF NOT EXISTS idx_transceivers_price_status
ON transceivers (price_status);
-- Partial index: covers only rows with price_status = 'no_public_price',
-- keyed by the time the unavailability was verified.
CREATE INDEX IF NOT EXISTS idx_transceivers_no_public_price
ON transceivers (price_unavailable_verified_at)
WHERE price_status = 'no_public_price';
-- Recreate the verification_type CHECK constraint so the evidence table
-- accepts the new 'price_unavailable' type alongside the other listed types.
ALTER TABLE transceiver_verification_evidence
DROP CONSTRAINT IF EXISTS transceiver_verification_evidence_verification_type_check;
ALTER TABLE transceiver_verification_evidence
ADD CONSTRAINT transceiver_verification_evidence_verification_type_check
CHECK (
verification_type::text = ANY (
ARRAY[
'price',
'price_unavailable',
'image',
'details',
'competitor_match',
'competitor_no_match',
'competitor_ambiguous',
'artifact_quarantine'
]
)
);
-- Record the meaning of price_status in the catalog itself.
COMMENT ON COLUMN transceivers.price_status IS
'Resolution state for price evidence: public_price, no_public_price, needs_research, ambiguous, unknown. price_verified remains true only for real public price observations.';

View File

@ -1,9 +1,50 @@
# Current TIP Sync State # Current TIP Sync State
Updated: 2026-05-09 23:09 UTC Updated: 2026-05-09 23:15 UTC
## Newest Work ## Newest Work
- TIP continuation on 2026-05-10 local / 2026-05-09 UTC:
- added `verify:part-number-details`
- deterministic part-number speed inference for rows where form factor/reach/fiber already exist but `speed_gbps=0`
- dry-run caught Cisco `GLC-FE-*` as Fast Ethernet trap; rule hardened before apply
- live apply:
- Juniper Networks: `375` speed updates/details verified
- Cisco Systems: `176` speed updates/details verified
- evidence count `verify:part-number-details`: `551`
- health detail count moved to `16913`
- added migration `sql/105-price-status-and-unavailable-evidence.sql`
- new `transceivers.price_status`
- new `price_unavailable` evidence type
- strict rule remains: `price_verified` only means a real public price observation exists
- added `verify:price-availability`
- resolves quote-only/OEM/manufacturer/test-equipment/hyperscaler vendors to `price_status=no_public_price`
- writes `price_unavailable` evidence, does not fabricate price rows
- preserves real retail/source-discovery cases as `needs_research`
- Health API now exposes price status buckets
- live price-status result:
- `public_price=11414`
- `no_public_price=5595`
- `needs_research=186`
- `ambiguous=0`
- remaining price research is now limited to real retail/source-discovery vendors:
- `10Gtek=126`
- `SFPcables=31`
- `ShopFiber24=24`
- `ATGBICS=3`
- `Vcelink=2`
- SFPcables search tests for 10Gtek part numbers did not return reliable direct hits; remaining 10Gtek work is source/alias discovery, not no-public-price classification
- live health:
- active products: `17195`
- price verified: `11414`
- image verified: `12104`
- details verified: `16913`
- fully verified: `10505`
- competitor status: `matched=10775`, `no_valid_match=74`, `ambiguous=556`, `needs_research=5790`
- TIPLLM training pool updated with:
- part-number details verifier rules
- price_status/no-public-price model
- MAGATAMA all-lane RunPod training block started on 2026-05-09: - MAGATAMA all-lane RunPod training block started on 2026-05-09:
- user requested all trainable LLM lanes via RunPod - user requested all trainable LLM lanes via RunPod
- lanes in scope: - lanes in scope:

View File

@ -0,0 +1,108 @@
# 2026-05-10 — TIP Price Status + OEM Detail Resolution
## Part-Number Details Verifier
Added `packages/scraper/src/utils/verify-part-number-details.ts`.
Purpose:
- close detail gaps where form factor, reach and fiber are already present but `speed_gbps=0`
- use conservative part-number inference only
- write canonical `details` evidence with robot `verify:part-number-details`
Safety catch:
- Cisco `GLC-FE-*` appeared in dry-run
- these are Fast Ethernet, so the generic `GLC-* => 1G` rule was rejected
- final rule only infers 1G for safe patterns such as `GLC-SX`, `GLC-LX`, `GLC-LH`, `GLC-ZX`, `GLC-EX`, `GLC-BX`, `GLC-T`, `GLC-TE`
Live apply:
- Juniper Networks:
- `375` candidates updated
- `375` details verified
- Cisco Systems:
- `176` candidates updated
- `176` details verified
- evidence:
- `details | verify:part-number-details | 551`
Result:
- `details_verified` increased to `16913`
- Juniper detail gaps dropped from `173` to `43`
- Cisco detail gaps dropped from `146` to `80`
## Price Status Model
Added migration `sql/105-price-status-and-unavailable-evidence.sql`.
New fields:
- `transceivers.price_status`
- `transceivers.price_status_updated_at`
- `transceivers.price_unavailable_verified_at`
- `transceivers.price_unavailable_reason`
Allowed states:
- `public_price`
- `no_public_price`
- `needs_research`
- `ambiguous`
- `unknown`
Important semantic rule:
- `price_verified=true` still requires a real public price observation
- quote-only/OEM/no-checkout cases use `price_status=no_public_price`
- no price rows are fabricated
Added `price_unavailable` evidence type.
Added `packages/scraper/src/utils/resolve-price-availability.ts`.
Live apply:
- resolved quote-only/OEM/manufacturer/test-equipment/hyperscaler vendors to `no_public_price`
- wrote `5361` `price_unavailable` evidence rows
- preserved real retail/source-discovery vendors as `needs_research`
Health API now exposes:
- `price_status.public_price`
- `price_status.no_public_price`
- `price_status.ambiguous`
- `price_status.needs_research`
Live health after this pass:
- active products: `17195`
- price verified: `11414`
- price status:
- `public_price=11414`
- `no_public_price=5595`
- `needs_research=186`
- `ambiguous=0`
- image verified: `12104`
- details verified: `16913`
- fully verified: `10505`
- competitor status:
- `matched=10775`
- `no_valid_match=74`
- `ambiguous=556`
- `needs_research=5790`
Remaining price research:
- `10Gtek=126`
- `SFPcables=31`
- `ShopFiber24=24`
- `ATGBICS=3`
- `Vcelink=2`
Interpretation:
- remaining price work is now real retail/source discovery
- SFPcables search tests for 10Gtek part numbers did not return reliable direct hits
- 10Gtek requires a source/alias discovery robot rather than no-public-price classification

View File

@ -49,3 +49,5 @@
{"id":"tip-20260509-gbics-ts-artifact-quarantine-001","source":"codex-tip-verification-2026-05-09","kind":"sft-jsonl","messages":[{"role":"system","content":"You are TIP_LLM. Keep active transceiver counts honest by quarantining vendor navigation, category, PDF, cable, and infrastructure artifacts."},{"role":"user","content":"What did the GBICS and T&S cleanup teach the crawler robots?"},{"role":"assistant","content":"GBICS rows such as category labels, patch leads, active optical cables, direct-attach cables, Choose By pages, cart pages, vendor pages, NVIDIA/ConnectX/Cisco/WatchGuard/HP pages, network adapters, media converters, AOC/DAC and MTP/MPO cable pages are not active transceiver SKUs. T&S rows with non-/products URLs, PDFs, generic fiber-optic-transceivers family URLs, Product-Brochure names, or broad 100G/200G/400G-Transceiver family labels are artifacts. Quarantine these outside the active verification base, clear verification flags, and write `artifact_quarantine` evidence before recalculating health."}]} {"id":"tip-20260509-gbics-ts-artifact-quarantine-001","source":"codex-tip-verification-2026-05-09","kind":"sft-jsonl","messages":[{"role":"system","content":"You are TIP_LLM. Keep active transceiver counts honest by quarantining vendor navigation, category, PDF, cable, and infrastructure artifacts."},{"role":"user","content":"What did the GBICS and T&S cleanup teach the crawler robots?"},{"role":"assistant","content":"GBICS rows such as category labels, patch leads, active optical cables, direct-attach cables, Choose By pages, cart pages, vendor pages, NVIDIA/ConnectX/Cisco/WatchGuard/HP pages, network adapters, media converters, AOC/DAC and MTP/MPO cable pages are not active transceiver SKUs. T&S rows with non-/products URLs, PDFs, generic fiber-optic-transceivers family URLs, Product-Brochure names, or broad 100G/200G/400G-Transceiver family labels are artifacts. 
Quarantine these outside the active verification base, clear verification flags, and write `artifact_quarantine` evidence before recalculating health."}]}
{"id":"tip-20260509-product-page-asset-verifier-001","source":"codex-tip-verification-2026-05-09","kind":"sft-jsonl","messages":[{"role":"system","content":"You are TIP_LLM. Design deterministic product-page verifiers that improve evidence without broad crawling or invented values."},{"role":"user","content":"How should TIP safely backfill images and details from existing vendor product URLs?"},{"role":"assistant","content":"Use a targeted product-page asset verifier. Select active rows with an existing product_page_url and missing image/details; fetch one page per row with low concurrency; extract only likely product images from og:image, twitter:image, Magento product image IDs, or filtered product image URLs; infer details only from part number, product URL, and page title, not from full body navigation. Always dry-run first to catch navigation pollution such as T&S 800G menu links or 1.25G being misread as 25G. Apply through `markImageVerified` and `markDetailsVerified` so evidence is recorded."}]} {"id":"tip-20260509-product-page-asset-verifier-001","source":"codex-tip-verification-2026-05-09","kind":"sft-jsonl","messages":[{"role":"system","content":"You are TIP_LLM. Design deterministic product-page verifiers that improve evidence without broad crawling or invented values."},{"role":"user","content":"How should TIP safely backfill images and details from existing vendor product URLs?"},{"role":"assistant","content":"Use a targeted product-page asset verifier. Select active rows with an existing product_page_url and missing image/details; fetch one page per row with low concurrency; extract only likely product images from og:image, twitter:image, Magento product image IDs, or filtered product image URLs; infer details only from part number, product URL, and page title, not from full body navigation. Always dry-run first to catch navigation pollution such as T&S 800G menu links or 1.25G being misread as 25G. 
Apply through `markImageVerified` and `markDetailsVerified` so evidence is recorded."}]}
{"id":"tip-20260509-quote-only-tscom-pricing-001","source":"codex-tip-verification-2026-05-09","kind":"sft-jsonl","messages":[{"role":"system","content":"You are TIP_LLM. Public-price verification requires a real public price."},{"role":"user","content":"T&S Communication product pages have valid images and details but expose no real public prices, only no price or price 0.00. What should TIP do?"},{"role":"assistant","content":"Do not mark price_verified. Treat T&S as image/detail-verifiable but public-price-missing until a real price observation exists. `price: 0.00`, quote/request flows, missing amount fields, or SEO filenames containing `price` are not price evidence. Keep image_verified/details_verified true when source-backed, keep price_verified false, and document the vendor as quote-only/public-price-unavailable rather than fabricating a market price."}]} {"id":"tip-20260509-quote-only-tscom-pricing-001","source":"codex-tip-verification-2026-05-09","kind":"sft-jsonl","messages":[{"role":"system","content":"You are TIP_LLM. Public-price verification requires a real public price."},{"role":"user","content":"T&S Communication product pages have valid images and details but expose no real public prices, only no price or price 0.00. What should TIP do?"},{"role":"assistant","content":"Do not mark price_verified. Treat T&S as image/detail-verifiable but public-price-missing until a real price observation exists. `price: 0.00`, quote/request flows, missing amount fields, or SEO filenames containing `price` are not price evidence. Keep image_verified/details_verified true when source-backed, keep price_verified false, and document the vendor as quote-only/public-price-unavailable rather than fabricating a market price."}]}
{"id":"tip-20260510-part-number-details-verifier-001","source":"codex-tip-verification-2026-05-10","kind":"sft-jsonl","messages":[{"role":"system","content":"You are TIP_LLM. Use deterministic part-number inference only for conservative, auditable detail repair."},{"role":"user","content":"How should TIP close OEM detail gaps when form factor, reach and fiber are known but speed_gbps is zero?"},{"role":"assistant","content":"Use a part-number details verifier. Infer speed only from unambiguous tokens such as 800G, 400G, 100GBASE, 40GBASE, SFP10G, XFP-10GE, 4X25G, 2X50G, 10X10G, CTP-SFP-1GE, and known FC tokens. Dry-run first and exclude traps such as Cisco GLC-FE Fast Ethernet; do not treat all GLC-* as 1G. Update speed/speed_gbps, then mark details through the canonical evidence path with robot_name `verify:part-number-details`."}]}
{"id":"tip-20260510-price-status-model-001","source":"codex-tip-verification-2026-05-10","kind":"sft-jsonl","messages":[{"role":"system","content":"You are TIP_LLM. Separate real public prices from resolved no-public-price states."},{"role":"user","content":"How should TIP handle OEM, component, hyperscaler, test-equipment, and quote-only vendors with no public checkout price?"},{"role":"assistant","content":"Do not set `price_verified` unless there is a real public price observation. Add or use `price_status`: `public_price` for true price observations, `no_public_price` for source/policy-backed quote-only or OEM catalog cases, `needs_research` for real retail/source discovery work, and `ambiguous` for uncertain cases. Write `price_unavailable` evidence with the vendor, part number, reason, and source URL if available. Keep retail vendors such as 10Gtek, SFPcables, ShopFiber24, ATGBICS, Vcelink, FS.com and Flexoptix out of no-public-price policies because they should have real prices."}]}