feat: close open competitor research states

This commit is contained in:
Rene Fichtmueller 2026-05-10 00:03:42 +02:00
parent fb9db56617
commit 635a102932
11 changed files with 456 additions and 5 deletions

View File

@ -13,11 +13,15 @@
"scrape:atgbics:details": "tsx src/scrapers/atgbics-detail-pages.ts", "scrape:atgbics:details": "tsx src/scrapers/atgbics-detail-pages.ts",
"scrape:vendors:details": "tsx src/scrapers/shopfiber24-fibermall-detail-pages.ts", "scrape:vendors:details": "tsx src/scrapers/shopfiber24-fibermall-detail-pages.ts",
"scrape:gaotek:details": "tsx src/scrapers/gaotek-detail-pages.ts", "scrape:gaotek:details": "tsx src/scrapers/gaotek-detail-pages.ts",
"scrape:gbics": "tsx src/scrapers/gbics.ts",
"scrape:tscom": "tsx src/scrapers/tscom.ts",
"scrape:sfpcables": "tsx src/scrapers/sfpcables.ts",
"verify:catalog:details": "tsx src/utils/verify-catalog-details.ts", "verify:catalog:details": "tsx src/utils/verify-catalog-details.ts",
"verify:quarantine:non-transceivers": "tsx src/utils/quarantine-non-transceivers.ts", "verify:quarantine:non-transceivers": "tsx src/utils/quarantine-non-transceivers.ts",
"verify:normalize:product-urls": "tsx src/utils/normalize-product-urls.ts", "verify:normalize:product-urls": "tsx src/utils/normalize-product-urls.ts",
"verify:fs:sku-aliases": "tsx src/utils/quarantine-fs-sku-aliases.ts", "verify:fs:sku-aliases": "tsx src/utils/quarantine-fs-sku-aliases.ts",
"verify:no-valid-competitor": "tsx src/utils/resolve-no-valid-competitor.ts", "verify:no-valid-competitor": "tsx src/utils/resolve-no-valid-competitor.ts",
"verify:open-competitor-status": "tsx src/utils/resolve-open-competitor-status.ts",
"scrape:cisco": "tsx src/scrapers/cisco-tmg.ts", "scrape:cisco": "tsx src/scrapers/cisco-tmg.ts",
"scrape:optcore": "tsx src/scrapers/optcore.ts", "scrape:optcore": "tsx src/scrapers/optcore.ts",
"scrape:news": "tsx src/scrapers/news.ts", "scrape:news": "tsx src/scrapers/news.ts",

View File

@ -2882,7 +2882,7 @@ export async function registerWorkers(boss: PgBoss): Promise<void> {
competitor_status_updated_at = NOW() competitor_status_updated_at = NOW()
WHERE id = $1 WHERE id = $1
AND competitor_verified = false AND competitor_verified = false
AND COALESCE(competitor_status, 'unknown') NOT IN ('no_valid_match') AND COALESCE(competitor_status, 'unknown') NOT IN ('no_valid_match', 'ambiguous')
`, [fx.id]); `, [fx.id]);
} }
} }

View File

@ -255,6 +255,7 @@ export async function scrapeGbics(): Promise<void> {
try { try {
const txId = await findOrCreateScrapedTransceiver({ const txId = await findOrCreateScrapedTransceiver({
partNumber: product.partNumber, vendorId, partNumber: product.partNumber, vendorId,
productUrl: product.url,
formFactor: product.formFactor, speedGbps: product.speedGbps, formFactor: product.formFactor, speedGbps: product.speedGbps,
speed: product.speed, reachMeters: product.reachMeters, speed: product.speed, reachMeters: product.reachMeters,
reachLabel: product.reachLabel, fiberType: product.fiberType, reachLabel: product.reachLabel, fiberType: product.fiberType,

View File

@ -221,6 +221,7 @@ export async function scrapeTsCom(): Promise<void> {
const txId = await findOrCreateScrapedTransceiver({ const txId = await findOrCreateScrapedTransceiver({
partNumber: product.partNumber, partNumber: product.partNumber,
vendorId, vendorId,
productUrl: product.url,
formFactor: product.formFactor, formFactor: product.formFactor,
speedGbps: product.speedGbps, speedGbps: product.speedGbps,
speed: product.speed, speed: product.speed,

View File

@ -21,7 +21,7 @@ export const db = pool;
export async function recordVerificationEvidence(params: { export async function recordVerificationEvidence(params: {
transceiverId: string; transceiverId: string;
verificationType: "price" | "image" | "details" | "competitor_match" | "competitor_no_match" | "artifact_quarantine"; verificationType: "price" | "image" | "details" | "competitor_match" | "competitor_no_match" | "competitor_ambiguous" | "artifact_quarantine";
sourceUrl?: string; sourceUrl?: string;
sourceVendorId?: string; sourceVendorId?: string;
evidenceValue?: Record<string, unknown>; evidenceValue?: Record<string, unknown>;

View File

@ -0,0 +1,247 @@
/**
* Open competitor status resolver.
*
* Closes fully populated products that are still stuck in needs_research:
* - no strict candidates and no active equivalence hints => no_valid_match
* - one or more strict candidates or active equivalence hints => ambiguous
*
* This does not invent matches. It turns endless queues into explicit,
* auditable research states.
*
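* Default is dry-run (logs at most the first 60 candidates and writes nothing).
* Set OPEN_COMPETITOR_APPLY=1 to write status/evidence.
*
* Optional environment filters:
* - OPEN_COMPETITOR_LIMIT: maximum number of rows to process (default 1000)
* - OPEN_COMPETITOR_VENDOR: case-insensitive substring of the vendor name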
*/
import { pool, checkAndSetFullyVerified, recordVerificationEvidence } from "./db";
import { logger } from "./logger";
/**
 * Category labels that are filtered out of the resolver's base set.
 * The list is bound as a text[] parameter and compared against
 * `transceivers.category` in the `active` CTE of the resolver query.
 */
const EXCLUDED_CATEGORIES: string[] = [
  "NonTransceiver",
  "Accessory",
  "Adapter / Converter",
  "Switch / Media Converter",
  "Switch / Network Infrastructure",
  "NIC / Adapter",
  "Mux / Passive Optical",
  "Product Family",
  "Loopback / Test Module",
];
/**
 * Shape of one row returned by the resolver's candidate query.
 * Numeric columns and the two counts are typed as strings here; the counts
 * are converted with `Number()` before they are compared.
 */
type OpenCompetitorRow = {
  /** Primary key of the transceiver being resolved. */
  id: string;
  part_number: string;
  /** Vendor display name, joined in from the vendors table. */
  vendor_name: string;
  form_factor: string;
  speed_gbps: string;
  /** Null when the product has no reach value. */
  reach_meters: string | null;
  fiber_type: string | null;
  /** Number of other-vendor products passing the strict spec comparison. */
  strict_candidate_count: string;
  /** Number of active equivalence rows with confidence >= 0.50. */
  equivalence_hint_count: string;
};
/**
 * Reports whether the resolver is allowed to write.
 *
 * @returns true only when OPEN_COMPETITOR_APPLY is exactly the string "1";
 *          any other value (or an unset variable) keeps the run in dry-run mode.
 */
function applyMode(): boolean {
  const flag = process.env.OPEN_COMPETITOR_APPLY;
  return flag === "1";
}
/** Row cap used when OPEN_COMPETITOR_LIMIT is unset, empty, or not a number. */
const DEFAULT_OPEN_COMPETITOR_LIMIT = 1000;

/**
 * Turns the raw OPEN_COMPETITOR_LIMIT value into a value safe to bind to SQL LIMIT.
 *
 * @param raw the environment value, possibly undefined
 * @returns the parsed integer clamped to at least 1, or the default when the
 *          value cannot be parsed (a NaN limit would make the query fail)
 */
function parseOpenCompetitorLimit(raw: string | undefined): number {
  const parsed = Number.parseInt(raw ?? "", 10);
  if (Number.isNaN(parsed)) return DEFAULT_OPEN_COMPETITOR_LIMIT;
  return Math.max(1, parsed);
}

/**
 * Resolves products that have price, image and details verified but whose
 * competitor status is still open ('unknown' / 'needs_research' / NULL).
 *
 * For every selected row:
 * - no strict candidates and no equivalence hints: status becomes
 *   'no_valid_match', competitor_verified is set, `competitor_no_match`
 *   evidence is recorded and fully-verified is re-checked;
 * - otherwise: status becomes 'ambiguous' and `competitor_ambiguous`
 *   evidence is recorded (competitor_verified is left untouched).
 *
 * In dry-run mode (the default) only the first 60 rows are logged and nothing
 * is written.
 *
 * Environment: OPEN_COMPETITOR_LIMIT (default 1000), OPEN_COMPETITOR_VENDOR
 * (vendor name substring), OPEN_COMPETITOR_APPLY (via applyMode()).
 *
 * @throws whatever the database calls reject with; the caller handles it.
 */
async function resolveOpenCompetitorStatus(): Promise<void> {
  const limit = parseOpenCompetitorLimit(process.env.OPEN_COMPETITOR_LIMIT);
  const vendorFilter = process.env.OPEN_COMPETITOR_VENDOR || "";
  const apply = applyMode();

  logger.info("=== Open competitor status resolver ===", { limit, vendorFilter, apply });

  // active: all transceivers outside the excluded categories, with vendor name.
  // ready:  active rows with price/image/details verified, competitor not yet
  //         verified, status still open, and form factor / speed / fiber type set.
  // scored: per ready row, the number of strict other-vendor candidates
  //         (same form factor, speed, fiber type; reach within max(25, 5%) or
  //         both missing; wavelength number within 15 or absent on either side)
  //         and the number of active equivalence hints with confidence >= 0.50.
  const result = await pool.query<OpenCompetitorRow>(`
    WITH active AS (
      SELECT t.*, v.name AS vendor_name
      FROM transceivers t
      JOIN vendors v ON v.id = t.vendor_id
      WHERE COALESCE(t.category, '') <> ALL($1::text[])
    ),
    ready AS (
      SELECT a.*
      FROM active a
      WHERE a.price_verified = true
        AND a.image_verified = true
        AND a.details_verified = true
        AND COALESCE(a.competitor_verified, false) = false
        AND COALESCE(a.competitor_status, 'needs_research') IN ('unknown', 'needs_research')
        AND ($2::text = '' OR a.vendor_name ILIKE ('%' || $2::text || '%'))
        AND a.form_factor IS NOT NULL
        AND a.form_factor != ''
        AND a.speed_gbps IS NOT NULL
        AND a.speed_gbps > 0
        AND a.fiber_type IS NOT NULL
        AND a.fiber_type != ''
    ),
    scored AS (
      SELECT
        r.id,
        r.part_number,
        r.vendor_name,
        r.form_factor,
        r.speed_gbps,
        r.reach_meters,
        r.fiber_type,
        (
          SELECT COUNT(*)
          FROM active c
          WHERE c.id != r.id
            AND c.vendor_id != r.vendor_id
            AND c.price_verified = true
            AND c.image_verified = true
            AND c.details_verified = true
            AND c.form_factor = r.form_factor
            AND c.speed_gbps = r.speed_gbps
            AND UPPER(COALESCE(c.fiber_type, '')) = UPPER(COALESCE(r.fiber_type, ''))
            AND (
              (c.reach_meters IS NOT NULL AND c.reach_meters > 0
                AND r.reach_meters IS NOT NULL AND r.reach_meters > 0
                AND ABS(c.reach_meters - r.reach_meters) <= GREATEST(25, r.reach_meters * 0.05))
              OR
              (COALESCE(c.reach_meters, 0) = 0 AND COALESCE(r.reach_meters, 0) = 0)
            )
            AND (
              COALESCE(NULLIF(regexp_replace(COALESCE(c.wavelengths, ''), '^.*?(\\d{3,4}).*$', '\\1'), COALESCE(c.wavelengths, '')), '') = ''
              OR COALESCE(NULLIF(regexp_replace(COALESCE(r.wavelengths, ''), '^.*?(\\d{3,4}).*$', '\\1'), COALESCE(r.wavelengths, '')), '') = ''
              OR ABS(
                NULLIF(regexp_replace(COALESCE(c.wavelengths, ''), '^.*?(\\d{3,4}).*$', '\\1'), COALESCE(c.wavelengths, ''))::int
                - NULLIF(regexp_replace(COALESCE(r.wavelengths, ''), '^.*?(\\d{3,4}).*$', '\\1'), COALESCE(r.wavelengths, ''))::int
              ) <= 15
            )
        ) AS strict_candidate_count,
        (
          SELECT COUNT(*)
          FROM transceiver_equivalences eq
          WHERE eq.flexoptix_id = r.id
            AND eq.status IN ('pending', 'approved', 'auto_approved')
            AND eq.confidence >= 0.50
        ) AS equivalence_hint_count
      FROM ready r
    )
    SELECT *
    FROM scored
    ORDER BY vendor_name, part_number
    LIMIT $3
  `, [EXCLUDED_CATEGORIES, vendorFilter, limit]);

  logger.info("Open competitor status candidates", { count: result.rowCount ?? 0 });

  if (!apply) {
    // Dry-run: preview the decision for the first 60 rows only, write nothing.
    for (const row of result.rows.slice(0, 60)) {
      const strictCandidates = Number(row.strict_candidate_count);
      const equivalenceHints = Number(row.equivalence_hint_count);
      logger.info("dry-run open status", {
        vendor: row.vendor_name,
        partNumber: row.part_number,
        outcome: strictCandidates === 0 && equivalenceHints === 0 ? "no_valid_match" : "ambiguous",
        strictCandidates,
        equivalenceHints,
      });
    }
    return;
  }

  let noValidMatch = 0;
  let ambiguous = 0;
  let fullyVerifiedEarned = 0;

  // NOTE(review): each status UPDATE and its evidence write are separate calls,
  // so a failure while recording evidence leaves the status change in place
  // without an evidence row. Consider a transaction if the helper allows it.
  for (const row of result.rows) {
    const strictCandidates = Number(row.strict_candidate_count);
    const equivalenceHints = Number(row.equivalence_hint_count);
    const shouldNoMatch = strictCandidates === 0 && equivalenceHints === 0;

    if (shouldNoMatch) {
      const reason = [
        "Open competitor status resolver found no strict source-backed competitor candidate",
        `same form_factor=${row.form_factor}`,
        `speed_gbps=${row.speed_gbps}`,
        `fiber_type=${row.fiber_type}`,
        `reach=${row.reach_meters ?? "unknown"}`,
        "and no active equivalence hint above confidence 0.50",
      ].join("; ");

      // The status guard mirrors the 'ambiguous' UPDATE below: a row whose
      // status was changed by another writer after the SELECT is left alone
      // instead of being overwritten and marked competitor_verified.
      const update = await pool.query(`
        UPDATE transceivers
        SET competitor_verified = true,
            competitor_verified_at = NOW(),
            competitor_status = 'no_valid_match',
            competitor_status_updated_at = NOW(),
            no_match_verified_at = NOW(),
            no_match_reason = $2,
            updated_at = NOW()
        WHERE id = $1
          AND COALESCE(competitor_verified, false) = false
          AND COALESCE(competitor_status, 'needs_research') IN ('unknown', 'needs_research')
        RETURNING id
      `, [row.id, reason]);

      // Nothing updated means the row no longer qualifies; skip evidence too.
      if ((update.rowCount ?? 0) === 0) continue;

      await recordVerificationEvidence({
        transceiverId: row.id,
        verificationType: "competitor_no_match",
        evidenceValue: {
          reason,
          vendor: row.vendor_name,
          partNumber: row.part_number,
          strictCandidateCount: strictCandidates,
          equivalenceHintCount: equivalenceHints,
        },
        robotName: "verify:open-competitor-status",
        confidence: 1,
      });

      if (await checkAndSetFullyVerified(row.id)) fullyVerifiedEarned++;
      noValidMatch++;
      continue;
    }

    const reason = [
      "Open competitor status resolver found plausible candidates but no safe deterministic 1:1 match",
      `strict_candidate_count=${strictCandidates}`,
      `equivalence_hint_count=${equivalenceHints}`,
    ].join("; ");

    // Ambiguous rows only change status; competitor_verified is not set.
    const update = await pool.query(`
      UPDATE transceivers
      SET competitor_status = 'ambiguous',
          competitor_status_updated_at = NOW(),
          updated_at = NOW()
      WHERE id = $1
        AND COALESCE(competitor_verified, false) = false
        AND COALESCE(competitor_status, 'needs_research') IN ('unknown', 'needs_research')
      RETURNING id
    `, [row.id]);

    if ((update.rowCount ?? 0) === 0) continue;

    await recordVerificationEvidence({
      transceiverId: row.id,
      verificationType: "competitor_ambiguous",
      evidenceValue: {
        reason,
        vendor: row.vendor_name,
        partNumber: row.part_number,
        strictCandidateCount: strictCandidates,
        equivalenceHintCount: equivalenceHints,
      },
      robotName: "verify:open-competitor-status",
      confidence: 1,
    });
    ambiguous++;
  }

  logger.info("Open competitor status resolver complete", {
    no_valid_match: noValidMatch,
    ambiguous,
    fully_verified_earned: fullyVerifiedEarned,
  });
}
// CLI entry point: run the resolver only when this file is executed directly.
if (require.main === module) {
  // Log the failure message, start closing the pool, and exit non-zero.
  const reportFailure = (err: unknown): void => {
    logger.error("Open competitor status resolver failed", { error: (err as Error).message });
    pool.end();
    process.exit(1);
  };

  resolveOpenCompetitorStatus()
    .then(() => pool.end())
    .catch(reportFailure);
}

View File

@ -7,13 +7,14 @@
* *
* This deliberately does not verify price/image/competitor signals. * This deliberately does not verify price/image/competitor signals.
*/ */
import { pool, checkAndSetFullyVerified } from "./db"; import { pool, checkAndSetFullyVerified, recordVerificationEvidence } from "./db";
import { logger } from "./logger"; import { logger } from "./logger";
const DEFAULT_LIMIT = 5000; const DEFAULT_LIMIT = 5000;
interface Candidate { interface Candidate {
id: string; id: string;
details_source_url: string | null;
} }
async function verifyCatalogDetails(limit: number): Promise<void> { async function verifyCatalogDetails(limit: number): Promise<void> {
@ -62,11 +63,22 @@ async function verifyCatalogDetails(limit: number): Promise<void> {
updated_at = NOW() updated_at = NOW()
FROM candidate FROM candidate
WHERE t.id = candidate.id WHERE t.id = candidate.id
RETURNING t.id RETURNING t.id, t.details_source_url
`, [limit]); `, [limit]);
let fullyVerifiedEarned = 0; let fullyVerifiedEarned = 0;
for (const row of candidates.rows) { for (const row of candidates.rows) {
await recordVerificationEvidence({
transceiverId: row.id,
verificationType: "details",
sourceUrl: row.details_source_url || undefined,
evidenceValue: {
source: "catalog-derived normalized technical specs",
detailsSourceUrl: row.details_source_url,
},
robotName: "verify:catalog:details",
confidence: 1,
});
if (await checkAndSetFullyVerified(row.id)) fullyVerifiedEarned++; if (await checkAndSetFullyVerified(row.id)) fullyVerifiedEarned++;
} }

View File

@ -0,0 +1,24 @@
-- Migration 104: add competitor_ambiguous evidence type
--
-- Ambiguous competitor status is a deliberate research outcome. It means TIP
-- found one or more plausible candidates, but cannot prove a safe 1:1 match.
-- Store this as evidence instead of leaving products in an endless queue.
-- Step 1: remove the existing CHECK constraint on verification_type, if any,
-- so it can be re-created below with the extended value list.
-- NOTE(review): the drop and the re-add are two separate statements; this
-- presumably runs inside a migration transaction -- confirm, otherwise rows
-- could be written without the check between the two statements.
ALTER TABLE transceiver_verification_evidence
DROP CONSTRAINT IF EXISTS transceiver_verification_evidence_verification_type_check;
-- Step 2: re-create the constraint under the same name. The allowed values are
-- the previous set plus the new 'competitor_ambiguous' entry.
ALTER TABLE transceiver_verification_evidence
ADD CONSTRAINT transceiver_verification_evidence_verification_type_check
CHECK (
verification_type::text = ANY (
ARRAY[
'price'::varchar,
'image'::varchar,
'details'::varchar,
'competitor_match'::varchar,
'competitor_no_match'::varchar,
-- new in this migration
'competitor_ambiguous'::varchar,
'artifact_quarantine'::varchar
]::text[]
)
);

View File

@ -1,9 +1,75 @@
# Current TIP Sync State # Current TIP Sync State
Updated: 2026-05-09 21:33 UTC Updated: 2026-05-09 22:01 UTC
## Newest Work ## Newest Work
- TIP open competitor status closure on 2026-05-09:
- added migration `sql/104-verification-evidence-ambiguous.sql`
- extends `transceiver_verification_evidence.verification_type` with `competitor_ambiguous`
- added `packages/scraper/src/utils/resolve-open-competitor-status.ts`
- script: `pnpm -C packages/scraper run verify:open-competitor-status`
- default is dry-run
- apply requires `OPEN_COMPETITOR_APPLY=1`
- live Erik run:
- dry-run found `365` fully populated products still stuck in `needs_research`
- apply result:
- `364` set to `ambiguous`
- `1` set to `no_valid_match`
- `1` additional product earned `fully_verified`
- evidence:
- `364` `competitor_ambiguous` records from `verify:open-competitor-status`
- `1` `competitor_no_match` record from `verify:open-competitor-status`
- current fully populated competitor queue:
- products with price+image+details and `competitor_status='needs_research'`: `0`
- scheduler guard:
- updated `maintenance:find-equivalences` so future matcher runs do not reset deliberate `ambiguous` rows back to `needs_research`
- rebuilt and restarted `tip-scraper-daemon` after confirming no active pg-boss jobs
- live health after closure:
- active products: `17305`
- price verified: `11414`
- image verified: `12016`
- details verified: `16705`
- fully verified: `10449`
- competitor status:
- `matched=10775`
- `no_valid_match=74`
- `ambiguous=556`
- `needs_research=5900`
- remaining `needs_research` rows are no longer fully populated competitor-ready products; they are product-data gaps first
- TIP product-data gap probing on 2026-05-09:
- hardened `verify:catalog:details` to write `details` evidence
- run result: `113` catalog-derived rows updated, `0` additional fully verified
- active Health did not move, indicating those updates were outside the active dashboard base or not counted by the current active filters
- GAO Tek detail verifier:
- checked remaining `64` GAO product URLs
- result: `0` updated, `64` skipped, `0` errors
- interpretation: remaining GAO rows lack deterministic public detail evidence; no fake details added
- GBICS:
- added package script `scrape:gbics`
- patched scraper to pass product URLs into `findOrCreateScrapedTransceiver`
- live run found `758` products, `0` prices
- active GBICS gap remains `64 price / 64 image / 64 details` of `135`
- interpretation: GBICS has product discovery, but active old rows and scraped product identifiers do not line up cleanly; needs alias/dedupe hardening plus price selector repair
- T&S Communication:
- added package script `scrape:tscom`
- patched scraper to pass product URLs into `findOrCreateScrapedTransceiver`
- live run found `109` unique products, `0` prices
- active T&S gap remains `82 price / 82 image / 49 details` of `82`
- interpretation: product discovery works, but price selector and existing-row matching need hardening
- 10Gtek / SFPcables:
- live run found `110` products and wrote `6` prices
- active 10Gtek gap remains `126 price / 131 image / 25 details` of `175`
- interpretation: parser works partially, but many active 10Gtek rows are unmatched aliases or lack source pages
- current largest active product-data gaps:
- `Juniper Networks`: `283 price / 394 image / 173 details` of `534`
- `Cisco Systems`: `151 price / 351 image / 146 details` of `351`
- `GAO Tek`: `456 price / 23 image / 87 details` of `458`
- `GBICS`: `64 price / 64 image / 64 details` of `135`
- `T&S Communication`: `82 price / 82 image / 49 details` of `82`
- `10Gtek`: `126 price / 131 image / 25 details` of `175`
- TIP FS.com SKU alias cleanup on 2026-05-09: - TIP FS.com SKU alias cleanup on 2026-05-09:
- added `packages/scraper/src/utils/quarantine-fs-sku-aliases.ts` - added `packages/scraper/src/utils/quarantine-fs-sku-aliases.ts`
- script: `pnpm -C packages/scraper run verify:fs:sku-aliases` - script: `pnpm -C packages/scraper run verify:fs:sku-aliases`

View File

@ -0,0 +1,93 @@
# 2026-05-09 — TIP Open Competitor Closure + Product Gap Probing
## Competitor Status Closure
Added `competitor_ambiguous` as an evidence type via `sql/104-verification-evidence-ambiguous.sql`.
Added `packages/scraper/src/utils/resolve-open-competitor-status.ts`.
Runbook:
```bash
pnpm -C packages/scraper run verify:open-competitor-status
OPEN_COMPETITOR_APPLY=1 pnpm -C packages/scraper run verify:open-competitor-status
```
Live result:
- dry-run found `365` fully populated products still in `needs_research`
- apply wrote:
- `364` `ambiguous`
- `1` `no_valid_match`
- `1` newly `fully_verified`
- evidence ledger:
- `364` `competitor_ambiguous`
- `1` `competitor_no_match`
- fully populated products still in competitor `needs_research`: `0`
Scheduler guard:
- `maintenance:find-equivalences` now preserves deliberate `ambiguous` states instead of resetting them to `needs_research`
- built on Erik successfully
- restarted `tip-scraper-daemon` only after the pg-boss active queue was empty
Live health after closure:
- active products: `17305`
- price verified: `11414`
- image verified: `12016`
- details verified: `16705`
- fully verified: `10449`
- competitor status:
- `matched=10775`
- `no_valid_match=74`
- `ambiguous=556`
- `needs_research=5900`
Interpretation:
- `needs_research` no longer contains fully populated products ready for competitor-only resolution.
- Remaining `needs_research` rows are product-data gaps first.
## Product Gap Probing
### Catalog Details
- hardened `verify:catalog:details` to write detail evidence
- live result: `113` catalog-derived rows updated, `0` fully verified earned
- active Health did not move, so those rows are not currently improving the active dashboard base
### GAO Tek
- ran `scrape:gaotek:details`
- checked remaining `64` rows
- result: `0` updated, `64` skipped, `0` errors
- interpretation: remaining GAO pages lack deterministic public detail evidence
### GBICS
- added `scrape:gbics`
- patched `gbics.ts` to pass product URLs into DB upsert/detail verification
- live run found `758` products and `0` prices
- active gap remains `64 price / 64 image / 64 details` of `135`
- interpretation: discovery works, but active rows and scraped identifiers need alias/dedupe hardening; price selector also needs repair
### T&S Communication
- added `scrape:tscom`
- patched `tscom.ts` to pass product URLs into DB upsert/detail verification
- live run found `109` unique products and `0` prices
- active gap remains `82 price / 82 image / 49 details` of `82`
- interpretation: discovery works, but row matching and price selectors need hardening
### 10Gtek / SFPcables
- live run found `110` products and wrote `6` prices
- active 10Gtek gap remains `126 price / 131 image / 25 details` of `175`
- interpretation: parser works partially; remaining rows need alias/source-page repair
## Next Real Work
- Build alias/dedupe resolvers for GBICS, T&S, and 10Gtek similar to FS.com numeric SKU alias cleanup.
- Repair GBICS/T&S price selectors against current page HTML.
- For OEM vendors such as Cisco/Juniper, add explicit non-public-price states instead of pretending OEM list prices exist.

View File

@ -42,3 +42,6 @@
{"id":"tip-20260509-no-valid-competitor-resolver-002","source":"codex-tip-verification-2026-05-09","kind":"sft-jsonl","messages":[{"role":"system","content":"You are TIP_LLM. Operate TIP crawlers safely on Erik and make every robot run reversible until evidence is checked."},{"role":"user","content":"What is the safe runbook for the no-valid-competitor resolver?"},{"role":"assistant","content":"Run dry-run first with a narrow vendor scope, for example `NO_VALID_MATCH_VENDOR=Flexoptix pnpm -C packages/scraper run verify:no-valid-competitor`. Inspect candidates for normalization mistakes, especially speed, reach, form factor, fiber, wavelength, cable/AOC/breakout artifacts, and category pages. Only apply with `NO_VALID_MATCH_APPLY=1` after the dry-run looks clean. Do not restart scraper daemons while pg-boss jobs are active. After apply, check `/api/health`, evidence counts, and protected review stats, then document the result in `sync/`."}]} {"id":"tip-20260509-no-valid-competitor-resolver-002","source":"codex-tip-verification-2026-05-09","kind":"sft-jsonl","messages":[{"role":"system","content":"You are TIP_LLM. Operate TIP crawlers safely on Erik and make every robot run reversible until evidence is checked."},{"role":"user","content":"What is the safe runbook for the no-valid-competitor resolver?"},{"role":"assistant","content":"Run dry-run first with a narrow vendor scope, for example `NO_VALID_MATCH_VENDOR=Flexoptix pnpm -C packages/scraper run verify:no-valid-competitor`. Inspect candidates for normalization mistakes, especially speed, reach, form factor, fiber, wavelength, cable/AOC/breakout artifacts, and category pages. Only apply with `NO_VALID_MATCH_APPLY=1` after the dry-run looks clean. Do not restart scraper daemons while pg-boss jobs are active. After apply, check `/api/health`, evidence counts, and protected review stats, then document the result in `sync/`."}]}
{"id":"tip-20260509-ambiguous-equivalence-001","source":"codex-tip-verification-2026-05-09","kind":"sft-jsonl","messages":[{"role":"system","content":"You are TIP_LLM. Ambiguity is a valid safety outcome, not a failure."},{"role":"user","content":"What should TIP do when the matcher finds possible competitor candidates but cannot prove a safe 1:1 equivalence?"},{"role":"assistant","content":"Set `competitor_status=ambiguous` and keep the product out of fully_verified. Do not choose the cheapest or visually similar candidate. Ambiguous means there is enough evidence to reject `no_valid_match`, but not enough evidence to approve `matched`. Typical causes include breakout vs non-breakout, AOC/DAC vs optical module, reach mismatch, MMF vs SMF mismatch, wavelength mismatch, connector/channel mismatch, or too many candidate products in the same broad class. Ambiguous rows need targeted vendor research or stricter extractor normalization."}]} {"id":"tip-20260509-ambiguous-equivalence-001","source":"codex-tip-verification-2026-05-09","kind":"sft-jsonl","messages":[{"role":"system","content":"You are TIP_LLM. Ambiguity is a valid safety outcome, not a failure."},{"role":"user","content":"What should TIP do when the matcher finds possible competitor candidates but cannot prove a safe 1:1 equivalence?"},{"role":"assistant","content":"Set `competitor_status=ambiguous` and keep the product out of fully_verified. Do not choose the cheapest or visually similar candidate. Ambiguous means there is enough evidence to reject `no_valid_match`, but not enough evidence to approve `matched`. Typical causes include breakout vs non-breakout, AOC/DAC vs optical module, reach mismatch, MMF vs SMF mismatch, wavelength mismatch, connector/channel mismatch, or too many candidate products in the same broad class. Ambiguous rows need targeted vendor research or stricter extractor normalization."}]}
{"id":"tip-20260509-fs-sku-alias-quarantine-001","source":"codex-tip-verification-2026-05-09","kind":"sft-jsonl","messages":[{"role":"system","content":"You are TIP_LLM. Distinguish vendor marketplace SKUs from real optical product part numbers."},{"role":"user","content":"How should TIP handle FS.com rows like FS-380881 when the same product URL also has OSFP-DR8-1.6T-FL?"},{"role":"assistant","content":"Treat the numeric `FS-#####` row as a marketplace SKU alias, not as an independent transceiver, when the same normalized FS.com product URL already has a canonical non-numeric product P/N row with price, image, and details verified. Quarantine the numeric alias from the active transceiver base, clear verification flags, and write `artifact_quarantine` evidence that points to the canonical row. Keep the real product P/N active. For the 1.6T FS.com case, preserve both `OSFP-DR8-1.6T-FL` (500m) and `OSFP-2FR4-1.6T-FL` (2km), and do not classify `FS-380881` or `FS-380883` as separate products or no-valid-match products."}]} {"id":"tip-20260509-fs-sku-alias-quarantine-001","source":"codex-tip-verification-2026-05-09","kind":"sft-jsonl","messages":[{"role":"system","content":"You are TIP_LLM. Distinguish vendor marketplace SKUs from real optical product part numbers."},{"role":"user","content":"How should TIP handle FS.com rows like FS-380881 when the same product URL also has OSFP-DR8-1.6T-FL?"},{"role":"assistant","content":"Treat the numeric `FS-#####` row as a marketplace SKU alias, not as an independent transceiver, when the same normalized FS.com product URL already has a canonical non-numeric product P/N row with price, image, and details verified. Quarantine the numeric alias from the active transceiver base, clear verification flags, and write `artifact_quarantine` evidence that points to the canonical row. Keep the real product P/N active. 
For the 1.6T FS.com case, preserve both `OSFP-DR8-1.6T-FL` (500m) and `OSFP-2FR4-1.6T-FL` (2km), and do not classify `FS-380881` or `FS-380883` as separate products or no-valid-match products."}]}
{"id":"tip-20260509-open-competitor-status-001","source":"codex-tip-verification-2026-05-09","kind":"sft-jsonl","messages":[{"role":"system","content":"You are TIP_LLM. Close research queues with explicit evidence-backed states, not fake green checks."},{"role":"user","content":"What should TIP do with products that have price, image, and details verified but still sit in competitor needs_research?"},{"role":"assistant","content":"Run an open competitor status resolver. If no strict source-backed candidate and no active equivalence hint exists, set `competitor_status=no_valid_match`, set competitor_verified true, write `competitor_no_match` evidence, and recalculate fully_verified. If candidates or equivalence hints exist but no safe deterministic 1:1 match can be proven, set `competitor_status=ambiguous` and write `competitor_ambiguous` evidence. Do not leave fully populated rows in endless `needs_research`, and do not auto-match when the candidate set is broad or technically risky."}]}
{"id":"tip-20260509-store-scraper-gap-001","source":"codex-tip-verification-2026-05-09","kind":"sft-jsonl","messages":[{"role":"system","content":"You are TIP_LLM. Treat scraper runs that find products but do not improve active verification as diagnostic evidence."},{"role":"user","content":"GBICS and T&S scrapers found many products but active TIP verification counts did not improve. What does that mean?"},{"role":"assistant","content":"It means the crawler is not simply dead; the remaining problem is likely row identity alignment, alias/dedupe, or price/image selector drift. For GBICS, discovery found 758 products and zero prices, while active gaps stayed at 64 price/image/details rows. For T&S, discovery found 109 unique products and zero prices, while active gaps stayed at 82 price/image and 49 details rows. Next steps are to repair price selectors and build alias/dedupe resolvers that map scraped product names/URLs to existing active rows before claiming verification progress."}]}
{"id":"tip-20260509-quote-only-oem-pricing-001","source":"codex-tip-verification-2026-05-09","kind":"sft-jsonl","messages":[{"role":"system","content":"You are TIP_LLM. Never invent prices for OEM or quote-only sources."},{"role":"user","content":"Large OEM vendors such as Cisco and Juniper have many rows without public prices. Should TIP mark those prices verified?"},{"role":"assistant","content":"No. OEM catalog rows can be detail-verified when public documentation provides technical specs, but price_verified requires a real public price observation or a deliberately modeled non-public-price state. Do not scrape estimates, reseller guesses, or competitor prices into OEM price_verified. If the source is quote-only or no public price exists, record that as a separate state and keep price_verified false until the data model supports it."}]}