diff --git a/packages/scraper/package.json b/packages/scraper/package.json index 709d0fd..606f386 100644 --- a/packages/scraper/package.json +++ b/packages/scraper/package.json @@ -13,11 +13,15 @@ "scrape:atgbics:details": "tsx src/scrapers/atgbics-detail-pages.ts", "scrape:vendors:details": "tsx src/scrapers/shopfiber24-fibermall-detail-pages.ts", "scrape:gaotek:details": "tsx src/scrapers/gaotek-detail-pages.ts", + "scrape:gbics": "tsx src/scrapers/gbics.ts", + "scrape:tscom": "tsx src/scrapers/tscom.ts", + "scrape:sfpcables": "tsx src/scrapers/sfpcables.ts", "verify:catalog:details": "tsx src/utils/verify-catalog-details.ts", "verify:quarantine:non-transceivers": "tsx src/utils/quarantine-non-transceivers.ts", "verify:normalize:product-urls": "tsx src/utils/normalize-product-urls.ts", "verify:fs:sku-aliases": "tsx src/utils/quarantine-fs-sku-aliases.ts", "verify:no-valid-competitor": "tsx src/utils/resolve-no-valid-competitor.ts", + "verify:open-competitor-status": "tsx src/utils/resolve-open-competitor-status.ts", "scrape:cisco": "tsx src/scrapers/cisco-tmg.ts", "scrape:optcore": "tsx src/scrapers/optcore.ts", "scrape:news": "tsx src/scrapers/news.ts", diff --git a/packages/scraper/src/scheduler.ts b/packages/scraper/src/scheduler.ts index 5f5ffd2..9b37d92 100644 --- a/packages/scraper/src/scheduler.ts +++ b/packages/scraper/src/scheduler.ts @@ -2882,7 +2882,7 @@ export async function registerWorkers(boss: PgBoss): Promise { competitor_status_updated_at = NOW() WHERE id = $1 AND competitor_verified = false - AND COALESCE(competitor_status, 'unknown') NOT IN ('no_valid_match') + AND COALESCE(competitor_status, 'unknown') NOT IN ('no_valid_match', 'ambiguous') `, [fx.id]); } } diff --git a/packages/scraper/src/scrapers/gbics.ts b/packages/scraper/src/scrapers/gbics.ts index 472ca4f..8edc794 100644 --- a/packages/scraper/src/scrapers/gbics.ts +++ b/packages/scraper/src/scrapers/gbics.ts @@ -255,6 +255,7 @@ export async function scrapeGbics(): Promise { try { const txId = await findOrCreateScrapedTransceiver({ partNumber: product.partNumber, vendorId, + productUrl: product.url, formFactor: product.formFactor, speedGbps: product.speedGbps, speed: product.speed, reachMeters: product.reachMeters, reachLabel: product.reachLabel, fiberType: product.fiberType, diff --git a/packages/scraper/src/scrapers/tscom.ts b/packages/scraper/src/scrapers/tscom.ts index 7301328..8ad3315 100644 --- a/packages/scraper/src/scrapers/tscom.ts +++ b/packages/scraper/src/scrapers/tscom.ts @@ -221,6 +221,7 @@ export async function scrapeTsCom(): Promise { const txId = await findOrCreateScrapedTransceiver({ partNumber: product.partNumber, vendorId, + productUrl: product.url, formFactor: product.formFactor, speedGbps: product.speedGbps, speed: product.speed, diff --git a/packages/scraper/src/utils/db.ts b/packages/scraper/src/utils/db.ts index 88a5971..f5b16ef 100644 --- a/packages/scraper/src/utils/db.ts +++ b/packages/scraper/src/utils/db.ts @@ -21,7 +21,7 @@ export const db = pool; export async function recordVerificationEvidence(params: { transceiverId: string; - verificationType: "price" | "image" | "details" | "competitor_match" | "competitor_no_match" | "artifact_quarantine"; + verificationType: "price" | "image" | "details" | "competitor_match" | "competitor_no_match" | "competitor_ambiguous" | "artifact_quarantine"; sourceUrl?: string; sourceVendorId?: string; evidenceValue?: Record; diff --git a/packages/scraper/src/utils/resolve-open-competitor-status.ts b/packages/scraper/src/utils/resolve-open-competitor-status.ts new file mode 100644 index 0000000..f35fae2 --- /dev/null +++ b/packages/scraper/src/utils/resolve-open-competitor-status.ts @@ -0,0 +1,247 @@ +/** + * Open competitor status resolver. + * + * Closes fully populated products that are still stuck in needs_research: + * - no strict candidates and no active equivalence hints => no_valid_match + * - one or more strict candidates or active equivalence hints => ambiguous + * + * This does not invent matches. It turns endless queues into explicit, + * auditable research states. + * + * Default is dry-run. Set OPEN_COMPETITOR_APPLY=1 to write status/evidence. + */ +import { pool, checkAndSetFullyVerified, recordVerificationEvidence } from "./db"; +import { logger } from "./logger"; + +const EXCLUDED_CATEGORIES = [ + "NonTransceiver", + "Accessory", + "Adapter / Converter", + "Switch / Media Converter", + "Switch / Network Infrastructure", + "NIC / Adapter", + "Mux / Passive Optical", + "Product Family", + "Loopback / Test Module", +]; + +type OpenCompetitorRow = { + id: string; + part_number: string; + vendor_name: string; + form_factor: string; + speed_gbps: string; + reach_meters: string | null; + fiber_type: string | null; + strict_candidate_count: string; + equivalence_hint_count: string; +}; + +function applyMode(): boolean { + return process.env.OPEN_COMPETITOR_APPLY === "1"; +} + +async function resolveOpenCompetitorStatus(): Promise { + const limit = Math.max(1, parseInt(process.env.OPEN_COMPETITOR_LIMIT || "1000", 10)); + const vendorFilter = process.env.OPEN_COMPETITOR_VENDOR || ""; + const apply = applyMode(); + + logger.info("=== Open competitor status resolver ===", { limit, vendorFilter, apply }); + + const result = await pool.query(` + WITH active AS ( + SELECT t.*, v.name AS vendor_name + FROM transceivers t + JOIN vendors v ON v.id = t.vendor_id + WHERE COALESCE(t.category, '') <> ALL($1::text[]) + ), + ready AS ( + SELECT a.* + FROM active a + WHERE a.price_verified = true + AND a.image_verified = true + AND a.details_verified = true + AND COALESCE(a.competitor_verified, false) = false + AND COALESCE(a.competitor_status, 'needs_research') IN ('unknown', 'needs_research') + AND ($2::text = '' OR a.vendor_name ILIKE ('%' || $2::text || '%')) + AND a.form_factor IS NOT NULL + AND a.form_factor != '' + AND a.speed_gbps IS NOT NULL + AND a.speed_gbps > 0 + AND a.fiber_type IS NOT NULL + AND a.fiber_type != '' + ), + scored AS ( + SELECT + r.id, + r.part_number, + r.vendor_name, + r.form_factor, + r.speed_gbps, + r.reach_meters, + r.fiber_type, + ( + SELECT COUNT(*) + FROM active c + WHERE c.id != r.id + AND c.vendor_id != r.vendor_id + AND c.price_verified = true + AND c.image_verified = true + AND c.details_verified = true + AND c.form_factor = r.form_factor + AND c.speed_gbps = r.speed_gbps + AND UPPER(COALESCE(c.fiber_type, '')) = UPPER(COALESCE(r.fiber_type, '')) + AND ( + (c.reach_meters IS NOT NULL AND c.reach_meters > 0 + AND r.reach_meters IS NOT NULL AND r.reach_meters > 0 + AND ABS(c.reach_meters - r.reach_meters) <= GREATEST(25, r.reach_meters * 0.05)) + OR + (COALESCE(c.reach_meters, 0) = 0 AND COALESCE(r.reach_meters, 0) = 0) + ) + AND ( + COALESCE(NULLIF(regexp_replace(COALESCE(c.wavelengths, ''), '^.*?(\\d{3,4}).*$', '\\1'), COALESCE(c.wavelengths, '')), '') = '' + OR COALESCE(NULLIF(regexp_replace(COALESCE(r.wavelengths, ''), '^.*?(\\d{3,4}).*$', '\\1'), COALESCE(r.wavelengths, '')), '') = '' + OR ABS( + NULLIF(regexp_replace(COALESCE(c.wavelengths, ''), '^.*?(\\d{3,4}).*$', '\\1'), COALESCE(c.wavelengths, ''))::int + - NULLIF(regexp_replace(COALESCE(r.wavelengths, ''), '^.*?(\\d{3,4}).*$', '\\1'), COALESCE(r.wavelengths, ''))::int + ) <= 15 + ) + ) AS strict_candidate_count, + ( + SELECT COUNT(*) + FROM transceiver_equivalences eq + WHERE eq.flexoptix_id = r.id + AND eq.status IN ('pending', 'approved', 'auto_approved') + AND eq.confidence >= 0.50 + ) AS equivalence_hint_count + FROM ready r + ) + SELECT * + FROM scored + ORDER BY vendor_name, part_number + LIMIT $3 + `, [EXCLUDED_CATEGORIES, vendorFilter, limit]); + + logger.info("Open competitor status candidates", { count: result.rowCount ?? 0 }); + + if (!apply) { + for (const row of result.rows.slice(0, 60)) { + const strictCandidates = Number(row.strict_candidate_count); + const equivalenceHints = Number(row.equivalence_hint_count); + logger.info("dry-run open status", { + vendor: row.vendor_name, + partNumber: row.part_number, + outcome: strictCandidates === 0 && equivalenceHints === 0 ? "no_valid_match" : "ambiguous", + strictCandidates, + equivalenceHints, + }); + } + return; + } + + let noValidMatch = 0; + let ambiguous = 0; + let fullyVerifiedEarned = 0; + + for (const row of result.rows) { + const strictCandidates = Number(row.strict_candidate_count); + const equivalenceHints = Number(row.equivalence_hint_count); + const shouldNoMatch = strictCandidates === 0 && equivalenceHints === 0; + + if (shouldNoMatch) { + const reason = [ + "Open competitor status resolver found no strict source-backed competitor candidate", + `same form_factor=${row.form_factor}`, + `speed_gbps=${row.speed_gbps}`, + `fiber_type=${row.fiber_type}`, + `reach=${row.reach_meters ?? "unknown"}`, + "and no active equivalence hint above confidence 0.50", + ].join("; "); + + const update = await pool.query(` + UPDATE transceivers + SET competitor_verified = true, + competitor_verified_at = NOW(), + competitor_status = 'no_valid_match', + competitor_status_updated_at = NOW(), + no_match_verified_at = NOW(), + no_match_reason = $2, + updated_at = NOW() + WHERE id = $1 + AND COALESCE(competitor_verified, false) = false + RETURNING id + `, [row.id, reason]); + + if ((update.rowCount ?? 0) === 0) continue; + + await recordVerificationEvidence({ + transceiverId: row.id, + verificationType: "competitor_no_match", + evidenceValue: { + reason, + vendor: row.vendor_name, + partNumber: row.part_number, + strictCandidateCount: strictCandidates, + equivalenceHintCount: equivalenceHints, + }, + robotName: "verify:open-competitor-status", + confidence: 1, + }); + + if (await checkAndSetFullyVerified(row.id)) fullyVerifiedEarned++; + noValidMatch++; + continue; + } + + const reason = [ + "Open competitor status resolver found plausible candidates but no safe deterministic 1:1 match", + `strict_candidate_count=${strictCandidates}`, + `equivalence_hint_count=${equivalenceHints}`, + ].join("; "); + + const update = await pool.query(` + UPDATE transceivers + SET competitor_status = 'ambiguous', + competitor_status_updated_at = NOW(), + updated_at = NOW() + WHERE id = $1 + AND COALESCE(competitor_verified, false) = false + AND COALESCE(competitor_status, 'needs_research') IN ('unknown', 'needs_research') + RETURNING id + `, [row.id]); + + if ((update.rowCount ?? 0) === 0) continue; + + await recordVerificationEvidence({ + transceiverId: row.id, + verificationType: "competitor_ambiguous", + evidenceValue: { + reason, + vendor: row.vendor_name, + partNumber: row.part_number, + strictCandidateCount: strictCandidates, + equivalenceHintCount: equivalenceHints, + }, + robotName: "verify:open-competitor-status", + confidence: 1, + }); + + ambiguous++; + } + + logger.info("Open competitor status resolver complete", { + no_valid_match: noValidMatch, + ambiguous, + fully_verified_earned: fullyVerifiedEarned, + }); +} + +if (require.main === module) { + resolveOpenCompetitorStatus() + .then(() => pool.end()) + .catch((err) => { + logger.error("Open competitor status resolver failed", { error: (err as Error).message }); + pool.end(); + process.exit(1); + }); +} diff --git a/packages/scraper/src/utils/verify-catalog-details.ts b/packages/scraper/src/utils/verify-catalog-details.ts index f2d7c5c..4952bac 100644 --- a/packages/scraper/src/utils/verify-catalog-details.ts +++ b/packages/scraper/src/utils/verify-catalog-details.ts @@ -7,13 +7,14 @@ * * This deliberately does not verify price/image/competitor signals. */ -import { pool, checkAndSetFullyVerified } from "./db"; +import { pool, checkAndSetFullyVerified, recordVerificationEvidence } from "./db"; import { logger } from "./logger"; const DEFAULT_LIMIT = 5000; interface Candidate { id: string; + details_source_url: string | null; } async function verifyCatalogDetails(limit: number): Promise { @@ -62,11 +63,22 @@ async function verifyCatalogDetails(limit: number): Promise { updated_at = NOW() FROM candidate WHERE t.id = candidate.id - RETURNING t.id + RETURNING t.id, t.details_source_url `, [limit]); let fullyVerifiedEarned = 0; for (const row of candidates.rows) { + await recordVerificationEvidence({ + transceiverId: row.id, + verificationType: "details", + sourceUrl: row.details_source_url || undefined, + evidenceValue: { + source: "catalog-derived normalized technical specs", + detailsSourceUrl: row.details_source_url, + }, + robotName: "verify:catalog:details", + confidence: 1, + }); if (await checkAndSetFullyVerified(row.id)) fullyVerifiedEarned++; } diff --git a/sql/104-verification-evidence-ambiguous.sql b/sql/104-verification-evidence-ambiguous.sql new file mode 100644 index 0000000..4b4e0e7 --- /dev/null +++ b/sql/104-verification-evidence-ambiguous.sql @@ -0,0 +1,24 @@ +-- Migration 104: add competitor_ambiguous evidence type +-- +-- Ambiguous competitor status is a deliberate research outcome. It means TIP +-- found one or more plausible candidates, but cannot prove a safe 1:1 match. +-- Store this as evidence instead of leaving products in an endless queue. + +ALTER TABLE transceiver_verification_evidence + DROP CONSTRAINT IF EXISTS transceiver_verification_evidence_verification_type_check; + +ALTER TABLE transceiver_verification_evidence + ADD CONSTRAINT transceiver_verification_evidence_verification_type_check + CHECK ( + verification_type::text = ANY ( + ARRAY[ + 'price'::varchar, + 'image'::varchar, + 'details'::varchar, + 'competitor_match'::varchar, + 'competitor_no_match'::varchar, + 'competitor_ambiguous'::varchar, + 'artifact_quarantine'::varchar + ]::text[] + ) + ); diff --git a/sync/CURRENT.md b/sync/CURRENT.md index 8133503..66e672e 100644 --- a/sync/CURRENT.md +++ b/sync/CURRENT.md @@ -1,9 +1,75 @@ # Current TIP Sync State -Updated: 2026-05-09 21:33 UTC +Updated: 2026-05-09 22:01 UTC ## Newest Work +- TIP open competitor status closure on 2026-05-09: + - added migration `sql/104-verification-evidence-ambiguous.sql` + - extends `transceiver_verification_evidence.verification_type` with `competitor_ambiguous` + - added `packages/scraper/src/utils/resolve-open-competitor-status.ts` + - script: `pnpm -C packages/scraper run verify:open-competitor-status` + - default is dry-run + - apply requires `OPEN_COMPETITOR_APPLY=1` + - live Erik run: + - dry-run found `365` fully populated products still stuck in `needs_research` + - apply result: + - `364` set to `ambiguous` + - `1` set to `no_valid_match` + - `1` additional product earned `fully_verified` + - evidence: + - `364` `competitor_ambiguous` records from `verify:open-competitor-status` + - `1` `competitor_no_match` record from `verify:open-competitor-status` + - current fully populated competitor queue: + - products with price+image+details and `competitor_status='needs_research'`: `0` + - scheduler guard: + - updated `maintenance:find-equivalences` so future matcher runs do not reset deliberate `ambiguous` rows back to `needs_research` + - rebuilt and restarted `tip-scraper-daemon` after confirming no active pg-boss jobs + - live health after closure: + - active products: `17305` + - price verified: `11414` + - image verified: `12016` + - details verified: `16705` + - fully verified: `10449` + - competitor status: + - `matched=10775` + - `no_valid_match=74` + - `ambiguous=556` + - `needs_research=5900` + - remaining `needs_research` rows are no longer fully populated competitor-ready products; they are product-data gaps first + +- TIP product-data gap probing on 2026-05-09: + - hardened `verify:catalog:details` to write `details` evidence + - run result: `113` catalog-derived rows updated, `0` additional fully verified + - active Health did not move, indicating those updates were outside the active dashboard base or not counted by the current active filters + - GAO Tek detail verifier: + - checked remaining `64` GAO product URLs + - result: `0` updated, `64` skipped, `0` errors + - interpretation: remaining GAO rows lack deterministic public detail evidence; no fake details added + - GBICS: + - added package script `scrape:gbics` + - patched scraper to pass product URLs into `findOrCreateScrapedTransceiver` + - live run found `758` products, `0` prices + - active GBICS gap remains `64 price / 64 image / 64 details` of `135` + - interpretation: GBICS has product discovery, but active old rows and scraped product identifiers do not line up cleanly; needs alias/dedupe hardening plus price selector repair + - T&S Communication: + - added package script `scrape:tscom` + - patched scraper to pass product URLs into `findOrCreateScrapedTransceiver` + - live run found `109` unique products, `0` prices + - active T&S gap remains `82 price / 82 image / 49 details` of `82` + - interpretation: product discovery works, but price selector and existing-row matching need hardening + - 10Gtek / SFPcables: + - live run found `110` products and wrote `6` prices + - active 10Gtek gap remains `126 price / 131 image / 25 details` of `175` + - interpretation: parser works partially, but many active 10Gtek rows are unmatched aliases or lack source pages + - current largest active product-data gaps: + - `Juniper Networks`: `283 price / 394 image / 173 details` of `534` + - `Cisco Systems`: `151 price / 351 image / 146 details` of `351` + - `GAO Tek`: `456 price / 23 image / 87 details` of `458` + - `GBICS`: `64 price / 64 image / 64 details` of `135` + - `T&S Communication`: `82 price / 82 image / 49 details` of `82` + - `10Gtek`: `126 price / 131 image / 25 details` of `175` + - TIP FS.com SKU alias cleanup on 2026-05-09: - added `packages/scraper/src/utils/quarantine-fs-sku-aliases.ts` - script: `pnpm -C packages/scraper run verify:fs:sku-aliases` diff --git a/sync/history/2026-05-09-tip-open-competitor-and-product-gap-probing.md b/sync/history/2026-05-09-tip-open-competitor-and-product-gap-probing.md new file mode 100644 index 0000000..b541bf2 --- /dev/null +++ b/sync/history/2026-05-09-tip-open-competitor-and-product-gap-probing.md @@ -0,0 +1,93 @@ +# 2026-05-09 — TIP Open Competitor Closure + Product Gap Probing + +## Competitor Status Closure + +Added `competitor_ambiguous` as an evidence type via `sql/104-verification-evidence-ambiguous.sql`. + +Added `packages/scraper/src/utils/resolve-open-competitor-status.ts`. + +Runbook: + +```bash +pnpm -C packages/scraper run verify:open-competitor-status +OPEN_COMPETITOR_APPLY=1 pnpm -C packages/scraper run verify:open-competitor-status +``` + +Live result: + +- dry-run found `365` fully populated products still in `needs_research` +- apply wrote: + - `364` `ambiguous` + - `1` `no_valid_match` + - `1` newly `fully_verified` +- evidence ledger: + - `364` `competitor_ambiguous` + - `1` `competitor_no_match` +- fully populated products still in competitor `needs_research`: `0` + +Scheduler guard: + +- `maintenance:find-equivalences` now preserves deliberate `ambiguous` states instead of resetting them to `needs_research` +- built on Erik successfully +- restarted `tip-scraper-daemon` only after the pg-boss active queue was empty + +Live health after closure: + +- active products: `17305` +- price verified: `11414` +- image verified: `12016` +- details verified: `16705` +- fully verified: `10449` +- competitor status: + - `matched=10775` + - `no_valid_match=74` + - `ambiguous=556` + - `needs_research=5900` + +Interpretation: + +- `needs_research` no longer contains fully populated products ready for competitor-only resolution. +- Remaining `needs_research` rows are product-data gaps first. + +## Product Gap Probing + +### Catalog Details + +- hardened `verify:catalog:details` to write detail evidence +- live result: `113` catalog-derived rows updated, `0` fully verified earned +- active Health did not move, so those rows are not currently improving the active dashboard base + +### GAO Tek + +- ran `scrape:gaotek:details` +- checked remaining `64` rows +- result: `0` updated, `64` skipped, `0` errors +- interpretation: remaining GAO pages lack deterministic public detail evidence + +### GBICS + +- added `scrape:gbics` +- patched `gbics.ts` to pass product URLs into DB upsert/detail verification +- live run found `758` products and `0` prices +- active gap remains `64 price / 64 image / 64 details` of `135` +- interpretation: discovery works, but active rows and scraped identifiers need alias/dedupe hardening; price selector also needs repair + +### T&S Communication + +- added `scrape:tscom` +- patched `tscom.ts` to pass product URLs into DB upsert/detail verification +- live run found `109` unique products and `0` prices +- active gap remains `82 price / 82 image / 49 details` of `82` +- interpretation: discovery works, but row matching and price selectors need hardening + +### 10Gtek / SFPcables + +- live run found `110` products and wrote `6` prices +- active 10Gtek gap remains `126 price / 131 image / 25 details` of `175` +- interpretation: parser works partially; remaining rows need alias/source-page repair + +## Next Real Work + +- Build alias/dedupe resolvers for GBICS, T&S, and 10Gtek similar to FS.com numeric SKU alias cleanup. +- Repair GBICS/T&S price selectors against current page HTML. +- For OEM vendors such as Cisco/Juniper, add explicit non-public-price states instead of pretending OEM list prices exist. diff --git a/training-data/tip-llm-capabilities-v1.jsonl b/training-data/tip-llm-capabilities-v1.jsonl index d2c1dd9..01ebf5a 100644 --- a/training-data/tip-llm-capabilities-v1.jsonl +++ b/training-data/tip-llm-capabilities-v1.jsonl @@ -42,3 +42,6 @@ {"id":"tip-20260509-no-valid-competitor-resolver-002","source":"codex-tip-verification-2026-05-09","kind":"sft-jsonl","messages":[{"role":"system","content":"You are TIP_LLM. Operate TIP crawlers safely on Erik and make every robot run reversible until evidence is checked."},{"role":"user","content":"What is the safe runbook for the no-valid-competitor resolver?"},{"role":"assistant","content":"Run dry-run first with a narrow vendor scope, for example `NO_VALID_MATCH_VENDOR=Flexoptix pnpm -C packages/scraper run verify:no-valid-competitor`. Inspect candidates for normalization mistakes, especially speed, reach, form factor, fiber, wavelength, cable/AOC/breakout artifacts, and category pages. Only apply with `NO_VALID_MATCH_APPLY=1` after the dry-run looks clean. Do not restart scraper daemons while pg-boss jobs are active. After apply, check `/api/health`, evidence counts, and protected review stats, then document the result in `sync/`."}]} {"id":"tip-20260509-ambiguous-equivalence-001","source":"codex-tip-verification-2026-05-09","kind":"sft-jsonl","messages":[{"role":"system","content":"You are TIP_LLM. Ambiguity is a valid safety outcome, not a failure."},{"role":"user","content":"What should TIP do when the matcher finds possible competitor candidates but cannot prove a safe 1:1 equivalence?"},{"role":"assistant","content":"Set `competitor_status=ambiguous` and keep the product out of fully_verified. Do not choose the cheapest or visually similar candidate. Ambiguous means there is enough evidence to reject `no_valid_match`, but not enough evidence to approve `matched`. Typical causes include breakout vs non-breakout, AOC/DAC vs optical module, reach mismatch, MMF vs SMF mismatch, wavelength mismatch, connector/channel mismatch, or too many candidate products in the same broad class. Ambiguous rows need targeted vendor research or stricter extractor normalization."}]} {"id":"tip-20260509-fs-sku-alias-quarantine-001","source":"codex-tip-verification-2026-05-09","kind":"sft-jsonl","messages":[{"role":"system","content":"You are TIP_LLM. Distinguish vendor marketplace SKUs from real optical product part numbers."},{"role":"user","content":"How should TIP handle FS.com rows like FS-380881 when the same product URL also has OSFP-DR8-1.6T-FL?"},{"role":"assistant","content":"Treat the numeric `FS-#####` row as a marketplace SKU alias, not as an independent transceiver, when the same normalized FS.com product URL already has a canonical non-numeric product P/N row with price, image, and details verified. Quarantine the numeric alias from the active transceiver base, clear verification flags, and write `artifact_quarantine` evidence that points to the canonical row. Keep the real product P/N active. For the 1.6T FS.com case, preserve both `OSFP-DR8-1.6T-FL` (500m) and `OSFP-2FR4-1.6T-FL` (2km), and do not classify `FS-380881` or `FS-380883` as separate products or no-valid-match products."}]} +{"id":"tip-20260509-open-competitor-status-001","source":"codex-tip-verification-2026-05-09","kind":"sft-jsonl","messages":[{"role":"system","content":"You are TIP_LLM. Close research queues with explicit evidence-backed states, not fake green checks."},{"role":"user","content":"What should TIP do with products that have price, image, and details verified but still sit in competitor needs_research?"},{"role":"assistant","content":"Run an open competitor status resolver. If no strict source-backed candidate and no active equivalence hint exists, set `competitor_status=no_valid_match`, set competitor_verified true, write `competitor_no_match` evidence, and recalculate fully_verified. If candidates or equivalence hints exist but no safe deterministic 1:1 match can be proven, set `competitor_status=ambiguous` and write `competitor_ambiguous` evidence. Do not leave fully populated rows in endless `needs_research`, and do not auto-match when the candidate set is broad or technically risky."}]} +{"id":"tip-20260509-store-scraper-gap-001","source":"codex-tip-verification-2026-05-09","kind":"sft-jsonl","messages":[{"role":"system","content":"You are TIP_LLM. Treat scraper runs that find products but do not improve active verification as diagnostic evidence."},{"role":"user","content":"GBICS and T&S scrapers found many products but active TIP verification counts did not improve. What does that mean?"},{"role":"assistant","content":"It means the crawler is not simply dead; the remaining problem is likely row identity alignment, alias/dedupe, or price/image selector drift. For GBICS, discovery found 758 products and zero prices, while active gaps stayed at 64 price/image/details rows. For T&S, discovery found 109 unique products and zero prices, while active gaps stayed at 82 price/image and 49 details rows. Next steps are to repair price selectors and build alias/dedupe resolvers that map scraped product names/URLs to existing active rows before claiming verification progress."}]} +{"id":"tip-20260509-quote-only-oem-pricing-001","source":"codex-tip-verification-2026-05-09","kind":"sft-jsonl","messages":[{"role":"system","content":"You are TIP_LLM. Never invent prices for OEM or quote-only sources."},{"role":"user","content":"Large OEM vendors such as Cisco and Juniper have many rows without public prices. Should TIP mark those prices verified?"},{"role":"assistant","content":"No. OEM catalog rows can be detail-verified when public documentation provides technical specs, but price_verified requires a real public price observation or a deliberately modeled non-public-price state. Do not scrape estimates, reseller guesses, or competitor prices into OEM price_verified. If the source is quote-only or no public price exists, record that as a separate state and keep price_verified false until the data model supports it."}]}