feat: close open competitor research states
This commit is contained in:
parent
fb9db56617
commit
635a102932
@ -13,11 +13,15 @@
|
||||
"scrape:atgbics:details": "tsx src/scrapers/atgbics-detail-pages.ts",
|
||||
"scrape:vendors:details": "tsx src/scrapers/shopfiber24-fibermall-detail-pages.ts",
|
||||
"scrape:gaotek:details": "tsx src/scrapers/gaotek-detail-pages.ts",
|
||||
"scrape:gbics": "tsx src/scrapers/gbics.ts",
|
||||
"scrape:tscom": "tsx src/scrapers/tscom.ts",
|
||||
"scrape:sfpcables": "tsx src/scrapers/sfpcables.ts",
|
||||
"verify:catalog:details": "tsx src/utils/verify-catalog-details.ts",
|
||||
"verify:quarantine:non-transceivers": "tsx src/utils/quarantine-non-transceivers.ts",
|
||||
"verify:normalize:product-urls": "tsx src/utils/normalize-product-urls.ts",
|
||||
"verify:fs:sku-aliases": "tsx src/utils/quarantine-fs-sku-aliases.ts",
|
||||
"verify:no-valid-competitor": "tsx src/utils/resolve-no-valid-competitor.ts",
|
||||
"verify:open-competitor-status": "tsx src/utils/resolve-open-competitor-status.ts",
|
||||
"scrape:cisco": "tsx src/scrapers/cisco-tmg.ts",
|
||||
"scrape:optcore": "tsx src/scrapers/optcore.ts",
|
||||
"scrape:news": "tsx src/scrapers/news.ts",
|
||||
|
||||
@ -2882,7 +2882,7 @@ export async function registerWorkers(boss: PgBoss): Promise<void> {
|
||||
competitor_status_updated_at = NOW()
|
||||
WHERE id = $1
|
||||
AND competitor_verified = false
|
||||
AND COALESCE(competitor_status, 'unknown') NOT IN ('no_valid_match')
|
||||
AND COALESCE(competitor_status, 'unknown') NOT IN ('no_valid_match', 'ambiguous')
|
||||
`, [fx.id]);
|
||||
}
|
||||
}
|
||||
|
||||
@ -255,6 +255,7 @@ export async function scrapeGbics(): Promise<void> {
|
||||
try {
|
||||
const txId = await findOrCreateScrapedTransceiver({
|
||||
partNumber: product.partNumber, vendorId,
|
||||
productUrl: product.url,
|
||||
formFactor: product.formFactor, speedGbps: product.speedGbps,
|
||||
speed: product.speed, reachMeters: product.reachMeters,
|
||||
reachLabel: product.reachLabel, fiberType: product.fiberType,
|
||||
|
||||
@ -221,6 +221,7 @@ export async function scrapeTsCom(): Promise<void> {
|
||||
const txId = await findOrCreateScrapedTransceiver({
|
||||
partNumber: product.partNumber,
|
||||
vendorId,
|
||||
productUrl: product.url,
|
||||
formFactor: product.formFactor,
|
||||
speedGbps: product.speedGbps,
|
||||
speed: product.speed,
|
||||
|
||||
@ -21,7 +21,7 @@ export const db = pool;
|
||||
|
||||
export async function recordVerificationEvidence(params: {
|
||||
transceiverId: string;
|
||||
verificationType: "price" | "image" | "details" | "competitor_match" | "competitor_no_match" | "artifact_quarantine";
|
||||
verificationType: "price" | "image" | "details" | "competitor_match" | "competitor_no_match" | "competitor_ambiguous" | "artifact_quarantine";
|
||||
sourceUrl?: string;
|
||||
sourceVendorId?: string;
|
||||
evidenceValue?: Record<string, unknown>;
|
||||
|
||||
247
packages/scraper/src/utils/resolve-open-competitor-status.ts
Normal file
247
packages/scraper/src/utils/resolve-open-competitor-status.ts
Normal file
@ -0,0 +1,247 @@
|
||||
/**
|
||||
* Open competitor status resolver.
|
||||
*
|
||||
* Closes fully populated products whose competitor status is still open (needs_research, unknown, or not set):
|
||||
* - no strict candidates and no active equivalence hints => no_valid_match
|
||||
* - one or more strict candidates or active equivalence hints => ambiguous
|
||||
*
|
||||
* This does not invent matches. It turns endless queues into explicit,
|
||||
* auditable research states.
|
||||
*
|
||||
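* Default is dry-run. Set OPEN_COMPETITOR_APPLY=1 to write status/evidence.
* OPEN_COMPETITOR_LIMIT (default 1000) caps the number of rows per run;
* OPEN_COMPETITOR_VENDOR restricts the run to vendors whose name contains
* the given text (case-insensitive).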
|
||||
*/
|
||||
import { pool, checkAndSetFullyVerified, recordVerificationEvidence } from "./db";
|
||||
import { logger } from "./logger";
|
||||
|
||||
/**
 * Product categories that are excluded from the resolver entirely.
 *
 * The list is bound as the `$1::text[]` parameter of the candidate query, so
 * rows in these categories are neither resolved themselves nor counted as
 * strict competitor candidates for other rows.
 *
 * Declared `readonly` so the shared filter cannot be mutated at runtime.
 */
const EXCLUDED_CATEGORIES: readonly string[] = [
  "NonTransceiver",
  "Accessory",
  "Adapter / Converter",
  "Switch / Media Converter",
  "Switch / Network Infrastructure",
  "NIC / Adapter",
  "Mux / Passive Optical",
  "Product Family",
  "Loopback / Test Module",
];
|
||||
|
||||
/**
 * One row of the resolver's candidate query (the final `SELECT * FROM scored`).
 *
 * All values are declared as strings; the two counters are converted with
 * `Number()` before they are compared.
 */
type OpenCompetitorRow = {
  // --- identity -----------------------------------------------------------
  /** Primary key of the transceiver row being resolved. */
  id: string;
  /** Name of the owning vendor (joined in from `vendors.name`). */
  vendor_name: string;
  /** Part number of the transceiver. */
  part_number: string;

  // --- technical key used for strict candidate matching --------------------
  /** Form factor; the query only selects rows where it is non-empty. */
  form_factor: string;
  /**
   * Speed in Gbps; the query only selects rows where it is > 0.
   * NOTE(review): typed as string, presumably because the column is NUMERIC — confirm.
   */
  speed_gbps: string;
  /** Fiber type; the query only selects rows where it is non-empty. */
  fiber_type: string | null;
  /** Reach in metres, or null when not known. */
  reach_meters: string | null;

  // --- scoring -------------------------------------------------------------
  /** `COUNT(*)` of strict candidates from other vendors (bigint, returned as text). */
  strict_candidate_count: string;
  /** `COUNT(*)` of active equivalence hints with confidence >= 0.50 (bigint, returned as text). */
  equivalence_hint_count: string;
};
|
||||
|
||||
/**
 * Reports whether the resolver may write to the database.
 *
 * @returns `true` only when the environment variable `OPEN_COMPETITOR_APPLY`
 *   is exactly the string `"1"`; any other value (or no value) means dry-run.
 */
function applyMode(): boolean {
  const { OPEN_COMPETITOR_APPLY: applyFlag } = process.env;
  return applyFlag === "1";
}
|
||||
|
||||
/** Row limit used when OPEN_COMPETITOR_LIMIT is unset, empty, or not an integer. */
const DEFAULT_OPEN_COMPETITOR_LIMIT = 1000;

/** Maximum number of per-row log lines written in dry-run mode. */
const DRY_RUN_LOG_ROWS = 60;

/** Name recorded as `robotName` on every evidence row this script writes. */
const OPEN_COMPETITOR_ROBOT = "verify:open-competitor-status";

/** The two research outcomes this resolver can assign. */
type OpenCompetitorOutcome = "no_valid_match" | "ambiguous";

/** Reads OPEN_COMPETITOR_LIMIT, clamping to >= 1 and falling back to the default on garbage. */
function readOpenCompetitorLimit(): number {
  const parsed = Number.parseInt(process.env.OPEN_COMPETITOR_LIMIT ?? "", 10);
  // parseInt yields NaN for "" or non-numeric input; Math.max(1, NaN) would
  // stay NaN and end up bound to `LIMIT $3`, so fall back explicitly.
  return Number.isNaN(parsed) ? DEFAULT_OPEN_COMPETITOR_LIMIT : Math.max(1, parsed);
}

/** Converts the textual counters of a row and derives the outcome from them. */
function classifyOpenCompetitorRow(row: OpenCompetitorRow): {
  strictCandidates: number;
  equivalenceHints: number;
  outcome: OpenCompetitorOutcome;
} {
  const strictCandidates = Number(row.strict_candidate_count);
  const equivalenceHints = Number(row.equivalence_hint_count);
  // Only "zero candidates AND zero hints" closes as no_valid_match; anything
  // else (including an unparsable counter) stays on the cautious side.
  const outcome: OpenCompetitorOutcome =
    strictCandidates === 0 && equivalenceHints === 0 ? "no_valid_match" : "ambiguous";
  return { strictCandidates, equivalenceHints, outcome };
}

/**
 * Closes one row as `no_valid_match`, writes `competitor_no_match` evidence and
 * re-evaluates fully_verified.
 *
 * @returns `updated: false` when the guarded UPDATE matched no row (the row was
 *   verified or moved to another status in the meantime).
 */
async function closeAsNoValidMatch(
  row: OpenCompetitorRow,
  strictCandidates: number,
  equivalenceHints: number,
): Promise<{ updated: boolean; fullyVerified: boolean }> {
  const reason = [
    "Open competitor status resolver found no strict source-backed competitor candidate",
    `same form_factor=${row.form_factor}`,
    `speed_gbps=${row.speed_gbps}`,
    `fiber_type=${row.fiber_type}`,
    `reach=${row.reach_meters ?? "unknown"}`,
    "and no active equivalence hint above confidence 0.50",
  ].join("; ");

  // The status guard mirrors the one on the ambiguous UPDATE so that a state
  // written by another job between the SELECT and this UPDATE is not overwritten.
  const update = await pool.query(`
    UPDATE transceivers
    SET competitor_verified = true,
        competitor_verified_at = NOW(),
        competitor_status = 'no_valid_match',
        competitor_status_updated_at = NOW(),
        no_match_verified_at = NOW(),
        no_match_reason = $2,
        updated_at = NOW()
    WHERE id = $1
      AND COALESCE(competitor_verified, false) = false
      AND COALESCE(competitor_status, 'needs_research') IN ('unknown', 'needs_research')
    RETURNING id
  `, [row.id, reason]);

  if ((update.rowCount ?? 0) === 0) return { updated: false, fullyVerified: false };

  // NOTE(review): status update and evidence insert are separate statements on
  // the pool, so a failure here leaves the status set without evidence.
  await recordVerificationEvidence({
    transceiverId: row.id,
    verificationType: "competitor_no_match",
    evidenceValue: {
      reason,
      vendor: row.vendor_name,
      partNumber: row.part_number,
      strictCandidateCount: strictCandidates,
      equivalenceHintCount: equivalenceHints,
    },
    robotName: OPEN_COMPETITOR_ROBOT,
    confidence: 1,
  });

  const fullyVerified = Boolean(await checkAndSetFullyVerified(row.id));
  return { updated: true, fullyVerified };
}

/**
 * Marks one row as `ambiguous` and writes `competitor_ambiguous` evidence.
 * `competitor_verified` is deliberately left untouched.
 *
 * @returns `false` when the guarded UPDATE matched no row.
 */
async function closeAsAmbiguous(
  row: OpenCompetitorRow,
  strictCandidates: number,
  equivalenceHints: number,
): Promise<boolean> {
  const reason = [
    "Open competitor status resolver found plausible candidates but no safe deterministic 1:1 match",
    `strict_candidate_count=${strictCandidates}`,
    `equivalence_hint_count=${equivalenceHints}`,
  ].join("; ");

  const update = await pool.query(`
    UPDATE transceivers
    SET competitor_status = 'ambiguous',
        competitor_status_updated_at = NOW(),
        updated_at = NOW()
    WHERE id = $1
      AND COALESCE(competitor_verified, false) = false
      AND COALESCE(competitor_status, 'needs_research') IN ('unknown', 'needs_research')
    RETURNING id
  `, [row.id]);

  if ((update.rowCount ?? 0) === 0) return false;

  await recordVerificationEvidence({
    transceiverId: row.id,
    verificationType: "competitor_ambiguous",
    evidenceValue: {
      reason,
      vendor: row.vendor_name,
      partNumber: row.part_number,
      strictCandidateCount: strictCandidates,
      equivalenceHintCount: equivalenceHints,
    },
    robotName: OPEN_COMPETITOR_ROBOT,
    confidence: 1,
  });

  return true;
}

/**
 * Resolves products that have price, image and details verified but whose
 * competitor status is still open (`needs_research`, `unknown`, or NULL).
 *
 * For every selected row the query counts:
 * - strict candidates: rows of OTHER vendors, outside the excluded categories,
 *   with price/image/details verified and the same form factor, speed and
 *   (case-insensitive) fiber type, whose reach is within max(25 m, 5 %) or is
 *   unknown on both sides, and whose first 3-4 digit wavelength number is
 *   within 15 of the row's (or is missing on either side);
 * - equivalence hints: `transceiver_equivalences` rows for the product with
 *   status pending/approved/auto_approved and confidence >= 0.50.
 *
 * Zero of both => `no_valid_match` (also sets competitor_verified and
 * re-checks fully_verified); otherwise => `ambiguous`.
 *
 * Environment:
 * - `OPEN_COMPETITOR_APPLY=1`  write changes; anything else is a dry-run that
 *   only logs (at most 60 rows plus a summary).
 * - `OPEN_COMPETITOR_LIMIT`    max rows per run (default 1000, minimum 1).
 * - `OPEN_COMPETITOR_VENDOR`   case-insensitive substring filter on vendor name.
 *
 * @throws whatever `pool.query`, `recordVerificationEvidence` or
 *   `checkAndSetFullyVerified` throw; the loop stops at the first failure.
 */
async function resolveOpenCompetitorStatus(): Promise<void> {
  const limit = readOpenCompetitorLimit();
  const vendorFilter = process.env.OPEN_COMPETITOR_VENDOR ?? "";
  const apply = applyMode();

  logger.info("=== Open competitor status resolver ===", { limit, vendorFilter, apply });

  // Wavelength extraction uses substring(text from pattern), which returns NULL
  // when there is no 3-4 digit run. The previous NULLIF(regexp_replace(...))
  // form also returned NULL for values that consist ONLY of the number
  // (e.g. '1310'), which let such rows bypass the 15 nm tolerance check.
  const result = await pool.query<OpenCompetitorRow>(`
    WITH active AS (
      SELECT t.*, v.name AS vendor_name
      FROM transceivers t
      JOIN vendors v ON v.id = t.vendor_id
      WHERE COALESCE(t.category, '') <> ALL($1::text[])
    ),
    ready AS (
      SELECT a.*
      FROM active a
      WHERE a.price_verified = true
        AND a.image_verified = true
        AND a.details_verified = true
        AND COALESCE(a.competitor_verified, false) = false
        AND COALESCE(a.competitor_status, 'needs_research') IN ('unknown', 'needs_research')
        AND ($2::text = '' OR a.vendor_name ILIKE ('%' || $2::text || '%'))
        AND a.form_factor IS NOT NULL
        AND a.form_factor != ''
        AND a.speed_gbps IS NOT NULL
        AND a.speed_gbps > 0
        AND a.fiber_type IS NOT NULL
        AND a.fiber_type != ''
    ),
    scored AS (
      SELECT
        r.id,
        r.part_number,
        r.vendor_name,
        r.form_factor,
        r.speed_gbps,
        r.reach_meters,
        r.fiber_type,
        (
          SELECT COUNT(*)
          FROM active c
          WHERE c.id != r.id
            AND c.vendor_id != r.vendor_id
            AND c.price_verified = true
            AND c.image_verified = true
            AND c.details_verified = true
            AND c.form_factor = r.form_factor
            AND c.speed_gbps = r.speed_gbps
            AND UPPER(COALESCE(c.fiber_type, '')) = UPPER(COALESCE(r.fiber_type, ''))
            AND (
              (c.reach_meters IS NOT NULL AND c.reach_meters > 0
                AND r.reach_meters IS NOT NULL AND r.reach_meters > 0
                AND ABS(c.reach_meters - r.reach_meters) <= GREATEST(25, r.reach_meters * 0.05))
              OR
              (COALESCE(c.reach_meters, 0) = 0 AND COALESCE(r.reach_meters, 0) = 0)
            )
            AND (
              substring(COALESCE(c.wavelengths, '') from '\\d{3,4}') IS NULL
              OR substring(COALESCE(r.wavelengths, '') from '\\d{3,4}') IS NULL
              OR ABS(
                substring(COALESCE(c.wavelengths, '') from '\\d{3,4}')::int
                - substring(COALESCE(r.wavelengths, '') from '\\d{3,4}')::int
              ) <= 15
            )
        ) AS strict_candidate_count,
        (
          SELECT COUNT(*)
          FROM transceiver_equivalences eq
          WHERE eq.flexoptix_id = r.id
            AND eq.status IN ('pending', 'approved', 'auto_approved')
            AND eq.confidence >= 0.50
        ) AS equivalence_hint_count
      FROM ready r
    )
    SELECT *
    FROM scored
    ORDER BY vendor_name, part_number
    LIMIT $3
  `, [EXCLUDED_CATEGORIES, vendorFilter, limit]);

  const rows = result.rows;
  logger.info("Open competitor status candidates", { count: result.rowCount ?? 0 });

  if (!apply) {
    let wouldNoValidMatch = 0;
    let wouldAmbiguous = 0;

    rows.forEach((row, index) => {
      const { strictCandidates, equivalenceHints, outcome } = classifyOpenCompetitorRow(row);
      if (outcome === "no_valid_match") wouldNoValidMatch++;
      else wouldAmbiguous++;

      // Per-row detail is capped to keep the log readable; totals cover all rows.
      if (index < DRY_RUN_LOG_ROWS) {
        logger.info("dry-run open status", {
          vendor: row.vendor_name,
          partNumber: row.part_number,
          outcome,
          strictCandidates,
          equivalenceHints,
        });
      }
    });

    logger.info("dry-run open status summary", {
      no_valid_match: wouldNoValidMatch,
      ambiguous: wouldAmbiguous,
      rows_logged: Math.min(rows.length, DRY_RUN_LOG_ROWS),
      rows_total: rows.length,
    });
    return;
  }

  let noValidMatch = 0;
  let ambiguous = 0;
  let fullyVerifiedEarned = 0;

  // Rows are processed one at a time, in query order.
  for (const row of rows) {
    const { strictCandidates, equivalenceHints, outcome } = classifyOpenCompetitorRow(row);

    if (outcome === "no_valid_match") {
      const closed = await closeAsNoValidMatch(row, strictCandidates, equivalenceHints);
      if (!closed.updated) continue;
      if (closed.fullyVerified) fullyVerifiedEarned++;
      noValidMatch++;
      continue;
    }

    if (await closeAsAmbiguous(row, strictCandidates, equivalenceHints)) ambiguous++;
  }

  logger.info("Open competitor status resolver complete", {
    no_valid_match: noValidMatch,
    ambiguous,
    fully_verified_earned: fullyVerifiedEarned,
  });
}
|
||||
|
||||
// Script entry point: runs the resolver only when this file is executed
// directly, not when it is imported.
if (require.main === module) {
  void (async () => {
    let failed = false;

    try {
      await resolveOpenCompetitorStatus();
    } catch (err: unknown) {
      failed = true;
      logger.error("Open competitor status resolver failed", {
        // Thrown values are not guaranteed to be Error instances.
        error: err instanceof Error ? err.message : String(err),
      });
    }

    // Close the pool exactly once, and wait for it, on both the success and
    // the failure path before deciding the exit code.
    try {
      await pool.end();
    } catch (err: unknown) {
      failed = true;
      logger.error("Open competitor status resolver failed", {
        error: err instanceof Error ? err.message : String(err),
      });
    }

    if (failed) process.exit(1);
  })();
}
|
||||
@ -7,13 +7,14 @@
|
||||
*
|
||||
* This deliberately does not verify price/image/competitor signals.
|
||||
*/
|
||||
import { pool, checkAndSetFullyVerified } from "./db";
|
||||
import { pool, checkAndSetFullyVerified, recordVerificationEvidence } from "./db";
|
||||
import { logger } from "./logger";
|
||||
|
||||
const DEFAULT_LIMIT = 5000;
|
||||
|
||||
interface Candidate {
|
||||
id: string;
|
||||
details_source_url: string | null;
|
||||
}
|
||||
|
||||
async function verifyCatalogDetails(limit: number): Promise<void> {
|
||||
@ -62,11 +63,22 @@ async function verifyCatalogDetails(limit: number): Promise<void> {
|
||||
updated_at = NOW()
|
||||
FROM candidate
|
||||
WHERE t.id = candidate.id
|
||||
RETURNING t.id
|
||||
RETURNING t.id, t.details_source_url
|
||||
`, [limit]);
|
||||
|
||||
let fullyVerifiedEarned = 0;
|
||||
for (const row of candidates.rows) {
|
||||
await recordVerificationEvidence({
|
||||
transceiverId: row.id,
|
||||
verificationType: "details",
|
||||
sourceUrl: row.details_source_url || undefined,
|
||||
evidenceValue: {
|
||||
source: "catalog-derived normalized technical specs",
|
||||
detailsSourceUrl: row.details_source_url,
|
||||
},
|
||||
robotName: "verify:catalog:details",
|
||||
confidence: 1,
|
||||
});
|
||||
if (await checkAndSetFullyVerified(row.id)) fullyVerifiedEarned++;
|
||||
}
|
||||
|
||||
|
||||
24
sql/104-verification-evidence-ambiguous.sql
Normal file
24
sql/104-verification-evidence-ambiguous.sql
Normal file
@ -0,0 +1,24 @@
|
||||
-- Migration 104: add competitor_ambiguous evidence type
|
||||
--
|
||||
-- Ambiguous competitor status is a deliberate research outcome. It means TIP
|
||||
-- found one or more plausible candidates, but cannot prove a safe 1:1 match.
|
||||
-- Store this as evidence instead of leaving products in an endless queue.
|
||||
|
||||
-- Replace the check constraint in a single ALTER TABLE statement. Both actions
-- are applied atomically, so the table is never left without the constraint if
-- the ADD fails (for example because existing rows violate the new list), even
-- when this file is executed outside an explicit transaction.
ALTER TABLE transceiver_verification_evidence
  DROP CONSTRAINT IF EXISTS transceiver_verification_evidence_verification_type_check,
  ADD CONSTRAINT transceiver_verification_evidence_verification_type_check
    CHECK (
      verification_type::text = ANY (
        ARRAY[
          'price'::varchar,
          'image'::varchar,
          'details'::varchar,
          'competitor_match'::varchar,
          'competitor_no_match'::varchar,
          'competitor_ambiguous'::varchar,  -- new in migration 104
          'artifact_quarantine'::varchar
        ]::text[]
      )
    );
|
||||
@ -1,9 +1,75 @@
|
||||
# Current TIP Sync State
|
||||
|
||||
Updated: 2026-05-09 21:33 UTC
|
||||
Updated: 2026-05-09 22:01 UTC
|
||||
|
||||
## Newest Work
|
||||
|
||||
- TIP open competitor status closure on 2026-05-09:
|
||||
- added migration `sql/104-verification-evidence-ambiguous.sql`
|
||||
- extends `transceiver_verification_evidence.verification_type` with `competitor_ambiguous`
|
||||
- added `packages/scraper/src/utils/resolve-open-competitor-status.ts`
|
||||
- script: `pnpm -C packages/scraper run verify:open-competitor-status`
|
||||
- default is dry-run
|
||||
- apply requires `OPEN_COMPETITOR_APPLY=1`
|
||||
- live Erik run:
|
||||
- dry-run found `365` fully populated products still stuck in `needs_research`
|
||||
- apply result:
|
||||
- `364` set to `ambiguous`
|
||||
- `1` set to `no_valid_match`
|
||||
- `1` additional product earned `fully_verified`
|
||||
- evidence:
|
||||
- `364` `competitor_ambiguous` records from `verify:open-competitor-status`
|
||||
- `1` `competitor_no_match` record from `verify:open-competitor-status`
|
||||
- current fully populated competitor queue:
|
||||
- products with price+image+details and `competitor_status='needs_research'`: `0`
|
||||
- scheduler guard:
|
||||
- updated `maintenance:find-equivalences` so future matcher runs do not reset deliberate `ambiguous` rows back to `needs_research`
|
||||
- rebuilt and restarted `tip-scraper-daemon` after confirming no active pg-boss jobs
|
||||
- live health after closure:
|
||||
- active products: `17305`
|
||||
- price verified: `11414`
|
||||
- image verified: `12016`
|
||||
- details verified: `16705`
|
||||
- fully verified: `10449`
|
||||
- competitor status:
|
||||
- `matched=10775`
|
||||
- `no_valid_match=74`
|
||||
- `ambiguous=556`
|
||||
- `needs_research=5900`
|
||||
- remaining `needs_research` rows are no longer fully populated competitor-ready products; they are product-data gaps first
|
||||
|
||||
- TIP product-data gap probing on 2026-05-09:
|
||||
- hardened `verify:catalog:details` to write `details` evidence
|
||||
- run result: `113` catalog-derived rows updated, `0` additional fully verified
|
||||
- active Health did not move, indicating those updates were outside the active dashboard base or not counted by the current active filters
|
||||
- GAO Tek detail verifier:
|
||||
- checked remaining `64` GAO product URLs
|
||||
- result: `0` updated, `64` skipped, `0` errors
|
||||
- interpretation: remaining GAO rows lack deterministic public detail evidence; no fake details added
|
||||
- GBICS:
|
||||
- added package script `scrape:gbics`
|
||||
- patched scraper to pass product URLs into `findOrCreateScrapedTransceiver`
|
||||
- live run found `758` products, `0` prices
|
||||
- active GBICS gap remains `64 price / 64 image / 64 details` of `135`
|
||||
- interpretation: GBICS has product discovery, but active old rows and scraped product identifiers do not line up cleanly; needs alias/dedupe hardening plus price selector repair
|
||||
- T&S Communication:
|
||||
- added package script `scrape:tscom`
|
||||
- patched scraper to pass product URLs into `findOrCreateScrapedTransceiver`
|
||||
- live run found `109` unique products, `0` prices
|
||||
- active T&S gap remains `82 price / 82 image / 49 details` of `82`
|
||||
- interpretation: product discovery works, but price selector and existing-row matching need hardening
|
||||
- 10Gtek / SFPcables:
|
||||
- live run found `110` products and wrote `6` prices
|
||||
- active 10Gtek gap remains `126 price / 131 image / 25 details` of `175`
|
||||
- interpretation: parser works partially, but many active 10Gtek rows are unmatched aliases or lack source pages
|
||||
- current largest active product-data gaps:
|
||||
- `Juniper Networks`: `283 price / 394 image / 173 details` of `534`
|
||||
- `Cisco Systems`: `151 price / 351 image / 146 details` of `351`
|
||||
- `GAO Tek`: `456 price / 23 image / 87 details` of `458`
|
||||
- `GBICS`: `64 price / 64 image / 64 details` of `135`
|
||||
- `T&S Communication`: `82 price / 82 image / 49 details` of `82`
|
||||
- `10Gtek`: `126 price / 131 image / 25 details` of `175`
|
||||
|
||||
- TIP FS.com SKU alias cleanup on 2026-05-09:
|
||||
- added `packages/scraper/src/utils/quarantine-fs-sku-aliases.ts`
|
||||
- script: `pnpm -C packages/scraper run verify:fs:sku-aliases`
|
||||
|
||||
@ -0,0 +1,93 @@
|
||||
# 2026-05-09 — TIP Open Competitor Closure + Product Gap Probing
|
||||
|
||||
## Competitor Status Closure
|
||||
|
||||
Added `competitor_ambiguous` as an evidence type via `sql/104-verification-evidence-ambiguous.sql`.
|
||||
|
||||
Added `packages/scraper/src/utils/resolve-open-competitor-status.ts`.
|
||||
|
||||
Runbook:
|
||||
|
||||
```bash
|
||||
pnpm -C packages/scraper run verify:open-competitor-status
|
||||
OPEN_COMPETITOR_APPLY=1 pnpm -C packages/scraper run verify:open-competitor-status
|
||||
```
|
||||
|
||||
Live result:
|
||||
|
||||
- dry-run found `365` fully populated products still in `needs_research`
|
||||
- apply wrote:
|
||||
- `364` `ambiguous`
|
||||
- `1` `no_valid_match`
|
||||
- `1` newly `fully_verified`
|
||||
- evidence ledger:
|
||||
- `364` `competitor_ambiguous`
|
||||
- `1` `competitor_no_match`
|
||||
- fully populated products still in competitor `needs_research`: `0`
|
||||
|
||||
Scheduler guard:
|
||||
|
||||
- `maintenance:find-equivalences` now preserves deliberate `ambiguous` states instead of resetting them to `needs_research`
|
||||
- built on Erik successfully
|
||||
- restarted `tip-scraper-daemon` only after the pg-boss active queue was empty
|
||||
|
||||
Live health after closure:
|
||||
|
||||
- active products: `17305`
|
||||
- price verified: `11414`
|
||||
- image verified: `12016`
|
||||
- details verified: `16705`
|
||||
- fully verified: `10449`
|
||||
- competitor status:
|
||||
- `matched=10775`
|
||||
- `no_valid_match=74`
|
||||
- `ambiguous=556`
|
||||
- `needs_research=5900`
|
||||
|
||||
Interpretation:
|
||||
|
||||
- `needs_research` no longer contains fully populated products ready for competitor-only resolution.
|
||||
- Remaining `needs_research` rows are product-data gaps first.
|
||||
|
||||
## Product Gap Probing
|
||||
|
||||
### Catalog Details
|
||||
|
||||
- hardened `verify:catalog:details` to write detail evidence
|
||||
- live result: `113` catalog-derived rows updated, `0` fully verified earned
|
||||
- active Health did not move, so those rows are not currently improving the active dashboard base
|
||||
|
||||
### GAO Tek
|
||||
|
||||
- ran `scrape:gaotek:details`
|
||||
- checked remaining `64` rows
|
||||
- result: `0` updated, `64` skipped, `0` errors
|
||||
- interpretation: remaining GAO pages lack deterministic public detail evidence
|
||||
|
||||
### GBICS
|
||||
|
||||
- added `scrape:gbics`
|
||||
- patched `gbics.ts` to pass product URLs into DB upsert/detail verification
|
||||
- live run found `758` products and `0` prices
|
||||
- active gap remains `64 price / 64 image / 64 details` of `135`
|
||||
- interpretation: discovery works, but active rows and scraped identifiers need alias/dedupe hardening; price selector also needs repair
|
||||
|
||||
### T&S Communication
|
||||
|
||||
- added `scrape:tscom`
|
||||
- patched `tscom.ts` to pass product URLs into DB upsert/detail verification
|
||||
- live run found `109` unique products and `0` prices
|
||||
- active gap remains `82 price / 82 image / 49 details` of `82`
|
||||
- interpretation: discovery works, but row matching and price selectors need hardening
|
||||
|
||||
### 10Gtek / SFPcables
|
||||
|
||||
- live run found `110` products and wrote `6` prices
|
||||
- active 10Gtek gap remains `126 price / 131 image / 25 details` of `175`
|
||||
- interpretation: parser works partially; remaining rows need alias/source-page repair
|
||||
|
||||
## Next Real Work
|
||||
|
||||
- Build alias/dedupe resolvers for GBICS, T&S, and 10Gtek similar to FS.com numeric SKU alias cleanup.
|
||||
- Repair GBICS/T&S price selectors against current page HTML.
|
||||
- For OEM vendors such as Cisco/Juniper, add explicit non-public-price states instead of pretending OEM list prices exist.
|
||||
@ -42,3 +42,6 @@
|
||||
{"id":"tip-20260509-no-valid-competitor-resolver-002","source":"codex-tip-verification-2026-05-09","kind":"sft-jsonl","messages":[{"role":"system","content":"You are TIP_LLM. Operate TIP crawlers safely on Erik and make every robot run reversible until evidence is checked."},{"role":"user","content":"What is the safe runbook for the no-valid-competitor resolver?"},{"role":"assistant","content":"Run dry-run first with a narrow vendor scope, for example `NO_VALID_MATCH_VENDOR=Flexoptix pnpm -C packages/scraper run verify:no-valid-competitor`. Inspect candidates for normalization mistakes, especially speed, reach, form factor, fiber, wavelength, cable/AOC/breakout artifacts, and category pages. Only apply with `NO_VALID_MATCH_APPLY=1` after the dry-run looks clean. Do not restart scraper daemons while pg-boss jobs are active. After apply, check `/api/health`, evidence counts, and protected review stats, then document the result in `sync/`."}]}
|
||||
{"id":"tip-20260509-ambiguous-equivalence-001","source":"codex-tip-verification-2026-05-09","kind":"sft-jsonl","messages":[{"role":"system","content":"You are TIP_LLM. Ambiguity is a valid safety outcome, not a failure."},{"role":"user","content":"What should TIP do when the matcher finds possible competitor candidates but cannot prove a safe 1:1 equivalence?"},{"role":"assistant","content":"Set `competitor_status=ambiguous` and keep the product out of fully_verified. Do not choose the cheapest or visually similar candidate. Ambiguous means there is enough evidence to reject `no_valid_match`, but not enough evidence to approve `matched`. Typical causes include breakout vs non-breakout, AOC/DAC vs optical module, reach mismatch, MMF vs SMF mismatch, wavelength mismatch, connector/channel mismatch, or too many candidate products in the same broad class. Ambiguous rows need targeted vendor research or stricter extractor normalization."}]}
|
||||
{"id":"tip-20260509-fs-sku-alias-quarantine-001","source":"codex-tip-verification-2026-05-09","kind":"sft-jsonl","messages":[{"role":"system","content":"You are TIP_LLM. Distinguish vendor marketplace SKUs from real optical product part numbers."},{"role":"user","content":"How should TIP handle FS.com rows like FS-380881 when the same product URL also has OSFP-DR8-1.6T-FL?"},{"role":"assistant","content":"Treat the numeric `FS-#####` row as a marketplace SKU alias, not as an independent transceiver, when the same normalized FS.com product URL already has a canonical non-numeric product P/N row with price, image, and details verified. Quarantine the numeric alias from the active transceiver base, clear verification flags, and write `artifact_quarantine` evidence that points to the canonical row. Keep the real product P/N active. For the 1.6T FS.com case, preserve both `OSFP-DR8-1.6T-FL` (500m) and `OSFP-2FR4-1.6T-FL` (2km), and do not classify `FS-380881` or `FS-380883` as separate products or no-valid-match products."}]}
|
||||
{"id":"tip-20260509-open-competitor-status-001","source":"codex-tip-verification-2026-05-09","kind":"sft-jsonl","messages":[{"role":"system","content":"You are TIP_LLM. Close research queues with explicit evidence-backed states, not fake green checks."},{"role":"user","content":"What should TIP do with products that have price, image, and details verified but still sit in competitor needs_research?"},{"role":"assistant","content":"Run an open competitor status resolver. If no strict source-backed candidate and no active equivalence hint exists, set `competitor_status=no_valid_match`, set competitor_verified true, write `competitor_no_match` evidence, and recalculate fully_verified. If candidates or equivalence hints exist but no safe deterministic 1:1 match can be proven, set `competitor_status=ambiguous` and write `competitor_ambiguous` evidence. Do not leave fully populated rows in endless `needs_research`, and do not auto-match when the candidate set is broad or technically risky."}]}
|
||||
{"id":"tip-20260509-store-scraper-gap-001","source":"codex-tip-verification-2026-05-09","kind":"sft-jsonl","messages":[{"role":"system","content":"You are TIP_LLM. Treat scraper runs that find products but do not improve active verification as diagnostic evidence."},{"role":"user","content":"GBICS and T&S scrapers found many products but active TIP verification counts did not improve. What does that mean?"},{"role":"assistant","content":"It means the crawler is not simply dead; the remaining problem is likely row identity alignment, alias/dedupe, or price/image selector drift. For GBICS, discovery found 758 products and zero prices, while active gaps stayed at 64 price/image/details rows. For T&S, discovery found 109 unique products and zero prices, while active gaps stayed at 82 price/image and 49 details rows. Next steps are to repair price selectors and build alias/dedupe resolvers that map scraped product names/URLs to existing active rows before claiming verification progress."}]}
|
||||
{"id":"tip-20260509-quote-only-oem-pricing-001","source":"codex-tip-verification-2026-05-09","kind":"sft-jsonl","messages":[{"role":"system","content":"You are TIP_LLM. Never invent prices for OEM or quote-only sources."},{"role":"user","content":"Large OEM vendors such as Cisco and Juniper have many rows without public prices. Should TIP mark those prices verified?"},{"role":"assistant","content":"No. OEM catalog rows can be detail-verified when public documentation provides technical specs, but price_verified requires a real public price observation or a deliberately modeled non-public-price state. Do not scrape estimates, reseller guesses, or competitor prices into OEM price_verified. If the source is quote-only or no public price exists, record that as a separate state and keep price_verified false until the data model supports it."}]}
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user