diff --git a/packages/api/src/routes/health.ts b/packages/api/src/routes/health.ts index 818a9d7..b349d24 100644 --- a/packages/api/src/routes/health.ts +++ b/packages/api/src/routes/health.ts @@ -20,6 +20,10 @@ healthRouter.get("/", async (_req: Request, res: Response) => { COUNT(*) FILTER (WHERE image_verified) AS image_verified, COUNT(*) FILTER (WHERE details_verified) AS details_verified, COUNT(*) FILTER (WHERE fully_verified) AS fully_verified, + COUNT(*) FILTER (WHERE competitor_status = 'matched') AS competitor_matched, + COUNT(*) FILTER (WHERE competitor_status = 'no_valid_match') AS competitor_no_valid_match, + COUNT(*) FILTER (WHERE competitor_status = 'ambiguous') AS competitor_ambiguous, + COUNT(*) FILTER (WHERE competitor_status = 'needs_research') AS competitor_needs_research, COUNT(*) AS total FROM transceivers WHERE COALESCE(data_confidence, 'unknown') != 'garbage' @@ -100,6 +104,12 @@ healthRouter.get("/", async (_req: Request, res: Response) => { image_verified: Number(v.image_verified || 0), details_verified: Number(v.details_verified || 0), fully_verified: Number(v.fully_verified || 0), + competitor_status: { + matched: Number(v.competitor_matched || 0), + no_valid_match: Number(v.competitor_no_valid_match || 0), + ambiguous: Number(v.competitor_ambiguous || 0), + needs_research: Number(v.competitor_needs_research || 0), + }, total: Number(v.total || 0), price_coverage_pct: v.total ? Math.round(Number(v.price_verified) / Number(v.total) * 100) : 0, fully_verified_pct: v.total ? 
/**
 * Serialize a JSON-compatible value deterministically.
 *
 * Object keys are emitted in sorted order at every nesting level, so two
 * structurally equal values always produce the same text regardless of
 * property insertion order. Mirrors JSON.stringify semantics for the cases
 * that matter here: `toJSON()` is honoured (e.g. Date), properties whose value
 * is undefined / a function / a symbol are omitted, and such values inside
 * arrays become `null`.
 *
 * @returns the canonical JSON text, or undefined when the value itself is not
 *          serializable (undefined, function, symbol).
 */
function canonicalJson(value: unknown): string | undefined {
  if (value === null || typeof value !== "object") {
    // Primitives: defer to JSON.stringify (yields undefined for
    // undefined/function/symbol, "null" for NaN/Infinity).
    return JSON.stringify(value);
  }

  const withToJson = value as { toJSON?: () => unknown };
  if (typeof withToJson.toJSON === "function") {
    return canonicalJson(withToJson.toJSON());
  }

  if (Array.isArray(value)) {
    // Array.from visits holes too, so sparse arrays serialize like JSON.stringify.
    const items = Array.from(value as unknown[], (item) => canonicalJson(item) ?? "null");
    return `[${items.join(",")}]`;
  }

  const record = value as Record<string, unknown>;
  const members: string[] = [];
  for (const key of Object.keys(record).sort()) {
    const encoded = canonicalJson(record[key]);
    if (encoded !== undefined) {
      members.push(`${JSON.stringify(key)}:${encoded}`);
    }
  }
  return `{${members.join(",")}}`;
}

/**
 * Compute a short, order-independent fingerprint of an evidence payload.
 *
 * The previous implementation passed the sorted top-level keys as the
 * JSON.stringify replacer array. A replacer array is an allow-list applied at
 * EVERY depth, so nested properties whose names were not also top-level keys
 * were silently dropped and payloads differing only in nested fields hashed
 * identically. The canonical serializer keeps nested content in the hash.
 * For flat payloads the serialized text, and therefore the hash, is the same
 * as before.
 *
 * @param value evidence payload (JSON-compatible object)
 * @returns first 16 hex characters of the SHA-256 digest of the canonical JSON
 */
function hashEvidence(value: Record<string, unknown>): string {
  const canonical = canonicalJson(value) ?? "null";
  return createHash("sha256").update(canonical).digest("hex").slice(0, 16);
}
null, + ]); +} + export const reviewRouter = Router(); // ── GET /api/review/equivalences ────────────────────────────────────────────── @@ -161,6 +190,15 @@ reviewRouter.get("/equivalences/stats", async (_req: Request, res: Response) => COUNT(*) AS total FROM transceiver_equivalences `); + const productStatus = await pool.query(` + SELECT + SUM(CASE WHEN competitor_status = 'matched' THEN 1 ELSE 0 END) AS matched, + SUM(CASE WHEN competitor_status = 'no_valid_match' THEN 1 ELSE 0 END) AS no_valid_match, + SUM(CASE WHEN competitor_status = 'ambiguous' THEN 1 ELSE 0 END) AS ambiguous, + SUM(CASE WHEN competitor_status = 'needs_research' THEN 1 ELSE 0 END) AS needs_research_products, + SUM(CASE WHEN competitor_status = 'unknown' THEN 1 ELSE 0 END) AS unknown + FROM transceivers + `); const row = result.rows[0]; res.json({ @@ -172,6 +210,13 @@ reviewRouter.get("/equivalences/stats", async (_req: Request, res: Response) => rejected: parseInt(row.rejected, 10) || 0, needs_research: parseInt(row.needs_research, 10) || 0, total: parseInt(row.total, 10) || 0, + product_status: { + matched: parseInt(productStatus.rows[0].matched, 10) || 0, + no_valid_match: parseInt(productStatus.rows[0].no_valid_match, 10) || 0, + ambiguous: parseInt(productStatus.rows[0].ambiguous, 10) || 0, + needs_research_products: parseInt(productStatus.rows[0].needs_research_products, 10) || 0, + unknown: parseInt(productStatus.rows[0].unknown, 10) || 0, + }, }, }); }); @@ -204,9 +249,18 @@ reviewRouter.post("/equivalences/:id/approve", async (req: Request, res: Respons await pool.query(` UPDATE transceivers SET competitor_verified = true, - competitor_verified_at = NOW() + competitor_verified_at = NOW(), + competitor_status = 'matched', + competitor_status_updated_at = NOW() WHERE id = $1 `, [flexoptix_id]); + await recordVerificationEvidence({ + transceiverId: flexoptix_id, + verificationType: "competitor_match", + evidenceValue: { equivalence_id: id, reviewer }, + robotName: "review:approve", + 
// ── POST /api/review/transceivers/:id/no-valid-match ─────────────────────────
// Mark a product as competitor-resolved because research found no valid 1:1 match.
// This is not a fake competitor match; the status and evidence explain the truth.
//
// Body:      { reason: string (>= 10 characters after trimming), reviewer?: string }
// Responses: 400 — reason missing, not a string, or too short
//            404 — no transceiver row matched :id
//            200 — { success: true, fully_verified_earned }
reviewRouter.post("/transceivers/:id/no-valid-match", async (req: Request, res: Response) => {
  const id = String(req.params.id);

  // req.body is untrusted: it may be absent or carry non-string fields. Narrow
  // at runtime instead of asserting a shape, so a numeric/object `reason`
  // yields a 400 rather than a TypeError from `.trim()` inside this async handler.
  const rawBody: unknown = req.body;
  const fields: Record<string, unknown> =
    rawBody !== null && typeof rawBody === "object" ? (rawBody as Record<string, unknown>) : {};
  const reason = typeof fields.reason === "string" ? fields.reason.trim() : "";
  const reviewer =
    typeof fields.reviewer === "string" && fields.reviewer ? fields.reviewer : "manual";

  if (reason.length < 10) {
    res.status(400).json({ success: false, error: "reason with at least 10 characters required" });
    return;
  }

  // Resolve the competitor requirement and store why no match exists.
  const result = await pool.query(`
    UPDATE transceivers
    SET competitor_verified = true,
        competitor_verified_at = NOW(),
        competitor_status = 'no_valid_match',
        competitor_status_updated_at = NOW(),
        no_match_verified_at = NOW(),
        no_match_reason = $2,
        updated_at = NOW()
    WHERE id = $1
    RETURNING id
  `, [id, reason]);

  if (!result.rowCount) {
    res.status(404).json({ success: false, error: "Not found" });
    return;
  }

  // Ledger entry is written only after the row update succeeded.
  await recordVerificationEvidence({
    transceiverId: id,
    verificationType: "competitor_no_match",
    evidenceValue: {
      reason,
      reviewer,
    },
    robotName: "review:no-valid-match",
    confidence: 1,
  });

  // competitor_verified just became true, so the product may now qualify.
  const fullyVerifiedEarned = await checkAndSetFullyVerified(id);
  res.json({ success: true, fully_verified_earned: fullyVerifiedEarned });
});
@@ reviewRouter.post("/equivalences/approve-all", async (req: Request, res: Respons await pool.query(` UPDATE transceivers - SET competitor_verified = true, competitor_verified_at = NOW() + SET competitor_verified = true, + competitor_verified_at = NOW(), + competitor_status = 'matched', + competitor_status_updated_at = NOW() WHERE id = $1 AND competitor_verified = false `, [row.flexoptix_id]); @@ -361,7 +463,10 @@ reviewRouter.post("/equivalences/bulk-approve", async (req: Request, res: Respon await pool.query(` UPDATE transceivers - SET competitor_verified = true, competitor_verified_at = NOW() + SET competitor_verified = true, + competitor_verified_at = NOW(), + competitor_status = 'matched', + competitor_status_updated_at = NOW() WHERE id = $1 AND competitor_verified = false `, [row.flexoptix_id]); diff --git a/packages/scraper/src/scheduler.ts b/packages/scraper/src/scheduler.ts index 02f68ff..5f5ffd2 100644 --- a/packages/scraper/src/scheduler.ts +++ b/packages/scraper/src/scheduler.ts @@ -2649,8 +2649,11 @@ export async function registerWorkers(boss: PgBoss): Promise { const resetComp = await pool.query(` UPDATE transceivers t SET competitor_verified = false, - competitor_verified_at = NULL + competitor_verified_at = NULL, + competitor_status = 'needs_research', + competitor_status_updated_at = NOW() WHERE competitor_verified = true + AND COALESCE(competitor_status, 'matched') != 'no_valid_match' AND NOT EXISTS ( SELECT 1 FROM price_observations po JOIN vendors v ON po.source_vendor_id = v.id @@ -2719,6 +2722,8 @@ export async function registerWorkers(boss: PgBoss): Promise { let skipped = 0; for (const fx of flexResult.rows) { + let fxMatched = false; + let fxQueued = false; // Find competitor transceivers with recent price observations and matching specs const candidates = await pool.query(` SELECT t.id AS competitor_id, t.part_number, t.standard_name, @@ -2824,14 +2829,62 @@ export async function registerWorkers(boss: PgBoss): Promise { await pool.query(` 
UPDATE transceivers SET competitor_verified = true, - competitor_verified_at = NOW() + competitor_verified_at = NOW(), + competitor_status = 'matched', + competitor_status_updated_at = NOW() WHERE id = $1 AND competitor_verified = false `, [fx.id]); + await pool.query(` + INSERT INTO transceiver_verification_evidence ( + transceiver_id, verification_type, source_url, source_vendor_id, + evidence_value, evidence_hash, robot_name, confidence + ) + VALUES ( + $1, 'competitor_match', NULL, NULL, + $2::jsonb, + md5($2::text), + 'maintenance:find-equivalences', + $3 + ) + ON CONFLICT DO NOTHING + `, [ + fx.id, + JSON.stringify({ + competitor_id: cand.competitor_id, + competitor_part_number: cand.part_number, + competitor_vendor: cand.vendor_name, + match_basis: basis, + notes, + }), + confidence, + ]); autoApproved++; + fxMatched = true; } else { queued++; + fxQueued = true; } } + + if (!fxMatched && fxQueued) { + await pool.query(` + UPDATE transceivers + SET competitor_status = 'ambiguous', + competitor_status_updated_at = NOW() + WHERE id = $1 + AND competitor_verified = false + AND COALESCE(competitor_status, 'unknown') NOT IN ('no_valid_match') + `, [fx.id]); + } else if (!fxMatched && !fxQueued) { + await pool.query(` + UPDATE transceivers + SET competitor_status = 'needs_research', + competitor_status_updated_at = NOW() + WHERE id = $1 + AND competitor_verified = false + AND COALESCE(competitor_status, 'unknown') NOT IN ('no_valid_match') + `, [fx.id]); + } } console.log( diff --git a/packages/scraper/src/utils/db.ts b/packages/scraper/src/utils/db.ts index ea5824c..88a5971 100644 --- a/packages/scraper/src/utils/db.ts +++ b/packages/scraper/src/utils/db.ts @@ -1,6 +1,7 @@ import { Pool } from "pg"; import { config } from "dotenv"; import { join } from "path"; +import { contentHash } from "./hash"; config({ path: join(__dirname, "..", "..", "..", "..", ".env") }); @@ -18,6 +19,43 @@ export const pool = new Pool({ // Alias — some scrapers import { db } instead of { 
/**
 * Insert one row into transceiver_verification_evidence for a verification
 * decision.
 *
 * The evidence hash is derived (via contentHash) from the verification type,
 * source URL, source vendor id and evidence payload. The insert uses
 * ON CONFLICT DO NOTHING, so a conflicting row is skipped without error.
 *
 * @param params.transceiverId    transceiver the evidence belongs to
 * @param params.verificationType kind of decision being recorded
 * @param params.sourceUrl        optional source URL; stored as NULL when empty
 * @param params.sourceVendorId   optional vendor id; stored as NULL when empty
 * @param params.evidenceValue    optional payload; defaults to an empty object
 * @param params.robotName        name of the job/robot that made the decision
 * @param params.confidence       optional confidence; stored as NULL when omitted
 */
export async function recordVerificationEvidence(params: {
  transceiverId: string;
  verificationType: "price" | "image" | "details" | "competitor_match" | "competitor_no_match" | "artifact_quarantine";
  sourceUrl?: string;
  sourceVendorId?: string;
  evidenceValue?: Record<string, unknown>;
  robotName: string;
  confidence?: number;
}): Promise<void> {
  const { transceiverId, verificationType, sourceUrl, sourceVendorId, robotName, confidence } = params;
  const payload = params.evidenceValue || {};

  // Fingerprint over type + source + payload; empty strings stand in for
  // missing source fields so the hash input shape is always the same.
  const fingerprint = contentHash({
    type: verificationType,
    sourceUrl: sourceUrl || "",
    sourceVendorId: sourceVendorId || "",
    evidenceValue: payload,
  });

  const values = [
    transceiverId,
    verificationType,
    sourceUrl || null,
    sourceVendorId || null,
    JSON.stringify(payload),
    fingerprint,
    robotName,
    confidence ?? null,
  ];

  await pool.query(
    `INSERT INTO transceiver_verification_evidence (
       transceiver_id, verification_type, source_url, source_vendor_id,
       evidence_value, evidence_hash, robot_name, confidence
     )
     VALUES ($1, $2, $3, $4, $5::jsonb, $6, $7, $8)
     ON CONFLICT DO NOTHING`,
    values
  );
}
0) > 0) { + await recordVerificationEvidence({ + transceiverId: params.transceiverId, + verificationType: "details", + sourceUrl: params.sourceUrl, + evidenceValue: { sourceUrl: params.sourceUrl || null }, + robotName: "markDetailsVerified", + confidence: 1, + }); + } return (result.rowCount ?? 0) > 0; } @@ -198,11 +254,20 @@ export async function upsertPriceObservation(params: { `UPDATE transceivers SET price_verified = true, price_verified_at = NOW() - ${isCompetitor ? ", competitor_verified = true, competitor_verified_at = NOW()" : ""} + ${isCompetitor ? ", competitor_verified = true, competitor_verified_at = NOW(), competitor_status = 'matched', competitor_status_updated_at = NOW()" : ""} WHERE id = $1`, [params.transceiverId] ); await checkAndSetFullyVerified(params.transceiverId); + await recordVerificationEvidence({ + transceiverId: params.transceiverId, + verificationType: "price", + sourceUrl: params.url, + sourceVendorId: params.sourceVendorId, + evidenceValue: { price: params.price, currency: params.currency, stockLevel: params.stockLevel }, + robotName: "upsertPriceObservation", + confidence: 1, + }); return false; // No change } @@ -232,7 +297,9 @@ export async function upsertPriceObservation(params: { price_verified = true, price_verified_at = NOW(), competitor_verified = true, - competitor_verified_at = NOW() + competitor_verified_at = NOW(), + competitor_status = 'matched', + competitor_status_updated_at = NOW() WHERE id = $1`, [params.transceiverId] ); @@ -246,6 +313,15 @@ export async function upsertPriceObservation(params: { ); } await checkAndSetFullyVerified(params.transceiverId); + await recordVerificationEvidence({ + transceiverId: params.transceiverId, + verificationType: "price", + sourceUrl: params.url, + sourceVendorId: params.sourceVendorId, + evidenceValue: { price: params.price, currency: params.currency, stockLevel: params.stockLevel }, + robotName: "upsertPriceObservation", + confidence: 1, + }); return true; // New observation 
written } diff --git a/packages/scraper/src/utils/quarantine-non-transceivers.ts b/packages/scraper/src/utils/quarantine-non-transceivers.ts index bcd487c..218035f 100644 --- a/packages/scraper/src/utils/quarantine-non-transceivers.ts +++ b/packages/scraper/src/utils/quarantine-non-transceivers.ts @@ -4,7 +4,7 @@ * Moves obvious accessories, switches, instruments, radio products and optical * transport systems out of the active transceiver verification base. */ -import { pool } from "./db"; +import { pool, recordVerificationEvidence } from "./db"; import { logger } from "./logger"; async function quarantine(): Promise { @@ -80,9 +80,24 @@ async function quarantine(): Promise { ) ) AND COALESCE(t.category, '') != 'NonTransceiver' - RETURNING t.id + RETURNING t.id, t.part_number, v.name AS vendor_name, t.product_page_url `); + for (const row of result.rows) { + await recordVerificationEvidence({ + transceiverId: row.id, + verificationType: "artifact_quarantine", + sourceUrl: row.product_page_url || undefined, + evidenceValue: { + partNumber: row.part_number, + vendor: row.vendor_name, + reason: "matched deterministic non-transceiver artifact rule", + }, + robotName: "verify:quarantine:non-transceivers", + confidence: 1, + }); + } + logger.info("Non-transceiver quarantine complete", { quarantined: result.rowCount ?? 0, }); diff --git a/sql/103-verification-evidence-and-competitor-status.sql b/sql/103-verification-evidence-and-competitor-status.sql new file mode 100644 index 0000000..e98ea9e --- /dev/null +++ b/sql/103-verification-evidence-and-competitor-status.sql @@ -0,0 +1,212 @@ +-- Migration 103: Verification evidence ledger and competitor status semantics +-- +-- Goal: +-- fully_verified should mean "source-backed and resolved", not merely +-- "a competitor row was found". A product may be fully resolved when a +-- valid 1:1 competitor exists OR when research verified that no valid +-- public 1:1 competitor is available. 
+ +ALTER TABLE transceivers + ADD COLUMN IF NOT EXISTS competitor_status VARCHAR(32) NOT NULL DEFAULT 'unknown', + ADD COLUMN IF NOT EXISTS competitor_status_updated_at TIMESTAMPTZ, + ADD COLUMN IF NOT EXISTS no_match_verified_at TIMESTAMPTZ, + ADD COLUMN IF NOT EXISTS no_match_reason TEXT; + +DO $$ +BEGIN + IF NOT EXISTS ( + SELECT 1 + FROM pg_constraint + WHERE conname = 'transceivers_competitor_status_check' + ) THEN + ALTER TABLE transceivers + ADD CONSTRAINT transceivers_competitor_status_check + CHECK (competitor_status IN ( + 'unknown', + 'matched', + 'no_valid_match', + 'needs_research', + 'ambiguous' + )); + END IF; +END $$; + +UPDATE transceivers +SET competitor_status = CASE + WHEN competitor_verified = true THEN 'matched' + WHEN competitor_status = 'unknown' THEN 'needs_research' + ELSE competitor_status + END, + competitor_status_updated_at = COALESCE(competitor_status_updated_at, NOW()) +WHERE competitor_status IS NULL + OR competitor_status = 'unknown' + OR competitor_verified = true; + +CREATE INDEX IF NOT EXISTS idx_transceivers_competitor_status + ON transceivers (competitor_status); + +CREATE INDEX IF NOT EXISTS idx_transceivers_no_valid_match + ON transceivers (no_match_verified_at) + WHERE competitor_status = 'no_valid_match'; + +CREATE TABLE IF NOT EXISTS transceiver_verification_evidence ( + id UUID DEFAULT gen_random_uuid() PRIMARY KEY, + transceiver_id UUID NOT NULL REFERENCES transceivers(id) ON DELETE CASCADE, + verification_type VARCHAR(40) NOT NULL CHECK (verification_type IN ( + 'price', + 'image', + 'details', + 'competitor_match', + 'competitor_no_match', + 'artifact_quarantine' + )), + source_url TEXT, + source_vendor_id UUID REFERENCES vendors(id) ON DELETE SET NULL, + evidence_value JSONB NOT NULL DEFAULT '{}'::jsonb, + evidence_hash TEXT, + robot_name TEXT NOT NULL DEFAULT 'unknown', + confidence NUMERIC(4,3) CHECK (confidence IS NULL OR confidence BETWEEN 0 AND 1), + created_at TIMESTAMPTZ NOT NULL DEFAULT NOW() +); + +CREATE 
INDEX IF NOT EXISTS idx_verification_evidence_tx + ON transceiver_verification_evidence (transceiver_id, verification_type, created_at DESC); + +CREATE UNIQUE INDEX IF NOT EXISTS idx_verification_evidence_dedupe + ON transceiver_verification_evidence ( + transceiver_id, + verification_type, + COALESCE(evidence_hash, ''), + robot_name + ); + +COMMENT ON COLUMN transceivers.competitor_status IS + 'Resolution state for competitor evidence: matched, no_valid_match, needs_research, ambiguous, unknown.'; + +COMMENT ON TABLE transceiver_verification_evidence IS + 'Append-only evidence ledger for TIP verification decisions. Stores source-backed proof for price, image, details, competitor matches and verified no-match states.'; + +-- Seed the ledger from already verified rows so TIP starts with an auditable +-- baseline instead of an empty proof table. +INSERT INTO transceiver_verification_evidence ( + transceiver_id, + verification_type, + source_url, + source_vendor_id, + evidence_value, + evidence_hash, + robot_name, + confidence +) +SELECT DISTINCT ON (t.id) + t.id, + 'price', + po.url, + po.source_vendor_id, + jsonb_build_object( + 'price', po.price, + 'currency', po.currency, + 'observed_at', po.time + ), + md5(jsonb_build_object( + 'type', 'price', + 'price', po.price, + 'currency', po.currency, + 'url', COALESCE(po.url, '') + )::text), + 'migration:103:price-backfill', + 1.0 +FROM transceivers t +JOIN price_observations po ON po.transceiver_id = t.id +WHERE t.price_verified = true + AND COALESCE(po.is_verified, true) = true +ORDER BY t.id, po.time DESC +ON CONFLICT DO NOTHING; + +INSERT INTO transceiver_verification_evidence ( + transceiver_id, + verification_type, + source_url, + evidence_value, + evidence_hash, + robot_name, + confidence +) +SELECT + id, + 'image', + COALESCE(NULLIF(image_verified_url, ''), NULLIF(image_url, '')), + jsonb_build_object('image_url', COALESCE(NULLIF(image_verified_url, ''), NULLIF(image_url, ''))), + md5(jsonb_build_object('type', 
'image', 'url', COALESCE(NULLIF(image_verified_url, ''), NULLIF(image_url, '')))::text), + 'migration:103:image-backfill', + 1.0 +FROM transceivers +WHERE image_verified = true + AND COALESCE(NULLIF(image_verified_url, ''), NULLIF(image_url, '')) IS NOT NULL +ON CONFLICT DO NOTHING; + +INSERT INTO transceiver_verification_evidence ( + transceiver_id, + verification_type, + source_url, + evidence_value, + evidence_hash, + robot_name, + confidence +) +SELECT + id, + 'details', + COALESCE(NULLIF(details_source_url, ''), NULLIF(product_page_url, '')), + jsonb_build_object( + 'form_factor', form_factor, + 'speed_gbps', speed_gbps, + 'reach_label', reach_label, + 'fiber_type', fiber_type + ), + md5(jsonb_build_object( + 'type', 'details', + 'source_url', COALESCE(NULLIF(details_source_url, ''), NULLIF(product_page_url, '')), + 'form_factor', form_factor, + 'speed_gbps', speed_gbps, + 'reach_label', reach_label, + 'fiber_type', fiber_type + )::text), + 'migration:103:details-backfill', + 1.0 +FROM transceivers +WHERE details_verified = true + AND COALESCE(NULLIF(details_source_url, ''), NULLIF(product_page_url, '')) IS NOT NULL +ON CONFLICT DO NOTHING; + +INSERT INTO transceiver_verification_evidence ( + transceiver_id, + verification_type, + evidence_value, + evidence_hash, + robot_name, + confidence +) +SELECT DISTINCT ON (eq.flexoptix_id) + eq.flexoptix_id, + 'competitor_match', + jsonb_build_object( + 'equivalence_id', eq.id, + 'competitor_id', eq.competitor_id, + 'status', eq.status, + 'match_basis', eq.match_basis, + 'match_notes', eq.match_notes + ), + md5(jsonb_build_object( + 'type', 'competitor_match', + 'equivalence_id', eq.id, + 'competitor_id', eq.competitor_id, + 'status', eq.status + )::text), + 'migration:103:competitor-match-backfill', + eq.confidence +FROM transceiver_equivalences eq +WHERE eq.status IN ('approved', 'auto_approved') +ORDER BY eq.flexoptix_id, eq.confidence DESC, eq.updated_at DESC +ON CONFLICT DO NOTHING; + diff --git a/sync/CURRENT.md 
b/sync/CURRENT.md index 670a3d6..48994ef 100644 --- a/sync/CURRENT.md +++ b/sync/CURRENT.md @@ -1,9 +1,53 @@ # Current TIP Sync State -Updated: 2026-05-09 20:12 UTC +Updated: 2026-05-09 21:00 UTC ## Newest Work +- TIP verification truth model on 2026-05-09: + - implemented migration `sql/103-verification-evidence-and-competitor-status.sql` + - adds `transceivers.competitor_status` + - `matched` + - `no_valid_match` + - `needs_research` + - `ambiguous` + - `unknown` + - adds `no_match_verified_at` and `no_match_reason` + - creates append-only `transceiver_verification_evidence` + - code changes: + - scraper DB helper now records evidence for price/image/details decisions + - artifact quarantine records `artifact_quarantine` evidence + - matcher writes `competitor_match` evidence for auto-approved matches + - matcher sets product status to `matched`, `ambiguous`, or `needs_research` + - Review API adds protected `POST /api/review/transceivers/:id/no-valid-match` + - Review stats now include product-level competitor status counts + - Health API now exposes active-product competitor status counts + - live migration/backfill: + - applied on Erik successfully + - status distribution after migration: + - `matched=11198` + - `needs_research=6575` + - Evidence ledger seeded from current data: + - `price=10633` + - `image=12189` + - `details=16782` + - `competitor_match=316` + - live API checks: + - `/api/health` healthy + - active health competitor status: + - `matched=11158` + - `needs_research=6256` + - `no_valid_match=0` + - `ambiguous=0` + - protected review stats with Dashboard token returned product status counts correctly + - operational note: + - `tip-api` restarted successfully + - `tip-scraper-daemon` was not restarted because `scrape:pricing:naddod` and `scrape:pricing:qsfptek` were active + - scheduler code is synced to `/opt/tip`; restart daemon after those jobs complete to load new matcher/reconcile logic + - TIPLLM training pool: + - appended lessons for 
competitor state machine and evidence ledger + - JSONL validated locally + - MAGATAMA MagatamaLLM RunPod training and adoption closure on 2026-05-09: - operator requirement: - RunPod success only counts after artifact exists, local Ollama import works, smoke tests pass, aliases/version switch, remote registry is updated, and live MAGATAMA reports no stale active run diff --git a/sync/history/2026-05-09-tip-verification-truth-model-and-evidence-ledger.md b/sync/history/2026-05-09-tip-verification-truth-model-and-evidence-ledger.md new file mode 100644 index 0000000..898dbb9 --- /dev/null +++ b/sync/history/2026-05-09-tip-verification-truth-model-and-evidence-ledger.md @@ -0,0 +1,87 @@ +# TIP Verification Truth Model And Evidence Ledger — 2026-05-09 + +## Scope + +- Make TIP verification explainable instead of simply green/red. +- Preserve the difference between: + - a true 1:1 competitor match + - verified absence of a valid public match + - ambiguous candidates + - unresolved research +- Start an append-only evidence ledger for source-backed verification decisions. + +## Implemented + +- Added migration `sql/103-verification-evidence-and-competitor-status.sql`. +- Added product-level competitor status: + - `matched` + - `no_valid_match` + - `needs_research` + - `ambiguous` + - `unknown` +- Added no-match fields: + - `no_match_verified_at` + - `no_match_reason` +- Added `transceiver_verification_evidence`. + - `price` + - `image` + - `details` + - `competitor_match` + - `competitor_no_match` + - `artifact_quarantine` +- Scraper DB helper now records evidence for newly verified price/image/details. +- Quarantine robot now records artifact evidence. +- Matcher records competitor-match evidence for auto-approved matches. +- Review API now supports protected no-valid-match marking: + - `POST /api/review/transceivers/:id/no-valid-match` +- Health API exposes active competitor-status counts. +- Review stats expose global product competitor-status counts. 
+ +## Live Deployment + +- Scraper build on Erik: passed. +- API build on Erik: passed after TypeScript route param cast. +- Migration applied successfully. +- `tip-api` restarted and healthy. +- `tip-scraper-daemon` was not restarted because `scrape:pricing:naddod` and `scrape:pricing:qsfptek` were active. + +## Live Backfill + +- Product status after migration: + - `matched=11198` + - `needs_research=6575` +- Evidence ledger seeded: + - `price=10633` + - `image=12189` + - `details=16782` + - `competitor_match=316` + +## Live API Check + +- `/api/health`: healthy. +- Active-product competitor status: + - `matched=11158` + - `needs_research=6256` + - `no_valid_match=0` + - `ambiguous=0` +- Protected Review stats with Dashboard token returned: + - `matched=11198` + - `needs_research_products=6575` + - `no_valid_match=0` + - `ambiguous=0` + +## Operational Next Step + +- When `scrape:pricing:naddod` and `scrape:pricing:qsfptek` finish, restart `tip-scraper-daemon` to load the new scheduler matcher/reconcile logic. +- Then run: + - reconcile + - matcher + - Review no-valid-match workflow for products that truly have no valid public 1:1 competitor. + +## Training Pool + +- Added TIPLLM lessons for: + - competitor state machine + - evidence ledger semantics +- `training-data/tip-llm-capabilities-v1.jsonl` validated successfully. + diff --git a/training-data/tip-llm-capabilities-v1.jsonl b/training-data/tip-llm-capabilities-v1.jsonl index 28f8c33..56e2520 100644 --- a/training-data/tip-llm-capabilities-v1.jsonl +++ b/training-data/tip-llm-capabilities-v1.jsonl @@ -36,3 +36,5 @@ {"id":"tip-20260509-artifact-quarantine-001","source":"codex-tip-verification-2026-05-09","kind":"sft-jsonl","messages":[{"role":"system","content":"You are TIP_LLM. 
Protect TIP data quality by distinguishing real transceiver product pages from category, support, cable, accessory, switch, and redirect artifacts."},{"role":"user","content":"How should TIP classify rows scraped from FS.com category URLs, Flexoptix supported-vendor pages, Flexoptix DAC/AOC or patch-cable pages, Arista switch pages, ShopFiber24 category labels, or Coherent direct-attach/dynamic-component pages?"},{"role":"assistant","content":"Classify them outside the active transceiver verification base. Set category to NonTransceiver or another excluded class and clear price/image/details/competitor/fully verified flags. Examples include fs.com /c/ category URLs, flexoptix.net supported-vendors, universal-dac-aoc, optical-patch-cables, universal-transceiver-our-voodoo, redirect URLs, arista.com products/solutions switch pages, ShopFiber24 labels like FO-TRANSCEIVER or Transceivermodule, and Coherent direct-attach-cables or dynamic-components pages. These are not 1:1 comparable transceiver SKUs."}]} {"id":"tip-20260509-erik-safe-crawler-001","source":"codex-tip-verification-2026-05-09","kind":"sft-jsonl","messages":[{"role":"system","content":"You are TIP_LLM. Design crawler operations that protect Erik and prefer deterministic source extraction over blind crawling."},{"role":"user","content":"What is the safe operating pattern for TIP crawlers on Erik after the 2026-05-09 verification run?"},{"role":"assistant","content":"Use targeted robots, not broad waves. Run one focused verifier at a time, wait for pg-boss jobs to complete, and check pm2/API health between runs. Prefer lightweight fetch+cheerio detail verifiers for known product URLs. Use Playwright/Crawlee only when the source requires a browser, with concurrency 1 and short batches. Do not start additional jobs while scrape:pricing:flexoptix, discover:vendor:cisco-tmg, or maintenance:find-equivalences are active. 
Always reconcile and rerun the matcher after quarantine or source updates."}]} {"id":"tip-20260509-flex-fs-status-001","source":"codex-tip-verification-2026-05-09","kind":"sft-jsonl","messages":[{"role":"system","content":"You are TIP_LLM. Report verification status precisely and separate product-data completeness from competitor-match completeness."},{"role":"user","content":"After the 2026-05-09 cleanup, what is the correct interpretation of Flexoptix and FS.com verification status?"},{"role":"assistant","content":"For active Flexoptix and FS.com products, price, image, and details are complete after removing category, redirect, cable, support, and non-FS artifacts from the active base. Remaining not-fully-verified rows are due to competitor_verified=false, not missing product price/image/details. Do not say the whole vendor is 100% fully verified until 1:1 competitor equivalence exists or the absence of a valid competitor is explicitly represented as a verified no-match state."}]} +{"id":"tip-20260509-competitor-status-001","source":"codex-tip-verification-2026-05-09","kind":"sft-jsonl","messages":[{"role":"system","content":"You are TIP_LLM. Model verification as an evidence-backed state machine, not a single green checkbox."},{"role":"user","content":"How should TIP represent competitor verification when no valid 1:1 competitor product exists?"},{"role":"assistant","content":"Use a distinct competitor_status value. `matched` means a source-backed 1:1 competitor exists. `no_valid_match` means research verified that no valid public 1:1 competitor is available; this can still resolve the competitor requirement without inventing a match. `ambiguous` means candidates exist but are technically risky or below approval threshold. `needs_research` means the robot has not resolved the state yet. 
Keep the reason and timestamp in no_match_reason/no_match_verified_at and write a `competitor_no_match` evidence record."}]} +{"id":"tip-20260509-evidence-ledger-001","source":"codex-tip-verification-2026-05-09","kind":"sft-jsonl","messages":[{"role":"system","content":"You are TIP_LLM. Every verification decision should be auditable."},{"role":"user","content":"What should TIP write when a robot marks price, image, details, competitor match, no-match, or artifact quarantine as verified?"},{"role":"assistant","content":"Write an append-only row into transceiver_verification_evidence. Required fields: transceiver_id, verification_type, source_url when available, source_vendor_id when available, evidence_value JSON, evidence_hash, robot_name, confidence, and created_at. For artifact quarantine, record the rule reason and clear verification flags. For price/image/details, store the exact source-backed value. For competitor match/no-match, store the match basis or no-match reason. This lets TIP explain every green badge and every deliberate non-green state."}]}