feat: add verification evidence state model

This commit is contained in:
Rene Fichtmueller 2026-05-09 23:06:21 +02:00
parent de2943ea79
commit 650de6ba9a
9 changed files with 614 additions and 10 deletions

View File

@ -20,6 +20,10 @@ healthRouter.get("/", async (_req: Request, res: Response) => {
COUNT(*) FILTER (WHERE image_verified) AS image_verified,
COUNT(*) FILTER (WHERE details_verified) AS details_verified,
COUNT(*) FILTER (WHERE fully_verified) AS fully_verified,
COUNT(*) FILTER (WHERE competitor_status = 'matched') AS competitor_matched,
COUNT(*) FILTER (WHERE competitor_status = 'no_valid_match') AS competitor_no_valid_match,
COUNT(*) FILTER (WHERE competitor_status = 'ambiguous') AS competitor_ambiguous,
COUNT(*) FILTER (WHERE competitor_status = 'needs_research') AS competitor_needs_research,
COUNT(*) AS total
FROM transceivers
WHERE COALESCE(data_confidence, 'unknown') != 'garbage'
@ -100,6 +104,12 @@ healthRouter.get("/", async (_req: Request, res: Response) => {
image_verified: Number(v.image_verified || 0),
details_verified: Number(v.details_verified || 0),
fully_verified: Number(v.fully_verified || 0),
competitor_status: {
matched: Number(v.competitor_matched || 0),
no_valid_match: Number(v.competitor_no_valid_match || 0),
ambiguous: Number(v.competitor_ambiguous || 0),
needs_research: Number(v.competitor_needs_research || 0),
},
total: Number(v.total || 0),
price_coverage_pct: v.total ? Math.round(Number(v.price_verified) / Number(v.total) * 100) : 0,
fully_verified_pct: v.total ? Math.round(Number(v.fully_verified) / Number(v.total) * 100) : 0,

View File

@ -10,6 +10,7 @@
* POST /api/review/run-research trigger equivalence research job immediately
*/
import { Router, Request, Response } from "express";
import { createHash } from "crypto";
import { pool } from "../db/client";
/** Promote to fully_verified if all 4 flags are set — shared logic */
@ -39,6 +40,34 @@ async function queueMaintenanceJob(name: string): Promise<void> {
);
}
/**
 * Serialise a JSON-compatible value with object keys emitted in sorted order
 * at every nesting level, so logically equal payloads always yield the same text.
 *
 * Mirrors `JSON.stringify` for the cases that matter here: `toJSON` is honoured,
 * unrepresentable object members (undefined, functions, symbols) are omitted,
 * and unrepresentable array items become `null`.
 *
 * @returns The canonical JSON text, or `undefined` when the value itself is not representable.
 */
function canonicalJson(value: unknown): string | undefined {
  let current: unknown = value;
  if (current !== null && typeof current === "object") {
    const maybeToJson = (current as { toJSON?: unknown }).toJSON;
    if (typeof maybeToJson === "function") {
      current = (maybeToJson as () => unknown).call(current);
    }
  }
  if (current === null || typeof current !== "object") {
    return JSON.stringify(current);
  }
  if (Array.isArray(current)) {
    // Array.from (unlike map) also visits holes, which JSON renders as null.
    const items = Array.from(current, (item: unknown) => canonicalJson(item) ?? "null");
    return `[${items.join(",")}]`;
  }
  const record = current as Record<string, unknown>;
  const members: string[] = [];
  for (const key of Object.keys(record).sort()) {
    const encoded = canonicalJson(record[key]);
    if (encoded !== undefined) {
      members.push(`${JSON.stringify(key)}:${encoded}`);
    }
  }
  return `{${members.join(",")}}`;
}

/**
 * Compute a short, order-independent fingerprint of an evidence payload.
 *
 * The previous implementation passed the sorted top-level keys as the
 * `JSON.stringify` replacer array. That array is a property whitelist applied
 * at every depth, so nested fields were silently dropped and different nested
 * payloads produced the same hash. Serialising recursively fixes this while
 * producing byte-identical output for flat payloads.
 *
 * @param value Evidence payload to fingerprint.
 * @returns The first 16 hex characters of the SHA-256 digest of the canonical JSON.
 */
function hashEvidence(value: Record<string, unknown>): string {
  return createHash("sha256").update(canonicalJson(value) ?? "null").digest("hex").slice(0, 16);
}
/**
 * Write one competitor-related row into `transceiver_verification_evidence`.
 *
 * The stored `evidence_hash` is derived from the evidence payload via
 * `hashEvidence`; the payload itself is stored as JSONB. Inserts that hit a
 * conflict are skipped (`ON CONFLICT DO NOTHING`).
 *
 * @param params.transceiverId    Id of the transceiver the evidence belongs to.
 * @param params.verificationType Either a positive match or a verified no-match.
 * @param params.evidenceValue    Payload describing the decision.
 * @param params.robotName        Name of the actor/route that produced the evidence.
 * @param params.confidence       Optional confidence; stored as NULL when omitted.
 */
async function recordVerificationEvidence(params: {
  transceiverId: string;
  verificationType: "competitor_match" | "competitor_no_match";
  evidenceValue: Record<string, unknown>;
  robotName: string;
  confidence?: number;
}): Promise<void> {
  const { transceiverId, verificationType, evidenceValue, robotName, confidence } = params;
  const digest = hashEvidence(evidenceValue);

  const insertSql = `
INSERT INTO transceiver_verification_evidence (
transceiver_id, verification_type, evidence_value, evidence_hash, robot_name, confidence
)
VALUES ($1, $2, $3::jsonb, $4, $5, $6)
ON CONFLICT DO NOTHING
`;
  const bindings = [
    transceiverId,
    verificationType,
    JSON.stringify(evidenceValue),
    digest,
    robotName,
    confidence ?? null,
  ];

  await pool.query(insertSql, bindings);
}
export const reviewRouter = Router();
// ── GET /api/review/equivalences ──────────────────────────────────────────────
@ -161,6 +190,15 @@ reviewRouter.get("/equivalences/stats", async (_req: Request, res: Response) =>
COUNT(*) AS total
FROM transceiver_equivalences
`);
const productStatus = await pool.query(`
SELECT
SUM(CASE WHEN competitor_status = 'matched' THEN 1 ELSE 0 END) AS matched,
SUM(CASE WHEN competitor_status = 'no_valid_match' THEN 1 ELSE 0 END) AS no_valid_match,
SUM(CASE WHEN competitor_status = 'ambiguous' THEN 1 ELSE 0 END) AS ambiguous,
SUM(CASE WHEN competitor_status = 'needs_research' THEN 1 ELSE 0 END) AS needs_research_products,
SUM(CASE WHEN competitor_status = 'unknown' THEN 1 ELSE 0 END) AS unknown
FROM transceivers
`);
const row = result.rows[0];
res.json({
@ -172,6 +210,13 @@ reviewRouter.get("/equivalences/stats", async (_req: Request, res: Response) =>
rejected: parseInt(row.rejected, 10) || 0,
needs_research: parseInt(row.needs_research, 10) || 0,
total: parseInt(row.total, 10) || 0,
product_status: {
matched: parseInt(productStatus.rows[0].matched, 10) || 0,
no_valid_match: parseInt(productStatus.rows[0].no_valid_match, 10) || 0,
ambiguous: parseInt(productStatus.rows[0].ambiguous, 10) || 0,
needs_research_products: parseInt(productStatus.rows[0].needs_research_products, 10) || 0,
unknown: parseInt(productStatus.rows[0].unknown, 10) || 0,
},
},
});
});
@ -204,9 +249,18 @@ reviewRouter.post("/equivalences/:id/approve", async (req: Request, res: Respons
await pool.query(`
UPDATE transceivers
SET competitor_verified = true,
competitor_verified_at = NOW()
competitor_verified_at = NOW(),
competitor_status = 'matched',
competitor_status_updated_at = NOW()
WHERE id = $1
`, [flexoptix_id]);
await recordVerificationEvidence({
transceiverId: flexoptix_id,
verificationType: "competitor_match",
evidenceValue: { equivalence_id: id, reviewer },
robotName: "review:approve",
confidence: Number(eq.rows[0].confidence ?? 1),
});
// Promote to fully_verified if all 4 flags are now set
const fullyVerifiedEarned = await checkAndSetFullyVerified(flexoptix_id);
@ -240,6 +294,51 @@ reviewRouter.post("/equivalences/:id/reject", async (req: Request, res: Response
res.json({ success: true });
});
// ── POST /api/review/transceivers/:id/no-valid-match ─────────────────────────
// Mark a product as competitor-resolved because research found no valid 1:1 match.
// This is not a fake competitor match; the status and evidence explain the truth.
//
// Body: { reason: string (>= 10 chars after trimming), reviewer?: string }
// Responses:
//   400 — reason missing, not a string, or shorter than 10 characters
//   404 — no transceiver with this id
//   200 — { success: true, fully_verified_earned } after status + evidence are written
reviewRouter.post("/transceivers/:id/no-valid-match", async (req: Request, res: Response) => {
  const id = String(req.params.id);

  // req.body is untrusted: it may be absent, and its fields may not be strings.
  // Narrow at runtime instead of trusting a cast, so bad input yields a 400
  // rather than a TypeError thrown from `.trim()`.
  const body = (req.body ?? {}) as { reason?: unknown; reviewer?: unknown };
  const reason = typeof body.reason === "string" ? body.reason.trim() : "";
  const reviewer = typeof body.reviewer === "string" && body.reviewer ? body.reviewer : "manual";

  if (reason.length < 10) {
    res.status(400).json({ success: false, error: "reason with at least 10 characters required" });
    return;
  }

  // Resolve the competitor requirement as a verified no-match and keep the reason on the row.
  const result = await pool.query(`
UPDATE transceivers
SET competitor_verified = true,
competitor_verified_at = NOW(),
competitor_status = 'no_valid_match',
competitor_status_updated_at = NOW(),
no_match_verified_at = NOW(),
no_match_reason = $2,
updated_at = NOW()
WHERE id = $1
RETURNING id
`, [id, reason]);

  if (!result.rowCount) {
    res.status(404).json({ success: false, error: "Not found" });
    return;
  }

  await recordVerificationEvidence({
    transceiverId: id,
    verificationType: "competitor_no_match",
    evidenceValue: {
      reason,
      reviewer,
    },
    robotName: "review:no-valid-match",
    confidence: 1,
  });

  const fullyVerifiedEarned = await checkAndSetFullyVerified(id);
  res.json({ success: true, fully_verified_earned: fullyVerifiedEarned });
});
// ── PATCH /api/review/equivalences/:id ────────────────────────────────────────
reviewRouter.patch("/equivalences/:id", async (req: Request, res: Response) => {
const { id } = req.params;
@ -310,7 +409,10 @@ reviewRouter.post("/equivalences/approve-all", async (req: Request, res: Respons
await pool.query(`
UPDATE transceivers
SET competitor_verified = true, competitor_verified_at = NOW()
SET competitor_verified = true,
competitor_verified_at = NOW(),
competitor_status = 'matched',
competitor_status_updated_at = NOW()
WHERE id = $1 AND competitor_verified = false
`, [row.flexoptix_id]);
@ -361,7 +463,10 @@ reviewRouter.post("/equivalences/bulk-approve", async (req: Request, res: Respon
await pool.query(`
UPDATE transceivers
SET competitor_verified = true, competitor_verified_at = NOW()
SET competitor_verified = true,
competitor_verified_at = NOW(),
competitor_status = 'matched',
competitor_status_updated_at = NOW()
WHERE id = $1 AND competitor_verified = false
`, [row.flexoptix_id]);

View File

@ -2649,8 +2649,11 @@ export async function registerWorkers(boss: PgBoss): Promise<void> {
const resetComp = await pool.query(`
UPDATE transceivers t
SET competitor_verified = false,
competitor_verified_at = NULL
competitor_verified_at = NULL,
competitor_status = 'needs_research',
competitor_status_updated_at = NOW()
WHERE competitor_verified = true
AND COALESCE(competitor_status, 'matched') != 'no_valid_match'
AND NOT EXISTS (
SELECT 1 FROM price_observations po
JOIN vendors v ON po.source_vendor_id = v.id
@ -2719,6 +2722,8 @@ export async function registerWorkers(boss: PgBoss): Promise<void> {
let skipped = 0;
for (const fx of flexResult.rows) {
let fxMatched = false;
let fxQueued = false;
// Find competitor transceivers with recent price observations and matching specs
const candidates = await pool.query(`
SELECT t.id AS competitor_id, t.part_number, t.standard_name,
@ -2824,14 +2829,62 @@ export async function registerWorkers(boss: PgBoss): Promise<void> {
await pool.query(`
UPDATE transceivers
SET competitor_verified = true,
competitor_verified_at = NOW()
competitor_verified_at = NOW(),
competitor_status = 'matched',
competitor_status_updated_at = NOW()
WHERE id = $1 AND competitor_verified = false
`, [fx.id]);
await pool.query(`
INSERT INTO transceiver_verification_evidence (
transceiver_id, verification_type, source_url, source_vendor_id,
evidence_value, evidence_hash, robot_name, confidence
)
VALUES (
$1, 'competitor_match', NULL, NULL,
$2::jsonb,
md5($2::text),
'maintenance:find-equivalences',
$3
)
ON CONFLICT DO NOTHING
`, [
fx.id,
JSON.stringify({
competitor_id: cand.competitor_id,
competitor_part_number: cand.part_number,
competitor_vendor: cand.vendor_name,
match_basis: basis,
notes,
}),
confidence,
]);
autoApproved++;
fxMatched = true;
} else {
queued++;
fxQueued = true;
}
}
if (!fxMatched && fxQueued) {
await pool.query(`
UPDATE transceivers
SET competitor_status = 'ambiguous',
competitor_status_updated_at = NOW()
WHERE id = $1
AND competitor_verified = false
AND COALESCE(competitor_status, 'unknown') NOT IN ('no_valid_match')
`, [fx.id]);
} else if (!fxMatched && !fxQueued) {
await pool.query(`
UPDATE transceivers
SET competitor_status = 'needs_research',
competitor_status_updated_at = NOW()
WHERE id = $1
AND competitor_verified = false
AND COALESCE(competitor_status, 'unknown') NOT IN ('no_valid_match')
`, [fx.id]);
}
}
console.log(

View File

@ -1,6 +1,7 @@
import { Pool } from "pg";
import { config } from "dotenv";
import { join } from "path";
import { contentHash } from "./hash";
config({ path: join(__dirname, "..", "..", "..", "..", ".env") });
@ -18,6 +19,43 @@ export const pool = new Pool({
// Alias — some scrapers import { db } instead of { pool }
export const db = pool;
/**
 * Append one entry to the `transceiver_verification_evidence` ledger.
 *
 * The evidence hash is computed with `contentHash` over the verification type,
 * source URL, source vendor id and evidence payload. A missing URL or vendor id
 * is hashed as an empty string and stored as NULL; a missing payload is stored
 * as `{}`. Inserts that hit a conflict are skipped (`ON CONFLICT DO NOTHING`).
 *
 * @param params.transceiverId    Id of the transceiver the evidence belongs to.
 * @param params.verificationType Kind of decision being recorded.
 * @param params.sourceUrl        Optional URL backing the decision.
 * @param params.sourceVendorId   Optional vendor id backing the decision.
 * @param params.evidenceValue    Optional payload describing the decision.
 * @param params.robotName        Name of the robot/function that produced the evidence.
 * @param params.confidence       Optional confidence; stored as NULL when omitted.
 */
export async function recordVerificationEvidence(params: {
  transceiverId: string;
  verificationType: "price" | "image" | "details" | "competitor_match" | "competitor_no_match" | "artifact_quarantine";
  sourceUrl?: string;
  sourceVendorId?: string;
  evidenceValue?: Record<string, unknown>;
  robotName: string;
  confidence?: number;
}): Promise<void> {
  const { transceiverId, verificationType, sourceUrl, sourceVendorId, robotName, confidence } = params;
  const payload = params.evidenceValue || {};

  // Property order is kept stable in case contentHash is order-sensitive.
  const digest = contentHash({
    type: verificationType,
    sourceUrl: sourceUrl || "",
    sourceVendorId: sourceVendorId || "",
    evidenceValue: payload,
  });

  const insertSql = `INSERT INTO transceiver_verification_evidence (
transceiver_id, verification_type, source_url, source_vendor_id,
evidence_value, evidence_hash, robot_name, confidence
)
VALUES ($1, $2, $3, $4, $5::jsonb, $6, $7, $8)
ON CONFLICT DO NOTHING`;
  const bindings = [
    transceiverId,
    verificationType,
    sourceUrl || null,
    sourceVendorId || null,
    JSON.stringify(payload),
    digest,
    robotName,
    confidence ?? null,
  ];

  await pool.query(insertSql, bindings);
}
/**
* After any verified flag is set, check if all 4 criteria are met and promote
* the transceiver to fully_verified. Call this wherever price/image/details/
@ -66,6 +104,14 @@ export async function markImageVerified(
[transceiverId, imageUrl]
);
await checkAndSetFullyVerified(transceiverId);
await recordVerificationEvidence({
transceiverId,
verificationType: "image",
sourceUrl: imageUrl,
evidenceValue: { imageUrl },
robotName: "markImageVerified",
confidence: 1,
});
return (result.rowCount ?? 0) > 0;
}
@ -99,6 +145,16 @@ export async function markDetailsVerified(params: {
[params.transceiverId, params.sourceUrl || null]
);
await checkAndSetFullyVerified(params.transceiverId);
if ((result.rowCount ?? 0) > 0) {
await recordVerificationEvidence({
transceiverId: params.transceiverId,
verificationType: "details",
sourceUrl: params.sourceUrl,
evidenceValue: { sourceUrl: params.sourceUrl || null },
robotName: "markDetailsVerified",
confidence: 1,
});
}
return (result.rowCount ?? 0) > 0;
}
@ -198,11 +254,20 @@ export async function upsertPriceObservation(params: {
`UPDATE transceivers SET
price_verified = true,
price_verified_at = NOW()
${isCompetitor ? ", competitor_verified = true, competitor_verified_at = NOW()" : ""}
${isCompetitor ? ", competitor_verified = true, competitor_verified_at = NOW(), competitor_status = 'matched', competitor_status_updated_at = NOW()" : ""}
WHERE id = $1`,
[params.transceiverId]
);
await checkAndSetFullyVerified(params.transceiverId);
await recordVerificationEvidence({
transceiverId: params.transceiverId,
verificationType: "price",
sourceUrl: params.url,
sourceVendorId: params.sourceVendorId,
evidenceValue: { price: params.price, currency: params.currency, stockLevel: params.stockLevel },
robotName: "upsertPriceObservation",
confidence: 1,
});
return false; // No change
}
@ -232,7 +297,9 @@ export async function upsertPriceObservation(params: {
price_verified = true,
price_verified_at = NOW(),
competitor_verified = true,
competitor_verified_at = NOW()
competitor_verified_at = NOW(),
competitor_status = 'matched',
competitor_status_updated_at = NOW()
WHERE id = $1`,
[params.transceiverId]
);
@ -246,6 +313,15 @@ export async function upsertPriceObservation(params: {
);
}
await checkAndSetFullyVerified(params.transceiverId);
await recordVerificationEvidence({
transceiverId: params.transceiverId,
verificationType: "price",
sourceUrl: params.url,
sourceVendorId: params.sourceVendorId,
evidenceValue: { price: params.price, currency: params.currency, stockLevel: params.stockLevel },
robotName: "upsertPriceObservation",
confidence: 1,
});
return true; // New observation written
}

View File

@ -4,7 +4,7 @@
* Moves obvious accessories, switches, instruments, radio products and optical
* transport systems out of the active transceiver verification base.
*/
import { pool } from "./db";
import { pool, recordVerificationEvidence } from "./db";
import { logger } from "./logger";
async function quarantine(): Promise<void> {
@ -80,9 +80,24 @@ async function quarantine(): Promise<void> {
)
)
AND COALESCE(t.category, '') != 'NonTransceiver'
RETURNING t.id
RETURNING t.id, t.part_number, v.name AS vendor_name, t.product_page_url
`);
for (const row of result.rows) {
await recordVerificationEvidence({
transceiverId: row.id,
verificationType: "artifact_quarantine",
sourceUrl: row.product_page_url || undefined,
evidenceValue: {
partNumber: row.part_number,
vendor: row.vendor_name,
reason: "matched deterministic non-transceiver artifact rule",
},
robotName: "verify:quarantine:non-transceivers",
confidence: 1,
});
}
logger.info("Non-transceiver quarantine complete", {
quarantined: result.rowCount ?? 0,
});

View File

@ -0,0 +1,212 @@
-- Migration 103: Verification evidence ledger and competitor status semantics
--
-- Goal:
-- fully_verified should mean "source-backed and resolved", not merely
-- "a competitor row was found". A product may be fully resolved when a
-- valid 1:1 competitor exists OR when research verified that no valid
-- public 1:1 competitor is available.

-- Add the competitor resolution state and the no-match bookkeeping columns.
-- IF NOT EXISTS keeps the statement safe to re-run. New and existing rows
-- start as 'unknown' via the column default.
ALTER TABLE transceivers
ADD COLUMN IF NOT EXISTS competitor_status VARCHAR(32) NOT NULL DEFAULT 'unknown',
ADD COLUMN IF NOT EXISTS competitor_status_updated_at TIMESTAMPTZ,
ADD COLUMN IF NOT EXISTS no_match_verified_at TIMESTAMPTZ,
ADD COLUMN IF NOT EXISTS no_match_reason TEXT;
-- Restrict competitor_status to the known state-machine values.
-- ADD CONSTRAINT has no IF NOT EXISTS form, so guard it with a catalog lookup.
-- The lookup is scoped to the transceivers table (conrelid): constraint names
-- are not unique across tables, and a same-named constraint elsewhere must not
-- cause this check to be skipped.
DO $$
BEGIN
IF NOT EXISTS (
SELECT 1
FROM pg_constraint
WHERE conname = 'transceivers_competitor_status_check'
AND conrelid = 'transceivers'::regclass
) THEN
ALTER TABLE transceivers
ADD CONSTRAINT transceivers_competitor_status_check
CHECK (competitor_status IN (
'unknown',
'matched',
'no_valid_match',
'needs_research',
'ambiguous'
));
END IF;
END $$;
-- Backfill competitor_status from the legacy competitor_verified flag.
--   competitor_verified = true  -> 'matched'
--   everything else unresolved  -> 'needs_research'
-- Rows already resolved as 'no_valid_match' also carry competitor_verified = true,
-- so they are explicitly excluded: re-running this migration must never turn a
-- verified no-match back into a claimed match. Rows already 'matched' are left
-- untouched to avoid needless rewrites.
UPDATE transceivers
SET competitor_status = CASE
WHEN competitor_verified = true THEN 'matched'
ELSE 'needs_research'
END,
competitor_status_updated_at = COALESCE(competitor_status_updated_at, NOW())
WHERE competitor_status IS NULL
OR competitor_status = 'unknown'
OR (competitor_verified = true AND competitor_status NOT IN ('matched', 'no_valid_match'));
-- Index on the status column itself.
CREATE INDEX IF NOT EXISTS idx_transceivers_competitor_status
ON transceivers (competitor_status);
-- Partial index: only rows resolved as 'no_valid_match', keyed by when the
-- no-match was verified.
CREATE INDEX IF NOT EXISTS idx_transceivers_no_valid_match
ON transceivers (no_match_verified_at)
WHERE competitor_status = 'no_valid_match';
-- Evidence ledger: one row per recorded verification decision.
-- Rows are removed together with their transceiver (ON DELETE CASCADE); a
-- deleted vendor only clears source_vendor_id (ON DELETE SET NULL).
-- confidence, when present, must lie in [0, 1].
CREATE TABLE IF NOT EXISTS transceiver_verification_evidence (
id UUID DEFAULT gen_random_uuid() PRIMARY KEY,
transceiver_id UUID NOT NULL REFERENCES transceivers(id) ON DELETE CASCADE,
verification_type VARCHAR(40) NOT NULL CHECK (verification_type IN (
'price',
'image',
'details',
'competitor_match',
'competitor_no_match',
'artifact_quarantine'
)),
source_url TEXT,
source_vendor_id UUID REFERENCES vendors(id) ON DELETE SET NULL,
evidence_value JSONB NOT NULL DEFAULT '{}'::jsonb,
evidence_hash TEXT,
robot_name TEXT NOT NULL DEFAULT 'unknown',
confidence NUMERIC(4,3) CHECK (confidence IS NULL OR confidence BETWEEN 0 AND 1),
created_at TIMESTAMPTZ NOT NULL DEFAULT NOW()
);
-- Lookup index: evidence for a transceiver and type, newest first.
CREATE INDEX IF NOT EXISTS idx_verification_evidence_tx
ON transceiver_verification_evidence (transceiver_id, verification_type, created_at DESC);
-- De-duplication key used by the ON CONFLICT DO NOTHING inserts in this file.
-- A NULL evidence_hash is folded to '' so such rows take part in the
-- uniqueness check as well.
CREATE UNIQUE INDEX IF NOT EXISTS idx_verification_evidence_dedupe
ON transceiver_verification_evidence (
transceiver_id,
verification_type,
COALESCE(evidence_hash, ''),
robot_name
);
-- Catalog comments describing the new column and table.
COMMENT ON COLUMN transceivers.competitor_status IS
'Resolution state for competitor evidence: matched, no_valid_match, needs_research, ambiguous, unknown.';
COMMENT ON TABLE transceiver_verification_evidence IS
'Append-only evidence ledger for TIP verification decisions. Stores source-backed proof for price, image, details, competitor matches and verified no-match states.';
-- Seed the ledger from already verified rows so TIP starts with an auditable
-- baseline instead of an empty proof table.
--
-- Price backfill: one evidence row per price-verified transceiver.
-- DISTINCT ON (t.id) together with ORDER BY po.time DESC keeps only the most
-- recent observation per transceiver. Observations with is_verified = NULL are
-- treated as verified (COALESCE(..., true)).
-- The hash covers type, price, currency and url, but not the observation time.
INSERT INTO transceiver_verification_evidence (
transceiver_id,
verification_type,
source_url,
source_vendor_id,
evidence_value,
evidence_hash,
robot_name,
confidence
)
SELECT DISTINCT ON (t.id)
t.id,
'price',
po.url,
po.source_vendor_id,
jsonb_build_object(
'price', po.price,
'currency', po.currency,
'observed_at', po.time
),
md5(jsonb_build_object(
'type', 'price',
'price', po.price,
'currency', po.currency,
'url', COALESCE(po.url, '')
)::text),
'migration:103:price-backfill',
1.0
FROM transceivers t
JOIN price_observations po ON po.transceiver_id = t.id
WHERE t.price_verified = true
AND COALESCE(po.is_verified, true) = true
ORDER BY t.id, po.time DESC
ON CONFLICT DO NOTHING;
-- Image backfill: one evidence row per image-verified transceiver.
-- Uses image_verified_url when it is non-empty, otherwise image_url; empty
-- strings count as missing (NULLIF). Rows with neither URL are skipped.
INSERT INTO transceiver_verification_evidence (
transceiver_id,
verification_type,
source_url,
evidence_value,
evidence_hash,
robot_name,
confidence
)
SELECT
id,
'image',
COALESCE(NULLIF(image_verified_url, ''), NULLIF(image_url, '')),
jsonb_build_object('image_url', COALESCE(NULLIF(image_verified_url, ''), NULLIF(image_url, ''))),
md5(jsonb_build_object('type', 'image', 'url', COALESCE(NULLIF(image_verified_url, ''), NULLIF(image_url, '')))::text),
'migration:103:image-backfill',
1.0
FROM transceivers
WHERE image_verified = true
AND COALESCE(NULLIF(image_verified_url, ''), NULLIF(image_url, '')) IS NOT NULL
ON CONFLICT DO NOTHING;
-- Details backfill: one evidence row per details-verified transceiver.
-- The source is details_source_url when non-empty, otherwise product_page_url;
-- rows with neither are skipped. The payload captures form factor, speed,
-- reach label and fiber type; the hash additionally covers the source URL.
INSERT INTO transceiver_verification_evidence (
transceiver_id,
verification_type,
source_url,
evidence_value,
evidence_hash,
robot_name,
confidence
)
SELECT
id,
'details',
COALESCE(NULLIF(details_source_url, ''), NULLIF(product_page_url, '')),
jsonb_build_object(
'form_factor', form_factor,
'speed_gbps', speed_gbps,
'reach_label', reach_label,
'fiber_type', fiber_type
),
md5(jsonb_build_object(
'type', 'details',
'source_url', COALESCE(NULLIF(details_source_url, ''), NULLIF(product_page_url, '')),
'form_factor', form_factor,
'speed_gbps', speed_gbps,
'reach_label', reach_label,
'fiber_type', fiber_type
)::text),
'migration:103:details-backfill',
1.0
FROM transceivers
WHERE details_verified = true
AND COALESCE(NULLIF(details_source_url, ''), NULLIF(product_page_url, '')) IS NOT NULL
ON CONFLICT DO NOTHING;
-- Competitor-match backfill: one evidence row per flexoptix_id that has an
-- approved or auto-approved equivalence. DISTINCT ON (eq.flexoptix_id) with the
-- ORDER BY below keeps the equivalence with the highest confidence, breaking
-- ties by the most recent updated_at. The equivalence's own confidence is
-- copied into the ledger row.
INSERT INTO transceiver_verification_evidence (
transceiver_id,
verification_type,
evidence_value,
evidence_hash,
robot_name,
confidence
)
SELECT DISTINCT ON (eq.flexoptix_id)
eq.flexoptix_id,
'competitor_match',
jsonb_build_object(
'equivalence_id', eq.id,
'competitor_id', eq.competitor_id,
'status', eq.status,
'match_basis', eq.match_basis,
'match_notes', eq.match_notes
),
md5(jsonb_build_object(
'type', 'competitor_match',
'equivalence_id', eq.id,
'competitor_id', eq.competitor_id,
'status', eq.status
)::text),
'migration:103:competitor-match-backfill',
eq.confidence
FROM transceiver_equivalences eq
WHERE eq.status IN ('approved', 'auto_approved')
ORDER BY eq.flexoptix_id, eq.confidence DESC, eq.updated_at DESC
ON CONFLICT DO NOTHING;

View File

@ -1,9 +1,53 @@
# Current TIP Sync State
Updated: 2026-05-09 20:12 UTC
Updated: 2026-05-09 21:00 UTC
## Newest Work
- TIP verification truth model on 2026-05-09:
- implemented migration `sql/103-verification-evidence-and-competitor-status.sql`
- adds `transceivers.competitor_status`
- `matched`
- `no_valid_match`
- `needs_research`
- `ambiguous`
- `unknown`
- adds `no_match_verified_at` and `no_match_reason`
- creates append-only `transceiver_verification_evidence`
- code changes:
- scraper DB helper now records evidence for price/image/details decisions
- artifact quarantine records `artifact_quarantine` evidence
- matcher writes `competitor_match` evidence for auto-approved matches
- matcher sets product status to `matched`, `ambiguous`, or `needs_research`
- Review API adds protected `POST /api/review/transceivers/:id/no-valid-match`
- Review stats now include product-level competitor status counts
- Health API now exposes active-product competitor status counts
- live migration/backfill:
- applied on Erik successfully
- status distribution after migration:
- `matched=11198`
- `needs_research=6575`
- Evidence ledger seeded from current data:
- `price=10633`
- `image=12189`
- `details=16782`
- `competitor_match=316`
- live API checks:
- `/api/health` healthy
- active health competitor status:
- `matched=11158`
- `needs_research=6256`
- `no_valid_match=0`
- `ambiguous=0`
- protected review stats with Dashboard token returned product status counts correctly
- operational note:
- `tip-api` restarted successfully
- `tip-scraper-daemon` was not restarted because `scrape:pricing:naddod` and `scrape:pricing:qsfptek` were active
- scheduler code is synced to `/opt/tip`; restart daemon after those jobs complete to load new matcher/reconcile logic
- TIPLLM training pool:
- appended lessons for competitor state machine and evidence ledger
- JSONL validated locally
- MAGATAMA MagatamaLLM RunPod training and adoption closure on 2026-05-09:
- operator requirement:
- RunPod success only counts after artifact exists, local Ollama import works, smoke tests pass, aliases/version switch, remote registry is updated, and live MAGATAMA reports no stale active run

View File

@ -0,0 +1,87 @@
# TIP Verification Truth Model And Evidence Ledger — 2026-05-09
## Scope
- Make TIP verification explainable instead of simply green/red.
- Preserve the difference between:
- a true 1:1 competitor match
- verified absence of a valid public match
- ambiguous candidates
- unresolved research
- Start an append-only evidence ledger for source-backed verification decisions.
## Implemented
- Added migration `sql/103-verification-evidence-and-competitor-status.sql`.
- Added product-level competitor status:
- `matched`
- `no_valid_match`
- `needs_research`
- `ambiguous`
- `unknown`
- Added no-match fields:
- `no_match_verified_at`
- `no_match_reason`
- Added `transceiver_verification_evidence`.
- `price`
- `image`
- `details`
- `competitor_match`
- `competitor_no_match`
- `artifact_quarantine`
- Scraper DB helper now records evidence for newly verified price/image/details.
- Quarantine robot now records artifact evidence.
- Matcher records competitor-match evidence for auto-approved matches.
- Review API now supports protected no-valid-match marking:
- `POST /api/review/transceivers/:id/no-valid-match`
- Health API exposes active competitor-status counts.
- Review stats expose global product competitor-status counts.
## Live Deployment
- Scraper build on Erik: passed.
- API build on Erik: passed after TypeScript route param cast.
- Migration applied successfully.
- `tip-api` restarted and healthy.
- `tip-scraper-daemon` was not restarted because `scrape:pricing:naddod` and `scrape:pricing:qsfptek` were active.
## Live Backfill
- Product status after migration:
- `matched=11198`
- `needs_research=6575`
- Evidence ledger seeded:
- `price=10633`
- `image=12189`
- `details=16782`
- `competitor_match=316`
## Live API Check
- `/api/health`: healthy.
- Active-product competitor status:
- `matched=11158`
- `needs_research=6256`
- `no_valid_match=0`
- `ambiguous=0`
- Protected Review stats with Dashboard token returned:
- `matched=11198`
- `needs_research_products=6575`
- `no_valid_match=0`
- `ambiguous=0`
## Operational Next Step
- When `scrape:pricing:naddod` and `scrape:pricing:qsfptek` finish, restart `tip-scraper-daemon` to load the new scheduler matcher/reconcile logic.
- Then run:
- reconcile
- matcher
- Review no-valid-match workflow for products that truly have no valid public 1:1 competitor.
## Training Pool
- Added TIPLLM lessons for:
- competitor state machine
- evidence ledger semantics
- `training-data/tip-llm-capabilities-v1.jsonl` validated successfully.

View File

@ -36,3 +36,5 @@
{"id":"tip-20260509-artifact-quarantine-001","source":"codex-tip-verification-2026-05-09","kind":"sft-jsonl","messages":[{"role":"system","content":"You are TIP_LLM. Protect TIP data quality by distinguishing real transceiver product pages from category, support, cable, accessory, switch, and redirect artifacts."},{"role":"user","content":"How should TIP classify rows scraped from FS.com category URLs, Flexoptix supported-vendor pages, Flexoptix DAC/AOC or patch-cable pages, Arista switch pages, ShopFiber24 category labels, or Coherent direct-attach/dynamic-component pages?"},{"role":"assistant","content":"Classify them outside the active transceiver verification base. Set category to NonTransceiver or another excluded class and clear price/image/details/competitor/fully verified flags. Examples include fs.com /c/ category URLs, flexoptix.net supported-vendors, universal-dac-aoc, optical-patch-cables, universal-transceiver-our-voodoo, redirect URLs, arista.com products/solutions switch pages, ShopFiber24 labels like FO-TRANSCEIVER or Transceivermodule, and Coherent direct-attach-cables or dynamic-components pages. These are not 1:1 comparable transceiver SKUs."}]}
{"id":"tip-20260509-erik-safe-crawler-001","source":"codex-tip-verification-2026-05-09","kind":"sft-jsonl","messages":[{"role":"system","content":"You are TIP_LLM. Design crawler operations that protect Erik and prefer deterministic source extraction over blind crawling."},{"role":"user","content":"What is the safe operating pattern for TIP crawlers on Erik after the 2026-05-09 verification run?"},{"role":"assistant","content":"Use targeted robots, not broad waves. Run one focused verifier at a time, wait for pg-boss jobs to complete, and check pm2/API health between runs. Prefer lightweight fetch+cheerio detail verifiers for known product URLs. Use Playwright/Crawlee only when the source requires a browser, with concurrency 1 and short batches. Do not start additional jobs while scrape:pricing:flexoptix, discover:vendor:cisco-tmg, or maintenance:find-equivalences are active. Always reconcile and rerun the matcher after quarantine or source updates."}]}
{"id":"tip-20260509-flex-fs-status-001","source":"codex-tip-verification-2026-05-09","kind":"sft-jsonl","messages":[{"role":"system","content":"You are TIP_LLM. Report verification status precisely and separate product-data completeness from competitor-match completeness."},{"role":"user","content":"After the 2026-05-09 cleanup, what is the correct interpretation of Flexoptix and FS.com verification status?"},{"role":"assistant","content":"For active Flexoptix and FS.com products, price, image, and details are complete after removing category, redirect, cable, support, and non-FS artifacts from the active base. Remaining not-fully-verified rows are due to competitor_verified=false, not missing product price/image/details. Do not say the whole vendor is 100% fully verified until 1:1 competitor equivalence exists or the absence of a valid competitor is explicitly represented as a verified no-match state."}]}
{"id":"tip-20260509-competitor-status-001","source":"codex-tip-verification-2026-05-09","kind":"sft-jsonl","messages":[{"role":"system","content":"You are TIP_LLM. Model verification as an evidence-backed state machine, not a single green checkbox."},{"role":"user","content":"How should TIP represent competitor verification when no valid 1:1 competitor product exists?"},{"role":"assistant","content":"Use a distinct competitor_status value. `matched` means a source-backed 1:1 competitor exists. `no_valid_match` means research verified that no valid public 1:1 competitor is available; this can still resolve the competitor requirement without inventing a match. `ambiguous` means candidates exist but are technically risky or below approval threshold. `needs_research` means the robot has not resolved the state yet. Keep the reason and timestamp in no_match_reason/no_match_verified_at and write a `competitor_no_match` evidence record."}]}
{"id":"tip-20260509-evidence-ledger-001","source":"codex-tip-verification-2026-05-09","kind":"sft-jsonl","messages":[{"role":"system","content":"You are TIP_LLM. Every verification decision should be auditable."},{"role":"user","content":"What should TIP write when a robot marks price, image, details, competitor match, no-match, or artifact quarantine as verified?"},{"role":"assistant","content":"Write an append-only row into transceiver_verification_evidence. Required fields: transceiver_id, verification_type, source_url when available, source_vendor_id when available, evidence_value JSON, evidence_hash, robot_name, confidence, and created_at. For artifact quarantine, record the rule reason and clear verification flags. For price/image/details, store the exact source-backed value. For competitor match/no-match, store the match basis or no-match reason. This lets TIP explain every green badge and every deliberate non-green state."}]}