feat: add verification evidence state model
This commit is contained in:
parent
de2943ea79
commit
650de6ba9a
@ -20,6 +20,10 @@ healthRouter.get("/", async (_req: Request, res: Response) => {
|
||||
COUNT(*) FILTER (WHERE image_verified) AS image_verified,
|
||||
COUNT(*) FILTER (WHERE details_verified) AS details_verified,
|
||||
COUNT(*) FILTER (WHERE fully_verified) AS fully_verified,
|
||||
COUNT(*) FILTER (WHERE competitor_status = 'matched') AS competitor_matched,
|
||||
COUNT(*) FILTER (WHERE competitor_status = 'no_valid_match') AS competitor_no_valid_match,
|
||||
COUNT(*) FILTER (WHERE competitor_status = 'ambiguous') AS competitor_ambiguous,
|
||||
COUNT(*) FILTER (WHERE competitor_status = 'needs_research') AS competitor_needs_research,
|
||||
COUNT(*) AS total
|
||||
FROM transceivers
|
||||
WHERE COALESCE(data_confidence, 'unknown') != 'garbage'
|
||||
@ -100,6 +104,12 @@ healthRouter.get("/", async (_req: Request, res: Response) => {
|
||||
image_verified: Number(v.image_verified || 0),
|
||||
details_verified: Number(v.details_verified || 0),
|
||||
fully_verified: Number(v.fully_verified || 0),
|
||||
competitor_status: {
|
||||
matched: Number(v.competitor_matched || 0),
|
||||
no_valid_match: Number(v.competitor_no_valid_match || 0),
|
||||
ambiguous: Number(v.competitor_ambiguous || 0),
|
||||
needs_research: Number(v.competitor_needs_research || 0),
|
||||
},
|
||||
total: Number(v.total || 0),
|
||||
price_coverage_pct: v.total ? Math.round(Number(v.price_verified) / Number(v.total) * 100) : 0,
|
||||
fully_verified_pct: v.total ? Math.round(Number(v.fully_verified) / Number(v.total) * 100) : 0,
|
||||
|
||||
@ -10,6 +10,7 @@
|
||||
* POST /api/review/run-research — trigger equivalence research job immediately
|
||||
*/
|
||||
import { Router, Request, Response } from "express";
|
||||
import { createHash } from "crypto";
|
||||
import { pool } from "../db/client";
|
||||
|
||||
/** Promote to fully_verified if all 4 flags are set — shared logic */
|
||||
@ -39,6 +40,34 @@ async function queueMaintenanceJob(name: string): Promise<void> {
|
||||
);
|
||||
}
|
||||
|
||||
/**
 * Derive a short, deterministic fingerprint for an evidence payload.
 *
 * The payload is serialised canonically (object keys sorted at every depth),
 * hashed with SHA-256 and truncated to the first 16 hex characters.
 *
 * An array replacer (`JSON.stringify(value, keys)`) must not be used here: it
 * acts as a property allow-list at *every* nesting level, so nested keys that
 * are not also top-level keys get silently dropped and different payloads
 * would collapse onto the same hash. For flat payloads the canonical form
 * below is identical to what the array-replacer form produced, so existing
 * hashes of flat evidence stay stable.
 *
 * @param value Evidence payload to fingerprint.
 * @returns First 16 hex characters of the SHA-256 digest of the canonical JSON.
 */
function hashEvidence(value: Record<string, unknown>): string {
  const canonicalize = (node: unknown): string | undefined => {
    if (node === null || typeof node !== "object") {
      // Primitives: JSON.stringify yields undefined for undefined/functions/symbols.
      return JSON.stringify(node) as string | undefined;
    }
    const maybeSerializable = node as { toJSON?: unknown };
    if (typeof maybeSerializable.toJSON === "function") {
      // Honour toJSON (e.g. Date) the same way JSON.stringify would.
      return canonicalize((maybeSerializable.toJSON as () => unknown).call(node));
    }
    if (Array.isArray(node)) {
      // Array order is significant; unserialisable items become null, as in JSON.stringify.
      return `[${node.map((item) => canonicalize(item) ?? "null").join(",")}]`;
    }
    const record = node as Record<string, unknown>;
    const members: string[] = [];
    for (const key of Object.keys(record).sort()) {
      const encoded = canonicalize(record[key]);
      // Properties without a JSON representation are omitted, as in JSON.stringify.
      if (encoded !== undefined) {
        members.push(`${JSON.stringify(key)}:${encoded}`);
      }
    }
    return `{${members.join(",")}}`;
  };

  return createHash("sha256")
    .update(canonicalize(value) ?? "{}")
    .digest("hex")
    .slice(0, 16);
}
|
||||
|
||||
/**
 * Write one review decision into `transceiver_verification_evidence`.
 *
 * The evidence payload is stored as JSONB together with a fingerprint from
 * `hashEvidence`. The statement uses `ON CONFLICT DO NOTHING`, so a row that
 * conflicts with an existing one is skipped without raising.
 *
 * @param params.transceiverId    Product the evidence belongs to.
 * @param params.verificationType Kind of competitor decision being recorded.
 * @param params.evidenceValue    Payload persisted as JSONB and fingerprinted.
 * @param params.robotName        Identifier of the actor writing the evidence.
 * @param params.confidence       Optional confidence; stored as NULL when omitted.
 */
async function recordVerificationEvidence(params: {
  transceiverId: string;
  verificationType: "competitor_match" | "competitor_no_match";
  evidenceValue: Record<string, unknown>;
  robotName: string;
  confidence?: number;
}): Promise<void> {
  const { transceiverId, verificationType, evidenceValue, robotName, confidence } = params;

  const insertSql = `
INSERT INTO transceiver_verification_evidence (
transceiver_id, verification_type, evidence_value, evidence_hash, robot_name, confidence
)
VALUES ($1, $2, $3::jsonb, $4, $5, $6)
ON CONFLICT DO NOTHING
`;

  // Bind order must match the $1..$6 placeholders above.
  const bindValues = [
    transceiverId,
    verificationType,
    JSON.stringify(evidenceValue),
    hashEvidence(evidenceValue),
    robotName,
    confidence ?? null,
  ];

  await pool.query(insertSql, bindValues);
}
|
||||
|
||||
export const reviewRouter = Router();
|
||||
|
||||
// ── GET /api/review/equivalences ──────────────────────────────────────────────
|
||||
@ -161,6 +190,15 @@ reviewRouter.get("/equivalences/stats", async (_req: Request, res: Response) =>
|
||||
COUNT(*) AS total
|
||||
FROM transceiver_equivalences
|
||||
`);
|
||||
const productStatus = await pool.query(`
|
||||
SELECT
|
||||
SUM(CASE WHEN competitor_status = 'matched' THEN 1 ELSE 0 END) AS matched,
|
||||
SUM(CASE WHEN competitor_status = 'no_valid_match' THEN 1 ELSE 0 END) AS no_valid_match,
|
||||
SUM(CASE WHEN competitor_status = 'ambiguous' THEN 1 ELSE 0 END) AS ambiguous,
|
||||
SUM(CASE WHEN competitor_status = 'needs_research' THEN 1 ELSE 0 END) AS needs_research_products,
|
||||
SUM(CASE WHEN competitor_status = 'unknown' THEN 1 ELSE 0 END) AS unknown
|
||||
FROM transceivers
|
||||
`);
|
||||
|
||||
const row = result.rows[0];
|
||||
res.json({
|
||||
@ -172,6 +210,13 @@ reviewRouter.get("/equivalences/stats", async (_req: Request, res: Response) =>
|
||||
rejected: parseInt(row.rejected, 10) || 0,
|
||||
needs_research: parseInt(row.needs_research, 10) || 0,
|
||||
total: parseInt(row.total, 10) || 0,
|
||||
product_status: {
|
||||
matched: parseInt(productStatus.rows[0].matched, 10) || 0,
|
||||
no_valid_match: parseInt(productStatus.rows[0].no_valid_match, 10) || 0,
|
||||
ambiguous: parseInt(productStatus.rows[0].ambiguous, 10) || 0,
|
||||
needs_research_products: parseInt(productStatus.rows[0].needs_research_products, 10) || 0,
|
||||
unknown: parseInt(productStatus.rows[0].unknown, 10) || 0,
|
||||
},
|
||||
},
|
||||
});
|
||||
});
|
||||
@ -204,9 +249,18 @@ reviewRouter.post("/equivalences/:id/approve", async (req: Request, res: Respons
|
||||
await pool.query(`
|
||||
UPDATE transceivers
|
||||
SET competitor_verified = true,
|
||||
competitor_verified_at = NOW()
|
||||
competitor_verified_at = NOW(),
|
||||
competitor_status = 'matched',
|
||||
competitor_status_updated_at = NOW()
|
||||
WHERE id = $1
|
||||
`, [flexoptix_id]);
|
||||
await recordVerificationEvidence({
|
||||
transceiverId: flexoptix_id,
|
||||
verificationType: "competitor_match",
|
||||
evidenceValue: { equivalence_id: id, reviewer },
|
||||
robotName: "review:approve",
|
||||
confidence: Number(eq.rows[0].confidence ?? 1),
|
||||
});
|
||||
|
||||
// Promote to fully_verified if all 4 flags are now set
|
||||
const fullyVerifiedEarned = await checkAndSetFullyVerified(flexoptix_id);
|
||||
@ -240,6 +294,51 @@ reviewRouter.post("/equivalences/:id/reject", async (req: Request, res: Response
|
||||
res.json({ success: true });
|
||||
});
|
||||
|
||||
// ── POST /api/review/transceivers/:id/no-valid-match ─────────────────────────
|
||||
// Mark a product as competitor-resolved because research found no valid 1:1 match.
|
||||
// This is not a fake competitor match; the status and evidence explain the truth.
|
||||
reviewRouter.post("/transceivers/:id/no-valid-match", async (req: Request, res: Response) => {
  const id = String(req.params.id);

  // req.body is untrusted JSON and may be missing entirely or carry non-string
  // fields. Validate at runtime instead of trusting a type cast; otherwise
  // `.trim()` on a number/object throws inside this async handler.
  const body = (req.body ?? {}) as { reason?: unknown; reviewer?: unknown };
  const reason = typeof body.reason === "string" ? body.reason.trim() : "";
  const reviewer = typeof body.reviewer === "string" && body.reviewer ? body.reviewer : "manual";

  if (reason.length < 10) {
    res.status(400).json({ success: false, error: "reason with at least 10 characters required" });
    return;
  }

  // Mark the product as competitor-resolved with an explicit no-match status.
  // RETURNING id lets us tell "unknown product" apart from a successful update.
  const result = await pool.query(`
UPDATE transceivers
SET competitor_verified = true,
competitor_verified_at = NOW(),
competitor_status = 'no_valid_match',
competitor_status_updated_at = NOW(),
no_match_verified_at = NOW(),
no_match_reason = $2,
updated_at = NOW()
WHERE id = $1
RETURNING id
`, [id, reason]);

  if (!result.rowCount) {
    res.status(404).json({ success: false, error: "Not found" });
    return;
  }

  // Persist the reviewer's justification in the evidence ledger.
  await recordVerificationEvidence({
    transceiverId: id,
    verificationType: "competitor_no_match",
    evidenceValue: {
      reason,
      reviewer,
    },
    robotName: "review:no-valid-match",
    confidence: 1,
  });

  // The competitor flag is now set, so the product may qualify as fully verified.
  const fullyVerifiedEarned = await checkAndSetFullyVerified(id);
  res.json({ success: true, fully_verified_earned: fullyVerifiedEarned });
});
|
||||
|
||||
// ── PATCH /api/review/equivalences/:id ────────────────────────────────────────
|
||||
reviewRouter.patch("/equivalences/:id", async (req: Request, res: Response) => {
|
||||
const { id } = req.params;
|
||||
@ -310,7 +409,10 @@ reviewRouter.post("/equivalences/approve-all", async (req: Request, res: Respons
|
||||
|
||||
await pool.query(`
|
||||
UPDATE transceivers
|
||||
SET competitor_verified = true, competitor_verified_at = NOW()
|
||||
SET competitor_verified = true,
|
||||
competitor_verified_at = NOW(),
|
||||
competitor_status = 'matched',
|
||||
competitor_status_updated_at = NOW()
|
||||
WHERE id = $1 AND competitor_verified = false
|
||||
`, [row.flexoptix_id]);
|
||||
|
||||
@ -361,7 +463,10 @@ reviewRouter.post("/equivalences/bulk-approve", async (req: Request, res: Respon
|
||||
|
||||
await pool.query(`
|
||||
UPDATE transceivers
|
||||
SET competitor_verified = true, competitor_verified_at = NOW()
|
||||
SET competitor_verified = true,
|
||||
competitor_verified_at = NOW(),
|
||||
competitor_status = 'matched',
|
||||
competitor_status_updated_at = NOW()
|
||||
WHERE id = $1 AND competitor_verified = false
|
||||
`, [row.flexoptix_id]);
|
||||
|
||||
|
||||
@ -2649,8 +2649,11 @@ export async function registerWorkers(boss: PgBoss): Promise<void> {
|
||||
const resetComp = await pool.query(`
|
||||
UPDATE transceivers t
|
||||
SET competitor_verified = false,
|
||||
competitor_verified_at = NULL
|
||||
competitor_verified_at = NULL,
|
||||
competitor_status = 'needs_research',
|
||||
competitor_status_updated_at = NOW()
|
||||
WHERE competitor_verified = true
|
||||
AND COALESCE(competitor_status, 'matched') != 'no_valid_match'
|
||||
AND NOT EXISTS (
|
||||
SELECT 1 FROM price_observations po
|
||||
JOIN vendors v ON po.source_vendor_id = v.id
|
||||
@ -2719,6 +2722,8 @@ export async function registerWorkers(boss: PgBoss): Promise<void> {
|
||||
let skipped = 0;
|
||||
|
||||
for (const fx of flexResult.rows) {
|
||||
let fxMatched = false;
|
||||
let fxQueued = false;
|
||||
// Find competitor transceivers with recent price observations and matching specs
|
||||
const candidates = await pool.query(`
|
||||
SELECT t.id AS competitor_id, t.part_number, t.standard_name,
|
||||
@ -2824,14 +2829,62 @@ export async function registerWorkers(boss: PgBoss): Promise<void> {
|
||||
await pool.query(`
|
||||
UPDATE transceivers
|
||||
SET competitor_verified = true,
|
||||
competitor_verified_at = NOW()
|
||||
competitor_verified_at = NOW(),
|
||||
competitor_status = 'matched',
|
||||
competitor_status_updated_at = NOW()
|
||||
WHERE id = $1 AND competitor_verified = false
|
||||
`, [fx.id]);
|
||||
await pool.query(`
|
||||
INSERT INTO transceiver_verification_evidence (
|
||||
transceiver_id, verification_type, source_url, source_vendor_id,
|
||||
evidence_value, evidence_hash, robot_name, confidence
|
||||
)
|
||||
VALUES (
|
||||
$1, 'competitor_match', NULL, NULL,
|
||||
$2::jsonb,
|
||||
md5($2::text),
|
||||
'maintenance:find-equivalences',
|
||||
$3
|
||||
)
|
||||
ON CONFLICT DO NOTHING
|
||||
`, [
|
||||
fx.id,
|
||||
JSON.stringify({
|
||||
competitor_id: cand.competitor_id,
|
||||
competitor_part_number: cand.part_number,
|
||||
competitor_vendor: cand.vendor_name,
|
||||
match_basis: basis,
|
||||
notes,
|
||||
}),
|
||||
confidence,
|
||||
]);
|
||||
autoApproved++;
|
||||
fxMatched = true;
|
||||
} else {
|
||||
queued++;
|
||||
fxQueued = true;
|
||||
}
|
||||
}
|
||||
|
||||
if (!fxMatched && fxQueued) {
|
||||
await pool.query(`
|
||||
UPDATE transceivers
|
||||
SET competitor_status = 'ambiguous',
|
||||
competitor_status_updated_at = NOW()
|
||||
WHERE id = $1
|
||||
AND competitor_verified = false
|
||||
AND COALESCE(competitor_status, 'unknown') NOT IN ('no_valid_match')
|
||||
`, [fx.id]);
|
||||
} else if (!fxMatched && !fxQueued) {
|
||||
await pool.query(`
|
||||
UPDATE transceivers
|
||||
SET competitor_status = 'needs_research',
|
||||
competitor_status_updated_at = NOW()
|
||||
WHERE id = $1
|
||||
AND competitor_verified = false
|
||||
AND COALESCE(competitor_status, 'unknown') NOT IN ('no_valid_match')
|
||||
`, [fx.id]);
|
||||
}
|
||||
}
|
||||
|
||||
console.log(
|
||||
|
||||
@ -1,6 +1,7 @@
|
||||
import { Pool } from "pg";
|
||||
import { config } from "dotenv";
|
||||
import { join } from "path";
|
||||
import { contentHash } from "./hash";
|
||||
|
||||
config({ path: join(__dirname, "..", "..", "..", "..", ".env") });
|
||||
|
||||
@ -18,6 +19,43 @@ export const pool = new Pool({
|
||||
// Alias — some scrapers import { db } instead of { pool }
|
||||
export const db = pool;
|
||||
|
||||
/**
 * Append a verification decision to `transceiver_verification_evidence`.
 *
 * The fingerprint stored in `evidence_hash` is `contentHash` over the
 * verification type, source URL, source vendor id and the evidence payload
 * (missing URL / vendor id are hashed as empty strings, a missing payload as
 * `{}`). The insert uses `ON CONFLICT DO NOTHING`, so a conflicting row is
 * skipped without raising.
 *
 * @param params.transceiverId    Product the evidence belongs to.
 * @param params.verificationType Which verification decision this row backs.
 * @param params.sourceUrl        Optional source URL; stored as NULL when empty.
 * @param params.sourceVendorId   Optional vendor id; stored as NULL when empty.
 * @param params.evidenceValue    Optional payload persisted as JSONB.
 * @param params.robotName        Identifier of the actor writing the evidence.
 * @param params.confidence       Optional confidence; stored as NULL when omitted.
 */
export async function recordVerificationEvidence(params: {
  transceiverId: string;
  verificationType: "price" | "image" | "details" | "competitor_match" | "competitor_no_match" | "artifact_quarantine";
  sourceUrl?: string;
  sourceVendorId?: string;
  evidenceValue?: Record<string, unknown>;
  robotName: string;
  confidence?: number;
}): Promise<void> {
  const payload = params.evidenceValue || {};

  // Key names and order are part of the hashed content; keep them as they are.
  const fingerprint = contentHash({
    type: params.verificationType,
    sourceUrl: params.sourceUrl || "",
    sourceVendorId: params.sourceVendorId || "",
    evidenceValue: payload,
  });

  const insertSql = `INSERT INTO transceiver_verification_evidence (
transceiver_id, verification_type, source_url, source_vendor_id,
evidence_value, evidence_hash, robot_name, confidence
)
VALUES ($1, $2, $3, $4, $5::jsonb, $6, $7, $8)
ON CONFLICT DO NOTHING`;

  // Bind order must match the $1..$8 placeholders above.
  const bindValues = [
    params.transceiverId,
    params.verificationType,
    params.sourceUrl || null,
    params.sourceVendorId || null,
    JSON.stringify(payload),
    fingerprint,
    params.robotName,
    params.confidence ?? null,
  ];

  await pool.query(insertSql, bindValues);
}
|
||||
|
||||
/**
|
||||
* After any verified flag is set, check if all 4 criteria are met and promote
|
||||
* the transceiver to fully_verified. Call this wherever price/image/details/
|
||||
@ -66,6 +104,14 @@ export async function markImageVerified(
|
||||
[transceiverId, imageUrl]
|
||||
);
|
||||
await checkAndSetFullyVerified(transceiverId);
|
||||
await recordVerificationEvidence({
|
||||
transceiverId,
|
||||
verificationType: "image",
|
||||
sourceUrl: imageUrl,
|
||||
evidenceValue: { imageUrl },
|
||||
robotName: "markImageVerified",
|
||||
confidence: 1,
|
||||
});
|
||||
return (result.rowCount ?? 0) > 0;
|
||||
}
|
||||
|
||||
@ -99,6 +145,16 @@ export async function markDetailsVerified(params: {
|
||||
[params.transceiverId, params.sourceUrl || null]
|
||||
);
|
||||
await checkAndSetFullyVerified(params.transceiverId);
|
||||
if ((result.rowCount ?? 0) > 0) {
|
||||
await recordVerificationEvidence({
|
||||
transceiverId: params.transceiverId,
|
||||
verificationType: "details",
|
||||
sourceUrl: params.sourceUrl,
|
||||
evidenceValue: { sourceUrl: params.sourceUrl || null },
|
||||
robotName: "markDetailsVerified",
|
||||
confidence: 1,
|
||||
});
|
||||
}
|
||||
return (result.rowCount ?? 0) > 0;
|
||||
}
|
||||
|
||||
@ -198,11 +254,20 @@ export async function upsertPriceObservation(params: {
|
||||
`UPDATE transceivers SET
|
||||
price_verified = true,
|
||||
price_verified_at = NOW()
|
||||
${isCompetitor ? ", competitor_verified = true, competitor_verified_at = NOW()" : ""}
|
||||
${isCompetitor ? ", competitor_verified = true, competitor_verified_at = NOW(), competitor_status = 'matched', competitor_status_updated_at = NOW()" : ""}
|
||||
WHERE id = $1`,
|
||||
[params.transceiverId]
|
||||
);
|
||||
await checkAndSetFullyVerified(params.transceiverId);
|
||||
await recordVerificationEvidence({
|
||||
transceiverId: params.transceiverId,
|
||||
verificationType: "price",
|
||||
sourceUrl: params.url,
|
||||
sourceVendorId: params.sourceVendorId,
|
||||
evidenceValue: { price: params.price, currency: params.currency, stockLevel: params.stockLevel },
|
||||
robotName: "upsertPriceObservation",
|
||||
confidence: 1,
|
||||
});
|
||||
return false; // No change
|
||||
}
|
||||
|
||||
@ -232,7 +297,9 @@ export async function upsertPriceObservation(params: {
|
||||
price_verified = true,
|
||||
price_verified_at = NOW(),
|
||||
competitor_verified = true,
|
||||
competitor_verified_at = NOW()
|
||||
competitor_verified_at = NOW(),
|
||||
competitor_status = 'matched',
|
||||
competitor_status_updated_at = NOW()
|
||||
WHERE id = $1`,
|
||||
[params.transceiverId]
|
||||
);
|
||||
@ -246,6 +313,15 @@ export async function upsertPriceObservation(params: {
|
||||
);
|
||||
}
|
||||
await checkAndSetFullyVerified(params.transceiverId);
|
||||
await recordVerificationEvidence({
|
||||
transceiverId: params.transceiverId,
|
||||
verificationType: "price",
|
||||
sourceUrl: params.url,
|
||||
sourceVendorId: params.sourceVendorId,
|
||||
evidenceValue: { price: params.price, currency: params.currency, stockLevel: params.stockLevel },
|
||||
robotName: "upsertPriceObservation",
|
||||
confidence: 1,
|
||||
});
|
||||
return true; // New observation written
|
||||
}
|
||||
|
||||
|
||||
@ -4,7 +4,7 @@
|
||||
* Moves obvious accessories, switches, instruments, radio products and optical
|
||||
* transport systems out of the active transceiver verification base.
|
||||
*/
|
||||
import { pool } from "./db";
|
||||
import { pool, recordVerificationEvidence } from "./db";
|
||||
import { logger } from "./logger";
|
||||
|
||||
async function quarantine(): Promise<void> {
|
||||
@ -80,9 +80,24 @@ async function quarantine(): Promise<void> {
|
||||
)
|
||||
)
|
||||
AND COALESCE(t.category, '') != 'NonTransceiver'
|
||||
RETURNING t.id
|
||||
RETURNING t.id, t.part_number, v.name AS vendor_name, t.product_page_url
|
||||
`);
|
||||
|
||||
for (const row of result.rows) {
|
||||
await recordVerificationEvidence({
|
||||
transceiverId: row.id,
|
||||
verificationType: "artifact_quarantine",
|
||||
sourceUrl: row.product_page_url || undefined,
|
||||
evidenceValue: {
|
||||
partNumber: row.part_number,
|
||||
vendor: row.vendor_name,
|
||||
reason: "matched deterministic non-transceiver artifact rule",
|
||||
},
|
||||
robotName: "verify:quarantine:non-transceivers",
|
||||
confidence: 1,
|
||||
});
|
||||
}
|
||||
|
||||
logger.info("Non-transceiver quarantine complete", {
|
||||
quarantined: result.rowCount ?? 0,
|
||||
});
|
||||
|
||||
212
sql/103-verification-evidence-and-competitor-status.sql
Normal file
212
sql/103-verification-evidence-and-competitor-status.sql
Normal file
@ -0,0 +1,212 @@
|
||||
-- Migration 103: Verification evidence ledger and competitor status semantics
|
||||
--
|
||||
-- Goal:
|
||||
-- fully_verified should mean "source-backed and resolved", not merely
|
||||
-- "a competitor row was found". A product may be fully resolved when a
|
||||
-- valid 1:1 competitor exists OR when research verified that no valid
|
||||
-- public 1:1 competitor is available.
|
||||
|
||||
ALTER TABLE transceivers
|
||||
ADD COLUMN IF NOT EXISTS competitor_status VARCHAR(32) NOT NULL DEFAULT 'unknown',
|
||||
ADD COLUMN IF NOT EXISTS competitor_status_updated_at TIMESTAMPTZ,
|
||||
ADD COLUMN IF NOT EXISTS no_match_verified_at TIMESTAMPTZ,
|
||||
ADD COLUMN IF NOT EXISTS no_match_reason TEXT;
|
||||
|
||||
DO $$
|
||||
BEGIN
|
||||
IF NOT EXISTS (
|
||||
SELECT 1
|
||||
FROM pg_constraint
|
||||
WHERE conname = 'transceivers_competitor_status_check'
|
||||
) THEN
|
||||
ALTER TABLE transceivers
|
||||
ADD CONSTRAINT transceivers_competitor_status_check
|
||||
CHECK (competitor_status IN (
|
||||
'unknown',
|
||||
'matched',
|
||||
'no_valid_match',
|
||||
'needs_research',
|
||||
'ambiguous'
|
||||
));
|
||||
END IF;
|
||||
END $$;
|
||||
|
||||
-- Backfill competitor_status from the legacy competitor_verified flag.
--
-- Products resolved as 'no_valid_match' also carry competitor_verified = true
-- (the review API sets both), so they must be excluded from the
-- "verified => matched" rule. Without that guard, re-running this otherwise
-- idempotent migration would silently relabel verified no-match products as
-- 'matched'.
UPDATE transceivers
SET competitor_status = CASE
        WHEN competitor_verified = true THEN 'matched'
        WHEN competitor_status = 'unknown' THEN 'needs_research'
        ELSE competitor_status
    END,
    competitor_status_updated_at = COALESCE(competitor_status_updated_at, NOW())
WHERE competitor_status IS NULL
   OR competitor_status = 'unknown'
   OR (competitor_verified = true AND competitor_status <> 'no_valid_match');
|
||||
|
||||
CREATE INDEX IF NOT EXISTS idx_transceivers_competitor_status
|
||||
ON transceivers (competitor_status);
|
||||
|
||||
CREATE INDEX IF NOT EXISTS idx_transceivers_no_valid_match
|
||||
ON transceivers (no_match_verified_at)
|
||||
WHERE competitor_status = 'no_valid_match';
|
||||
|
||||
CREATE TABLE IF NOT EXISTS transceiver_verification_evidence (
|
||||
id UUID DEFAULT gen_random_uuid() PRIMARY KEY,
|
||||
transceiver_id UUID NOT NULL REFERENCES transceivers(id) ON DELETE CASCADE,
|
||||
verification_type VARCHAR(40) NOT NULL CHECK (verification_type IN (
|
||||
'price',
|
||||
'image',
|
||||
'details',
|
||||
'competitor_match',
|
||||
'competitor_no_match',
|
||||
'artifact_quarantine'
|
||||
)),
|
||||
source_url TEXT,
|
||||
source_vendor_id UUID REFERENCES vendors(id) ON DELETE SET NULL,
|
||||
evidence_value JSONB NOT NULL DEFAULT '{}'::jsonb,
|
||||
evidence_hash TEXT,
|
||||
robot_name TEXT NOT NULL DEFAULT 'unknown',
|
||||
confidence NUMERIC(4,3) CHECK (confidence IS NULL OR confidence BETWEEN 0 AND 1),
|
||||
created_at TIMESTAMPTZ NOT NULL DEFAULT NOW()
|
||||
);
|
||||
|
||||
CREATE INDEX IF NOT EXISTS idx_verification_evidence_tx
|
||||
ON transceiver_verification_evidence (transceiver_id, verification_type, created_at DESC);
|
||||
|
||||
CREATE UNIQUE INDEX IF NOT EXISTS idx_verification_evidence_dedupe
|
||||
ON transceiver_verification_evidence (
|
||||
transceiver_id,
|
||||
verification_type,
|
||||
COALESCE(evidence_hash, ''),
|
||||
robot_name
|
||||
);
|
||||
|
||||
COMMENT ON COLUMN transceivers.competitor_status IS
|
||||
'Resolution state for competitor evidence: matched, no_valid_match, needs_research, ambiguous, unknown.';
|
||||
|
||||
COMMENT ON TABLE transceiver_verification_evidence IS
|
||||
'Append-only evidence ledger for TIP verification decisions. Stores source-backed proof for price, image, details, competitor matches and verified no-match states.';
|
||||
|
||||
-- Seed the ledger from already verified rows so TIP starts with an auditable
|
||||
-- baseline instead of an empty proof table.
|
||||
INSERT INTO transceiver_verification_evidence (
|
||||
transceiver_id,
|
||||
verification_type,
|
||||
source_url,
|
||||
source_vendor_id,
|
||||
evidence_value,
|
||||
evidence_hash,
|
||||
robot_name,
|
||||
confidence
|
||||
)
|
||||
SELECT DISTINCT ON (t.id)
|
||||
t.id,
|
||||
'price',
|
||||
po.url,
|
||||
po.source_vendor_id,
|
||||
jsonb_build_object(
|
||||
'price', po.price,
|
||||
'currency', po.currency,
|
||||
'observed_at', po.time
|
||||
),
|
||||
md5(jsonb_build_object(
|
||||
'type', 'price',
|
||||
'price', po.price,
|
||||
'currency', po.currency,
|
||||
'url', COALESCE(po.url, '')
|
||||
)::text),
|
||||
'migration:103:price-backfill',
|
||||
1.0
|
||||
FROM transceivers t
|
||||
JOIN price_observations po ON po.transceiver_id = t.id
|
||||
WHERE t.price_verified = true
|
||||
AND COALESCE(po.is_verified, true) = true
|
||||
ORDER BY t.id, po.time DESC
|
||||
ON CONFLICT DO NOTHING;
|
||||
|
||||
INSERT INTO transceiver_verification_evidence (
|
||||
transceiver_id,
|
||||
verification_type,
|
||||
source_url,
|
||||
evidence_value,
|
||||
evidence_hash,
|
||||
robot_name,
|
||||
confidence
|
||||
)
|
||||
SELECT
|
||||
id,
|
||||
'image',
|
||||
COALESCE(NULLIF(image_verified_url, ''), NULLIF(image_url, '')),
|
||||
jsonb_build_object('image_url', COALESCE(NULLIF(image_verified_url, ''), NULLIF(image_url, ''))),
|
||||
md5(jsonb_build_object('type', 'image', 'url', COALESCE(NULLIF(image_verified_url, ''), NULLIF(image_url, '')))::text),
|
||||
'migration:103:image-backfill',
|
||||
1.0
|
||||
FROM transceivers
|
||||
WHERE image_verified = true
|
||||
AND COALESCE(NULLIF(image_verified_url, ''), NULLIF(image_url, '')) IS NOT NULL
|
||||
ON CONFLICT DO NOTHING;
|
||||
|
||||
INSERT INTO transceiver_verification_evidence (
|
||||
transceiver_id,
|
||||
verification_type,
|
||||
source_url,
|
||||
evidence_value,
|
||||
evidence_hash,
|
||||
robot_name,
|
||||
confidence
|
||||
)
|
||||
SELECT
|
||||
id,
|
||||
'details',
|
||||
COALESCE(NULLIF(details_source_url, ''), NULLIF(product_page_url, '')),
|
||||
jsonb_build_object(
|
||||
'form_factor', form_factor,
|
||||
'speed_gbps', speed_gbps,
|
||||
'reach_label', reach_label,
|
||||
'fiber_type', fiber_type
|
||||
),
|
||||
md5(jsonb_build_object(
|
||||
'type', 'details',
|
||||
'source_url', COALESCE(NULLIF(details_source_url, ''), NULLIF(product_page_url, '')),
|
||||
'form_factor', form_factor,
|
||||
'speed_gbps', speed_gbps,
|
||||
'reach_label', reach_label,
|
||||
'fiber_type', fiber_type
|
||||
)::text),
|
||||
'migration:103:details-backfill',
|
||||
1.0
|
||||
FROM transceivers
|
||||
WHERE details_verified = true
|
||||
AND COALESCE(NULLIF(details_source_url, ''), NULLIF(product_page_url, '')) IS NOT NULL
|
||||
ON CONFLICT DO NOTHING;
|
||||
|
||||
INSERT INTO transceiver_verification_evidence (
|
||||
transceiver_id,
|
||||
verification_type,
|
||||
evidence_value,
|
||||
evidence_hash,
|
||||
robot_name,
|
||||
confidence
|
||||
)
|
||||
SELECT DISTINCT ON (eq.flexoptix_id)
|
||||
eq.flexoptix_id,
|
||||
'competitor_match',
|
||||
jsonb_build_object(
|
||||
'equivalence_id', eq.id,
|
||||
'competitor_id', eq.competitor_id,
|
||||
'status', eq.status,
|
||||
'match_basis', eq.match_basis,
|
||||
'match_notes', eq.match_notes
|
||||
),
|
||||
md5(jsonb_build_object(
|
||||
'type', 'competitor_match',
|
||||
'equivalence_id', eq.id,
|
||||
'competitor_id', eq.competitor_id,
|
||||
'status', eq.status
|
||||
)::text),
|
||||
'migration:103:competitor-match-backfill',
|
||||
eq.confidence
|
||||
FROM transceiver_equivalences eq
|
||||
WHERE eq.status IN ('approved', 'auto_approved')
|
||||
ORDER BY eq.flexoptix_id, eq.confidence DESC, eq.updated_at DESC
|
||||
ON CONFLICT DO NOTHING;
|
||||
|
||||
@ -1,9 +1,53 @@
|
||||
# Current TIP Sync State
|
||||
|
||||
Updated: 2026-05-09 20:12 UTC
|
||||
Updated: 2026-05-09 21:00 UTC
|
||||
|
||||
## Newest Work
|
||||
|
||||
- TIP verification truth model on 2026-05-09:
|
||||
- implemented migration `sql/103-verification-evidence-and-competitor-status.sql`
|
||||
- adds `transceivers.competitor_status`
|
||||
- `matched`
|
||||
- `no_valid_match`
|
||||
- `needs_research`
|
||||
- `ambiguous`
|
||||
- `unknown`
|
||||
- adds `no_match_verified_at` and `no_match_reason`
|
||||
- creates append-only `transceiver_verification_evidence`
|
||||
- code changes:
|
||||
- scraper DB helper now records evidence for price/image/details decisions
|
||||
- artifact quarantine records `artifact_quarantine` evidence
|
||||
- matcher writes `competitor_match` evidence for auto-approved matches
|
||||
- matcher sets product status to `matched`, `ambiguous`, or `needs_research`
|
||||
- Review API adds protected `POST /api/review/transceivers/:id/no-valid-match`
|
||||
- Review stats now include product-level competitor status counts
|
||||
- Health API now exposes active-product competitor status counts
|
||||
- live migration/backfill:
|
||||
- applied on Erik successfully
|
||||
- status distribution after migration:
|
||||
- `matched=11198`
|
||||
- `needs_research=6575`
|
||||
- Evidence ledger seeded from current data:
|
||||
- `price=10633`
|
||||
- `image=12189`
|
||||
- `details=16782`
|
||||
- `competitor_match=316`
|
||||
- live API checks:
|
||||
- `/api/health` healthy
|
||||
- active health competitor status:
|
||||
- `matched=11158`
|
||||
- `needs_research=6256`
|
||||
- `no_valid_match=0`
|
||||
- `ambiguous=0`
|
||||
- protected review stats with Dashboard token returned product status counts correctly
|
||||
- operational note:
|
||||
- `tip-api` restarted successfully
|
||||
- `tip-scraper-daemon` was not restarted because `scrape:pricing:naddod` and `scrape:pricing:qsfptek` were active
|
||||
- scheduler code is synced to `/opt/tip`; restart daemon after those jobs complete to load new matcher/reconcile logic
|
||||
- TIPLLM training pool:
|
||||
- appended lessons for competitor state machine and evidence ledger
|
||||
- JSONL validated locally
|
||||
|
||||
- MAGATAMA MagatamaLLM RunPod training and adoption closure on 2026-05-09:
|
||||
- operator requirement:
|
||||
- RunPod success only counts after artifact exists, local Ollama import works, smoke tests pass, aliases/version switch, remote registry is updated, and live MAGATAMA reports no stale active run
|
||||
|
||||
@ -0,0 +1,87 @@
|
||||
# TIP Verification Truth Model And Evidence Ledger — 2026-05-09
|
||||
|
||||
## Scope
|
||||
|
||||
- Make TIP verification explainable instead of simply green/red.
|
||||
- Preserve the difference between:
|
||||
- a true 1:1 competitor match
|
||||
- verified absence of a valid public match
|
||||
- ambiguous candidates
|
||||
- unresolved research
|
||||
- Start an append-only evidence ledger for source-backed verification decisions.
|
||||
|
||||
## Implemented
|
||||
|
||||
- Added migration `sql/103-verification-evidence-and-competitor-status.sql`.
|
||||
- Added product-level competitor status:
|
||||
- `matched`
|
||||
- `no_valid_match`
|
||||
- `needs_research`
|
||||
- `ambiguous`
|
||||
- `unknown`
|
||||
- Added no-match fields:
|
||||
- `no_match_verified_at`
|
||||
- `no_match_reason`
|
||||
- Added `transceiver_verification_evidence`.
|
||||
- `price`
|
||||
- `image`
|
||||
- `details`
|
||||
- `competitor_match`
|
||||
- `competitor_no_match`
|
||||
- `artifact_quarantine`
|
||||
- Scraper DB helper now records evidence for newly verified price/image/details.
|
||||
- Quarantine robot now records artifact evidence.
|
||||
- Matcher records competitor-match evidence for auto-approved matches.
|
||||
- Review API now supports protected no-valid-match marking:
|
||||
- `POST /api/review/transceivers/:id/no-valid-match`
|
||||
- Health API exposes active competitor-status counts.
|
||||
- Review stats expose global product competitor-status counts.
|
||||
|
||||
## Live Deployment
|
||||
|
||||
- Scraper build on Erik: passed.
|
||||
- API build on Erik: passed after TypeScript route param cast.
|
||||
- Migration applied successfully.
|
||||
- `tip-api` restarted and healthy.
|
||||
- `tip-scraper-daemon` was not restarted because `scrape:pricing:naddod` and `scrape:pricing:qsfptek` were active.
|
||||
|
||||
## Live Backfill
|
||||
|
||||
- Product status after migration:
|
||||
- `matched=11198`
|
||||
- `needs_research=6575`
|
||||
- Evidence ledger seeded:
|
||||
- `price=10633`
|
||||
- `image=12189`
|
||||
- `details=16782`
|
||||
- `competitor_match=316`
|
||||
|
||||
## Live API Check
|
||||
|
||||
- `/api/health`: healthy.
|
||||
- Active-product competitor status:
|
||||
- `matched=11158`
|
||||
- `needs_research=6256`
|
||||
- `no_valid_match=0`
|
||||
- `ambiguous=0`
|
||||
- Protected Review stats with Dashboard token returned:
|
||||
- `matched=11198`
|
||||
- `needs_research_products=6575`
|
||||
- `no_valid_match=0`
|
||||
- `ambiguous=0`
|
||||
|
||||
## Operational Next Step
|
||||
|
||||
- When `scrape:pricing:naddod` and `scrape:pricing:qsfptek` finish, restart `tip-scraper-daemon` to load the new scheduler matcher/reconcile logic.
|
||||
- Then run:
|
||||
- reconcile
|
||||
- matcher
|
||||
- Review no-valid-match workflow for products that truly have no valid public 1:1 competitor.
|
||||
|
||||
## Training Pool
|
||||
|
||||
- Added TIPLLM lessons for:
|
||||
- competitor state machine
|
||||
- evidence ledger semantics
|
||||
- `training-data/tip-llm-capabilities-v1.jsonl` validated successfully.
|
||||
|
||||
@ -36,3 +36,5 @@
|
||||
{"id":"tip-20260509-artifact-quarantine-001","source":"codex-tip-verification-2026-05-09","kind":"sft-jsonl","messages":[{"role":"system","content":"You are TIP_LLM. Protect TIP data quality by distinguishing real transceiver product pages from category, support, cable, accessory, switch, and redirect artifacts."},{"role":"user","content":"How should TIP classify rows scraped from FS.com category URLs, Flexoptix supported-vendor pages, Flexoptix DAC/AOC or patch-cable pages, Arista switch pages, ShopFiber24 category labels, or Coherent direct-attach/dynamic-component pages?"},{"role":"assistant","content":"Classify them outside the active transceiver verification base. Set category to NonTransceiver or another excluded class and clear price/image/details/competitor/fully verified flags. Examples include fs.com /c/ category URLs, flexoptix.net supported-vendors, universal-dac-aoc, optical-patch-cables, universal-transceiver-our-voodoo, redirect URLs, arista.com products/solutions switch pages, ShopFiber24 labels like FO-TRANSCEIVER or Transceivermodule, and Coherent direct-attach-cables or dynamic-components pages. These are not 1:1 comparable transceiver SKUs."}]}
|
||||
{"id":"tip-20260509-erik-safe-crawler-001","source":"codex-tip-verification-2026-05-09","kind":"sft-jsonl","messages":[{"role":"system","content":"You are TIP_LLM. Design crawler operations that protect Erik and prefer deterministic source extraction over blind crawling."},{"role":"user","content":"What is the safe operating pattern for TIP crawlers on Erik after the 2026-05-09 verification run?"},{"role":"assistant","content":"Use targeted robots, not broad waves. Run one focused verifier at a time, wait for pg-boss jobs to complete, and check pm2/API health between runs. Prefer lightweight fetch+cheerio detail verifiers for known product URLs. Use Playwright/Crawlee only when the source requires a browser, with concurrency 1 and short batches. Do not start additional jobs while scrape:pricing:flexoptix, discover:vendor:cisco-tmg, or maintenance:find-equivalences are active. Always reconcile and rerun the matcher after quarantine or source updates."}]}
|
||||
{"id":"tip-20260509-flex-fs-status-001","source":"codex-tip-verification-2026-05-09","kind":"sft-jsonl","messages":[{"role":"system","content":"You are TIP_LLM. Report verification status precisely and separate product-data completeness from competitor-match completeness."},{"role":"user","content":"After the 2026-05-09 cleanup, what is the correct interpretation of Flexoptix and FS.com verification status?"},{"role":"assistant","content":"For active Flexoptix and FS.com products, price, image, and details are complete after removing category, redirect, cable, support, and non-FS artifacts from the active base. Remaining not-fully-verified rows are due to competitor_verified=false, not missing product price/image/details. Do not say the whole vendor is 100% fully verified until 1:1 competitor equivalence exists or the absence of a valid competitor is explicitly represented as a verified no-match state."}]}
|
||||
{"id":"tip-20260509-competitor-status-001","source":"codex-tip-verification-2026-05-09","kind":"sft-jsonl","messages":[{"role":"system","content":"You are TIP_LLM. Model verification as an evidence-backed state machine, not a single green checkbox."},{"role":"user","content":"How should TIP represent competitor verification when no valid 1:1 competitor product exists?"},{"role":"assistant","content":"Use a distinct competitor_status value. `matched` means a source-backed 1:1 competitor exists. `no_valid_match` means research verified that no valid public 1:1 competitor is available; this can still resolve the competitor requirement without inventing a match. `ambiguous` means candidates exist but are technically risky or below approval threshold. `needs_research` means the robot has not resolved the state yet. Keep the reason and timestamp in no_match_reason/no_match_verified_at and write a `competitor_no_match` evidence record."}]}
|
||||
{"id":"tip-20260509-evidence-ledger-001","source":"codex-tip-verification-2026-05-09","kind":"sft-jsonl","messages":[{"role":"system","content":"You are TIP_LLM. Every verification decision should be auditable."},{"role":"user","content":"What should TIP write when a robot marks price, image, details, competitor match, no-match, or artifact quarantine as verified?"},{"role":"assistant","content":"Write an append-only row into transceiver_verification_evidence. Required fields: transceiver_id, verification_type, source_url when available, source_vendor_id when available, evidence_value JSON, evidence_hash, robot_name, confidence, and created_at. For artifact quarantine, record the rule reason and clear verification flags. For price/image/details, store the exact source-backed value. For competitor match/no-match, store the match basis or no-match reason. This lets TIP explain every green badge and every deliberate non-green state."}]}
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user