fix: automate equivalence research review queue
This commit is contained in:
parent
ef225c7dc5
commit
43b7250180
@ -7,6 +7,7 @@
|
||||
* POST /api/review/equivalences/:id/reject — reject with optional reason
|
||||
* PATCH /api/review/equivalences/:id — edit match_notes
|
||||
* POST /api/review/run-matcher — trigger equivalence job immediately
|
||||
* POST /api/review/run-research — trigger equivalence research job immediately
|
||||
*/
|
||||
import { Router, Request, Response } from "express";
|
||||
import { pool } from "../db/client";
|
||||
@ -27,6 +28,17 @@ async function checkAndSetFullyVerified(transceiverId: string): Promise<boolean>
|
||||
return (result.rowCount ?? 0) > 0;
|
||||
}
|
||||
|
||||
async function queueMaintenanceJob(name: string): Promise<void> {
|
||||
await pool.query(
|
||||
`
|
||||
INSERT INTO pgboss.job (name, data, priority)
|
||||
VALUES ($1, '{}', 0)
|
||||
ON CONFLICT DO NOTHING
|
||||
`,
|
||||
[name]
|
||||
);
|
||||
}
|
||||
|
||||
export const reviewRouter = Router();
|
||||
|
||||
// ── GET /api/review/equivalences ──────────────────────────────────────────────
|
||||
@ -51,7 +63,7 @@ reviewRouter.get("/equivalences", async (req: Request, res: Response) => {
|
||||
params = [limit, offset];
|
||||
limitIdx = 1; offsetIdx = 2;
|
||||
} else if (status === "needs_research") {
|
||||
where = `WHERE eq.status IN ('approved','auto_approved') AND eq.re_research_due_at IS NOT NULL AND eq.re_research_due_at <= NOW()`;
|
||||
where = `WHERE eq.status IN ('pending','approved','auto_approved') AND eq.re_research_due_at IS NOT NULL AND eq.re_research_due_at <= NOW()`;
|
||||
params = [limit, offset];
|
||||
limitIdx = 1; offsetIdx = 2;
|
||||
} else {
|
||||
@ -143,7 +155,7 @@ reviewRouter.get("/equivalences/stats", async (_req: Request, res: Response) =>
|
||||
SUM(CASE WHEN status = 'approved' THEN 1 ELSE 0 END) AS approved,
|
||||
SUM(CASE WHEN status = 'auto_approved' THEN 1 ELSE 0 END) AS auto_approved,
|
||||
SUM(CASE WHEN status = 'rejected' THEN 1 ELSE 0 END) AS rejected,
|
||||
SUM(CASE WHEN status IN ('approved','auto_approved')
|
||||
SUM(CASE WHEN status IN ('pending','approved','auto_approved')
|
||||
AND re_research_due_at IS NOT NULL
|
||||
AND re_research_due_at <= NOW() THEN 1 ELSE 0 END) AS needs_research,
|
||||
COUNT(*) AS total
|
||||
@ -254,9 +266,8 @@ reviewRouter.patch("/equivalences/:id", async (req: Request, res: Response) => {
|
||||
});
|
||||
|
||||
// ── POST /api/review/equivalences/approve-all ─────────────────────────────────
|
||||
// Approve ALL pending equivalences regardless of confidence.
|
||||
// Low-confidence ones (< 0.73) get re_research_due_at = NOW() so the nightly
|
||||
// re-research job will re-verify them one by one.
|
||||
// Approve only high-confidence pending equivalences. Weak candidates are queued
|
||||
// for automated research instead of being marked as approved.
|
||||
reviewRouter.post("/equivalences/approve-all", async (req: Request, res: Response) => {
|
||||
const reviewer = (req.body as { reviewer?: string }).reviewer || "approve-all";
|
||||
const RE_RESEARCH_THRESHOLD = 0.73;
|
||||
@ -271,15 +282,31 @@ reviewRouter.post("/equivalences/approve-all", async (req: Request, res: Respons
|
||||
|
||||
for (const row of candidates.rows) {
|
||||
const needsReSearch = parseFloat(row.confidence) < RE_RESEARCH_THRESHOLD;
|
||||
if (needsReSearch) {
|
||||
await pool.query(`
|
||||
UPDATE transceiver_equivalences
|
||||
SET status = 'pending',
|
||||
reviewed_by = $2,
|
||||
reviewed_at = NULL,
|
||||
re_research_due_at = NOW(),
|
||||
re_researched_at = NULL,
|
||||
match_notes = CONCAT(COALESCE(match_notes, ''), E'\n[Automated research queued ' || NOW()::date || ': confidence below approval threshold]')
|
||||
WHERE id = $1
|
||||
`, [row.id, reviewer]);
|
||||
|
||||
scheduledReSearch++;
|
||||
continue;
|
||||
}
|
||||
|
||||
await pool.query(`
|
||||
UPDATE transceiver_equivalences
|
||||
SET status = 'approved',
|
||||
reviewed_by = $2,
|
||||
reviewed_at = NOW(),
|
||||
re_research_due_at = $3,
|
||||
re_research_due_at = NULL,
|
||||
re_researched_at = NULL
|
||||
WHERE id = $1
|
||||
`, [row.id, reviewer, needsReSearch ? new Date() : null]);
|
||||
`, [row.id, reviewer]);
|
||||
|
||||
await pool.query(`
|
||||
UPDATE transceivers
|
||||
@ -289,11 +316,20 @@ reviewRouter.post("/equivalences/approve-all", async (req: Request, res: Respons
|
||||
|
||||
const earned = await checkAndSetFullyVerified(row.flexoptix_id);
|
||||
if (earned) fullyVerified++;
|
||||
if (needsReSearch) scheduledReSearch++;
|
||||
approved++;
|
||||
}
|
||||
|
||||
res.json({ success: true, approved, fully_verified_earned: fullyVerified, scheduled_re_research: scheduledReSearch });
|
||||
if (scheduledReSearch > 0) {
|
||||
await queueMaintenanceJob("maintenance:re-research-equivalences");
|
||||
}
|
||||
|
||||
res.json({
|
||||
success: true,
|
||||
approved,
|
||||
fully_verified_earned: fullyVerified,
|
||||
scheduled_re_research: scheduledReSearch,
|
||||
left_pending: scheduledReSearch,
|
||||
});
|
||||
});
|
||||
|
||||
// ── POST /api/review/equivalences/bulk-approve ────────────────────────────────
|
||||
@ -315,7 +351,11 @@ reviewRouter.post("/equivalences/bulk-approve", async (req: Request, res: Respon
|
||||
for (const row of candidates.rows) {
|
||||
await pool.query(`
|
||||
UPDATE transceiver_equivalences
|
||||
SET status = 'approved', reviewed_by = $2, reviewed_at = NOW()
|
||||
SET status = 'approved',
|
||||
reviewed_by = $2,
|
||||
reviewed_at = NOW(),
|
||||
re_research_due_at = NULL,
|
||||
re_researched_at = NULL
|
||||
WHERE id = $1
|
||||
`, [row.id, reviewer]);
|
||||
|
||||
@ -336,13 +376,15 @@ reviewRouter.post("/equivalences/bulk-approve", async (req: Request, res: Respon
|
||||
// ── POST /api/review/run-matcher ──────────────────────────────────────────────
|
||||
// Trigger the equivalence matcher immediately (admin action)
|
||||
reviewRouter.post("/run-matcher", async (_req: Request, res: Response) => {
|
||||
// Queue the job via pg-boss — import from scraper's db util won't work here,
|
||||
// so we fire directly via DB insert into pg-boss queue
|
||||
await pool.query(`
|
||||
INSERT INTO pgboss.job (name, data, priority)
|
||||
VALUES ('maintenance:find-equivalences', '{}', 0)
|
||||
ON CONFLICT DO NOTHING
|
||||
`);
|
||||
await queueMaintenanceJob("maintenance:find-equivalences");
|
||||
|
||||
res.json({ success: true, message: "Equivalence matcher queued" });
|
||||
});
|
||||
|
||||
// ── POST /api/review/run-research ────────────────────────────────────────────
|
||||
// Trigger the automated equivalence research worker immediately.
|
||||
reviewRouter.post("/run-research", async (_req: Request, res: Response) => {
|
||||
await queueMaintenanceJob("maintenance:re-research-equivalences");
|
||||
|
||||
res.json({ success: true, message: "Equivalence research queued" });
|
||||
});
|
||||
|
||||
@ -44,6 +44,181 @@ config({ path: join(__dirname, "..", "..", "..", ".env") });
|
||||
|
||||
const connectionString = `postgres://${process.env.POSTGRES_USER || "tip"}:${process.env.POSTGRES_PASSWORD || "tip_dev_2026"}@${process.env.POSTGRES_HOST || "localhost"}:${process.env.POSTGRES_PORT || "5433"}/${process.env.POSTGRES_DB || "transceiver_db"}`;
|
||||
|
||||
type EquivalenceProduct = {
|
||||
part_number?: string | null;
|
||||
standard_name?: string | null;
|
||||
form_factor?: string | null;
|
||||
speed_gbps?: number | string | null;
|
||||
fiber_type?: string | null;
|
||||
reach_meters?: number | string | null;
|
||||
wavelengths?: string | null;
|
||||
connector?: string | null;
|
||||
};
|
||||
|
||||
type EquivalenceResearchResult = {
|
||||
decision: "approve" | "reject";
|
||||
confidence: number;
|
||||
basis: string[];
|
||||
reasons: string[];
|
||||
rejectReason?: string;
|
||||
};
|
||||
|
||||
function normalizeEquivalenceText(value: unknown): string | null {
|
||||
if (value === null || value === undefined) return null;
|
||||
const text = String(value).trim().toUpperCase();
|
||||
return text.length > 0 ? text : null;
|
||||
}
|
||||
|
||||
function numericEquivalenceValue(value: unknown): number | null {
|
||||
if (value === null || value === undefined || value === "") return null;
|
||||
const parsed = Number(value);
|
||||
return Number.isFinite(parsed) ? parsed : null;
|
||||
}
|
||||
|
||||
function extractPrimaryNm(wavelengths: unknown): number | null {
|
||||
if (!wavelengths) return null;
|
||||
const match = String(wavelengths).match(/(\d{3,4})/);
|
||||
return match ? parseInt(match[1], 10) : null;
|
||||
}
|
||||
|
||||
function evaluateEquivalenceResearch(
|
||||
fx: EquivalenceProduct,
|
||||
cp: EquivalenceProduct,
|
||||
hasRecentPrice: boolean,
|
||||
): EquivalenceResearchResult {
|
||||
const basis: string[] = [];
|
||||
const reasons: string[] = [];
|
||||
let score = 0;
|
||||
|
||||
const fxForm = normalizeEquivalenceText(fx.form_factor);
|
||||
const cpForm = normalizeEquivalenceText(cp.form_factor);
|
||||
const fxSpeed = numericEquivalenceValue(fx.speed_gbps);
|
||||
const cpSpeed = numericEquivalenceValue(cp.speed_gbps);
|
||||
const fxStandard = normalizeEquivalenceText(fx.standard_name);
|
||||
const cpStandard = normalizeEquivalenceText(cp.standard_name);
|
||||
const fxFiber = normalizeEquivalenceText(fx.fiber_type);
|
||||
const cpFiber = normalizeEquivalenceText(cp.fiber_type);
|
||||
const fxReach = numericEquivalenceValue(fx.reach_meters);
|
||||
const cpReach = numericEquivalenceValue(cp.reach_meters);
|
||||
const fxNm = extractPrimaryNm(fx.wavelengths);
|
||||
const cpNm = extractPrimaryNm(cp.wavelengths);
|
||||
|
||||
if (!hasRecentPrice) {
|
||||
reasons.push("no recent competitor price observation");
|
||||
return {
|
||||
decision: "reject",
|
||||
confidence: 0,
|
||||
basis,
|
||||
reasons,
|
||||
rejectReason: "automated research: competitor has no recent price observation",
|
||||
};
|
||||
}
|
||||
|
||||
if (fxForm && cpForm && fxForm === cpForm) {
|
||||
score += 25;
|
||||
basis.push("form_factor");
|
||||
} else {
|
||||
reasons.push("form factor mismatch or missing");
|
||||
}
|
||||
|
||||
if (fxSpeed !== null && cpSpeed !== null && fxSpeed === cpSpeed) {
|
||||
score += 20;
|
||||
basis.push("speed_gbps");
|
||||
} else {
|
||||
reasons.push("speed mismatch or missing");
|
||||
}
|
||||
|
||||
if (fxStandard && cpStandard && fxStandard === cpStandard) {
|
||||
score += 30;
|
||||
basis.push("standard_name");
|
||||
} else {
|
||||
reasons.push("standard name not identical");
|
||||
}
|
||||
|
||||
if (fxNm !== null && cpNm !== null) {
|
||||
if (Math.abs(fxNm - cpNm) <= 15) {
|
||||
score += 20;
|
||||
basis.push(`wavelength_${fxNm}nm`);
|
||||
} else {
|
||||
reasons.push(`wavelength mismatch ${fxNm}nm vs ${cpNm}nm`);
|
||||
score -= 20;
|
||||
}
|
||||
} else {
|
||||
reasons.push("wavelength missing");
|
||||
}
|
||||
|
||||
if (fxFiber && cpFiber) {
|
||||
if (fxFiber === cpFiber) {
|
||||
score += 10;
|
||||
basis.push("fiber_type");
|
||||
} else {
|
||||
reasons.push(`fiber mismatch ${fxFiber} vs ${cpFiber}`);
|
||||
score -= 15;
|
||||
}
|
||||
} else {
|
||||
reasons.push("fiber type missing");
|
||||
}
|
||||
|
||||
if (fxReach !== null && cpReach !== null && fxReach > 0 && cpReach > 0) {
|
||||
const ratio = Math.min(fxReach, cpReach) / Math.max(fxReach, cpReach);
|
||||
if (ratio >= 0.85) {
|
||||
score += 10;
|
||||
basis.push("reach");
|
||||
} else {
|
||||
reasons.push(`reach mismatch ${fxReach}m vs ${cpReach}m`);
|
||||
score -= 15;
|
||||
}
|
||||
} else {
|
||||
reasons.push("reach missing");
|
||||
}
|
||||
|
||||
const confidence = Math.max(0, Math.min(1, score / 115));
|
||||
const criticalMismatch = reasons.some((reason) =>
|
||||
reason.startsWith("wavelength mismatch") ||
|
||||
reason.startsWith("fiber mismatch") ||
|
||||
reason.startsWith("reach mismatch") ||
|
||||
reason.startsWith("form factor mismatch") ||
|
||||
reason.startsWith("speed mismatch")
|
||||
);
|
||||
const missingCriticalEvidence = reasons.some((reason) =>
|
||||
reason === "wavelength missing" ||
|
||||
reason === "fiber type missing" ||
|
||||
reason === "reach missing"
|
||||
);
|
||||
|
||||
if (criticalMismatch) {
|
||||
return {
|
||||
decision: "reject",
|
||||
confidence,
|
||||
basis,
|
||||
reasons,
|
||||
rejectReason: `automated research: technical mismatch (${reasons.join("; ")})`,
|
||||
};
|
||||
}
|
||||
|
||||
if (missingCriticalEvidence) {
|
||||
return {
|
||||
decision: "reject",
|
||||
confidence,
|
||||
basis,
|
||||
reasons,
|
||||
rejectReason: `automated research: insufficient technical evidence (${reasons.join("; ")})`,
|
||||
};
|
||||
}
|
||||
|
||||
if (confidence >= 0.73) {
|
||||
return { decision: "approve", confidence, basis, reasons };
|
||||
}
|
||||
|
||||
return {
|
||||
decision: "reject",
|
||||
confidence,
|
||||
basis,
|
||||
reasons,
|
||||
rejectReason: `automated research: confidence ${confidence.toFixed(3)} below approval threshold`,
|
||||
};
|
||||
}
|
||||
|
||||
export async function createScheduler(): Promise<PgBoss> {
|
||||
const boss = new PgBoss({
|
||||
connectionString,
|
||||
@ -2667,52 +2842,105 @@ export async function registerWorkers(boss: PgBoss): Promise<void> {
|
||||
}
|
||||
});
|
||||
|
||||
// ── Re-research approved equivalences ────────────────────────────────────────
|
||||
// Processes up to 200 approved equivalences per day that have re_research_due_at <= NOW().
|
||||
// Re-runs the confidence check: if competitor still has recent prices and specs still match,
|
||||
// the approval is confirmed (re_researched_at = NOW(), next check in 30 days).
|
||||
// If confidence drops or competitor has no recent price: reverts to pending.
|
||||
// ── Re-research equivalences ────────────────────────────────────────────────
|
||||
// Confirms only well-evidenced matches. Weak, stale, incomplete, or technically
|
||||
// contradictory matches are rejected automatically instead of going back to a
|
||||
// manual queue.
|
||||
await boss.work("maintenance:re-research-equivalences", async () => {
|
||||
const { pool } = await import("./utils/db");
|
||||
const ts = new Date().toISOString();
|
||||
console.log(`[${ts}] Running: Re-research approved equivalences`);
|
||||
const batchLimit = Math.max(1, Math.min(10000, parseInt(process.env["EQUIVALENCE_RESEARCH_BATCH_LIMIT"] || "2000", 10)));
|
||||
console.log(`[${ts}] Running: Equivalence automated research`);
|
||||
|
||||
const batch = await pool.query(`
|
||||
SELECT eq.id, eq.flexoptix_id, eq.competitor_id, eq.confidence,
|
||||
fx.form_factor, fx.speed_gbps, fx.standard_name, fx.fiber_type,
|
||||
fx.reach_meters, fx.wavelengths
|
||||
SELECT eq.id, eq.flexoptix_id, eq.competitor_id, eq.status, eq.confidence,
|
||||
fx.part_number AS fx_part_number,
|
||||
fx.form_factor AS fx_form_factor,
|
||||
fx.speed_gbps AS fx_speed_gbps,
|
||||
fx.standard_name AS fx_standard_name,
|
||||
fx.fiber_type AS fx_fiber_type,
|
||||
fx.reach_meters AS fx_reach_meters,
|
||||
fx.wavelengths AS fx_wavelengths,
|
||||
fx.connector AS fx_connector,
|
||||
cp.part_number AS cp_part_number,
|
||||
cp.form_factor AS cp_form_factor,
|
||||
cp.speed_gbps AS cp_speed_gbps,
|
||||
cp.standard_name AS cp_standard_name,
|
||||
cp.fiber_type AS cp_fiber_type,
|
||||
cp.reach_meters AS cp_reach_meters,
|
||||
cp.wavelengths AS cp_wavelengths,
|
||||
cp.connector AS cp_connector,
|
||||
cpv.name AS competitor_vendor,
|
||||
(
|
||||
SELECT COUNT(*)
|
||||
FROM price_observations po
|
||||
WHERE po.transceiver_id = eq.competitor_id
|
||||
AND po.time > NOW() - INTERVAL '45 days'
|
||||
) AS recent_price_count
|
||||
FROM transceiver_equivalences eq
|
||||
JOIN transceivers fx ON eq.flexoptix_id = fx.id
|
||||
WHERE eq.status IN ('approved', 'auto_approved')
|
||||
JOIN transceivers cp ON eq.competitor_id = cp.id
|
||||
JOIN vendors cpv ON cpv.id = cp.vendor_id
|
||||
WHERE eq.status IN ('pending', 'approved', 'auto_approved')
|
||||
AND eq.re_research_due_at IS NOT NULL
|
||||
AND eq.re_research_due_at <= NOW()
|
||||
ORDER BY eq.re_research_due_at ASC
|
||||
LIMIT 200
|
||||
`);
|
||||
LIMIT $1
|
||||
`, [batchLimit]);
|
||||
|
||||
let confirmed = 0;
|
||||
let reverted = 0;
|
||||
let rejected = 0;
|
||||
|
||||
for (const eq of batch.rows) {
|
||||
// Check if competitor still has a recent price observation
|
||||
const priceCheck = await pool.query(`
|
||||
SELECT COUNT(*) AS cnt
|
||||
FROM price_observations
|
||||
WHERE transceiver_id = $1 AND time > NOW() - INTERVAL '45 days'
|
||||
`, [eq.competitor_id]);
|
||||
const research = evaluateEquivalenceResearch(
|
||||
{
|
||||
part_number: eq.fx_part_number,
|
||||
form_factor: eq.fx_form_factor,
|
||||
speed_gbps: eq.fx_speed_gbps,
|
||||
standard_name: eq.fx_standard_name,
|
||||
fiber_type: eq.fx_fiber_type,
|
||||
reach_meters: eq.fx_reach_meters,
|
||||
wavelengths: eq.fx_wavelengths,
|
||||
connector: eq.fx_connector,
|
||||
},
|
||||
{
|
||||
part_number: eq.cp_part_number,
|
||||
form_factor: eq.cp_form_factor,
|
||||
speed_gbps: eq.cp_speed_gbps,
|
||||
standard_name: eq.cp_standard_name,
|
||||
fiber_type: eq.cp_fiber_type,
|
||||
reach_meters: eq.cp_reach_meters,
|
||||
wavelengths: eq.cp_wavelengths,
|
||||
connector: eq.cp_connector,
|
||||
},
|
||||
parseInt(eq.recent_price_count, 10) > 0,
|
||||
);
|
||||
|
||||
const hasRecentPrice = parseInt(priceCheck.rows[0].cnt, 10) > 0;
|
||||
|
||||
if (!hasRecentPrice) {
|
||||
// Competitor no longer carries this — revert to pending for manual review
|
||||
if (research.decision === "reject") {
|
||||
await pool.query(`
|
||||
UPDATE transceiver_equivalences
|
||||
SET status = 'pending', re_research_due_at = NULL, re_researched_at = NULL,
|
||||
match_notes = CONCAT(match_notes, E'\n[Re-research ' || NOW()::date || ': no recent price — reverted to pending]')
|
||||
SET status = 'rejected',
|
||||
confidence = $2,
|
||||
match_basis = $3,
|
||||
reject_reason = $4,
|
||||
reviewed_by = 'automated-research',
|
||||
reviewed_at = NOW(),
|
||||
re_research_due_at = NULL,
|
||||
re_researched_at = NOW(),
|
||||
match_notes = CONCAT(
|
||||
COALESCE(match_notes, ''),
|
||||
E'\n[Automated research ' || NOW()::date || ': rejected; ' || $5 || ']'
|
||||
),
|
||||
updated_at = NOW()
|
||||
WHERE id = $1
|
||||
`, [eq.id]);
|
||||
`, [
|
||||
eq.id,
|
||||
research.confidence,
|
||||
research.basis,
|
||||
research.rejectReason || "automated research: rejected",
|
||||
research.reasons.join("; "),
|
||||
]);
|
||||
|
||||
// Reset competitor_verified if no other approved equivalence covers this transceiver
|
||||
await pool.query(`
|
||||
UPDATE transceivers
|
||||
SET competitor_verified = false, competitor_verified_at = NULL,
|
||||
@ -2726,20 +2954,51 @@ export async function registerWorkers(boss: PgBoss): Promise<void> {
|
||||
)
|
||||
`, [eq.flexoptix_id, eq.id]);
|
||||
|
||||
reverted++;
|
||||
rejected++;
|
||||
} else {
|
||||
// Still valid — confirm and schedule next re-research in 30 days
|
||||
await pool.query(`
|
||||
UPDATE transceiver_equivalences
|
||||
SET re_researched_at = NOW(),
|
||||
re_research_due_at = NOW() + INTERVAL '30 days'
|
||||
SET status = CASE WHEN status = 'pending' THEN 'auto_approved' ELSE status END,
|
||||
confidence = $2,
|
||||
match_basis = $3,
|
||||
reviewed_by = COALESCE(reviewed_by, 'automated-research'),
|
||||
reviewed_at = COALESCE(reviewed_at, NOW()),
|
||||
reject_reason = NULL,
|
||||
re_researched_at = NOW(),
|
||||
re_research_due_at = NOW() + INTERVAL '30 days',
|
||||
match_notes = CONCAT(
|
||||
COALESCE(match_notes, ''),
|
||||
E'\n[Automated research ' || NOW()::date || ': confirmed; basis: ' || $4 || ']'
|
||||
),
|
||||
updated_at = NOW()
|
||||
WHERE id = $1
|
||||
`, [eq.id]);
|
||||
`, [eq.id, research.confidence, research.basis, research.basis.join(", ")]);
|
||||
|
||||
await pool.query(`
|
||||
UPDATE transceivers
|
||||
SET competitor_verified = true,
|
||||
competitor_verified_at = COALESCE(competitor_verified_at, NOW())
|
||||
WHERE id = $1 AND competitor_verified = false
|
||||
`, [eq.flexoptix_id]);
|
||||
|
||||
confirmed++;
|
||||
}
|
||||
}
|
||||
|
||||
console.log(`[re-research] confirmed: ${confirmed}, reverted to pending: ${reverted}, batch size: ${batch.rows.length}`);
|
||||
if (confirmed > 0) {
|
||||
await pool.query(`
|
||||
UPDATE transceivers
|
||||
SET fully_verified = true,
|
||||
fully_verified_at = COALESCE(fully_verified_at, NOW())
|
||||
WHERE competitor_verified = true
|
||||
AND price_verified = true
|
||||
AND image_verified = true
|
||||
AND details_verified = true
|
||||
AND fully_verified = false
|
||||
`);
|
||||
}
|
||||
|
||||
console.log(`[equivalence-research] confirmed: ${confirmed}, rejected: ${rejected}, batch size: ${batch.rows.length}`);
|
||||
});
|
||||
|
||||
// ══════════════════════════════════════════════════════════════════════
|
||||
|
||||
@ -1,9 +1,66 @@
|
||||
# Current TIP Sync State
|
||||
|
||||
Updated: 2026-05-09 03:15 UTC
|
||||
Updated: 2026-05-09 05:45 UTC
|
||||
|
||||
## Newest Work
|
||||
|
||||
- TIP automated equivalence research / manual queue cleanup completed on 2026-05-09:
|
||||
- operator intent:
|
||||
- products should be researched well enough that they do not need manual equivalence validation
|
||||
- Erik must not be stressed by crawler-heavy work
|
||||
- TIPLLM-only policy for crawler/robot research remains in force
|
||||
- root cause found:
|
||||
- `approve-all` approved low-confidence equivalences and only marked them for later re-research
|
||||
- the re-research worker mostly checked whether a competitor still had a recent price
|
||||
- it did not re-evaluate hard technical equivalence evidence such as reach, wavelength, fiber type, speed and form factor
|
||||
- code changed:
|
||||
- `packages/api/src/routes/review.ts`
|
||||
- `approve-all` now approves only confidence >= `0.73`
|
||||
- weak pending rows stay pending and are queued for automated research instead of being marked approved
|
||||
- `needs_research` stats/listing now includes pending research rows
|
||||
- added `POST /api/review/run-research`
|
||||
- `packages/scraper/src/scheduler.ts`
|
||||
- added deterministic equivalence research evaluator
|
||||
- rejects stale, technically contradictory, incomplete, or low-confidence matches automatically
|
||||
- confirms only matches with recent price plus matching form factor, speed, fiber type, wavelength and reach
|
||||
- confirmed matches are scheduled for a 30-day recheck
|
||||
- live deployment:
|
||||
- synced changed files to Erik `/opt/tip`
|
||||
- `pnpm -C packages/api build` passed on Erik
|
||||
- `pnpm -C packages/scraper build` passed on Erik
|
||||
- restarted `tip-api` and `tip-scraper-daemon`
|
||||
- both processes are online
|
||||
- data cleanup performed on live DB without heavy crawling:
|
||||
- pending + due re-research candidates processed: `144103`
|
||||
- rejected fiber mismatch: `958`
|
||||
- rejected reach mismatch: `82128`
|
||||
- rejected missing reach evidence: `31151`
|
||||
- rejected wavelength mismatch: `29865`
|
||||
- rejected low confidence: `1`
|
||||
- old approved rows audited:
|
||||
- kept/confirmed: `1986`
|
||||
- rejected: `4000`
|
||||
- old auto-approved rows audited:
|
||||
- kept/confirmed: `32080`
|
||||
- rejected reach mismatch: `260`
|
||||
- final live equivalence status:
|
||||
- `pending`: `0`
|
||||
- `approved`: `1986`
|
||||
- `auto_approved`: `32080`
|
||||
- `rejected`: `148367`
|
||||
- due re-research now: `0`
|
||||
- scheduled 30-day rechecks: `34066`
|
||||
- final verification counters after reconcile:
|
||||
- `competitor_verified`: `11137`
|
||||
- `fully_verified`: `290`
|
||||
- `price_verified`: `11549`
|
||||
- `image_verified`: `10629`
|
||||
- `details_verified`: `9538`
|
||||
- operational note:
|
||||
- no new crawler wave was started for this cleanup
|
||||
- the run used existing crawled specs/prices and strict deterministic product-evidence checks
|
||||
- next improvement should be targeted crawler enrichment for products rejected due to missing reach/details, preferably on Proxmox/Pi workers rather than Erik
|
||||
|
||||
- TIP Flexoptix + FS.com price/image revalidation completed on 2026-05-09:
|
||||
- live root cause:
|
||||
- scraper runs had set `transceivers.price_verified`, but `price_observations.is_verified` stayed false
|
||||
|
||||
98
sync/history/2026-05-09-tip-equivalence-auto-research.md
Normal file
98
sync/history/2026-05-09-tip-equivalence-auto-research.md
Normal file
@ -0,0 +1,98 @@
|
||||
# TIP Equivalence Automated Research
|
||||
|
||||
Date: 2026-05-09
|
||||
|
||||
## Goal
|
||||
|
||||
Remove manual equivalence validation as a required workflow for TIP product verification. Low-confidence matches should be researched and either confirmed or rejected automatically.
|
||||
|
||||
## Findings
|
||||
|
||||
- The dashboard had a large `Approved + Re-Research` backlog.
|
||||
- `approve-all` was marking low-confidence rows approved, then setting `re_research_due_at`.
|
||||
- The re-research worker only checked whether the competitor still had a recent price; it did not re-check technical equivalence quality.
|
||||
- Many low-confidence rows were objectively bad matches:
|
||||
- reach mismatches
|
||||
- wavelength mismatches
|
||||
- missing reach evidence
|
||||
- fiber mismatches
|
||||
|
||||
## Code Changes
|
||||
|
||||
- `packages/api/src/routes/review.ts`
|
||||
- `approve-all` now approves only confidence >= `0.73`.
|
||||
- Weak rows stay pending and get queued for automated research.
|
||||
- `needs_research` includes pending research rows.
|
||||
- Added `POST /api/review/run-research`.
|
||||
|
||||
- `packages/scraper/src/scheduler.ts`
|
||||
- Added deterministic equivalence evaluator.
|
||||
- Confirms matches only when there is:
|
||||
- recent competitor price
|
||||
- matching form factor
|
||||
- matching speed
|
||||
- matching fiber type
|
||||
- matching wavelength
|
||||
- compatible reach
|
||||
- confidence >= `0.73`
|
||||
- Rejects stale, incomplete, contradictory, or low-confidence matches automatically.
|
||||
- Confirmed matches get a 30-day recheck.
|
||||
|
||||
## Deployment
|
||||
|
||||
- Synced code to Erik `/opt/tip`.
|
||||
- Built on Erik:
|
||||
- `pnpm -C packages/api build`
|
||||
- `pnpm -C packages/scraper build`
|
||||
- Restarted:
|
||||
- `tip-api`
|
||||
- `tip-scraper-daemon`
|
||||
- Both were online after restart.
|
||||
|
||||
## Live Data Cleanup
|
||||
|
||||
No heavy crawler wave was started. Cleanup used existing crawled specs and price observations.
|
||||
|
||||
Processed pending + due re-research:
|
||||
|
||||
- total: `144103`
|
||||
- rejected fiber mismatch: `958`
|
||||
- rejected reach mismatch: `82128`
|
||||
- rejected missing reach evidence: `31151`
|
||||
- rejected wavelength mismatch: `29865`
|
||||
- rejected low confidence: `1`
|
||||
|
||||
Processed old approved rows:
|
||||
|
||||
- confirmed: `1986`
|
||||
- rejected fiber mismatch: `184`
|
||||
- rejected reach mismatch: `1704`
|
||||
- rejected missing reach evidence: `1117`
|
||||
- rejected wavelength mismatch: `993`
|
||||
- rejected low confidence: `2`
|
||||
|
||||
Processed old auto-approved rows:
|
||||
|
||||
- confirmed: `32080`
|
||||
- rejected reach mismatch: `260`
|
||||
|
||||
## Final State
|
||||
|
||||
- pending: `0`
|
||||
- approved: `1986`
|
||||
- auto_approved: `32080`
|
||||
- rejected: `148367`
|
||||
- due re-research now: `0`
|
||||
- scheduled 30-day rechecks: `34066`
|
||||
|
||||
Product verification counters after reconcile:
|
||||
|
||||
- competitor_verified: `11137`
|
||||
- fully_verified: `290`
|
||||
- price_verified: `11549`
|
||||
- image_verified: `10629`
|
||||
- details_verified: `9538`
|
||||
|
||||
## Next Work
|
||||
|
||||
Products rejected for missing reach/details should be enriched by targeted vendor crawlers. Keep Erik light; use Proxmox/Pi workers for heavier crawl waves. TIPLLM-only policy remains active for crawler/robot research and learning records.
|
||||
Loading…
x
Reference in New Issue
Block a user