From 43b72501804f9a4b8ad1866701410003eef5c882 Mon Sep 17 00:00:00 2001
From: Rene Fichtmueller <renefichtmueller@MacStudio-von-Rene-8.local>
Date: Sat, 9 May 2026 07:48:11 +0200
Subject: [PATCH] fix: automate equivalence research review queue

---
 packages/api/src/routes/review.ts             |  76 +++-
 packages/scraper/src/scheduler.ts             | 325 ++++++++++++++++--
 sync/CURRENT.md                               |  59 +++-
 ...026-05-09-tip-equivalence-auto-research.md |  98 ++++++
 4 files changed, 507 insertions(+), 51 deletions(-)
 create mode 100644 sync/history/2026-05-09-tip-equivalence-auto-research.md
diff --git a/packages/api/src/routes/review.ts b/packages/api/src/routes/review.ts
index c216917..3efc647 100644
--- a/packages/api/src/routes/review.ts
+++ b/packages/api/src/routes/review.ts
@@ -7,6 +7,7 @@
  * POST /api/review/equivalences/:id/reject  — reject with optional reason
  * PATCH /api/review/equivalences/:id        — edit match_notes
  * POST /api/review/run-matcher             — trigger equivalence job immediately
+ * POST /api/review/run-research            — trigger equivalence research job immediately
  */
 import { Router, Request, Response } from "express";
 import { pool } from "../db/client";
@@ -27,6 +28,17 @@ async function checkAndSetFullyVerified(transceiverId: string): Promise<boolean>
   return (result.rowCount ?? 0) > 0;
 }
 
+/** Queues job `name` by inserting directly into pgboss.job (empty JSON data, priority 0); a conflicting insert is a no-op. */
+async function queueMaintenanceJob(name: string): Promise<void> {
+  await pool.query(
+    `
+    INSERT INTO pgboss.job (name, data, priority)
+    VALUES ($1, '{}', 0)
+    ON CONFLICT DO NOTHING
+  `,
+    [name]
+  );
+}
 export const reviewRouter = Router();
 
 // ── GET /api/review/equivalences ──────────────────────────────────────────────
@@ -51,7 +63,7 @@ reviewRouter.get("/equivalences", async (req: Request, res: Response) => {
     params = [limit, offset];
     limitIdx = 1; offsetIdx = 2;
   } else if (status === "needs_research") {
-    where = `WHERE eq.status IN ('approved','auto_approved') AND eq.re_research_due_at IS NOT NULL AND eq.re_research_due_at <= NOW()`;
+    where = `WHERE eq.status IN ('pending','approved','auto_approved') AND eq.re_research_due_at IS NOT NULL AND eq.re_research_due_at <= NOW()`;
     params = [limit, offset];
     limitIdx = 1; offsetIdx = 2;
   } else {
@@ -143,7 +155,7 @@ reviewRouter.get("/equivalences/stats", async (_req: Request, res: Response) =>
       SUM(CASE WHEN status = 'approved'      THEN 1 ELSE 0 END) AS approved,
       SUM(CASE WHEN status = 'auto_approved' THEN 1 ELSE 0 END) AS auto_approved,
       SUM(CASE WHEN status = 'rejected'      THEN 1 ELSE 0 END) AS rejected,
-      SUM(CASE WHEN status IN ('approved','auto_approved')
+      SUM(CASE WHEN status IN ('pending','approved','auto_approved')
                AND re_research_due_at IS NOT NULL
                AND re_research_due_at <= NOW()        THEN 1 ELSE 0 END) AS needs_research,
       COUNT(*)                                                            AS total
@@ -254,9 +266,8 @@ reviewRouter.patch("/equivalences/:id", async (req: Request, res: Response) => {
 });
 
 // ── POST /api/review/equivalences/approve-all ─────────────────────────────────
-// Approve ALL pending equivalences regardless of confidence.
-// Low-confidence ones (< 0.73) get re_research_due_at = NOW() so the nightly
-// re-research job will re-verify them one by one.
+// Approve only high-confidence pending equivalences. Weak candidates are queued
+// for automated research instead of being marked as approved.
 reviewRouter.post("/equivalences/approve-all", async (req: Request, res: Response) => {
   const reviewer = (req.body as { reviewer?: string }).reviewer || "approve-all";
   const RE_RESEARCH_THRESHOLD = 0.73;
@@ -271,15 +282,31 @@ reviewRouter.post("/equivalences/approve-all", async (req: Request, res: Respons
 
   for (const row of candidates.rows) {
     const needsReSearch = parseFloat(row.confidence) < RE_RESEARCH_THRESHOLD;
+    if (needsReSearch) {
+      await pool.query(`
+        UPDATE transceiver_equivalences
+        SET status             = 'pending',
+            reviewed_by        = $2,
+            reviewed_at        = NULL,
+            re_research_due_at = NOW(),
+            re_researched_at   = NULL,
+            match_notes        = CONCAT(COALESCE(match_notes, ''), E'\n[Automated research queued ' || NOW()::date || ': confidence below approval threshold]')
+        WHERE id = $1
+      `, [row.id, reviewer]);
+
+      scheduledReSearch++;
+      continue;
+    }
+
     await pool.query(`
       UPDATE transceiver_equivalences
       SET status             = 'approved',
           reviewed_by        = $2,
           reviewed_at        = NOW(),
-          re_research_due_at = $3,
+          re_research_due_at = NULL,
           re_researched_at   = NULL
       WHERE id = $1
-    `, [row.id, reviewer, needsReSearch ? new Date() : null]);
+    `, [row.id, reviewer]);
 
     await pool.query(`
       UPDATE transceivers
@@ -289,11 +316,20 @@ reviewRouter.post("/equivalences/approve-all", async (req: Request, res: Respons
 
     const earned = await checkAndSetFullyVerified(row.flexoptix_id);
     if (earned) fullyVerified++;
-    if (needsReSearch) scheduledReSearch++;
     approved++;
   }
 
-  res.json({ success: true, approved, fully_verified_earned: fullyVerified, scheduled_re_research: scheduledReSearch });
+  if (scheduledReSearch > 0) {
+    await queueMaintenanceJob("maintenance:re-research-equivalences");
+  }
+
+  res.json({
+    success: true,
+    approved,
+    fully_verified_earned: fullyVerified,
+    scheduled_re_research: scheduledReSearch,
+    left_pending: scheduledReSearch,
+  });
 });
 
 // ── POST /api/review/equivalences/bulk-approve ────────────────────────────────
@@ -315,7 +351,11 @@ reviewRouter.post("/equivalences/bulk-approve", async (req: Request, res: Respon
   for (const row of candidates.rows) {
     await pool.query(`
       UPDATE transceiver_equivalences
-      SET status = 'approved', reviewed_by = $2, reviewed_at = NOW()
+      SET status = 'approved',
+          reviewed_by = $2,
+          reviewed_at = NOW(),
+          re_research_due_at = NULL,
+          re_researched_at = NULL
       WHERE id = $1
     `, [row.id, reviewer]);
 
@@ -336,13 +376,15 @@ reviewRouter.post("/equivalences/bulk-approve", async (req: Request, res: Respon
 // ── POST /api/review/run-matcher ──────────────────────────────────────────────
 // Trigger the equivalence matcher immediately (admin action)
 reviewRouter.post("/run-matcher", async (_req: Request, res: Response) => {
-  // Queue the job via pg-boss — import from scraper's db util won't work here,
-  // so we fire directly via DB insert into pg-boss queue
-  await pool.query(`
-    INSERT INTO pgboss.job (name, data, priority)
-    VALUES ('maintenance:find-equivalences', '{}', 0)
-    ON CONFLICT DO NOTHING
-  `);
+  await queueMaintenanceJob("maintenance:find-equivalences");
 
   res.json({ success: true, message: "Equivalence matcher queued" });
 });
+
+// ── POST /api/review/run-research ────────────────────────────────────────────
+// Queues the "maintenance:re-research-equivalences" job and responds at once; it does not wait for the research to run.
+reviewRouter.post("/run-research", async (_req: Request, res: Response) => {
+  await queueMaintenanceJob("maintenance:re-research-equivalences");
+
+  res.json({ success: true, message: "Equivalence research queued" });
+});
diff --git a/packages/scraper/src/scheduler.ts b/packages/scraper/src/scheduler.ts
index 927fd9c..be1c5ee 100644
--- a/packages/scraper/src/scheduler.ts
+++ b/packages/scraper/src/scheduler.ts
@@ -44,6 +44,181 @@ config({ path: join(__dirname, "..", "..", "..", ".env") });
 
 const connectionString = `postgres://${process.env.POSTGRES_USER || "tip"}:${process.env.POSTGRES_PASSWORD || "tip_dev_2026"}@${process.env.POSTGRES_HOST || "localhost"}:${process.env.POSTGRES_PORT || "5433"}/${process.env.POSTGRES_DB || "transceiver_db"}`;
 
+type EquivalenceProduct = { // spec snapshot of one transceiver, as passed to evaluateEquivalenceResearch
+  part_number?: string | null; // carried along; not read by the evaluator
+  standard_name?: string | null; // compared after trim + upper-case
+  form_factor?: string | null; // compared after trim + upper-case
+  speed_gbps?: number | string | null; // number or numeric string; compared for exact equality
+  fiber_type?: string | null; // compared after trim + upper-case
+  reach_meters?: number | string | null; // number or numeric string; must be > 0 to count as evidence
+  wavelengths?: string | null; // only the first 3-4 digit run is used
+  connector?: string | null; // carried along; not read by the evaluator
+};
+// Outcome of one automated research pass over an equivalence pair.
+type EquivalenceResearchResult = {
+  decision: "approve" | "reject";
+  confidence: number; // 0..1: score / 115 clamped, or 0 when there is no recent price
+  basis: string[]; // labels of the checks that matched
+  reasons: string[]; // notes on checks that failed or lacked data
+  rejectReason?: string; // set on every "reject" result
+};
+// Trimmed, upper-cased text of a spec value; null when the value is absent or blank.
+function normalizeEquivalenceText(value: unknown): string | null {
+  if (value === undefined || value === null) return null;
+  const normalized = String(value).trim().toUpperCase();
+  return normalized === "" ? null : normalized;
+}
+// Finite number parsed from a spec value; null when absent, empty, or not numeric.
+function numericEquivalenceValue(value: unknown): number | null {
+  const absent = value === undefined || value === null || value === "";
+  const candidate = absent ? NaN : Number(value);
+  return Number.isFinite(candidate) ? candidate : null;
+}
+// First 3-4 digit run in the wavelength text, as an integer; null when there is none.
+function extractPrimaryNm(wavelengths: unknown): number | null {
+  if (!wavelengths) return null;
+  const found = /(\d{3,4})/.exec(String(wavelengths));
+  return found === null ? null : Number.parseInt(found[1], 10);
+}
+// Scores two spec snapshots (fx vs. cp) and returns an approve/reject decision; the maximum score is 115.
+function evaluateEquivalenceResearch(
+  fx: EquivalenceProduct,
+  cp: EquivalenceProduct,
+  hasRecentPrice: boolean,
+): EquivalenceResearchResult {
+  const basis: string[] = [];
+  const reasons: string[] = [];
+  let score = 0;
+  // Normalize both sides up front: text trimmed/upper-cased, numbers parsed, first wavelength extracted.
+  const fxForm = normalizeEquivalenceText(fx.form_factor);
+  const cpForm = normalizeEquivalenceText(cp.form_factor);
+  const fxSpeed = numericEquivalenceValue(fx.speed_gbps);
+  const cpSpeed = numericEquivalenceValue(cp.speed_gbps);
+  const fxStandard = normalizeEquivalenceText(fx.standard_name);
+  const cpStandard = normalizeEquivalenceText(cp.standard_name);
+  const fxFiber = normalizeEquivalenceText(fx.fiber_type);
+  const cpFiber = normalizeEquivalenceText(cp.fiber_type);
+  const fxReach = numericEquivalenceValue(fx.reach_meters);
+  const cpReach = numericEquivalenceValue(cp.reach_meters);
+  const fxNm = extractPrimaryNm(fx.wavelengths);
+  const cpNm = extractPrimaryNm(cp.wavelengths);
+  // No recent price: reject with confidence 0 before any spec is scored.
+  if (!hasRecentPrice) {
+    reasons.push("no recent competitor price observation");
+    return {
+      decision: "reject",
+      confidence: 0,
+      basis,
+      reasons,
+      rejectReason: "automated research: competitor has no recent price observation",
+    };
+  }
+  // Form factor (+25), speed (+20), standard name (+30): a miss costs no points but is recorded in reasons.
+  if (fxForm && cpForm && fxForm === cpForm) {
+    score += 25;
+    basis.push("form_factor");
+  } else {
+    reasons.push("form factor mismatch or missing");
+  }
+
+  if (fxSpeed !== null && cpSpeed !== null && fxSpeed === cpSpeed) {
+    score += 20;
+    basis.push("speed_gbps");
+  } else {
+    reasons.push("speed mismatch or missing");
+  }
+
+  if (fxStandard && cpStandard && fxStandard === cpStandard) {
+    score += 30;
+    basis.push("standard_name");
+  } else {
+    reasons.push("standard name not identical");
+  }
+  // Wavelength: +20 within 15 nm, -20 otherwise; unscored when either side is missing.
+  if (fxNm !== null && cpNm !== null) {
+    if (Math.abs(fxNm - cpNm) <= 15) {
+      score += 20;
+      basis.push(`wavelength_${fxNm}nm`);
+    } else {
+      reasons.push(`wavelength mismatch ${fxNm}nm vs ${cpNm}nm`);
+      score -= 20;
+    }
+  } else {
+    reasons.push("wavelength missing");
+  }
+  // Fiber type: +10 on equality, -15 on mismatch; unscored when either side is missing.
+  if (fxFiber && cpFiber) {
+    if (fxFiber === cpFiber) {
+      score += 10;
+      basis.push("fiber_type");
+    } else {
+      reasons.push(`fiber mismatch ${fxFiber} vs ${cpFiber}`);
+      score -= 15;
+    }
+  } else {
+    reasons.push("fiber type missing");
+  }
+  // Reach: +10 when shorter/longer >= 0.85, -15 otherwise; non-positive values count as missing.
+  if (fxReach !== null && cpReach !== null && fxReach > 0 && cpReach > 0) {
+    const ratio = Math.min(fxReach, cpReach) / Math.max(fxReach, cpReach);
+    if (ratio >= 0.85) {
+      score += 10;
+      basis.push("reach");
+    } else {
+      reasons.push(`reach mismatch ${fxReach}m vs ${cpReach}m`);
+      score -= 15;
+    }
+  } else {
+    reasons.push("reach missing");
+  }
+  // 115 is the sum of all positive weights; the two flags below are derived from the reason strings.
+  const confidence = Math.max(0, Math.min(1, score / 115));
+  const criticalMismatch = reasons.some((reason) =>
+    reason.startsWith("wavelength mismatch") ||
+    reason.startsWith("fiber mismatch") ||
+    reason.startsWith("reach mismatch") ||
+    reason.startsWith("form factor mismatch") ||
+    reason.startsWith("speed mismatch")
+  );
+  const missingCriticalEvidence = reasons.some((reason) =>
+    reason === "wavelength missing" ||
+    reason === "fiber type missing" ||
+    reason === "reach missing"
+  );
+  // Any hard mismatch rejects regardless of score; "... mismatch or missing" reasons match these prefixes too.
+  if (criticalMismatch) {
+    return {
+      decision: "reject",
+      confidence,
+      basis,
+      reasons,
+      rejectReason: `automated research: technical mismatch (${reasons.join("; ")})`,
+    };
+  }
+  // Missing wavelength, fiber type, or reach also rejects, labelled as insufficient evidence.
+  if (missingCriticalEvidence) {
+    return {
+      decision: "reject",
+      confidence,
+      basis,
+      reasons,
+      rejectReason: `automated research: insufficient technical evidence (${reasons.join("; ")})`,
+    };
+  }
+  // With every hard check passed, approve at confidence >= 0.73.
+  if (confidence >= 0.73) {
+    return { decision: "approve", confidence, basis, reasons };
+  }
+
+  return {
+    decision: "reject",
+    confidence,
+    basis,
+    reasons,
+    rejectReason: `automated research: confidence ${confidence.toFixed(3)} below approval threshold`,
+  };
+}
+
 export async function createScheduler(): Promise<PgBoss> {
   const boss = new PgBoss({
     connectionString,
@@ -2667,52 +2842,105 @@ export async function registerWorkers(boss: PgBoss): Promise<void> {
     }
   });
 
-  // ── Re-research approved equivalences ────────────────────────────────────────
-  // Processes up to 200 approved equivalences per day that have re_research_due_at <= NOW().
-  // Re-runs the confidence check: if competitor still has recent prices and specs still match,
-  // the approval is confirmed (re_researched_at = NOW(), next check in 30 days).
-  // If confidence drops or competitor has no recent price: reverts to pending.
+  // ── Re-research equivalences ────────────────────────────────────────────────
+  // Confirms only well-evidenced matches. Weak, stale, incomplete, or technically
+  // contradictory matches are rejected automatically instead of going back to a
+  // manual queue.
   await boss.work("maintenance:re-research-equivalences", async () => {
     const { pool } = await import("./utils/db");
     const ts = new Date().toISOString();
-    console.log(`[${ts}] Running: Re-research approved equivalences`);
+    const batchLimit = Math.max(1, Math.min(10000, parseInt(process.env["EQUIVALENCE_RESEARCH_BATCH_LIMIT"] || "2000", 10) || 2000)); // unparsable (NaN) or zero env values fall back to 2000 instead of reaching LIMIT as NaN
+    console.log(`[${ts}] Running: Equivalence automated research`);
 
     const batch = await pool.query(`
-      SELECT eq.id, eq.flexoptix_id, eq.competitor_id, eq.confidence,
-             fx.form_factor, fx.speed_gbps, fx.standard_name, fx.fiber_type,
-             fx.reach_meters, fx.wavelengths
+      SELECT eq.id, eq.flexoptix_id, eq.competitor_id, eq.status, eq.confidence,
+             fx.part_number AS fx_part_number,
+             fx.form_factor AS fx_form_factor,
+             fx.speed_gbps AS fx_speed_gbps,
+             fx.standard_name AS fx_standard_name,
+             fx.fiber_type AS fx_fiber_type,
+             fx.reach_meters AS fx_reach_meters,
+             fx.wavelengths AS fx_wavelengths,
+             fx.connector AS fx_connector,
+             cp.part_number AS cp_part_number,
+             cp.form_factor AS cp_form_factor,
+             cp.speed_gbps AS cp_speed_gbps,
+             cp.standard_name AS cp_standard_name,
+             cp.fiber_type AS cp_fiber_type,
+             cp.reach_meters AS cp_reach_meters,
+             cp.wavelengths AS cp_wavelengths,
+             cp.connector AS cp_connector,
+             cpv.name AS competitor_vendor,
+             (
+               SELECT COUNT(*)
+               FROM price_observations po
+               WHERE po.transceiver_id = eq.competitor_id
+                 AND po.time > NOW() - INTERVAL '45 days'
+             ) AS recent_price_count
       FROM transceiver_equivalences eq
       JOIN transceivers fx ON eq.flexoptix_id = fx.id
-      WHERE eq.status IN ('approved', 'auto_approved')
+      JOIN transceivers cp ON eq.competitor_id = cp.id
+      JOIN vendors cpv ON cpv.id = cp.vendor_id
+      WHERE eq.status IN ('pending', 'approved', 'auto_approved')
         AND eq.re_research_due_at IS NOT NULL
         AND eq.re_research_due_at <= NOW()
       ORDER BY eq.re_research_due_at ASC
-      LIMIT 200
-    `);
+      LIMIT $1
+    `, [batchLimit]);
 
     let confirmed = 0;
-    let reverted = 0;
+    let rejected = 0;
 
     for (const eq of batch.rows) {
-      // Check if competitor still has a recent price observation
-      const priceCheck = await pool.query(`
-        SELECT COUNT(*) AS cnt
-        FROM price_observations
-        WHERE transceiver_id = $1 AND time > NOW() - INTERVAL '45 days'
-      `, [eq.competitor_id]);
+      const research = evaluateEquivalenceResearch(
+        {
+          part_number: eq.fx_part_number,
+          form_factor: eq.fx_form_factor,
+          speed_gbps: eq.fx_speed_gbps,
+          standard_name: eq.fx_standard_name,
+          fiber_type: eq.fx_fiber_type,
+          reach_meters: eq.fx_reach_meters,
+          wavelengths: eq.fx_wavelengths,
+          connector: eq.fx_connector,
+        },
+        {
+          part_number: eq.cp_part_number,
+          form_factor: eq.cp_form_factor,
+          speed_gbps: eq.cp_speed_gbps,
+          standard_name: eq.cp_standard_name,
+          fiber_type: eq.cp_fiber_type,
+          reach_meters: eq.cp_reach_meters,
+          wavelengths: eq.cp_wavelengths,
+          connector: eq.cp_connector,
+        },
+        parseInt(eq.recent_price_count, 10) > 0,
+      );
 
-      const hasRecentPrice = parseInt(priceCheck.rows[0].cnt, 10) > 0;
-
-      if (!hasRecentPrice) {
-        // Competitor no longer carries this — revert to pending for manual review
+      if (research.decision === "reject") {
         await pool.query(`
           UPDATE transceiver_equivalences
-          SET status = 'pending', re_research_due_at = NULL, re_researched_at = NULL,
-              match_notes = CONCAT(match_notes, E'\n[Re-research ' || NOW()::date || ': no recent price — reverted to pending]')
+          SET status = 'rejected',
+              confidence = $2,
+              match_basis = $3,
+              reject_reason = $4,
+              reviewed_by = 'automated-research',
+              reviewed_at = NOW(),
+              re_research_due_at = NULL,
+              re_researched_at = NOW(),
+              match_notes = CONCAT(
+                COALESCE(match_notes, ''),
+                E'\n[Automated research ' || NOW()::date || ': rejected; ' || $5 || ']'
+              ),
+              updated_at = NOW()
           WHERE id = $1
-        `, [eq.id]);
+        `, [
+          eq.id,
+          research.confidence,
+          research.basis,
+          research.rejectReason || "automated research: rejected",
+          research.reasons.join("; "),
+        ]);
 
-        // Reset competitor_verified if no other approved equivalence covers this transceiver
         await pool.query(`
           UPDATE transceivers
           SET competitor_verified = false, competitor_verified_at = NULL,
@@ -2726,20 +2954,51 @@ export async function registerWorkers(boss: PgBoss): Promise<void> {
             )
         `, [eq.flexoptix_id, eq.id]);
 
-        reverted++;
+        rejected++;
       } else {
-        // Still valid — confirm and schedule next re-research in 30 days
         await pool.query(`
           UPDATE transceiver_equivalences
-          SET re_researched_at   = NOW(),
-              re_research_due_at = NOW() + INTERVAL '30 days'
+          SET status = CASE WHEN status = 'pending' THEN 'auto_approved' ELSE status END,
+              confidence = $2,
+              match_basis = $3,
+              reviewed_by = COALESCE(reviewed_by, 'automated-research'),
+              reviewed_at = COALESCE(reviewed_at, NOW()),
+              reject_reason = NULL,
+              re_researched_at = NOW(),
+              re_research_due_at = NOW() + INTERVAL '30 days',
+              match_notes = CONCAT(
+                COALESCE(match_notes, ''),
+                E'\n[Automated research ' || NOW()::date || ': confirmed; basis: ' || $4 || ']'
+              ),
+              updated_at = NOW()
           WHERE id = $1
-        `, [eq.id]);
+        `, [eq.id, research.confidence, research.basis, research.basis.join(", ")]);
+
+        await pool.query(`
+          UPDATE transceivers
+          SET competitor_verified = true,
+              competitor_verified_at = COALESCE(competitor_verified_at, NOW())
+          WHERE id = $1 AND competitor_verified = false
+        `, [eq.flexoptix_id]);
+
         confirmed++;
       }
     }
 
-    console.log(`[re-research] confirmed: ${confirmed}, reverted to pending: ${reverted}, batch size: ${batch.rows.length}`);
+    if (confirmed > 0) {
+      await pool.query(`
+        UPDATE transceivers
+        SET fully_verified = true,
+            fully_verified_at = COALESCE(fully_verified_at, NOW())
+        WHERE competitor_verified = true
+          AND price_verified = true
+          AND image_verified = true
+          AND details_verified = true
+          AND fully_verified = false
+      `);
+    }
+
+    console.log(`[equivalence-research] confirmed: ${confirmed}, rejected: ${rejected}, batch size: ${batch.rows.length}`);
   });
 
   // ══════════════════════════════════════════════════════════════════════
diff --git a/sync/CURRENT.md b/sync/CURRENT.md
index 25ea39e..3833126 100644
--- a/sync/CURRENT.md
+++ b/sync/CURRENT.md
@@ -1,9 +1,66 @@
 # Current TIP Sync State
 
-Updated: 2026-05-09 03:15 UTC
+Updated: 2026-05-09 05:45 UTC
 
 ## Newest Work
 
+- TIP automated equivalence research / manual queue cleanup completed on 2026-05-09:
+  - operator intent:
+    - products should be researched well enough that they do not need manual equivalence validation
+    - the Erik host must not be loaded with crawler-heavy work
+    - TIPLLM-only policy for crawler/robot research remains in force
+  - root cause found:
+    - `approve-all` approved low-confidence equivalences and only marked them for later re-research
+    - the re-research worker only checked whether a competitor still had a recent price (within the last 45 days)
+    - it did not re-evaluate hard technical equivalence evidence such as reach, wavelength, fiber type, speed and form factor
+  - code changed:
+    - `packages/api/src/routes/review.ts`
+      - `approve-all` now approves only confidence >= `0.73`
+      - weak pending rows stay pending and are queued for automated research instead of being marked approved
+      - `needs_research` stats/listing now includes pending research rows
+      - added `POST /api/review/run-research`
+    - `packages/scraper/src/scheduler.ts`
+      - added deterministic equivalence research evaluator
+      - rejects stale, technically contradictory, incomplete, or low-confidence matches automatically
+      - confirms only matches with a recent price (within 45 days), matching form factor, speed and fiber type, primary wavelength within 15 nm, reach ratio >= 0.85, and confidence >= `0.73`
+      - confirmed matches are scheduled for a 30-day recheck
+  - live deployment:
+    - synced changed files to Erik `/opt/tip`
+    - `pnpm -C packages/api build` passed on Erik
+    - `pnpm -C packages/scraper build` passed on Erik
+    - restarted `tip-api` and `tip-scraper-daemon`
+    - both processes are online
+  - data cleanup performed on live DB without heavy crawling:
+    - pending + due re-research candidates processed: `144103`
+      - rejected fiber mismatch: `958`
+      - rejected reach mismatch: `82128`
+      - rejected missing reach evidence: `31151`
+      - rejected wavelength mismatch: `29865`
+      - rejected low confidence: `1`
+    - old approved rows audited:
+      - kept/confirmed: `1986`
+      - rejected: `4000`
+    - old auto-approved rows audited:
+      - kept/confirmed: `32080`
+      - rejected reach mismatch: `260`
+  - final live equivalence status:
+    - `pending`: `0`
+    - `approved`: `1986`
+    - `auto_approved`: `32080`
+    - `rejected`: `148367`
+    - due re-research now: `0`
+    - scheduled 30-day rechecks: `34066`
+  - final verification counters after reconcile:
+    - `competitor_verified`: `11137`
+    - `fully_verified`: `290`
+    - `price_verified`: `11549`
+    - `image_verified`: `10629`
+    - `details_verified`: `9538`
+  - operational note:
+    - no new crawler wave was started for this cleanup
+    - the run used existing crawled specs/prices and strict deterministic product-evidence checks
+    - next improvement should be targeted crawler enrichment for products rejected due to missing reach/details, preferably on Proxmox/Pi workers rather than Erik
+
 - TIP Flexoptix + FS.com price/image revalidation completed on 2026-05-09:
   - live root cause:
     - scraper runs had set `transceivers.price_verified`, but `price_observations.is_verified` stayed false
diff --git a/sync/history/2026-05-09-tip-equivalence-auto-research.md b/sync/history/2026-05-09-tip-equivalence-auto-research.md
new file mode 100644
index 0000000..cdad42b
--- /dev/null
+++ b/sync/history/2026-05-09-tip-equivalence-auto-research.md
@@ -0,0 +1,98 @@
+# TIP Equivalence Automated Research
+
+Date: 2026-05-09
+
+## Goal
+
+Remove manual equivalence validation as a required workflow for TIP product verification. Low-confidence matches should be researched and either confirmed or rejected automatically.
+
+## Findings
+
+- The dashboard had a large `Approved + Re-Research` backlog.
+- `approve-all` was marking low-confidence rows approved, then setting `re_research_due_at`.
+- The re-research worker only checked whether the competitor still had a recent price; it did not re-check technical equivalence quality.
+- Many low-confidence rows were objectively bad matches:
+  - reach mismatches
+  - wavelength mismatches
+  - missing reach evidence
+  - fiber mismatches
+
+## Code Changes
+
+- `packages/api/src/routes/review.ts`
+  - `approve-all` now approves only confidence >= `0.73`.
+  - Weak rows stay pending and get queued for automated research.
+  - `needs_research` includes pending research rows.
+  - Added `POST /api/review/run-research`.
+
+- `packages/scraper/src/scheduler.ts`
+  - Added deterministic equivalence evaluator.
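+  - Confirms matches only when there is:
+    - recent competitor price (an observation within the last 45 days)
+    - matching form factor
+    - matching speed
+    - matching fiber type
+    - matching wavelength (primary wavelengths within 15 nm of each other)
+    - compatible reach (shorter reach at least 85% of the longer one)
+    - confidence >= `0.73`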
+  - Rejects stale, incomplete, contradictory, or low-confidence matches automatically.
+  - Confirmed matches get a 30-day recheck.
+
+## Deployment
+
+- Synced code to Erik `/opt/tip`.
+- Built on Erik:
+  - `pnpm -C packages/api build`
+  - `pnpm -C packages/scraper build`
+- Restarted:
+  - `tip-api`
+  - `tip-scraper-daemon`
+- Both were online after restart.
+
+## Live Data Cleanup
+
+No heavy crawler wave was started. Cleanup used existing crawled specs and price observations.
+
+Processed pending + due re-research:
+
+- total: `144103`
+- rejected fiber mismatch: `958`
+- rejected reach mismatch: `82128`
+- rejected missing reach evidence: `31151`
+- rejected wavelength mismatch: `29865`
+- rejected low confidence: `1`
+
+Processed old approved rows:
+
+- confirmed: `1986`
+- rejected fiber mismatch: `184`
+- rejected reach mismatch: `1704`
+- rejected missing reach evidence: `1117`
+- rejected wavelength mismatch: `993`
+- rejected low confidence: `2`
+
+Processed old auto-approved rows:
+
+- confirmed: `32080`
+- rejected reach mismatch: `260`
+
+## Final State
+
+- pending: `0`
+- approved: `1986`
+- auto_approved: `32080`
+- rejected: `148367`
+- due re-research now: `0`
+- scheduled 30-day rechecks: `34066`
+
+Product verification counters after reconcile:
+
+- competitor_verified: `11137`
+- fully_verified: `290`
+- price_verified: `11549`
+- image_verified: `10629`
+- details_verified: `9538`
+
+## Next Work
+
+Products rejected for missing reach/details should be enriched by targeted vendor crawlers. Keep Erik light; use Proxmox/Pi workers for heavier crawl waves. TIPLLM-only policy remains active for crawler/robot research and learning records.