From 43b72501804f9a4b8ad1866701410003eef5c882 Mon Sep 17 00:00:00 2001 From: Rene Fichtmueller Date: Sat, 9 May 2026 07:48:11 +0200 Subject: [PATCH] fix: automate equivalence research review queue --- packages/api/src/routes/review.ts | 76 +++- packages/scraper/src/scheduler.ts | 325 ++++++++++++++++-- sync/CURRENT.md | 59 +++- ...026-05-09-tip-equivalence-auto-research.md | 98 ++++++ 4 files changed, 507 insertions(+), 51 deletions(-) create mode 100644 sync/history/2026-05-09-tip-equivalence-auto-research.md diff --git a/packages/api/src/routes/review.ts b/packages/api/src/routes/review.ts index c216917..3efc647 100644 --- a/packages/api/src/routes/review.ts +++ b/packages/api/src/routes/review.ts @@ -7,6 +7,7 @@ * POST /api/review/equivalences/:id/reject — reject with optional reason * PATCH /api/review/equivalences/:id — edit match_notes * POST /api/review/run-matcher — trigger equivalence job immediately + * POST /api/review/run-research — trigger equivalence research job immediately */ import { Router, Request, Response } from "express"; import { pool } from "../db/client"; @@ -27,6 +28,17 @@ async function checkAndSetFullyVerified(transceiverId: string): Promise return (result.rowCount ?? 0) > 0; } +async function queueMaintenanceJob(name: string): Promise { + await pool.query( + ` + INSERT INTO pgboss.job (name, data, priority) + VALUES ($1, '{}', 0) + ON CONFLICT DO NOTHING + `, + [name] + ); +} + export const reviewRouter = Router(); // ── GET /api/review/equivalences ────────────────────────────────────────────── @@ -51,7 +63,7 @@ reviewRouter.get("/equivalences", async (req: Request, res: Response) => { params = [limit, offset]; limitIdx = 1; offsetIdx = 2; } else if (status === "needs_research") { - where = `WHERE eq.status IN ('approved','auto_approved') AND eq.re_research_due_at IS NOT NULL AND eq.re_research_due_at <= NOW()`; + where = `WHERE eq.status IN ('pending','approved','auto_approved') AND eq.re_research_due_at IS NOT NULL AND eq.re_research_due_at <= NOW()`; params = [limit, offset]; limitIdx = 1; offsetIdx = 2; } else { @@ -143,7 +155,7 @@ reviewRouter.get("/equivalences/stats", async (_req: Request, res: Response) => SUM(CASE WHEN status = 'approved' THEN 1 ELSE 0 END) AS approved, SUM(CASE WHEN status = 'auto_approved' THEN 1 ELSE 0 END) AS auto_approved, SUM(CASE WHEN status = 'rejected' THEN 1 ELSE 0 END) AS rejected, - SUM(CASE WHEN status IN ('approved','auto_approved') + SUM(CASE WHEN status IN ('pending','approved','auto_approved') AND re_research_due_at IS NOT NULL AND re_research_due_at <= NOW() THEN 1 ELSE 0 END) AS needs_research, COUNT(*) AS total @@ -254,9 +266,8 @@ reviewRouter.patch("/equivalences/:id", async (req: Request, res: Response) => { }); // ── POST /api/review/equivalences/approve-all ───────────────────────────────── -// Approve ALL pending equivalences regardless of confidence. -// Low-confidence ones (< 0.73) get re_research_due_at = NOW() so the nightly -// re-research job will re-verify them one by one. +// Approve only high-confidence pending equivalences. Weak candidates are queued +// for automated research instead of being marked as approved. reviewRouter.post("/equivalences/approve-all", async (req: Request, res: Response) => { const reviewer = (req.body as { reviewer?: string }).reviewer || "approve-all"; const RE_RESEARCH_THRESHOLD = 0.73; @@ -271,15 +282,31 @@ reviewRouter.post("/equivalences/approve-all", async (req: Request, res: Respons for (const row of candidates.rows) { const needsReSearch = parseFloat(row.confidence) < RE_RESEARCH_THRESHOLD; + if (needsReSearch) { + await pool.query(` + UPDATE transceiver_equivalences + SET status = 'pending', + reviewed_by = $2, + reviewed_at = NULL, + re_research_due_at = NOW(), + re_researched_at = NULL, + match_notes = CONCAT(COALESCE(match_notes, ''), E'\n[Automated research queued ' || NOW()::date || ': confidence below approval threshold]') + WHERE id = $1 + `, [row.id, reviewer]); + + scheduledReSearch++; + continue; + } + await pool.query(` UPDATE transceiver_equivalences SET status = 'approved', reviewed_by = $2, reviewed_at = NOW(), - re_research_due_at = $3, + re_research_due_at = NULL, re_researched_at = NULL WHERE id = $1 - `, [row.id, reviewer, needsReSearch ? new Date() : null]); + `, [row.id, reviewer]); await pool.query(` UPDATE transceivers @@ -289,11 +316,20 @@ reviewRouter.post("/equivalences/approve-all", async (req: Request, res: Respons const earned = await checkAndSetFullyVerified(row.flexoptix_id); if (earned) fullyVerified++; - if (needsReSearch) scheduledReSearch++; approved++; } - res.json({ success: true, approved, fully_verified_earned: fullyVerified, scheduled_re_research: scheduledReSearch }); + if (scheduledReSearch > 0) { + await queueMaintenanceJob("maintenance:re-research-equivalences"); + } + + res.json({ + success: true, + approved, + fully_verified_earned: fullyVerified, + scheduled_re_research: scheduledReSearch, + left_pending: scheduledReSearch, + }); }); // ── POST /api/review/equivalences/bulk-approve ──────────────────────────────── @@ -315,7 +351,11 @@ reviewRouter.post("/equivalences/bulk-approve", async (req: Request, res: Respon for (const row of candidates.rows) { await pool.query(` UPDATE transceiver_equivalences - SET status = 'approved', reviewed_by = $2, reviewed_at = NOW() + SET status = 'approved', + reviewed_by = $2, + reviewed_at = NOW(), + re_research_due_at = NULL, + re_researched_at = NULL WHERE id = $1 `, [row.id, reviewer]); @@ -336,13 +376,15 @@ reviewRouter.post("/equivalences/bulk-approve", async (req: Request, res: Respon // ── POST /api/review/run-matcher ────────────────────────────────────────────── // Trigger the equivalence matcher immediately (admin action) reviewRouter.post("/run-matcher", async (_req: Request, res: Response) => { - // Queue the job via pg-boss — import from scraper's db util won't work here, - // so we fire directly via DB insert into pg-boss queue - await pool.query(` - INSERT INTO pgboss.job (name, data, priority) - VALUES ('maintenance:find-equivalences', '{}', 0) - ON CONFLICT DO NOTHING - `); + await queueMaintenanceJob("maintenance:find-equivalences"); res.json({ success: true, message: "Equivalence matcher queued" }); }); + +// ── POST /api/review/run-research ──────────────────────────────────────────── +// Trigger the automated equivalence research worker immediately. +reviewRouter.post("/run-research", async (_req: Request, res: Response) => { + await queueMaintenanceJob("maintenance:re-research-equivalences"); + + res.json({ success: true, message: "Equivalence research queued" }); +}); diff --git a/packages/scraper/src/scheduler.ts b/packages/scraper/src/scheduler.ts index 927fd9c..be1c5ee 100644 --- a/packages/scraper/src/scheduler.ts +++ b/packages/scraper/src/scheduler.ts @@ -44,6 +44,181 @@ config({ path: join(__dirname, "..", "..", "..", ".env") }); const connectionString = `postgres://${process.env.POSTGRES_USER || "tip"}:${process.env.POSTGRES_PASSWORD || "tip_dev_2026"}@${process.env.POSTGRES_HOST || "localhost"}:${process.env.POSTGRES_PORT || "5433"}/${process.env.POSTGRES_DB || "transceiver_db"}`; +type EquivalenceProduct = { + part_number?: string | null; + standard_name?: string | null; + form_factor?: string | null; + speed_gbps?: number | string | null; + fiber_type?: string | null; + reach_meters?: number | string | null; + wavelengths?: string | null; + connector?: string | null; +}; + +type EquivalenceResearchResult = { + decision: "approve" | "reject"; + confidence: number; + basis: string[]; + reasons: string[]; + rejectReason?: string; +}; + +function normalizeEquivalenceText(value: unknown): string | null { + if (value === null || value === undefined) return null; + const text = String(value).trim().toUpperCase(); + return text.length > 0 ? text : null; +} + +function numericEquivalenceValue(value: unknown): number | null { + if (value === null || value === undefined || value === "") return null; + const parsed = Number(value); + return Number.isFinite(parsed) ? parsed : null; +} + +function extractPrimaryNm(wavelengths: unknown): number | null { + if (!wavelengths) return null; + const match = String(wavelengths).match(/(\d{3,4})/); + return match ? parseInt(match[1], 10) : null; +} + +function evaluateEquivalenceResearch( + fx: EquivalenceProduct, + cp: EquivalenceProduct, + hasRecentPrice: boolean, +): EquivalenceResearchResult { + const basis: string[] = []; + const reasons: string[] = []; + let score = 0; + + const fxForm = normalizeEquivalenceText(fx.form_factor); + const cpForm = normalizeEquivalenceText(cp.form_factor); + const fxSpeed = numericEquivalenceValue(fx.speed_gbps); + const cpSpeed = numericEquivalenceValue(cp.speed_gbps); + const fxStandard = normalizeEquivalenceText(fx.standard_name); + const cpStandard = normalizeEquivalenceText(cp.standard_name); + const fxFiber = normalizeEquivalenceText(fx.fiber_type); + const cpFiber = normalizeEquivalenceText(cp.fiber_type); + const fxReach = numericEquivalenceValue(fx.reach_meters); + const cpReach = numericEquivalenceValue(cp.reach_meters); + const fxNm = extractPrimaryNm(fx.wavelengths); + const cpNm = extractPrimaryNm(cp.wavelengths); + + if (!hasRecentPrice) { + reasons.push("no recent competitor price observation"); + return { + decision: "reject", + confidence: 0, + basis, + reasons, + rejectReason: "automated research: competitor has no recent price observation", + }; + } + + if (fxForm && cpForm && fxForm === cpForm) { + score += 25; + basis.push("form_factor"); + } else { + reasons.push("form factor mismatch or missing"); + } + + if (fxSpeed !== null && cpSpeed !== null && fxSpeed === cpSpeed) { + score += 20; + basis.push("speed_gbps"); + } else { + reasons.push("speed mismatch or missing"); + } + + if (fxStandard && cpStandard && fxStandard === cpStandard) { + score += 30; + basis.push("standard_name"); + } else { + reasons.push("standard name not identical"); + } + + if (fxNm !== null && cpNm !== null) { + if (Math.abs(fxNm - cpNm) <= 15) { + score += 20; + basis.push(`wavelength_${fxNm}nm`); + } else { + reasons.push(`wavelength mismatch ${fxNm}nm vs ${cpNm}nm`); + score -= 20; + } + } else { + reasons.push("wavelength missing"); + } + + if (fxFiber && cpFiber) { + if (fxFiber === cpFiber) { + score += 10; + basis.push("fiber_type"); + } else { + reasons.push(`fiber mismatch ${fxFiber} vs ${cpFiber}`); + score -= 15; + } + } else { + reasons.push("fiber type missing"); + } + + if (fxReach !== null && cpReach !== null && fxReach > 0 && cpReach > 0) { + const ratio = Math.min(fxReach, cpReach) / Math.max(fxReach, cpReach); + if (ratio >= 0.85) { + score += 10; + basis.push("reach"); + } else { + reasons.push(`reach mismatch ${fxReach}m vs ${cpReach}m`); + score -= 15; + } + } else { + reasons.push("reach missing"); + } + + const confidence = Math.max(0, Math.min(1, score / 115)); + const criticalMismatch = reasons.some((reason) => + reason.startsWith("wavelength mismatch") || + reason.startsWith("fiber mismatch") || + reason.startsWith("reach mismatch") || + reason.startsWith("form factor mismatch") || + reason.startsWith("speed mismatch") + ); + const missingCriticalEvidence = reasons.some((reason) => + reason === "wavelength missing" || + reason === "fiber type missing" || + reason === "reach missing" + ); + + if (criticalMismatch) { + return { + decision: "reject", + confidence, + basis, + reasons, + rejectReason: `automated research: technical mismatch (${reasons.join("; ")})`, + }; + } + + if (missingCriticalEvidence) { + return { + decision: "reject", + confidence, + basis, + reasons, + rejectReason: `automated research: insufficient technical evidence (${reasons.join("; ")})`, + }; + } + + if (confidence >= 0.73) { + return { decision: "approve", confidence, basis, reasons }; + } + + return { + decision: "reject", + confidence, + basis, + reasons, + rejectReason: `automated research: confidence ${confidence.toFixed(3)} below approval threshold`, + }; +} + export async function createScheduler(): Promise { const boss = new PgBoss({ connectionString, @@ -2667,52 +2842,105 @@ export async function registerWorkers(boss: PgBoss): Promise { } }); - // ── Re-research approved equivalences ──────────────────────────────────────── - // Processes up to 200 approved equivalences per day that have re_research_due_at <= NOW(). - // Re-runs the confidence check: if competitor still has recent prices and specs still match, - // the approval is confirmed (re_researched_at = NOW(), next check in 30 days). - // If confidence drops or competitor has no recent price: reverts to pending. + // ── Re-research equivalences ──────────────────────────────────────────────── + // Confirms only well-evidenced matches. Weak, stale, incomplete, or technically + // contradictory matches are rejected automatically instead of going back to a + // manual queue. await boss.work("maintenance:re-research-equivalences", async () => { const { pool } = await import("./utils/db"); const ts = new Date().toISOString(); - console.log(`[${ts}] Running: Re-research approved equivalences`); + const batchLimit = Math.max(1, Math.min(10000, parseInt(process.env["EQUIVALENCE_RESEARCH_BATCH_LIMIT"] || "2000", 10))); + console.log(`[${ts}] Running: Equivalence automated research`); const batch = await pool.query(` - SELECT eq.id, eq.flexoptix_id, eq.competitor_id, eq.confidence, - fx.form_factor, fx.speed_gbps, fx.standard_name, fx.fiber_type, - fx.reach_meters, fx.wavelengths + SELECT eq.id, eq.flexoptix_id, eq.competitor_id, eq.status, eq.confidence, + fx.part_number AS fx_part_number, + fx.form_factor AS fx_form_factor, + fx.speed_gbps AS fx_speed_gbps, + fx.standard_name AS fx_standard_name, + fx.fiber_type AS fx_fiber_type, + fx.reach_meters AS fx_reach_meters, + fx.wavelengths AS fx_wavelengths, + fx.connector AS fx_connector, + cp.part_number AS cp_part_number, + cp.form_factor AS cp_form_factor, + cp.speed_gbps AS cp_speed_gbps, + cp.standard_name AS cp_standard_name, + cp.fiber_type AS cp_fiber_type, + cp.reach_meters AS cp_reach_meters, + cp.wavelengths AS cp_wavelengths, + cp.connector AS cp_connector, + cpv.name AS competitor_vendor, + ( + SELECT COUNT(*) + FROM price_observations po + WHERE po.transceiver_id = eq.competitor_id + AND po.time > NOW() - INTERVAL '45 days' + ) AS recent_price_count FROM transceiver_equivalences eq JOIN transceivers fx ON eq.flexoptix_id = fx.id - WHERE eq.status IN ('approved', 'auto_approved') + JOIN transceivers cp ON eq.competitor_id = cp.id + JOIN vendors cpv ON cpv.id = cp.vendor_id + WHERE eq.status IN ('pending', 'approved', 'auto_approved') AND eq.re_research_due_at IS NOT NULL AND eq.re_research_due_at <= NOW() ORDER BY eq.re_research_due_at ASC - LIMIT 200 - `); + LIMIT $1 + `, [batchLimit]); let confirmed = 0; - let reverted = 0; + let rejected = 0; for (const eq of batch.rows) { - // Check if competitor still has a recent price observation - const priceCheck = await pool.query(` - SELECT COUNT(*) AS cnt - FROM price_observations - WHERE transceiver_id = $1 AND time > NOW() - INTERVAL '45 days' - `, [eq.competitor_id]); + const research = evaluateEquivalenceResearch( + { + part_number: eq.fx_part_number, + form_factor: eq.fx_form_factor, + speed_gbps: eq.fx_speed_gbps, + standard_name: eq.fx_standard_name, + fiber_type: eq.fx_fiber_type, + reach_meters: eq.fx_reach_meters, + wavelengths: eq.fx_wavelengths, + connector: eq.fx_connector, + }, + { + part_number: eq.cp_part_number, + form_factor: eq.cp_form_factor, + speed_gbps: eq.cp_speed_gbps, + standard_name: eq.cp_standard_name, + fiber_type: eq.cp_fiber_type, + reach_meters: eq.cp_reach_meters, + wavelengths: eq.cp_wavelengths, + connector: eq.cp_connector, + }, + parseInt(eq.recent_price_count, 10) > 0, + ); - const hasRecentPrice = parseInt(priceCheck.rows[0].cnt, 10) > 0; - - if (!hasRecentPrice) { - // Competitor no longer carries this — revert to pending for manual review + if (research.decision === "reject") { await pool.query(` UPDATE transceiver_equivalences - SET status = 'pending', re_research_due_at = NULL, re_researched_at = NULL, - match_notes = CONCAT(match_notes, E'\n[Re-research ' || NOW()::date || ': no recent price — reverted to pending]') + SET status = 'rejected', + confidence = $2, + match_basis = $3, + reject_reason = $4, + reviewed_by = 'automated-research', + reviewed_at = NOW(), + re_research_due_at = NULL, + re_researched_at = NOW(), + match_notes = CONCAT( + COALESCE(match_notes, ''), + E'\n[Automated research ' || NOW()::date || ': rejected; ' || $5 || ']' + ), + updated_at = NOW() WHERE id = $1 - `, [eq.id]); + `, [ + eq.id, + research.confidence, + research.basis, + research.rejectReason || "automated research: rejected", + research.reasons.join("; "), + ]); - // Reset competitor_verified if no other approved equivalence covers this transceiver await pool.query(` UPDATE transceivers SET competitor_verified = false, competitor_verified_at = NULL, @@ -2726,20 +2954,51 @@ export async function registerWorkers(boss: PgBoss): Promise { ) `, [eq.flexoptix_id, eq.id]); - reverted++; + rejected++; } else { - // Still valid — confirm and schedule next re-research in 30 days await pool.query(` UPDATE transceiver_equivalences - SET re_researched_at = NOW(), - re_research_due_at = NOW() + INTERVAL '30 days' + SET status = CASE WHEN status = 'pending' THEN 'auto_approved' ELSE status END, + confidence = $2, + match_basis = $3, + reviewed_by = COALESCE(reviewed_by, 'automated-research'), + reviewed_at = COALESCE(reviewed_at, NOW()), + reject_reason = NULL, + re_researched_at = NOW(), + re_research_due_at = NOW() + INTERVAL '30 days', + match_notes = CONCAT( + COALESCE(match_notes, ''), + E'\n[Automated research ' || NOW()::date || ': confirmed; basis: ' || $4 || ']' + ), + updated_at = NOW() WHERE id = $1 - `, [eq.id]); + `, [eq.id, research.confidence, research.basis, research.basis.join(", ")]); + + await pool.query(` + UPDATE transceivers + SET competitor_verified = true, + competitor_verified_at = COALESCE(competitor_verified_at, NOW()) + WHERE id = $1 AND competitor_verified = false + `, [eq.flexoptix_id]); + confirmed++; } } - console.log(`[re-research] confirmed: ${confirmed}, reverted to pending: ${reverted}, batch size: ${batch.rows.length}`); + if (confirmed > 0) { + await pool.query(` + UPDATE transceivers + SET fully_verified = true, + fully_verified_at = COALESCE(fully_verified_at, NOW()) + WHERE competitor_verified = true + AND price_verified = true + AND image_verified = true + AND details_verified = true + AND fully_verified = false + `); + } + + console.log(`[equivalence-research] confirmed: ${confirmed}, rejected: ${rejected}, batch size: ${batch.rows.length}`); }); // ══════════════════════════════════════════════════════════════════════ diff --git a/sync/CURRENT.md b/sync/CURRENT.md index 25ea39e..3833126 100644 --- a/sync/CURRENT.md +++ b/sync/CURRENT.md @@ -1,9 +1,66 @@ # Current TIP Sync State -Updated: 2026-05-09 03:15 UTC +Updated: 2026-05-09 05:45 UTC ## Newest Work +- TIP automated equivalence research / manual queue cleanup completed on 2026-05-09: + - operator intent: + - products should be researched well enough that they do not need manual equivalence validation + - Erik must not be stressed by crawler-heavy work + - TIPLLM-only policy for crawler/robot research remains in force + - root cause found: + - `approve-all` approved low-confidence equivalences and only marked them for later re-research + - the re-research worker mostly checked whether a competitor still had a recent price + - it did not re-evaluate hard technical equivalence evidence such as reach, wavelength, fiber type, speed and form factor + - code changed: + - `packages/api/src/routes/review.ts` + - `approve-all` now approves only confidence >= `0.73` + - weak pending rows stay pending and are queued for automated research instead of being marked approved + - `needs_research` stats/listing now includes pending research rows + - added `POST /api/review/run-research` + - `packages/scraper/src/scheduler.ts` + - added deterministic equivalence research evaluator + - rejects stale, technically contradictory, incomplete, or low-confidence matches automatically + - confirms only matches with recent price plus matching form factor, speed, fiber type, wavelength and reach + - confirmed matches are scheduled for a 30-day recheck + - live deployment: + - synced changed files to Erik `/opt/tip` + - `pnpm -C packages/api build` passed on Erik + - `pnpm -C packages/scraper build` passed on Erik + - restarted `tip-api` and `tip-scraper-daemon` + - both processes are online + - data cleanup performed on live DB without heavy crawling: + - pending + due re-research candidates processed: `144103` + - rejected fiber mismatch: `958` + - rejected reach mismatch: `82128` + - rejected missing reach evidence: `31151` + - rejected wavelength mismatch: `29865` + - rejected low confidence: `1` + - old approved rows audited: + - kept/confirmed: `1986` + - rejected: `4000` + - old auto-approved rows audited: + - kept/confirmed: `32080` + - rejected reach mismatch: `260` + - final live equivalence status: + - `pending`: `0` + - `approved`: `1986` + - `auto_approved`: `32080` + - `rejected`: `148367` + - due re-research now: `0` + - scheduled 30-day rechecks: `34066` + - final verification counters after reconcile: + - `competitor_verified`: `11137` + - `fully_verified`: `290` + - `price_verified`: `11549` + - `image_verified`: `10629` + - `details_verified`: `9538` + - operational note: + - no new crawler wave was started for this cleanup + - the run used existing crawled specs/prices and strict deterministic product-evidence checks + - next improvement should be targeted crawler enrichment for products rejected due to missing reach/details, preferably on Proxmox/Pi workers rather than Erik + - TIP Flexoptix + FS.com price/image revalidation completed on 2026-05-09: - live root cause: - scraper runs had set `transceivers.price_verified`, but `price_observations.is_verified` stayed false diff --git a/sync/history/2026-05-09-tip-equivalence-auto-research.md b/sync/history/2026-05-09-tip-equivalence-auto-research.md new file mode 100644 index 0000000..cdad42b --- /dev/null +++ b/sync/history/2026-05-09-tip-equivalence-auto-research.md @@ -0,0 +1,98 @@ +# TIP Equivalence Automated Research + +Date: 2026-05-09 + +## Goal + +Remove manual equivalence validation as a required workflow for TIP product verification. Low-confidence matches should be researched and either confirmed or rejected automatically. + +## Findings + +- The dashboard had a large `Approved + Re-Research` backlog. +- `approve-all` was marking low-confidence rows approved, then setting `re_research_due_at`. +- The re-research worker only checked whether the competitor still had a recent price; it did not re-check technical equivalence quality. +- Many low-confidence rows were objectively bad matches: + - reach mismatches + - wavelength mismatches + - missing reach evidence + - fiber mismatches + +## Code Changes + +- `packages/api/src/routes/review.ts` + - `approve-all` now approves only confidence >= `0.73`. + - Weak rows stay pending and get queued for automated research. + - `needs_research` includes pending research rows. + - Added `POST /api/review/run-research`. + +- `packages/scraper/src/scheduler.ts` + - Added deterministic equivalence evaluator. + - Confirms matches only when there is: + - recent competitor price + - matching form factor + - matching speed + - matching fiber type + - matching wavelength + - compatible reach + - confidence >= `0.73` + - Rejects stale, incomplete, contradictory, or low-confidence matches automatically. + - Confirmed matches get a 30-day recheck. + +## Deployment + +- Synced code to Erik `/opt/tip`. +- Built on Erik: + - `pnpm -C packages/api build` + - `pnpm -C packages/scraper build` +- Restarted: + - `tip-api` + - `tip-scraper-daemon` +- Both were online after restart. + +## Live Data Cleanup + +No heavy crawler wave was started. Cleanup used existing crawled specs and price observations. + +Processed pending + due re-research: + +- total: `144103` +- rejected fiber mismatch: `958` +- rejected reach mismatch: `82128` +- rejected missing reach evidence: `31151` +- rejected wavelength mismatch: `29865` +- rejected low confidence: `1` + +Processed old approved rows: + +- confirmed: `1986` +- rejected fiber mismatch: `184` +- rejected reach mismatch: `1704` +- rejected missing reach evidence: `1117` +- rejected wavelength mismatch: `993` +- rejected low confidence: `2` + +Processed old auto-approved rows: + +- confirmed: `32080` +- rejected reach mismatch: `260` + +## Final State + +- pending: `0` +- approved: `1986` +- auto_approved: `32080` +- rejected: `148367` +- due re-research now: `0` +- scheduled 30-day rechecks: `34066` + +Product verification counters after reconcile: + +- competitor_verified: `11137` +- fully_verified: `290` +- price_verified: `11549` +- image_verified: `10629` +- details_verified: `9538` + +## Next Work + +Products rejected for missing reach/details should be enriched by targeted vendor crawlers. Keep Erik light; use Proxmox/Pi workers for heavier crawl waves. TIPLLM-only policy remains active for crawler/robot research and learning records.