From a43e572946e50c642193e7a80b32c38d9d42bd94 Mon Sep 17 00:00:00 2001 From: Rene Fichtmueller Date: Sat, 9 May 2026 20:19:19 +0200 Subject: [PATCH] fix: advance TIP product verification robots --- packages/api/src/db/queries.ts | 5 +- packages/api/src/routes/health.ts | 2 + packages/scraper/package.json | 1 + packages/scraper/src/scheduler.ts | 1 + packages/scraper/src/scrapers/gaotek.ts | 195 +++++++++++++++--- packages/scraper/src/utils/backfill-images.ts | 10 +- .../src/utils/verify-catalog-details.ts | 88 ++++++++ sync/CURRENT.md | 58 +++++- ...09-tip-global-verification-continuation.md | 57 +++++ 9 files changed, 389 insertions(+), 28 deletions(-) create mode 100644 packages/scraper/src/utils/verify-catalog-details.ts create mode 100644 sync/history/2026-05-09-tip-global-verification-continuation.md diff --git a/packages/api/src/db/queries.ts b/packages/api/src/db/queries.ts index dd04787..3194a26 100644 --- a/packages/api/src/db/queries.ts +++ b/packages/api/src/db/queries.ts @@ -19,7 +19,10 @@ export interface SearchParams { } export async function searchTransceivers(params: SearchParams) { - const conditions: string[] = []; + const conditions: string[] = [ + `COALESCE(t.data_confidence, 'unknown') != 'garbage'`, + `COALESCE(t.product_page_url, '') NOT LIKE '%/category/%'`, + ]; const values: any[] = []; let idx = 1; diff --git a/packages/api/src/routes/health.ts b/packages/api/src/routes/health.ts index 66beeff..05b26d6 100644 --- a/packages/api/src/routes/health.ts +++ b/packages/api/src/routes/health.ts @@ -22,6 +22,8 @@ healthRouter.get("/", async (_req: Request, res: Response) => { COUNT(*) FILTER (WHERE fully_verified) AS fully_verified, COUNT(*) AS total FROM transceivers + WHERE COALESCE(data_confidence, 'unknown') != 'garbage' + AND COALESCE(product_page_url, '') NOT LIKE '%/category/%' `).catch(() => ({ rows: [{}] })); const v = verStats.rows[0] || {}; diff --git a/packages/scraper/package.json b/packages/scraper/package.json index c0e1b0a..07fd9b2 100644 --- a/packages/scraper/package.json +++ b/packages/scraper/package.json @@ -12,6 +12,7 @@ "scrape:fs:url-discovery": "FS_URL_DISCOVERY_ONLY=1 TIP_FORCE_REVALIDATE=1 tsx src/scrapers/fs-com.ts", "scrape:atgbics:details": "tsx src/scrapers/atgbics-detail-pages.ts", "scrape:vendors:details": "tsx src/scrapers/shopfiber24-fibermall-detail-pages.ts", + "verify:catalog:details": "tsx src/utils/verify-catalog-details.ts", "scrape:cisco": "tsx src/scrapers/cisco-tmg.ts", "scrape:optcore": "tsx src/scrapers/optcore.ts", "scrape:news": "tsx src/scrapers/news.ts", diff --git a/packages/scraper/src/scheduler.ts b/packages/scraper/src/scheduler.ts index be1c5ee..439adb1 100644 --- a/packages/scraper/src/scheduler.ts +++ b/packages/scraper/src/scheduler.ts @@ -2621,6 +2621,7 @@ export async function registerWorkers(boss: PgBoss): Promise { updated_at = NOW() WHERE product_page_url IS NOT NULL AND product_page_url != '' + AND product_page_url NOT LIKE '%/category/%' AND form_factor IS NOT NULL AND speed_gbps IS NOT NULL AND part_number IS NOT NULL diff --git a/packages/scraper/src/scrapers/gaotek.ts b/packages/scraper/src/scrapers/gaotek.ts index a2ad158..e08dddc 100644 --- a/packages/scraper/src/scrapers/gaotek.ts +++ b/packages/scraper/src/scrapers/gaotek.ts @@ -6,7 +6,14 @@ * * Rate limited: 1 req/2sec. */ -import { pool, findOrCreateScrapedTransceiver, ensureVendor, upsertPriceObservation } from "../utils/db"; +import { + pool, + findOrCreateScrapedTransceiver, + ensureVendor, + markDetailsVerified, + markImageVerified, + upsertPriceObservation, +} from "../utils/db"; import { contentHash } from "../utils/hash"; import * as cheerio from "cheerio"; @@ -55,14 +62,23 @@ function detectFormFactor(text: string): { formFactor: string; speed: string; sp function detectReach(text: string): { label: string; meters: number } | undefined { const patterns: [RegExp, string, number][] = [ + [/\b160\s*km\b/i, "160km", 160000], + [/\b140\s*km\b/i, "140km", 140000], + [/\b120\s*km\b/i, "120km", 120000], [/\b80\s*km\b/i, "80km", 80000], + [/\b50\s*km\b/i, "50km", 50000], [/\b40\s*km\b/i, "40km", 40000], + [/\b30\s*km\b/i, "30km", 30000], [/\b20\s*km\b/i, "20km", 20000], + [/\b15\s*km\b/i, "15km", 15000], [/\b10\s*km\b/i, "10km", 10000], [/\b2\s*km\b/i, "2km", 2000], + [/\b1\s*km\b/i, "1km", 1000], [/\b550\s*m\b/i, "550m", 550], + [/\b500\s*m\b/i, "500m", 500], [/\b300\s*m\b/i, "300m", 300], [/\b100\s*m\b/i, "100m", 100], + [/\b82\s*m\b/i, "82m", 82], [/\bLR4\b/, "10km", 10000], [/\bLR\b/, "10km", 10000], [/\bER4?\b/, "40km", 40000], [/\bZR4?\b/, "80km", 80000], [/\bSR4?\b/, "300m", 300], [/\bDR4?\b/, "500m", 500], [/\bFR4?\b/, "2km", 2000], @@ -70,6 +86,24 @@ function detectReach(text: string): { label: string; meters: number } | undefine for (const [regex, label, meters] of patterns) { if (regex.test(text)) return { label, meters }; } + const km = text.match(/\b(\d+(?:\.\d+)?)\s*km\b/i); + if (km) { + const value = Number(km[1]); + if (value > 0 && value <= 200) return { label: `${Number.isInteger(value) ? value : value.toString()}km`, meters: Math.round(value * 1000) }; + } + const meters = text.match(/\b(\d+(?:\.\d+)?)\s*m\b/i); + if (meters) { + const value = Number(meters[1]); + if (value > 0 && value <= 10000) return { label: `${Number.isInteger(value) ? value : value.toString()}m`, meters: Math.round(value) }; + } + const miles = text.match(/\b(\d+(?:\.\d+)?)\s*(?:mi|miles?)\b/i); + if (miles) { + const value = Number(miles[1]); + if (value > 0 && value <= 125) { + const kmRounded = Math.round(value * 1.609344); + return { label: `${kmRounded}km`, meters: kmRounded * 1000 }; + } + } return undefined; } @@ -85,19 +119,29 @@ function detectWavelength(text: string): string { return match ? match[1] : ""; } +function cleanSku(text: string): string { + return text.replace(/\s+/g, "").replace(/GAOTek-/i, "GAOTek-").trim(); +} + +function isProductUrl(url: string): boolean { + return /^https:\/\/gaotek\.com\/product\/[^/?#]+\/?$/i.test(url); +} + function parseProductList(html: string): Product[] { const $ = cheerio.load(html); const products: Product[] = []; - // WooCommerce product grid - $("li.product, .product-item, .woocommerce-loop-product, article.product").each((_i, el) => { - const titleEl = $(el).find(".woocommerce-loop-product__title, h2, h3, .product-title, .product-name").first(); + // WooCommerce/Woodmart product grid. GAO Tek currently uses Woodmart + // `.wd-product.product-grid-item` cards rather than classic `li.product`. + $(".wd-product.product-grid-item, div.product-grid-item, li.product, .product-item, .woocommerce-loop-product, article.product").each((_i, el) => { + const titleEl = $(el).find(".wd-entities-title a, .woocommerce-loop-product__title, h2, h3, .product-title, .product-name").first(); const name = titleEl.text().trim(); if (!name || name.length < 5) return; - const linkEl = $(el).find("a[href]").first(); + const linkEl = $(el).find("a.wd-product-img-link[href], .wd-entities-title a[href], a[href]").first(); const href = linkEl.attr("href") || ""; const url = href.startsWith("http") ? href : BASE + href; + if (!isProductUrl(url)) return; // WooCommerce price const priceText = $(el).find(".price, .woocommerce-Price-amount, .amount").text(); @@ -109,8 +153,8 @@ function parseProductList(html: string): Product[] { } // GAO Tek uses SKU for part numbers - const skuEl = $(el).find(".sku, [data-sku]"); - const partNumber = skuEl.text().trim() || + const skuEl = $(el).find(".wd-sku, .sku, [data-sku]"); + const partNumber = cleanSku(skuEl.text()) || url.split("/").filter(Boolean).pop()?.replace(/-/g, " ").trim() || name.match(/[A-Z0-9][-A-Z0-9]{5,}/)?.[0] || name.replace(/\s+/g, "-").slice(0, 60); @@ -141,15 +185,16 @@ function parseProductList(html: string): Product[] { // Fallback for non-WooCommerce layout if (products.length === 0) { - $("a[href]").each((_i, el) => { + $("a[href*='/product/']").each((_i, el) => { const name = $(el).text().trim(); const href = $(el).attr("href") || ""; + const url = href.startsWith("http") ? href : BASE + href; if ( name.length < 8 || name.length > 200 || + !isProductUrl(url) || !/sfp|qsfp|xfp|transceiver|optic/i.test(name) ) return; - const url = href.startsWith("http") ? href : BASE + href; const context = $(el).parent().parent().text(); const priceMatch = context.match(/\$\s*([\d,]+\.?\d{0,2})/); let price: number | undefined; @@ -167,7 +212,9 @@ function parseProductList(html: string): Product[] { ? (rawImg.startsWith("http") ? rawImg : BASE + rawImg) : undefined; products.push({ - partNumber: name.match(/[A-Z0-9][-A-Z0-9]{5,}/)?.[0] || name.replace(/\s+/g, "-").slice(0, 60), + partNumber: cleanSku(context.match(/SKU:\s*([A-Z0-9][A-Z0-9\-\s]{4,})/i)?.[1] || "") || + name.match(/[A-Z0-9][-A-Z0-9]{5,}/)?.[0] || + name.replace(/\s+/g, "-").slice(0, 60), name, url, price, ...ff, reachLabel: reach?.label, reachMeters: reach?.meters, fiberType: detectFiber(name), wavelength: detectWavelength(name), @@ -178,7 +225,7 @@ function parseProductList(html: string): Product[] { const seen = new Set(); return products.filter((p) => { - if (!p.url || seen.has(p.url)) return false; + if (!p.url || !isProductUrl(p.url) || seen.has(p.url)) return false; seen.add(p.url); return true; }); @@ -190,6 +237,114 @@ async function fetchPage(url: string): Promise { return resp.text(); } +async function saveGaoTekProduct(vendorId: string, product: Product): Promise { + const existingByUrl = await pool.query<{ id: string; part_number: string }>( + `SELECT id, part_number + FROM transceivers + WHERE vendor_id = $1 + AND product_page_url = $2 + LIMIT 1`, + [vendorId, product.url] + ); + + if (existingByUrl.rows.length > 0) { + const existing = existingByUrl.rows[0]; + await pool.query( + `UPDATE transceivers + SET part_number = CASE + WHEN $3::text != '' + AND (part_number IS NULL + OR part_number = '' + OR ($3::text ~* '^GAOTek-' AND part_number !~* '^GAOTek-') + OR part_number LIKE '% %' + OR length(part_number) > 45 + OR part_number ~* '^(fiber|optical|gbic|sfp|qsfp|cfp).*transceivers?$') + THEN $3::text + ELSE part_number + END, + product_page_url = COALESCE(NULLIF(product_page_url, ''), NULLIF($2, '')), + form_factor = COALESCE(NULLIF(form_factor, ''), $4), + speed_gbps = CASE WHEN speed_gbps IS NULL OR speed_gbps = 0 THEN COALESCE($5, speed_gbps) ELSE speed_gbps END, + speed = COALESCE(NULLIF(speed, ''), $6), + reach_meters = CASE WHEN reach_meters IS NULL OR reach_meters = 0 THEN COALESCE($7, reach_meters) ELSE reach_meters END, + reach_label = COALESCE(NULLIF(reach_label, ''), $8), + fiber_type = COALESCE(NULLIF(fiber_type, ''), $9), + wavelengths = COALESCE(NULLIF(wavelengths, ''), $10), + category = COALESCE(NULLIF(category, ''), $11), + data_confidence = CASE + WHEN data_confidence IS NULL OR data_confidence IN ('unknown', 'enriched_estimated') + THEN 'scraped_unverified' + ELSE data_confidence + END, + updated_at = NOW() + WHERE id = $1`, + [ + existing.id, + product.url, + product.partNumber, + product.formFactor || null, + product.speedGbps || null, + product.speed || null, + product.reachMeters || null, + product.reachLabel || null, + product.fiberType || null, + product.wavelength || null, + "DataCenter", + ] + ); + + if (product.imageUrl) await markImageVerified(existing.id, product.imageUrl); + await markDetailsVerified({ transceiverId: existing.id, sourceUrl: product.url }); + return existing.id; + } + + return findOrCreateScrapedTransceiver({ + partNumber: product.partNumber, + vendorId, + productUrl: product.url, + formFactor: product.formFactor, + speedGbps: product.speedGbps, + speed: product.speed, + reachMeters: product.reachMeters, + reachLabel: product.reachLabel, + fiberType: product.fiberType, + wavelengths: product.wavelength, + category: "DataCenter", + imageUrl: product.imageUrl, + }); +} + +async function quarantineGaoTekCategoryArtifacts(vendorId: string): Promise { + const result = await pool.query( + `UPDATE transceivers + SET data_confidence = 'unknown', + price_verified = false, + image_verified = false, + details_verified = false, + fully_verified = false, + price_verified_at = NULL, + image_verified_at = NULL, + details_verified_at = NULL, + fully_verified_at = NULL, + updated_at = NOW() + WHERE vendor_id = $1 + AND ( + product_page_url IS NULL + OR product_page_url = '' + OR product_page_url LIKE 'https://gaotek.com/category/%' + ) + AND ( + COALESCE(data_confidence, 'unknown') != 'unknown' + OR price_verified = true + OR image_verified = true + OR details_verified = true + OR fully_verified = true + )`, + [vendorId] + ); + return result.rowCount ?? 0; +} + export async function scrapeGaoTek(): Promise { console.log("=== GAO Tek Scraper Starting ===\n"); @@ -233,25 +388,15 @@ export async function scrapeGaoTek(): Promise { console.log(`\nTotal unique products: ${allProducts.length}`); + const quarantined = await quarantineGaoTekCategoryArtifacts(vendorId); + if (quarantined > 0) console.log(`Quarantined ${quarantined} GAO Tek category/non-product artifacts`); + let totalProducts = 0; let priceUpdates = 0; for (const product of allProducts) { try { - const txId = await findOrCreateScrapedTransceiver({ - partNumber: product.partNumber, - vendorId, - productUrl: product.url, - formFactor: product.formFactor, - speedGbps: product.speedGbps, - speed: product.speed, - reachMeters: product.reachMeters, - reachLabel: product.reachLabel, - fiberType: product.fiberType, - wavelengths: product.wavelength, - category: "DataCenter", - imageUrl: product.imageUrl, - }); + const txId = await saveGaoTekProduct(vendorId, product); if (product.price && product.price > 0) { const hash = contentHash(JSON.stringify({ price: product.price, part: product.partNumber })); diff --git a/packages/scraper/src/utils/backfill-images.ts b/packages/scraper/src/utils/backfill-images.ts index 9694d81..968788c 100644 --- a/packages/scraper/src/utils/backfill-images.ts +++ b/packages/scraper/src/utils/backfill-images.ts @@ -284,7 +284,7 @@ async function backfillFlexoptix(): Promise<{ updated: number; skipped: number; SELECT t.id, t.part_number FROM transceivers t JOIN vendors v ON t.vendor_id = v.id - WHERE v.name = 'FLEXOPTIX' AND (t.image_url IS NULL OR t.image_url = '') + WHERE UPPER(v.name) = 'FLEXOPTIX' AND (t.image_url IS NULL OR t.image_url = '') ORDER BY t.part_number `); @@ -427,6 +427,11 @@ async function backfillGaoTek(): Promise<{ updated: number; skipped: number; err // ============================================================================= const OTHER_VENDOR_NAMES = [ + "Cisco Systems", + "Juniper Networks", + "Arista Networks", + "10Gtek", + "QSFPTEK", "T&S Communication", "Ascent Optics", "ATGBICS", @@ -436,6 +441,9 @@ const OTHER_VENDOR_NAMES = [ "FS.COM", "GBICS", "Fluxlight", + "SFPcables", + "II-VI / Coherent", + "NADDOD", ]; async function backfillOtherVendors(): Promise<{ total: number; updated: number }> { diff --git a/packages/scraper/src/utils/verify-catalog-details.ts b/packages/scraper/src/utils/verify-catalog-details.ts new file mode 100644 index 0000000..f2d7c5c --- /dev/null +++ b/packages/scraper/src/utils/verify-catalog-details.ts @@ -0,0 +1,88 @@ +/** + * Catalog Details Verifier + * + * Promotes catalog-derived OEM rows to details_verified when the row already + * has complete normalized technical specs and the vendor has a public docs, + * datasheet-library, or website source recorded in the vendors table. + * + * This deliberately does not verify price/image/competitor signals. + */ +import { pool, checkAndSetFullyVerified } from "./db"; +import { logger } from "./logger"; + +const DEFAULT_LIMIT = 5000; + +interface Candidate { + id: string; +} + +async function verifyCatalogDetails(limit: number): Promise { + logger.info("=== Catalog Details Verifier ===", { limit }); + + const candidates = await pool.query(` + WITH candidate AS ( + SELECT + t.id, + COALESCE(NULLIF(v.datasheet_library_url, ''), NULLIF(v.docs_portal_url, ''), NULLIF(v.website, '')) AS source_url + FROM transceivers t + JOIN vendors v ON v.id = t.vendor_id + WHERE COALESCE(t.details_verified, false) = false + AND COALESCE(t.data_confidence, 'unknown') != 'garbage' + AND COALESCE(t.product_page_url, '') NOT LIKE '%/category/%' + AND COALESCE(NULLIF(v.datasheet_library_url, ''), NULLIF(v.docs_portal_url, ''), NULLIF(v.website, '')) IS NOT NULL + AND t.part_number IS NOT NULL + AND t.part_number != '' + AND t.form_factor IS NOT NULL + AND t.form_factor != '' + AND t.speed_gbps IS NOT NULL + AND t.speed_gbps > 0 + AND t.reach_label IS NOT NULL + AND t.reach_label != '' + AND t.fiber_type IS NOT NULL + AND t.fiber_type != '' + ORDER BY + CASE + WHEN v.type = 'oem' THEN 0 + WHEN v.vendor_category IN ('network_switching', 'optics_vendor') THEN 1 + ELSE 2 + END, + v.name, + t.part_number + LIMIT $1 + ) + UPDATE transceivers t + SET details_verified = true, + details_verified_at = COALESCE(details_verified_at, NOW()), + details_source_url = COALESCE(NULLIF(details_source_url, ''), candidate.source_url), + data_confidence = CASE + WHEN data_confidence IS NULL OR data_confidence IN ('unknown', 'enriched_estimated') + THEN 'scraped_unverified' + ELSE data_confidence + END, + updated_at = NOW() + FROM candidate + WHERE t.id = candidate.id + RETURNING t.id + `, [limit]); + + let fullyVerifiedEarned = 0; + for (const row of candidates.rows) { + if (await checkAndSetFullyVerified(row.id)) fullyVerifiedEarned++; + } + + logger.info("Catalog details verifier complete", { + details_verified: candidates.rowCount ?? 0, + fully_verified_earned: fullyVerifiedEarned, + }); +} + +if (require.main === module) { + const limit = Math.max(1, parseInt(process.env.CATALOG_DETAILS_LIMIT || String(DEFAULT_LIMIT), 10)); + verifyCatalogDetails(limit) + .then(() => pool.end()) + .catch((err) => { + logger.error("Catalog details verifier failed", { error: (err as Error).message }); + pool.end(); + process.exit(1); + }); +} diff --git a/sync/CURRENT.md b/sync/CURRENT.md index bbd95f8..7727210 100644 --- a/sync/CURRENT.md +++ b/sync/CURRENT.md @@ -1,9 +1,65 @@ # Current TIP Sync State -Updated: 2026-05-09 18:07 UTC +Updated: 2026-05-09 18:16 UTC ## Newest Work +- TIP global verification continuation on 2026-05-09: + - operator requirement: + - continue until all possible product data is searched, found, verified, and source-backed + - no external AI; use TIP deterministic scrapers/robots only + - keep Erik safe; do not launch a heavy crawler wave + - write crawler/scraper/robot learnings into the TIPLLM training pool + - deployed fixes: + - repaired GAO Tek scraper for the live Woodmart product grid: + - current selector is `.wd-product.product-grid-item` + - product title selector includes `.wd-entities-title a` + - SKU selector includes `.wd-sku` + - fallback now only accepts real `https://gaotek.com/product/...` URLs + - category URLs are excluded from active verification/search counters + - expanded GAO reach parsing: + - 1/2/10/15/20/30/40/50/80/120/140/160 km + - 82/100/300/500/550 m + - mile values converted to rounded km labels + - added `packages/scraper/src/utils/verify-catalog-details.ts` + - promotes details only for complete normalized catalog specs with a vendor website/docs/datasheet source URL + - does not mark price/image/competitor verified + - hardened scheduler reconcile so category URLs are not promoted as details source + - fixed Flexoptix image backfill vendor-name case bug (`Flexoptix` vs `FLEXOPTIX`) + - expanded other-vendor image backfill list for Cisco, Juniper, Arista, 10Gtek, QSFPTEK, SFPcables, Coherent, NADDOD + - crawler/robot runs: + - GAO Tek scraper: + - fetched 20 pages + - extracted 480 real product cards + - found 0 public prices + - reset 6 category/non-product artifacts + - pi-fetch priority wave: + - GAO Tek, Juniper OEM/MX/QFX, Cisco Nexus/Catalyst/ASR, Ascent, Eoptolink, Flexoptix, Flexoptix supported vendors, Arista OEM + - all jobs completed + - reconcile completed + - equivalence matcher completed + - catalog-details verifier promoted 4,340 details + - image backfill: + - first expanded run updated 48 images + - Flexoptix case fix then updated 12 additional images + - live public TIP health after this pass: + - status `healthy` + - load status `ok` + - memory used `13%` + - active total `17,714` + - `price_verified=11,582` + - `image_verified=12,194` + - `details_verified=16,684` + - `fully_verified=11,052` + - hard truth: + - GAO Tek appears quote-only/no public price in the crawled catalog, so prices remain unverified rather than fabricated + - many OEM rows now have verified details but still lack public prices/images/competitor evidence + - Flexoptix still has 110 image-missing SKUs after GraphQL returned no usable image for those SKUs + - top remaining blockers are mostly public price/image/competitor availability, not detail parsing + - TIPLLM training pool: + - appended `robot-experiences/2026-05-09.jsonl` + - validated JSONL locally + - MAGATAMA FO_BlogLLM RunPod training and adoption closure on 2026-05-09: - operator requirement: - training success must only count after artifact exists, local import works, smoke tests pass, Ollama alias/version switches, remote MAGATAMA registry is updated, and the live UI reports no active stale job diff --git a/sync/history/2026-05-09-tip-global-verification-continuation.md b/sync/history/2026-05-09-tip-global-verification-continuation.md new file mode 100644 index 0000000..b2a8713 --- /dev/null +++ b/sync/history/2026-05-09-tip-global-verification-continuation.md @@ -0,0 +1,57 @@ +# TIP Global Verification Continuation — 2026-05-09 + +## Scope + +- Continue TIP verification with deterministic scrapers/robots only. +- Keep Erik safe; no heavy Playwright/proxmox-heavy wave. +- Write learnings into the TIPLLM training pool. + +## Implemented + +- Repaired GAO Tek scraper for the current Woodmart product-card layout. +- Excluded category URLs from active product verification/search counters. +- Added a catalog-details verifier for complete source-backed OEM/catalog specs. +- Fixed Flexoptix image backfill case sensitivity. +- Expanded `og:image` backfill vendor coverage. +- Hardened scheduler reconcile so category URLs are not promoted as details source. + +## Live Runs + +- GAO Tek: + - 20 pages fetched. + - 480 real product cards extracted. + - 0 public prices found. + - 6 category/non-product artifacts reset. +- Priority pi-fetch wave: + - GAO Tek, Juniper OEM/MX/QFX, Cisco Nexus/Catalyst/ASR, Ascent, Eoptolink, Flexoptix, Flexoptix supported vendors, Arista OEM. + - All jobs completed. +- Reconcile completed. +- Equivalence matcher completed. +- Catalog-details verifier: + - 4,340 details verified. +- Image backfill: + - 48 images from expanded vendor list. + - 12 additional Flexoptix images after case-insensitive vendor fix. + +## Final Observed State + +- Public health: healthy. +- Load: ok. +- Memory: 13%. +- Active total: 17,714. +- Price verified: 11,582. +- Image verified: 12,194. +- Details verified: 16,684. +- Fully verified: 11,052. + +## Remaining Truth + +- GAO Tek is quote-only/no public price in the crawled catalog; prices were not fabricated. +- Many OEM rows now have verified details but still need public images/prices/competitor evidence. +- Flexoptix still has 110 image-missing SKUs after GraphQL returned no image. +- Top remaining blockers are dominated by price/image/competitor availability. + +## Training Pool + +- Appended one JSONL event to `/tmp/tip-training-data/robot-experiences/2026-05-09.jsonl`. +- JSONL validated successfully.