fix: revalidate flexoptix fs prices and images
This commit is contained in:
parent
fd29bee5cb
commit
ef225c7dc5
@ -68,8 +68,10 @@ import { updateVerifiedSpecs, parseSpecTable } from "../utils/spec-updater";
|
|||||||
// FS.com German locale uses www.fs.com/de/ (de.fs.com DNS does not exist)
|
// FS.com German locale uses www.fs.com/de/ (de.fs.com DNS does not exist)
|
||||||
const BASE_URL = "https://www.fs.com/de";
|
const BASE_URL = "https://www.fs.com/de";
|
||||||
const MAX_PAGES_PER_CATEGORY = 10;
|
const MAX_PAGES_PER_CATEGORY = 10;
|
||||||
const MAX_DETAIL_PAGES_PER_RUN = 300;
|
const MAX_DETAIL_PAGES_PER_RUN = parseInt(process.env["FS_MAX_DETAIL_PAGES_PER_RUN"] ?? "300", 10);
|
||||||
const STOCK_FRESH_HOURS = 12;
|
const STOCK_FRESH_HOURS = parseInt(process.env["FS_STOCK_FRESH_HOURS"] ?? "12", 10);
|
||||||
|
const FORCE_REVALIDATE = process.env["TIP_FORCE_REVALIDATE"] === "1";
|
||||||
|
const ONLY_MISSING_IMAGES = process.env["FS_ONLY_MISSING_IMAGES"] === "1";
|
||||||
|
|
||||||
const PROXY_URLS = (process.env["PROXY_URLS"] ?? "")
|
const PROXY_URLS = (process.env["PROXY_URLS"] ?? "")
|
||||||
.split(",")
|
.split(",")
|
||||||
@ -81,6 +83,12 @@ function buildProxyConfiguration(): ProxyConfiguration | undefined {
|
|||||||
return new ProxyConfiguration({ proxyUrls: PROXY_URLS });
|
return new ProxyConfiguration({ proxyUrls: PROXY_URLS });
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Canonicalises an FS.com product URL so that URLs collected from listing
 * pages and URLs read back from the database can be compared as plain
 * strings (they are used as `Set` keys by the caller).
 *
 * Normalisation steps, applied in order:
 *  1. collapse a doubled German locale prefix (`/de/de/` becomes `/de/`);
 *  2. drop the query string and the fragment;
 *  3. drop a single trailing slash.
 *
 * @param url - Absolute product URL.
 * @returns The normalised URL; an empty string is returned unchanged.
 */
function normalizeFsProductUrl(url: string): string {
  return url
    .replace(/^https:\/\/www\.fs\.com\/de\/de\//, "https://www.fs.com/de/")
    // Remove "?query" and "#fragment" alike: neither identifies a different
    // product page, and leaving a fragment in place would make the same
    // product produce two distinct keys.
    .replace(/[?#].*$/, "")
    .replace(/\/$/, "");
}
|
||||||
|
|
||||||
const CATEGORY_URLS = [
|
const CATEGORY_URLS = [
|
||||||
"/c/1g-sfp-81",
|
"/c/1g-sfp-81",
|
||||||
"/c/10g-sfp-63",
|
"/c/10g-sfp-63",
|
||||||
@ -524,12 +532,33 @@ async function scrapeProductDetails(
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
const imgEl = document.querySelector<HTMLImageElement>(
|
const imageCandidates = Array.from(document.querySelectorAll<HTMLImageElement>(
|
||||||
".product-image img, .prod-img img, .product-gallery img, " +
|
".big_img_box img, img.big_img, .big_img_m_active, .big_img_m, " +
|
||||||
|
".small_img_active img, .product-image img, .prod-img img, .product-gallery img, " +
|
||||||
'[class*="main-image"] img, [class*="primary-image"] img, ' +
|
'[class*="main-image"] img, [class*="primary-image"] img, ' +
|
||||||
".slick-current img, .product__image img"
|
".slick-current img, .product__image img"
|
||||||
);
|
)).map((img) => {
|
||||||
const imageUrl = imgEl?.src ?? imgEl?.getAttribute("data-src") ?? "";
|
const url =
|
||||||
|
img.currentSrc ||
|
||||||
|
img.getAttribute("data-src") ||
|
||||||
|
img.getAttribute("data-original") ||
|
||||||
|
img.getAttribute("data-lazy") ||
|
||||||
|
img.getAttribute("src") ||
|
||||||
|
"";
|
||||||
|
const cls = `${img.className || ""} ${img.parentElement?.className || ""}`;
|
||||||
|
const score =
|
||||||
|
/big_img_box|big_img|big_img_m_active/.test(cls) ? 100 :
|
||||||
|
/small_img_active/.test(cls) ? 50 :
|
||||||
|
10;
|
||||||
|
return { url, score, w: img.naturalWidth || 0, h: img.naturalHeight || 0 };
|
||||||
|
}).filter((candidate) =>
|
||||||
|
candidate.url &&
|
||||||
|
/resource\.fs\.com/.test(candidate.url) &&
|
||||||
|
!/default\.jpg|generalImg|logo|icon|svg/i.test(candidate.url) &&
|
||||||
|
(candidate.w === 0 || candidate.w >= 120) &&
|
||||||
|
(candidate.h === 0 || candidate.h >= 120)
|
||||||
|
).sort((a, b) => b.score - a.score || (b.w * b.h) - (a.w * a.h));
|
||||||
|
const imageUrl = imageCandidates[0]?.url ?? "";
|
||||||
|
|
||||||
const dsEl = document.querySelector<HTMLAnchorElement>(
|
const dsEl = document.querySelector<HTMLAnchorElement>(
|
||||||
'a[href*="datasheet"], a[href*=".pdf"], a[download][href*=".pdf"]'
|
'a[href*="datasheet"], a[href*=".pdf"], a[download][href*=".pdf"]'
|
||||||
@ -759,7 +788,7 @@ export async function scrapeFs(): Promise<void> {
|
|||||||
// ── Filter: skip products with fresh stock data ─────────────────────────────
|
// ── Filter: skip products with fresh stock data ─────────────────────────────
|
||||||
const allPartNumbers = [...productMap.values()].map((p) => p.partNumber).filter(Boolean);
|
const allPartNumbers = [...productMap.values()].map((p) => p.partNumber).filter(Boolean);
|
||||||
const freshlyScraped = new Set<string>();
|
const freshlyScraped = new Set<string>();
|
||||||
if (allPartNumbers.length > 0) {
|
if (!FORCE_REVALIDATE && allPartNumbers.length > 0) {
|
||||||
const freshResult = await pool.query(
|
const freshResult = await pool.query(
|
||||||
`SELECT DISTINCT t.part_number
|
`SELECT DISTINCT t.part_number
|
||||||
FROM transceivers t
|
FROM transceivers t
|
||||||
@ -772,12 +801,37 @@ export async function scrapeFs(): Promise<void> {
|
|||||||
for (const row of freshResult.rows) freshlyScraped.add(row.part_number as string);
|
for (const row of freshResult.rows) freshlyScraped.add(row.part_number as string);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
let missingImageUrls = new Set<string>();
|
||||||
|
if (ONLY_MISSING_IMAGES) {
|
||||||
|
const missingResult = await pool.query(
|
||||||
|
`SELECT DISTINCT product_page_url
|
||||||
|
FROM transceivers t
|
||||||
|
JOIN vendors v ON v.id = t.vendor_id
|
||||||
|
WHERE v.name = 'FS.COM'
|
||||||
|
AND COALESCE(t.image_verified, false) = false
|
||||||
|
AND product_page_url LIKE '%/products/%'`
|
||||||
|
);
|
||||||
|
missingImageUrls = new Set(
|
||||||
|
missingResult.rows
|
||||||
|
.map((row) => normalizeFsProductUrl(row.product_page_url as string))
|
||||||
|
.filter(Boolean)
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
const urlsToScrape = [...productMap.keys()]
|
const urlsToScrape = [...productMap.keys()]
|
||||||
.filter((url) => !freshlyScraped.has(productMap.get(url)?.partNumber ?? ""))
|
.filter((url) => !freshlyScraped.has(productMap.get(url)?.partNumber ?? ""))
|
||||||
|
.filter((url) => !ONLY_MISSING_IMAGES || missingImageUrls.has(normalizeFsProductUrl(url)))
|
||||||
.slice(0, MAX_DETAIL_PAGES_PER_RUN);
|
.slice(0, MAX_DETAIL_PAGES_PER_RUN);
|
||||||
|
|
||||||
console.log(`\n[Phase 2] Scraping ${urlsToScrape.length} detail pages`);
|
console.log(`\n[Phase 2] Scraping ${urlsToScrape.length} detail pages`);
|
||||||
console.log(` (${productMap.size - urlsToScrape.length} skipped — data ≤${STOCK_FRESH_HOURS}h fresh)`);
|
console.log(
|
||||||
|
ONLY_MISSING_IMAGES
|
||||||
|
? ` (${missingImageUrls.size} DB product URLs missing images; ${productMap.size - urlsToScrape.length} skipped by targeted image filter)`
|
||||||
|
:
|
||||||
|
FORCE_REVALIDATE
|
||||||
|
? ` (${productMap.size - urlsToScrape.length} skipped — max detail cap ${MAX_DETAIL_PAGES_PER_RUN})`
|
||||||
|
: ` (${productMap.size - urlsToScrape.length} skipped — data ≤${STOCK_FRESH_HOURS}h fresh)`
|
||||||
|
);
|
||||||
|
|
||||||
if (urlsToScrape.length === 0) {
|
if (urlsToScrape.length === 0) {
|
||||||
console.log("[Phase 2] All products have fresh stock data — nothing to scrape.");
|
console.log("[Phase 2] All products have fresh stock data — nothing to scrape.");
|
||||||
|
|||||||
@ -46,12 +46,18 @@ export async function markImageVerified(
|
|||||||
): Promise<boolean> {
|
): Promise<boolean> {
|
||||||
const result = await pool.query(
|
const result = await pool.query(
|
||||||
`UPDATE transceivers
|
`UPDATE transceivers
|
||||||
SET image_url = COALESCE(NULLIF(image_url, ''), $2::text),
|
SET image_url = CASE
|
||||||
|
WHEN image_url IS NULL
|
||||||
|
OR image_url = ''
|
||||||
|
OR image_url ~* '(placeholder|no-image|no_image|missing|default)'
|
||||||
|
THEN $2::text
|
||||||
|
ELSE image_url
|
||||||
|
END,
|
||||||
has_image = true,
|
has_image = true,
|
||||||
image_verified = true,
|
image_verified = true,
|
||||||
image_verified_at = COALESCE(image_verified_at, NOW()),
|
image_verified_at = NOW(),
|
||||||
image_verified_url = COALESCE(NULLIF(image_verified_url, ''), $2::text),
|
image_verified_url = $2::text,
|
||||||
image_scraped_at = COALESCE(image_scraped_at, NOW()),
|
image_scraped_at = NOW(),
|
||||||
updated_at = NOW()
|
updated_at = NOW()
|
||||||
WHERE id = $1
|
WHERE id = $1
|
||||||
AND $2::text IS NOT NULL
|
AND $2::text IS NOT NULL
|
||||||
@ -178,9 +184,20 @@ export async function upsertPriceObservation(params: {
|
|||||||
|
|
||||||
if (existing.rows.length > 0 && existing.rows[0].content_hash === params.contentHash && !isStale) {
|
if (existing.rows.length > 0 && existing.rows[0].content_hash === params.contentHash && !isStale) {
|
||||||
// Price unchanged and recent — still ensure verified flags are current
|
// Price unchanged and recent — still ensure verified flags are current
|
||||||
|
await pool.query(
|
||||||
|
`UPDATE price_observations
|
||||||
|
SET is_verified = true,
|
||||||
|
verified_at = COALESCE(verified_at, NOW())
|
||||||
|
WHERE transceiver_id = $1
|
||||||
|
AND source_vendor_id = $2
|
||||||
|
AND content_hash = $3
|
||||||
|
AND time > NOW() - INTERVAL '${REFRESH_DAYS} days'`,
|
||||||
|
[params.transceiverId, params.sourceVendorId, params.contentHash]
|
||||||
|
);
|
||||||
await pool.query(
|
await pool.query(
|
||||||
`UPDATE transceivers SET
|
`UPDATE transceivers SET
|
||||||
price_verified = true
|
price_verified = true,
|
||||||
|
price_verified_at = COALESCE(price_verified_at, NOW())
|
||||||
${isCompetitor ? ", competitor_verified = true, competitor_verified_at = COALESCE(competitor_verified_at, NOW())" : ""}
|
${isCompetitor ? ", competitor_verified = true, competitor_verified_at = COALESCE(competitor_verified_at, NOW())" : ""}
|
||||||
WHERE id = $1 AND (price_verified IS NULL OR price_verified = false OR ${isCompetitor ? "competitor_verified IS NULL OR competitor_verified = false" : "false"})`,
|
WHERE id = $1 AND (price_verified IS NULL OR price_verified = false OR ${isCompetitor ? "competitor_verified IS NULL OR competitor_verified = false" : "false"})`,
|
||||||
[params.transceiverId]
|
[params.transceiverId]
|
||||||
@ -190,8 +207,11 @@ export async function upsertPriceObservation(params: {
|
|||||||
}
|
}
|
||||||
|
|
||||||
await pool.query(
|
await pool.query(
|
||||||
`INSERT INTO price_observations (time, transceiver_id, source_vendor_id, price, currency, stock_level, quantity_available, lead_time_days, url, content_hash)
|
`INSERT INTO price_observations (
|
||||||
VALUES (NOW(), $1, $2, $3, $4, $5, $6, $7, $8, $9)`,
|
time, transceiver_id, source_vendor_id, price, currency, stock_level,
|
||||||
|
quantity_available, lead_time_days, url, content_hash, is_verified, verified_at
|
||||||
|
)
|
||||||
|
VALUES (NOW(), $1, $2, $3, $4, $5, $6, $7, $8, $9, true, NOW())`,
|
||||||
[
|
[
|
||||||
params.transceiverId,
|
params.transceiverId,
|
||||||
params.sourceVendorId,
|
params.sourceVendorId,
|
||||||
@ -210,6 +230,7 @@ export async function upsertPriceObservation(params: {
|
|||||||
await pool.query(
|
await pool.query(
|
||||||
`UPDATE transceivers SET
|
`UPDATE transceivers SET
|
||||||
price_verified = true,
|
price_verified = true,
|
||||||
|
price_verified_at = COALESCE(price_verified_at, NOW()),
|
||||||
competitor_verified = true,
|
competitor_verified = true,
|
||||||
competitor_verified_at = COALESCE(competitor_verified_at, NOW())
|
competitor_verified_at = COALESCE(competitor_verified_at, NOW())
|
||||||
WHERE id = $1`,
|
WHERE id = $1`,
|
||||||
@ -217,7 +238,11 @@ export async function upsertPriceObservation(params: {
|
|||||||
);
|
);
|
||||||
} else {
|
} else {
|
||||||
await pool.query(
|
await pool.query(
|
||||||
`UPDATE transceivers SET price_verified = true WHERE id = $1 AND (price_verified IS NULL OR price_verified = false)`,
|
`UPDATE transceivers
|
||||||
|
SET price_verified = true,
|
||||||
|
price_verified_at = COALESCE(price_verified_at, NOW())
|
||||||
|
WHERE id = $1
|
||||||
|
AND (price_verified IS NULL OR price_verified = false OR price_verified_at IS NULL)`,
|
||||||
[params.transceiverId]
|
[params.transceiverId]
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
@ -384,8 +409,8 @@ export async function findOrCreateScrapedTransceiver(params: {
|
|||||||
]
|
]
|
||||||
);
|
);
|
||||||
|
|
||||||
// Update image_url, has_image and image_verified if we have a new image for a record without one
|
// Re-validate image metadata whenever the scraper sees a current product image.
|
||||||
if (params.imageUrl && !existing.rows[0].image_url) {
|
if (params.imageUrl) {
|
||||||
await markImageVerified(existing.rows[0].id, params.imageUrl);
|
await markImageVerified(existing.rows[0].id, params.imageUrl);
|
||||||
}
|
}
|
||||||
if (params.productUrl) {
|
if (params.productUrl) {
|
||||||
|
|||||||
@ -1,9 +1,67 @@
|
|||||||
# Current TIP Sync State
|
# Current TIP Sync State
|
||||||
|
|
||||||
Updated: 2026-05-09 02:05 UTC
|
Updated: 2026-05-09 03:15 UTC
|
||||||
|
|
||||||
## Newest Work
|
## Newest Work
|
||||||
|
|
||||||
|
- TIP Flexoptix + FS.com price/image revalidation completed on 2026-05-09:
|
||||||
|
- live root cause:
|
||||||
|
- scraper runs had set `transceivers.price_verified`, but `price_observations.is_verified` stayed false
|
||||||
|
- FS.com product image selector was stale and missed current `.big_img` / `.big_img_m` product images
|
||||||
|
- code fixed:
|
||||||
|
- `packages/scraper/src/utils/db.ts`
|
||||||
|
- new/fresh unchanged price observations now get `is_verified = true` and `verified_at`
|
||||||
|
- `price_verified_at` is set the first time price verification is confirmed (`COALESCE(price_verified_at, NOW())` keeps an existing timestamp)
|
||||||
|
- image verification now refreshes `image_verified_at`, `image_verified_url`, and `image_scraped_at`
|
||||||
|
- existing records revalidate images whenever current scraper output contains an image URL
|
||||||
|
- `packages/scraper/src/scrapers/fs-com.ts`
|
||||||
|
- added `TIP_FORCE_REVALIDATE`
|
||||||
|
- added `FS_MAX_DETAIL_PAGES_PER_RUN`
|
||||||
|
- added `FS_ONLY_MISSING_IMAGES`
|
||||||
|
- updated FS.com image extraction to prefer current `resource.fs.com` product images from `.big_img_box`, `img.big_img`, `.big_img_m_active`, `.big_img_m`, `.small_img_active`
|
||||||
|
- rejects default/logo/general/icon/SVG image URLs
|
||||||
|
- live runs on Erik:
|
||||||
|
- `pnpm -C packages/scraper build` passed on `/opt/tip`
|
||||||
|
- Flexoptix catalog revalidation:
|
||||||
|
- 615 products processed
|
||||||
|
- 615 Flexoptix price observations marked verified
|
||||||
|
- 605 Flexoptix images verified in the run window
|
||||||
|
- FS.com full force revalidation:
|
||||||
|
- 270 products discovered
|
||||||
|
- 270 detail pages scraped
|
||||||
|
- 0 failed detail requests
|
||||||
|
- 17 new price observations in first full pass
|
||||||
|
- 266 FS.com price observations marked verified after first pass
|
||||||
|
- FS.com targeted missing-image revalidation:
|
||||||
|
- 99 detail pages scraped
|
||||||
|
- 0 failed detail requests
|
||||||
|
- FS.com image-verified products increased from 207 to 299
|
||||||
|
- FS.com verified price observations increased to 271 after targeted pass
|
||||||
|
- final checked counters:
|
||||||
|
- Flexoptix:
|
||||||
|
- products: 744
|
||||||
|
- product price_verified: 619
|
||||||
|
- product image_verified: 615
|
||||||
|
- price observation rows: 1288
|
||||||
|
- verified price observation rows: 615
|
||||||
|
- FS.COM:
|
||||||
|
- products: 383
|
||||||
|
- product price_verified: 379
|
||||||
|
- product image_verified: 299
|
||||||
|
- price observation rows: 818
|
||||||
|
- verified price observation rows: 271
|
||||||
|
- operations:
|
||||||
|
- `tip-scraper-daemon` restarted and is online
|
||||||
|
- Erik remained stable; final load was about `2.16, 2.22, 2.47`
|
||||||
|
- CT115 / `tip-scraper` SSH did not respond quickly from this session, so it was not used
|
||||||
|
- TIPLLM training pool:
|
||||||
|
- `/tmp/tip-training-data` was recloned from Gitea
|
||||||
|
- crawler experience was written to:
|
||||||
|
- `robot-experiences/2026-05-09.jsonl`
|
||||||
|
- `qa-pairs/robot-control-high.jsonl`
|
||||||
|
- pushed to Gitea commit:
|
||||||
|
- `850083f crawl: add flexoptix fs revalidation learning record`
|
||||||
|
|
||||||
- MAGATAMA dashboard truthfulness / UX hardening on 2026-05-09:
|
- MAGATAMA dashboard truthfulness / UX hardening on 2026-05-09:
|
||||||
- live `api/llm/status` on MAGATAMA now publicly confirms the corrected `magatamallm` lane counts:
|
- live `api/llm/status` on MAGATAMA now publicly confirms the corrected `magatamallm` lane counts:
|
||||||
- `15679` train / collected
|
- `15679` train / collected
|
||||||
|
|||||||
116
sync/history/2026-05-09-flexoptix-fs-price-image-revalidation.md
Normal file
116
sync/history/2026-05-09-flexoptix-fs-price-image-revalidation.md
Normal file
@ -0,0 +1,116 @@
|
|||||||
|
# 2026-05-09 Flexoptix + FS.com Price/Image Revalidation
|
||||||
|
|
||||||
|
## Request
|
||||||
|
|
||||||
|
Rene reported that many TIP prices, especially Flexoptix prices, were wrong and asked for all Flexoptix and FS.com prices to be fully revalidated and images checked.
|
||||||
|
|
||||||
|
Standing constraints were preserved:
|
||||||
|
|
||||||
|
- TIP crawler/robot planning and extraction feedback stays TIPLLM-only.
|
||||||
|
- No external AI was used for crawler planning or extraction feedback.
|
||||||
|
- Erik must not be overloaded.
|
||||||
|
- Robot/crawler experiences must be written into the Gitea-backed TIPLLM training pool.
|
||||||
|
- Work status must be written back to `sync/`.
|
||||||
|
|
||||||
|
## Root Cause
|
||||||
|
|
||||||
|
Two concrete issues were found:
|
||||||
|
|
||||||
|
1. `upsertPriceObservation` marked `transceivers.price_verified`, but inserted price rows did not set `price_observations.is_verified` or `verified_at`.
|
||||||
|
2. FS.com image extraction still used older selectors. Current FS.com product pages expose product images under `.big_img_box`, `img.big_img`, `.big_img_m_active`, `.big_img_m`, and `.small_img_active`, usually from `resource.fs.com/mall/mainImg/...`.
|
||||||
|
|
||||||
|
## Code Changed
|
||||||
|
|
||||||
|
- `packages/scraper/src/utils/db.ts`
|
||||||
|
- Price observations now set `is_verified = true` and `verified_at` for new observations.
|
||||||
|
- Fresh unchanged observations are backfilled to verified.
|
||||||
|
- `price_verified_at` is now populated whenever a price is verified; it is set once, and an existing timestamp is not overwritten.
|
||||||
|
- Image verification now refreshes `image_verified_at`, `image_verified_url`, and `image_scraped_at`.
|
||||||
|
- Existing transceivers now call `markImageVerified` whenever a scraper provides an image URL.
|
||||||
|
|
||||||
|
- `packages/scraper/src/scrapers/fs-com.ts`
|
||||||
|
- Added `TIP_FORCE_REVALIDATE`.
|
||||||
|
- Added `FS_MAX_DETAIL_PAGES_PER_RUN`.
|
||||||
|
- Added `FS_ONLY_MISSING_IMAGES`.
|
||||||
|
- Added URL normalization for FS.com product URLs.
|
||||||
|
- Updated image extraction to prefer current product image DOM and reject default/logo/general/icon/SVG URLs.
|
||||||
|
|
||||||
|
## Live Runs
|
||||||
|
|
||||||
|
All runs were executed sequentially and rate-limited on Erik after CT115 / `tip-scraper` SSH did not respond quickly enough from this session.
|
||||||
|
|
||||||
|
Build:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
pnpm -C packages/scraper build
|
||||||
|
```
|
||||||
|
|
||||||
|
Result: passed on `/opt/tip`.
|
||||||
|
|
||||||
|
Flexoptix:
|
||||||
|
|
||||||
|
- 615 products processed.
|
||||||
|
- 615 Flexoptix price observation rows marked verified.
|
||||||
|
- 605 Flexoptix images verified in the run window.
|
||||||
|
|
||||||
|
FS.com full force revalidation:
|
||||||
|
|
||||||
|
- 270 products discovered.
|
||||||
|
- 270 detail pages scraped.
|
||||||
|
- 0 failed detail requests.
|
||||||
|
- 17 new price observations.
|
||||||
|
- 266 FS.com price observations verified after the pass.
|
||||||
|
|
||||||
|
FS.com targeted missing-image pass:
|
||||||
|
|
||||||
|
- 99 DB product URLs without images matched current category listings.
|
||||||
|
- 99 detail pages scraped.
|
||||||
|
- 0 failed detail requests.
|
||||||
|
- FS.com image-verified products increased from 207 to 299.
|
||||||
|
- FS.com verified price observations increased to 271.
|
||||||
|
|
||||||
|
## Final Counters
|
||||||
|
|
||||||
|
Flexoptix:
|
||||||
|
|
||||||
|
- products: 744
|
||||||
|
- product price_verified: 619
|
||||||
|
- product image_verified: 615
|
||||||
|
- price observation rows: 1288
|
||||||
|
- verified price observation rows: 615
|
||||||
|
|
||||||
|
FS.COM:
|
||||||
|
|
||||||
|
- products: 383
|
||||||
|
- product price_verified: 379
|
||||||
|
- product image_verified: 299
|
||||||
|
- price observation rows: 818
|
||||||
|
- verified price observation rows: 271
|
||||||
|
|
||||||
|
## Operations
|
||||||
|
|
||||||
|
- `tip-scraper-daemon` restarted and is online.
|
||||||
|
- `tip-api` remained online.
|
||||||
|
- Erik remained stable; final load around `2.16, 2.22, 2.47`.
|
||||||
|
- The external dashboard health check (`curl`) failed once because of a local DNS resolution error; PM2 and DB checks were healthy.
|
||||||
|
|
||||||
|
## TIPLLM Training Pool
|
||||||
|
|
||||||
|
The local clone `/tmp/tip-training-data` was recreated from Gitea.
|
||||||
|
|
||||||
|
New records were written to:
|
||||||
|
|
||||||
|
- `robot-experiences/2026-05-09.jsonl`
|
||||||
|
- `qa-pairs/robot-control-high.jsonl`
|
||||||
|
|
||||||
|
Pushed to Gitea:
|
||||||
|
|
||||||
|
```text
|
||||||
|
850083f crawl: add flexoptix fs revalidation learning record
|
||||||
|
```
|
||||||
|
|
||||||
|
## Follow-Up
|
||||||
|
|
||||||
|
- FS.com still has 84 products without `image_verified`; 67 of those had no usable `/products/` URL in the current DB snapshot or were not found in current category listings.
|
||||||
|
- A future robot wave should specifically reconcile FS.com rows with blank/missing `product_page_url`.
|
||||||
|
- For future heavy FS.com work, prefer CT115/Proxmox/Pi once SSH reachability is confirmed; Erik should remain the controller or slow emergency runner only.
|
||||||
Loading…
x
Reference in New Issue
Block a user