diff --git a/robot-experiences/2026-05-09.jsonl b/robot-experiences/2026-05-09.jsonl index 6baec12..c9ccb1d 100644 --- a/robot-experiences/2026-05-09.jsonl +++ b/robot-experiences/2026-05-09.jsonl @@ -11,3 +11,5 @@ {"event":"crawler_result","observed_at":"2026-05-09T11:58:00Z","actor":"codex-fscom-url-discovery-and-reconcile","profile":"erik-safe-playwright-url-discovery","wave":"manual-targeted-url-discovery","vendor":"FS.COM","summary":"Added and ran FS.com URL-discovery mode for historical FS- rows without product URLs, then reran DB-detail reconciliation with H1-derived detail evidence. This converted almost all FS.com no-url rows into source-linked product rows and substantially increased image/detail coverage.","input":{"user_request":"Continue and finish FS.com/Fiberstore; all crawler/scraper/robot work must go to TIPLLM training pool. Operator also asked whether Crawlee repos can help; use that evaluation to improve safe crawler architecture.","precheck":{"total_fs_rows":383,"price_verified":379,"image_verified":299,"details_verified":260,"price_image_details_verified":260,"fully_verified":205,"missing_url":76,"missing_image_url":84,"missing_reach_label":9,"missing_fiber_type":123,"html_product_like_rows":299,"html_complete":258}},"decision":{"code_changes":["Added FS_URL_DISCOVERY_ONLY mode that maps existing FS- rows without product_page_url to https://www.fs.com/de/products/.html and writes evidence onto the existing DB row instead of creating duplicates.","DB_DETAIL_ONLY rows now carry targetTransceiverId so targeted reconciliation updates the original row.","Image verification is marked for target rows when a current FS.com product image is seen.","Detail verification now accepts deterministic source evidence from H1/part/spec text, not only classical spec tables, when fiber/reach/wavelength/connector can be inferred."],"runtime_policy":"Run URL discovery in safe bounded batches on Erik with Playwright concurrency 1, nice -n 10, no category crawl, and 
post-run DB measurement."},"outcome":{"url_discovery_pilot":{"target":20,"scraped":19,"failed":0,"new_price_observations":19,"new_stock_observations":17,"specs_verified":3,"post_missing_url":57},"url_discovery_full":{"target":56,"scraped":55,"failed":1,"failed_url":"https://www.fs.com/de/products/229461.html","new_price_observations":55,"new_stock_observations":43,"specs_verified":55},"reconciliation":{"target":57,"scraped":55,"failed":0,"new_price_observations":41,"new_stock_observations":40,"specs_verified":55,"notable_no_text":["https://www.fs.com/de/products/110529.html","https://www.fs.com/de/products/167147.html"]},"postcheck":{"total_fs_rows":383,"price_verified":379,"image_verified":374,"details_verified":373,"price_image_details_verified":373,"fully_verified":205,"missing_url":2,"missing_image_url":9,"missing_reach_label":4,"missing_fiber_type":9,"html_product_like_rows":373,"html_image_verified":372,"html_details_verified":371,"html_complete":371,"no_url_rows":2,"category_rows":4},"tip_health":{"status":"healthy","transceiver_count":17647,"vendor_count":478,"switch_count":680,"price_verified":11557,"image_verified":10711,"details_verified":9929,"fully_verified":8526,"load_status":"ok","memory_used_pct":13}},"safety_notes":["No external AI was used for TIP crawler planning or extraction feedback.","Erik remained healthy; Crawlee AutoscaledPool stayed at concurrency 1 and reported no CPU/memory/event-loop overload.","Do not claim FS.com is complete: 371 of 373 HTML product-like rows are complete, but 2 no-url rows, 4 category rows, and 9 image/fiber gaps remain."]} {"event":"crawler_runtime_integration","observed_at":"2026-05-09T12:12:00Z","actor":"codex-crawlee-binding","profile":"crawlee-core-plus-python-side-worker","wave":"runtime-integration","vendor":"all","summary":"Installed and bound Crawlee runtime work into TIP. 
TypeScript Crawlee remains the production core; Crawlee-Python was added as an optional isolated evidence worker for Pi/Proxmox/residential nodes.","input":{"user_request":"Install, use and bind apify/crawlee and crawlee-python for TIP.","repos_evaluated":["apify/crawlee","apify/crawlee-python","hiteshchoudhary/crawlee-project"]},"decision":{"code_changes":["Added package scripts for FS.com DB-detail and URL-discovery Crawlee modes.","Added packages/crawlee-python optional worker package.","Added scripts/setup-crawlee-python-worker.sh.","Added docs/TIP_CRAWLEE_RUNTIME.md.","Added package-local .gitignore so Python build artifacts are not committed."],"runtime_policy":"Production writes remain in the TypeScript TIP scraper. Crawlee-Python writes JSONL evidence only and must not directly mutate TIP DB."},"outcome":{"tip_commits":["60531b6 feat: add crawlee python worker integration","49f0871 chore: ignore crawlee python build artifacts"],"python_smoke_test":{"venv":"/tmp/tip-crawlee-python-venv","package":"crawlee==1.6.3","url":"https://crawlee.dev","result":"JSONL evidence output succeeded"},"safety_notes":["Use low concurrency on Erik.","Move heavier experiments to Pi/Proxmox/residential workers.","No external AI was used."]}} {"event":"equivalence_revalidation_result","observed_at":"2026-05-09T12:16:00Z","actor":"codex-immediate-full-equivalence-revalidation","profile":"erik-safe-db-only","wave":"manual-immediate-full-active-research","vendor":"all-active-equivalence-matches","summary":"Revalidated all active approved/auto-approved TIP equivalence matches immediately with deterministic 1:1 DB evidence rules. 
No crawler wave was started.","input":{"user_request":"Verify and validate everything open in TIP immediately and check whether products truly match 1:1.","precheck":{"pending":0,"approved":1986,"auto_approved":32080,"rejected":148367,"due_research":0,"active_matches":34066,"no_recent_price_gaps":0,"hard_technical_mismatches":0,"missing_critical_evidence":0}},"decision":{"criteria":["recent competitor price evidence","form factor exact","speed exact","fiber type exact","reach ratio >= 0.85","primary wavelength within 15nm"],"runtime_policy":"Mark all active matches due immediately and process with existing PgBoss maintenance:re-research-equivalences worker. Keep work DB-only to protect Erik."},"outcome":{"marked_due":34066,"jobs_queued":18,"jobs_completed":18,"due_research_after":0,"active_researched_today":34066,"rejected_in_immediate_pass":0,"final_queue":{"pending":0,"approved":1986,"auto_approved":32080,"rejected":148367},"verification_counters":{"competitor_verified":11470,"fully_verified":9135,"price_verified":11557,"image_verified":10711,"details_verified":9929,"total":17647},"tip_health":{"status":"healthy","load_status":"ok","memory_used_pct":13}},"truth_policy":"Manual equivalence queue is empty and active matches were just rechecked, but not every product row in TIP is fully complete; remaining work is vendor-specific product source enrichment.","safety_notes":["No external AI was used.","No broad browser crawler wave was started.","Continue targeted vendor crawlers or offload heavy discovery to Pi/Proxmox workers."]} +{"event":"crawler_result","observed_at":"2026-05-09T13:45:00Z","actor":"codex-targeted-vendor-verification","profile":"erik-safe-fetch-db-detail","wave":"manual-vendor-verification","vendor":"NADDOD+GAO Tek+Ascent Optics+Eoptolink","summary":"Improved vendor verification by making existing scrapers write product URLs, image evidence and detail evidence, then running low-load static/DB-detail passes. 
No broad browser crawler wave was used.","input":{"user_request":"Continue until everything is finished, but keep Erik safe and write crawler/scraper/robot learnings to TIPLLM training pool.","precheck":{"NADDOD":{"total":748,"price":744,"image":12,"details":157,"fully":0,"no_url":6},"GAO Tek":{"total":414,"price":2,"image":314,"details":41,"fully":2,"no_url":412},"Ascent Optics":{"total":305,"price":0,"image":0,"details":0,"fully":0,"no_url":305},"Eoptolink":{"total":287,"price":0,"image":0,"details":0,"fully":0,"no_url":287}}},"decision":{"code_changes":["NADDOD DB_DETAIL_ONLY mode verifies existing DB rows with product_page_url and missing image/details, instead of blind sitemap rotation.","NADDOD extracts og:image, page evidence, AOC/DAC lengths, DR/FR/SR/VR/XDR patterns and writes details to the target row.","GAO Tek writes product_page_url and image evidence.","Ascent Optics writes product_page_url and table image evidence.","Eoptolink writes product_page_url, image, reach/wavelength evidence and avoids over-broad form-factor parsing by preferring title/slug evidence."],"runtime_policy":"Run static/fetch and DB-detail passes with nice -n 10 and existing per-page delays. No Playwright wave on Erik."},"outcome":{"GAO Tek":{"processed":473,"details_after":126,"no_url_after":0},"Ascent Optics":{"processed":253,"image_after":235,"details_after":213},"Eoptolink":{"processed_pages":75,"image_after":287,"details_after":287},"NADDOD":{"targeted_pages_processed":968,"price_after":744,"image_after":742,"details_after":659,"fully_after":659,"no_url_after":6},"global_verification":{"price_verified":11557,"image_verified":11963,"details_verified":11018,"fully_verified":9794,"total":17647},"tip_health":{"status":"healthy","load_status":"ok","memory_used_pct":13}},"truth_policy":"NADDOD is much improved but not 100% complete. 
OEM catalogs without retail prices cannot honestly become fully_verified unless price evidence exists.","safety_notes":["No external AI was used.","No broad browser crawler wave was started.","Erik stayed healthy."]} +{"event":"source_correction","observed_at":"2026-05-09T13:54:00Z","actor":"codex-fscom-16t-variant-correction","profile":"source-truth-correction","wave":"operator-spotted-variant-fix","vendor":"FS.COM+Flexoptix","summary":"Corrected FS.com 1.6T OSFP family handling after the operator spotted separate 500m DR8 and 2km 2FR4 variants. The 500m DR8 variant maps to Flexoptix O.1316T.C.05.M; the 2km 2FR4 variant remains a separate FS.com product.","input":{"user_finding":"FS.com page clearly has OSFP-DR8-1.6T-FL 500m and OSFP-2FR4-1.6T-FL 2km. Flexoptix O.1316T.C.05.M is the 500m version; FR4 must also be in the DB.","precheck":{"FS.COM OSFP-DR8-1.6T-FL":{"reach_label":"500m","reach_meters":500,"wavelengths":null,"equivalence_to_O.1316T.C.05.M":"rejected due to missing wavelength"},"FS.COM OSFP-2FR4-1.6T-FL":{"reach_label":"2km","reach_meters":0,"speed":"200G","speed_gbps":1600},"Flexoptix O.1316T.C.05.M":{"reach_label":"500m","reach_meters":500,"wavelengths":"1311"}}},"decision":{"db_corrections":["Set FS OSFP-DR8-1.6T-FL to speed 1.6T, reach 500m/500, SMF, wavelength 1310, standard_name 1.6T OSFP DR8.","Set FS OSFP-2FR4-1.6T-FL to speed 1.6T, reach 2km/2000, SMF, wavelength 1310, standard_name 1.6T OSFP 2FR4.","Set Flexoptix O.1316T.C.05.M standard_name 1.6T OSFP DR8.","Approve only O.1316T.C.05.M to OSFP-DR8-1.6T-FL with confidence 0.913; keep 2FR4 separate."],"scraper_changes":["FS.com scraper recognizes 1,6T, 1.6T and 1600G as 1.6T/1600.","FS.com scraper converts reach labels such as 2km into reach_meters=2000.","FS.com scraper updates stale speed labels when numeric source speed matches the row."]},"outcome":{"postcheck":{"FS.COM 
OSFP-DR8-1.6T-FL":{"speed":"1.6T","speed_gbps":1600,"reach_label":"500m","reach_meters":500,"fiber_type":"SMF","wavelengths":"1310","standard_name":"1.6T OSFP DR8","fully_verified":true},"FS.COM OSFP-2FR4-1.6T-FL":{"speed":"1.6T","speed_gbps":1600,"reach_label":"2km","reach_meters":2000,"fiber_type":"SMF","wavelengths":"1310","standard_name":"1.6T OSFP 2FR4","fully_verified":true},"equivalence":"O.1316T.C.05.M <-> OSFP-DR8-1.6T-FL approved; no FR4-to-500m collapse"},"remote_build":"pnpm -C packages/scraper build passed on Erik"},"truth_policy":"Variant selectors that change reach/protocol/model are separate products. A 500m DR8 product must never be matched to a 2km 2FR4 product.","safety_notes":["No external AI was used.","Only DB correction and scraper parser hardening were done."]}