From 350977e08d110ad2c018fa5f4c74bda6eb80c75a Mon Sep 17 00:00:00 2001 From: Rene Fichtmueller Date: Sat, 9 May 2026 17:37:38 +0200 Subject: [PATCH] crawl: add fibermall url backfill learning --- robot-experiences/2026-05-09.jsonl | 1 + 1 file changed, 1 insertion(+) diff --git a/robot-experiences/2026-05-09.jsonl b/robot-experiences/2026-05-09.jsonl index 30d88bc..35c0e70 100644 --- a/robot-experiences/2026-05-09.jsonl +++ b/robot-experiences/2026-05-09.jsonl @@ -25,3 +25,4 @@ {"event":"parser_hardening","observed_at":"2026-05-09T15:23:00Z","actor":"codex-shopfiber24-parser-hardening","profile":"local-code-plus-light-rsync","wave":"shopfiber24-deterministic-reach-guard","vendor":"ShopFiber24","summary":"Hardened ShopFiber24 reach and speed parsing so variable-length cable families are not falsely detail-verified and 800G/QSFP-DD800 text is not normalized as 400G.","input":{"risk_examples":["variable cable ranges like 1 - 30 m","800G or QSFP-DD800 product text"]},"decision":{"rules":["Reject variable reach ranges such as 1 - 30 m, 1 to 30 m, and 1 bis 30 m for detail verification.","Parse explicit single m/km values as deterministic reach.","Normalize 800G/QSFP-DD800 as 800G/800Gbps rather than 400G."],"runtime_policy":"Local parser patch plus light rsync/build; no crawler wave; no external AI."},"outcome":{"code_changed":["packages/scraper/src/scrapers/fiber24.ts"],"verification":{"local_scraper_build":"passed","remote_scraper_build":"passed"},"deployment":{"remote_file_synced":true}},"truth_policy":"ShopFiber24 variable cable-family rows require variant-level extraction; do not approve them as one fixed reach product.","safety_notes":["No external AI was used.","No browser crawler was started.","Erik stayed healthy after the small build."]} {"event":"parser_hardening","observed_at":"2026-05-09T15:33:00Z","actor":"codex-atgbics-parser-truth-hardening","profile":"local-code-plus-light-rsync","wave":"atgbics-fiber-default-and-range-guard","vendor":"ATGBICS","summary":"Hardened ATGBICS parser so unknown fiber type no longer defaults to SMF and variable reach ranges are not treated as deterministic reach evidence.","input":{"risk_examples":["unknown fiber text defaulting to SMF","variable reach ranges like 1 - 30 m"]},"decision":{"rules":["Reject variable reach ranges such as 1 - 30 m before generic reach parsing.","Only return SMF from explicit single-mode or protocol-family evidence.","Return empty fiber type when evidence is missing so details verification cannot pass by default."],"runtime_policy":"Local parser patch plus light rsync/build; no crawler wave; no external AI."},"outcome":{"code_changed":["packages/scraper/src/scrapers/atgbics.ts"],"verification":{"local_scraper_build":"passed","remote_scraper_build":"passed"},"deployment":{"remote_file_synced":true}},"truth_policy":"ATGBICS remaining rows need targeted source/protocol evidence; default fiber assumptions must not promote detail verification.","safety_notes":["No external AI was used.","No browser crawler was started.","TIP public health stayed healthy after rsync/build."]} {"event":"db_evidence_backfill","observed_at":"2026-05-09T15:39:00Z","actor":"codex-shopfiber24-deterministic-code-backfill","profile":"erik-safe-db-only","wave":"shopfiber24-safe-code-closure","vendor":"ShopFiber24","summary":"Backfilled 9 ShopFiber24 rows from deterministic product-code evidence while skipping variable cable-family rows. Promoted 9 additional rows to fully_verified.","input":{"precheck":{"shopfiber24_near_complete_missing_details":101,"safe_deterministic_rows":9,"skipped_variable_family_rows":"XM/CXM/CUXM/CXX/AOC/DAC families"}},"decision":{"rules":["LRM means 220m MMF at 1310nm.","BX60/BX-D-60/BX-U-60 means 60km SMF with 1270/1330 BiDi wavelength evidence.","LH70 means 70km SMF at 1550nm.","T-80 means 80m Copper with N/A wavelength.","Do not mark XM/CXM/CUXM/CXX/AOC/DAC family rows as fixed reach variants."],"runtime_policy":"DB-only update; no crawler wave; no external AI."},"outcome":{"updated":{"shopfiber24_detail_rows":9,"fully_verified_promoted":9},"postcheck":{"shopfiber24_near_complete_missing_details":92},"global_after":{"total":17647,"details_verified":11604,"fully_verified":10328},"tip_health":{"status":"healthy","load_status":"ok","memory_used_pct":13}},"truth_policy":"Remaining ShopFiber24 gaps need variant-level extraction or direct source parsing; variable cable families are not complete products until the exact configured length is known.","safety_notes":["No external AI was used.","No browser crawler was started.","Erik public health stayed healthy."]} +{"event":"db_evidence_backfill","observed_at":"2026-05-09T15:43:00Z","actor":"codex-fibermall-url-protocol-backfill","profile":"erik-safe-db-only","wave":"fibermall-url-protocol-closure","vendor":"FiberMall","summary":"Backfilled 12 FiberMall rows from protocol evidence in product URL slugs. Promoted 12 additional rows to fully_verified.","input":{"precheck":{"fibermall_near_complete_missing_details":36,"safe_url_protocol_rows":12}},"decision":{"rules":["Use URL protocol slugs such as LRM, LR4, 2FR4, LR8, SR4, ZR, SFP28-LR only when they directly encode form factor/speed/reach/fiber/wavelength.","Skip brand-name-only rows without protocol/reach evidence.","Correct form factor and speed when URL evidence contradicts stale parser defaults."],"runtime_policy":"DB-only update; no crawler wave; no external AI."},"outcome":{"updated":{"fibermall_detail_rows":12,"fully_verified_promoted":12},"postcheck":{"fibermall_near_complete_missing_details":24},"global_after":{"total":17647,"details_verified":11616,"fully_verified":10340},"tip_health":{"status":"healthy","load_status":"ok","memory_used_pct":13}},"truth_policy":"Remaining FiberMall rows need stronger product-page parsing; brand/OEM-code-only URL rows must not be auto-approved.","safety_notes":["No external AI was used.","No browser crawler was started.","Erik public health stayed healthy."]}