From 2f885123bdd90ec5d7e96a27ffa3ac16b0c60cb9 Mon Sep 17 00:00:00 2001 From: Rene Fichtmueller Date: Sat, 9 May 2026 17:04:01 +0200 Subject: [PATCH] crawl: add qsfptek cable verification learning --- robot-experiences/2026-05-09.jsonl | 1 + 1 file changed, 1 insertion(+) diff --git a/robot-experiences/2026-05-09.jsonl b/robot-experiences/2026-05-09.jsonl index cb23fec..0668527 100644 --- a/robot-experiences/2026-05-09.jsonl +++ b/robot-experiences/2026-05-09.jsonl @@ -18,3 +18,4 @@ {"event":"db_evidence_backfill","observed_at":"2026-05-09T14:28:00Z","actor":"codex-highspeed-wavelength-backfill","profile":"erik-safe-db-only","wave":"deterministic-product-evidence-backfill","vendor":"all-highspeed-transceivers","summary":"Backfilled 187 missing highspeed wavelength fields from existing deterministic DB evidence without a crawler wave. Rules required explicit nm text or a known optical family code plus matching fiber class.","input":{"precheck":{"highspeed_rows":4438,"missing_wavelengths":2095},"evidence_sources":["part_number","standard_name","notes","product_page_url"]},"decision":{"rules":["explicit 850nm => 850","explicit 1310nm or 1311nm => 1310","explicit 1550nm => 1550","MMF with SR/SR4/SR8/SRBD/VR/ESR/CSR family => 850","SMF with DR/FR/LR/ER/PSM family => 1310","SMF with CWDM4 => 1271,1291,1311,1331"],"skip_policy":"Do not fill ambiguous rows; do not use unknown fiber class for inference; do not mark full source verification from this alone.","runtime_policy":"Single DB-only UPDATE on Erik; no browser crawler; no external AI."},"outcome":{"updated":{"1310":129,"850":40,"1271,1291,1311,1331":18,"total":187},"postcheck":{"highspeed_rows":4438,"missing_wavelengths":1908,"largest_remaining_gaps":{"ATGBICS":663,"NADDOD":419,"Flexoptix":183,"Eoptolink":141,"FS.COM":114,"QSFPTEK":97}},"tip_health":{"status":"healthy","load_status":"ok","memory_used_pct":13}},"truth_policy":"This is technical evidence enrichment, not a complete vendor-source verification claim. Remaining gaps need vendor-specific parser improvements or targeted crawls.","safety_notes":["No external AI was used.","No crawler wave was started.","Erik stayed healthy."]} {"event":"crawler_result","observed_at":"2026-05-09T14:39:00Z","actor":"codex-atgbics-json-rerun-and-copper-wavelength-semantics","profile":"erik-safe-shopify-json","wave":"targeted-atgbics-light-rerun-plus-db-truth-correction","vendor":"ATGBICS+all-copper-products","summary":"Hardened ATGBICS wavelength detection and ran a low-load Shopify products.json pass. The run confirmed prices/images but did not reduce ATGBICS technical gaps; a separate DB truth correction set Copper/DAC wavelength semantics to N/A for 1044 rows.","input":{"precheck":{"highspeed_missing_wavelengths_after_previous_backfill":1908,"highspeed_copper_missing_wavelengths":548,"atgbics":{"total":8269,"price":8241,"image":8257,"details":7435,"fully":7428,"highspeed_missing_wavelengths":663}}},"decision":{"code_changes":["ATGBICS detects N/A wavelength for Copper/DAC/Twinax/Base-T/RJ45.","ATGBICS detects CWDM4 as 1271,1291,1311,1331.","ATGBICS detects SR-family as 850 and DR/FR/LR/ER/PSM-family as 1310 when explicit protocol code is present."],"runtime_policy":"Run one ATGBICS Shopify products.json pass with nice -n 10; no Playwright/browser; stop if health degrades.","truth_policy":"Copper/DAC products do not have optical wavelengths and should use N/A instead of being counted as unresolved optical wavelength gaps."},"outcome":{"remote_build":"pnpm -C packages/scraper build passed on active /opt/tip","atgbics_run":{"products_processed":7946,"price_updates":61,"image_updates":7943,"result":"counters unchanged for technical completeness"},"atgbics_postcheck":{"total":8269,"price":8241,"image":8257,"details":7435,"fully":7428,"highspeed_missing_wavelengths":663},"db_truth_correction":{"copper_rows_set_wavelength_na":1044,"highspeed_missing_wavelengths_after":1360,"highspeed_copper_missing_after":0,"highspeed_optical_missing_after":1220},"tip_health":{"status":"healthy","load_status":"ok","memory_used_pct":14}},"next_best_action":"Do targeted ATGBICS parser/classifier work for ZR, DCO, C-band, LAN-WDM, CR8, breakout and OSFP/QSFP-DD cable form-factor correction instead of repeating broad JSON runs.","safety_notes":["No external AI was used.","No browser crawler wave was started.","Erik stayed healthy although SSH briefly refused connections twice; public health stayed green."]} {"event":"db_evidence_backfill","observed_at":"2026-05-09T14:54:00Z","actor":"codex-copper-dac-reach-details-and-api-semantics","profile":"erik-safe-db-plus-api-deploy","wave":"copper-dac-truth-correction","vendor":"all-copper-products+ATGBICS","summary":"Filled deterministic Copper/DAC/Twinax cable lengths, marked source-backed cable details, corrected ATGBICS OSFP cable form factors, and updated comparable-product API logic so Copper/DAC products can compare with wavelengths=N/A while optical products still require numeric wavelength evidence.","input":{"precheck":{"copper_missing_reach_label":464,"copper_missing_reach_meters":467,"copper_missing_details":498,"global_details_verified":11085,"global_fully_verified":9861}},"decision":{"rules":["Only write cable reach when length is deterministic from part_number or product_page_url.","Use wavelengths=N/A for Copper/DAC/Twinax/CU products.","Mark details verified only when product_page_url exists and core technical fields are present.","Correct ATGBICS OSFP cable rows that were parsed as SFP.","Comparable API may compare Copper/DAC/CU with N/A wavelengths, but optical comparisons still require numeric wavelength match."],"runtime_policy":"DB-only update plus small API/scraper code deploy; no crawler wave; no external AI."},"outcome":{"updated":{"copper_reach_detail_rows":342,"atgbics_osfp_form_factor_rows":78,"fully_verified_promoted":310},"postcheck":{"copper_missing_reach_label":122,"copper_missing_reach_meters":125,"copper_missing_details":158,"global_details_verified":11425,"global_fully_verified":10170,"selected_vendors":{"ATGBICS":{"details":"7656/8269","fully":"7646/8269"},"NADDOD":{"details":"726/748","fully":"726/748"},"QSFPTEK":{"details":"165/201","fully":"140/201"},"FS.COM":{"details":"373/383","fully":"300/383"},"Flexoptix":{"details":"626/744","fully":"622/744"},"GAO Tek":{"details":"127/414","fully":"2/414"}}},"deployment":{"scraper_build":"passed","api_build":"passed","tip_api_restart":"online"},"tip_health":{"status":"healthy","load_status":"ok","memory_used_pct":13}},"truth_policy":"Copper/DAC/Twinax products are complete only when their cable length and endpoint/form-factor evidence are present; do not invent optical wavelengths for them.","safety_notes":["No external AI was used.","No browser crawler was started.","Erik public health stayed healthy; SSH intermittently refused connections and work paused during those windows."]} +{"event":"db_evidence_backfill","observed_at":"2026-05-09T15:02:00Z","actor":"codex-qsfptek-cable-aoc-parser-and-backfill","profile":"erik-safe-db-plus-parser","wave":"qsfptek-detail-gap-closure","vendor":"QSFPTEK","summary":"Fixed QSFPTEK parser so product URLs and generic cable lengths are preserved, then backfilled 28 deterministic cable/AOC rows with source-backed details. Eight additional rows became fully verified.","input":{"precheck":{"qsfptek_missing_details":36,"qsfptek_rows_with_source_url":36,"deterministic_leading_length_rows":28}},"decision":{"code_changes":["Pass productUrl to findOrCreateScrapedTransceiver.","Parse generic m/km reach including leading cable lengths.","Classify MFS/AOC/active fiber as AOC Cable.","Classify MCP/DAC/Copper/Twinax as Cable.","Use wavelengths=N/A for Copper/DAC.","Add safe protocol-family wavelength parsing for future QSFPTEK runs."],"runtime_policy":"DB-only backfill for existing rows plus scraper source patch; no broad crawler run; no external AI."},"outcome":{"updated":{"qsfptek_detail_rows":28,"fully_verified_promoted":8},"deployment":{"scraper_source_synced":true,"scraper_build":"passed"}},"truth_policy":"Only source-backed rows with deterministic length were marked details verified; remaining QSFPTEK rows need source parsing for fiber/reach and should not be guessed.","safety_notes":["No external AI was used.","No browser crawler was started.","SSH intermittently refused connections, so remote actions were paused during blocked windows."]}