crawl: add atgbics cable backfill learning

This commit is contained in:
Rene Fichtmueller 2026-05-09 17:13:28 +02:00
parent 4bc7592d2c
commit 7c0a55fd60

View File

@ -20,3 +20,4 @@
{"event":"db_evidence_backfill","observed_at":"2026-05-09T14:54:00Z","actor":"codex-copper-dac-reach-details-and-api-semantics","profile":"erik-safe-db-plus-api-deploy","wave":"copper-dac-truth-correction","vendor":"all-copper-products+ATGBICS","summary":"Filled deterministic Copper/DAC/Twinax cable lengths, marked source-backed cable details, corrected ATGBICS OSFP cable form factors, and updated comparable-product API logic so Copper/DAC products can compare with wavelengths=N/A while optical products still require numeric wavelength evidence.","input":{"precheck":{"copper_missing_reach_label":464,"copper_missing_reach_meters":467,"copper_missing_details":498,"global_details_verified":11085,"global_fully_verified":9861}},"decision":{"rules":["Only write cable reach when length is deterministic from part_number or product_page_url.","Use wavelengths=N/A for Copper/DAC/Twinax/CU products.","Mark details verified only when product_page_url exists and core technical fields are present.","Correct ATGBICS OSFP cable rows that were parsed as SFP.","Comparable API may compare Copper/DAC/CU with N/A wavelengths, but optical comparisons still require numeric wavelength match."],"runtime_policy":"DB-only update plus small API/scraper code deploy; no crawler wave; no external AI."},"outcome":{"updated":{"copper_reach_detail_rows":342,"atgbics_osfp_form_factor_rows":78,"fully_verified_promoted":310},"postcheck":{"copper_missing_reach_label":122,"copper_missing_reach_meters":125,"copper_missing_details":158,"global_details_verified":11425,"global_fully_verified":10170,"selected_vendors":{"ATGBICS":{"details":"7656/8269","fully":"7646/8269"},"NADDOD":{"details":"726/748","fully":"726/748"},"QSFPTEK":{"details":"165/201","fully":"140/201"},"FS.COM":{"details":"373/383","fully":"300/383"},"Flexoptix":{"details":"626/744","fully":"622/744"},"GAO Tek":{"details":"127/414","fully":"2/414"}}},"deployment":{"scraper_build":"passed","api_build":"passed","tip_api_restart":"online"},"tip_health":{"status":"healthy","load_status":"ok","memory_used_pct":13}},"truth_policy":"Copper/DAC/Twinax products are complete only when their cable length and endpoint/form-factor evidence are present; do not invent optical wavelengths for them.","safety_notes":["No external AI was used.","No browser crawler was started.","Erik public health stayed healthy; SSH intermittently refused connections and work paused during those windows."]}
{"event":"db_evidence_backfill","observed_at":"2026-05-09T15:02:00Z","actor":"codex-qsfptek-cable-aoc-parser-and-backfill","profile":"erik-safe-db-plus-parser","wave":"qsfptek-detail-gap-closure","vendor":"QSFPTEK","summary":"Fixed QSFPTEK parser so product URLs and generic cable lengths are preserved, then backfilled 28 deterministic cable/AOC rows with source-backed details. Eight additional rows became fully verified.","input":{"precheck":{"qsfptek_missing_details":36,"qsfptek_rows_with_source_url":36,"deterministic_leading_length_rows":28}},"decision":{"code_changes":["Pass productUrl to findOrCreateScrapedTransceiver.","Parse generic m/km reach including leading cable lengths.","Classify MFS/AOC/active fiber as AOC Cable.","Classify MCP/DAC/Copper/Twinax as Cable.","Use wavelengths=N/A for Copper/DAC.","Add safe protocol-family wavelength parsing for future QSFPTEK runs."],"runtime_policy":"DB-only backfill for existing rows plus scraper source patch; no broad crawler run; no external AI."},"outcome":{"updated":{"qsfptek_detail_rows":28,"fully_verified_promoted":8},"deployment":{"scraper_source_synced":true,"scraper_build":"passed"}},"truth_policy":"Only source-backed rows with deterministic length were marked details verified; remaining QSFPTEK rows need source parsing for fiber/reach and should not be guessed.","safety_notes":["No external AI was used.","No browser crawler was started.","SSH intermittently refused connections, so remote actions were paused during blocked windows."]}
{"event":"db_evidence_backfill","observed_at":"2026-05-09T15:05:00Z","actor":"codex-naddod-infrastructure-classification","profile":"erik-safe-db-only","wave":"naddod-non-transceiver-classification","vendor":"NADDOD","summary":"Classified 18 remaining NADDOD detail-gap rows as switch/network infrastructure or NIC/adapter products based on source/title evidence instead of pretending they were optical transceivers.","input":{"precheck":{"naddod_missing_details":22,"infrastructure_hint_rows":18}},"decision":{"rules":["Switch/Quantum/Spectrum/ONIE/ports source-title evidence => Switch / Network Infrastructure.","Adapter/ConnectX source-title evidence => NIC / Adapter.","Use allowed data_confidence=scraped_unverified and notes to preserve the non-transceiver classification.","Mark details verified only when a source product URL exists.","Do not use these rows as optical transceiver equivalence evidence."],"runtime_policy":"DB-only classification; no crawler wave; no external AI."},"outcome":{"updated":{"naddod_infrastructure_rows":18},"public_health_after":{"details_verified":11466,"fully_verified":10177,"total":17647,"status":"healthy","load_status":"ok","memory_used_pct":12}},"truth_policy":"NADDOD switches/NICs/infrastructure can be source/detail verified, but they are not pluggable optical transceiver equivalents and should be handled in a separate product class later.","safety_notes":["No external AI was used.","No browser crawler was started.","SSH intermittently refused connections; public health stayed green."]}
{"event":"db_evidence_backfill","observed_at":"2026-05-09T15:11:00Z","actor":"codex-atgbics-cable-aoc-detail-backfill","profile":"erik-safe-db-only","wave":"atgbics-near-complete-cable-aoc-closure","vendor":"ATGBICS","summary":"Backfilled 96 ATGBICS Cable/AOC near-complete rows using deterministic length evidence from product URL/part text. Promoted 109 additional rows to fully_verified.","input":{"precheck":{"atgbics_near_complete_missing_details":581,"source_core_complete_optical":0,"cable_hint_rows":101,"coherent_hint_rows":22}},"decision":{"rules":["Use deterministic length from URL or part text only.","Classify breakout/copper/dac/twinax/base-t/rj45/aoc/active-optical rows as cable classes.","Use wavelengths=N/A for Copper/DAC/Twinax.","Mark details verified only when source URL and deterministic cable length are present.","Leave coherent/ZR/DCO/C-band rows for targeted source-specific parser."],"runtime_policy":"DB-only update; no crawler wave; no external AI."},"outcome":{"updated":{"atgbics_detail_rows":96,"fully_verified_promoted":109},"global_after":{"total":17647,"details_verified":11562,"fully_verified":10286},"tip_health":{"status":"healthy","load_status":"ok","memory_used_pct":13}},"truth_policy":"ATGBICS broad JSON refresh is now low-yield for remaining gaps; remaining work requires targeted optical/coherent parser logic rather than repeated catalog refreshes.","safety_notes":["No external AI was used.","No browser crawler was started.","SSH intermittently refused connections; public health stayed green."]}