crawl: add atgbics wavelength semantics learning

This commit is contained in:
Rene Fichtmueller 2026-05-09 16:41:49 +02:00
parent 1a07245c84
commit accf474d91

View File

@ -16,3 +16,4 @@
{"event":"source_correction","observed_at":"2026-05-09T14:08:00Z","actor":"codex-fscom-qdd-2fr4-comparable-filter-fix","profile":"source-truth-api-filter","wave":"operator-spotted-false-comparable","vendor":"FS.COM+Flexoptix+NADDOD","summary":"Fixed false comparable products for FS.com QDD-2FR4-800G. The dashboard had shown a Flexoptix breakout AOC and a NADDOD 500m product as comparable to a 2km SMF FR4 transceiver because the API treated unknown reach_meters as a wildcard.","input":{"user_finding":"Flexoptix DQ.2A858HG.z is wrong because it is a breakout. QDD-2FR4-800G is 800G QSFP-DD 2km SMF; NADDOD row shown was 500m, also not 1:1.","precheck":{"FS.COM QDD-2FR4-800G":{"reach_label":"2km","reach_meters":0,"fiber_type":"SMF","wavelengths":null},"Flexoptix DQ.2A858HG.z":{"category":"breakout AOC","reach_label":"30m","reach_meters":30,"fiber_type":"MMF"},"NADDOD QDD-800LPO-2DR4 Generic":{"reach_label":"500m","reach_meters":500,"fiber_type":"SMF","wavelengths":"1310"}}},"decision":{"db_corrections":["Set FS.COM QDD-2FR4-800G reach_meters to 2000.","Set FS.COM QDD-2FR4-800G wavelengths to 1310 and standard_name to 800G QSFP-DD 2FR4."],"api_changes":["Comparable products now require known reach on both sides and reach ratio >= 0.85.","Comparable products now require matching known fiber type.","Comparable products now require known primary wavelength on both sides within 15nm.","Breakout/AOC/DAC/cable products can only compare to other breakout/AOC/DAC/cable products.","QSFP-DD and QSFP-DD800 are treated as one 800G form-factor family."],"runtime_policy":"Build and restart API only; no crawler wave."},"outcome":{"postcheck":{"FS.COM QDD-2FR4-800G":{"speed":"800G","speed_gbps":800,"reach_label":"2km","reach_meters":2000,"fiber_type":"SMF","wavelengths":"1310","standard_name":"800G QSFP-DD 2FR4","fully_verified":true}},"deployment":"pnpm -C packages/api build passed on Erik; pm2 restart tip-api completed; public health healthy"},"truth_policy":"Unknown reach/fiber/wavelength must never act as wildcard in final product comparisons. Breakout AOC products must not be shown as equivalent to pluggable FR4/DR optics.","safety_notes":["No external AI was used.","No crawler wave was started.","API-only correction kept Erik stable."]} {"event":"source_correction","observed_at":"2026-05-09T14:08:00Z","actor":"codex-fscom-qdd-2fr4-comparable-filter-fix","profile":"source-truth-api-filter","wave":"operator-spotted-false-comparable","vendor":"FS.COM+Flexoptix+NADDOD","summary":"Fixed false comparable products for FS.com QDD-2FR4-800G. The dashboard had shown a Flexoptix breakout AOC and a NADDOD 500m product as comparable to a 2km SMF FR4 transceiver because the API treated unknown reach_meters as a wildcard.","input":{"user_finding":"Flexoptix DQ.2A858HG.z is wrong because it is a breakout. QDD-2FR4-800G is 800G QSFP-DD 2km SMF; NADDOD row shown was 500m, also not 1:1.","precheck":{"FS.COM QDD-2FR4-800G":{"reach_label":"2km","reach_meters":0,"fiber_type":"SMF","wavelengths":null},"Flexoptix DQ.2A858HG.z":{"category":"breakout AOC","reach_label":"30m","reach_meters":30,"fiber_type":"MMF"},"NADDOD QDD-800LPO-2DR4 Generic":{"reach_label":"500m","reach_meters":500,"fiber_type":"SMF","wavelengths":"1310"}}},"decision":{"db_corrections":["Set FS.COM QDD-2FR4-800G reach_meters to 2000.","Set FS.COM QDD-2FR4-800G wavelengths to 1310 and standard_name to 800G QSFP-DD 2FR4."],"api_changes":["Comparable products now require known reach on both sides and reach ratio >= 0.85.","Comparable products now require matching known fiber type.","Comparable products now require known primary wavelength on both sides within 15nm.","Breakout/AOC/DAC/cable products can only compare to other breakout/AOC/DAC/cable products.","QSFP-DD and QSFP-DD800 are treated as one 800G form-factor family."],"runtime_policy":"Build and restart API only; no crawler wave."},"outcome":{"postcheck":{"FS.COM QDD-2FR4-800G":{"speed":"800G","speed_gbps":800,"reach_label":"2km","reach_meters":2000,"fiber_type":"SMF","wavelengths":"1310","standard_name":"800G QSFP-DD 2FR4","fully_verified":true}},"deployment":"pnpm -C packages/api build passed on Erik; pm2 restart tip-api completed; public health healthy"},"truth_policy":"Unknown reach/fiber/wavelength must never act as wildcard in final product comparisons. Breakout AOC products must not be shown as equivalent to pluggable FR4/DR optics.","safety_notes":["No external AI was used.","No crawler wave was started.","API-only correction kept Erik stable."]}
{"event":"equivalence_revalidation_result","observed_at":"2026-05-09T14:22:00Z","actor":"codex-strict-active-match-sweep","profile":"erik-safe-db-only","wave":"global-active-equivalence-risk-sweep","vendor":"all-active-equivalence-matches","summary":"Ran a strict DB-only sweep over active approved/auto-approved equivalence matches after an operator-spotted false comparable. Rejected 16 remaining active false positives and backfilled numeric reach evidence so unknown reach can no longer mask mismatches.","input":{"trigger":"Operator spotted false comparable products in the dashboard; follow-up sweep searched for similar active risks.","precheck":{"active_matches":34067,"breakout_class_mismatch":13,"reach_mismatch":3,"fiber_mismatch":1,"wavelength_mismatch":1,"missing_core_evidence":0}},"decision":{"criteria":["breakout/AOC/DAC/cable class must match unless both sides are non-cable optics","known reach must be close enough and numeric reach_meters should be populated from source labels where deterministic","known fiber types must match exactly","known primary wavelengths must be within 15nm","unknown reach/fiber/wavelength must not act as final approval wildcard"],"runtime_policy":"DB-only correction and measurement on Erik; no browser crawler wave; no external AI."},"outcome":{"rejected_active_false_positives":16,"demoted_flexoptix_rows":0,"reach_meter_backfill":{"km_labels_updated":269,"m_labels_updated":131,"remaining_without_meters":"6 N/A accessory/control rows"},"postcheck":{"active_matches":34051,"breakout_class_mismatch":0,"reach_mismatch":0,"fiber_mismatch":0,"wavelength_mismatch":0,"missing_core_evidence":0},"final_queue":{"pending":0,"approved":1987,"auto_approved":32064,"rejected":148382,"due_research":0},"verification_counters":{"total":17647,"price_verified":11557,"image_verified":11963,"details_verified":11085,"fully_verified":9861}},"truth_policy":"Active equivalence matches have zero known hard 1:1 mismatches by DB evidence after the sweep, but product-level vendor enrichment is still ongoing and must not be represented as 100% complete.","safety_notes":["No external AI was used.","No crawler wave was started.","Erik stayed protected by DB-only work."]} {"event":"equivalence_revalidation_result","observed_at":"2026-05-09T14:22:00Z","actor":"codex-strict-active-match-sweep","profile":"erik-safe-db-only","wave":"global-active-equivalence-risk-sweep","vendor":"all-active-equivalence-matches","summary":"Ran a strict DB-only sweep over active approved/auto-approved equivalence matches after an operator-spotted false comparable. Rejected 16 remaining active false positives and backfilled numeric reach evidence so unknown reach can no longer mask mismatches.","input":{"trigger":"Operator spotted false comparable products in the dashboard; follow-up sweep searched for similar active risks.","precheck":{"active_matches":34067,"breakout_class_mismatch":13,"reach_mismatch":3,"fiber_mismatch":1,"wavelength_mismatch":1,"missing_core_evidence":0}},"decision":{"criteria":["breakout/AOC/DAC/cable class must match unless both sides are non-cable optics","known reach must be close enough and numeric reach_meters should be populated from source labels where deterministic","known fiber types must match exactly","known primary wavelengths must be within 15nm","unknown reach/fiber/wavelength must not act as final approval wildcard"],"runtime_policy":"DB-only correction and measurement on Erik; no browser crawler wave; no external AI."},"outcome":{"rejected_active_false_positives":16,"demoted_flexoptix_rows":0,"reach_meter_backfill":{"km_labels_updated":269,"m_labels_updated":131,"remaining_without_meters":"6 N/A accessory/control rows"},"postcheck":{"active_matches":34051,"breakout_class_mismatch":0,"reach_mismatch":0,"fiber_mismatch":0,"wavelength_mismatch":0,"missing_core_evidence":0},"final_queue":{"pending":0,"approved":1987,"auto_approved":32064,"rejected":148382,"due_research":0},"verification_counters":{"total":17647,"price_verified":11557,"image_verified":11963,"details_verified":11085,"fully_verified":9861}},"truth_policy":"Active equivalence matches have zero known hard 1:1 mismatches by DB evidence after the sweep, but product-level vendor enrichment is still ongoing and must not be represented as 100% complete.","safety_notes":["No external AI was used.","No crawler wave was started.","Erik stayed protected by DB-only work."]}
{"event":"db_evidence_backfill","observed_at":"2026-05-09T14:28:00Z","actor":"codex-highspeed-wavelength-backfill","profile":"erik-safe-db-only","wave":"deterministic-product-evidence-backfill","vendor":"all-highspeed-transceivers","summary":"Backfilled 187 missing highspeed wavelength fields from existing deterministic DB evidence without a crawler wave. Rules required explicit nm text or a known optical family code plus matching fiber class.","input":{"precheck":{"highspeed_rows":4438,"missing_wavelengths":2095},"evidence_sources":["part_number","standard_name","notes","product_page_url"]},"decision":{"rules":["explicit 850nm => 850","explicit 1310nm or 1311nm => 1310","explicit 1550nm => 1550","MMF with SR/SR4/SR8/SRBD/VR/ESR/CSR family => 850","SMF with DR/FR/LR/ER/PSM family => 1310","SMF with CWDM4 => 1271,1291,1311,1331"],"skip_policy":"Do not fill ambiguous rows; do not use unknown fiber class for inference; do not mark full source verification from this alone.","runtime_policy":"Single DB-only UPDATE on Erik; no browser crawler; no external AI."},"outcome":{"updated":{"1310":129,"850":40,"1271,1291,1311,1331":18,"total":187},"postcheck":{"highspeed_rows":4438,"missing_wavelengths":1908,"largest_remaining_gaps":{"ATGBICS":663,"NADDOD":419,"Flexoptix":183,"Eoptolink":141,"FS.COM":114,"QSFPTEK":97}},"tip_health":{"status":"healthy","load_status":"ok","memory_used_pct":13}},"truth_policy":"This is technical evidence enrichment, not a complete vendor-source verification claim. Remaining gaps need vendor-specific parser improvements or targeted crawls.","safety_notes":["No external AI was used.","No crawler wave was started.","Erik stayed healthy."]} {"event":"db_evidence_backfill","observed_at":"2026-05-09T14:28:00Z","actor":"codex-highspeed-wavelength-backfill","profile":"erik-safe-db-only","wave":"deterministic-product-evidence-backfill","vendor":"all-highspeed-transceivers","summary":"Backfilled 187 missing highspeed wavelength fields from existing deterministic DB evidence without a crawler wave. Rules required explicit nm text or a known optical family code plus matching fiber class.","input":{"precheck":{"highspeed_rows":4438,"missing_wavelengths":2095},"evidence_sources":["part_number","standard_name","notes","product_page_url"]},"decision":{"rules":["explicit 850nm => 850","explicit 1310nm or 1311nm => 1310","explicit 1550nm => 1550","MMF with SR/SR4/SR8/SRBD/VR/ESR/CSR family => 850","SMF with DR/FR/LR/ER/PSM family => 1310","SMF with CWDM4 => 1271,1291,1311,1331"],"skip_policy":"Do not fill ambiguous rows; do not use unknown fiber class for inference; do not mark full source verification from this alone.","runtime_policy":"Single DB-only UPDATE on Erik; no browser crawler; no external AI."},"outcome":{"updated":{"1310":129,"850":40,"1271,1291,1311,1331":18,"total":187},"postcheck":{"highspeed_rows":4438,"missing_wavelengths":1908,"largest_remaining_gaps":{"ATGBICS":663,"NADDOD":419,"Flexoptix":183,"Eoptolink":141,"FS.COM":114,"QSFPTEK":97}},"tip_health":{"status":"healthy","load_status":"ok","memory_used_pct":13}},"truth_policy":"This is technical evidence enrichment, not a complete vendor-source verification claim. Remaining gaps need vendor-specific parser improvements or targeted crawls.","safety_notes":["No external AI was used.","No crawler wave was started.","Erik stayed healthy."]}
{"event":"crawler_result","observed_at":"2026-05-09T14:39:00Z","actor":"codex-atgbics-json-rerun-and-copper-wavelength-semantics","profile":"erik-safe-shopify-json","wave":"targeted-atgbics-light-rerun-plus-db-truth-correction","vendor":"ATGBICS+all-copper-products","summary":"Hardened ATGBICS wavelength detection and ran a low-load Shopify products.json pass. The run confirmed prices/images but did not reduce ATGBICS technical gaps; a separate DB truth correction set Copper/DAC wavelength semantics to N/A for 1044 rows.","input":{"precheck":{"highspeed_missing_wavelengths_after_previous_backfill":1908,"highspeed_copper_missing_wavelengths":548,"atgbics":{"total":8269,"price":8241,"image":8257,"details":7435,"fully":7428,"highspeed_missing_wavelengths":663}}},"decision":{"code_changes":["ATGBICS detects N/A wavelength for Copper/DAC/Twinax/Base-T/RJ45.","ATGBICS detects CWDM4 as 1271,1291,1311,1331.","ATGBICS detects SR-family as 850 and DR/FR/LR/ER/PSM-family as 1310 when explicit protocol code is present."],"runtime_policy":"Run one ATGBICS Shopify products.json pass with nice -n 10; no Playwright/browser; stop if health degrades.","truth_policy":"Copper/DAC products do not have optical wavelengths and should use N/A instead of being counted as unresolved optical wavelength gaps."},"outcome":{"remote_build":"pnpm -C packages/scraper build passed on active /opt/tip","atgbics_run":{"products_processed":7946,"price_updates":61,"image_updates":7943,"result":"counters unchanged for technical completeness"},"atgbics_postcheck":{"total":8269,"price":8241,"image":8257,"details":7435,"fully":7428,"highspeed_missing_wavelengths":663},"db_truth_correction":{"copper_rows_set_wavelength_na":1044,"highspeed_missing_wavelengths_after":1360,"highspeed_copper_missing_after":0,"highspeed_optical_missing_after":1220},"tip_health":{"status":"healthy","load_status":"ok","memory_used_pct":14}},"next_best_action":"Do targeted ATGBICS parser/classifier work for ZR, DCO, C-band, LAN-WDM, CR8, breakout and OSFP/QSFP-DD cable form-factor correction instead of repeating broad JSON runs.","safety_notes":["No external AI was used.","No browser crawler wave was started.","Erik stayed healthy although SSH briefly refused connections twice; public health stayed green."]}