crawl: add shopfiber24 parser learning
This commit is contained in:
parent
77ad40c617
commit
a7d4232034
@ -22,3 +22,4 @@
|
|||||||
{"event":"db_evidence_backfill","observed_at":"2026-05-09T15:05:00Z","actor":"codex-naddod-infrastructure-classification","profile":"erik-safe-db-only","wave":"naddod-non-transceiver-classification","vendor":"NADDOD","summary":"Classified 18 remaining NADDOD detail-gap rows as switch/network infrastructure or NIC/adapter products based on source/title evidence instead of pretending they were optical transceivers.","input":{"precheck":{"naddod_missing_details":22,"infrastructure_hint_rows":18}},"decision":{"rules":["Switch/Quantum/Spectrum/ONIE/ports source-title evidence => Switch / Network Infrastructure.","Adapter/ConnectX source-title evidence => NIC / Adapter.","Use allowed data_confidence=scraped_unverified and notes to preserve the non-transceiver classification.","Mark details verified only when a source product URL exists.","Do not use these rows as optical transceiver equivalence evidence."],"runtime_policy":"DB-only classification; no crawler wave; no external AI."},"outcome":{"updated":{"naddod_infrastructure_rows":18},"public_health_after":{"details_verified":11466,"fully_verified":10177,"total":17647,"status":"healthy","load_status":"ok","memory_used_pct":12}},"truth_policy":"NADDOD switches/NICs/infrastructure can be source/detail verified, but they are not pluggable optical transceiver equivalents and should be handled in a separate product class later.","safety_notes":["No external AI was used.","No browser crawler was started.","SSH intermittently refused connections; public health stayed green."]}
|
{"event":"db_evidence_backfill","observed_at":"2026-05-09T15:05:00Z","actor":"codex-naddod-infrastructure-classification","profile":"erik-safe-db-only","wave":"naddod-non-transceiver-classification","vendor":"NADDOD","summary":"Classified 18 remaining NADDOD detail-gap rows as switch/network infrastructure or NIC/adapter products based on source/title evidence instead of pretending they were optical transceivers.","input":{"precheck":{"naddod_missing_details":22,"infrastructure_hint_rows":18}},"decision":{"rules":["Switch/Quantum/Spectrum/ONIE/ports source-title evidence => Switch / Network Infrastructure.","Adapter/ConnectX source-title evidence => NIC / Adapter.","Use allowed data_confidence=scraped_unverified and notes to preserve the non-transceiver classification.","Mark details verified only when a source product URL exists.","Do not use these rows as optical transceiver equivalence evidence."],"runtime_policy":"DB-only classification; no crawler wave; no external AI."},"outcome":{"updated":{"naddod_infrastructure_rows":18},"public_health_after":{"details_verified":11466,"fully_verified":10177,"total":17647,"status":"healthy","load_status":"ok","memory_used_pct":12}},"truth_policy":"NADDOD switches/NICs/infrastructure can be source/detail verified, but they are not pluggable optical transceiver equivalents and should be handled in a separate product class later.","safety_notes":["No external AI was used.","No browser crawler was started.","SSH intermittently refused connections; public health stayed green."]}
|
||||||
{"event":"db_evidence_backfill","observed_at":"2026-05-09T15:11:00Z","actor":"codex-atgbics-cable-aoc-detail-backfill","profile":"erik-safe-db-only","wave":"atgbics-near-complete-cable-aoc-closure","vendor":"ATGBICS","summary":"Backfilled 96 ATGBICS Cable/AOC near-complete rows using deterministic length evidence from product URL/part text. Promoted 109 additional rows to fully_verified.","input":{"precheck":{"atgbics_near_complete_missing_details":581,"source_core_complete_optical":0,"cable_hint_rows":101,"coherent_hint_rows":22}},"decision":{"rules":["Use deterministic length from URL or part text only.","Classify breakout/copper/dac/twinax/base-t/rj45/aoc/active-optical rows as cable classes.","Use wavelengths=N/A for Copper/DAC/Twinax.","Mark details verified only when source URL and deterministic cable length are present.","Leave coherent/ZR/DCO/C-band rows for targeted source-specific parser."],"runtime_policy":"DB-only update; no crawler wave; no external AI."},"outcome":{"updated":{"atgbics_detail_rows":96,"fully_verified_promoted":109},"global_after":{"total":17647,"details_verified":11562,"fully_verified":10286},"tip_health":{"status":"healthy","load_status":"ok","memory_used_pct":13}},"truth_policy":"ATGBICS broad JSON refresh is now low-yield for remaining gaps; remaining work requires targeted optical/coherent parser logic rather than repeated catalog refreshes.","safety_notes":["No external AI was used.","No browser crawler was started.","SSH intermittently refused connections; public health stayed green."]}
|
{"event":"db_evidence_backfill","observed_at":"2026-05-09T15:11:00Z","actor":"codex-atgbics-cable-aoc-detail-backfill","profile":"erik-safe-db-only","wave":"atgbics-near-complete-cable-aoc-closure","vendor":"ATGBICS","summary":"Backfilled 96 ATGBICS Cable/AOC near-complete rows using deterministic length evidence from product URL/part text. Promoted 109 additional rows to fully_verified.","input":{"precheck":{"atgbics_near_complete_missing_details":581,"source_core_complete_optical":0,"cable_hint_rows":101,"coherent_hint_rows":22}},"decision":{"rules":["Use deterministic length from URL or part text only.","Classify breakout/copper/dac/twinax/base-t/rj45/aoc/active-optical rows as cable classes.","Use wavelengths=N/A for Copper/DAC/Twinax.","Mark details verified only when source URL and deterministic cable length are present.","Leave coherent/ZR/DCO/C-band rows for targeted source-specific parser."],"runtime_policy":"DB-only update; no crawler wave; no external AI."},"outcome":{"updated":{"atgbics_detail_rows":96,"fully_verified_promoted":109},"global_after":{"total":17647,"details_verified":11562,"fully_verified":10286},"tip_health":{"status":"healthy","load_status":"ok","memory_used_pct":13}},"truth_policy":"ATGBICS broad JSON refresh is now low-yield for remaining gaps; remaining work requires targeted optical/coherent parser logic rather than repeated catalog refreshes.","safety_notes":["No external AI was used.","No browser crawler was started.","SSH intermittently refused connections; public health stayed green."]}
|
||||||
{"event":"db_evidence_backfill","observed_at":"2026-05-09T15:15:00Z","actor":"codex-fibermall-source-title-backfill","profile":"erik-safe-db-only","wave":"fibermall-near-complete-optical-closure","vendor":"FiberMall","summary":"Backfilled 33 FiberMall near-complete optical rows using deterministic reach/fiber/wavelength evidence from product title and URL. Promoted 33 additional rows to fully_verified.","input":{"precheck":{"fibermall_near_complete_missing_details":69,"optical_hint_rows":69,"deterministic_reach_rows":33}},"decision":{"rules":["Use explicit m/km reach only.","Infer fiber type from SMF/MMF/source-title evidence only when missing.","Use explicit nm or safe protocol-family evidence for wavelength when present.","Do not complete rows whose part number is only a brand name unless source title/URL provides deterministic reach."],"runtime_policy":"DB-only update; no crawler wave; no external AI."},"outcome":{"updated":{"fibermall_detail_rows":33,"fully_verified_promoted":33},"global_after":{"total":17647,"details_verified":11595,"fully_verified":10319},"tip_health":{"status":"healthy","load_status":"ok","memory_used_pct":13}},"truth_policy":"FiberMall remaining gaps require stronger source parsing; brand-name-only rows must not be blindly marked complete.","safety_notes":["No external AI was used.","No browser crawler was started.","SSH intermittently refused connections; public health stayed green."]}
|
{"event":"db_evidence_backfill","observed_at":"2026-05-09T15:15:00Z","actor":"codex-fibermall-source-title-backfill","profile":"erik-safe-db-only","wave":"fibermall-near-complete-optical-closure","vendor":"FiberMall","summary":"Backfilled 33 FiberMall near-complete optical rows using deterministic reach/fiber/wavelength evidence from product title and URL. Promoted 33 additional rows to fully_verified.","input":{"precheck":{"fibermall_near_complete_missing_details":69,"optical_hint_rows":69,"deterministic_reach_rows":33}},"decision":{"rules":["Use explicit m/km reach only.","Infer fiber type from SMF/MMF/source-title evidence only when missing.","Use explicit nm or safe protocol-family evidence for wavelength when present.","Do not complete rows whose part number is only a brand name unless source title/URL provides deterministic reach."],"runtime_policy":"DB-only update; no crawler wave; no external AI."},"outcome":{"updated":{"fibermall_detail_rows":33,"fully_verified_promoted":33},"global_after":{"total":17647,"details_verified":11595,"fully_verified":10319},"tip_health":{"status":"healthy","load_status":"ok","memory_used_pct":13}},"truth_policy":"FiberMall remaining gaps require stronger source parsing; brand-name-only rows must not be blindly marked complete.","safety_notes":["No external AI was used.","No browser crawler was started.","SSH intermittently refused connections; public health stayed green."]}
|
||||||
|
{"event":"parser_hardening","observed_at":"2026-05-09T15:23:00Z","actor":"codex-shopfiber24-parser-hardening","profile":"local-code-only","wave":"shopfiber24-deterministic-reach-guard","vendor":"ShopFiber24","summary":"Hardened ShopFiber24 reach and speed parsing so variable-length cable families are not falsely detail-verified and 800G/QSFP-DD800 text is not normalized as 400G.","input":{"risk_examples":["variable cable ranges like 1 - 30 m","800G or QSFP-DD800 product text"]},"decision":{"rules":["Reject variable reach ranges such as 1 - 30 m, 1 to 30 m, and 1 bis 30 m for detail verification.","Parse explicit single m/km values as deterministic reach.","Normalize 800G/QSFP-DD800 as 800G/800Gbps rather than 400G."],"runtime_policy":"Local parser patch only; no crawler wave; no external AI."},"outcome":{"code_changed":["packages/scraper/src/scrapers/fiber24.ts"],"verification":{"local_scraper_build":"passed"},"deployment":{"remote_erik":"pending because SSH refused connections"}},"truth_policy":"ShopFiber24 variable cable-family rows require variant-level extraction; do not approve them as one fixed reach product.","safety_notes":["No external AI was used.","No browser crawler was started.","No Erik load was created."]}
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user