From 91a1c2282a58c7936b10479b5951a1d6c3a408c3 Mon Sep 17 00:00:00 2001 From: Rene Fichtmueller Date: Sat, 9 May 2026 17:30:08 +0200 Subject: [PATCH] fix: harden atgbics evidence parsing --- packages/scraper/src/scrapers/atgbics.ts | 8 +++-- sync/CURRENT.md | 23 ++++++++++++-- ...26-05-09-atgbics-parser-truth-hardening.md | 31 +++++++++++++++++++ 3 files changed, 58 insertions(+), 4 deletions(-) create mode 100644 sync/history/2026-05-09-atgbics-parser-truth-hardening.md diff --git a/packages/scraper/src/scrapers/atgbics.ts b/packages/scraper/src/scrapers/atgbics.ts index c1bc542..1df81c5 100644 --- a/packages/scraper/src/scrapers/atgbics.ts +++ b/packages/scraper/src/scrapers/atgbics.ts @@ -106,6 +106,10 @@ function detectSpeed(text: string, fallbackGbps: number): { speed: string; speed } function detectReach(text: string): { label: string; meters: number } | undefined { + if (/\b\d+(?:[.,]\d+)?\s*(?:-|–|to|bis)\s*\d+(?:[.,]\d+)?\s*(?:m|km)\b/i.test(text)) { + return undefined; + } + const generic = text.match(/\b(\d+(?:\.\d+)?)\s*(km|m)\b/i); if (generic) { const value = parseFloat(generic[1]); @@ -136,11 +140,11 @@ function detectReach(text: string): { label: string; meters: number } | undefine } function detectFiber(text: string): string { - if (/single.?mode|smf|[^a-z]lx[^a-z]|[^a-z]lr[^a-z]|[^a-z]er[^a-z]|[^a-z]zr[^a-z]|bidi|cwdm|dwdm/i.test(text)) return "SMF"; + if (/single.?mode|smf|[^a-z]lx[^a-z]|[^a-z]lr[^a-z]|[^a-z]er[^a-z]|[^a-z]zr[^a-z]|bidi|cwdm|dwdm|[^a-z]dr\d?[^a-z]?|[^a-z]fr\d?[^a-z]?|psm4|2dr4|2fr4/i.test(text)) return "SMF"; if (/multi.?mode|mmf|[^a-z]sx[^a-z]|[^a-z]sr[^a-z]/i.test(text)) return "MMF"; if (/copper|dac|twinax|rj.?45|base-t/i.test(text)) return "Copper"; if (/aoc|active.?optical/i.test(text)) return "MMF"; - return "SMF"; + return ""; } function detectWavelength(text: string): string { diff --git a/sync/CURRENT.md b/sync/CURRENT.md index 052523b..354ef20 100644 --- a/sync/CURRENT.md +++ b/sync/CURRENT.md @@ -1,9 +1,27 @@ # Current 
TIP Sync State -Updated: 2026-05-09 15:23 UTC +Updated: 2026-05-09 15:33 UTC ## Newest Work +- ATGBICS parser truth hardening on 2026-05-09: + - root cause: + - ATGBICS parser defaulted unknown fiber type to `SMF` + - automatic detail verification needs positive fiber evidence, not a fallback + - variable-length ranges must not be collapsed into a fixed reach + - code hardened: + - `packages/scraper/src/scrapers/atgbics.ts` + - refuses variable reach ranges such as `1 - 30 m` + - only returns `SMF` from explicit SMF/single-mode or protocol evidence such as LX/LR/ER/ZR/BiDi/CWDM/DWDM/DR/FR/PSM4 + - returns empty fiber type when evidence is missing instead of assuming SMF + - verification: + - `npm run build -w packages/scraper` passed locally + - deployment: + - source file synced to `/opt/tip` + - `pnpm -C packages/scraper build` passed on Erik after SSH recovered + - truth: + - future ATGBICS runs should not promote rows to detail-verified from default fiber assumptions + - ShopFiber24 parser hardening for deterministic cable/detail verification on 2026-05-09: - root cause: - ShopFiber24 contains variable-length AOC/DAC products such as `1 - 30 m` @@ -17,7 +35,8 @@ Updated: 2026-05-09 15:23 UTC - verification: - `npm run build -w packages/scraper` passed locally - deployment: - - not deployed yet because Erik SSH currently refuses connections + - source file synced to `/opt/tip` + - `pnpm -C packages/scraper build` passed on Erik - truth: - future ShopFiber24 passes should only mark product details verified when reach is deterministic - variable cable-family rows need variant-level extraction instead of broad approval diff --git a/sync/history/2026-05-09-atgbics-parser-truth-hardening.md b/sync/history/2026-05-09-atgbics-parser-truth-hardening.md new file mode 100644 index 0000000..57be8ce --- /dev/null +++ b/sync/history/2026-05-09-atgbics-parser-truth-hardening.md @@ -0,0 +1,31 @@ +# ATGBICS Parser Truth Hardening - 2026-05-09 + +## Context + +ATGBICS still has the 
largest near-complete detail backlog. The parser must be strict because broad Shopify refreshes can otherwise promote rows based on inferred defaults rather than evidence. + +## Root Cause + +- Unknown fiber type defaulted to `SMF` +- Variable cable/range text could be interpreted as a fixed reach +- Automatic detail verification should require positive source or protocol evidence + +## Code Hardened + +- `packages/scraper/src/scrapers/atgbics.ts` + - refuses variable reach ranges such as `1 - 30 m` + - only returns `SMF` from explicit SMF/single-mode or protocol evidence such as LX/LR/ER/ZR/BiDi/CWDM/DWDM/DR/FR/PSM4 + - returns empty fiber type when evidence is missing instead of assuming SMF + +## Verification + +- `npm run build -w packages/scraper` passed locally + +## Deployment + +- Source file synced to `/opt/tip` +- `pnpm -C packages/scraper build` passed on Erik after SSH recovered + +## Truth Policy + +Future ATGBICS runs should not promote rows to detail-verified from default fiber assumptions. Remaining ATGBICS rows need targeted optical/coherent parsing or source-specific evidence.