fix: harden atgbics evidence parsing

This commit is contained in:
Rene Fichtmueller 2026-05-09 17:30:08 +02:00
parent c2421c03a3
commit 91a1c2282a
3 changed files with 58 additions and 4 deletions

View File

@ -106,6 +106,10 @@ function detectSpeed(text: string, fallbackGbps: number): { speed: string; speed
}
function detectReach(text: string): { label: string; meters: number } | undefined {
if (/\b\d+(?:[.,]\d+)?\s*(?:-|–|to|bis)\s*\d+(?:[.,]\d+)?\s*(?:m|km)\b/i.test(text)) {
return undefined;
}
const generic = text.match(/\b(\d+(?:\.\d+)?)\s*(km|m)\b/i);
if (generic) {
const value = parseFloat(generic[1]);
@ -136,11 +140,11 @@ function detectReach(text: string): { label: string; meters: number } | undefine
}
function detectFiber(text: string): string {
if (/single.?mode|smf|[^a-z]lx[^a-z]|[^a-z]lr[^a-z]|[^a-z]er[^a-z]|[^a-z]zr[^a-z]|bidi|cwdm|dwdm/i.test(text)) return "SMF";
if (/single.?mode|smf|[^a-z]lx[^a-z]|[^a-z]lr[^a-z]|[^a-z]er[^a-z]|[^a-z]zr[^a-z]|bidi|cwdm|dwdm|[^a-z]dr\d?[^a-z]?|[^a-z]fr\d?[^a-z]?|psm4|2dr4|2fr4/i.test(text)) return "SMF";
if (/multi.?mode|mmf|[^a-z]sx[^a-z]|[^a-z]sr[^a-z]/i.test(text)) return "MMF";
if (/copper|dac|twinax|rj.?45|base-t/i.test(text)) return "Copper";
if (/aoc|active.?optical/i.test(text)) return "MMF";
return "SMF";
return "";
}
function detectWavelength(text: string): string {

View File

@ -1,9 +1,27 @@
# Current TIP Sync State
Updated: 2026-05-09 15:23 UTC
Updated: 2026-05-09 15:33 UTC
## Newest Work
- ATGBICS parser truth hardening on 2026-05-09:
- root cause:
- ATGBICS parser defaulted unknown fiber type to `SMF`
- automatic detail verification needs positive fiber evidence, not a fallback
- variable-length ranges must not be collapsed into a fixed reach
- code hardened:
- `packages/scraper/src/scrapers/atgbics.ts`
- returns no reach (`undefined`) for variable ranges such as `1 - 30 m` instead of collapsing them into a single fixed value
- only returns `SMF` from explicit SMF/single-mode wording or protocol evidence such as LX/LR/ER/ZR/BiDi/CWDM/DWDM/DR/FR/PSM4
- returns an empty fiber type (`""`) when evidence is missing instead of assuming SMF
- verification:
- `npm run build -w packages/scraper` passed locally
- deployment:
- source file synced to `/opt/tip`
- `pnpm -C packages/scraper build` passed on Erik after SSH recovered
- truth:
- future ATGBICS runs should not promote rows to detail-verified from default fiber assumptions
- ShopFiber24 parser hardening for deterministic cable/detail verification on 2026-05-09:
- root cause:
- ShopFiber24 contains variable-length AOC/DAC products such as `1 - 30 m`
@ -17,7 +35,8 @@ Updated: 2026-05-09 15:23 UTC
- verification:
- `npm run build -w packages/scraper` passed locally
- deployment:
- not deployed yet because Erik SSH currently refuses connections
- source file synced to `/opt/tip`
- `pnpm -C packages/scraper build` passed on Erik
- truth:
- future ShopFiber24 passes should only mark product details verified when reach is deterministic
- variable cable-family rows need variant-level extraction instead of broad approval

View File

@ -0,0 +1,31 @@
# ATGBICS Parser Truth Hardening - 2026-05-09
## Context
ATGBICS still has the largest near-complete detail backlog. The parser must be strict because broad Shopify refreshes can otherwise promote rows based on inferred defaults rather than evidence.
## Root Cause
- Unknown fiber type defaulted to `SMF`
- Variable cable/range text could be interpreted as a fixed reach
- Automatic detail verification should require positive source or protocol evidence
## Code Hardened
- `packages/scraper/src/scrapers/atgbics.ts`
- returns no reach (`undefined`) for variable ranges such as `1 - 30 m` instead of collapsing them into a single fixed value
- only returns `SMF` from explicit SMF/single-mode wording or protocol evidence such as LX/LR/ER/ZR/BiDi/CWDM/DWDM/DR/FR/PSM4
- returns an empty fiber type (`""`) when evidence is missing instead of assuming SMF
## Verification
- `npm run build -w packages/scraper` passed locally
## Deployment
- Source file synced to `/opt/tip`
- `pnpm -C packages/scraper build` passed on Erik after SSH recovered
## Truth Policy
Future ATGBICS runs should not promote rows to detail-verified from default fiber assumptions. Remaining ATGBICS rows need targeted optical/coherent parsing or source-specific evidence.