fix: harden atgbics evidence parsing
This commit is contained in:
parent
c2421c03a3
commit
91a1c2282a
@ -106,6 +106,10 @@ function detectSpeed(text: string, fallbackGbps: number): { speed: string; speed
|
||||
}
|
||||
|
||||
function detectReach(text: string): { label: string; meters: number } | undefined {
|
||||
if (/\b\d+(?:[.,]\d+)?\s*(?:-|–|to|bis)\s*\d+(?:[.,]\d+)?\s*(?:m|km)\b/i.test(text)) {
|
||||
return undefined;
|
||||
}
|
||||
|
||||
const generic = text.match(/\b(\d+(?:\.\d+)?)\s*(km|m)\b/i);
|
||||
if (generic) {
|
||||
const value = parseFloat(generic[1]);
|
||||
@ -136,11 +140,11 @@ function detectReach(text: string): { label: string; meters: number } | undefine
|
||||
}
|
||||
|
||||
/**
 * Classifies the fiber/media type of a product from free-form text.
 *
 * Only positive evidence produces a result; when nothing matches, the empty
 * string is returned so callers never treat an assumed default as verified.
 *
 * Checks run in priority order: single-mode evidence, multi-mode evidence,
 * copper evidence, then active optical cables (reported as "MMF").
 *
 * Short reach codes (LX, LR, ER, ZR, DR, FR, SX, SR) must stand alone as
 * tokens: they may sit at the very start or end of the text, but must not be
 * embedded in a longer word. This keeps "10GBASE-LR" detectable while
 * preventing words such as "from", "free" or "drive" from counting as FR/DR.
 *
 * @param text Product title/description text to inspect (case-insensitive).
 * @returns "SMF", "MMF", "Copper", or "" when no evidence is found.
 */
function detectFiber(text: string): string {
  // A reach code may start at the beginning of the text or after any non-letter.
  // Codes may be followed by digits (e.g. LR4, SR4) but never by another letter.
  const singleModeEvidence =
    /single.?mode|smf|(?:^|[^a-z])(?:lx|lr|er|zr)(?![a-z])|bidi|cwdm|dwdm|(?:^|[^a-z])(?:dr|fr)\d?(?![a-z\d])|psm4|2dr4|2fr4/i;
  const multiModeEvidence = /multi.?mode|mmf|(?:^|[^a-z])(?:sx|sr)(?![a-z])/i;
  const copperEvidence = /copper|dac|twinax|rj.?45|base-t/i;
  const activeOpticalEvidence = /aoc|active.?optical/i;

  if (singleModeEvidence.test(text)) return "SMF";
  if (multiModeEvidence.test(text)) return "MMF";
  if (copperEvidence.test(text)) return "Copper";
  if (activeOpticalEvidence.test(text)) return "MMF";

  // No positive evidence: report "unknown" rather than guessing a fiber type.
  return "";
}
|
||||
|
||||
function detectWavelength(text: string): string {
|
||||
|
||||
@ -1,9 +1,27 @@
|
||||
# Current TIP Sync State
|
||||
|
||||
Updated: 2026-05-09 15:23 UTC
|
||||
Updated: 2026-05-09 15:33 UTC
|
||||
|
||||
## Newest Work
|
||||
|
||||
- ATGBICS parser truth hardening on 2026-05-09:
|
||||
- root cause:
|
||||
- ATGBICS parser defaulted unknown fiber type to `SMF`
|
||||
- automatic detail verification needs positive fiber evidence, not a fallback
|
||||
- variable-length ranges must not be collapsed into a fixed reach
|
||||
- code hardened:
|
||||
- `packages/scraper/src/scrapers/atgbics.ts`
|
||||
- refuses variable reach ranges such as `1 - 30 m`
|
||||
- only returns `SMF` on explicit SMF/single-mode wording or protocol evidence such as LX/LR/ER/ZR/BiDi/CWDM/DWDM/DR/FR/PSM4
|
||||
- returns empty fiber type when evidence is missing instead of assuming SMF
|
||||
- verification:
|
||||
- `npm run build -w packages/scraper` passed locally
|
||||
- deployment:
|
||||
- source file synced to `/opt/tip`
|
||||
- `pnpm -C packages/scraper build` passed on Erik after SSH recovered
|
||||
- truth:
|
||||
- future ATGBICS runs should not promote rows to detail-verified from default fiber assumptions
|
||||
|
||||
- ShopFiber24 parser hardening for deterministic cable/detail verification on 2026-05-09:
|
||||
- root cause:
|
||||
- ShopFiber24 contains variable-length AOC/DAC products such as `1 - 30 m`
|
||||
@ -17,7 +35,8 @@ Updated: 2026-05-09 15:23 UTC
|
||||
- verification:
|
||||
- `npm run build -w packages/scraper` passed locally
|
||||
- deployment:
|
||||
- not deployed yet because Erik SSH currently refuses connections
|
||||
- source file synced to `/opt/tip`
|
||||
- `pnpm -C packages/scraper build` passed on Erik
|
||||
- truth:
|
||||
- future ShopFiber24 passes should only mark product details verified when reach is deterministic
|
||||
- variable cable-family rows need variant-level extraction instead of broad approval
|
||||
|
||||
31
sync/history/2026-05-09-atgbics-parser-truth-hardening.md
Normal file
31
sync/history/2026-05-09-atgbics-parser-truth-hardening.md
Normal file
@ -0,0 +1,31 @@
|
||||
# ATGBICS Parser Truth Hardening - 2026-05-09
|
||||
|
||||
## Context
|
||||
|
||||
ATGBICS still has the largest near-complete detail backlog. The parser must be strict because broad Shopify refreshes can otherwise promote rows based on inferred defaults rather than evidence.
|
||||
|
||||
## Root Cause
|
||||
|
||||
- Unknown fiber type defaulted to `SMF`
|
||||
- Variable cable/range text could be interpreted as a fixed reach
|
||||
- Automatic detail verification should require positive source or protocol evidence
|
||||
|
||||
## Code Hardened
|
||||
|
||||
- `packages/scraper/src/scrapers/atgbics.ts`
|
||||
- refuses variable reach ranges such as `1 - 30 m`
|
||||
- only returns `SMF` on explicit SMF/single-mode wording or protocol evidence such as LX/LR/ER/ZR/BiDi/CWDM/DWDM/DR/FR/PSM4
|
||||
- returns empty fiber type when evidence is missing instead of assuming SMF
|
||||
|
||||
## Verification
|
||||
|
||||
- `npm run build -w packages/scraper` passed locally
|
||||
|
||||
## Deployment
|
||||
|
||||
- Source file synced to `/opt/tip`
|
||||
- `pnpm -C packages/scraper build` passed on Erik after SSH recovered
|
||||
|
||||
## Truth Policy
|
||||
|
||||
Future ATGBICS runs should not promote rows to detail-verified from default fiber assumptions. Remaining ATGBICS rows need targeted optical/coherent parsing or source-specific evidence.
|
||||
Loading…
x
Reference in New Issue
Block a user