fix: harden shopfiber24 reach parsing
This commit is contained in:
parent
6d8d7874d3
commit
c2421c03a3
@ -49,7 +49,8 @@ function sleep(ms: number): Promise<void> {
|
||||
|
||||
function detectFormFactor(text: string): { formFactor: string; speed: string; speedGbps: number } {
|
||||
const lower = text.toLowerCase();
|
||||
if (lower.includes("400g") || lower.includes("qsfp-dd800") || lower.includes("800g")) return { formFactor: "QSFP-DD", speed: "400G", speedGbps: 400 };
|
||||
if (lower.includes("800g") || lower.includes("qsfp-dd800")) return { formFactor: lower.includes("osfp") ? "OSFP" : "QSFP-DD", speed: "800G", speedGbps: 800 };
|
||||
if (lower.includes("400g")) return { formFactor: "QSFP-DD", speed: "400G", speedGbps: 400 };
|
||||
if (lower.includes("osfp") && !lower.includes("qsfp")) return { formFactor: "OSFP", speed: "400G", speedGbps: 400 };
|
||||
if (lower.includes("qsfp-dd") || lower.includes("qsfpdd")) return { formFactor: "QSFP-DD", speed: "400G", speedGbps: 400 };
|
||||
if (lower.includes("qsfp28") || lower.includes("100g")) return { formFactor: "QSFP28", speed: "100G", speedGbps: 100 };
|
||||
@ -63,6 +64,23 @@ function detectFormFactor(text: string): { formFactor: string; speed: string; sp
|
||||
}
|
||||
|
||||
function detectReach(text: string): { label: string; meters: number } | undefined {
|
||||
// Variable-length products such as "1 - 30 m" must not be marked as one
|
||||
// deterministic cable reach. They need variant-level extraction first.
|
||||
if (/\b\d+(?:[.,]\d+)?\s*(?:-|–|to|bis)\s*\d+(?:[.,]\d+)?\s*(?:m|km)\b/i.test(text)) {
|
||||
return undefined;
|
||||
}
|
||||
|
||||
const explicit = text.match(/\b(\d+(?:[.,]\d+)?)\s*(km|m)\b/i);
|
||||
if (explicit) {
|
||||
const value = parseFloat(explicit[1].replace(",", "."));
|
||||
const unit = explicit[2].toLowerCase();
|
||||
if (Number.isFinite(value) && value > 0) {
|
||||
const meters = unit === "km" ? Math.round(value * 1000) : Math.round(value);
|
||||
const label = unit === "km" ? `${explicit[1].replace(",", ".")}km` : `${explicit[1].replace(",", ".")}m`;
|
||||
return { label, meters };
|
||||
}
|
||||
}
|
||||
|
||||
const patterns: [RegExp, string, number][] = [
|
||||
[/\b80\s*km\b/i, "80km", 80000], [/\b40\s*km\b/i, "40km", 40000],
|
||||
[/\b20\s*km\b/i, "20km", 20000], [/\b15\s*km\b/i, "15km", 15000],
|
||||
|
||||
@ -1,9 +1,27 @@
|
||||
# Current TIP Sync State
|
||||
|
||||
Updated: 2026-05-09 15:15 UTC
|
||||
Updated: 2026-05-09 15:23 UTC
|
||||
|
||||
## Newest Work
|
||||
|
||||
- ShopFiber24 parser hardening for deterministic cable/detail verification on 2026-05-09:
|
||||
- root cause:
|
||||
- ShopFiber24 contains variable-length AOC/DAC products such as `1 - 30 m`
|
||||
- those must not be interpreted as one fixed `30m` reach and marked detail-verified
|
||||
- the scraper also treated `800G` / `QSFP-DD800` product text as `400G`
|
||||
- code hardened:
|
||||
- `packages/scraper/src/scrapers/fiber24.ts`
|
||||
- detects `800G` / `QSFP-DD800` product text as speed `800G` (`speedGbps: 800`) instead of `400G`
|
||||
- generically parses a single explicit reach value given in `m` or `km`, accepting `,` or `.` as the decimal separator
|
||||
- refuses variable ranges like `1 - 30 m`, `1 – 30 m`, `1 to 30 m`, `1 bis 30 m` by returning no reach for them
|
||||
- verification:
|
||||
- `npm run build -w packages/scraper` passed locally
|
||||
- deployment:
|
||||
- not deployed yet because SSH connections to Erik are currently being refused
|
||||
- truth:
|
||||
- future ShopFiber24 passes should only mark product details verified when reach is deterministic
|
||||
- variable cable-family rows need variant-level extraction instead of broad approval
|
||||
|
||||
- FiberMall source-title optical detail backfill on 2026-05-09:
|
||||
- precheck:
|
||||
- `69` FiberMall rows had price + image + source URL but lacked detail verification
|
||||
|
||||
30
sync/history/2026-05-09-shopfiber24-parser-hardening.md
Normal file
30
sync/history/2026-05-09-shopfiber24-parser-hardening.md
Normal file
@ -0,0 +1,30 @@
|
||||
# ShopFiber24 Parser Hardening - 2026-05-09
|
||||
|
||||
## Context
|
||||
|
||||
ShopFiber24 has many near-complete rows where prices, images and source URLs exist. Several remaining products are variable-length cable families, so they must not be treated as deterministic variants.
|
||||
|
||||
## Root Cause
|
||||
|
||||
- Variable cable ranges such as `1 - 30 m` can be misread by simple reach patterns as a fixed `30m` reach
|
||||
- `800G` / `QSFP-DD800` product text was classified as `400G`
|
||||
|
||||
## Code Hardened
|
||||
|
||||
- `packages/scraper/src/scrapers/fiber24.ts`
|
||||
- detects `800G` / `QSFP-DD800` product text as speed `800G` (`speedGbps: 800`) instead of `400G`
|
||||
- generically parses a single explicit reach value given in `m` or `km`, accepting `,` or `.` as the decimal separator
|
||||
- refuses variable ranges like `1 - 30 m`, `1 – 30 m`, `1 to 30 m`, `1 bis 30 m` by returning no reach for them
|
||||
|
||||
## Verification
|
||||
|
||||
- `npm run build -w packages/scraper` passed locally
|
||||
|
||||
## Deployment
|
||||
|
||||
Not deployed yet because SSH connections to Erik were being refused. This change should be synced to `/opt/tip` and rebuilt once Erik is reachable.
|
||||
|
||||
## Truth Policy
|
||||
|
||||
Future ShopFiber24 passes should only mark product details verified when reach is deterministic. Variable cable-family rows need variant-level extraction and must not be blindly approved.
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user