fix: harden shopfiber24 reach parsing
This commit is contained in:
parent
6d8d7874d3
commit
c2421c03a3
@ -49,7 +49,8 @@ function sleep(ms: number): Promise<void> {
|
|||||||
|
|
||||||
function detectFormFactor(text: string): { formFactor: string; speed: string; speedGbps: number } {
|
function detectFormFactor(text: string): { formFactor: string; speed: string; speedGbps: number } {
|
||||||
const lower = text.toLowerCase();
|
const lower = text.toLowerCase();
|
||||||
if (lower.includes("400g") || lower.includes("qsfp-dd800") || lower.includes("800g")) return { formFactor: "QSFP-DD", speed: "400G", speedGbps: 400 };
|
if (lower.includes("800g") || lower.includes("qsfp-dd800")) return { formFactor: lower.includes("osfp") ? "OSFP" : "QSFP-DD", speed: "800G", speedGbps: 800 };
|
||||||
|
if (lower.includes("400g")) return { formFactor: "QSFP-DD", speed: "400G", speedGbps: 400 };
|
||||||
if (lower.includes("osfp") && !lower.includes("qsfp")) return { formFactor: "OSFP", speed: "400G", speedGbps: 400 };
|
if (lower.includes("osfp") && !lower.includes("qsfp")) return { formFactor: "OSFP", speed: "400G", speedGbps: 400 };
|
||||||
if (lower.includes("qsfp-dd") || lower.includes("qsfpdd")) return { formFactor: "QSFP-DD", speed: "400G", speedGbps: 400 };
|
if (lower.includes("qsfp-dd") || lower.includes("qsfpdd")) return { formFactor: "QSFP-DD", speed: "400G", speedGbps: 400 };
|
||||||
if (lower.includes("qsfp28") || lower.includes("100g")) return { formFactor: "QSFP28", speed: "100G", speedGbps: 100 };
|
if (lower.includes("qsfp28") || lower.includes("100g")) return { formFactor: "QSFP28", speed: "100G", speedGbps: 100 };
|
||||||
@ -63,6 +64,23 @@ function detectFormFactor(text: string): { formFactor: string; speed: string; sp
|
|||||||
}
|
}
|
||||||
|
|
||||||
function detectReach(text: string): { label: string; meters: number } | undefined {
|
function detectReach(text: string): { label: string; meters: number } | undefined {
|
||||||
|
// Variable-length products such as "1 - 30 m" must not be marked as one
|
||||||
|
// deterministic cable reach. They need variant-level extraction first.
|
||||||
|
if (/\b\d+(?:[.,]\d+)?\s*(?:-|–|to|bis)\s*\d+(?:[.,]\d+)?\s*(?:m|km)\b/i.test(text)) {
|
||||||
|
return undefined;
|
||||||
|
}
|
||||||
|
|
||||||
|
const explicit = text.match(/\b(\d+(?:[.,]\d+)?)\s*(km|m)\b/i);
|
||||||
|
if (explicit) {
|
||||||
|
const value = parseFloat(explicit[1].replace(",", "."));
|
||||||
|
const unit = explicit[2].toLowerCase();
|
||||||
|
if (Number.isFinite(value) && value > 0) {
|
||||||
|
const meters = unit === "km" ? Math.round(value * 1000) : Math.round(value);
|
||||||
|
const label = unit === "km" ? `${explicit[1].replace(",", ".")}km` : `${explicit[1].replace(",", ".")}m`;
|
||||||
|
return { label, meters };
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
const patterns: [RegExp, string, number][] = [
|
const patterns: [RegExp, string, number][] = [
|
||||||
[/\b80\s*km\b/i, "80km", 80000], [/\b40\s*km\b/i, "40km", 40000],
|
[/\b80\s*km\b/i, "80km", 80000], [/\b40\s*km\b/i, "40km", 40000],
|
||||||
[/\b20\s*km\b/i, "20km", 20000], [/\b15\s*km\b/i, "15km", 15000],
|
[/\b20\s*km\b/i, "20km", 20000], [/\b15\s*km\b/i, "15km", 15000],
|
||||||
|
|||||||
@ -1,9 +1,27 @@
|
|||||||
# Current TIP Sync State
|
# Current TIP Sync State
|
||||||
|
|
||||||
Updated: 2026-05-09 15:15 UTC
|
Updated: 2026-05-09 15:23 UTC
|
||||||
|
|
||||||
## Newest Work
|
## Newest Work
|
||||||
|
|
||||||
|
- ShopFiber24 parser hardening for deterministic cable/detail verification on 2026-05-09:
|
||||||
|
- root cause:
|
||||||
|
- ShopFiber24 contains variable-length AOC/DAC products such as `1 - 30 m`
|
||||||
|
- those must not be interpreted as one fixed `30m` reach and marked detail-verified
|
||||||
|
- the scraper also treated `800G` / `QSFP-DD800` product text as `400G`
|
||||||
|
- code hardened:
|
||||||
|
- `packages/scraper/src/scrapers/fiber24.ts`
|
||||||
|
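- detects `800G` / `QSFP-DD800` product text as `800G` (`speedGbps: 800`) instead of `400G`, using the `OSFP` form factor when the text mentions OSFP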
|
||||||
|
- parses explicit single `m/km` reach values generically
|
||||||
|
- refuses variable ranges like `1 - 30 m`, `1 to 30 m`, `1 bis 30 m`
|
||||||
|
- verification:
|
||||||
|
- `npm run build -w packages/scraper` passed locally
|
||||||
|
- deployment:
|
||||||
|
- not deployed yet because SSH connections to the Erik host are currently being refused
|
||||||
|
- truth:
|
||||||
|
- future ShopFiber24 passes should only mark product details verified when reach is deterministic
|
||||||
|
- variable cable-family rows need variant-level extraction instead of broad approval
|
||||||
|
|
||||||
- FiberMall source-title optical detail backfill on 2026-05-09:
|
- FiberMall source-title optical detail backfill on 2026-05-09:
|
||||||
- precheck:
|
- precheck:
|
||||||
- `69` FiberMall rows had price + image + source URL but lacked detail verification
|
- `69` FiberMall rows had price + image + source URL but lacked detail verification
|
||||||
|
|||||||
30
sync/history/2026-05-09-shopfiber24-parser-hardening.md
Normal file
30
sync/history/2026-05-09-shopfiber24-parser-hardening.md
Normal file
@ -0,0 +1,30 @@
|
|||||||
|
# ShopFiber24 Parser Hardening - 2026-05-09
|
||||||
|
|
||||||
|
## Context
|
||||||
|
|
||||||
|
ShopFiber24 has many near-complete rows where prices, images and source URLs exist. Several remaining products are variable-length cable families, so they must not be treated as deterministic variants.
|
||||||
|
|
||||||
|
## Root Cause
|
||||||
|
|
||||||
|
- Variable cable ranges such as `1 - 30 m` can be misread by simple reach patterns as a fixed `30m` reach
|
||||||
|
- `800G` / `QSFP-DD800` product text was classified as `400G`
|
||||||
|
|
||||||
|
## Code Hardened
|
||||||
|
|
||||||
|
- `packages/scraper/src/scrapers/fiber24.ts`
|
||||||
|
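- detects `800G` / `QSFP-DD800` product text as `800G` (`speedGbps: 800`) instead of `400G`, using the `OSFP` form factor when the text mentions OSFP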
|
||||||
|
- parses explicit single `m/km` reach values generically
|
||||||
|
- refuses variable ranges like `1 - 30 m`, `1 to 30 m`, `1 bis 30 m`
|
||||||
|
|
||||||
|
## Verification
|
||||||
|
|
||||||
|
- `npm run build -w packages/scraper` passed locally
|
||||||
|
|
||||||
|
## Deployment
|
||||||
|
|
||||||
|
Not deployed yet because SSH connections to the Erik host were being refused. The change should be synced to `/opt/tip` and rebuilt once Erik is reachable.
|
||||||
|
|
||||||
|
## Truth Policy
|
||||||
|
|
||||||
|
Future ShopFiber24 passes should only mark product details verified when reach is deterministic. Variable cable-family rows need variant-level extraction and must not be blindly approved.
|
||||||
|
|
||||||
Loading…
x
Reference in New Issue
Block a user