fix: verify qsfptek cable details

This commit is contained in:
Rene Fichtmueller 2026-05-09 17:03:35 +02:00
parent fc18b00157
commit bb9c495497
3 changed files with 91 additions and 4 deletions

View File

@ -78,6 +78,17 @@ function sleep(ms: number): Promise<void> {
}
function detectReach(text: string): { label: string; meters: number } | undefined {
const generic = text.match(/\b(\d+(?:\.\d+)?)\s*(km|m)\b/i);
if (generic) {
const value = parseFloat(generic[1]);
const unit = generic[2].toLowerCase();
if (Number.isFinite(value) && value > 0) {
const meters = unit === "km" ? Math.round(value * 1000) : Math.max(1, Math.round(value));
const labelValue = String(value).replace(/\.0$/, "");
return { label: `${labelValue}${unit}`, meters };
}
}
const patterns: [RegExp, string, number][] = [
[/\b120\s*km\b/i, "120km", 120000],
[/\b80\s*km\b/i, "80km", 80000],
@ -100,15 +111,28 @@ function detectReach(text: string): { label: string; meters: number } | undefine
}
/**
 * Infers the fiber/media type from free-form product text.
 *
 * Checks run in priority order and the first match wins:
 *  1. copper / DAC / twinax / RJ45 / BASE-T keywords, or a word starting with `MCP` -> "Copper"
 *  2. AOC / active optical / active fiber keywords, or a word starting with `MFS` -> "MMF"
 *  3. single-mode / SMF keywords, standalone LX/LR/ER/ZR tokens, BiDi, CWDM, DWDM -> "SMF"
 *  4. multi-mode / MMF keywords, standalone SX/SR tokens -> "MMF"
 *
 * All matching is case-insensitive.
 *
 * @param text - Product name or description to inspect.
 * @returns "Copper", "MMF", "SMF", or "" when no rule matches.
 */
function detectFiber(text: string): string {
  if (/copper|dac|twinax|rj.?45|base-t|\bmcp/i.test(text)) return "Copper";
  if (/aoc|active.?optical|active.?fiber|\bmfs/i.test(text)) return "MMF";
  // Zero-width lookarounds ("not adjacent to a letter") instead of consuming a
  // neighbouring character, so tokens at the very start or end of the text
  // (e.g. "... SFP+ LR", "10GBASE-SR") are recognised as well as "LR4"/"SR4".
  if (/single.?mode|smf|(?<![a-z])(?:lx|lr|er|zr)(?![a-z])|bidi|cwdm|dwdm/i.test(text)) return "SMF";
  if (/multi.?mode|mmf|(?<![a-z])(?:sx|sr)(?![a-z])/i.test(text)) return "MMF";
  return "";
}
/**
 * Derives a wavelength string from free-form product text.
 *
 * Resolution order (first hit wins):
 *  1. copper / DAC / twinax / BASE-T / RJ45 keywords, or a word starting with `MCP` -> "N/A"
 *  2. the first explicit `<3-4 digits>nm` value in the text; a parsed "1311" is reported as "1310"
 *  3. `CWDM4` -> "1271,1291,1311,1331"
 *  4. SR / SR4 / SR8 / SRBD / VR / VR4 / ESR4 / CSR4 -> "850"
 *  5. DR / DR4 / DR8 / FR / FR4 / FR8 / LR / LR4 / ER / ER4 / PSM4 / 2DR4 / 2FR4 -> "1310"
 *
 * All matching is case-insensitive.
 *
 * @param text - Product name or description to inspect.
 * @returns The wavelength value(s) as a string, "N/A" for copper media, or ""
 *          when nothing can be determined.
 */
function detectWavelength(text: string): string {
  if (/copper|dac|twinax|base-t|rj.?45|\bmcp/i.test(text)) return "N/A";
  const m = text.match(/(\d{3,4})\s*nm/i);
  // An explicit "NNNNnm" value takes precedence over family-based inference.
  // No unconditional return here: falling through is what makes the family
  // fallbacks below reachable when the text carries no explicit value.
  if (m) return m[1] === "1311" ? "1310" : m[1];
  if (/\bCWDM4\b/i.test(text)) return "1271,1291,1311,1331";
  if (/\b(?:SR|SR4|SR8|SRBD|VR|VR4|ESR4|CSR4)\b/i.test(text)) return "850";
  if (/\b(?:DR|DR4|DR8|FR|FR4|FR8|LR|LR4|ER|ER4|PSM4|2DR4|2FR4)\b/i.test(text)) return "1310";
  return "";
}
/**
 * Maps free-form product text to a catalog category.
 *
 * Rules are evaluated top to bottom and the first matching pattern decides the
 * category; text that matches no rule falls back to "DataCenter".
 *
 * @param text - Product name or description to classify.
 * @returns "Cable Breakout", "AOC Cable", "Cable", or "DataCenter".
 */
function detectCategory(text: string): string {
  const categoryRules: ReadonlyArray<readonly [RegExp, string]> = [
    [/breakout/i, "Cable Breakout"],
    [/aoc|active.?optical|active.?fiber|\bmfs/i, "AOC Cable"],
    [/copper|dac|twinax|base-t|rj.?45|\bmcp/i, "Cable"],
  ];
  const matchedRule = categoryRules.find(([pattern]) => pattern.test(text));
  return matchedRule === undefined ? "DataCenter" : matchedRule[1];
}
/**
@ -268,6 +292,7 @@ export async function scrapeQsfptek(): Promise<void> {
const txId = await findOrCreateScrapedTransceiver({
partNumber: product.partNumber,
vendorId,
productUrl: product.url,
formFactor: product.formFactor,
speedGbps: product.speedGbps,
speed: product.speed,
@ -275,7 +300,7 @@ export async function scrapeQsfptek(): Promise<void> {
reachLabel: product.reachLabel,
fiberType: product.fiberType,
wavelengths: product.wavelength,
category: "DataCenter",
category: detectCategory(product.name),
});
// Price observation from listing page

View File

@ -1,9 +1,33 @@
# Current TIP Sync State
Updated: 2026-05-09 14:54 UTC
Updated: 2026-05-09 15:02 UTC
## Newest Work
- QSFPTEK cable/AOC parser hardening and DB detail backfill on 2026-05-09:
- root cause:
- QSFPTEK scraper parsed catalog rows but did not pass `productUrl` into `findOrCreateScrapedTransceiver`
- generic leading cable lengths like `1m`, `2m`, `10m`, `15m`, `30m` were not parsed
- MFS/MCP AOC/DAC product families were not classified as cable/AOC products
- code hardened:
- `packages/scraper/src/scrapers/qsfptek.ts`
- parses generic `m/km` reach, including leading lengths
- classifies `MFS`/AOC/active fiber as `AOC Cable`
- classifies `MCP`/DAC/Copper/Twinax as `Cable`
- writes `productUrl` into the DB upsert
- sets Copper/DAC wavelength to `N/A`
- adds safe optical family wavelength parsing for future catalog runs
- DB correction:
- found `36` QSFPTEK rows missing details
- `28` had deterministic leading length and source URL
- updated those `28` with reach, cable/AOC classification and source-backed details
- `8` additional rows became fully verified after promotion
- deployment:
- synced patched QSFPTEK scraper to active `/opt/tip`
- `pnpm -C packages/scraper build` passed
- truth:
- QSFPTEK is now much closer to full verification, but the remaining rows include long-reach 1G optics that are missing fiber/detail fields; these should be handled separately by parsing the source pages, not by guessing values
- Copper/DAC reach/detail verification and comparable API semantics on 2026-05-09:
- purpose:
- continue toward full TIP verification without inventing optical data

View File

@ -0,0 +1,38 @@
# QSFPTEK Cable AOC Parser And Backfill
Date: 2026-05-09
Actor: Codex
Scope: QSFPTEK remaining detail gaps
Mode: DB-only correction plus scraper parser hardening
## Root Cause
QSFPTEK still had detail gaps because:
- the scraper did not pass `productUrl` to `findOrCreateScrapedTransceiver`
- generic leading lengths like `1m`, `2m`, `10m`, `15m`, `30m` were not parsed
- MFS/MCP cable families were not classified as AOC/DAC cable products
## Code Change
Patched `packages/scraper/src/scrapers/qsfptek.ts`:
- parse generic meter/kilometer reach
- classify `MFS`/AOC/active fiber as `AOC Cable`
- classify `MCP`/DAC/Copper/Twinax as `Cable`
- write `productUrl` in the DB upsert
- set Copper/DAC wavelength to `N/A`
- add safe optical family wavelength parsing for future catalog runs
Synced to active `/opt/tip`; `pnpm -C packages/scraper build` passed.
## DB Backfill
- QSFPTEK rows missing details: `36`
- deterministic cable/AOC rows with source URL and leading length: `28`
- rows updated: `28`
- additional rows promoted to `fully_verified`: `8`
## Training Note
TIPLLM should treat QSFPTEK `MFS...` rows as AOC/active fiber cable candidates and `MCP...` rows as DAC/Copper cable candidates, but should mark details as verified only when both a source URL and a deterministic length are present.