fix: verify qsfptek cable details
This commit is contained in:
parent
fc18b00157
commit
bb9c495497
@ -78,6 +78,17 @@ function sleep(ms: number): Promise<void> {
|
|||||||
}
|
}
|
||||||
|
|
||||||
function detectReach(text: string): { label: string; meters: number } | undefined {
|
function detectReach(text: string): { label: string; meters: number } | undefined {
|
||||||
|
const generic = text.match(/\b(\d+(?:\.\d+)?)\s*(km|m)\b/i);
|
||||||
|
if (generic) {
|
||||||
|
const value = parseFloat(generic[1]);
|
||||||
|
const unit = generic[2].toLowerCase();
|
||||||
|
if (Number.isFinite(value) && value > 0) {
|
||||||
|
const meters = unit === "km" ? Math.round(value * 1000) : Math.max(1, Math.round(value));
|
||||||
|
const labelValue = String(value).replace(/\.0$/, "");
|
||||||
|
return { label: `${labelValue}${unit}`, meters };
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
const patterns: [RegExp, string, number][] = [
|
const patterns: [RegExp, string, number][] = [
|
||||||
[/\b120\s*km\b/i, "120km", 120000],
|
[/\b120\s*km\b/i, "120km", 120000],
|
||||||
[/\b80\s*km\b/i, "80km", 80000],
|
[/\b80\s*km\b/i, "80km", 80000],
|
||||||
@ -100,15 +111,28 @@ function detectReach(text: string): { label: string; meters: number } | undefine
|
|||||||
}
|
}
|
||||||
|
|
||||||
function detectFiber(text: string): string {
|
function detectFiber(text: string): string {
|
||||||
|
if (/copper|dac|twinax|rj.?45|base-t|\bmcp/i.test(text)) return "Copper";
|
||||||
|
if (/aoc|active.?optical|active.?fiber|\bmfs/i.test(text)) return "MMF";
|
||||||
if (/single.?mode|smf|[^a-z]lx[^a-z]|[^a-z]lr[^a-z]|[^a-z]er[^a-z]|[^a-z]zr[^a-z]|bidi|cwdm|dwdm/i.test(text)) return "SMF";
|
if (/single.?mode|smf|[^a-z]lx[^a-z]|[^a-z]lr[^a-z]|[^a-z]er[^a-z]|[^a-z]zr[^a-z]|bidi|cwdm|dwdm/i.test(text)) return "SMF";
|
||||||
if (/multi.?mode|mmf|[^a-z]sx[^a-z]|[^a-z]sr[^a-z]/i.test(text)) return "MMF";
|
if (/multi.?mode|mmf|[^a-z]sx[^a-z]|[^a-z]sr[^a-z]/i.test(text)) return "MMF";
|
||||||
if (/copper|dac|twinax|rj.?45|base-t/i.test(text)) return "Copper";
|
|
||||||
return "";
|
return "";
|
||||||
}
|
}
|
||||||
|
|
||||||
function detectWavelength(text: string): string {
|
function detectWavelength(text: string): string {
|
||||||
|
if (/copper|dac|twinax|base-t|rj.?45|\bmcp/i.test(text)) return "N/A";
|
||||||
const m = text.match(/(\d{3,4})\s*nm/i);
|
const m = text.match(/(\d{3,4})\s*nm/i);
|
||||||
return m ? m[1] : "";
|
if (m) return m[1] === "1311" ? "1310" : m[1];
|
||||||
|
if (/\bCWDM4\b/i.test(text)) return "1271,1291,1311,1331";
|
||||||
|
if (/\b(?:SR|SR4|SR8|SRBD|VR|VR4|ESR4|CSR4)\b/i.test(text)) return "850";
|
||||||
|
if (/\b(?:DR|DR4|DR8|FR|FR4|FR8|LR|LR4|ER|ER4|PSM4|2DR4|2FR4)\b/i.test(text)) return "1310";
|
||||||
|
return "";
|
||||||
|
}
|
||||||
|
|
||||||
|
function detectCategory(text: string): string {
|
||||||
|
if (/breakout/i.test(text)) return "Cable Breakout";
|
||||||
|
if (/aoc|active.?optical|active.?fiber|\bmfs/i.test(text)) return "AOC Cable";
|
||||||
|
if (/copper|dac|twinax|base-t|rj.?45|\bmcp/i.test(text)) return "Cable";
|
||||||
|
return "DataCenter";
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -268,6 +292,7 @@ export async function scrapeQsfptek(): Promise<void> {
|
|||||||
const txId = await findOrCreateScrapedTransceiver({
|
const txId = await findOrCreateScrapedTransceiver({
|
||||||
partNumber: product.partNumber,
|
partNumber: product.partNumber,
|
||||||
vendorId,
|
vendorId,
|
||||||
|
productUrl: product.url,
|
||||||
formFactor: product.formFactor,
|
formFactor: product.formFactor,
|
||||||
speedGbps: product.speedGbps,
|
speedGbps: product.speedGbps,
|
||||||
speed: product.speed,
|
speed: product.speed,
|
||||||
@ -275,7 +300,7 @@ export async function scrapeQsfptek(): Promise<void> {
|
|||||||
reachLabel: product.reachLabel,
|
reachLabel: product.reachLabel,
|
||||||
fiberType: product.fiberType,
|
fiberType: product.fiberType,
|
||||||
wavelengths: product.wavelength,
|
wavelengths: product.wavelength,
|
||||||
category: "DataCenter",
|
category: detectCategory(product.name),
|
||||||
});
|
});
|
||||||
|
|
||||||
// Price observation from listing page
|
// Price observation from listing page
|
||||||
|
|||||||
@ -1,9 +1,33 @@
|
|||||||
# Current TIP Sync State
|
# Current TIP Sync State
|
||||||
|
|
||||||
Updated: 2026-05-09 14:54 UTC
|
Updated: 2026-05-09 15:02 UTC
|
||||||
|
|
||||||
## Newest Work
|
## Newest Work
|
||||||
|
|
||||||
|
- QSFPTEK cable/AOC parser hardening and DB detail backfill on 2026-05-09:
|
||||||
|
- root cause:
|
||||||
|
- QSFPTEK scraper parsed catalog rows but did not pass `productUrl` into `findOrCreateScrapedTransceiver`
|
||||||
|
- generic leading cable lengths like `1m`, `2m`, `10m`, `15m`, `30m` were not parsed
|
||||||
|
- MFS/MCP AOC/DAC product families were not classified as cable/AOC products
|
||||||
|
- code hardened:
|
||||||
|
- `packages/scraper/src/scrapers/qsfptek.ts`
|
||||||
|
- parses generic `m/km` reach, including leading lengths
|
||||||
|
- classifies `MFS`/AOC/active fiber as `AOC Cable`
|
||||||
|
- classifies `MCP`/DAC/Copper/Twinax as `Cable`
|
||||||
|
- writes `productUrl` into the DB upsert
|
||||||
|
- sets Copper/DAC wavelength to `N/A`
|
||||||
|
- adds safe optical family wavelength parsing for future catalog runs
|
||||||
|
- DB correction:
|
||||||
|
- found `36` QSFPTEK rows missing details
|
||||||
|
- `28` had deterministic leading length and source URL
|
||||||
|
- updated those `28` with reach, cable/AOC classification and source-backed details
|
||||||
|
- `8` additional rows became fully verified after promotion
|
||||||
|
- deployment:
|
||||||
|
- synced patched QSFPTEK scraper to active `/opt/tip`
|
||||||
|
- `pnpm -C packages/scraper build` passed
|
||||||
|
- truth:
|
||||||
|
- QSFPTEK is now much closer to full verification, but the remaining rows (including long-reach 1G optics that are missing fiber/detail fields) should be handled separately by source parsing, not guessed
|
||||||
|
|
||||||
- Copper/DAC reach/detail verification and comparable API semantics on 2026-05-09:
|
- Copper/DAC reach/detail verification and comparable API semantics on 2026-05-09:
|
||||||
- purpose:
|
- purpose:
|
||||||
- continue toward full TIP verification without inventing optical data
|
- continue toward full TIP verification without inventing optical data
|
||||||
|
|||||||
@ -0,0 +1,38 @@
|
|||||||
|
# QSFPTEK Cable AOC Parser And Backfill
|
||||||
|
|
||||||
|
Date: 2026-05-09
|
||||||
|
Actor: Codex
|
||||||
|
Scope: QSFPTEK remaining detail gaps
|
||||||
|
Mode: DB-only correction plus scraper parser hardening
|
||||||
|
|
||||||
|
## Root Cause
|
||||||
|
|
||||||
|
QSFPTEK still had detail gaps because:
|
||||||
|
|
||||||
|
- the scraper did not pass `productUrl` to `findOrCreateScrapedTransceiver`
|
||||||
|
- generic leading lengths like `1m`, `2m`, `10m`, `15m`, `30m` were not parsed
|
||||||
|
- MFS/MCP cable families were not classified as AOC/DAC cable products
|
||||||
|
|
||||||
|
## Code Change
|
||||||
|
|
||||||
|
Patched `packages/scraper/src/scrapers/qsfptek.ts`:
|
||||||
|
|
||||||
|
- parse generic meter/kilometer reach
|
||||||
|
- classify `MFS`/AOC/active fiber as `AOC Cable`
|
||||||
|
- classify `MCP`/DAC/Copper/Twinax as `Cable`
|
||||||
|
- write `productUrl` in the DB upsert
|
||||||
|
- set Copper/DAC wavelength to `N/A`
|
||||||
|
- add safe optical family wavelength parsing for future catalog runs
|
||||||
|
|
||||||
|
Synced to active `/opt/tip`; `pnpm -C packages/scraper build` passed.
|
||||||
|
|
||||||
|
## DB Backfill
|
||||||
|
|
||||||
|
- QSFPTEK rows missing details: `36`
|
||||||
|
- deterministic cable/AOC rows with source URL and leading length: `28`
|
||||||
|
- rows updated: `28`
|
||||||
|
- additional rows promoted to `fully_verified`: `8`
|
||||||
|
|
||||||
|
## Training Note
|
||||||
|
|
||||||
|
TIPLLM should treat QSFPTEK `MFS...` rows as AOC/active fiber cable candidates and `MCP...` rows as DAC/Copper cable candidates, but should mark details as verified only when both a source URL and a deterministic length are present.
|
||||||
Loading…
x
Reference in New Issue
Block a user