diff --git a/packages/scraper/src/scrapers/qsfptek.ts b/packages/scraper/src/scrapers/qsfptek.ts index 68e9fe6..89fb7f1 100644 --- a/packages/scraper/src/scrapers/qsfptek.ts +++ b/packages/scraper/src/scrapers/qsfptek.ts @@ -78,6 +78,17 @@ function sleep(ms: number): Promise { } function detectReach(text: string): { label: string; meters: number } | undefined { + const generic = text.match(/\b(\d+(?:\.\d+)?)\s*(km|m)\b/i); + if (generic) { + const value = parseFloat(generic[1]); + const unit = generic[2].toLowerCase(); + if (Number.isFinite(value) && value > 0) { + const meters = unit === "km" ? Math.round(value * 1000) : Math.max(1, Math.round(value)); + const labelValue = String(value).replace(/\.0$/, ""); + return { label: `${labelValue}${unit}`, meters }; + } + } + const patterns: [RegExp, string, number][] = [ [/\b120\s*km\b/i, "120km", 120000], [/\b80\s*km\b/i, "80km", 80000], @@ -100,15 +111,28 @@ function detectReach(text: string): { label: string; meters: number } | undefine } function detectFiber(text: string): string { + if (/copper|dac|twinax|rj.?45|base-t|\bmcp/i.test(text)) return "Copper"; + if (/aoc|active.?optical|active.?fiber|\bmfs/i.test(text)) return "MMF"; if (/single.?mode|smf|[^a-z]lx[^a-z]|[^a-z]lr[^a-z]|[^a-z]er[^a-z]|[^a-z]zr[^a-z]|bidi|cwdm|dwdm/i.test(text)) return "SMF"; if (/multi.?mode|mmf|[^a-z]sx[^a-z]|[^a-z]sr[^a-z]/i.test(text)) return "MMF"; - if (/copper|dac|twinax|rj.?45|base-t/i.test(text)) return "Copper"; return ""; } function detectWavelength(text: string): string { + if (/copper|dac|twinax|base-t|rj.?45|\bmcp/i.test(text)) return "N/A"; const m = text.match(/(\d{3,4})\s*nm/i); - return m ? m[1] : ""; + if (m) return m[1] === "1311" ? 
"1310" : m[1]; + if (/\bCWDM4\b/i.test(text)) return "1271,1291,1311,1331"; + if (/\b(?:SR|SR4|SR8|SRBD|VR|VR4|ESR4|CSR4)\b/i.test(text)) return "850"; + if (/\b(?:DR|DR4|DR8|FR|FR4|FR8|LR|LR4|ER|ER4|PSM4|2DR4|2FR4)\b/i.test(text)) return "1310"; + return ""; +} + +function detectCategory(text: string): string { + if (/breakout/i.test(text)) return "Cable Breakout"; + if (/aoc|active.?optical|active.?fiber|\bmfs/i.test(text)) return "AOC Cable"; + if (/copper|dac|twinax|base-t|rj.?45|\bmcp/i.test(text)) return "Cable"; + return "DataCenter"; } /** @@ -268,6 +292,7 @@ export async function scrapeQsfptek(): Promise { const txId = await findOrCreateScrapedTransceiver({ partNumber: product.partNumber, vendorId, + productUrl: product.url, formFactor: product.formFactor, speedGbps: product.speedGbps, speed: product.speed, @@ -275,7 +300,7 @@ export async function scrapeQsfptek(): Promise { reachLabel: product.reachLabel, fiberType: product.fiberType, wavelengths: product.wavelength, - category: "DataCenter", + category: detectCategory(product.name), }); // Price observation from listing page diff --git a/sync/CURRENT.md b/sync/CURRENT.md index 4740fdf..6ba19d2 100644 --- a/sync/CURRENT.md +++ b/sync/CURRENT.md @@ -1,9 +1,33 @@ # Current TIP Sync State -Updated: 2026-05-09 14:54 UTC +Updated: 2026-05-09 15:02 UTC ## Newest Work +- QSFPTEK cable/AOC parser hardening and DB detail backfill on 2026-05-09: + - root cause: + - QSFPTEK scraper parsed catalog rows but did not pass `productUrl` into `findOrCreateScrapedTransceiver` + - generic leading cable lengths like `1m`, `2m`, `10m`, `15m`, `30m` were not parsed + - MFS/MCP AOC/DAC product families were not classified as cable/AOC products + - code hardened: + - `packages/scraper/src/scrapers/qsfptek.ts` + - parses generic `m/km` reach, including leading lengths + - classifies `MFS`/AOC/active fiber as `AOC Cable` + - classifies `MCP`/DAC/Copper/Twinax as `Cable` + - writes `productUrl` into the DB upsert + - sets Copper/DAC 
wavelength to `N/A` + - adds safe optical family wavelength parsing for future catalog runs + - DB correction: + - found `36` QSFPTEK rows missing details + - `28` had deterministic leading length and source URL + - updated those `28` with reach, cable/AOC classification and source-backed details + - `8` additional rows became fully verified after promotion + - deployment: + - synced patched QSFPTEK scraper to active `/opt/tip` + - `pnpm -C packages/scraper build` passed + - truth: + - QSFPTEK is now much closer to full verification, but remaining rows include long-reach 1G optics missing fiber/detail fields and should be handled separately by source parsing, not guessed + - Copper/DAC reach/detail verification and comparable API semantics on 2026-05-09: - purpose: - continue toward full TIP verification without inventing optical data diff --git a/sync/history/2026-05-09-qsfptek-cable-aoc-parser-and-backfill.md b/sync/history/2026-05-09-qsfptek-cable-aoc-parser-and-backfill.md new file mode 100644 index 0000000..6d5d873 --- /dev/null +++ b/sync/history/2026-05-09-qsfptek-cable-aoc-parser-and-backfill.md @@ -0,0 +1,38 @@ +# QSFPTEK Cable AOC Parser And Backfill + +Date: 2026-05-09 +Actor: Codex +Scope: QSFPTEK remaining detail gaps +Mode: DB-only correction plus scraper parser hardening + +## Root Cause + +QSFPTEK still had detail gaps because: + +- the scraper did not pass `productUrl` to `findOrCreateScrapedTransceiver` +- generic leading lengths like `1m`, `2m`, `10m`, `15m`, `30m` were not parsed +- MFS/MCP cable families were not classified as AOC/DAC cable products + +## Code Change + +Patched `packages/scraper/src/scrapers/qsfptek.ts`: + +- parse generic meter/kilometer reach +- classify `MFS`/AOC/active fiber as `AOC Cable` +- classify `MCP`/DAC/Copper/Twinax as `Cable` +- write `productUrl` in the DB upsert +- set Copper/DAC wavelength to `N/A` +- add safe optical family wavelength parsing for future catalog runs + +Synced to active `/opt/tip`; `pnpm -C packages/scraper build`
passed. + +## DB Backfill + +- QSFPTEK rows missing details: `36` +- deterministic cable/AOC rows with source URL and leading length: `28` +- rows updated: `28` +- additional rows promoted to `fully_verified`: `8` + +## Training Note + +TIPLLM should treat QSFPTEK `MFS...` rows as AOC/active fiber cable candidates and `MCP...` rows as DAC/Copper cable candidates, but only mark details verified when source URL and deterministic length are present.