diff --git a/packages/api/src/routes/transceivers.ts b/packages/api/src/routes/transceivers.ts index 8cff948..1d12179 100644 --- a/packages/api/src/routes/transceivers.ts +++ b/packages/api/src/routes/transceivers.ts @@ -98,9 +98,19 @@ transceiverRouter.get("/:id", async (req: Request, res: Response) => { AND NULLIF(t1.fiber_type, '') IS NOT NULL AND NULLIF(t2.fiber_type, '') IS NOT NULL AND LOWER(t2.fiber_type) = LOWER(t1.fiber_type) - AND substring(t1.wavelengths from '(\\d{3,4})') IS NOT NULL - AND substring(t2.wavelengths from '(\\d{3,4})') IS NOT NULL - AND ABS((substring(t1.wavelengths from '(\\d{3,4})'))::int - (substring(t2.wavelengths from '(\\d{3,4})'))::int) <= 15 + AND ( + ( + LOWER(t1.fiber_type) IN ('copper', 'dac', 'cu') + AND LOWER(t2.fiber_type) IN ('copper', 'dac', 'cu') + AND COALESCE(NULLIF(UPPER(t1.wavelengths), ''), 'N/A') IN ('N/A', 'NA', 'NONE') + AND COALESCE(NULLIF(UPPER(t2.wavelengths), ''), 'N/A') IN ('N/A', 'NA', 'NONE') + ) + OR ( + substring(t1.wavelengths from '(\\d{3,4})') IS NOT NULL + AND substring(t2.wavelengths from '(\\d{3,4})') IS NOT NULL + AND ABS((substring(t1.wavelengths from '(\\d{3,4})'))::int - (substring(t2.wavelengths from '(\\d{3,4})'))::int) <= 15 + ) + ) AND ( ( (t1.part_number || ' ' || COALESCE(t1.standard_name, '') || ' ' || COALESCE(t1.category, '')) !~* '(breakout|aoc|dac|twinax|active optical cable)' diff --git a/packages/scraper/src/scrapers/atgbics.ts b/packages/scraper/src/scrapers/atgbics.ts index 1f6baea..c1bc542 100644 --- a/packages/scraper/src/scrapers/atgbics.ts +++ b/packages/scraper/src/scrapers/atgbics.ts @@ -78,6 +78,7 @@ function sleep(ms: number): Promise { function detectFormFactor(text: string, fallback: string): string { const lower = text.toLowerCase(); + if (lower.includes("osfp")) return "OSFP"; if (lower.includes("qsfp-dd") || lower.includes("qsfp dd")) return "QSFP-DD"; if (lower.includes("qsfp56")) return "QSFP56"; if (lower.includes("qsfp28")) return "QSFP28"; @@ -105,6 +106,17 @@ 
function detectSpeed(text: string, fallbackGbps: number): { speed: string; speed } function detectReach(text: string): { label: string; meters: number } | undefined { + const generic = text.match(/\b(\d+(?:\.\d+)?)\s*(km|m)\b/i); + if (generic) { + const value = parseFloat(generic[1]); + const unit = generic[2].toLowerCase(); + if (Number.isFinite(value) && value > 0) { + const meters = unit === "km" ? Math.round(value * 1000) : Math.max(1, Math.round(value)); + const labelValue = String(value).replace(/\.0$/, ""); + return { label: `${labelValue}${unit}`, meters }; + } + } + const patterns: [RegExp, string, number][] = [ [/\b120\s*km\b/i, "120km", 120000], [/\b80\s*km\b/i, "80km", 80000], [/\b40\s*km\b/i, "40km", 40000], [/\b20\s*km\b/i, "20km", 20000], diff --git a/sync/CURRENT.md b/sync/CURRENT.md index 46ff08b..4740fdf 100644 --- a/sync/CURRENT.md +++ b/sync/CURRENT.md @@ -1,9 +1,54 @@ # Current TIP Sync State -Updated: 2026-05-09 14:39 UTC +Updated: 2026-05-09 14:54 UTC ## Newest Work +- Copper/DAC reach/detail verification and comparable API semantics on 2026-05-09: + - purpose: + - continue toward full TIP verification without inventing optical data + - treat Copper/DAC/Twinax as cable products with `wavelengths=N/A`, not missing optical products + - DB correction: + - found `464` Copper rows still missing reach label and `467` still missing reach meters + - `342` had deterministic length evidence in part number or product URL + - wrote `reach_label`, `reach_meters`, `wavelengths=N/A`, cable category and detail verification for those `342` + - corrected `78` ATGBICS OSFP cable rows that had been parsed as `SFP` + - code hardened: + - `packages/scraper/src/scrapers/atgbics.ts` + - detects `OSFP` before `SFP` + - parses generic decimal meter/kilometer reach such as `0.5m`, `1.5m`, `2.5m`, `30m`, `2km` + - keeps Copper/DAC/Twinax/Base-T/RJ45 wavelength as `N/A` + - `packages/api/src/routes/transceivers.ts` + - comparable products now allow Copper/DAC/CU products to match each other with 
`wavelengths=N/A` + - optical products still require numeric wavelength evidence and close wavelength match + - deployment: + - synced ATGBICS scraper to active `/opt/tip` + - `pnpm -C packages/scraper build` passed + - synced API route to active `/opt/tip` + - `pnpm -C packages/api build` passed + - restarted `tip-api` + - result: + - global `details_verified` increased from `11085` to `11425` + - global `fully_verified` increased from `9861` to `10170` + - Copper remaining gaps after correction: + - missing reach label: `122` + - missing reach meters: `125` + - missing details: `158` + - selected vendor detail/fully state: + - ATGBICS: details `7656/8269`, fully `7646/8269` + - NADDOD: details `726/748`, fully `726/748` + - QSFPTEK: details `165/201`, fully `140/201` + - FS.COM: details `373/383`, fully `300/383` + - Flexoptix: details `626/744`, fully `622/744` + - GAO Tek: details `127/414`, fully `2/414` + - health: + - public TIP health after restart: `healthy` + - load status `ok` + - memory used `13%` + - truth: + - this is real progress toward trustworthy complete data, not cosmetic flag setting + - remaining gaps are now smaller targeted vendor/parser/source tasks; NADDOD and QSFPTEK are next high-yield targets + - ATGBICS safe JSON rerun + Copper wavelength semantics on 2026-05-09: - code hardened: - `packages/scraper/src/scrapers/atgbics.ts` diff --git a/sync/history/2026-05-09-copper-dac-reach-details-and-api-semantics.md b/sync/history/2026-05-09-copper-dac-reach-details-and-api-semantics.md new file mode 100644 index 0000000..340f863 --- /dev/null +++ b/sync/history/2026-05-09-copper-dac-reach-details-and-api-semantics.md @@ -0,0 +1,85 @@ +# Copper DAC Reach Details And API Semantics + +Date: 2026-05-09 +Actor: Codex +Scope: Copper/DAC/Twinax rows, ATGBICS parser, comparable-product API +Mode: DB-only correction plus tiny API/scraper deploy + +## Why + +Many remaining highspeed "missing wavelength" and detail gaps were not optical transceiver gaps. 
They were Copper/DAC/Twinax cable rows. These products do not have optical wavelengths and should be represented with `wavelengths=N/A`, concrete cable length, and cable category. + +## DB Correction + +Precheck: + +- Copper rows missing reach label: `464` +- Copper rows missing reach meters: `467` +- Copper rows missing details: `498` + +Found deterministic length evidence in part number or product URL for `342` rows. + +Updated those rows: + +- `reach_label` +- `reach_meters` +- `wavelengths=N/A` +- cable category +- `details_verified=true` when a product source URL existed + +Corrected `78` ATGBICS OSFP cable rows parsed as `SFP`. + +Postcheck: + +- Copper rows missing reach label: `122` +- Copper rows missing reach meters: `125` +- Copper rows missing details: `158` + +## Code Changes + +`packages/scraper/src/scrapers/atgbics.ts`: + +- detect `OSFP` before `SFP` +- parse decimal meter/kilometer reach such as `0.5m`, `1.5m`, `2.5m`, `30m`, `2km` +- keep Copper/DAC/Twinax/Base-T/RJ45 `wavelengths=N/A` + +`packages/api/src/routes/transceivers.ts`: + +- allow Copper/DAC/CU comparable products to match each other with `wavelengths=N/A` +- keep numeric wavelength requirement for optical comparable products + +## Deployment + +- synced scraper to active `/opt/tip` +- `pnpm -C packages/scraper build` passed +- synced API route to active `/opt/tip` +- `pnpm -C packages/api build` passed +- restarted `tip-api` + +## Result + +Global counters: + +- `details_verified`: `11085` -> `11425` +- `fully_verified`: `9861` -> `10170` + +Selected vendors: + +- ATGBICS: details `7656/8269`, fully `7646/8269` +- NADDOD: details `726/748`, fully `726/748` +- QSFPTEK: details `165/201`, fully `140/201` +- FS.COM: details `373/383`, fully `300/383` +- Flexoptix: details `626/744`, fully `622/744` +- GAO Tek: details `127/414`, fully `2/414` + +## Health + +Public TIP health after restart: + +- status: `healthy` +- load status: `ok` +- memory used: `13%` + +## Training Note + +TIPLLM 
should treat Copper/DAC/Twinax products as cable products. They need length, endpoints/form factor, speed, price/image/source evidence, and `wavelengths=N/A`; they must not be forced into optical wavelength rules.