fix: verify copper cable semantics

This commit is contained in:
Rene Fichtmueller 2026-05-09 16:55:50 +02:00
parent c25300199a
commit fc18b00157
4 changed files with 156 additions and 4 deletions

View File

@ -98,9 +98,19 @@ transceiverRouter.get("/:id", async (req: Request, res: Response) => {
AND NULLIF(t1.fiber_type, '') IS NOT NULL AND NULLIF(t1.fiber_type, '') IS NOT NULL
AND NULLIF(t2.fiber_type, '') IS NOT NULL AND NULLIF(t2.fiber_type, '') IS NOT NULL
AND LOWER(t2.fiber_type) = LOWER(t1.fiber_type) AND LOWER(t2.fiber_type) = LOWER(t1.fiber_type)
AND substring(t1.wavelengths from '(\\d{3,4})') IS NOT NULL AND (
(
LOWER(t1.fiber_type) IN ('copper', 'dac', 'cu')
AND LOWER(t2.fiber_type) IN ('copper', 'dac', 'cu')
AND COALESCE(NULLIF(UPPER(t1.wavelengths), ''), 'N/A') IN ('N/A', 'NA', 'NONE')
AND COALESCE(NULLIF(UPPER(t2.wavelengths), ''), 'N/A') IN ('N/A', 'NA', 'NONE')
)
OR (
substring(t1.wavelengths from '(\\d{3,4})') IS NOT NULL
AND substring(t2.wavelengths from '(\\d{3,4})') IS NOT NULL AND substring(t2.wavelengths from '(\\d{3,4})') IS NOT NULL
AND ABS((substring(t1.wavelengths from '(\\d{3,4})'))::int - (substring(t2.wavelengths from '(\\d{3,4})'))::int) <= 15 AND ABS((substring(t1.wavelengths from '(\\d{3,4})'))::int - (substring(t2.wavelengths from '(\\d{3,4})'))::int) <= 15
)
)
AND ( AND (
( (
(t1.part_number || ' ' || COALESCE(t1.standard_name, '') || ' ' || COALESCE(t1.category, '')) !~* '(breakout|aoc|dac|twinax|active optical cable)' (t1.part_number || ' ' || COALESCE(t1.standard_name, '') || ' ' || COALESCE(t1.category, '')) !~* '(breakout|aoc|dac|twinax|active optical cable)'

View File

@ -78,6 +78,7 @@ function sleep(ms: number): Promise<void> {
function detectFormFactor(text: string, fallback: string): string { function detectFormFactor(text: string, fallback: string): string {
const lower = text.toLowerCase(); const lower = text.toLowerCase();
if (lower.includes("osfp")) return "OSFP";
if (lower.includes("qsfp-dd") || lower.includes("qsfp dd")) return "QSFP-DD"; if (lower.includes("qsfp-dd") || lower.includes("qsfp dd")) return "QSFP-DD";
if (lower.includes("qsfp56")) return "QSFP56"; if (lower.includes("qsfp56")) return "QSFP56";
if (lower.includes("qsfp28")) return "QSFP28"; if (lower.includes("qsfp28")) return "QSFP28";
@ -105,6 +106,17 @@ function detectSpeed(text: string, fallbackGbps: number): { speed: string; speed
} }
function detectReach(text: string): { label: string; meters: number } | undefined { function detectReach(text: string): { label: string; meters: number } | undefined {
const generic = text.match(/\b(\d+(?:\.\d+)?)\s*(km|m)\b/i);
if (generic) {
const value = parseFloat(generic[1]);
const unit = generic[2].toLowerCase();
if (Number.isFinite(value) && value > 0) {
const meters = unit === "km" ? Math.round(value * 1000) : Math.max(1, Math.round(value));
const labelValue = String(value).replace(/\.0$/, "");
return { label: `${labelValue}${unit}`, meters };
}
}
const patterns: [RegExp, string, number][] = [ const patterns: [RegExp, string, number][] = [
[/\b120\s*km\b/i, "120km", 120000], [/\b80\s*km\b/i, "80km", 80000], [/\b120\s*km\b/i, "120km", 120000], [/\b80\s*km\b/i, "80km", 80000],
[/\b40\s*km\b/i, "40km", 40000], [/\b20\s*km\b/i, "20km", 20000], [/\b40\s*km\b/i, "40km", 40000], [/\b20\s*km\b/i, "20km", 20000],

View File

@ -1,9 +1,54 @@
# Current TIP Sync State
Updated: 2026-05-09 14:54 UTC (previously 14:39 UTC)
## Newest Work
- Copper/DAC reach/detail verification and comparable API semantics on 2026-05-09:
- purpose:
- continue toward full TIP verification without inventing optical data
- treat Copper/DAC/Twinax as cable products with `wavelengths=N/A`, not missing optical products
- DB correction:
- found `464` Copper rows still missing a reach label and `467` still missing reach meters
- `342` had deterministic length evidence in part number or product URL
- wrote `reach_label`, `reach_meters`, `wavelengths=N/A`, cable category and detail verification for those `342`
- corrected `78` ATGBICS OSFP cable rows that had been parsed as `SFP`
- code hardened:
- `packages/scraper/src/scrapers/atgbics.ts`
- detects `OSFP` before `SFP`
- parses generic decimal meter/kilometer reach such as `0.5m`, `1.5m`, `2.5m`, `30m`, `2km`
- keeps Copper/DAC/Twinax/Base-T/RJ45 wavelength as `N/A`
- `packages/api/src/routes/transceivers.ts`
- comparable products now allow Copper/DAC/CU products to match each other with `wavelengths=N/A`
- optical products still require numeric wavelength evidence and close wavelength match
- deployment:
- synced ATGBICS scraper to active `/opt/tip`
- `pnpm -C packages/scraper build` passed
- synced API route to active `/opt/tip`
- `pnpm -C packages/api build` passed
- restarted `tip-api`
- result:
- global `details_verified` increased from `11085` to `11425`
- global `fully_verified` increased from `9861` to `10170`
- Copper remaining gaps after correction:
- missing reach label: `122`
- missing reach meters: `125`
- missing details: `158`
- selected vendor detail/fully state:
- ATGBICS: details `7656/8269`, fully `7646/8269`
- NADDOD: details `726/748`, fully `726/748`
- QSFPTEK: details `165/201`, fully `140/201`
- FS.COM: details `373/383`, fully `300/383`
- Flexoptix: details `626/744`, fully `622/744`
- GAO Tek: details `127/414`, fully `2/414`
- health:
- public TIP health after restart: `healthy`
- load status `ok`
- memory used `13%`
- truth:
- this is real progress toward trustworthy complete data, not cosmetic flag setting
- remaining gaps are now smaller targeted vendor/parser/source tasks; NADDOD and QSFPTEK are next high-yield targets
- ATGBICS safe JSON rerun + Copper wavelength semantics on 2026-05-09:
- code hardened:
- `packages/scraper/src/scrapers/atgbics.ts`

View File

@ -0,0 +1,85 @@
# Copper DAC Reach Details And API Semantics
Date: 2026-05-09
Actor: Codex
Scope: Copper/DAC/Twinax rows, ATGBICS parser, comparable-product API
Mode: DB correction plus a small API/scraper deploy
## Why
Many of the remaining high-speed "missing wavelength" and detail gaps were not optical transceiver gaps; they were Copper/DAC/Twinax cable rows. These products have no optical wavelength and should be represented with `wavelengths=N/A`, a concrete cable length, and a cable category.
## DB Correction
Precheck:
- Copper rows missing reach label: `464`
- Copper rows missing reach meters: `467`
- Copper rows missing details: `498`
Found deterministic length evidence in part number or product URL for `342` rows.
Updated those rows:
- `reach_label`
- `reach_meters`
- `wavelengths=N/A`
- cable category
- `details_verified=true` when a product source URL existed
Corrected `78` ATGBICS OSFP cable rows whose form factor had been parsed as `SFP`.
Postcheck:
- Copper rows missing reach label: `122`
- Copper rows missing reach meters: `125`
- Copper rows missing details: `158`
## Code Changes
`packages/scraper/src/scrapers/atgbics.ts`:
- detect `OSFP` before `SFP`
- parse decimal meter/kilometer reach such as `0.5m`, `1.5m`, `2.5m`, `30m`, `2km`
- keep Copper/DAC/Twinax/Base-T/RJ45 `wavelengths=N/A`
`packages/api/src/routes/transceivers.ts`:
- allow Copper/DAC/CU comparable products to match each other with `wavelengths=N/A`
- keep numeric wavelength requirement for optical comparable products
## Deployment
- synced scraper to active `/opt/tip`
- `pnpm -C packages/scraper build` passed
- synced API route to active `/opt/tip`
- `pnpm -C packages/api build` passed
- restarted `tip-api`
## Result
Global counters:
- `details_verified`: `11085` -> `11425`
- `fully_verified`: `9861` -> `10170`
Selected vendors:
- ATGBICS: details `7656/8269`, fully `7646/8269`
- NADDOD: details `726/748`, fully `726/748`
- QSFPTEK: details `165/201`, fully `140/201`
- FS.COM: details `373/383`, fully `300/383`
- Flexoptix: details `626/744`, fully `622/744`
- GAO Tek: details `127/414`, fully `2/414`
## Health
Public TIP health after restart:
- status: `healthy`
- load status: `ok`
- memory used: `13%`
## Training Note
TIPLLM should treat Copper/DAC/Twinax products as cable products. They need length, endpoints/form factor, speed, price/image/source evidence, and `wavelengths=N/A`; they must not be forced into optical wavelength rules.