diff --git a/packages/api/src/routes/transceivers.ts b/packages/api/src/routes/transceivers.ts
index 9c48797..2a12dee 100644
--- a/packages/api/src/routes/transceivers.ts
+++ b/packages/api/src/routes/transceivers.ts
@@ -115,9 +115,32 @@ transceiverRouter.get("/:id", async (req: Request, res: Response) => {
comparable_id: row.comparable_id,
}));
+ const allPrices = [...prices, ...comparablePrices];
+
+ // Price anomaly detection: flag if max/min ratio >= 10x (same-product prices only)
+ const samePricesEur = allPrices
+ .filter((p) => p.is_same_product && p.price > 0)
+ .map((p) => {
+ // Normalize to EUR for comparison
+ if (p.currency === "EUR") return p.price;
+ if (p.currency === "USD") return p.price * 0.92;
+ if (p.currency === "GBP") return p.price * 1.17;
+ return p.price;
+ });
+
+ let priceAnomaly: { ratio: number; min_eur: number; max_eur: number } | null = null;
+ if (samePricesEur.length >= 2) {
+ const minEur = Math.min(...samePricesEur);
+ const maxEur = Math.max(...samePricesEur);
+ const ratio = minEur > 0 ? Math.round((maxEur / minEur) * 10) / 10 : 0;
+ if (ratio >= 10) {
+ priceAnomaly = { ratio, min_eur: Math.round(minEur * 100) / 100, max_eur: Math.round(maxEur * 100) / 100 };
+ }
+ }
+
res.json({
success: true,
- data: { ...transceiver, competitor_prices: [...prices, ...comparablePrices] },
+ data: { ...transceiver, competitor_prices: allPrices, price_anomaly: priceAnomaly },
});
} catch (err) {
console.error("Get transceiver error:", err);
diff --git a/packages/dashboard/index.html b/packages/dashboard/index.html
index cc27aa7..d43cc17 100644
--- a/packages/dashboard/index.html
+++ b/packages/dashboard/index.html
@@ -2767,7 +2767,20 @@ async function openTxDetail(id) {
var comparPrices = allPrices.filter(function(p) { return p.is_same_product === false; });
if (allPrices.length > 0) {
+ // Price anomaly warning — show before price table if ratio ≥ 10x
+ var anomaly = t.price_anomaly;
+ var anomalyBanner = '';
+ if (anomaly && anomaly.ratio >= 10) {
+ anomalyBanner = '
'
+ + '⚠ Preisanomalie — '
+ + anomaly.ratio + 'x Unterschied zwischen Anbietern'
+ + ' (min. EUR\u00a0' + anomaly.min_eur.toLocaleString('de-DE',{minimumFractionDigits:2}) + ' / max. EUR\u00a0' + anomaly.max_eur.toLocaleString('de-DE',{minimumFractionDigits:2}) + ').'
+ + ' Entweder ist ein Preis falsch erfasst, oder es handelt sich um unterschiedliche Produktvarianten.'
+ + '
';
+ }
+
h += 'Current Prices
';
+ h += anomalyBanner;
h += '';
function renderPriceRow(p) {
diff --git a/packages/scraper/src/scrapers/atgbics.ts b/packages/scraper/src/scrapers/atgbics.ts
index a78c42e..282e1a8 100644
--- a/packages/scraper/src/scrapers/atgbics.ts
+++ b/packages/scraper/src/scrapers/atgbics.ts
@@ -83,6 +83,51 @@ function detectReach(text: string): string | undefined {
return undefined;
}
+/**
+ * Extract the real OEM part number from an ATGBICS URL slug.
+ *
+ * ATGBICS slug format: {oem-part-number}-{vendor}-r-compatible-transceiver-{specs}
+ * Examples:
+ *   3he16564aa-nokia-r-compatible-transceiver-qsfp-dd-... → 3HE16564AA
+ *   jnp-sfp-25g-lr-juniper-r-compatible-... → JNP-SFP-25G-LR
+ *   sfp-10g-sr-cisco-compatible-... → SFP-10G-SR
+ *
+ * @param slug - Product slug (the URL path segment after `/products/`).
+ * @returns The uppercased part number. If the cleaned value is empty, longer
+ *   than 40 characters, or still contains "TRANSCEIVER", the uppercased slug
+ *   truncated to 40 characters is returned instead.
+ */
+function extractOemPartNumber(slug: string): string {
+  // OEM vendor names that ATGBICS appends before "-r-compatible" / "-compatible".
+  const oemVendors = [
+    "nokia", "cisco", "juniper", "arista", "huawei", "hp", "hpe", "dell",
+    "extreme", "brocade", "avaya", "netgear", "mikrotik", "ubiquiti", "mellanox",
+    "intel", "broadcom", "allied", "planet", "zyxel", "dlink", "d-link",
+    "foundry", "force10", "enterasys", "optical", "palo", "fortinet", "hitachi",
+    "calix", "ciena", "adtran", "ribbon", "sycamore", "rad", "zhone",
+    "infinera", "fujitsu", "nec", "ericsson", "alcatel", "lucent",
+  ];
+  // One anchored pattern instead of a RegExp per vendor; the repeated group also
+  // removes stacked vendor tokens such as "-alcatel-lucent" regardless of list order.
+  const vendorSuffix = new RegExp(`(?:-(?:${oemVendors.join("|")}))+$`, "i");
+
+  const result = slug
+    // Cut at the first "-compatible", including the "-r" marker in front of it when
+    // present, no matter what follows ("-transceiver-...", "-sfp28-...", nothing).
+    .replace(/(?:-r)?-compatible.*$/i, "")
+    .replace(vendorSuffix, "")
+    // OEM part numbers are uppercase.
+    .toUpperCase()
+    .trim();
+
+  // Safety: an empty, overlong, or still slug-like result means extraction failed.
+  if (!result || result.length > 40 || result.includes("TRANSCEIVER")) {
+    return slug.toUpperCase().slice(0, 40);
+  }
+
+  return result;
+}
+
function detectFiberType(text: string): string | undefined {
const lower = text.toLowerCase();
if (lower.includes("single mode") || lower.includes("single-mode") || lower.includes("smf") || lower.includes("-lr") || lower.includes("-er") || lower.includes("-zr")) return "SMF";
@@ -161,10 +206,11 @@ export async function scrapeAtgbics(): Promise {
const stock = stockEl?.textContent?.trim() || "";
// Derive part number from URL slug: /products/sfp-10g-lr → sfp-10g-lr
+ // Then extract real OEM part number (strips "-r-compatible-transceiver-*")
const slug = href.split("/products/")[1]?.split("?")[0]?.replace(/\/$/, "") || "";
if (href && name && name.length > 3) {
- results.push({ name, href, price, stock, partNumber: slug });
+ results.push({ name, href, price, stock, partNumber: slug }); // OEM extraction done below after page parse
}
}
@@ -204,9 +250,13 @@ export async function scrapeAtgbics(): Promise {
if (item.price) {
const { price, currency } = parsePrice(item.price);
const speedInfo = detectSpeed(item.name);
+ // Extract real OEM part number from slug (strips -r-compatible-transceiver-*)
+ const realPartNumber = extractOemPartNumber(item.partNumber);
+ // Extract reach from name OR slug (slug often has "120km" even when name doesn't)
+ const reachLabel = detectReach(item.name) || detectReach(item.partNumber) || undefined;
if (price > 0) {
products.push({
- partNumber: item.partNumber || item.name.slice(0, 80),
+ partNumber: realPartNumber || item.name.slice(0, 80),
name: item.name,
price,
currency: currency === "USD" ? "GBP" : currency, // ATGBICS is GBP — parsePrice may default to USD if no symbol on listing
@@ -216,7 +266,7 @@ export async function scrapeAtgbics(): Promise {
formFactor: detectFormFactor(item.name),
speedGbps: speedInfo?.speedGbps,
speed: speedInfo?.speed,
- reachLabel: detectReach(item.name),
+ reachLabel,
fiberType: detectFiberType(item.name),
});
}
@@ -270,7 +320,10 @@ export async function scrapeAtgbics(): Promise {
});
const slug = url.split("/products/")[1]?.split("?")[0]?.replace(/\/$/, "") || "";
- const partNumber = data.sku || slug;
+ // Prefer Shopify SKU if available, otherwise extract real OEM PN from slug
+ const partNumber = data.sku && data.sku.length > 2 && data.sku.length < 40
+ ? data.sku.toUpperCase()
+ : extractOemPartNumber(slug);
const name = data.title || slug;
const combinedText = `${name} ${data.description}`;
@@ -278,6 +331,8 @@ export async function scrapeAtgbics(): Promise {
if (price > 0) {
const speedInfo = detectSpeed(combinedText);
+ // Reach from title/description first, then fall back to slug (slug often has "120km")
+ const reachLabel = detectReach(combinedText) || detectReach(slug) || undefined;
products.push({
partNumber,
name,
@@ -289,7 +344,7 @@ export async function scrapeAtgbics(): Promise {
formFactor: detectFormFactor(combinedText),
speedGbps: speedInfo?.speedGbps,
speed: speedInfo?.speed,
- reachLabel: detectReach(combinedText),
+ reachLabel,
fiberType: detectFiberType(combinedText),
});
}
diff --git a/sql/017-verification-tags.sql b/sql/017-verification-tags.sql
index 4ba6997..f7743ea 100644
--- a/sql/017-verification-tags.sql
+++ b/sql/017-verification-tags.sql
@@ -106,8 +106,13 @@ BEGIN
transceivers.product_page_url IS NOT NULL
AND transceivers.form_factor IS NOT NULL
AND transceivers.speed_gbps IS NOT NULL
+ -- reach_label must be a non-empty string (IS NOT NULL allows empty string — wrong)
AND transceivers.reach_label IS NOT NULL
+ AND transceivers.reach_label != ''
+ -- part_number must not be a URL slug (garbled data from scraper)
AND (transceivers.part_number IS NOT NULL AND transceivers.part_number != transceivers.slug)
+ AND transceivers.part_number NOT ILIKE '%-compatible-transceiver%'
+ AND transceivers.part_number NOT ILIKE '%-r-compatible%'
AND transceivers.data_confidence IN ('scraped_unverified', 'verified', 'official')
) INTO v_details_ok
FROM transceivers
diff --git a/sql/025-verification-quality-fix.sql b/sql/025-verification-quality-fix.sql
new file mode 100644
index 0000000..cebe672
--- /dev/null
+++ b/sql/025-verification-quality-fix.sql
@@ -0,0 +1,59 @@
+-- Migration 025: Fix details_verified quality gate + repair garbled ATGBICS records
+-- Problem: details_verified = true when:
+--   1. reach_label = '' (empty string passes IS NOT NULL)
+--   2. part_number contains 'compatible-transceiver' (URL slug stored as PN)
+-- ─────────────────────────────────────────────────────────────────────────────
+
+-- Step 1: Fix part_numbers that are ATGBICS URL slugs
+-- Slug shape: {oem-part-number}-{vendor}-r-compatible-transceiver-{specs}.
+-- Order matters: the inner replace cuts "-r-compatible..." / "-compatible..." first,
+-- because the "-{vendor}" token only sits at the end of the string ($) after that cut.
+-- The outer replace then strips one or more trailing vendor tokens ("-alcatel-lucent").
+UPDATE transceivers
+SET
+  part_number = UPPER(
+    REGEXP_REPLACE(
+      REGEXP_REPLACE(part_number, '(-r)?-compatible.*$', '', 'i'),
+      '(-(nokia|cisco|juniper|arista|huawei|hp|hpe|dell|extreme|brocade|mellanox|intel|broadcom|netgear|foundry|force10|calix|ciena|adtran|palo|fortinet|alcatel|lucent|ericsson|nec|fujitsu|infinera|ribbon|sycamore|hitachi|rad|zhone|ubiquiti|mikrotik|avaya|enterasys|allied|planet|zyxel|dlink|d-link|optical))+$',
+      '',
+      'i'
+    )
+  ),
+  updated_at = NOW()
+WHERE
+  part_number ILIKE '%-r-compatible%'
+  OR part_number ILIKE '%-compatible-transceiver%';
+
+-- Step 2: Extract reach_meters from reach_label where reach_meters = 0 but reach_label has data
+-- The WHERE regex admits only "<digits><optional whitespace>m" or "...km" labels, so
+-- once the non-digits are removed the CAST always receives a plain integer string.
+UPDATE transceivers
+SET
+  reach_meters = CASE
+    WHEN reach_label ILIKE '%km' THEN
+      CAST(REGEXP_REPLACE(reach_label, '[^0-9]', '', 'g') AS INTEGER) * 1000
+    WHEN reach_label ILIKE '%m' AND reach_label NOT ILIKE '%km' THEN
+      CAST(REGEXP_REPLACE(reach_label, '[^0-9]', '', 'g') AS INTEGER)
+    ELSE reach_meters
+  END,
+  updated_at = NOW()
+WHERE reach_meters = 0
+  AND reach_label IS NOT NULL
+  AND reach_label != ''
+  AND reach_label ~ '^\d+\s*(m|km)$';
+
+-- Step 3: Also extract reach_label from slug where still missing
+-- For records where slug contains NNkm pattern (e.g. scraped-3he16564aa-...-120km-...)
+UPDATE transceivers
+SET
+  reach_label = (REGEXP_MATCH(slug, '(\d+km)'))[1],
+  reach_meters = CAST((REGEXP_MATCH(slug, '(\d+)km'))[1] AS INTEGER) * 1000,
+  updated_at = NOW()
+WHERE
+  (reach_label IS NULL OR reach_label = '')
+  AND reach_meters = 0
+  AND slug ~ '\d+km';
+
+-- Step 4: Recompute all verification badges with the fixed criteria
+-- (Updates details_verified, fully_verified for all affected transceivers)
+SELECT recompute_all_verification();