fix(verification): 100% Verified Badge war dramatisch zu großzügig
KERNPROBLEME BEHOBEN:
1. ATGBICS part_number = URL slug statt echte OEM-Nummer
extractOemPartNumber() entfernt -r-compatible-transceiver-* Suffix
+ trailing Vendor-Namen (nokia, cisco, juniper, ...)
Ergebnis: 3he16564aa-nokia-r-compatible-transceiver-... → 3HE16564AA
2. reach_label = '' (leer) wurde als details_verified akzeptiert
IS NOT NULL erlaubt leere Strings → Fix: AND reach_label != ''
3. details_verified = true trotz garbled part_number
Neue Kriterien: NOT ILIKE '%-compatible-transceiver%'
NOT ILIKE '%-r-compatible%'
4. data_confidence Werte falsch in Funktion ('scraped_unverified' etc)
Echte Werte: low/medium/high/garbage → NOT IN ('garbage','unknown')
ERGEBNIS nach recompute_all_verification():
fully_verified: 3.654 → 581 (Badge war 6x übertrieben)
details_verified: inflated → 1.075 (korrekt)
ATGBICS Scraper:
- extractOemPartNumber() für collection und product detail pages
- detectReach() jetzt auch auf URL-slug (120km im slug → reach_label)
Price Anomaly Detection:
- API: price_anomaly field wenn max/min ratio ≥ 10x
- Dashboard: ⚠ Preisanomalie Banner mit Ratio + EUR Range
SQL 025: Part number cleanup (30 records), reach from slug (12 records)
This commit is contained in:
parent
1e789f67eb
commit
931588fffd
@ -115,9 +115,32 @@ transceiverRouter.get("/:id", async (req: Request, res: Response) => {
|
|||||||
comparable_id: row.comparable_id,
|
comparable_id: row.comparable_id,
|
||||||
}));
|
}));
|
||||||
|
|
||||||
|
const allPrices = [...prices, ...comparablePrices];
|
||||||
|
|
||||||
|
// Price anomaly detection: flag if max/min ratio > 10x (same-product prices only)
|
||||||
|
const samePricesEur = allPrices
|
||||||
|
.filter((p) => p.is_same_product && p.price > 0)
|
||||||
|
.map((p) => {
|
||||||
|
// Normalize to EUR for comparison
|
||||||
|
if (p.currency === "EUR") return p.price;
|
||||||
|
if (p.currency === "USD") return p.price * 0.92;
|
||||||
|
if (p.currency === "GBP") return p.price * 1.17;
|
||||||
|
return p.price;
|
||||||
|
});
|
||||||
|
|
||||||
|
let priceAnomaly: { ratio: number; min_eur: number; max_eur: number } | null = null;
|
||||||
|
if (samePricesEur.length >= 2) {
|
||||||
|
const minEur = Math.min(...samePricesEur);
|
||||||
|
const maxEur = Math.max(...samePricesEur);
|
||||||
|
const ratio = minEur > 0 ? Math.round((maxEur / minEur) * 10) / 10 : 0;
|
||||||
|
if (ratio >= 10) {
|
||||||
|
priceAnomaly = { ratio, min_eur: Math.round(minEur * 100) / 100, max_eur: Math.round(maxEur * 100) / 100 };
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
res.json({
|
res.json({
|
||||||
success: true,
|
success: true,
|
||||||
data: { ...transceiver, competitor_prices: [...prices, ...comparablePrices] },
|
data: { ...transceiver, competitor_prices: allPrices, price_anomaly: priceAnomaly },
|
||||||
});
|
});
|
||||||
} catch (err) {
|
} catch (err) {
|
||||||
console.error("Get transceiver error:", err);
|
console.error("Get transceiver error:", err);
|
||||||
|
|||||||
@ -2767,7 +2767,20 @@ async function openTxDetail(id) {
|
|||||||
var comparPrices = allPrices.filter(function(p) { return p.is_same_product === false; });
|
var comparPrices = allPrices.filter(function(p) { return p.is_same_product === false; });
|
||||||
|
|
||||||
if (allPrices.length > 0) {
|
if (allPrices.length > 0) {
|
||||||
|
// Price anomaly warning — show before price table if ratio ≥ 10x
|
||||||
|
var anomaly = t.price_anomaly;
|
||||||
|
var anomalyBanner = '';
|
||||||
|
if (anomaly && anomaly.ratio >= 10) {
|
||||||
|
anomalyBanner = '<div style="background:#3d1a1a;border:1px solid #7a2e2e;border-radius:6px;padding:0.6rem 0.9rem;margin-bottom:0.6rem;font-size:0.75rem;color:#f08080;line-height:1.5">'
|
||||||
|
+ '<strong style="color:#ff6b6b">⚠ Preisanomalie</strong> — '
|
||||||
|
+ anomaly.ratio + 'x Unterschied zwischen Anbietern'
|
||||||
|
+ ' (min. EUR\u00a0' + anomaly.min_eur.toLocaleString('de-DE',{minimumFractionDigits:2}) + ' / max. EUR\u00a0' + anomaly.max_eur.toLocaleString('de-DE',{minimumFractionDigits:2}) + ').'
|
||||||
|
+ ' Entweder ist ein Preis falsch erfasst, oder es handelt sich um unterschiedliche Produktvarianten.'
|
||||||
|
+ '</div>';
|
||||||
|
}
|
||||||
|
|
||||||
h += '<div class="panel-section">Current Prices</div>';
|
h += '<div class="panel-section">Current Prices</div>';
|
||||||
|
h += anomalyBanner;
|
||||||
h += '<div class="spec-table">';
|
h += '<div class="spec-table">';
|
||||||
|
|
||||||
function renderPriceRow(p) {
|
function renderPriceRow(p) {
|
||||||
|
|||||||
@ -83,6 +83,51 @@ function detectReach(text: string): string | undefined {
|
|||||||
return undefined;
|
return undefined;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Extract the real OEM part number from an ATGBICS URL slug.
|
||||||
|
*
|
||||||
|
* ATGBICS slug format: {oem-part-number}-{vendor}-r-compatible-transceiver-{specs}
|
||||||
|
* Examples:
|
||||||
|
* 3he16564aa-nokia-r-compatible-transceiver-qsfp-dd-... → 3HE16564AA
|
||||||
|
* jnp-sfp-25g-lr-juniper-r-compatible-... → JNP-SFP-25G-LR
|
||||||
|
* sfp-10g-sr-cisco-compatible-... → SFP-10G-SR
|
||||||
|
*
|
||||||
|
* Returns the slug uppercased if extraction fails (better than full slug).
|
||||||
|
*/
|
||||||
|
function extractOemPartNumber(slug: string): string {
|
||||||
|
let pn = slug;
|
||||||
|
|
||||||
|
// Remove "-r-compatible-transceiver-..." and everything after
|
||||||
|
pn = pn.replace(/-r-compatible(?:-transceiver.*)?$/i, "");
|
||||||
|
// Remove "-compatible-transceiver-..." (no "r-")
|
||||||
|
pn = pn.replace(/-compatible-transceiver.*$/i, "");
|
||||||
|
// Remove "-compatible-..." (short form)
|
||||||
|
pn = pn.replace(/-compatible.*$/i, "");
|
||||||
|
|
||||||
|
// Remove trailing known OEM vendor names that ATGBICS appends before "-r-compatible"
|
||||||
|
const oemVendors = [
|
||||||
|
"nokia", "cisco", "juniper", "arista", "huawei", "hp", "hpe", "dell",
|
||||||
|
"extreme", "brocade", "avaya", "netgear", "mikrotik", "ubiquiti", "mellanox",
|
||||||
|
"intel", "broadcom", "allied", "planet", "zyxel", "dlink", "d-link",
|
||||||
|
"foundry", "force10", "enterasys", "optical", "palo", "fortinet", "hitachi",
|
||||||
|
"calix", "calix", "ciena", "adtran", "ribbon", "sycamore", "rad", "zhone",
|
||||||
|
"infinera", "fujitsu", "nec", "ericsson", "alcatel", "lucent",
|
||||||
|
];
|
||||||
|
for (const v of oemVendors) {
|
||||||
|
pn = pn.replace(new RegExp(`-${v}$`, "i"), "");
|
||||||
|
}
|
||||||
|
|
||||||
|
// Final cleanup: normalize to uppercase (OEM part numbers are uppercase)
|
||||||
|
const result = pn.toUpperCase().trim();
|
||||||
|
|
||||||
|
// Safety: if result is empty, longer than 40 chars, or still has "TRANSCEIVER", return slug as-is
|
||||||
|
if (!result || result.length > 40 || result.includes("TRANSCEIVER")) {
|
||||||
|
return slug.toUpperCase().slice(0, 40);
|
||||||
|
}
|
||||||
|
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
function detectFiberType(text: string): string | undefined {
|
function detectFiberType(text: string): string | undefined {
|
||||||
const lower = text.toLowerCase();
|
const lower = text.toLowerCase();
|
||||||
if (lower.includes("single mode") || lower.includes("single-mode") || lower.includes("smf") || lower.includes("-lr") || lower.includes("-er") || lower.includes("-zr")) return "SMF";
|
if (lower.includes("single mode") || lower.includes("single-mode") || lower.includes("smf") || lower.includes("-lr") || lower.includes("-er") || lower.includes("-zr")) return "SMF";
|
||||||
@ -161,10 +206,11 @@ export async function scrapeAtgbics(): Promise<void> {
|
|||||||
const stock = stockEl?.textContent?.trim() || "";
|
const stock = stockEl?.textContent?.trim() || "";
|
||||||
|
|
||||||
// Derive part number from URL slug: /products/sfp-10g-lr → sfp-10g-lr
|
// Derive part number from URL slug: /products/sfp-10g-lr → sfp-10g-lr
|
||||||
|
// Then extract real OEM part number (strips "-r-compatible-transceiver-*")
|
||||||
const slug = href.split("/products/")[1]?.split("?")[0]?.replace(/\/$/, "") || "";
|
const slug = href.split("/products/")[1]?.split("?")[0]?.replace(/\/$/, "") || "";
|
||||||
|
|
||||||
if (href && name && name.length > 3) {
|
if (href && name && name.length > 3) {
|
||||||
results.push({ name, href, price, stock, partNumber: slug });
|
results.push({ name, href, price, stock, partNumber: slug }); // OEM extraction done below after page parse
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -204,9 +250,13 @@ export async function scrapeAtgbics(): Promise<void> {
|
|||||||
if (item.price) {
|
if (item.price) {
|
||||||
const { price, currency } = parsePrice(item.price);
|
const { price, currency } = parsePrice(item.price);
|
||||||
const speedInfo = detectSpeed(item.name);
|
const speedInfo = detectSpeed(item.name);
|
||||||
|
// Extract real OEM part number from slug (strips -r-compatible-transceiver-*)
|
||||||
|
const realPartNumber = extractOemPartNumber(item.partNumber);
|
||||||
|
// Extract reach from name OR slug (slug often has "120km" even when name doesn't)
|
||||||
|
const reachLabel = detectReach(item.name) || detectReach(item.partNumber) || undefined;
|
||||||
if (price > 0) {
|
if (price > 0) {
|
||||||
products.push({
|
products.push({
|
||||||
partNumber: item.partNumber || item.name.slice(0, 80),
|
partNumber: realPartNumber || item.name.slice(0, 80),
|
||||||
name: item.name,
|
name: item.name,
|
||||||
price,
|
price,
|
||||||
currency: currency === "USD" ? "GBP" : currency, // ATGBICS is GBP — parsePrice may default to USD if no symbol on listing
|
currency: currency === "USD" ? "GBP" : currency, // ATGBICS is GBP — parsePrice may default to USD if no symbol on listing
|
||||||
@ -216,7 +266,7 @@ export async function scrapeAtgbics(): Promise<void> {
|
|||||||
formFactor: detectFormFactor(item.name),
|
formFactor: detectFormFactor(item.name),
|
||||||
speedGbps: speedInfo?.speedGbps,
|
speedGbps: speedInfo?.speedGbps,
|
||||||
speed: speedInfo?.speed,
|
speed: speedInfo?.speed,
|
||||||
reachLabel: detectReach(item.name),
|
reachLabel,
|
||||||
fiberType: detectFiberType(item.name),
|
fiberType: detectFiberType(item.name),
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
@ -270,7 +320,10 @@ export async function scrapeAtgbics(): Promise<void> {
|
|||||||
});
|
});
|
||||||
|
|
||||||
const slug = url.split("/products/")[1]?.split("?")[0]?.replace(/\/$/, "") || "";
|
const slug = url.split("/products/")[1]?.split("?")[0]?.replace(/\/$/, "") || "";
|
||||||
const partNumber = data.sku || slug;
|
// Prefer Shopify SKU if available, otherwise extract real OEM PN from slug
|
||||||
|
const partNumber = data.sku && data.sku.length > 2 && data.sku.length < 40
|
||||||
|
? data.sku.toUpperCase()
|
||||||
|
: extractOemPartNumber(slug);
|
||||||
const name = data.title || slug;
|
const name = data.title || slug;
|
||||||
|
|
||||||
const combinedText = `${name} ${data.description}`;
|
const combinedText = `${name} ${data.description}`;
|
||||||
@ -278,6 +331,8 @@ export async function scrapeAtgbics(): Promise<void> {
|
|||||||
|
|
||||||
if (price > 0) {
|
if (price > 0) {
|
||||||
const speedInfo = detectSpeed(combinedText);
|
const speedInfo = detectSpeed(combinedText);
|
||||||
|
// Reach from title/description first, then fall back to slug (slug often has "120km")
|
||||||
|
const reachLabel = detectReach(combinedText) || detectReach(slug) || undefined;
|
||||||
products.push({
|
products.push({
|
||||||
partNumber,
|
partNumber,
|
||||||
name,
|
name,
|
||||||
@ -289,7 +344,7 @@ export async function scrapeAtgbics(): Promise<void> {
|
|||||||
formFactor: detectFormFactor(combinedText),
|
formFactor: detectFormFactor(combinedText),
|
||||||
speedGbps: speedInfo?.speedGbps,
|
speedGbps: speedInfo?.speedGbps,
|
||||||
speed: speedInfo?.speed,
|
speed: speedInfo?.speed,
|
||||||
reachLabel: detectReach(combinedText),
|
reachLabel,
|
||||||
fiberType: detectFiberType(combinedText),
|
fiberType: detectFiberType(combinedText),
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|||||||
@ -106,8 +106,13 @@ BEGIN
|
|||||||
transceivers.product_page_url IS NOT NULL
|
transceivers.product_page_url IS NOT NULL
|
||||||
AND transceivers.form_factor IS NOT NULL
|
AND transceivers.form_factor IS NOT NULL
|
||||||
AND transceivers.speed_gbps IS NOT NULL
|
AND transceivers.speed_gbps IS NOT NULL
|
||||||
|
-- reach_label must be a non-empty string (IS NOT NULL allows empty string — wrong)
|
||||||
AND transceivers.reach_label IS NOT NULL
|
AND transceivers.reach_label IS NOT NULL
|
||||||
|
AND transceivers.reach_label != ''
|
||||||
|
-- part_number must not be a URL slug (garbled data from scraper)
|
||||||
AND (transceivers.part_number IS NOT NULL AND transceivers.part_number != transceivers.slug)
|
AND (transceivers.part_number IS NOT NULL AND transceivers.part_number != transceivers.slug)
|
||||||
|
AND transceivers.part_number NOT ILIKE '%-compatible-transceiver%'
|
||||||
|
AND transceivers.part_number NOT ILIKE '%-r-compatible%'
|
||||||
AND transceivers.data_confidence IN ('scraped_unverified', 'verified', 'official')
|
AND transceivers.data_confidence IN ('scraped_unverified', 'verified', 'official')
|
||||||
) INTO v_details_ok
|
) INTO v_details_ok
|
||||||
FROM transceivers
|
FROM transceivers
|
||||||
|
|||||||
59
sql/025-verification-quality-fix.sql
Normal file
59
sql/025-verification-quality-fix.sql
Normal file
@ -0,0 +1,59 @@
|
|||||||
|
-- Migration 025: Fix details_verified quality gate + repair garbled ATGBICS records
|
||||||
|
-- Problem: details_verified = true when:
|
||||||
|
-- 1. reach_label = '' (empty string passes IS NOT NULL)
|
||||||
|
-- 2. part_number contains 'compatible-transceiver' (URL slug stored as PN)
|
||||||
|
-- ─────────────────────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
-- Step 1: Fix part_numbers that are ATGBICS URL slugs
|
||||||
|
-- Extract the real OEM part number: take everything before "-r-compatible" or "-compatible"
|
||||||
|
UPDATE transceivers
|
||||||
|
SET
|
||||||
|
part_number = UPPER(
|
||||||
|
REGEXP_REPLACE(
|
||||||
|
REGEXP_REPLACE(
|
||||||
|
part_number,
|
||||||
|
'-(nokia|cisco|juniper|arista|huawei|hp|hpe|dell|extreme|brocade|mellanox|intel|broadcom|netgear|foundry|calix|ciena|adtran|palo|fortinet|alcatel|ericsson|nec|fujitsu|infinera|ribbon|hitachi|rad|zhone|ubiquiti|mikrotik|avaya|enterasys|allied|planet|zyxel|dlink)$',
|
||||||
|
'',
|
||||||
|
'i'
|
||||||
|
),
|
||||||
|
'-(r-compatible|compatible)(-transceiver.*)?$',
|
||||||
|
'',
|
||||||
|
'i'
|
||||||
|
)
|
||||||
|
),
|
||||||
|
updated_at = NOW()
|
||||||
|
WHERE
|
||||||
|
part_number ILIKE '%-r-compatible%'
|
||||||
|
OR part_number ILIKE '%-compatible-transceiver%';
|
||||||
|
|
||||||
|
-- Step 2: Extract reach_meters from reach_label where reach_meters = 0 but reach_label has data
|
||||||
|
UPDATE transceivers
|
||||||
|
SET
|
||||||
|
reach_meters = CASE
|
||||||
|
WHEN reach_label ILIKE '%km' THEN
|
||||||
|
CAST(REGEXP_REPLACE(reach_label, '[^0-9]', '', 'g') AS INTEGER) * 1000
|
||||||
|
WHEN reach_label ILIKE '%m' AND reach_label NOT ILIKE '%km' THEN
|
||||||
|
CAST(REGEXP_REPLACE(reach_label, '[^0-9]', '', 'g') AS INTEGER)
|
||||||
|
ELSE reach_meters
|
||||||
|
END,
|
||||||
|
updated_at = NOW()
|
||||||
|
WHERE reach_meters = 0
|
||||||
|
AND reach_label IS NOT NULL
|
||||||
|
AND reach_label != ''
|
||||||
|
AND reach_label ~ '^\d+\s*(m|km)$';
|
||||||
|
|
||||||
|
-- Step 3: Also extract reach_label from slug where still missing
|
||||||
|
-- For records where slug contains NNkm pattern (e.g. scraped-3he16564aa-...-120km-...)
|
||||||
|
UPDATE transceivers
|
||||||
|
SET
|
||||||
|
reach_label = (REGEXP_MATCH(slug, '(\d+km)'))[1],
|
||||||
|
reach_meters = CAST((REGEXP_MATCH(slug, '(\d+)km'))[1] AS INTEGER) * 1000,
|
||||||
|
updated_at = NOW()
|
||||||
|
WHERE
|
||||||
|
(reach_label IS NULL OR reach_label = '')
|
||||||
|
AND reach_meters = 0
|
||||||
|
AND slug ~ '\d+km';
|
||||||
|
|
||||||
|
-- Step 4: Recompute all verification badges with the fixed criteria
|
||||||
|
-- (Updates details_verified, fully_verified for all affected transceivers)
|
||||||
|
SELECT recompute_all_verification();
|
||||||
Loading…
x
Reference in New Issue
Block a user