fix(price-movers): dedup by part_number, median price, CV-filter for tier noise

Group by part_number instead of transceiver_id (eliminates OEM duplicate rows).
Use PERCENTILE_CONT median instead of AVG to reduce single-outlier impact.
Add a CV filter (coefficient of variation, stddev/avg <= 0.35, computed over the
full 2x-days window) to exclude part/vendor pairs with high price variance, such
as Mouser quantity-tier pricing, which produces artificial price swings.
This commit is contained in:
Rene Fichtmueller 2026-06-05 21:23:20 +00:00
parent c6e79e9967
commit 842a85120b

View File

@ -1083,41 +1083,68 @@ procurementRouter.get("/price-movers", async (req: Request, res: Response) => {
try {
const result = await pool.query(`
WITH cur AS (
SELECT transceiver_id, source_vendor_id, currency,
AVG(price) AS avg_price,
-- Group by part_number+source_vendor+currency to avoid duplicates from multiple
-- vendor-OEM transceiver_ids with the same part number.
-- Use PERCENTILE_CONT (median) to suppress multi-tier list-price noise
-- (e.g. Mouser 1x/10x/100x tiers appearing as price swings).
SELECT t.part_number, po.source_vendor_id, po.currency,
PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY po.price) AS med_price,
COUNT(*) AS obs
FROM price_observations
WHERE time >= NOW() - INTERVAL '${days} days'
AND price > 0 AND COALESCE(is_anomalous, false) = false
GROUP BY transceiver_id, source_vendor_id, currency
FROM price_observations po
JOIN transceivers t ON t.id = po.transceiver_id
WHERE po.time >= NOW() - INTERVAL '${days} days'
AND po.price > 0 AND COALESCE(po.is_anomalous, false) = false
GROUP BY t.part_number, po.source_vendor_id, po.currency
),
prior AS (
SELECT transceiver_id, source_vendor_id,
AVG(price) AS avg_price
FROM price_observations
WHERE time >= NOW() - INTERVAL '${days * 2} days'
AND time < NOW() - INTERVAL '${days} days'
AND price > 0 AND COALESCE(is_anomalous, false) = false
GROUP BY transceiver_id, source_vendor_id
SELECT t.part_number, po.source_vendor_id,
PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY po.price) AS med_price,
COUNT(*) AS obs
FROM price_observations po
JOIN transceivers t ON t.id = po.transceiver_id
WHERE po.time >= NOW() - INTERVAL '${days * 2} days'
AND po.time < NOW() - INTERVAL '${days} days'
AND po.price > 0 AND COALESCE(po.is_anomalous, false) = false
GROUP BY t.part_number, po.source_vendor_id
),
ref_tx AS (
-- pick one canonical transceiver_id per part_number for metadata
SELECT DISTINCT ON (part_number) id, part_number, form_factor, speed_gbps, standard_name
FROM transceivers ORDER BY part_number, id
)
SELECT
t.id, t.part_number, t.form_factor,
t.speed_gbps::text AS speed_gbps,
t.standard_name,
ref.id, ref.part_number, ref.form_factor,
ref.speed_gbps::text AS speed_gbps,
ref.standard_name,
sv.name AS vendor_name,
ROUND(c.avg_price::numeric, 2) AS current_avg,
ROUND(p.avg_price::numeric, 2) AS prior_avg,
ROUND(((c.avg_price - p.avg_price) / NULLIF(p.avg_price, 0) * 100)::numeric, 1) AS delta_pct,
ROUND(c.med_price::numeric, 2) AS current_avg,
ROUND(p.med_price::numeric, 2) AS prior_avg,
ROUND(((c.med_price - p.med_price) / NULLIF(p.med_price, 0) * 100)::numeric, 1) AS delta_pct,
c.currency,
c.obs::int AS observations
(c.obs + p.obs)::int AS observations
FROM cur c
JOIN prior p ON p.transceiver_id = c.transceiver_id
AND p.source_vendor_id = c.source_vendor_id
JOIN transceivers t ON t.id = c.transceiver_id
JOIN vendors sv ON sv.id = c.source_vendor_id
WHERE ABS((c.avg_price - p.avg_price) / NULLIF(p.avg_price, 0) * 100) >= 2
AND c.obs::int >= 2
ORDER BY ABS((c.avg_price - p.avg_price) / NULLIF(p.avg_price, 0) * 100) DESC
JOIN prior p ON p.part_number = c.part_number
AND p.source_vendor_id = c.source_vendor_id
JOIN ref_tx ref ON ref.part_number = c.part_number
JOIN vendors sv ON sv.id = c.source_vendor_id
-- cv_filter: exclude SKUs where the source has high price variance across the full
-- 2*days window (e.g. Mouser quantity-tier noise). CV > 0.35 = unreliable source.
JOIN (
SELECT t2.part_number, po2.source_vendor_id,
STDDEV(po2.price) / NULLIF(AVG(po2.price), 0) AS cv
FROM price_observations po2
JOIN transceivers t2 ON t2.id = po2.transceiver_id
WHERE po2.time >= NOW() - INTERVAL '${days * 2} days'
AND po2.price > 0 AND COALESCE(po2.is_anomalous, false) = false
GROUP BY t2.part_number, po2.source_vendor_id
HAVING COUNT(*) >= 2
) cv_filter
ON cv_filter.part_number = c.part_number
AND cv_filter.source_vendor_id = c.source_vendor_id
WHERE ABS((c.med_price - p.med_price) / NULLIF(p.med_price, 0) * 100) >= 2
AND (c.obs + p.obs) >= 4
AND COALESCE(cv_filter.cv, 0) <= 0.35
ORDER BY ABS((c.med_price - p.med_price) / NULLIF(p.med_price, 0) * 100) DESC
LIMIT ${limit * 2}
`);