fix(price-movers): dedup by part_number, median price, CV-filter for tier noise

Group by part_number (per source vendor) instead of transceiver_id, which
eliminates duplicate rows from OEM transceiver_ids sharing a part number.
Use PERCENTILE_CONT median instead of AVG to reduce single-outlier impact.
Add a CV filter (coefficient of variation: stddev/avg <= 0.35, computed over
the full 2x-days window) to exclude high-variance part/vendor pairs, such as
Mouser quantity-tier pricing, which produces artificial price swings.
This commit is contained in:
Rene Fichtmueller 2026-06-05 21:23:20 +00:00
parent c6e79e9967
commit 842a85120b

View File

@ -1083,41 +1083,68 @@ procurementRouter.get("/price-movers", async (req: Request, res: Response) => {
try { try {
const result = await pool.query(` const result = await pool.query(`
WITH cur AS ( WITH cur AS (
SELECT transceiver_id, source_vendor_id, currency, -- Group by part_number+source_vendor+currency to avoid duplicates from multiple
AVG(price) AS avg_price, -- vendor-OEM transceiver_ids with the same part number.
-- Use PERCENTILE_CONT (median) to suppress multi-tier list-price noise
-- (e.g. Mouser 1x/10x/100x tiers appearing as price swings).
SELECT t.part_number, po.source_vendor_id, po.currency,
PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY po.price) AS med_price,
COUNT(*) AS obs COUNT(*) AS obs
FROM price_observations FROM price_observations po
WHERE time >= NOW() - INTERVAL '${days} days' JOIN transceivers t ON t.id = po.transceiver_id
AND price > 0 AND COALESCE(is_anomalous, false) = false WHERE po.time >= NOW() - INTERVAL '${days} days'
GROUP BY transceiver_id, source_vendor_id, currency AND po.price > 0 AND COALESCE(po.is_anomalous, false) = false
GROUP BY t.part_number, po.source_vendor_id, po.currency
), ),
prior AS ( prior AS (
SELECT transceiver_id, source_vendor_id, SELECT t.part_number, po.source_vendor_id,
AVG(price) AS avg_price PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY po.price) AS med_price,
FROM price_observations COUNT(*) AS obs
WHERE time >= NOW() - INTERVAL '${days * 2} days' FROM price_observations po
AND time < NOW() - INTERVAL '${days} days' JOIN transceivers t ON t.id = po.transceiver_id
AND price > 0 AND COALESCE(is_anomalous, false) = false WHERE po.time >= NOW() - INTERVAL '${days * 2} days'
GROUP BY transceiver_id, source_vendor_id AND po.time < NOW() - INTERVAL '${days} days'
AND po.price > 0 AND COALESCE(po.is_anomalous, false) = false
GROUP BY t.part_number, po.source_vendor_id
),
ref_tx AS (
-- pick one canonical transceiver_id per part_number for metadata
SELECT DISTINCT ON (part_number) id, part_number, form_factor, speed_gbps, standard_name
FROM transceivers ORDER BY part_number, id
) )
SELECT SELECT
t.id, t.part_number, t.form_factor, ref.id, ref.part_number, ref.form_factor,
t.speed_gbps::text AS speed_gbps, ref.speed_gbps::text AS speed_gbps,
t.standard_name, ref.standard_name,
sv.name AS vendor_name, sv.name AS vendor_name,
ROUND(c.avg_price::numeric, 2) AS current_avg, ROUND(c.med_price::numeric, 2) AS current_avg,
ROUND(p.avg_price::numeric, 2) AS prior_avg, ROUND(p.med_price::numeric, 2) AS prior_avg,
ROUND(((c.avg_price - p.avg_price) / NULLIF(p.avg_price, 0) * 100)::numeric, 1) AS delta_pct, ROUND(((c.med_price - p.med_price) / NULLIF(p.med_price, 0) * 100)::numeric, 1) AS delta_pct,
c.currency, c.currency,
c.obs::int AS observations (c.obs + p.obs)::int AS observations
FROM cur c FROM cur c
JOIN prior p ON p.transceiver_id = c.transceiver_id JOIN prior p ON p.part_number = c.part_number
AND p.source_vendor_id = c.source_vendor_id AND p.source_vendor_id = c.source_vendor_id
JOIN transceivers t ON t.id = c.transceiver_id JOIN ref_tx ref ON ref.part_number = c.part_number
JOIN vendors sv ON sv.id = c.source_vendor_id JOIN vendors sv ON sv.id = c.source_vendor_id
WHERE ABS((c.avg_price - p.avg_price) / NULLIF(p.avg_price, 0) * 100) >= 2 -- cv_filter: exclude SKUs where the source has high price variance across the full
AND c.obs::int >= 2 -- 2*days window (e.g. Mouser quantity-tier noise). CV > 0.35 = unreliable source.
ORDER BY ABS((c.avg_price - p.avg_price) / NULLIF(p.avg_price, 0) * 100) DESC JOIN (
SELECT t2.part_number, po2.source_vendor_id,
STDDEV(po2.price) / NULLIF(AVG(po2.price), 0) AS cv
FROM price_observations po2
JOIN transceivers t2 ON t2.id = po2.transceiver_id
WHERE po2.time >= NOW() - INTERVAL '${days * 2} days'
AND po2.price > 0 AND COALESCE(po2.is_anomalous, false) = false
GROUP BY t2.part_number, po2.source_vendor_id
HAVING COUNT(*) >= 2
) cv_filter
ON cv_filter.part_number = c.part_number
AND cv_filter.source_vendor_id = c.source_vendor_id
WHERE ABS((c.med_price - p.med_price) / NULLIF(p.med_price, 0) * 100) >= 2
AND (c.obs + p.obs) >= 4
AND COALESCE(cv_filter.cv, 0) <= 0.35
ORDER BY ABS((c.med_price - p.med_price) / NULLIF(p.med_price, 0) * 100) DESC
LIMIT ${limit * 2} LIMIT ${limit * 2}
`); `);