fix(price-movers): dedup by part_number, median price, CV-filter for tier noise
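Group by part_number instead of transceiver_id, which eliminates the duplicate rows produced when several vendor-OEM transceiver_ids share the same part number. Use the PERCENTILE_CONT(0.5) median instead of AVG to reduce the impact of a single outlier. Add a CV filter (coefficient of variation, stddev/avg <= 0.35, computed over the full 2×days window) to exclude high-variance sources such as Mouser quantity-tier pricing, which produces artificial price swings.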
This commit is contained in:
parent
c6e79e9967
commit
842a85120b
@ -1083,41 +1083,68 @@ procurementRouter.get("/price-movers", async (req: Request, res: Response) => {
|
||||
try {
|
||||
const result = await pool.query(`
|
||||
WITH cur AS (
|
||||
SELECT transceiver_id, source_vendor_id, currency,
|
||||
AVG(price) AS avg_price,
|
||||
-- Group by part_number+source_vendor+currency to avoid duplicates from multiple
|
||||
-- vendor-OEM transceiver_ids with the same part number.
|
||||
-- Use PERCENTILE_CONT (median) to suppress multi-tier list-price noise
|
||||
-- (e.g. Mouser 1x/10x/100x tiers appearing as price swings).
|
||||
SELECT t.part_number, po.source_vendor_id, po.currency,
|
||||
PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY po.price) AS med_price,
|
||||
COUNT(*) AS obs
|
||||
FROM price_observations
|
||||
WHERE time >= NOW() - INTERVAL '${days} days'
|
||||
AND price > 0 AND COALESCE(is_anomalous, false) = false
|
||||
GROUP BY transceiver_id, source_vendor_id, currency
|
||||
FROM price_observations po
|
||||
JOIN transceivers t ON t.id = po.transceiver_id
|
||||
WHERE po.time >= NOW() - INTERVAL '${days} days'
|
||||
AND po.price > 0 AND COALESCE(po.is_anomalous, false) = false
|
||||
GROUP BY t.part_number, po.source_vendor_id, po.currency
|
||||
),
|
||||
prior AS (
|
||||
SELECT transceiver_id, source_vendor_id,
|
||||
AVG(price) AS avg_price
|
||||
FROM price_observations
|
||||
WHERE time >= NOW() - INTERVAL '${days * 2} days'
|
||||
AND time < NOW() - INTERVAL '${days} days'
|
||||
AND price > 0 AND COALESCE(is_anomalous, false) = false
|
||||
GROUP BY transceiver_id, source_vendor_id
|
||||
SELECT t.part_number, po.source_vendor_id,
|
||||
PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY po.price) AS med_price,
|
||||
COUNT(*) AS obs
|
||||
FROM price_observations po
|
||||
JOIN transceivers t ON t.id = po.transceiver_id
|
||||
WHERE po.time >= NOW() - INTERVAL '${days * 2} days'
|
||||
AND po.time < NOW() - INTERVAL '${days} days'
|
||||
AND po.price > 0 AND COALESCE(po.is_anomalous, false) = false
|
||||
GROUP BY t.part_number, po.source_vendor_id
|
||||
),
|
||||
ref_tx AS (
|
||||
-- pick one canonical transceiver_id per part_number for metadata
|
||||
SELECT DISTINCT ON (part_number) id, part_number, form_factor, speed_gbps, standard_name
|
||||
FROM transceivers ORDER BY part_number, id
|
||||
)
|
||||
SELECT
|
||||
t.id, t.part_number, t.form_factor,
|
||||
t.speed_gbps::text AS speed_gbps,
|
||||
t.standard_name,
|
||||
ref.id, ref.part_number, ref.form_factor,
|
||||
ref.speed_gbps::text AS speed_gbps,
|
||||
ref.standard_name,
|
||||
sv.name AS vendor_name,
|
||||
ROUND(c.avg_price::numeric, 2) AS current_avg,
|
||||
ROUND(p.avg_price::numeric, 2) AS prior_avg,
|
||||
ROUND(((c.avg_price - p.avg_price) / NULLIF(p.avg_price, 0) * 100)::numeric, 1) AS delta_pct,
|
||||
ROUND(c.med_price::numeric, 2) AS current_avg,
|
||||
ROUND(p.med_price::numeric, 2) AS prior_avg,
|
||||
ROUND(((c.med_price - p.med_price) / NULLIF(p.med_price, 0) * 100)::numeric, 1) AS delta_pct,
|
||||
c.currency,
|
||||
c.obs::int AS observations
|
||||
(c.obs + p.obs)::int AS observations
|
||||
FROM cur c
|
||||
JOIN prior p ON p.transceiver_id = c.transceiver_id
|
||||
JOIN prior p ON p.part_number = c.part_number
|
||||
AND p.source_vendor_id = c.source_vendor_id
|
||||
JOIN transceivers t ON t.id = c.transceiver_id
|
||||
JOIN ref_tx ref ON ref.part_number = c.part_number
|
||||
JOIN vendors sv ON sv.id = c.source_vendor_id
|
||||
WHERE ABS((c.avg_price - p.avg_price) / NULLIF(p.avg_price, 0) * 100) >= 2
|
||||
AND c.obs::int >= 2
|
||||
ORDER BY ABS((c.avg_price - p.avg_price) / NULLIF(p.avg_price, 0) * 100) DESC
|
||||
-- cv_filter: exclude SKUs where the source has high price variance across the full
|
||||
-- 2*days window (e.g. Mouser quantity-tier noise). CV > 0.35 = unreliable source.
|
||||
JOIN (
|
||||
SELECT t2.part_number, po2.source_vendor_id,
|
||||
STDDEV(po2.price) / NULLIF(AVG(po2.price), 0) AS cv
|
||||
FROM price_observations po2
|
||||
JOIN transceivers t2 ON t2.id = po2.transceiver_id
|
||||
WHERE po2.time >= NOW() - INTERVAL '${days * 2} days'
|
||||
AND po2.price > 0 AND COALESCE(po2.is_anomalous, false) = false
|
||||
GROUP BY t2.part_number, po2.source_vendor_id
|
||||
HAVING COUNT(*) >= 2
|
||||
) cv_filter
|
||||
ON cv_filter.part_number = c.part_number
|
||||
AND cv_filter.source_vendor_id = c.source_vendor_id
|
||||
WHERE ABS((c.med_price - p.med_price) / NULLIF(p.med_price, 0) * 100) >= 2
|
||||
AND (c.obs + p.obs) >= 4
|
||||
AND COALESCE(cv_filter.cv, 0) <= 0.35
|
||||
ORDER BY ABS((c.med_price - p.med_price) / NULLIF(p.med_price, 0) * 100) DESC
|
||||
LIMIT ${limit * 2}
|
||||
`);
|
||||
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user