fix(price-movers): dedup by part_number, median price, CV-filter for tier noise
Group by part_number instead of transceiver_id (eliminates duplicate rows when several OEM transceiver_ids share the same part number). Use the PERCENTILE_CONT(0.5) median instead of AVG to reduce the impact of single outliers. Add a CV filter (coefficient of variation, stddev/avg <= 0.35, computed over the 2x-days window) to exclude high-variance sources, such as Mouser quantity-tier pricing, that produce artificial price swings.
This commit is contained in:
parent
c6e79e9967
commit
842a85120b
@ -1083,41 +1083,68 @@ procurementRouter.get("/price-movers", async (req: Request, res: Response) => {
|
|||||||
try {
|
try {
|
||||||
const result = await pool.query(`
|
const result = await pool.query(`
|
||||||
WITH cur AS (
|
WITH cur AS (
|
||||||
SELECT transceiver_id, source_vendor_id, currency,
|
-- Group by part_number+source_vendor+currency to avoid duplicates from multiple
|
||||||
AVG(price) AS avg_price,
|
-- vendor-OEM transceiver_ids with the same part number.
|
||||||
|
-- Use PERCENTILE_CONT (median) to suppress multi-tier list-price noise
|
||||||
|
-- (e.g. Mouser 1x/10x/100x tiers appearing as price swings).
|
||||||
|
SELECT t.part_number, po.source_vendor_id, po.currency,
|
||||||
|
PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY po.price) AS med_price,
|
||||||
COUNT(*) AS obs
|
COUNT(*) AS obs
|
||||||
FROM price_observations
|
FROM price_observations po
|
||||||
WHERE time >= NOW() - INTERVAL '${days} days'
|
JOIN transceivers t ON t.id = po.transceiver_id
|
||||||
AND price > 0 AND COALESCE(is_anomalous, false) = false
|
WHERE po.time >= NOW() - INTERVAL '${days} days'
|
||||||
GROUP BY transceiver_id, source_vendor_id, currency
|
AND po.price > 0 AND COALESCE(po.is_anomalous, false) = false
|
||||||
|
GROUP BY t.part_number, po.source_vendor_id, po.currency
|
||||||
),
|
),
|
||||||
prior AS (
|
prior AS (
|
||||||
SELECT transceiver_id, source_vendor_id,
|
SELECT t.part_number, po.source_vendor_id,
|
||||||
AVG(price) AS avg_price
|
PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY po.price) AS med_price,
|
||||||
FROM price_observations
|
COUNT(*) AS obs
|
||||||
WHERE time >= NOW() - INTERVAL '${days * 2} days'
|
FROM price_observations po
|
||||||
AND time < NOW() - INTERVAL '${days} days'
|
JOIN transceivers t ON t.id = po.transceiver_id
|
||||||
AND price > 0 AND COALESCE(is_anomalous, false) = false
|
WHERE po.time >= NOW() - INTERVAL '${days * 2} days'
|
||||||
GROUP BY transceiver_id, source_vendor_id
|
AND po.time < NOW() - INTERVAL '${days} days'
|
||||||
|
AND po.price > 0 AND COALESCE(po.is_anomalous, false) = false
|
||||||
|
GROUP BY t.part_number, po.source_vendor_id
|
||||||
|
),
|
||||||
|
ref_tx AS (
|
||||||
|
-- pick one canonical transceiver_id per part_number for metadata
|
||||||
|
SELECT DISTINCT ON (part_number) id, part_number, form_factor, speed_gbps, standard_name
|
||||||
|
FROM transceivers ORDER BY part_number, id
|
||||||
)
|
)
|
||||||
SELECT
|
SELECT
|
||||||
t.id, t.part_number, t.form_factor,
|
ref.id, ref.part_number, ref.form_factor,
|
||||||
t.speed_gbps::text AS speed_gbps,
|
ref.speed_gbps::text AS speed_gbps,
|
||||||
t.standard_name,
|
ref.standard_name,
|
||||||
sv.name AS vendor_name,
|
sv.name AS vendor_name,
|
||||||
ROUND(c.avg_price::numeric, 2) AS current_avg,
|
ROUND(c.med_price::numeric, 2) AS current_avg,
|
||||||
ROUND(p.avg_price::numeric, 2) AS prior_avg,
|
ROUND(p.med_price::numeric, 2) AS prior_avg,
|
||||||
ROUND(((c.avg_price - p.avg_price) / NULLIF(p.avg_price, 0) * 100)::numeric, 1) AS delta_pct,
|
ROUND(((c.med_price - p.med_price) / NULLIF(p.med_price, 0) * 100)::numeric, 1) AS delta_pct,
|
||||||
c.currency,
|
c.currency,
|
||||||
c.obs::int AS observations
|
(c.obs + p.obs)::int AS observations
|
||||||
FROM cur c
|
FROM cur c
|
||||||
JOIN prior p ON p.transceiver_id = c.transceiver_id
|
JOIN prior p ON p.part_number = c.part_number
|
||||||
AND p.source_vendor_id = c.source_vendor_id
|
AND p.source_vendor_id = c.source_vendor_id
|
||||||
JOIN transceivers t ON t.id = c.transceiver_id
|
JOIN ref_tx ref ON ref.part_number = c.part_number
|
||||||
JOIN vendors sv ON sv.id = c.source_vendor_id
|
JOIN vendors sv ON sv.id = c.source_vendor_id
|
||||||
WHERE ABS((c.avg_price - p.avg_price) / NULLIF(p.avg_price, 0) * 100) >= 2
|
-- cv_filter: exclude SKUs where the source has high price variance across the full
|
||||||
AND c.obs::int >= 2
|
-- 2*days window (e.g. Mouser quantity-tier noise). CV > 0.35 = unreliable source.
|
||||||
ORDER BY ABS((c.avg_price - p.avg_price) / NULLIF(p.avg_price, 0) * 100) DESC
|
JOIN (
|
||||||
|
SELECT t2.part_number, po2.source_vendor_id,
|
||||||
|
STDDEV(po2.price) / NULLIF(AVG(po2.price), 0) AS cv
|
||||||
|
FROM price_observations po2
|
||||||
|
JOIN transceivers t2 ON t2.id = po2.transceiver_id
|
||||||
|
WHERE po2.time >= NOW() - INTERVAL '${days * 2} days'
|
||||||
|
AND po2.price > 0 AND COALESCE(po2.is_anomalous, false) = false
|
||||||
|
GROUP BY t2.part_number, po2.source_vendor_id
|
||||||
|
HAVING COUNT(*) >= 2
|
||||||
|
) cv_filter
|
||||||
|
ON cv_filter.part_number = c.part_number
|
||||||
|
AND cv_filter.source_vendor_id = c.source_vendor_id
|
||||||
|
WHERE ABS((c.med_price - p.med_price) / NULLIF(p.med_price, 0) * 100) >= 2
|
||||||
|
AND (c.obs + p.obs) >= 4
|
||||||
|
AND COALESCE(cv_filter.cv, 0) <= 0.35
|
||||||
|
ORDER BY ABS((c.med_price - p.med_price) / NULLIF(p.med_price, 0) * 100) DESC
|
||||||
LIMIT ${limit * 2}
|
LIMIT ${limit * 2}
|
||||||
`);
|
`);
|
||||||
|
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user