From 0cf607040fadb03ccd4bb1509974639cb2235736 Mon Sep 17 00:00:00 2001 From: Rene Fichtmueller Date: Sat, 6 Jun 2026 16:48:15 +0000 Subject: [PATCH] fix(supply-squeeze): per-SKU paired price comparison eliminates catalog-composition bias MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The price-momentum signal (last 30 days vs the preceding 30 days) averaged prices across whatever SKUs happened to be in a speed/form-factor bucket in each period. New expensive SKUs entering the catalog (NVIDIA switches at 30k USD, AOC cables) faked huge jumps — 400G OSFP showed +151% when matched-SKU reality was 0%. Now: compute the per-transceiver median price in each period, keep only SKUs present in BOTH periods (>=2 obs each), and report the median of per-SKU pct deltas for buckets with at least 3 matched SKUs. Also excludes non-transceiver form factors, AOC/DAC cables, switch/InfiniBand SKUs, prices >= 15k USD, and anomalous observations. Result: 400G OSFP +151%->0%, signals 21->8, and the ones that remain (NVIDIA MFA7U10 +84% same-SKU) are genuine price moves. --- packages/api/src/routes/procurement.ts | 56 ++++++++++++++++++++------ 1 file changed, 43 insertions(+), 13 deletions(-) diff --git a/packages/api/src/routes/procurement.ts b/packages/api/src/routes/procurement.ts index 19f696f..0f58743 100644 --- a/packages/api/src/routes/procurement.ts +++ b/packages/api/src/routes/procurement.ts @@ -787,16 +787,43 @@ procurementRouter.get("/supply-squeeze", async (_req: Request, res: Response) => const [priceSignals, aiDemand, hypeData, stockData] = await Promise.all([ // Price momentum: 30d vs 60d avg by speed/form_factor pool.query(` + -- Per-SKU paired comparison: only transceivers present in BOTH periods. + -- This eliminates catalog-composition bias (new expensive SKUs entering a + -- speed/form-factor bucket would otherwise fake a huge price jump). 
+ WITH per_sku AS ( + SELECT + t.id, t.speed_gbps, t.form_factor, + PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY po.price) + FILTER (WHERE po.time >= NOW() - INTERVAL '30 days') AS med_now, + PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY po.price) + FILTER (WHERE po.time >= NOW() - INTERVAL '60 days' AND po.time < NOW() - INTERVAL '30 days') AS med_prior, + COUNT(*) FILTER (WHERE po.time >= NOW() - INTERVAL '30 days') AS obs_now, + COUNT(*) FILTER (WHERE po.time >= NOW() - INTERVAL '60 days' AND po.time < NOW() - INTERVAL '30 days') AS obs_prior + FROM price_observations po + JOIN transceivers t ON t.id = po.transceiver_id + WHERE po.price > 5 AND po.currency = 'USD' + AND COALESCE(po.is_anomalous, false) = false + AND t.form_factor IN ('SFP','SFP+','SFP28','SFP56','QSFP+','QSFP28','QSFP56','QSFP-DD','QSFP-DD800','OSFP','OSFP-XD','XFP','CFP','CFP2','CFP4','CDFP','DSFP') + AND po.price < 15000 + AND t.part_number NOT ILIKE '%AOC%' + AND t.part_number NOT ILIKE '%-DAC-%' + AND (t.standard_name IS NULL OR (t.standard_name NOT ILIKE '%Switch%' AND t.standard_name NOT ILIKE '%InfiniBand%')) + GROUP BY t.id, t.speed_gbps, t.form_factor + ) SELECT - t.speed_gbps, t.form_factor, - ROUND(AVG(po.price) FILTER (WHERE po.time >= NOW() - INTERVAL '30 days')::numeric,2) AS avg_30d, - ROUND(AVG(po.price) FILTER (WHERE po.time >= NOW() - INTERVAL '60 days' AND po.time < NOW() - INTERVAL '30 days')::numeric,2) AS avg_prior_30d, - COUNT(*) FILTER (WHERE po.time >= NOW() - INTERVAL '30 days') AS obs_30d - FROM price_observations po - JOIN transceivers t ON t.id = po.transceiver_id - WHERE po.price > 5 AND po.currency = 'USD' - GROUP BY t.speed_gbps, t.form_factor - HAVING COUNT(*) FILTER (WHERE po.time >= NOW() - INTERVAL '30 days') >= 3 + speed_gbps, form_factor, + -- avg_30d / avg_prior_30d kept as column names for downstream compatibility, + -- but they now carry MEDIAN-of-matched-SKU prices (only SKUs in both periods) + PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY med_now) AS 
avg_30d, + PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY med_prior) AS avg_prior_30d, + -- The real signal: median of per-SKU percentage deltas + ROUND(PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY (med_now - med_prior) / NULLIF(med_prior,0) * 100)::numeric, 1) AS sku_median_delta_pct, + COUNT(*) AS obs_30d + FROM per_sku + WHERE med_now IS NOT NULL AND med_prior IS NOT NULL AND med_prior > 0 + AND obs_now >= 2 AND obs_prior >= 2 + GROUP BY speed_gbps, form_factor + HAVING COUNT(*) >= 3 `), // AI cluster demand by speed tier pool.query(` @@ -835,7 +862,7 @@ procurementRouter.get("/supply-squeeze", async (_req: Request, res: Response) => `).catch(() => ({ rows: [] })), ]); - type PriceRow = { speed_gbps: string; form_factor: string; avg_30d: string; avg_prior_30d: string; obs_30d: string }; + type PriceRow = { speed_gbps: string; form_factor: string; avg_30d: string; avg_prior_30d: string; obs_30d: string; sku_median_delta_pct: string | null }; type HypeRow = { technology: string; hype_phase: string; hype_score: string }; type AiRow = { speed_tier: string; total_tx: string; cluster_count: string }; type StockRow = { speed_gbps: string; form_factor: string; out_of_stock: string; in_stock: string; total_obs: string }; @@ -861,9 +888,12 @@ procurementRouter.get("/supply-squeeze", async (_req: Request, res: Response) => const signals = (priceSignals.rows as PriceRow[]) .map((r) => { const speed = parseFloat(r.speed_gbps); - const priceUp = r.avg_30d && r.avg_prior_30d - ? ((parseFloat(r.avg_30d) - parseFloat(r.avg_prior_30d)) / parseFloat(r.avg_prior_30d)) * 100 - : 0; + // Prefer the per-SKU median delta (composition-bias-free); fall back to aggregate + const priceUp = r.sku_median_delta_pct != null + ? parseFloat(r.sku_median_delta_pct) + : (r.avg_30d && r.avg_prior_30d + ? 
((parseFloat(r.avg_30d) - parseFloat(r.avg_prior_30d)) / parseFloat(r.avg_prior_30d)) * 100 + : 0); const hype = speedToHype.get(speed); const ai = aiBySpeed.get(speed); const stock = stockByKey.get(`${r.speed_gbps}:${r.form_factor}`);