fix(supply-squeeze): per-SKU paired price comparison eliminates catalog-composition bias

The 30d-vs-prior-30d price momentum (the last 30 days compared against the
30 days before that) aggregated AVG/median across whatever SKUs happened to
be in a speed/form-factor bucket in each period. New expensive SKUs entering
the catalog (NVIDIA switches at 30k USD, AOC cables) faked huge jumps
— 400G OSFP showed +151% when the matched-SKU reality was 0%.

Now: compute the per-transceiver median price in each period, keep only SKUs
present in BOTH periods (>=2 obs each), and report the median of the per-SKU
pct deltas for buckets with at least 3 matched SKUs. Also excludes
non-transceiver form factors, AOC/DAC cables, switch/InfiniBand SKUs, prices of
15k USD or more, and anomalous observations. Result: 400G OSFP +151% -> 0%,
signals 21 -> 8, and the ones that remain (NVIDIA MFA7U10 +84% same-SKU) are
genuine price moves.
This commit is contained in:
Rene Fichtmueller 2026-06-06 16:48:15 +00:00
parent 03fdfa7d51
commit 0cf607040f

View File

@ -787,16 +787,43 @@ procurementRouter.get("/supply-squeeze", async (_req: Request, res: Response) =>
const [priceSignals, aiDemand, hypeData, stockData] = await Promise.all([ const [priceSignals, aiDemand, hypeData, stockData] = await Promise.all([
// Price momentum: 30d vs 60d avg by speed/form_factor // Price momentum: 30d vs 60d avg by speed/form_factor
pool.query(` pool.query(`
-- Per-SKU paired comparison: only transceivers present in BOTH periods.
-- This eliminates catalog-composition bias (new expensive SKUs entering a
-- speed/form-factor bucket would otherwise fake a huge price jump).
WITH per_sku AS (
SELECT
t.id, t.speed_gbps, t.form_factor,
PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY po.price)
FILTER (WHERE po.time >= NOW() - INTERVAL '30 days') AS med_now,
PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY po.price)
FILTER (WHERE po.time >= NOW() - INTERVAL '60 days' AND po.time < NOW() - INTERVAL '30 days') AS med_prior,
COUNT(*) FILTER (WHERE po.time >= NOW() - INTERVAL '30 days') AS obs_now,
COUNT(*) FILTER (WHERE po.time >= NOW() - INTERVAL '60 days' AND po.time < NOW() - INTERVAL '30 days') AS obs_prior
FROM price_observations po
JOIN transceivers t ON t.id = po.transceiver_id
WHERE po.price > 5 AND po.currency = 'USD'
AND COALESCE(po.is_anomalous, false) = false
AND t.form_factor IN ('SFP','SFP+','SFP28','SFP56','QSFP+','QSFP28','QSFP56','QSFP-DD','QSFP-DD800','OSFP','OSFP-XD','XFP','CFP','CFP2','CFP4','CDFP','DSFP')
AND po.price < 15000
AND t.part_number NOT ILIKE '%AOC%'
AND t.part_number NOT ILIKE '%-DAC-%'
AND (t.standard_name IS NULL OR (t.standard_name NOT ILIKE '%Switch%' AND t.standard_name NOT ILIKE '%InfiniBand%'))
GROUP BY t.id, t.speed_gbps, t.form_factor
)
SELECT SELECT
t.speed_gbps, t.form_factor, speed_gbps, form_factor,
ROUND(AVG(po.price) FILTER (WHERE po.time >= NOW() - INTERVAL '30 days')::numeric,2) AS avg_30d, -- avg_30d / avg_prior_30d kept as column names for downstream compatibility,
ROUND(AVG(po.price) FILTER (WHERE po.time >= NOW() - INTERVAL '60 days' AND po.time < NOW() - INTERVAL '30 days')::numeric,2) AS avg_prior_30d, -- but they now carry MEDIAN-of-matched-SKU prices (only SKUs in both periods)
COUNT(*) FILTER (WHERE po.time >= NOW() - INTERVAL '30 days') AS obs_30d PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY med_now) AS avg_30d,
FROM price_observations po PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY med_prior) AS avg_prior_30d,
JOIN transceivers t ON t.id = po.transceiver_id -- The real signal: median of per-SKU percentage deltas
WHERE po.price > 5 AND po.currency = 'USD' ROUND(PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY (med_now - med_prior) / NULLIF(med_prior,0) * 100)::numeric, 1) AS sku_median_delta_pct,
GROUP BY t.speed_gbps, t.form_factor COUNT(*) AS obs_30d
HAVING COUNT(*) FILTER (WHERE po.time >= NOW() - INTERVAL '30 days') >= 3 FROM per_sku
WHERE med_now IS NOT NULL AND med_prior IS NOT NULL AND med_prior > 0
AND obs_now >= 2 AND obs_prior >= 2
GROUP BY speed_gbps, form_factor
HAVING COUNT(*) >= 3
`), `),
// AI cluster demand by speed tier // AI cluster demand by speed tier
pool.query(` pool.query(`
@ -835,7 +862,7 @@ procurementRouter.get("/supply-squeeze", async (_req: Request, res: Response) =>
`).catch(() => ({ rows: [] })), `).catch(() => ({ rows: [] })),
]); ]);
type PriceRow = { speed_gbps: string; form_factor: string; avg_30d: string; avg_prior_30d: string; obs_30d: string }; type PriceRow = { speed_gbps: string; form_factor: string; avg_30d: string; avg_prior_30d: string; obs_30d: string; sku_median_delta_pct: string | null };
type HypeRow = { technology: string; hype_phase: string; hype_score: string }; type HypeRow = { technology: string; hype_phase: string; hype_score: string };
type AiRow = { speed_tier: string; total_tx: string; cluster_count: string }; type AiRow = { speed_tier: string; total_tx: string; cluster_count: string };
type StockRow = { speed_gbps: string; form_factor: string; out_of_stock: string; in_stock: string; total_obs: string }; type StockRow = { speed_gbps: string; form_factor: string; out_of_stock: string; in_stock: string; total_obs: string };
@ -861,9 +888,12 @@ procurementRouter.get("/supply-squeeze", async (_req: Request, res: Response) =>
const signals = (priceSignals.rows as PriceRow[]) const signals = (priceSignals.rows as PriceRow[])
.map((r) => { .map((r) => {
const speed = parseFloat(r.speed_gbps); const speed = parseFloat(r.speed_gbps);
const priceUp = r.avg_30d && r.avg_prior_30d // Prefer the per-SKU median delta (composition-bias-free); fall back to aggregate
? ((parseFloat(r.avg_30d) - parseFloat(r.avg_prior_30d)) / parseFloat(r.avg_prior_30d)) * 100 const priceUp = r.sku_median_delta_pct != null
: 0; ? parseFloat(r.sku_median_delta_pct)
: (r.avg_30d && r.avg_prior_30d
? ((parseFloat(r.avg_30d) - parseFloat(r.avg_prior_30d)) / parseFloat(r.avg_prior_30d)) * 100
: 0);
const hype = speedToHype.get(speed); const hype = speedToHype.get(speed);
const ai = aiBySpeed.get(speed); const ai = aiBySpeed.get(speed);
const stock = stockByKey.get(`${r.speed_gbps}:${r.form_factor}`); const stock = stockByKey.get(`${r.speed_gbps}:${r.form_factor}`);