-- Migration 026: Price cleanup + verification function fix
--
-- Problem 1: Old parsePrice bug accepted bare numbers (no currency symbol)
--   → stock counts like "1914" scraped as $1914, shipping costs like "1.30" as $1.30
-- Problem 2: data_confidence mismatch — verification expected 'scraped_unverified'/'verified'
--   but scrapers write 'low'/'medium'/'high'/'garbage'
-- Problem 3: optictransceiver.com domain repurposed as plant shop — remove observations
--
-- Applied on Erik: 2026-04-06

-- ── Step 1: Remove obviously wrong prices (likely misread stock/shipping) ──────
-- Deletes observations whose price is implausibly low. Thresholds are compared
-- against the raw price value; the same number is used for USD, EUR and GBP in
-- the per-speed rules (no currency conversion is applied).
-- Only observations that join to a transceivers row are considered.
DELETE FROM price_observations po
USING transceivers t
WHERE po.transceiver_id = t.id
  AND (
        -- Sub-$2 (or rough EUR/GBP equivalent) for any optical transceiver is clearly wrong
        (po.price < 2.00 AND po.currency = 'USD')
     OR (po.price < 1.80 AND po.currency = 'EUR')
     OR (po.price < 1.50 AND po.currency = 'GBP')
        -- 800G under $50 — no 800G transceiver costs this
     OR (t.speed_gbps >= 800
         AND po.price < 50
         AND po.currency IN ('USD', 'EUR', 'GBP'))
        -- 400G under $20 — below any real compatible price
     OR (t.speed_gbps = 400
         AND po.price < 20
         AND po.currency IN ('USD', 'EUR', 'GBP'))
        -- 100G under $5
     OR (t.speed_gbps = 100
         AND po.price < 5
         AND po.currency IN ('USD', 'EUR', 'GBP'))
        -- Coherent modules (ZR/ZR+) under $80; detected by 'zr' in the part number
     OR (t.form_factor IN ('QSFP-DD', 'OSFP')
         AND t.speed_gbps >= 400
         AND lower(t.part_number) LIKE '%zr%'
         AND po.price < 80
         AND po.currency IN ('USD', 'EUR', 'GBP'))
  );

-- ── Step 2: Remove optictransceiver.com observations (domain now sells plants) ──
DELETE FROM price_observations po
USING vendors v
WHERE po.source_vendor_id = v.id
  AND v.slug = 'optictransceiver';

-- Record on the vendor row why its scraper is disabled.
-- NOTE(review): this only rewrites vendors.notes; no active/enabled flag is
-- changed here — confirm whether the vendor must also be deactivated elsewhere.
UPDATE vendors
SET notes = 'Domain repurposed as plant shop 2026-04-06. Scraper disabled.'
WHERE slug = 'optictransceiver';

-- ── Step 3: Fix verification function — accept 'low'/'medium'/'high' confidence ──
-- compute_transceiver_verification()
--   Recomputes price_verified, price_verified_eur, street_price_usd,
--   image_verified, details_verified, fully_verified and updated_at for every
--   row in transceivers. Takes no arguments and returns nothing.
CREATE OR REPLACE FUNCTION compute_transceiver_verification()
RETURNS void AS $$
DECLARE
    v_rec              RECORD;   -- current transceivers row (only the columns needed below)
    v_price_row        RECORD;   -- chosen price observation for v_rec, if any
    v_price_eur        NUMERIC;
    v_price_usd        NUMERIC;
    v_price_verified   BOOLEAN;
    v_image_verified   BOOLEAN;
    v_details_verified BOOLEAN;
BEGIN
    -- Read every column the checks need in the loop query itself instead of
    -- re-querying transceivers by id for each check.
    -- Assumes transceivers.id is unique (it is used as the row key throughout).
    FOR v_rec IN
        SELECT id, image_url, data_confidence, connector, wavelengths, fiber_type
        FROM transceivers
    LOOP
        -- Price: has any positive price observation in the last 60 days.
        -- NOTE(review): ORDER BY price DESC stores the HIGHEST price in the
        -- window (ties broken by newest), not the most recent one — confirm
        -- this is the intended "street price".
        SELECT price, currency, time
        INTO v_price_row
        FROM price_observations
        WHERE transceiver_id = v_rec.id
          AND price > 0
          AND time > NOW() - INTERVAL '60 days'
        ORDER BY price DESC, time DESC
        LIMIT 1;

        -- Use FOUND rather than "v_price_row IS NOT NULL": a composite value
        -- is only IS NOT NULL when *every* field is non-null, so a matching
        -- row with a NULL currency would wrongly count as "no price".
        v_price_verified := FOUND;

        -- Store the price under the column matching its currency.
        -- Unknown (or NULL) currencies leave both stored prices NULL.
        v_price_eur := NULL;
        v_price_usd := NULL;
        IF v_price_verified THEN
            CASE v_price_row.currency
                WHEN 'EUR' THEN
                    v_price_eur := v_price_row.price;
                WHEN 'USD' THEN
                    v_price_usd := v_price_row.price;
                WHEN 'GBP' THEN
                    -- Hard-coded multiplier, presumably an approximate
                    -- GBP→EUR rate — TODO confirm / keep up to date.
                    v_price_eur := v_price_row.price * 1.17;
                ELSE
                    NULL;  -- unsupported currency: nothing to store
            END CASE;
        END IF;

        -- Image: has a non-empty image_url.
        v_image_verified :=
            v_rec.image_url IS NOT NULL
            AND v_rec.image_url <> '';

        -- Details: data_confidence is set and not one of the rejected values
        -- ('garbage', 'unknown', ''), and at least one of connector /
        -- wavelengths / fiber_type is present. Any other confidence value
        -- (low, medium, high, scraped_unverified, verified, official,
        -- enriched_estimated, ...) is accepted.
        v_details_verified :=
            v_rec.data_confidence IS NOT NULL
            AND v_rec.data_confidence NOT IN ('garbage', 'unknown', '')
            AND (v_rec.connector IS NOT NULL
                 OR v_rec.wavelengths IS NOT NULL
                 OR v_rec.fiber_type IS NOT NULL);

        UPDATE transceivers
        SET price_verified     = v_price_verified,
            price_verified_eur = v_price_eur,
            street_price_usd   = v_price_usd,
            image_verified     = v_image_verified,
            details_verified   = v_details_verified,
            fully_verified     = v_price_verified
                                 AND v_image_verified
                                 AND v_details_verified,
            updated_at         = NOW()
        WHERE id = v_rec.id;
    END LOOP;
END;
$$ LANGUAGE plpgsql;

-- ── Step 4: Run verification refresh ──────────────────────────────────────────
SELECT compute_transceiver_verification();

-- ── Step 5: Report ─────────────────────────────────────────────────────────────
-- Counts of verified rows plus percentages of the total.
-- NULLIF guards the division so an empty transceivers table yields NULL
-- percentages instead of aborting the migration with "division by zero".
SELECT
    COUNT(*)                                  AS total,
    COUNT(*) FILTER (WHERE price_verified)    AS price_verified,
    COUNT(*) FILTER (WHERE image_verified)    AS image_verified,
    COUNT(*) FILTER (WHERE details_verified)  AS details_verified,
    COUNT(*) FILTER (WHERE fully_verified)    AS fully_verified,
    ROUND(100.0 * COUNT(*) FILTER (WHERE price_verified)
          / NULLIF(COUNT(*), 0), 1)           AS price_pct,
    ROUND(100.0 * COUNT(*) FILTER (WHERE fully_verified)
          / NULLIF(COUNT(*), 0), 1)           AS fully_pct
FROM transceivers;