026: Remove invalid price observations (sub-manufacturing-cost), disable
optictransceiver.com (domain repurposed as plant shop), fix verification
function to accept low/medium/high data_confidence values
027: Clean up FS.COM USD→EUR converted prices, force re-scrape with
new de.fs.com EUR-primary scraper
132 lines
5.2 KiB
PL/PgSQL
132 lines
5.2 KiB
PL/PgSQL
-- Migration 026: Price cleanup + verification function fix
--
-- Problem 1: The old parsePrice bug accepted bare numbers (no currency symbol),
--            so stock counts like "1914" were scraped as $1914 and shipping
--            costs like "1.30" as $1.30.
-- Problem 2: data_confidence mismatch — verification expected
--            'scraped_unverified'/'verified', but scrapers write
--            'low'/'medium'/'high'/'garbage'.
-- Problem 3: The optictransceiver.com domain was repurposed as a plant shop —
--            remove its observations.
--
-- Applied on Erik: 2026-04-06
|
|
|
|
-- ── Step 1: Remove obviously wrong prices (likely misread stock/shipping) ──────
-- Deletes observations whose price is below a plausibility floor for the
-- linked transceiver. Only USD/EUR/GBP rows are considered, and thresholds are
-- compared against the raw price (no currency conversion is applied).
DELETE FROM price_observations AS po
WHERE po.currency IN ('USD', 'EUR', 'GBP')
  AND EXISTS (
      SELECT 1
      FROM transceivers AS t
      WHERE t.id = po.transceiver_id
        AND (
            -- Absolute floor for any optical transceiver, per currency
            po.price < CASE po.currency
                           WHEN 'USD' THEN 2.00
                           WHEN 'EUR' THEN 1.80
                           WHEN 'GBP' THEN 1.50
                       END
            -- 800G and faster: nothing real sells under 50
            OR (t.speed_gbps >= 800 AND po.price < 50)
            -- 400G: under 20 is below any real compatible price
            OR (t.speed_gbps = 400 AND po.price < 20)
            -- 100G: under 5
            OR (t.speed_gbps = 100 AND po.price < 5)
            -- Coherent modules (ZR/ZR+), identified by 'zr' in the part number: under 80
            OR (
                t.form_factor IN ('QSFP-DD', 'OSFP')
                AND t.speed_gbps >= 400
                AND lower(t.part_number) LIKE '%zr%'
                AND po.price < 80
            )
        )
  );
|
|
|
|
-- ── Step 2: Remove optictransceiver.com observations (domain now sells plants) ──
-- Drop every observation sourced from the vendor whose slug is 'optictransceiver'.
DELETE FROM price_observations
WHERE source_vendor_id IN (
    SELECT v.id
    FROM vendors AS v
    WHERE v.slug = 'optictransceiver'
);

-- Record the reason on the vendor row. This overwrites the existing notes value.
-- NOTE(review): only `notes` is written here; no active/enabled flag is changed
-- by this statement — confirm the scraper is actually disabled elsewhere.
UPDATE vendors AS v
SET notes = 'Domain repurposed as plant shop 2026-04-06. Scraper disabled.'
WHERE v.slug = 'optictransceiver';
|
|
|
|
-- ── Step 3: Fix verification function — accept 'low'/'medium'/'high' confidence ──
-- compute_transceiver_verification()
--
-- Recomputes the verification flags and stored reference prices for every row
-- of transceivers in a single set-based UPDATE. Takes no arguments and returns
-- void; its only effect is that UPDATE.
--
-- Columns written per transceiver:
--   price_verified      TRUE when a price_observations row with price > 0 and
--                       time within the last 60 days exists.
--   price_verified_eur  the selected price when its currency is EUR, or the
--                       price × 1.17 when GBP (fixed factor hard-coded here);
--                       NULL otherwise.
--   street_price_usd    the selected price when its currency is USD; NULL otherwise.
--   image_verified      TRUE when image_url is non-NULL and non-empty.
--   details_verified    TRUE when data_confidence is set and not one of
--                       'garbage' / 'unknown' / '' and at least one of
--                       connector, wavelengths, fiber_type is non-NULL.
--   fully_verified      all three flags above are TRUE.
--   updated_at          NOW(), for every row, changed or not.
--
-- Changes from the previous version:
--   * Existence of a price is tested on the row itself. The earlier
--     `record IS NOT NULL` test is only true when *every* selected field is
--     non-NULL, so an observation with a NULL currency counted as "no price".
--   * The per-row loop (one SELECT, two EXISTS probes and one UPDATE per
--     transceiver) is replaced by one statement.
CREATE OR REPLACE FUNCTION compute_transceiver_verification()
RETURNS void AS $$
BEGIN
    WITH computed AS (
        SELECT
            src.id,
            -- price > 0 in the lateral filter guarantees a non-NULL price
            -- whenever a row matched, so this is a pure existence test.
            (obs.price IS NOT NULL) AS has_price,
            CAST(
                CASE obs.currency
                    WHEN 'EUR' THEN obs.price
                    WHEN 'GBP' THEN obs.price * 1.17
                    ELSE NULL
                END AS NUMERIC
            ) AS price_eur,
            CAST(
                CASE obs.currency
                    WHEN 'USD' THEN obs.price
                    ELSE NULL
                END AS NUMERIC
            ) AS price_usd,
            (src.image_url IS NOT NULL AND src.image_url != '') AS has_image,
            (
                src.data_confidence IS NOT NULL
                AND src.data_confidence NOT IN ('garbage', 'unknown', '')
                AND (
                    src.connector IS NOT NULL
                    OR src.wavelengths IS NOT NULL
                    OR src.fiber_type IS NOT NULL
                )
            ) AS has_details
        FROM transceivers AS src
        LEFT JOIN LATERAL (
            -- NOTE(review): this selects the HIGHEST raw price of the last
            -- 60 days (ties broken by newest), comparing amounts across
            -- currencies without conversion — confirm that is intended rather
            -- than the most recent or lowest observation.
            SELECT
                po.price,
                po.currency
            FROM price_observations AS po
            WHERE po.transceiver_id = src.id
              AND po.price > 0
              AND po.time > NOW() - INTERVAL '60 days'
            ORDER BY po.price DESC, po.time DESC
            LIMIT 1
        ) AS obs ON TRUE
    )
    UPDATE transceivers AS t
    SET
        price_verified     = c.has_price,
        price_verified_eur = c.price_eur,
        street_price_usd   = c.price_usd,
        image_verified     = c.has_image,
        details_verified   = c.has_details,
        fully_verified     = c.has_price AND c.has_image AND c.has_details,
        updated_at         = NOW()
    FROM computed AS c
    WHERE t.id = c.id;
END;
$$ LANGUAGE plpgsql;
|
|
|
|
-- ── Step 4: Run verification refresh ──────────────────────────────────────────
|
|
-- Call the function redefined in Step 3 so every transceiver's verification
-- columns are recomputed after the deletions in Steps 1–2.
SELECT compute_transceiver_verification();
|
|
|
|
-- ── Step 5: Report ─────────────────────────────────────────────────────────────
-- One-row summary of the verification flags across all transceivers.
-- Counters use COUNT(*) FILTER so they report 0 (not NULL) when no row
-- qualifies or the table is empty; the percentage denominators are guarded
-- with NULLIF so an empty table yields NULL percentages instead of a
-- division-by-zero error.
SELECT
    COUNT(*)                                  AS total,
    COUNT(*) FILTER (WHERE price_verified)    AS price_verified,
    COUNT(*) FILTER (WHERE image_verified)    AS image_verified,
    COUNT(*) FILTER (WHERE details_verified)  AS details_verified,
    COUNT(*) FILTER (WHERE fully_verified)    AS fully_verified,
    ROUND(
        100.0 * COUNT(*) FILTER (WHERE price_verified) / NULLIF(COUNT(*), 0),
        1
    )                                         AS price_pct,
    ROUND(
        100.0 * COUNT(*) FILTER (WHERE fully_verified) / NULLIF(COUNT(*), 0),
        1
    )                                         AS fully_pct
FROM transceivers;
|