feat(sql): migrations 026+027 for price cleanup and FS.COM EUR fix

026: Remove invalid price observations (sub-manufacturing-cost), disable
     optictransceiver.com (domain repurposed as plant shop), fix verification
     function to accept low/medium/high data_confidence values
027: Clean up FS.COM USD→EUR converted prices, force re-scrape with
     new de.fs.com EUR-primary scraper
This commit is contained in:
Rene Fichtmueller 2026-04-06 02:22:00 +02:00
parent 2e852e0a2f
commit 55de4920b2
2 changed files with 175 additions and 0 deletions

View File

@ -0,0 +1,131 @@
-- Migration 026: Price cleanup + verification function fix
--
-- Problem 1: Old parsePrice bug accepted bare numbers (no currency symbol)
-- → stock counts like "1914" scraped as $1914, shipping costs like "1.30" as $1.30
-- Problem 2: data_confidence mismatch — verification expected 'scraped_unverified'/'verified'
-- but scrapers write 'low'/'medium'/'high'/'garbage'
-- Problem 3: optictransceiver.com domain repurposed as plant shop — remove observations
--
-- Applied on Erik: 2026-04-06
-- ── Step 1: Remove obviously wrong prices (likely misread stock/shipping) ──────
-- Drops observations whose price is implausibly low for the transceiver it is
-- attached to. Only USD/EUR/GBP observations are considered; every rule below
-- is evaluated against the raw price in its own currency (no conversion).
DELETE FROM price_observations AS po
USING transceivers AS t
WHERE t.id = po.transceiver_id
  AND po.currency IN ('USD', 'EUR', 'GBP')
  AND (
      -- Absolute floor per currency: nothing optical sells below this
      (po.currency = 'USD' AND po.price < 2.00)
      OR (po.currency = 'EUR' AND po.price < 1.80)
      OR (po.currency = 'GBP' AND po.price < 1.50)
      -- Floors by line rate
      OR (t.speed_gbps >= 800 AND po.price < 50)   -- 800G and faster
      OR (t.speed_gbps = 400 AND po.price < 20)    -- 400G
      OR (t.speed_gbps = 100 AND po.price < 5)     -- 100G
      -- Coherent ZR/ZR+ modules, identified by "zr" in the part number
      OR (
          t.speed_gbps >= 400
          AND t.form_factor IN ('QSFP-DD', 'OSFP')
          AND lower(t.part_number) LIKE '%zr%'
          AND po.price < 80
      )
  );
-- ── Step 2: Remove optictransceiver.com observations (domain now sells plants) ──
-- Every observation sourced from the 'optictransceiver' vendor is dropped.
DELETE FROM price_observations
WHERE source_vendor_id IN (
    SELECT id
    FROM vendors
    WHERE slug = 'optictransceiver'
);
-- Record the reason on the vendor row. Only the notes column is written here;
-- no status/active flag is changed by this statement.
UPDATE vendors AS v
SET notes = 'Domain repurposed as plant shop 2026-04-06. Scraper disabled.'
WHERE v.slug = 'optictransceiver';
-- ── Step 3: Fix verification function — accept 'low'/'medium'/'high' confidence ──
-- compute_transceiver_verification()
--
-- Recomputes the verification flags and cached prices on EVERY row of
-- transceivers (updated_at is bumped on all of them). Takes no arguments and
-- returns void.
--
--   price_verified     a price_observations row with price > 0 exists within
--                      the last 60 days
--   price_verified_eur that price if its currency is EUR, or price * 1.17 if
--                      GBP; NULL otherwise
--   street_price_usd   that price if its currency is USD; NULL otherwise
--   image_verified     image_url is neither NULL nor ''
--   details_verified   data_confidence is not NULL / '' / 'garbage' / 'unknown'
--                      (so low, medium, high, scraped_unverified, verified, ...
--                      all pass) AND at least one of connector, wavelengths,
--                      fiber_type is set
--   fully_verified     all three flags above are true
--
-- Implementation notes:
--   * One set-based UPDATE replaces the former per-row loop, which ran three
--     lookups plus an UPDATE for each transceiver.
--   * "Has a price" is decided by whether the lateral lookup found a row. The
--     previous `record IS NOT NULL` test is only true when every field of the
--     record is non-null, so an observation with a NULL currency was treated
--     as if no observation existed.
--   * The representative observation is the HIGHEST price in the window (ties
--     broken by most recent time). NOTE(review): confirm highest-in-window is
--     intended rather than the latest observation.
--   * 1.17 is a hard-coded GBP→EUR factor; currencies other than
--     EUR/USD/GBP yield price_verified = TRUE with both cached prices NULL.
CREATE OR REPLACE FUNCTION compute_transceiver_verification()
RETURNS void AS $$
BEGIN
    UPDATE transceivers AS t
    SET
        price_verified = s.has_price,
        price_verified_eur = s.price_eur,
        street_price_usd = s.price_usd,
        image_verified = s.has_image,
        details_verified = s.has_details,
        fully_verified = s.has_price AND s.has_image AND s.has_details,
        updated_at = NOW()
    FROM (
        SELECT
            tr.id,
            -- transceiver_id is non-null whenever the lateral lookup matched
            (p.transceiver_id IS NOT NULL) AS has_price,
            CASE p.currency
                WHEN 'EUR' THEN CAST(p.price AS NUMERIC)
                WHEN 'GBP' THEN CAST(p.price * 1.17 AS NUMERIC)
                ELSE NULL
            END AS price_eur,
            CASE p.currency
                WHEN 'USD' THEN CAST(p.price AS NUMERIC)
                ELSE NULL
            END AS price_usd,
            COALESCE(
                tr.image_url IS NOT NULL AND tr.image_url != '',
                FALSE
            ) AS has_image,
            COALESCE(
                tr.data_confidence IS NOT NULL
                AND tr.data_confidence NOT IN ('garbage', 'unknown', '')
                AND (
                    tr.connector IS NOT NULL
                    OR tr.wavelengths IS NOT NULL
                    OR tr.fiber_type IS NOT NULL
                ),
                FALSE
            ) AS has_details
        FROM transceivers AS tr
        -- At most one representative observation per transceiver
        LEFT JOIN LATERAL (
            SELECT
                po.transceiver_id,
                po.price,
                po.currency
            FROM price_observations AS po
            WHERE po.transceiver_id = tr.id
              AND po.price > 0
              AND po.time > NOW() - INTERVAL '60 days'
            ORDER BY po.price DESC, po.time DESC
            LIMIT 1
        ) AS p ON TRUE
    ) AS s
    WHERE t.id = s.id;
END;
$$ LANGUAGE plpgsql;
-- ── Step 4: Run verification refresh ──────────────────────────────────────────
-- Recompute all verification flags now that the bad observations are gone.
SELECT compute_transceiver_verification();
-- ── Step 5: Report ─────────────────────────────────────────────────────────────
-- One summary row: flag counts plus price/fully verified percentages.
-- NULLIF guards the divisions so an empty transceivers table yields NULL
-- percentages instead of aborting the migration with a division-by-zero error.
SELECT
    COUNT(*) AS total,
    COUNT(*) FILTER (WHERE price_verified) AS price_verified,
    COUNT(*) FILTER (WHERE image_verified) AS image_verified,
    COUNT(*) FILTER (WHERE details_verified) AS details_verified,
    COUNT(*) FILTER (WHERE fully_verified) AS fully_verified,
    ROUND(
        100.0 * COUNT(*) FILTER (WHERE price_verified) / NULLIF(COUNT(*), 0),
        1
    ) AS price_pct,
    ROUND(
        100.0 * COUNT(*) FILTER (WHERE fully_verified) / NULLIF(COUNT(*), 0),
        1
    ) AS fully_pct
FROM transceivers;

View File

@ -0,0 +1,44 @@
-- Migration 027: Fix FS.COM price observations currency
--
-- FS.COM scraper previously scraped www.fs.com (USD) and tried to convert USD→EUR.
-- Now scrapes de.fs.com (EUR) directly — prices are real EUR values.
-- This migration deletes existing FS.COM price observations older than 7 days
-- (intended to drop the USD→EUR-derived prices) and resets price verification on
-- FS.COM transceivers so the next scrape run repopulates them.
--
-- Applied: 2026-04-06 (after fs-com.ts BASE_URL switch to de.fs.com)
-- Purge old FS.COM observations so the next scrape repopulates them with
-- prices read directly in EUR. Only rows older than 7 days are removed;
-- FS.COM observations from the last 7 days are left in place.
-- NOTE(review): confirm the 7-day cutoff covers every USD→EUR-derived row.
DELETE FROM price_observations AS po
WHERE po.time < NOW() - INTERVAL '7 days'
  AND po.source_vendor_id IN (
      SELECT v.id
      FROM vendors AS v
      WHERE v.slug = 'fs-com'
  );
-- Clear the cached EUR price verification on transceivers whose own vendor
-- (transceivers.vendor_id) is FS.COM, so a later verification run recomputes
-- it. Only rows that currently hold a verified EUR price are touched;
-- street_price_usd is not modified.
-- NOTE(review): the DELETE in this migration keys on
-- price_observations.source_vendor_id, this statement on
-- transceivers.vendor_id — confirm both select the same transceivers.
UPDATE transceivers AS t
SET
    price_verified = FALSE,
    price_verified_eur = NULL,
    fully_verified = FALSE,
    updated_at = NOW()
WHERE t.price_verified
  AND t.price_verified_eur IS NOT NULL
  AND EXISTS (
      SELECT 1
      FROM vendors AS v
      WHERE v.id = t.vendor_id
        AND v.slug = 'fs-com'
  );
-- Summarise what is left for FS.COM after the cleanup: one row per currency
-- with the observation count, average price and the oldest/newest timestamps,
-- largest group first.
SELECT
    v.name AS vendor,
    po.currency,
    COUNT(*) AS count,
    ROUND(AVG(po.price), 2) AS avg_price,
    MIN(po.time) AS oldest,
    MAX(po.time) AS newest
FROM vendors AS v
INNER JOIN price_observations AS po
    ON po.source_vendor_id = v.id
WHERE v.slug = 'fs-com'
GROUP BY
    v.name,
    po.currency
ORDER BY COUNT(*) DESC;