feat(sql): migrations 026+027 for price cleanup and FS.COM EUR fix
026: Remove invalid price observations (sub-manufacturing-cost), disable
optictransceiver.com (domain repurposed as plant shop), fix verification
function to accept low/medium/high data_confidence values
027: Clean up FS.COM USD→EUR converted prices, force re-scrape with
new de.fs.com EUR-primary scraper
This commit is contained in:
parent
2e852e0a2f
commit
55de4920b2
131
sql/026-price-cleanup-and-verification-fix.sql
Normal file
131
sql/026-price-cleanup-and-verification-fix.sql
Normal file
@ -0,0 +1,131 @@
|
||||
-- Migration 026: Price cleanup + verification function fix
|
||||
--
|
||||
-- Problem 1: Old parsePrice bug accepted bare numbers (no currency symbol)
|
||||
-- → stock counts like "1914" scraped as $1914, shipping costs like "1.30" as $1.30
|
||||
-- Problem 2: data_confidence mismatch — verification expected 'scraped_unverified'/'verified'
|
||||
-- but scrapers write 'low'/'medium'/'high'/'garbage'
|
||||
-- Problem 3: optictransceiver.com domain repurposed as plant shop — remove observations
|
||||
--
|
||||
-- Applied on Erik: 2026-04-06
|
||||
|
||||
-- ── Step 1: Remove obviously wrong prices (likely misread stock/shipping) ──────
-- An observation is deleted when its price falls below a plausibility floor.
-- Floors depend on the observation's currency and on the speed, form factor
-- and part number of the transceiver it belongs to. Observations in currencies
-- other than USD/EUR/GBP are never touched by this statement.
DELETE FROM price_observations AS obs
WHERE EXISTS (
    SELECT 1
    FROM transceivers AS xcvr
    WHERE xcvr.id = obs.transceiver_id
      AND (
            -- Absolute floor: roughly sub-$2 for any optical transceiver is clearly wrong
            (obs.currency = 'USD' AND obs.price < 2.00)
         OR (obs.currency = 'EUR' AND obs.price < 1.80)
         OR (obs.currency = 'GBP' AND obs.price < 1.50)
            -- Speed-specific floors; the same number is applied to USD, EUR and GBP
         OR (
                obs.currency IN ('USD', 'EUR', 'GBP')
            AND (
                    -- 800G under 50 — no 800G transceiver costs this
                    (xcvr.speed_gbps >= 800 AND obs.price < 50)
                    -- 400G under 20 — below any real compatible price
                 OR (xcvr.speed_gbps = 400 AND obs.price < 20)
                    -- 100G under 5
                 OR (xcvr.speed_gbps = 100 AND obs.price < 5)
                    -- Coherent modules (ZR/ZR+) under 80
                 OR (
                        xcvr.form_factor IN ('QSFP-DD', 'OSFP')
                    AND xcvr.speed_gbps >= 400
                    AND lower(xcvr.part_number) LIKE '%zr%'
                    AND obs.price < 80
                 )
            )
         )
      )
);
|
||||
|
||||
-- ── Step 2: Remove optictransceiver.com observations (domain now sells plants) ──
-- Every observation whose source vendor has the slug 'optictransceiver' is
-- deleted, regardless of age, price or currency.
DELETE FROM price_observations
WHERE source_vendor_id IN (
    SELECT v.id
    FROM vendors AS v
    WHERE v.slug = 'optictransceiver'
);
|
||||
|
||||
-- Record on the vendor row why its scraper is disabled.
-- Only the notes column is written (any previous notes text is replaced).
-- NOTE(review): no active/enabled flag is changed here — if the vendors table
-- has one, confirm whether it should be cleared as well.
UPDATE vendors AS v
SET notes = 'Domain repurposed as plant shop 2026-04-06. Scraper disabled.'
WHERE v.slug = 'optictransceiver';
|
||||
|
||||
-- ── Step 3: Fix verification function — accept 'low'/'medium'/'high' confidence ──
--
-- compute_transceiver_verification()
--   Recomputes the verification columns on every row of transceivers in one
--   set-based UPDATE (previously a per-row loop issuing several queries each).
--
--   price_verified      TRUE when the transceiver has at least one observation
--                       with price > 0 from the last 60 days and the selected
--                       observation has a non-NULL currency.
--   price_verified_eur  The selected price when its currency is EUR, or the
--                       price * 1.17 when it is GBP; NULL otherwise.
--   street_price_usd    The selected price when its currency is USD; NULL otherwise.
--   image_verified      TRUE when image_url is neither NULL nor ''.
--   details_verified    TRUE when data_confidence is set to something other
--                       than 'garbage', 'unknown' or '' AND at least one of
--                       connector / wavelengths / fiber_type is non-NULL.
--                       (So low, medium, high, scraped_unverified, verified,
--                       official, enriched_estimated are all accepted.)
--   fully_verified      All three flags above are TRUE.
--   updated_at          Set to NOW() for every row.
--
--   Returns: void.
CREATE OR REPLACE FUNCTION compute_transceiver_verification()
RETURNS void AS $$
BEGIN
    WITH best_price AS (
        -- One observation per transceiver: highest price first, newest first
        -- among equal prices.
        -- NOTE(review): ordering by price DESC means the most expensive recent
        -- observation is stored, not the most recent one — confirm intended.
        SELECT DISTINCT ON (po.transceiver_id)
            po.transceiver_id,
            po.price,
            po.currency
        FROM price_observations AS po
        WHERE po.price > 0
          AND po.time > NOW() - INTERVAL '60 days'
        ORDER BY po.transceiver_id, po.price DESC, po.time DESC
    ),
    computed AS (
        SELECT
            t.id,
            -- The earlier implementation tested "record IS NOT NULL", which is
            -- TRUE only when every selected field is non-NULL. price and time
            -- are non-NULL by the filters above, so the remaining condition
            -- is a non-NULL currency; it is spelled out here.
            (bp.transceiver_id IS NOT NULL AND bp.currency IS NOT NULL) AS price_ok,
            CASE bp.currency
                WHEN 'EUR' THEN bp.price
                WHEN 'GBP' THEN bp.price * 1.17  -- fixed GBP→EUR factor
            END AS price_eur,
            CASE bp.currency
                WHEN 'USD' THEN bp.price
            END AS price_usd,
            (t.image_url IS NOT NULL AND t.image_url != '') AS image_ok,
            (
                t.data_confidence IS NOT NULL
                AND t.data_confidence NOT IN ('garbage', 'unknown', '')
                AND (
                    t.connector IS NOT NULL
                    OR t.wavelengths IS NOT NULL
                    OR t.fiber_type IS NOT NULL
                )
            ) AS details_ok
        FROM transceivers AS t
        LEFT JOIN best_price AS bp
            ON bp.transceiver_id = t.id
    )
    UPDATE transceivers AS t SET
        price_verified     = c.price_ok,
        price_verified_eur = c.price_eur,
        street_price_usd   = c.price_usd,
        image_verified     = c.image_ok,
        details_verified   = c.details_ok,
        fully_verified     = c.price_ok AND c.image_ok AND c.details_ok,
        updated_at         = NOW()
    FROM computed AS c
    WHERE t.id = c.id;
END;
$$ LANGUAGE plpgsql;
|
||||
|
||||
-- ── Step 4: Run verification refresh ──────────────────────────────────────────
-- Recompute the verification flags now that bad observations are gone.
SELECT compute_transceiver_verification();

-- ── Step 5: Report ─────────────────────────────────────────────────────────────
-- One summary row: how many transceivers carry each verification flag, plus
-- the price-verified and fully-verified shares as percentages (1 decimal).
-- COUNT(*) FILTER yields 0 (not NULL) when no row qualifies or the table is
-- empty; NULLIF keeps the percentage NULL instead of dividing by zero.
SELECT
    COUNT(*)                                  AS total,
    COUNT(*) FILTER (WHERE price_verified)    AS price_verified,
    COUNT(*) FILTER (WHERE image_verified)    AS image_verified,
    COUNT(*) FILTER (WHERE details_verified)  AS details_verified,
    COUNT(*) FILTER (WHERE fully_verified)    AS fully_verified,
    ROUND(
        100.0 * COUNT(*) FILTER (WHERE price_verified) / NULLIF(COUNT(*), 0),
        1
    )                                         AS price_pct,
    ROUND(
        100.0 * COUNT(*) FILTER (WHERE fully_verified) / NULLIF(COUNT(*), 0),
        1
    )                                         AS fully_pct
FROM transceivers;
|
||||
44
sql/027-fs-com-eur-currency-fix.sql
Normal file
44
sql/027-fs-com-eur-currency-fix.sql
Normal file
@ -0,0 +1,44 @@
|
||||
-- Migration 027: Fix FS.COM price observations currency
|
||||
--
|
||||
-- FS.COM scraper previously scraped www.fs.com (USD) and tried to convert USD→EUR.
|
||||
-- Now scrapes de.fs.com (EUR) directly — prices are real EUR values.
|
||||
-- This migration cleans up any existing FS.COM prices that were derived (USD→EUR)
|
||||
-- and marks them for re-scraping on next run.
|
||||
--
|
||||
-- Applied: 2026-04-06 (after fs-com.ts BASE_URL switch to de.fs.com)
|
||||
|
||||
-- Delete FS.COM price observations that are more than 7 days old so the next
-- scrape repopulates them with EUR prices. Observations from the last 7 days
-- are kept.
-- NOTE(review): the 7-day cutoff presumably spares observations already
-- scraped from de.fs.com after the switch — confirm the window matches the
-- actual switch date.
DELETE FROM price_observations
WHERE source_vendor_id IN (
        SELECT v.id
        FROM vendors AS v
        WHERE v.slug = 'fs-com'
    )
  AND time < NOW() - INTERVAL '7 days';
|
||||
|
||||
-- Clear the price verification state of FS.COM transceivers that currently
-- hold a verified EUR price, so they get re-verified.
-- Rows are selected through transceivers.vendor_id; image/details flags and
-- street_price_usd are left as they are.
-- NOTE(review): the DELETE in this migration keys on
-- price_observations.source_vendor_id, while this UPDATE keys on
-- transceivers.vendor_id — confirm both identify the same set of products.
UPDATE transceivers AS xcvr
SET
    price_verified     = FALSE,
    price_verified_eur = NULL,
    fully_verified     = FALSE,
    updated_at         = NOW()
WHERE xcvr.vendor_id IN (
        SELECT v.id
        FROM vendors AS v
        WHERE v.slug = 'fs-com'
    )
  AND xcvr.price_verified
  AND xcvr.price_verified_eur IS NOT NULL;
|
||||
|
||||
-- Report remaining FS.COM observations
-- One row per (vendor name, currency): number of observations, average price
-- rounded to 2 decimals, and the oldest/newest observation time. Largest
-- groups come first.
SELECT
    fs.name                  AS vendor,
    obs.currency,
    COUNT(*)                 AS count,
    ROUND(AVG(obs.price), 2) AS avg_price,
    MIN(obs.time)            AS oldest,
    MAX(obs.time)            AS newest
FROM vendors AS fs
INNER JOIN price_observations AS obs
    ON obs.source_vendor_id = fs.id
WHERE fs.slug = 'fs-com'
GROUP BY
    fs.name,
    obs.currency
ORDER BY COUNT(*) DESC;
|
||||
Loading…
x
Reference in New Issue
Block a user