From 6fb9b6eb4f6729e76f3ebad7164cb8e2a339c2c7 Mon Sep 17 00:00:00 2001
From: Rene Fichtmueller
Date: Mon, 6 Apr 2026 02:22:00 +0200
Subject: [PATCH] feat(sql): migrations 026+027 for price cleanup and FS.COM EUR fix
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

026: Remove invalid price observations (sub-manufacturing-cost), disable
optictransceiver.com (domain repurposed as plant shop), fix verification
function to accept low/medium/high data_confidence values

027: Clean up FS.COM USD→EUR converted prices, force re-scrape with new
de.fs.com EUR-primary scraper
---
 ...026-price-cleanup-and-verification-fix.sql | 146 ++++++++++++++++++
 sql/027-fs-com-eur-currency-fix.sql           |  47 ++++++
 2 files changed, 193 insertions(+)
 create mode 100644 sql/026-price-cleanup-and-verification-fix.sql
 create mode 100644 sql/027-fs-com-eur-currency-fix.sql

diff --git a/sql/026-price-cleanup-and-verification-fix.sql b/sql/026-price-cleanup-and-verification-fix.sql
new file mode 100644
index 0000000..39f64f7
--- /dev/null
+++ b/sql/026-price-cleanup-and-verification-fix.sql
@@ -0,0 +1,146 @@
+-- Migration 026: Price cleanup + verification function fix
+--
+-- Problem 1: Old parsePrice bug accepted bare numbers (no currency symbol)
+--   → stock counts like "1914" scraped as $1914, shipping costs like "1.30" as $1.30
+-- Problem 2: data_confidence mismatch — verification expected 'scraped_unverified'/'verified'
+--   but scrapers write 'low'/'medium'/'high'/'garbage'
+-- Problem 3: optictransceiver.com domain repurposed as plant shop — remove observations
+--
+-- Applied on Erik: 2026-04-06
+
+-- ── Step 1: Remove obviously wrong prices (likely misread stock/shipping) ──────
+-- Suspiciously low prices per speed class / form factor (below manufacturing cost)
+DELETE FROM price_observations po
+USING transceivers t
+WHERE po.transceiver_id = t.id
+  AND (
+    -- Sub-$2 for any optical transceiver is clearly wrong
+    (po.price < 2.00 AND po.currency = 'USD')
+    OR (po.price < 1.80 AND po.currency = 'EUR')
+    OR (po.price < 1.50 AND po.currency = 'GBP')
+    -- 800G under $50 — no 800G transceiver costs this
+    OR (t.speed_gbps >= 800 AND po.price < 50 AND po.currency IN ('USD', 'EUR', 'GBP'))
+    -- 400G under $20 — below any real compatible price
+    OR (t.speed_gbps = 400 AND po.price < 20 AND po.currency IN ('USD', 'EUR', 'GBP'))
+    -- 100G under $5
+    OR (t.speed_gbps = 100 AND po.price < 5 AND po.currency IN ('USD', 'EUR', 'GBP'))
+    -- Coherent modules (ZR/ZR+) under $80
+    OR (t.form_factor IN ('QSFP-DD', 'OSFP') AND t.speed_gbps >= 400
+        AND lower(t.part_number) LIKE '%zr%' AND po.price < 80 AND po.currency IN ('USD', 'EUR', 'GBP'))
+  );
+
+-- ── Step 2: Remove optictransceiver.com observations (domain now sells plants) ──
+DELETE FROM price_observations po
+USING vendors v
+WHERE po.source_vendor_id = v.id
+  AND v.slug = 'optictransceiver';
+
+-- Record the reason on the vendor row (only notes is written; no status flag is changed here)
+UPDATE vendors
+SET notes = 'Domain repurposed as plant shop 2026-04-06. Scraper disabled.'
+WHERE slug = 'optictransceiver';
+
+-- ── Step 3: Fix verification function — accept 'low'/'medium'/'high' confidence ──
+-- compute_transceiver_verification() takes no arguments and returns void. For every
+-- row in transceivers it recomputes price_verified, price_verified_eur and
+-- street_price_usd (from price_observations of the last 60 days), image_verified
+-- (non-empty image_url), details_verified (usable data_confidence plus at least one
+-- of connector / wavelengths / fiber_type) and fully_verified (all three flags),
+-- and sets updated_at = NOW() on each row.
+CREATE OR REPLACE FUNCTION compute_transceiver_verification()
+RETURNS void AS $$
+DECLARE
+  v_rec RECORD;
+  v_price_row RECORD;
+  v_price_eur NUMERIC;
+  v_price_usd NUMERIC;
+  v_price_verified BOOLEAN;
+  v_image_verified BOOLEAN;
+  v_details_verified BOOLEAN;
+BEGIN
+  FOR v_rec IN SELECT id FROM transceivers LOOP
+    -- Price: has any real price observation in last 60 days.
+    -- NOTE(review): ORDER BY price DESC keeps the highest raw amount, compared
+    -- across currencies without conversion — confirm this is intended rather
+    -- than newest-first.
+    SELECT price, currency, time INTO v_price_row
+    FROM price_observations
+    WHERE transceiver_id = v_rec.id
+      AND price > 0
+      AND currency IS NOT NULL
+      AND time > NOW() - INTERVAL '60 days'
+    ORDER BY price DESC, time DESC
+    LIMIT 1;
+
+    -- FOUND tells whether the SELECT INTO above matched a row. A record-level
+    -- "IS NOT NULL" is only true when every field is non-null, so a NULL-currency
+    -- row sorted first used to mask valid observations; such rows are now skipped.
+    v_price_verified := FOUND;
+
+    -- Convert to EUR/USD for storage
+    IF v_price_verified THEN
+      CASE v_price_row.currency
+        WHEN 'EUR' THEN
+          v_price_eur := v_price_row.price;
+          v_price_usd := NULL;
+        WHEN 'USD' THEN
+          v_price_usd := v_price_row.price;
+          v_price_eur := NULL;
+        WHEN 'GBP' THEN
+          -- GBP is stored as EUR using a fixed factor of 1.17 hard-coded here.
+          v_price_eur := v_price_row.price * 1.17;
+          v_price_usd := NULL;
+        ELSE
+          -- Any other currency: the row still counts as price-verified, but no amount is stored.
+          v_price_eur := NULL;
+          v_price_usd := NULL;
+      END CASE;
+    ELSE
+      v_price_eur := NULL;
+      v_price_usd := NULL;
+    END IF;
+
+    -- Image: has image_url
+    v_image_verified := EXISTS (
+      SELECT 1 FROM transceivers
+      WHERE id = v_rec.id
+        AND image_url IS NOT NULL
+        AND image_url != ''
+    );
+
+    -- Details: has usable data_confidence (not garbage/unknown)
+    -- Accepts: low, medium, high, scraped_unverified, verified, official, enriched_estimated
+    v_details_verified := EXISTS (
+      SELECT 1 FROM transceivers
+      WHERE id = v_rec.id
+        AND data_confidence NOT IN ('garbage', 'unknown', '')
+        AND data_confidence IS NOT NULL
+        AND (connector IS NOT NULL OR wavelengths IS NOT NULL OR fiber_type IS NOT NULL)
+    );
+
+    UPDATE transceivers SET
+      price_verified = v_price_verified,
+      price_verified_eur = v_price_eur,
+      street_price_usd = v_price_usd,
+      image_verified = v_image_verified,
+      details_verified = v_details_verified,
+      fully_verified = v_price_verified AND v_image_verified AND v_details_verified,
+      updated_at = NOW()
+    WHERE id = v_rec.id;
+  END LOOP;
+END;
+$$ LANGUAGE plpgsql;
+
+-- ── Step 4: Run verification refresh ──────────────────────────────────────────
+SELECT compute_transceiver_verification();
+
+-- ── Step 5: Report ─────────────────────────────────────────────────────────────
+SELECT
+  COUNT(*) AS total,
+  SUM(CASE WHEN price_verified THEN 1 ELSE 0 END) AS price_verified,
+  SUM(CASE WHEN image_verified THEN 1 ELSE 0 END) AS image_verified,
+  SUM(CASE WHEN details_verified THEN 1 ELSE 0 END) AS details_verified,
+  SUM(CASE WHEN fully_verified THEN 1 ELSE 0 END) AS fully_verified,
+  ROUND(100.0 * SUM(CASE WHEN price_verified THEN 1 ELSE 0 END) / COUNT(*), 1) AS price_pct,
+  ROUND(100.0 * SUM(CASE WHEN fully_verified THEN 1 ELSE 0 END) / COUNT(*), 1) AS fully_pct
+FROM transceivers;
diff --git a/sql/027-fs-com-eur-currency-fix.sql b/sql/027-fs-com-eur-currency-fix.sql
new file mode 100644
index 0000000..82c600e
--- /dev/null
+++ b/sql/027-fs-com-eur-currency-fix.sql
@@ -0,0 +1,47 @@
+-- Migration 027: Fix FS.COM price observations currency
+--
+-- FS.COM scraper previously scraped www.fs.com (USD) and tried to convert USD→EUR.
+-- Now scrapes de.fs.com (EUR) directly — prices are real EUR values.
+-- This migration cleans up any existing FS.COM prices that were derived (USD→EUR)
+-- and marks them for re-scraping on next run.
+--
+-- Applied: 2026-04-06 (after fs-com.ts BASE_URL switch to de.fs.com)
+
+-- Delete price observations from FS.COM that look like converted USD prices
+-- (prices stored as USD but attributed to de.fs.com source after the switch).
+-- Only observations older than 7 days are removed; newer FS.COM rows are kept.
+-- NOTE(review): the header implies every derived price is removed — confirm the 7-day cutoff is intended.
+DELETE FROM price_observations po
+USING vendors v
+WHERE po.source_vendor_id = v.id
+  AND v.slug = 'fs-com'
+  AND po.time < NOW() - INTERVAL '7 days';
+
+-- Reset verification for affected transceivers so they get re-verified.
+-- Matches transceivers whose own vendor_id is fs-com and that currently carry a verified EUR price.
+-- NOTE(review): street_price_usd is not cleared by this statement — confirm that is intended.
+UPDATE transceivers t
+SET
+  price_verified = FALSE,
+  price_verified_eur = NULL,
+  fully_verified = FALSE,
+  updated_at = NOW()
+FROM vendors v
+WHERE t.vendor_id = v.id
+  AND v.slug = 'fs-com'
+  AND t.price_verified = TRUE
+  AND t.price_verified_eur IS NOT NULL;
+
+-- Report remaining FS.COM observations
+SELECT
+  v.name AS vendor,
+  po.currency,
+  COUNT(*) AS count,
+  ROUND(AVG(po.price), 2) AS avg_price,
+  MIN(po.time) AS oldest,
+  MAX(po.time) AS newest
+FROM price_observations po
+JOIN vendors v ON po.source_vendor_id = v.id
+WHERE v.slug = 'fs-com'
+GROUP BY v.name, po.currency
+ORDER BY count DESC;