-- Migration 102: Product photo/details verification reconciliation
-- Applied after the scraper started storing many image_url/product URLs without
-- consistently promoting image_verified/details_verified.
--
-- The four UPDATEs run inside one transaction and are order-dependent:
-- step 3 reads the product_page_url values written by step 1, and step 4
-- reads the image_verified/details_verified flags written by steps 2 and 3.

BEGIN;

-- Step 1: Backfill canonical product URLs from recent real price observations.
-- For each transceiver, take the URL of its newest observation from the last
-- 180 days. Only rows whose product_page_url is NULL or empty are touched.
UPDATE transceivers AS t
SET
    product_page_url = latest.url,
    updated_at = NOW()
FROM (
    SELECT DISTINCT ON (po.transceiver_id)
        po.transceiver_id,
        po.url
    FROM price_observations AS po
    WHERE po.url IS NOT NULL
      AND po.url <> ''
      AND po.time > NOW() - INTERVAL '180 days'
    -- po.url is a tie-breaker so the chosen row is deterministic when two
    -- observations for the same transceiver share a timestamp.
    ORDER BY po.transceiver_id, po.time DESC, po.url
) AS latest
WHERE t.id = latest.transceiver_id
  AND (t.product_page_url IS NULL OR t.product_page_url = '');

-- Step 2: Any non-placeholder product image URL written by a scraper counts as
-- an image verification source. Older scrapers often set only
-- has_image/image_url.
UPDATE transceivers
SET
    has_image = true,
    image_verified = true,
    -- COALESCE keeps a timestamp / URL that was already recorded.
    image_verified_at = COALESCE(image_verified_at, NOW()),
    image_verified_url = COALESCE(NULLIF(image_verified_url, ''), image_url),
    updated_at = NOW()
WHERE image_url IS NOT NULL
  AND image_url <> ''
  -- Case-insensitive, unanchored match: a URL containing any of these
  -- substrings anywhere is treated as a placeholder and skipped.
  AND image_url !~* '(placeholder|no-image|no_image|keinbild|logo)'
  -- IS NOT TRUE covers both false and NULL.
  AND image_verified IS NOT TRUE;

-- Step 3: Details are verified once a crawled source URL and the core product
-- identity fields are present. This avoids marking malformed scraper rows as
-- complete.
UPDATE transceivers
SET
    details_verified = true,
    details_verified_at = COALESCE(details_verified_at, NOW()),
    details_source_url = COALESCE(NULLIF(details_source_url, ''), product_page_url),
    -- Only NULL / 'unknown' / 'enriched_estimated' are replaced; any other
    -- existing confidence value is left as it is.
    data_confidence = CASE
        WHEN data_confidence IS NULL
          OR data_confidence IN ('unknown', 'enriched_estimated')
            THEN 'scraped_unverified'
        ELSE data_confidence
    END,
    updated_at = NOW()
WHERE product_page_url IS NOT NULL
  AND product_page_url <> ''
  AND form_factor IS NOT NULL
  AND speed_gbps IS NOT NULL
  AND part_number IS NOT NULL
  AND part_number <> ''
  AND reach_label IS NOT NULL
  AND reach_label <> ''
  AND fiber_type IS NOT NULL
  AND fiber_type <> ''
  -- NULL confidence is allowed through; only rows flagged 'garbage' are excluded.
  AND data_confidence IS DISTINCT FROM 'garbage'
  AND details_verified IS NOT TRUE;

-- Step 4: Refresh full badge after the promotions above.
UPDATE transceivers
SET
    fully_verified = true,
    fully_verified_at = COALESCE(fully_verified_at, NOW()),
    updated_at = NOW()
WHERE competitor_verified = true
  AND price_verified = true
  AND image_verified = true
  AND details_verified = true
  -- IS NOT TRUE (rather than "= false") also promotes rows whose flag is
  -- NULL, matching how the image/details flags are treated in steps 2 and 3.
  AND fully_verified IS NOT TRUE;

COMMIT;

-- Post-migration report: one row of coverage counts over the whole table.
SELECT
    COUNT(*) AS total,
    COUNT(*) FILTER (WHERE image_url IS NOT NULL AND image_url <> '') AS has_image_url,
    COUNT(*) FILTER (WHERE image_verified) AS image_verified,
    COUNT(*) FILTER (WHERE product_page_url IS NOT NULL AND product_page_url <> '') AS has_product_page_url,
    COUNT(*) FILTER (WHERE details_verified) AS details_verified,
    COUNT(*) FILTER (WHERE fully_verified) AS fully_verified
FROM transceivers;