-- Migration 016: Data confidence tracking
--
-- Adds a row-level data_confidence label to transceivers (one of
-- 'vendor_verified', 'enriched_estimated', 'scraped_unverified', 'unknown'),
-- plus enriched_at / enriched_fields bookkeeping, back-fills the label from
-- existing data, indexes it, and exposes a per-label summary view.
--
-- The back-fill statements run in a deliberate order: each of the first two
-- only touches rows still labelled 'unknown', and the vendor-matrix pass runs
-- last so it overrides any earlier label.

-- Schema: all three columns are added in a single ALTER TABLE.
-- The CHECK constraint is named explicitly; the name matches what PostgreSQL
-- would auto-generate for an unnamed column check on this column.
ALTER TABLE transceivers
    ADD COLUMN IF NOT EXISTS data_confidence TEXT DEFAULT 'unknown'
        CONSTRAINT transceivers_data_confidence_check
        CHECK (
            data_confidence IN (
                'vendor_verified',
                'enriched_estimated',
                'scraped_unverified',
                'unknown'
            )
        ),
    ADD COLUMN IF NOT EXISTS enriched_at TIMESTAMPTZ,
    ADD COLUMN IF NOT EXISTS enriched_fields TEXT[];

-- Mark all products that were just enriched by our script.
-- "Just enriched" is approximated as: updated_at within the last day and still
-- labelled 'unknown'.
-- NOTE(review): this heuristic depends on when the migration is executed
-- relative to the enrichment run -- confirm before applying to a new environment.
-- enriched_fields lists which of the four spec columns are non-NULL on the row;
-- ARRAY_REMOVE(..., NULL) drops the entries for columns that are NULL.
UPDATE transceivers
SET
    data_confidence = 'enriched_estimated',
    enriched_at = NOW(),
    enriched_fields = ARRAY_REMOVE(
        ARRAY[
            CASE WHEN fiber_type IS NOT NULL THEN 'fiber_type' END,
            CASE WHEN connector IS NOT NULL THEN 'connector' END,
            CASE WHEN wavelengths IS NOT NULL THEN 'wavelengths' END,
            CASE WHEN power_consumption_w IS NOT NULL THEN 'power_consumption_w' END
        ],
        NULL
    )
WHERE updated_at > NOW() - INTERVAL '1 day'
  AND data_confidence = 'unknown';

-- Products with price observations from vendor shops = scraped (higher confidence).
-- Only rows still labelled 'unknown' are touched, so rows marked
-- 'enriched_estimated' above keep that label.
UPDATE transceivers
SET data_confidence = 'scraped_unverified'
WHERE data_confidence = 'unknown'
  AND EXISTS (
      SELECT 1
      FROM price_observations AS po
      WHERE po.transceiver_id = transceivers.id
  );

-- Products from Cisco TMG matrix = vendor_verified.
-- Identified by a compatibility row with verified_by = 'vendor_matrix'.
-- This overrides any label set above. Rows that already carry
-- 'vendor_verified' are skipped so a re-run does not rewrite them.
UPDATE transceivers
SET data_confidence = 'vendor_verified'
WHERE data_confidence IS DISTINCT FROM 'vendor_verified'
  AND EXISTS (
      SELECT 1
      FROM compatibility AS c
      WHERE c.transceiver_id = transceivers.id
        AND c.verified_by = 'vendor_matrix'
  );

CREATE INDEX IF NOT EXISTS idx_transceivers_confidence
    ON transceivers (data_confidence);

-- View: data quality overview.
-- One row per data_confidence value with its row count, its share of all
-- transceivers (pct, rounded to one decimal), and how many rows in that group
-- have each spec column populated. Empty strings (and '-' for connector) are
-- treated as not populated.
CREATE OR REPLACE VIEW v_data_quality AS
SELECT
    data_confidence,
    COUNT(*) AS count,
    ROUND(COUNT(*)::numeric / (SELECT COUNT(*) FROM transceivers) * 100, 1) AS pct,
    COUNT(*) FILTER (WHERE fiber_type IS NOT NULL AND fiber_type != '') AS has_fiber,
    COUNT(*) FILTER (
        WHERE connector IS NOT NULL AND connector != '' AND connector != '-'
    ) AS has_connector,
    COUNT(*) FILTER (WHERE wavelengths IS NOT NULL AND wavelengths != '') AS has_wavelength,
    COUNT(*) FILTER (WHERE power_consumption_w IS NOT NULL) AS has_power,
    COUNT(*) FILTER (WHERE reach_meters > 0) AS has_reach,
    COUNT(*) FILTER (WHERE image_url IS NOT NULL AND image_url != '') AS has_image
FROM transceivers
GROUP BY data_confidence
ORDER BY count DESC;