transceiver-db/sql/016-data-confidence.sql
Rene Fichtmueller 73ef5766e6 feat(v0.2.1): data confidence tracking + validation + blog feedback system
- Migration 016: data_confidence column (vendor_verified/enriched_estimated/scraped_unverified)
- Migration 015: blog_feedback table with 8 quality scores + free text
- Validation script: 8 physics-based rules (wavelength↔fiber, reach plausibility, power limits)
- Blog feedback API: POST /api/blog/:id/feedback + training data export
- FO Blog Pipeline v3: 10-step Flexoptix Style prompts (Less bullshit. More engineering.)
- Auto-fix: wavelength↔fiber mismatches corrected automatically
2026-03-31 09:12:37 +02:00

51 lines
2.5 KiB
SQL

-- Migration 016: data confidence tracking.
-- Adds three row-level provenance columns to transceivers:
--   data_confidence  one of 'vendor_verified', 'enriched_estimated',
--                    'scraped_unverified' or 'unknown' (the default)
--   enriched_at      timestamp set when a row is marked as enriched
--   enriched_fields  names of the spec columns recorded as enriched
-- ADD COLUMN IF NOT EXISTS skips a column (together with its DEFAULT and
-- CHECK) when that column is already present.
ALTER TABLE transceivers
    ADD COLUMN IF NOT EXISTS data_confidence TEXT DEFAULT 'unknown'
        CHECK (data_confidence IN ('vendor_verified', 'enriched_estimated', 'scraped_unverified', 'unknown')),
    ADD COLUMN IF NOT EXISTS enriched_at TIMESTAMPTZ,
    ADD COLUMN IF NOT EXISTS enriched_fields TEXT[];
-- Backfill: flag rows touched within the last day as enriched by our script.
-- NOTE(review): "updated_at within the last 24 hours" is a time-based stand-in
-- for "modified by the enrichment script"; the outcome depends on when this
-- migration runs -- confirm that is acceptable for every environment.
-- Only rows still at 'unknown' are touched, so rows that already carry a
-- classification are left alone.
UPDATE transceivers
SET
    data_confidence = 'enriched_estimated',
    enriched_at = NOW(),
    -- One entry per populated spec column. CASE without ELSE yields NULL for
    -- a column that is not populated, and ARRAY_REMOVE(..., NULL) drops those.
    -- "Populated" follows the same definition as the v_data_quality view:
    -- empty strings (and '-' for connector) count as missing.
    -- The recency test lives only in WHERE; every row that reaches this
    -- expression has already passed it.
    enriched_fields = ARRAY_REMOVE(ARRAY[
        CASE WHEN fiber_type IS NOT NULL AND fiber_type <> '' THEN 'fiber_type' END,
        CASE WHEN connector IS NOT NULL AND connector NOT IN ('', '-') THEN 'connector' END,
        CASE WHEN wavelengths IS NOT NULL AND wavelengths <> '' THEN 'wavelengths' END,
        CASE WHEN power_consumption_w IS NOT NULL THEN 'power_consumption_w' END
    ], NULL)
WHERE updated_at > NOW() - INTERVAL '1 day'
  AND data_confidence = 'unknown';
-- Rows still at 'unknown' that have at least one price observation are
-- classified as scraped_unverified (original note: observations come from
-- vendor shops, hence higher confidence than 'unknown').
UPDATE transceivers AS t
SET data_confidence = 'scraped_unverified'
WHERE t.data_confidence = 'unknown'
  AND EXISTS (
      SELECT 1
      FROM price_observations AS po
      WHERE po.transceiver_id = t.id
  );
-- Promote every row that has a compatibility entry with
-- verified_by = 'vendor_matrix' to vendor_verified (original note: products
-- from the Cisco TMG matrix). This applies whatever the row's current
-- confidence value is.
-- Rows already at vendor_verified are skipped so that a re-run performs no
-- writes; IS DISTINCT FROM also matches rows whose data_confidence is NULL.
UPDATE transceivers AS t
SET data_confidence = 'vendor_verified'
WHERE t.data_confidence IS DISTINCT FROM 'vendor_verified'
  AND EXISTS (
      SELECT 1
      FROM compatibility AS c
      WHERE c.transceiver_id = t.id
        AND c.verified_by = 'vendor_matrix'
  );
-- Index on transceivers.data_confidence; created only if no index of this
-- name exists yet. btree is PostgreSQL's default access method.
CREATE INDEX IF NOT EXISTS idx_transceivers_confidence
    ON transceivers USING btree (data_confidence);
-- View: data quality overview.
-- One row per data_confidence value with the number of rows, that number as
-- a percentage of all transceivers (one decimal place), and per-column counts
-- of rows whose spec value is populated. Largest group first.
CREATE OR REPLACE VIEW v_data_quality AS
SELECT
    data_confidence,
    COUNT(*) AS count,
    -- The group counts add up to the table's total row count, so the
    -- windowed sum across all groups serves as the denominator.
    ROUND(COUNT(*)::numeric / SUM(COUNT(*)) OVER () * 100, 1) AS pct,
    -- For a NULL value "<>" and "NOT IN" evaluate to NULL, which FILTER
    -- treats as no match, so NULLs are excluded without an IS NOT NULL test.
    COUNT(*) FILTER (WHERE fiber_type <> '') AS has_fiber,
    COUNT(*) FILTER (WHERE connector NOT IN ('', '-')) AS has_connector,
    COUNT(*) FILTER (WHERE wavelengths <> '') AS has_wavelength,
    COUNT(*) FILTER (WHERE power_consumption_w IS NOT NULL) AS has_power,
    COUNT(*) FILTER (WHERE reach_meters > 0) AS has_reach,
    COUNT(*) FILTER (WHERE image_url <> '') AS has_image
FROM transceivers
GROUP BY data_confidence
ORDER BY count DESC;