- Migration 016: data_confidence column (vendor_verified/enriched_estimated/scraped_unverified) - Migration 015: blog_feedback table with 8 quality scores + free text - Validation script: 8 physics-based rules (wavelength↔fiber, reach plausibility, power limits) - Blog feedback API: POST /api/blog/:id/feedback + training data export - FO Blog Pipeline v3: 10-step Flexoptix Style prompts (Less bullshit. More engineering.) - Auto-fix: wavelength↔fiber mismatches corrected automatically
51 lines
2.5 KiB
SQL
51 lines
2.5 KiB
SQL
-- Migration 016: Data confidence tracking
|
|
-- Tag each transceiver row with a confidence level: vendor_verified,
-- enriched_estimated, scraped_unverified, or unknown (the default).
|
|
|
|
-- Provenance columns on transceivers, added in a single statement.
-- Each ADD COLUMN IF NOT EXISTS is skipped when its column is already present
-- (for data_confidence that also skips the CHECK), so the migration can be re-run.
ALTER TABLE transceivers
    -- Confidence level of the row's spec data; defaults to 'unknown'.
    ADD COLUMN IF NOT EXISTS data_confidence TEXT DEFAULT 'unknown'
        CHECK (data_confidence IN (
            'vendor_verified',
            'enriched_estimated',
            'scraped_unverified',
            'unknown'
        )),
    -- Timestamp recorded when a row is flagged as enriched.
    ADD COLUMN IF NOT EXISTS enriched_at TIMESTAMPTZ,
    -- Names of the spec columns that held a value when the row was flagged.
    ADD COLUMN IF NOT EXISTS enriched_fields TEXT[];
|
|
|
|
-- Backfill: flag rows modified within the last 24 hours as 'enriched_estimated'.
-- NOTE(review): "updated in the last day" is only a proxy for "written by the
-- enrichment script" and is evaluated when the migration runs — confirm the
-- migration is applied right after the enrichment run.
-- Only rows still at 'unknown' are touched, so rows that already carry a
-- classification are never overwritten.
-- enriched_fields lists which of the four spec columns hold a real value. Empty
-- strings and the '-' connector placeholder count as missing, the same rule the
-- has_* counters in v_data_quality use. The recency test lives only in WHERE;
-- repeating it inside every CASE added nothing.
UPDATE transceivers
SET
    data_confidence = 'enriched_estimated',
    enriched_at     = NOW(),
    enriched_fields = ARRAY_REMOVE(ARRAY[
        CASE WHEN fiber_type IS NOT NULL AND fiber_type != '' THEN 'fiber_type' END,
        CASE WHEN connector IS NOT NULL AND connector != '' AND connector != '-' THEN 'connector' END,
        CASE WHEN wavelengths IS NOT NULL AND wavelengths != '' THEN 'wavelengths' END,
        CASE WHEN power_consumption_w IS NOT NULL THEN 'power_consumption_w' END
    ], NULL)
WHERE updated_at > NOW() - INTERVAL '1 day'
  AND data_confidence = 'unknown';
|
|
|
|
-- Still-unclassified products that have at least one price observation are
-- treated as scraped from a vendor shop. Rows that already carry another
-- classification are left alone.
UPDATE transceivers AS t
SET data_confidence = 'scraped_unverified'
WHERE EXISTS (
        SELECT 1
        FROM price_observations AS po
        WHERE po.transceiver_id = t.id
    )
  AND t.data_confidence = 'unknown';
|
|
|
|
-- Products with at least one compatibility entry verified via a vendor matrix
-- become 'vendor_verified', regardless of their current classification.
-- NOTE(review): the previous comment named the Cisco TMG matrix, but the filter
-- matches every entry with verified_by = 'vendor_matrix' — confirm that is intended.
-- Rows already at 'vendor_verified' are skipped so a re-run does not rewrite them
-- (no needless row versions, no row-level UPDATE triggers firing). IS DISTINCT FROM
-- is used so rows whose data_confidence is NULL are still promoted.
-- NOTE(review): enriched_at / enriched_fields are left unchanged on promoted rows.
UPDATE transceivers AS t
SET data_confidence = 'vendor_verified'
WHERE t.data_confidence IS DISTINCT FROM 'vendor_verified'
  AND EXISTS (
        SELECT 1
        FROM compatibility AS c
        WHERE c.transceiver_id = t.id
          AND c.verified_by = 'vendor_matrix'
    );
|
|
|
|
-- Index on the confidence level for lookups and grouping by data_confidence.
CREATE INDEX IF NOT EXISTS idx_transceivers_confidence
    ON transceivers USING btree (data_confidence);
|
|
|
|
-- v_data_quality: one row per data_confidence value with its row count, its share
-- of all transceivers (percent, one decimal place) and how many rows in the group
-- have each spec field filled in. Largest group first.
CREATE OR REPLACE VIEW v_data_quality AS
SELECT
    t.data_confidence,
    COUNT(*) AS count,
    -- Group size divided by the sum of all group sizes, i.e. the whole table.
    ROUND(COUNT(*)::numeric / SUM(COUNT(*)) OVER () * 100, 1) AS pct,
    -- A comparison against NULL is never true, so NULL values drop out of the
    -- text counters below without a separate IS NOT NULL test.
    COUNT(*) FILTER (WHERE t.fiber_type <> '') AS has_fiber,
    COUNT(*) FILTER (WHERE t.connector NOT IN ('', '-')) AS has_connector,
    COUNT(*) FILTER (WHERE t.wavelengths <> '') AS has_wavelength,
    COUNT(*) FILTER (WHERE t.power_consumption_w IS NOT NULL) AS has_power,
    COUNT(*) FILTER (WHERE t.reach_meters > 0) AS has_reach,
    COUNT(*) FILTER (WHERE t.image_url <> '') AS has_image
FROM transceivers AS t
GROUP BY t.data_confidence
ORDER BY COUNT(*) DESC;
|