-- transceiver-db/sql/025-blog-sll.sql

-- Migration 025: Blog Self-Learning Loop (SLL v1.0)
-- Tracks LinkedIn engagement per post, extracts winning/losing patterns,
-- feeds insights back into the generation pipeline automatically.

-- ─────────────────────────────────────────────────────────
-- Blog Performance: LinkedIn/platform engagement per post
-- ─────────────────────────────────────────────────────────
-- One row per engagement measurement of a published post.
-- Constraints are declared at the bottom of the table; they are the same
-- primary key and cascading foreign key as the column-level form.
CREATE TABLE IF NOT EXISTS blog_performance (
  id                UUID DEFAULT gen_random_uuid(),
  -- Measured draft. Nullable; a row is removed when its draft is deleted.
  blog_id           UUID,
  platform          TEXT DEFAULT 'linkedin',

  -- Raw counters. impressions has no default and stays NULL until supplied;
  -- the other counters start at 0.
  impressions       INT,
  comments          INT DEFAULT 0,
  shares            INT DEFAULT 0,
  saves             INT DEFAULT 0,
  likes             INT DEFAULT 0,

  -- SLL score = 3×comments + 2×shares + 2×saves. Likes carry no weight.
  -- NULL counters are treated as 0, so the score itself is never NULL.
  -- NOTE(review): an earlier note ranked LinkedIn signals as
  -- saves > shares > comments > likes, yet this formula weights comments
  -- highest — confirm the intended weighting.
  engagement_score  INT GENERATED ALWAYS AS (
    3 * COALESCE(comments, 0)
    + 2 * COALESCE(shares, 0)
    + 2 * COALESCE(saves, 0)
  ) STORED,

  -- Article snapshot taken at measurement time (input for pattern extraction).
  hook_text         TEXT,        -- opening ~120 characters of the LinkedIn post
  blog_type         TEXT,        -- tutorial | market_alert | comparison | …
  topic             TEXT,        -- topic tag used during generation
  word_count        INT,
  pipeline_version  TEXT,

  -- Timing and bookkeeping.
  posted_at         TIMESTAMP WITH TIME ZONE,
  measured_at       TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
  notes             TEXT,
  created_at        TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,

  PRIMARY KEY (id),
  FOREIGN KEY (blog_id) REFERENCES blog_drafts (id) ON DELETE CASCADE
);

-- Secondary indexes on blog_performance: by draft, by score (highest first),
-- by blog type, and by posting time (newest first).
-- btree and NULLS FIRST are the defaults for these definitions, spelled out.
CREATE INDEX IF NOT EXISTS idx_blog_perf_blog
  ON blog_performance USING btree (blog_id);
CREATE INDEX IF NOT EXISTS idx_blog_perf_score
  ON blog_performance USING btree (engagement_score DESC NULLS FIRST);
CREATE INDEX IF NOT EXISTS idx_blog_perf_type
  ON blog_performance USING btree (blog_type);
CREATE INDEX IF NOT EXISTS idx_blog_perf_posted
  ON blog_performance USING btree (posted_at DESC NULLS FIRST);

-- ─────────────────────────────────────────────────────────
-- Learned Patterns: extracted winning / losing patterns
-- ─────────────────────────────────────────────────────────
-- One row per extracted content pattern, classified as a winner or a loser.
-- Because of IF NOT EXISTS this statement is a no-op on databases where the
-- table already exists; changing the foreign-key action there needs a
-- follow-up ALTER TABLE.
CREATE TABLE IF NOT EXISTS blog_learned_patterns (
  id                  UUID PRIMARY KEY DEFAULT gen_random_uuid(),
  pattern_type        TEXT NOT NULL,   -- hook | structure | topic | length | opening | verb_style
  pattern_value       TEXT NOT NULL,   -- "short hook + contrast", "lab vs production", "single scenario"
  performance_class   TEXT NOT NULL,   -- winner | loser
  avg_engagement      NUMERIC,         -- average engagement_score across samples
  sample_count        INTEGER DEFAULT 1,
  -- Optional illustrative draft. With no ON DELETE action the reference
  -- defaults to NO ACTION, which blocks deleting any draft used as an example
  -- even though blog_performance rows for the same draft cascade away.
  -- SET NULL keeps the learned pattern and drops only the dangling example.
  example_post_id     UUID REFERENCES blog_drafts(id) ON DELETE SET NULL,
  active              BOOLEAN DEFAULT TRUE,
  extracted_at        TIMESTAMPTZ DEFAULT NOW(),
  -- Set once by the default at insert; nothing in this migration refreshes it,
  -- so writers must update it themselves.
  last_updated        TIMESTAMPTZ DEFAULT NOW()
);

-- Secondary indexes on blog_learned_patterns: by pattern type, by
-- winner/loser class, and a partial index that covers only active rows.
CREATE INDEX IF NOT EXISTS idx_blog_patterns_type
  ON blog_learned_patterns USING btree (pattern_type);
CREATE INDEX IF NOT EXISTS idx_blog_patterns_class
  ON blog_learned_patterns USING btree (performance_class);
CREATE INDEX IF NOT EXISTS idx_blog_patterns_active
  ON blog_learned_patterns USING btree (active)
  WHERE active = TRUE;

-- ─────────────────────────────────────────────────────────
-- SLL State: current active learning snapshot (weekly)
-- ─────────────────────────────────────────────────────────
-- Weekly learning snapshot: at most one row per week_start.
-- Key and uniqueness constraints are declared at the bottom of the table.
CREATE TABLE IF NOT EXISTS blog_sll_state (
  id                    UUID DEFAULT gen_random_uuid(),
  week_start            DATE NOT NULL,

  -- Pattern lists, stored as JSON arrays of strings; empty array by default.
  winner_patterns       JSONB DEFAULT '[]'::jsonb,
  loser_patterns        JSONB DEFAULT '[]'::jsonb,

  -- Topics to favour and topics to steer away from.
  top_topics            JSONB DEFAULT '[]'::jsonb,
  avoid_topics          JSONB DEFAULT '[]'::jsonb,

  -- Length window; both bounds are optional.
  optimal_length_min    INT,
  optimal_length_max    INT,

  -- Hook patterns that convert.
  best_hook_patterns    JSONB DEFAULT '[]'::jsonb,

  -- Bookkeeping about the run that produced this snapshot.
  posts_analyzed        INT DEFAULT 0,
  avg_engagement_score  NUMERIC,
  generated_at          TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
  generated_by          TEXT DEFAULT 'sll-auto',

  PRIMARY KEY (id),
  UNIQUE (week_start)
);

-- ─────────────────────────────────────────────────────────
-- View: performance enriched with blog metadata
-- ─────────────────────────────────────────────────────────
-- Each blog_performance row joined to its draft, plus a derived tier.
-- title, topic, pipeline_version and word_count come from the draft, not from
-- the snapshot columns stored on blog_performance.
-- Column names, order and types are unchanged, so CREATE OR REPLACE still
-- applies over an existing definition of this view.
CREATE OR REPLACE VIEW v_blog_performance AS
SELECT
  perf.id,
  perf.blog_id,
  draft.title,
  draft.topic,
  draft.pipeline_version,
  draft.word_count,
  perf.platform,
  perf.impressions,
  perf.comments,
  perf.shares,
  perf.saves,
  perf.likes,
  perf.engagement_score,
  perf.hook_text,
  perf.blog_type,
  perf.posted_at,
  perf.measured_at,
  -- Tier by engagement score: gold >= 20, silver 10-19, bronze 4-9, else miss.
  CASE
    WHEN perf.engagement_score >= 20 THEN 'gold'
    WHEN perf.engagement_score >= 10 THEN 'silver'
    WHEN perf.engagement_score >= 4  THEN 'bronze'
    ELSE 'miss'
  END AS performance_tier
FROM blog_performance AS perf
-- blog_performance.blog_id is nullable, so an inner join would silently drop
-- measurements that have no draft. LEFT JOIN keeps them, with the draft
-- columns NULL.
LEFT JOIN blog_drafts AS draft
  ON draft.id = perf.blog_id;

-- Catalog descriptions for the three SLL tables created by this migration.
COMMENT ON TABLE blog_performance
  IS 'SLL v1.0: LinkedIn engagement tracking per blog post';

COMMENT ON TABLE blog_learned_patterns
  IS 'SLL v1.0: Extracted winning/losing content patterns';

COMMENT ON TABLE blog_sll_state
  IS 'SLL v1.0: Weekly learning state snapshot for pipeline injection';