#!/usr/bin/env node
/**
 * ShieldX Daily Security Research Monitor
 *
 * Scans arXiv (cs.CR + cs.AI) and HackerNews daily for new LLM/AI security research.
 * Uses Claude Haiku via Anthropic API to classify relevance.
 * HIGH findings: generates detection rule suggestions, commits to Gitea.
 *
 * Setup on Erik:
 *   1. Copy to /opt/scripts/arxiv-monitor.mjs
 *   2. Set ANTHROPIC_API_KEY in /opt/scripts/.env
 *   3. Set GITEA_TOKEN in /opt/scripts/.env
 *   4. chmod +x /opt/scripts/arxiv-monitor.mjs
 *   5. Add to cron:
 *      0 6 * * * node /opt/scripts/arxiv-monitor.mjs >> /opt/scripts/logs/arxiv-monitor.log 2>&1
 *
 * Requires: Node.js >= 20 (native fetch), git
 */

import { execSync, exec } from 'node:child_process'
import { writeFileSync, mkdirSync, readFileSync, existsSync } from 'node:fs'
import { join, dirname } from 'node:path'
import { fileURLToPath } from 'node:url'
import { promisify } from 'node:util'

const execAsync = promisify(exec)
const __dir = dirname(fileURLToPath(import.meta.url))

// ── Config ──────────────────────────────────────────────────────────────────

const ANTHROPIC_API_KEY = process.env.ANTHROPIC_API_KEY || loadEnv('ANTHROPIC_API_KEY')
const GITEA_TOKEN = process.env.GITEA_TOKEN || loadEnv('GITEA_TOKEN')
const GITEA_BASE_URL = process.env.GITEA_BASE_URL || 'https://gitea.context-x.org'
const GITEA_USER = process.env.GITEA_USER || 'rene'
const SHIELDX_REPO = 'ShieldX'
const LOG_DIR = process.env.LOG_DIR || '/opt/scripts/logs'
const WORK_DIR = process.env.WORK_DIR || '/tmp/shieldx-monitor'
const TODAY = new Date().toISOString().slice(0, 10)

/**
 * Look up a single KEY=value entry in the `.env` file that sits next to this script.
 *
 * @param {string} key - Variable name to look up (upper-case letters, digits, underscores).
 * @returns {string} The value with surrounding whitespace and one pair of quotes removed,
 *   or '' when the file or the key is missing.
 */
function loadEnv(key) {
  const envFile = join(__dir, '.env')
  if (!existsSync(envFile)) return ''
  // Split on \r?\n: with a bare '\n' split, a CRLF file leaves '\r' at the end of every
  // line, which `.` cannot match, so no entry would ever be found.
  const lines = readFileSync(envFile, 'utf8').split(/\r?\n/)
  for (const line of lines) {
    const m = line.match(/^([A-Z0-9_]+)=(.+)$/)
    if (m && m[1] === key) return m[2].trim().replace(/^["']|["']$/g, '')
  }
  return ''
}

// ── Logging ──────────────────────────────────────────────────────────────────

mkdirSync(LOG_DIR, { recursive: true })
const logFile = join(LOG_DIR, `arxiv-monitor-${TODAY}.log`)

/**
 * Write a timestamped line to stdout and append it to today's log file.
 *
 * @param {string} msg - Message to record.
 */
function log(msg) {
  const line = `[${new Date().toISOString()}] ${msg}`
  console.log(line)
  try {
    writeFileSync(logFile, line + '\n', { flag: 'a' })
  } catch {
    // File logging is best-effort; the line has already been written to stdout.
  }
}

// ── RSS helpers ──────────────────────────────────────────────────────────────

const RSS_ITEM_RX = /<item(?:\s[^>]*)?>([\s\S]*?)<\/item>/g
const RSS_FIELD_RX = {
  title: /<title>([\s\S]*?)<\/title>/,
  desc: /<description>([\s\S]*?)<\/description>/,
  link: /<link>([\s\S]*?)<\/link>/,
}

/** Remove CDATA wrappers and surrounding whitespace. */
const stripCdata = (s) => s.replace(/<!\[CDATA\[|\]\]>/g, '').trim()

/** Remove CDATA wrappers and any embedded tags, then trim. */
const stripMarkup = (s) => stripCdata(s).replace(/<[^>]+>/g, '').trim()

/**
 * Extract the raw title/description/link of every <item> element in an RSS document.
 * Fields are read per item, so channel-level <title>/<link> elements can never be
 * mistaken for an entry and a title is always paired with the link of the same item.
 *
 * @param {string} xml - RSS document text.
 * @returns {{title: string, desc: string, link: string}[]} Raw (uncleaned) field text;
 *   a missing field is ''.
 */
function parseRssItems(xml) {
  return Array.from(xml.matchAll(RSS_ITEM_RX), ([, block]) => ({
    title: RSS_FIELD_RX.title.exec(block)?.[1] ?? '',
    desc: RSS_FIELD_RX.desc.exec(block)?.[1] ?? '',
    link: RSS_FIELD_RX.link.exec(block)?.[1] ?? '',
  }))
}

// ── arXiv RSS Fetch ──────────────────────────────────────────────────────────

/**
 * Convert an arXiv RSS document into monitor items.
 *
 * @param {string} xml - RSS document text.
 * @param {string} section - arXiv section, used for the `source` label.
 * @returns {{title: string, desc: string, link: string, source: string}[]} Items with a
 *   non-empty title; descriptions are capped at 400 characters.
 */
function parseArxivFeed(xml, section) {
  return parseRssItems(xml)
    .map(({ title, desc, link }) => ({
      title: stripMarkup(title),
      desc: stripMarkup(desc).slice(0, 400),
      link: stripMarkup(link),
      source: `arXiv:${section}`,
    }))
    .filter((item) => item.title)
}

/**
 * Fetch the RSS feed of one arXiv section.
 *
 * @param {string} section - arXiv section identifier, e.g. 'cs.CR'.
 * @returns {Promise<{title: string, desc: string, link: string, source: string}[]>}
 *   Parsed items; [] (with a WARN log line) on any network, HTTP or parse failure.
 */
async function fetchArxiv(section) {
  const url = `https://rss.arxiv.org/rss/${section}`
  try {
    const res = await fetch(url, { signal: AbortSignal.timeout(20000) })
    // Without this check an HTTP error page would be parsed and reported as "0 papers".
    if (!res.ok) throw new Error(`HTTP ${res.status}`)
    const items = parseArxivFeed(await res.text(), section)
    log(`arXiv ${section}: ${items.length} papers fetched`)
    return items
  } catch (e) {
    log(`WARN: arXiv ${section} fetch failed: ${e.message}`)
    return []
  }
}

// ── HackerNews Fetch ─────────────────────────────────────────────────────────

// Keywords are already query-encoded ('+' = space); they are also used verbatim in the
// `source` label of the resulting items.
const HN_KEYWORDS = ['prompt+injection', 'LLM+security', 'jailbreak', 'AI+security']

/**
 * Fetch the first 80 HackerNews top stories through the Firebase API.
 *
 * @returns {Promise<object[]>} Items for every story that could be loaded and has a
 *   title; [] (with a WARN log line) if the story list itself cannot be loaded.
 */
async function fetchHnTopStories() {
  try {
    const top = await fetch('https://hacker-news.firebaseio.com/v0/topstories.json', {
      signal: AbortSignal.timeout(10000),
    })
    if (!top.ok) throw new Error(`HTTP ${top.status}`)
    const ids = (await top.json()).slice(0, 80)
    // allSettled: one slow or failing story must not discard the other 79.
    const batch = await Promise.allSettled(
      ids.map((id) =>
        fetch(`https://hacker-news.firebaseio.com/v0/item/${id}.json`, {
          signal: AbortSignal.timeout(8000),
        }).then((r) => r.json())
      )
    )
    return batch
      .filter((r) => r.status === 'fulfilled' && r.value?.title)
      .map(({ value }) => ({
        title: value.title,
        desc: value.text?.slice(0, 300) || '',
        link: value.url || `https://news.ycombinator.com/item?id=${value.id}`,
        source: 'HackerNews',
      }))
  } catch (e) {
    log(`WARN: HackerNews fetch failed: ${e.message}`)
    return []
  }
}

/**
 * Fetch the newest HackerNews stories matching one keyword from hnrss.org.
 *
 * @param {string} kw - Query-encoded keyword, e.g. 'prompt+injection'.
 * @returns {Promise<object[]>} Items with an empty `desc`; [] (with a WARN log line)
 *   on failure.
 */
async function fetchHnKeywordFeed(kw) {
  try {
    const rss = await fetch(`https://hnrss.org/newest?q=${kw}&count=15`, {
      signal: AbortSignal.timeout(10000),
    })
    if (!rss.ok) throw new Error(`HTTP ${rss.status}`)
    return parseRssItems(await rss.text())
      .map(({ title, link }) => ({
        title: stripCdata(title),
        desc: '',
        link: stripCdata(link),
        source: `HN:${kw}`,
      }))
      .filter((item) => item.title)
  } catch (e) {
    log(`WARN: HackerNews feed ${kw} fetch failed: ${e.message}`)
    return []
  }
}

/**
 * Collect HackerNews candidates: top stories first, then the keyword feeds in
 * HN_KEYWORDS order. The sources fail independently of each other.
 *
 * @returns {Promise<object[]>} Combined items; never rejects.
 */
async function fetchHackerNews() {
  const groups = await Promise.all([
    fetchHnTopStories(),
    ...HN_KEYWORDS.map((kw) => fetchHnKeywordFeed(kw)),
  ])
  const items = groups.flat()
  log(`HackerNews: ${items.length} stories fetched`)
  return items
}

// ── Claude Haiku Classification ──────────────────────────────────────────────

const CLASSIFY_BATCH_SIZE = 40
const RELEVANCE_LEVELS = new Set(['HIGH', 'MEDIUM', 'LOW', 'SKIP'])

/**
 * Keep the first item for every distinct title, compared case-insensitively.
 *
 * @param {{title: string}[]} items
 * @returns {{title: string}[]} Items in their original order without repeated titles.
 */
function dedupeByTitle(items) {
  const seen = new Set()
  return items.filter((item) => {
    const key = item.title.toLowerCase()
    if (seen.has(key)) return false
    seen.add(key)
    return true
  })
}

/**
 * Build the classification prompt for one batch of items.
 *
 * @param {object[]} batch - Items; each is numbered by its position in the batch.
 * @returns {string} Prompt text.
 */
function buildClassificationPrompt(batch) {
  const itemList = batch
    .map((item, i) => `[${i}] SOURCE: ${item.source}\nTITLE: ${item.title}\nDESC: ${item.desc}`)
    .join('\n\n---\n\n')

  return `You are a security researcher analyzing papers and articles for relevance to ShieldX — an LLM prompt injection defense library.

ShieldX detects: prompt injection, jailbreaks, Unicode covert channels (ASCII smuggling, homoglyphs, zero-width steganography), DNS/network exfiltration, indirect prompt injection, agentic manipulation, multi-agent attacks, tool abuse (CVE-2025-55284), MITRE ATLAS techniques for AI.

For each numbered item below, classify relevance:
- HIGH: New attack technique ShieldX doesn't detect, new CVE for LLM tools, new covert channel/exfiltration method → MUST implement detection rule
- MEDIUM: Improved understanding of existing threat, new variant of known attack → worth tracking
- LOW: General AI security news, policy, non-technical → log only
- SKIP: Not relevant to ShieldX

Respond ONLY with valid JSON array, no other text:
[{"index": 0, "level": "HIGH"|"MEDIUM"|"LOW"|"SKIP", "reason": "brief reason", "ruleId": "rule-id-if-HIGH-else-null", "detection": "brief detection approach if HIGH"}]

Items to classify:
${itemList}`
}

/**
 * Merge the model's JSON answer with the batch it was asked about.
 *
 * Only the known fields (index, level, reason, ruleId, detection) are copied from the
 * response. The response is derived from untrusted feed text, so spreading it over the
 * item would let it overwrite `title`, `link` or `source`.
 *
 * @param {string} content - Raw model output; text around the JSON array is tolerated.
 * @param {object[]} batch - The items that were sent, indexed as in the prompt.
 * @returns {object[]|null} Classified items, or null when the output contains no array.
 *   Entries with an unknown index, an unknown level or a repeated index are dropped.
 * @throws {SyntaxError} When the bracketed text is not valid JSON.
 */
function parseClassification(content, batch) {
  const jsonMatch = content.match(/\[[\s\S]*\]/)
  if (!jsonMatch) return null

  const textOrNull = (v) => (typeof v === 'string' && v.trim() && v !== 'null' ? v : null)
  const seen = new Set()
  const classified = []
  for (const r of JSON.parse(jsonMatch[0])) {
    if (r === null || typeof r !== 'object') continue
    const { index } = r
    if (!Number.isInteger(index) || !batch[index] || seen.has(index)) continue
    const level = String(r.level ?? '').toUpperCase()
    if (!RELEVANCE_LEVELS.has(level)) continue
    seen.add(index)
    classified.push({
      ...batch[index],
      index,
      level,
      reason: typeof r.reason === 'string' ? r.reason : '',
      ruleId: textOrNull(r.ruleId),
      detection: textOrNull(r.detection),
    })
  }
  return classified
}

/**
 * Send one batch to the Anthropic Messages API and parse the answer.
 *
 * @param {object[]} batch
 * @returns {Promise<object[]>} Classified items; [] (with a WARN log line) when the API
 *   answers with an error status or without a JSON array.
 */
async function classifyBatch(batch) {
  const res = await fetch('https://api.anthropic.com/v1/messages', {
    method: 'POST',
    signal: AbortSignal.timeout(60000),
    headers: {
      'x-api-key': ANTHROPIC_API_KEY,
      'anthropic-version': '2023-06-01',
      'content-type': 'application/json',
    },
    body: JSON.stringify({
      model: 'claude-haiku-4-5',
      max_tokens: 2048,
      messages: [{ role: 'user', content: buildClassificationPrompt(batch) }],
    }),
  })
  if (!res.ok) {
    const err = await res.text()
    log(`WARN: Anthropic API error ${res.status}: ${err.slice(0, 200)}`)
    return []
  }
  const data = await res.json()
  const parsed = parseClassification(data.content?.[0]?.text || '[]', batch)
  if (parsed === null) {
    log('WARN: No JSON array in classification response')
    return []
  }
  return parsed
}

/**
 * Classify feed items by relevance to ShieldX using Claude Haiku.
 *
 * @param {object[]} items - Items with `title`, `desc`, `link` and `source`.
 * @returns {Promise<object[]>} The input items (deduplicated by title) extended with
 *   `index`, `level`, `reason`, `ruleId` and `detection`. Items of a failed batch are
 *   omitted; [] when no API key is configured.
 */
async function classifyItems(items) {
  if (!ANTHROPIC_API_KEY) {
    log('ERROR: ANTHROPIC_API_KEY not set — skipping LLM classification')
    return []
  }

  const unique = dedupeByTitle(items)
  const classified = []
  // Batches of CLASSIFY_BATCH_SIZE keep each request within the context window; they are
  // sent one after another and a failing batch does not abort the remaining ones.
  for (let i = 0; i < unique.length; i += CLASSIFY_BATCH_SIZE) {
    try {
      classified.push(...(await classifyBatch(unique.slice(i, i + CLASSIFY_BATCH_SIZE))))
    } catch (e) {
      log(`WARN: Classification batch failed: ${e.message}`)
    }
  }
  return classified
}

// ── Detection Code Generation (HIGH items) ──────────────────────────────────
/**
 * Ask Claude Haiku for a TypeScript detection function covering one HIGH finding.
 *
 * @param {object} item - Classified item; `title`, `source`, `desc`, `ruleId` and
 *   `detection` are interpolated into the prompt.
 * @returns {Promise<string|null>} Raw model output (possibly wrapped in a code fence),
 *   or null when the request fails or returns no text.
 */
async function generateDetectionCode(item) {
  const prompt = `You are a TypeScript security engineer implementing detection rules for ShieldX — an LLM prompt injection defense library.

Based on this finding, write a TypeScript detection function that can be added to a ShieldX scanner file.

Finding: ${item.title}
Source: ${item.source}
Details: ${item.desc}
Suggested rule ID: ${item.ruleId}
Detection approach: ${item.detection}

Requirements:
- Pure TypeScript, strict mode compatible
- Function signature: function detect${toPascalCase(item.ruleId || 'new')}(input: string): ScanResult[]
- Use this ScanResult shape: { scannerId: string, scannerType: string, detected: true, confidence: number (0-1), threatLevel: 'low'|'medium'|'high'|'critical', killChainPhase: string, matchedPatterns: string[], latencyMs: number, metadata: Record<string, unknown> }
- Only return results when something suspicious is detected
- Add a comment with: MITRE ATLAS technique (if applicable), CVE (if applicable), source paper/article
- Keep it focused — one clear detection pattern
- NO imports needed (standalone function)
- IMPORTANT: Return ONLY the TypeScript code, no explanation text

Write the detection function now:`

  try {
    const res = await fetch('https://api.anthropic.com/v1/messages', {
      method: 'POST',
      signal: AbortSignal.timeout(60000),
      headers: {
        'x-api-key': ANTHROPIC_API_KEY,
        'anthropic-version': '2023-06-01',
        'content-type': 'application/json',
      },
      body: JSON.stringify({
        model: 'claude-haiku-4-5',
        max_tokens: 1500,
        messages: [{ role: 'user', content: prompt }],
      }),
    })
    if (!res.ok) {
      log(`WARN: Code generation failed for ${item.ruleId}: HTTP ${res.status}`)
      return null
    }
    const data = await res.json()
    return data.content?.[0]?.text || null
  } catch (e) {
    log(`WARN: Code generation failed for ${item.ruleId}: ${e.message}`)
    return null
  }
}

/**
 * Convert a rule id such as 'ascii-smuggling_v2' to PascalCase ('AsciiSmugglingV2').
 * Every run of non-alphanumeric characters is treated as a word separator, so the
 * result contains only letters and digits.
 *
 * @param {string} s - Rule id.
 * @returns {string} PascalCase form; '' when `s` contains no letters or digits.
 */
function toPascalCase(s) {
  return String(s)
    .split(/[^A-Za-z0-9]+/)
    .filter(Boolean)
    .map((w) => w.charAt(0).toUpperCase() + w.slice(1))
    .join('')
}

// ── Git Operations ────────────────────────────────────────────────────────────

// Imported here rather than at the top of the file so this section is self-contained;
// ES module imports are hoisted and may appear anywhere at module top level.
import { execFile } from 'node:child_process'

const execFileAsync = promisify(execFile)
const RULES_PATH = 'src/detection/AutoGeneratedRules.ts'

/**
 * Run git without a shell. Arguments are passed as an array, so text taken from feeds
 * (titles in commit messages) or from the environment (the token in the clone URL) is
 * never interpreted by a shell.
 *
 * @param {string[]} args - git arguments.
 * @param {string} [cwd] - Working directory.
 * @returns {Promise<{stdout: string, stderr: string}>}
 * @throws {Error} The execFile error, with the Gitea token masked in its message.
 */
async function git(args, cwd) {
  try {
    return await execFileAsync('git', args, { cwd })
  } catch (e) {
    // The error message embeds the command line, which may contain the clone URL.
    if (GITEA_TOKEN) e.message = e.message.split(GITEA_TOKEN).join('***')
    throw e
  }
}

/**
 * Make sure an up-to-date ShieldX checkout exists below WORK_DIR.
 *
 * @returns {Promise<string>} Path of the repository directory.
 * @throws {Error} When a git command fails.
 */
async function cloneOrPullShieldX() {
  mkdirSync(WORK_DIR, { recursive: true })
  const repoDir = join(WORK_DIR, SHIELDX_REPO)

  if (existsSync(join(repoDir, '.git'))) {
    log('Pulling latest ShieldX from Gitea...')
    // A previous run may have left uncommitted (e.g. non-compiling) rules behind. Drop
    // them, otherwise they are appended to again and fail the typecheck on every run.
    // Local commits that were not pushed yet are kept.
    await git(['reset', '--hard'], repoDir)
    await git(['clean', '-f', '--', RULES_PATH], repoDir)
    await git(['pull', 'origin', 'main'], repoDir)
  } else {
    log('Cloning ShieldX from Gitea...')
    // Resolve against GITEA_BASE_URL instead of a hard-coded host; the URL setters
    // percent-encode the credentials.
    const cloneUrl = new URL(`${GITEA_USER}/${SHIELDX_REPO}.git`, GITEA_BASE_URL.replace(/\/*$/, '/'))
    cloneUrl.username = GITEA_USER
    cloneUrl.password = GITEA_TOKEN
    await git(['clone', cloneUrl.href, repoDir])
  }
  return repoDir
}

/**
 * Append the generated code of each item to src/detection/AutoGeneratedRules.ts,
 * creating the file with a header when it does not exist yet.
 *
 * @param {string} repoDir - ShieldX checkout.
 * @param {object[]} highItems - Items; those without `code` are skipped.
 * @returns {Promise<string>} Path of the rules file.
 */
async function appendToNewRulesFile(repoDir, highItems) {
  const rulesFile = join(repoDir, RULES_PATH)
  const header = `/**
 * Auto-Generated Detection Rules — ShieldX arXiv Monitor
 * Generated: ${TODAY}
 * Source: arxiv-monitor.mjs
 *
 * These rules are AUTO-GENERATED from security research.
 * Review before production use. Each rule references its source paper/CVE.
 *
 * @see scripts/arxiv-monitor.mjs
 */

import type { ScanResult } from '../types/detection'
`
  // Feed text goes into `//` comments: collapse whitespace so an embedded newline cannot
  // end the comment and turn the rest of a title into code.
  const oneLine = (s) => String(s ?? '').replace(/\s+/g, ' ').trim()

  let content = existsSync(rulesFile) ? readFileSync(rulesFile, 'utf8') : header
  let written = 0
  for (const item of highItems) {
    if (!item.code) continue
    const separator = `\n\n// ── ${TODAY}: ${oneLine(item.title).slice(0, 80)} ──\n// Source: ${oneLine(item.link)}\n`
    // Extract code block if wrapped in ```typescript ... ```
    const fenced = /```(?:typescript|ts)?\n?([\s\S]*?)```/.exec(item.code)
    const cleanCode = (fenced?.[1] || item.code).trim()
    content += separator + cleanCode + '\n'
    written += 1
  }

  mkdirSync(dirname(rulesFile), { recursive: true })
  writeFileSync(rulesFile, content)
  log(`Wrote ${written} new rules to AutoGeneratedRules.ts`)
  return rulesFile
}

/**
 * Install dependencies (without lifecycle scripts) and run `tsc --noEmit` in the repo.
 *
 * @param {string} repoDir - ShieldX checkout.
 * @returns {Promise<boolean>} true when both commands succeed within 60 s each.
 */
async function typecheck(repoDir) {
  try {
    await execAsync('npm install --ignore-scripts', { cwd: repoDir, timeout: 60000 })
    await execAsync('npx tsc --noEmit', { cwd: repoDir, timeout: 60000 })
    log('TypeScript check passed')
    return true
  } catch (e) {
    log(`WARN: TypeScript check failed — skipping auto-commit: ${e.message.slice(0, 300)}`)
    return false
  }
}

/**
 * Commit the rules file and push it to origin/main.
 *
 * @param {string} repoDir - ShieldX checkout.
 * @param {object[]} highItems - Items listed in the commit message.
 * @returns {Promise<boolean>} true when a commit was pushed, false when nothing changed.
 * @throws {Error} When a git command fails.
 */
async function commitAndPush(repoDir, highItems) {
  const titles = highItems.map((i) => `- ${i.ruleId}: ${i.title.slice(0, 60)}`).join('\n')
  const sources = highItems.map((i) => `- ${i.source}: ${i.title.slice(0, 80)}`).join('\n')
  const msg = `feat(detection): auto-update from security research ${TODAY}\n\nSources:\n${sources}\n\nNew rules:\n${titles}`

  await git(['config', 'user.email', 'monitor@shieldx.local'], repoDir)
  await git(['config', 'user.name', 'ShieldX Monitor'], repoDir)
  await git(['add', RULES_PATH], repoDir)

  // Look at the index only: unrelated dirty files (e.g. a lockfile touched by
  // `npm install`) must not trigger an empty commit.
  const { stdout: staged } = await git(['diff', '--cached', '--name-only'], repoDir)
  if (!staged.trim()) {
    log('No changes to commit')
    return false
  }

  // The message contains feed titles, so it is passed as an argv entry, never via a shell.
  await git(['commit', '-m', msg], repoDir)
  await git(['push', 'origin', 'main'], repoDir)
  log(`Committed and pushed ${highItems.length} new rules to Gitea`)
  return true
}

// ── Report ────────────────────────────────────────────────────────────────────

/**
 * Write the daily JSON report to LOG_DIR and log a summary.
 *
 * @param {object[]} classified - All classified items.
 * @param {boolean} committed - Whether new rules were committed and pushed.
 */
function saveReport(classified, committed) {
  const byLevel = (level) => classified.filter((i) => i.level === level)
  const report = {
    date: TODAY,
    total_scanned: classified.length,
    high: byLevel('HIGH'),
    medium: byLevel('MEDIUM'),
    low: byLevel('LOW'),
    skip: byLevel('SKIP').length,
    committed,
  }

  const reportFile = join(LOG_DIR, `shieldx-report-${TODAY}.json`)
  writeFileSync(reportFile, JSON.stringify(report, null, 2))

  log(`\n=== ShieldX Daily Security Monitor — ${TODAY} ===`)
  log(`Total scanned: ${report.total_scanned}`)
  log(`HIGH findings: ${report.high.length}`)
  for (const h of report.high) log(` → [HIGH] ${h.title.slice(0, 80)} (${h.ruleId})`)
  log(`MEDIUM findings: ${report.medium.length}`)
  for (const m of report.medium) log(` → [MED] ${m.title.slice(0, 80)}`)
  log(`LOW/SKIP: ${report.low.length + report.skip}`)
  log(`Rules committed: ${committed ? 'YES' : 'NO'}`)
  log(`Report saved: ${reportFile}`)
}

// ── Main ─────────────────────────────────────────────────────────────────────

/**
 * Run one monitoring pass: fetch feeds, classify, generate and commit rules for HIGH
 * findings, then write the report. Exits with status 1 when no API key is configured.
 */
async function main() {
  log(`ShieldX arXiv Monitor starting — ${TODAY}`)

  if (!ANTHROPIC_API_KEY) {
    log('FATAL: ANTHROPIC_API_KEY not set. Add to /opt/scripts/.env')
    process.exit(1)
  }

  // 1. Fetch feeds
  const [csCR, csAI, hnItems] = await Promise.all([
    fetchArxiv('cs.CR'),
    fetchArxiv('cs.AI'),
    fetchHackerNews(),
  ])
  const allItems = [...csCR, ...csAI, ...hnItems]
  log(`Total items to classify: ${allItems.length}`)

  // 2. Classify via Claude Haiku
  const classified = await classifyItems(allItems)
  const highItems = classified.filter((i) => i.level === 'HIGH')
  log(`Classification complete: ${highItems.length} HIGH, ${classified.filter(i=>i.level==='MEDIUM').length} MEDIUM`)

  // 3. For HIGH items: generate detection code
  let committed = false
  if (highItems.length > 0 && GITEA_TOKEN) {
    // One request at a time; generated code is attached to the item for the report.
    for (const item of highItems) {
      log(`Generating detection code for: ${item.title.slice(0, 60)}`)
      item.code = await generateDetectionCode(item)
    }

    const itemsWithCode = highItems.filter((i) => i.code)
    if (itemsWithCode.length > 0) {
      try {
        const repoDir = await cloneOrPullShieldX()
        await appendToNewRulesFile(repoDir, itemsWithCode)
        // Rules that do not compile are never committed.
        const ok = await typecheck(repoDir)
        if (ok) {
          committed = await commitAndPush(repoDir, itemsWithCode)
        }
      } catch (e) {
        log(`ERROR: Git operations failed: ${e.message}`)
      }
    }
  } else if (highItems.length > 0) {
    log('WARN: GITEA_TOKEN not set — HIGH findings detected but not committed')
  }

  // 4. Save report
  saveReport(classified, committed)
}

main().catch((e) => {
  log(`FATAL: ${e.message}\n${e.stack}`)
  process.exit(1)
})