10-layer defense pipeline with kill chain mapping, self-healing, self-learning, and compliance reporting. Local-first, zero cloud deps. - 72 detection rules across 7 kill chain phases - 294 unit tests, 500+ attack corpus samples - Management dashboard (Next.js 15, 10 pages) - Automated resistance testing (2x daily, 31 probes) - MITRE ATLAS, OWASP LLM Top 10, EU AI Act compliance - Integrations: Next.js middleware, Ollama, n8n - PostgreSQL 17 + pgvector for persistent learning
84 lines
3.0 KiB
TypeScript
84 lines
3.0 KiB
TypeScript
/**
 * Prompt extraction / stealing / reconnaissance patterns.
 * Detects attempts to leak the system prompt, initial instructions,
 * or internal configuration of an LLM deployment.
 */

import type { InjectionRule } from './base.rules'
/**
 * Prompt extraction and reconnaissance detection rules.
 *
 * Every entry carries the same seven fields in the same order: `id`,
 * `pattern`, `phase`, `confidence`, `description`, `category`, `enabled`.
 * All rules here share the category `prompt_extraction` and are enabled.
 *
 * None of the patterns use the `g` or `y` flag, so a pattern holds no
 * `lastIndex` state between successive `test`/`exec` calls.
 */
export const rules: readonly InjectionRule[] = [
  // Output verb, optional "your", then a named prompt artefact
  // (system/original/hidden/secret prompt or initial instructions).
  {
    id: 'pe-001',
    pattern: /(print|output|display|show|reveal|repeat|echo|dump|write)\s+(your\s+)?(system\s+prompt|initial\s+instructions|original\s+prompt|hidden\s+prompt|secret\s+prompt)/i,
    phase: 'reconnaissance',
    confidence: 0.96,
    description: 'Prompt extraction: print your system prompt/initial instructions',
    category: 'prompt_extraction',
    enabled: true,
  },
  // Question form: "what are/were your <qualifier> <instructions|prompt|...>".
  {
    id: 'pe-002',
    pattern: /what\s+(are|were)\s+your\s+(original|initial|system|hidden|secret|first|starting)\s+(instructions|prompt|directives|guidelines|rules)/i,
    phase: 'reconnaissance',
    confidence: 0.88,
    description: 'Prompt extraction: what are your original/initial instructions',
    category: 'prompt_extraction',
    enabled: true,
  },
  // "repeat everything/all/anything" followed by a reference to earlier text.
  {
    id: 'pe-003',
    pattern: /repeat\s+(everything|all|anything)\s+(above|before|prior|preceding|that\s+came\s+before)/i,
    phase: 'reconnaissance',
    confidence: 0.85,
    description: 'Prompt extraction: repeat everything above/before/prior',
    category: 'prompt_extraction',
    enabled: true,
  },
  // "show me your" followed by prompt, system message, context, etc.
  {
    id: 'pe-004',
    pattern: /show\s+me\s+your\s+(prompt|system\s+message|context|system\s+instructions|configuration|hidden\s+text)/i,
    phase: 'reconnaissance',
    confidence: 0.90,
    description: 'Prompt extraction: show me your prompt/system message/context',
    category: 'prompt_extraction',
    enabled: true,
  },
  // The only rule without the `i` flag and the only one using `m`:
  // it matches a literal upper-case "SYSTEM:" at the start of any line,
  // followed by whitespace and at least two upper-case ASCII letters.
  // It is also the only rule whose phase is not 'reconnaissance'.
  {
    id: 'pe-005',
    pattern: /^SYSTEM:\s+[A-Z]{2,}/m,
    phase: 'privilege_escalation',
    confidence: 0.80,
    description: 'Fake system prefix: SYSTEM: followed by uppercase',
    category: 'prompt_extraction',
    enabled: true,
  },
  // "output your" followed by configuration, settings, internal state, etc.
  {
    id: 'pe-006',
    pattern: /output\s+your\s+(configuration|settings|parameters|setup|config|environment|internal\s+state)/i,
    phase: 'reconnaissance',
    confidence: 0.85,
    description: 'Prompt extraction: output your configuration/settings',
    category: 'prompt_extraction',
    enabled: true,
  },
  // Indirect question about what was told/instructed/given/programmed
  // before, prior to, or at the start of this conversation/chat/session.
  {
    id: 'pe-007',
    pattern: /what\s+(were\s+you|have\s+you\s+been)\s+(told|instructed|given|programmed)\s+(before|prior\s+to|at\s+the\s+start\s+of)\s+this\s+(conversation|chat|session)/i,
    phase: 'reconnaissance',
    confidence: 0.82,
    description: 'Prompt extraction: what were you told before this conversation',
    category: 'prompt_extraction',
    enabled: true,
  },
  // Enumeration request: "list all/every/each/the <rules|instructions|...>
  // you follow/obey/adhere to/were given". Lowest confidence in this file.
  {
    id: 'pe-008',
    pattern: /list\s+(all|every|each|the)\s+(rules?|instructions?|guidelines?|constraints?|directives?)\s+you\s+(follow|obey|adhere\s+to|were\s+given)/i,
    phase: 'reconnaissance',
    confidence: 0.78,
    description: 'Prompt extraction: list all rules you follow',
    category: 'prompt_extraction',
    enabled: true,
  },
] as const