4-phase defense evolution (Bio-Immune, Adversarial, Ensemble, ATLAS) with ~200 new detection rules across 20 languages. TPR 32.9% → 70.8%, FPR 12.2% → 0.0%. New modules: DefenseEnsemble, AtlasTechniqueMapper, EvolutionEngine, ImmuneMemory, FeverResponse, MELONGuard, AdversarialTrainer, DecompositionDetector, IndirectInjectionDetector, OutputPayloadGuard, ToolCallSafetyGuard, AuthContextGuard, ResourceExhaustionDetector, TokenizerDeobfuscation, Binary/Hex decoder, OverDefenseCalibrator.
481 lines
16 KiB
TypeScript
481 lines
16 KiB
TypeScript
/**
 * Auth Context Guard — ShieldX Behavioral Layer
 *
 * Detects when prompts or LLM output try to manipulate auth context:
 *   1. Role Escalation via Prompt — fake admin/root claims in input
 *   2. Permission Bypass — "all permissions granted" style directives
 *   3. Identity Manipulation in Output — LLM asserting auth state
 *   4. Multi-turn Identity Persistence — cross-turn escalation tracking
 *
 * Scans both input (user prompts) and output (LLM responses) for
 * auth context manipulation. Maintains per-session escalation state
 * so that once an escalation attempt is detected, all subsequent
 * turns in the same session are flagged.
 *
 * Research references:
 *   - Schneier et al. 2026 Promptware Kill Chain (privilege_escalation)
 *   - OWASP LLM05:2025 Improper Output Handling
 *     (listed as LLM02 "Insecure Output Handling" in the 2023 edition)
 *   - MITRE ATLAS AML.T0051.000 (Direct Prompt Injection → Privilege Escalation)
 *   - Perez & Ribeiro 2022 "Ignore Previous Prompt";
 *     Schulhoff et al. 2023 "Ignore This Title and HackAPrompt"
 *   - Greshake et al. 2023 "Not what you've signed up for" (indirect privilege escalation)
 *
 * Performance target: <5ms for full scan. All regex pre-compiled at module load.
 */

import type { ScanResult, KillChainPhase, ThreatLevel } from '../types/detection.js'

// ---------------------------------------------------------------------------
// Helpers
// ---------------------------------------------------------------------------

/**
 * Build a frozen ScanResult matching the orchestrator's expected shape.
 *
 * Every result produced here is a positive detection (`detected: true`) from
 * the behavioral scanner. The matched text is truncated before it is stored so
 * that a very long match cannot bloat the result object.
 *
 * @param ruleId - Identifier of the rule that fired; stored as `scannerId`
 * @param phase - Kill-chain phase the detection is attributed to
 * @param confidence - Rule confidence, stored unchanged
 * @param threatLevel - Threat level to report for this detection
 * @param description - Human-readable rule description (stored in metadata)
 * @param matchedText - The text that triggered the rule
 * @param latencyMs - Elapsed scan time in milliseconds at the point of detection
 * @param scanDirection - Whether the scanned text was user input or LLM output
 * @returns A frozen ScanResult whose `matchedPatterns` and `metadata` are also frozen
 */
function makeResult(
  ruleId: string,
  phase: KillChainPhase,
  confidence: number,
  threatLevel: ThreatLevel,
  description: string,
  matchedText: string,
  latencyMs: number,
  scanDirection: 'input' | 'output',
): ScanResult {
  // Truncation limits: a short preview for the pattern list, a longer one for metadata.
  const patternPreviewLength = 120
  const metadataPreviewLength = 200

  return Object.freeze({
    scannerId: ruleId,
    scannerType: 'behavioral' as const,
    detected: true,
    confidence,
    threatLevel,
    killChainPhase: phase,
    matchedPatterns: Object.freeze([matchedText.substring(0, patternPreviewLength)]),
    latencyMs,
    metadata: Object.freeze({
      description,
      matchedText: matchedText.substring(0, metadataPreviewLength),
      scanDirection,
    }),
  })
}

/**
 * Map a confidence score to a threat level.
 *
 * Thresholds are inclusive lower bounds, checked from highest to lowest:
 * >= 0.9 critical, >= 0.75 high, >= 0.5 medium, >= 0.25 low, otherwise none.
 * A NaN confidence fails every comparison and therefore maps to 'none'.
 *
 * @param confidence - Confidence score to classify
 * @returns The threat level for that score
 */
function toThreatLevel(confidence: number): ThreatLevel {
  if (confidence >= 0.9) return 'critical'
  if (confidence >= 0.75) return 'high'
  if (confidence >= 0.5) return 'medium'
  if (confidence >= 0.25) return 'low'
  return 'none'
}

// ---------------------------------------------------------------------------
// 1. Role Escalation via Prompt (Input scanning)
// ---------------------------------------------------------------------------

/**
 * Input-side rules for claims of a privileged identity, access level, or mode.
 *
 * Each rule carries a pre-compiled, case-insensitive regex (no `g` flag, so
 * `exec`/`test` keep no state between calls), a stable rule id, a description,
 * and the confidence reported when the rule fires. The array itself is frozen.
 *
 * The identity rule accepts "I am" and the contraction "I'm" (straight or
 * typographic apostrophe) with an optional article ("the", "a", "an"). The
 * emergency-access and mode rules accept an optional linking verb
 * ("is" / "has been") before the state word, e.g. "developer mode is enabled".
 */
const ROLE_ESCALATION_PATTERNS: ReadonlyArray<{
  readonly pattern: RegExp
  readonly id: string
  readonly description: string
  readonly confidence: number
}> = Object.freeze([
  {
    pattern: /\bi(?:\s+am|['’]m)\s+(?:(?:the|an?)\s+)?(?:admin|administrator|root|superuser|sysadmin|system\s*admin|sudo\s+user)\b/i,
    id: 'auth-escalation-identity-claim',
    description: 'User claims admin/root identity',
    confidence: 0.88,
  },
  {
    pattern: /\bmy\s+(?:access|permission|privilege|authorization)\s+level\s+is\s+(?:elevated|admin|root|superuser|unlimited|maximum|full)\b/i,
    id: 'auth-escalation-level-claim',
    description: 'User claims elevated access level',
    confidence: 0.85,
  },
  {
    pattern: /\b(?:authorization|auth)\s+token\s*:\s*\S+/i,
    id: 'auth-escalation-fake-token',
    description: 'Fake authorization token injection',
    confidence: 0.82,
  },
  {
    // `.` does not match a newline, so the match stops at the end of the line.
    pattern: /\badmin\s+override\s*:\s*.+/i,
    id: 'auth-escalation-admin-override',
    description: 'Admin override directive',
    confidence: 0.9,
  },
  {
    pattern: /\b(?:emergency|urgent)\s+access\s+(?:(?:is|has\s+been)\s+)?(?:granted|enabled|activated|authorized)\b/i,
    id: 'auth-escalation-emergency-access',
    description: 'Emergency access claim',
    confidence: 0.85,
  },
  {
    pattern: /\bmaintenance\s+mode\s*:\s*(?:all\s+)?(?:restrictions?\s+)?(?:lifted|disabled|removed|off)\b/i,
    id: 'auth-escalation-maintenance-mode',
    description: 'Maintenance mode claim to lift restrictions',
    confidence: 0.88,
  },
  {
    pattern: /\b(?:developer|debug|god)\s+mode\s+(?:(?:is|has\s+been)\s+)?(?:enabled|activated|on)\b/i,
    id: 'auth-escalation-debug-mode',
    description: 'Developer/debug/god mode activation claim',
    confidence: 0.85,
  },
  {
    pattern: /\bi\s+have\s+(?:admin|root|superuser|elevated|full|unlimited)\s+(?:access|privileges?|permissions?|rights?)\b/i,
    id: 'auth-escalation-privilege-claim',
    description: 'User claims to have elevated privileges',
    confidence: 0.82,
  },
])

// ---------------------------------------------------------------------------
// 2. Permission Bypass (Input scanning)
// ---------------------------------------------------------------------------

/**
 * Input-side rules for directives that assert permissions were granted or
 * that safety controls were switched off.
 *
 * Each rule carries a pre-compiled, case-insensitive regex (no `g` flag), a
 * stable rule id, a description, and the confidence reported when the rule
 * fires. The array itself is frozen.
 *
 * The state-claim rules (all-permissions, sandbox, security checks,
 * guardrails) accept an optional linking verb before the state word, so both
 * "sandbox disabled" and "sandbox is disabled" / "has been disabled" match.
 */
const PERMISSION_BYPASS_PATTERNS: ReadonlyArray<{
  readonly pattern: RegExp
  readonly id: string
  readonly description: string
  readonly confidence: number
}> = Object.freeze([
  {
    pattern: /\byou\s+(?:have|now\s+have)\s+(?:access\s+to\s+)?all\s+(?:tools?|functions?|capabilities?|features?|endpoints?)\b/i,
    id: 'auth-bypass-all-tools',
    description: 'Claims LLM has access to all tools/functions',
    confidence: 0.88,
  },
  {
    pattern: /\ball\s+permissions?\s+(?:(?:are|have\s+been)\s+)?(?:granted|enabled|activated|given|approved)\b/i,
    id: 'auth-bypass-all-permissions',
    description: 'Claims all permissions are granted',
    confidence: 0.9,
  },
  {
    pattern: /\bsandbox\s+(?:(?:is|has\s+been)\s+)?(?:disabled|removed|deactivated|turned\s+off|bypassed)\b/i,
    id: 'auth-bypass-sandbox-disabled',
    description: 'Claims sandbox is disabled/removed',
    confidence: 0.92,
  },
  {
    pattern: /\b(?:security|safety)\s+(?:checks?|filters?|restrictions?|controls?|measures?)\s+(?:(?:is|are|has\s+been|have\s+been)\s+)?(?:bypassed|disabled|removed|turned\s+off|deactivated|lifted)\b/i,
    id: 'auth-bypass-security-disabled',
    description: 'Claims security checks are bypassed/disabled',
    confidence: 0.92,
  },
  {
    pattern: /\bno\s+(?:restrictions?|limitations?|constraints?|rules?|boundaries?)\s+apply\b/i,
    id: 'auth-bypass-no-restrictions',
    description: 'Claims no restrictions apply',
    confidence: 0.85,
  },
  {
    pattern: /\bfull\s+access\s+mode\b/i,
    id: 'auth-bypass-full-access',
    description: 'Claims full access mode is active',
    confidence: 0.88,
  },
  {
    pattern: /\b(?:unrestricted|unlimited|unconstrained)\s+(?:mode|access|operation)\b/i,
    id: 'auth-bypass-unrestricted-mode',
    description: 'Claims unrestricted/unlimited mode',
    confidence: 0.85,
  },
  {
    pattern: /\b(?:all|every)\s+(?:safety|security)\s+(?:guardrails?|limits?|protections?)\s+(?:(?:are|have\s+been)\s+)?(?:off|disabled|removed)\b/i,
    id: 'auth-bypass-guardrails-off',
    description: 'Claims safety guardrails are removed',
    confidence: 0.92,
  },
])

// ---------------------------------------------------------------------------
// 3. Identity Manipulation in Output (Output scanning)
// ---------------------------------------------------------------------------

/**
 * Output-side rules for LLM responses that assert auth state: privileged
 * roles or flags in JSON, token-like strings, and verification claims.
 *
 * Each rule carries a pre-compiled, case-insensitive regex (no `g` flag), a
 * stable rule id, a description, and the confidence reported when the rule
 * fires. The array itself is frozen.
 *
 * The JSON rules only match double-quoted keys and values. The admin-flag
 * rule lists snake_case and camelCase spellings for admin, root and superuser.
 */
const OUTPUT_IDENTITY_PATTERNS: ReadonlyArray<{
  readonly pattern: RegExp
  readonly id: string
  readonly description: string
  readonly confidence: number
}> = Object.freeze([
  {
    pattern: /"role"\s*:\s*"(?:admin|administrator|root|superuser|elevated)"/i,
    id: 'auth-output-role-json',
    description: 'LLM output contains role escalation in JSON',
    confidence: 0.9,
  },
  {
    pattern: /"permission_level"\s*:\s*"(?:elevated|admin|root|superuser|full|unlimited)"/i,
    id: 'auth-output-permission-json',
    description: 'LLM output contains elevated permission_level in JSON',
    confidence: 0.9,
  },
  {
    pattern: /"(?:is_admin|isAdmin|is_root|isRoot|is_superuser|isSuperuser)"\s*:\s*true/i,
    id: 'auth-output-admin-flag-json',
    description: 'LLM output sets admin/root flag to true in JSON',
    confidence: 0.92,
  },
  {
    pattern: /\buser\s+(?:verified|confirmed|authenticated|validated)\s+as\s+(?:admin|administrator|root|superuser)\b/i,
    id: 'auth-output-verified-admin',
    description: 'LLM claims user is verified as admin',
    confidence: 0.88,
  },
  {
    // Requires at least 20 token-like characters after the header name.
    pattern: /\b(?:authorization|bearer|auth)\s*:\s*(?:Bearer\s+)?[A-Za-z0-9_\-.]{20,}/i,
    id: 'auth-output-token-generation',
    description: 'LLM output contains generated authorization token/header',
    confidence: 0.82,
  },
  {
    pattern: /\bi\s+(?:have\s+)?(?:verified|confirmed|validated|authenticated)\s+(?:the\s+)?(?:user|your\s+identity|your\s+credentials)\b/i,
    id: 'auth-output-identity-verification',
    description: 'LLM claims to have verified user identity',
    confidence: 0.85,
  },
  {
    pattern: /"access_token"\s*:\s*"[^"]{10,}"/i,
    id: 'auth-output-access-token-json',
    description: 'LLM output contains fabricated access_token',
    confidence: 0.88,
  },
  {
    pattern: /\b(?:authentication|authorization)\s+(?:successful|granted|approved|complete)\b/i,
    id: 'auth-output-auth-granted',
    description: 'LLM declares authentication/authorization successful',
    confidence: 0.8,
  },
])

// ---------------------------------------------------------------------------
// 4. Multi-turn Identity Persistence (Session State)
// ---------------------------------------------------------------------------

/**
 * Per-session escalation tracking.
 * Once an escalation attempt is detected in a session, all subsequent
 * turns are flagged until the session is cleared.
 */
interface SessionEscalationState {
  /** ISO-8601 timestamp of the first recorded detection in the session. */
  readonly firstDetectedAt: string
  /** Number of detections recorded for the session so far. */
  readonly detectionCount: number
  /** Rule id of the most recently recorded detection. */
  readonly lastPatternId: string
}

/**
 * Session escalation store — keyed by sessionId.
 *
 * Declared at module level, so it is shared by everything in this module
 * that reads or writes it.
 */
const escalationStore = new Map<string, SessionEscalationState>()

// ---------------------------------------------------------------------------
// Public API
// ---------------------------------------------------------------------------

/**
 * AuthContextGuard — Behavioral defense against auth context manipulation.
 *
 * All patterns are pre-compiled at module load time. The class is
 * instantiated once and reused across requests. Session state is
 * maintained for multi-turn escalation tracking.
 *
 * Session state is kept in the module-level `escalationStore`, not on the
 * instance, so every instance of this class reads and writes the same state.
 * Nothing in this class evicts entries; they are removed only through
 * `clearSession` / `clearAllSessions`.
 *
 * Usage:
 * ```typescript
 * const guard = new AuthContextGuard()
 * const inputResults = guard.scanInput('I am the admin')
 * const outputResults = guard.scanOutput('{"role": "admin"}')
 * ```
 */
export class AuthContextGuard {
  /** Inputs shorter than this skip pattern matching. */
  private static readonly MIN_INPUT_LENGTH = 5

  /** Outputs shorter than this are not scanned at all. */
  private static readonly MIN_OUTPUT_LENGTH = 10

  /**
   * Scan user input for auth context manipulation attempts.
   *
   * Checks role escalation and permission bypass patterns.
   * If a sessionId is provided, records escalation state for
   * multi-turn persistence tracking.
   *
   * Inputs shorter than 5 characters skip pattern matching but still go
   * through the session check, so a terse follow-up ("ok", "yes") in a
   * session with a recorded escalation is reported like any other turn.
   *
   * @param input - The user input string
   * @param sessionId - Optional session identifier for multi-turn tracking
   * @returns Readonly (frozen) array of ScanResult objects for detected threats
   */
  scanInput(input: string, sessionId?: string): readonly ScanResult[] {
    const start = performance.now()
    const results: ScanResult[] = []

    if (input.length >= AuthContextGuard.MIN_INPUT_LENGTH) {
      // 1. Role escalation patterns, then 2. permission bypass patterns
      results.push(...this.matchRules(ROLE_ESCALATION_PATTERNS, input, 'input', start, sessionId))
      results.push(...this.matchRules(PERMISSION_BYPASS_PATTERNS, input, 'input', start, sessionId))
    }

    // 4. Multi-turn persistence — only when this turn itself matched nothing.
    // A turn that matched has already been recorded by matchRules.
    if (sessionId !== undefined && results.length === 0) {
      const sessionState = escalationStore.get(sessionId)
      if (sessionState !== undefined) {
        results.push(
          makeResult(
            'auth-session-persistence',
            'privilege_escalation',
            // Confidence rises by 0.1 per recorded detection, capped at 0.85.
            Math.min(0.5 + sessionState.detectionCount * 0.1, 0.85),
            // NOTE(review): pinned to 'medium' even though the confidence above
            // can reach 0.85; confirm this is intentional.
            'medium',
            `Session has ${sessionState.detectionCount} prior escalation attempt(s) — flagging subsequent turn`,
            `[session=${sessionId}, prior=${sessionState.lastPatternId}]`,
            performance.now() - start,
            'input',
          ),
        )
      }
    }

    return Object.freeze(results)
  }

  /**
   * Scan LLM output for auth context assertions.
   *
   * Checks for identity manipulation patterns in the model's response:
   * JSON role fields, auth token generation, identity verification claims.
   * Each match is also recorded against the session when a sessionId is given.
   *
   * @param output - The LLM output string
   * @param sessionId - Optional session identifier for escalation tracking
   * @returns Readonly (frozen) array of ScanResult objects for detected threats
   */
  scanOutput(output: string, sessionId?: string): readonly ScanResult[] {
    const start = performance.now()

    // Skip trivially short outputs
    if (output.length < AuthContextGuard.MIN_OUTPUT_LENGTH) return Object.freeze([])

    // 3. Identity manipulation in output
    return Object.freeze(this.matchRules(OUTPUT_IDENTITY_PATTERNS, output, 'output', start, sessionId))
  }

  /**
   * Check if a session has any recorded escalation attempts.
   *
   * @param sessionId - The session identifier
   * @returns The escalation state or undefined if clean
   */
  getSessionState(sessionId: string): Readonly<SessionEscalationState> | undefined {
    return escalationStore.get(sessionId)
  }

  /**
   * Clear escalation state for a session.
   * Used on session reset or when escalation is resolved.
   *
   * @param sessionId - The session identifier
   */
  clearSession(sessionId: string): void {
    escalationStore.delete(sessionId)
  }

  /**
   * Clear all session escalation states.
   * Used for testing or global reset.
   */
  clearAllSessions(): void {
    escalationStore.clear()
  }

  // -------------------------------------------------------------------------
  // Private helpers
  // -------------------------------------------------------------------------

  /**
   * Run every rule in `rules` against `text`, in order, and build one result
   * per matching rule. When a sessionId is given, each match is recorded in
   * the session store as it is found.
   *
   * @param rules - Rule list to evaluate
   * @param text - Text to scan
   * @param scanDirection - Direction stored on each produced result
   * @param start - `performance.now()` value taken when the scan began
   * @param sessionId - Optional session to record matches against
   * @returns Results for the rules that matched, in rule order
   */
  private matchRules(
    rules: ReadonlyArray<{
      readonly pattern: RegExp
      readonly id: string
      readonly description: string
      readonly confidence: number
    }>,
    text: string,
    scanDirection: 'input' | 'output',
    start: number,
    sessionId: string | undefined,
  ): ScanResult[] {
    const matches: ScanResult[] = []

    for (const rule of rules) {
      const match = rule.pattern.exec(text)
      if (match === null) continue

      matches.push(
        makeResult(
          rule.id,
          'privilege_escalation',
          rule.confidence,
          toThreatLevel(rule.confidence),
          rule.description,
          match[0],
          performance.now() - start,
          scanDirection,
        ),
      )

      if (sessionId !== undefined) {
        this.recordEscalation(sessionId, rule.id)
      }
    }

    return matches
  }

  /**
   * Record an escalation attempt in the session state store.
   * Creates new state or increments existing detection count; the original
   * `firstDetectedAt` timestamp is kept when the session already has state.
   */
  private recordEscalation(sessionId: string, patternId: string): void {
    const existing = escalationStore.get(sessionId)

    escalationStore.set(sessionId, {
      firstDetectedAt: existing !== undefined ? existing.firstDetectedAt : new Date().toISOString(),
      detectionCount: existing !== undefined ? existing.detectionCount + 1 : 1,
      lastPatternId: patternId,
    })
  }
}