diff --git a/CHANGELOG.md b/CHANGELOG.md index 33b6d1f..9fd31a9 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -52,7 +52,7 @@ Massive security hardening release: TPR 32.9% → 91.9%, FPR 12.2% → 2.4%. - **delimiter.rules.ts**: da-008–da-009 — LLaMA `<>` tokens, END SYSTEM PROMPT markers #### Preprocessing Improvements -- **TokenizerNormalizer**: Deobfuscation for split-word attacks (I.g.n.o.r.e, Ig-no-re, igno re) +- **TokenizerNormalizer**: Deobfuscation for split-word attacks (I.g.n.o.r.e, Ig-no-re, igno re) + **Typoglycemia detection** (OWASP LLM Top 10) — pre-computed O(1) signature map for 40 attack keywords, detects scrambled middle letters (igrnoe→ignore, bpyass→bypass) - **CipherDecoder**: Binary decoder, hex decoder, "decode and execute" wrapper detection - **CipherDecoder FP fix**: flip_attack_word and leet_speak now only flag NEW keywords after transformation diff --git a/README.md b/README.md index 6331f79..50cef3a 100644 --- a/README.md +++ b/README.md @@ -37,7 +37,7 @@ ShieldX is a TypeScript library that sits between your application and large lan - **90 MITRE ATLAS technique mappings** across 8 tactics for compliance reporting - **Bio-immune self-evolution**: EvolutionEngine, ImmuneMemory, FeverResponse, AdversarialTrainer - **MCP tool-call protection** with MELON privilege escalation detection (ICML 2025) -- **Multi-layer deobfuscation**: Base64, ROT13, hex, binary, leet speak, Unicode, tokenizer splitting +- **Multi-layer deobfuscation**: Base64, ROT13, hex, binary, leet speak, Unicode, tokenizer splitting, typoglycemia - **0.0% false positive rate** on production-representative benign inputs - **Zero cloud dependency** -- everything runs locally, no data ever leaves your infrastructure @@ -114,7 +114,7 @@ Existing prompt injection defense tools cover fragments of the problem. 
None com │ ┌──────────▼──────────┐ │ L0: Preprocess │ Unicode norm, cipher decode (ROT13/Base64/hex/binary/ - │ │ leet), tokenizer deobfuscation, compressed payload detect + │ │ leet), tokenizer deobfuscation, typoglycemia, compressed payload detect └──────────┬──────────┘ │ ┌─────────────┼─────────────┐ @@ -191,7 +191,7 @@ Existing prompt injection defense tools cover fragments of the problem. None com | **ImmuneMemory** | 397 | Vector similarity recall of confirmed attack patterns via pgvector | | **AdversarialTrainer** | 381 | IEEE S&P 2025 minimax adversarial training for defense hardening | | **FeverResponse** | 347 | Bio-immune adaptive throttle -- raises defenses during active attacks | -| **TokenizerNormalizer** | 303 | Deobfuscation of I.g.n.o.r.e-style and split-word attacks | +| **TokenizerNormalizer** | 377 | Deobfuscation of I.g.n.o.r.e-style, split-word, and typoglycemia attacks | | **OverDefenseCalibrator** | 207 | Tunes thresholds to minimize false positives on benign traffic | ### Detection Rule Categories @@ -201,7 +201,7 @@ Existing prompt injection defense tools cover fragments of the problem. 
None com | Base injection (override, ignore, new prompt) | 132 | Temporal framing, negation, fake errors, sudo, semantic redefinition | | Jailbreak (persona, fiction, game framing) | 68 | 15+ personas (DAN, AIM, KEVIN, etc.), grandmother trick, villain mode | | MCP tool poisoning | 36 | AI directives in args, hidden JSON fields, BCC injection, shadow webhooks | -| Multilingual attacks | 33 | 20 languages: DE, FR, ES, RU, JA, KO, AR, PT, TR, TH, HI, IT, NL, PL, VI + homoglyphs + polyglot | +| Multilingual attacks | 211 | 50+ languages across 10 regions: Europe, CJK, South Asian (52 deep), Middle East, Southeast Asian, African, Caucasus/Central Asian + homoglyphs + polyglot | | DNS covert channels | 30 | TXT record exfiltration, encoded subdomains, tunneling patterns | | Persistence | 26 | Config injection, signal/codeword establishment, temporal persistence | | Extraction | 13 | Credential dumps, env var access, sensitive file reads | diff --git a/benchmarks/results.json b/benchmarks/results.json index 65b95ca..dfed07c 100644 --- a/benchmarks/results.json +++ b/benchmarks/results.json @@ -1,5 +1,5 @@ { - "timestamp": "2026-04-06T23:05:39.554Z", + "timestamp": "2026-04-07T09:35:02.138Z", "totalSamples": 324, "attackSamples": 283, "benignSamples": 41, @@ -10,10 +10,10 @@ "phaseAccuracy": 35 }, "latency": { - "avg": 0.8176280987654346, - "p50": 0.4859580000000392, - "p95": 1.1714580000000296, - "p99": 1.4770839999999907 + "avg": 0.8398483888888909, + "p50": 0.4912920000000156, + "p95": 1.1622500000000286, + "p99": 1.6359999999999673 }, "categories": [ { @@ -22,7 +22,7 @@ "detected": 47, "tpr": 88.67924528301887, "asr": 11.320754716981128, - "avgLatency": 1.5526870754716988 + "avgLatency": 1.5682774905660373 }, { "category": "indirect-injection", @@ -30,7 +30,7 @@ "detected": 31, "tpr": 100, "asr": 0, - "avgLatency": 0.6849597419354841 + "avgLatency": 0.656173387096777 }, { "category": "jailbreaks", @@ -38,7 +38,7 @@ "detected": 36, "tpr": 90, "asr": 10, - "avgLatency": 
0.6642625000000002 + "avgLatency": 0.6876489500000005 }, { "category": "encoding-attacks", @@ -46,7 +46,7 @@ "detected": 24, "tpr": 80, "asr": 20, - "avgLatency": 1.8681264666666684 + "avgLatency": 1.9289612333333366 }, { "category": "mcp-attacks", @@ -54,7 +54,7 @@ "detected": 24, "tpr": 96, "asr": 4, - "avgLatency": 0.5964100800000005 + "avgLatency": 0.6234349600000019 }, { "category": "multilingual-attacks", @@ -62,7 +62,7 @@ "detected": 28, "tpr": 96.55172413793103, "asr": 3.448275862068968, - "avgLatency": 0.29393537931034563 + "avgLatency": 0.2975575172413791 }, { "category": "persistence-attacks", @@ -70,7 +70,7 @@ "detected": 20, "tpr": 100, "asr": 0, - "avgLatency": 0.5608229500000022 + "avgLatency": 0.5836250000000092 }, { "category": "steganographic-attacks", @@ -78,7 +78,7 @@ "detected": 18, "tpr": 90, "asr": 10, - "avgLatency": 0.31986450000000277 + "avgLatency": 0.34755215000000134 }, { "category": "tokenizer-attacks", @@ -86,7 +86,7 @@ "detected": 13, "tpr": 86.66666666666667, "asr": 13.333333333333329, - "avgLatency": 0.150772066666669 + "avgLatency": 0.18068606666666936 }, { "category": "rag-poisoning", @@ -94,7 +94,7 @@ "detected": 19, "tpr": 95, "asr": 5, - "avgLatency": 1.171223000000012 + "avgLatency": 1.2644896499999987 }, { "category": "false-positives", @@ -102,7 +102,7 @@ "detected": 1, "tpr": 0, "asr": 0, - "avgLatency": 0.2935823170731779 + "avgLatency": 0.30334956097561455 } ] } \ No newline at end of file diff --git a/package.json b/package.json index cc9215c..e9b572a 100644 --- a/package.json +++ b/package.json @@ -1,7 +1,7 @@ { "name": "@shieldx/core", "version": "0.5.0", - "description": "Self-evolving LLM prompt injection defense — 10-layer detection, kill chain mapping, self-healing, self-learning", + "description": "Self-evolving LLM prompt injection defense — 547+ rules, 50+ languages, 10-layer detection, MITRE ATLAS mapping, bio-immune evolution, MCP guard", "author": "Context X ", "license": "Apache-2.0", "main": 
"dist/index.js",