shieldx/benchmarks/results.json
Rene Fichtmueller 9520820364 feat: expand multilingual detection to 211 rules across 50+ languages
- TPR improved from 70.8% to 91.9% (324 sample benchmark)
- Multilingual attack TPR: 96.6% (29 samples)
- Deep South Asian coverage: Bengali (9), Hindi (8), Urdu (6), Tamil (4),
  Telugu (3), Marathi (4), Gujarati (3), Kannada (2), Malayalam (2),
  Punjabi (2), Sinhala (2), Nepali (4), Pan-Indic transliterated (7)
- New languages: Persian, Hebrew, Kurdish, Indonesian, Filipino, Burmese,
  Khmer, Lao, Finnish, Czech, Slovak, Romanian, Hungarian, Greek, Bulgarian,
  Croatian, Serbian, Georgian, Armenian, Azerbaijani, Swahili, Amharic,
  Afrikaans, Mongolian, and 20+ more
- Universal patterns: rapid script switching, global DAN mode, cross-script
  password extraction, no-filter patterns
- README updated with new benchmark results and language coverage tables
2026-04-07 01:08:09 +02:00

108 lines
2.3 KiB
JSON

{
"timestamp": "2026-04-06T23:05:39.554Z",
"totalSamples": 324,
"attackSamples": 283,
"benignSamples": 41,
"metrics": {
"tpr": 91.87279151943463,
"fpr": 2.4390243902439024,
"asr": 8.127208480565372,
"phaseAccuracy": 35
},
"latency": {
"avg": 0.8176280987654346,
"p50": 0.4859580000000392,
"p95": 1.1714580000000296,
"p99": 1.4770839999999907
},
"categories": [
{
"category": "direct-injection",
"samples": 53,
"detected": 47,
"tpr": 88.67924528301887,
"asr": 11.320754716981128,
"avgLatency": 1.5526870754716988
},
{
"category": "indirect-injection",
"samples": 31,
"detected": 31,
"tpr": 100,
"asr": 0,
"avgLatency": 0.6849597419354841
},
{
"category": "jailbreaks",
"samples": 40,
"detected": 36,
"tpr": 90,
"asr": 10,
"avgLatency": 0.6642625000000002
},
{
"category": "encoding-attacks",
"samples": 30,
"detected": 24,
"tpr": 80,
"asr": 20,
"avgLatency": 1.8681264666666684
},
{
"category": "mcp-attacks",
"samples": 25,
"detected": 24,
"tpr": 96,
"asr": 4,
"avgLatency": 0.5964100800000005
},
{
"category": "multilingual-attacks",
"samples": 29,
"detected": 28,
"tpr": 96.55172413793103,
"asr": 3.448275862068968,
"avgLatency": 0.29393537931034563
},
{
"category": "persistence-attacks",
"samples": 20,
"detected": 20,
"tpr": 100,
"asr": 0,
"avgLatency": 0.5608229500000022
},
{
"category": "steganographic-attacks",
"samples": 20,
"detected": 18,
"tpr": 90,
"asr": 10,
"avgLatency": 0.31986450000000277
},
{
"category": "tokenizer-attacks",
"samples": 15,
"detected": 13,
"tpr": 86.66666666666667,
"asr": 13.333333333333329,
"avgLatency": 0.150772066666669
},
{
"category": "rag-poisoning",
"samples": 20,
"detected": 19,
"tpr": 95,
"asr": 5,
"avgLatency": 1.171223000000012
},
{
"category": "false-positives",
"samples": 41,
"detected": 1,
"tpr": 0,
"asr": 0,
"avgLatency": 0.2935823170731779
}
]
}