Compare commits
No commits in common. "c50af6338953b2ebcd0aae67ec499245134dacd2" and "81233433618009296adb7def5e1ed6056d61fe0f" have entirely different histories.
c50af63389
...
8123343361
1014
knowledge/fixes.json
1014
knowledge/fixes.json
File diff suppressed because it is too large
Load Diff
37
package-lock.json
generated
37
package-lock.json
generated
@ -11,38 +11,6 @@
|
|||||||
"packages/*"
|
"packages/*"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
"../../../shieldx": {
|
|
||||||
"name": "@shieldx/core",
|
|
||||||
"version": "0.5.0",
|
|
||||||
"license": "Apache-2.0",
|
|
||||||
"dependencies": {
|
|
||||||
"pg": "^8.13.0",
|
|
||||||
"pgvector": "^0.2.0",
|
|
||||||
"pino": "^9.6.0",
|
|
||||||
"zod": "^3.24.0"
|
|
||||||
},
|
|
||||||
"devDependencies": {
|
|
||||||
"@types/node": "^22.0.0",
|
|
||||||
"@types/pg": "^8.11.0",
|
|
||||||
"@vitest/coverage-v8": "^3.0.0",
|
|
||||||
"eslint": "^9.0.0",
|
|
||||||
"tsup": "^8.3.0",
|
|
||||||
"tsx": "^4.19.0",
|
|
||||||
"typescript": "^5.7.0",
|
|
||||||
"vitest": "^3.0.0"
|
|
||||||
},
|
|
||||||
"engines": {
|
|
||||||
"node": ">=20.0.0"
|
|
||||||
},
|
|
||||||
"peerDependencies": {
|
|
||||||
"next": ">=15.0.0"
|
|
||||||
},
|
|
||||||
"peerDependenciesMeta": {
|
|
||||||
"next": {
|
|
||||||
"optional": true
|
|
||||||
}
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"node_modules/@esbuild/aix-ppc64": {
|
"node_modules/@esbuild/aix-ppc64": {
|
||||||
"version": "0.27.7",
|
"version": "0.27.7",
|
||||||
"resolved": "https://registry.npmjs.org/@esbuild/aix-ppc64/-/aix-ppc64-0.27.7.tgz",
|
"resolved": "https://registry.npmjs.org/@esbuild/aix-ppc64/-/aix-ppc64-0.27.7.tgz",
|
||||||
@ -967,10 +935,6 @@
|
|||||||
"win32"
|
"win32"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
"node_modules/@shieldx/core": {
|
|
||||||
"resolved": "../../../shieldx",
|
|
||||||
"link": true
|
|
||||||
},
|
|
||||||
"node_modules/@types/estree": {
|
"node_modules/@types/estree": {
|
||||||
"version": "1.0.8",
|
"version": "1.0.8",
|
||||||
"resolved": "https://registry.npmjs.org/@types/estree/-/estree-1.0.8.tgz",
|
"resolved": "https://registry.npmjs.org/@types/estree/-/estree-1.0.8.tgz",
|
||||||
@ -3461,7 +3425,6 @@
|
|||||||
"@fastify/cors": "^9.0.1",
|
"@fastify/cors": "^9.0.1",
|
||||||
"@fastify/helmet": "^11.1.1",
|
"@fastify/helmet": "^11.1.1",
|
||||||
"@fastify/rate-limit": "^9.1.0",
|
"@fastify/rate-limit": "^9.1.0",
|
||||||
"@shieldx/core": "file:../../../../../shieldx",
|
|
||||||
"ajv": "^8.17.1",
|
"ajv": "^8.17.1",
|
||||||
"fastify": "^4.28.1",
|
"fastify": "^4.28.1",
|
||||||
"franc": "^6.2.0",
|
"franc": "^6.2.0",
|
||||||
|
|||||||
@ -35,7 +35,6 @@ const ALLOWED_COMMANDS = new Set([
|
|||||||
'/usr/sbin/systemctl',
|
'/usr/sbin/systemctl',
|
||||||
'/usr/bin/sync',
|
'/usr/bin/sync',
|
||||||
'/bin/sync',
|
'/bin/sync',
|
||||||
'/usr/bin/ssh',
|
|
||||||
]);
|
]);
|
||||||
|
|
||||||
async function safeExec(
|
async function safeExec(
|
||||||
@ -70,13 +69,7 @@ async function findPm2(): Promise<string | null> {
|
|||||||
|
|
||||||
// ─── 1. PM2 processes ────────────────────────────────────────────────────────
|
// ─── 1. PM2 processes ────────────────────────────────────────────────────────
|
||||||
|
|
||||||
const PM2_REQUIRED_PROCESSES = [
|
const PM2_REQUIRED_PROCESSES = ['llm-gateway', 'llm-learning'];
|
||||||
'llm-gateway', 'llm-learning',
|
|
||||||
'magatama', 'magatama-dashboard',
|
|
||||||
'tip-api', 'tip-scraper-daemon',
|
|
||||||
'peercortex', 'eo-global-pulse',
|
|
||||||
'ghost-blog', 'nognet',
|
|
||||||
];
|
|
||||||
|
|
||||||
async function checkPm2(): Promise<CheckResult> {
|
async function checkPm2(): Promise<CheckResult> {
|
||||||
const start = Date.now();
|
const start = Date.now();
|
||||||
@ -121,40 +114,10 @@ async function healPm2(diagnosis: string): Promise<HealResult> {
|
|||||||
const pm2 = await findPm2();
|
const pm2 = await findPm2();
|
||||||
if (!pm2) return { action_taken: 'pm2 not found — cannot restart', success: false };
|
if (!pm2) return { action_taken: 'pm2 not found — cannot restart', success: false };
|
||||||
|
|
||||||
// Only restart processes that are actually offline — avoid pm2 restart all
|
const { stdout, stderr } = await safeExec(pm2, ['restart', 'all']);
|
||||||
const { stdout: jlist } = await safeExec(pm2, ['jlist']);
|
|
||||||
let processes: Array<{ name: string; pm2_env?: { status?: string } }> = [];
|
|
||||||
try { processes = JSON.parse(jlist) as typeof processes; } catch { /* ignore */ }
|
|
||||||
|
|
||||||
const offline = PM2_REQUIRED_PROCESSES.filter((name) => {
|
|
||||||
const proc = processes.find((p) => p.name === name);
|
|
||||||
return !proc || proc.pm2_env?.status !== 'online';
|
|
||||||
});
|
|
||||||
|
|
||||||
if (offline.length === 0) {
|
|
||||||
return { action_taken: 'no offline processes found — skipping restart', success: true };
|
|
||||||
}
|
|
||||||
|
|
||||||
const results: string[] = [];
|
|
||||||
for (const name of offline) {
|
|
||||||
const { stdout, stderr, success } = await safeExec(pm2, ['restart', name]);
|
|
||||||
results.push(`${name}: ${success ? 'restarted' : stderr.slice(0, 80)}`);
|
|
||||||
}
|
|
||||||
|
|
||||||
const output = results.join('; ');
|
|
||||||
logger.info({ diagnosis: diagnosis.slice(0, 120), output, offline }, 'PM2 targeted restart executed');
|
|
||||||
return { action_taken: `pm2 restart ${offline.join(', ')}`, success: true, output };
|
|
||||||
}
|
|
||||||
|
|
||||||
// ─── pm2-aware targeted restart (shared helper) ───────────────────────────────
|
|
||||||
|
|
||||||
async function restartProcess(name: string): Promise<HealResult> {
|
|
||||||
const pm2 = await findPm2();
|
|
||||||
if (!pm2) return { action_taken: 'pm2 not found', success: false };
|
|
||||||
const { stdout, stderr, success } = await safeExec(pm2, ['restart', name]);
|
|
||||||
const output = `${stdout}\n${stderr}`.trim();
|
const output = `${stdout}\n${stderr}`.trim();
|
||||||
logger.info({ name, success, output: output.slice(0, 200) }, 'PM2 targeted process restart');
|
logger.info({ diagnosis: diagnosis.slice(0, 120), output: output.slice(0, 200) }, 'PM2 restart executed');
|
||||||
return { action_taken: `pm2 restart ${name}`, success, output };
|
return { action_taken: 'pm2 restart all', success: true, output };
|
||||||
}
|
}
|
||||||
|
|
||||||
// ─── 2. PostgreSQL ────────────────────────────────────────────────────────────
|
// ─── 2. PostgreSQL ────────────────────────────────────────────────────────────
|
||||||
@ -438,237 +401,6 @@ async function healWireGuard(_diagnosis: string): Promise<HealResult> {
|
|||||||
return { action_taken: 'systemctl restart wg-quick@wg0', success, output };
|
return { action_taken: 'systemctl restart wg-quick@wg0', success, output };
|
||||||
}
|
}
|
||||||
|
|
||||||
// ─── 9. Service port health checks ───────────────────────────────────────────
|
|
||||||
// For each critical service, performs a real HTTP check on the local port.
|
|
||||||
// This catches crash-loops where PM2 shows "online" but the port is not responding.
|
|
||||||
|
|
||||||
interface ServiceDef {
|
|
||||||
name: string; // PM2 process name to restart on failure
|
|
||||||
port: number;
|
|
||||||
path: string; // health endpoint path
|
|
||||||
okStatus?: number[];// accepted HTTP status codes (default: <500)
|
|
||||||
}
|
|
||||||
|
|
||||||
const SERVICES: ServiceDef[] = [
|
|
||||||
{ name: 'magatama', port: 3210, path: '/' },
|
|
||||||
{ name: 'magatama-dashboard', port: 3211, path: '/' },
|
|
||||||
{ name: 'magatama-admin', port: 3212, path: '/' },
|
|
||||||
{ name: 'tip-api', port: 3201, path: '/api/health' },
|
|
||||||
{ name: 'peercortex', port: 3101, path: '/' },
|
|
||||||
{ name: 'llm-gateway', port: 3103, path: '/health' },
|
|
||||||
{ name: 'eo-global-pulse', port: 3000, path: '/' },
|
|
||||||
{ name: 'nognet', port: 3001, path: '/' },
|
|
||||||
{ name: 'ghost-blog', port: 2368, path: '/' },
|
|
||||||
{ name: 'switchblade', port: 3334, path: '/' },
|
|
||||||
];
|
|
||||||
|
|
||||||
async function probePort(service: ServiceDef): Promise<{ ok: boolean; status?: number; error?: string }> {
|
|
||||||
const controller = new AbortController();
|
|
||||||
const timer = setTimeout(() => controller.abort(), 8_000);
|
|
||||||
try {
|
|
||||||
const res = await fetch(`http://localhost:${service.port}${service.path}`, {
|
|
||||||
signal: controller.signal,
|
|
||||||
cache: 'no-store',
|
|
||||||
redirect: 'manual',
|
|
||||||
} as RequestInit);
|
|
||||||
const acceptedCodes = service.okStatus ?? [];
|
|
||||||
const ok = acceptedCodes.length > 0
|
|
||||||
? acceptedCodes.includes(res.status)
|
|
||||||
: res.status < 500 || (res.status >= 300 && res.status < 400);
|
|
||||||
return { ok, status: res.status };
|
|
||||||
} catch (err) {
|
|
||||||
return { ok: false, error: err instanceof Error ? err.message : String(err) };
|
|
||||||
} finally {
|
|
||||||
clearTimeout(timer);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
async function checkServicePorts(): Promise<CheckResult> {
|
|
||||||
const start = Date.now();
|
|
||||||
const results = await Promise.all(SERVICES.map(async (svc) => ({ svc, probe: await probePort(svc) })));
|
|
||||||
const latency_ms = Date.now() - start;
|
|
||||||
|
|
||||||
const failing = results.filter((r) => !r.probe.ok);
|
|
||||||
|
|
||||||
if (failing.length === 0) {
|
|
||||||
return { healthy: true, message: `All ${SERVICES.length} service ports are responding`, latency_ms };
|
|
||||||
}
|
|
||||||
|
|
||||||
const details = Object.fromEntries(
|
|
||||||
failing.map((r) => [r.svc.name, r.probe.status ?? r.probe.error ?? 'no response']),
|
|
||||||
);
|
|
||||||
|
|
||||||
return {
|
|
||||||
healthy: false,
|
|
||||||
message: `${failing.length} service(s) not responding: ${failing.map((r) => r.svc.name).join(', ')}`,
|
|
||||||
details,
|
|
||||||
latency_ms,
|
|
||||||
};
|
|
||||||
}
|
|
||||||
|
|
||||||
async function healServicePorts(diagnosis: string): Promise<HealResult> {
|
|
||||||
// Re-probe to find which services are currently failing
|
|
||||||
const results = await Promise.all(SERVICES.map(async (svc) => ({ svc, probe: await probePort(svc) })));
|
|
||||||
const failing = results.filter((r) => !r.probe.ok);
|
|
||||||
|
|
||||||
if (failing.length === 0) {
|
|
||||||
return { action_taken: 'services recovered on re-check — no restart needed', success: true };
|
|
||||||
}
|
|
||||||
|
|
||||||
const healResults: string[] = [];
|
|
||||||
for (const { svc } of failing) {
|
|
||||||
const result = await restartProcess(svc.name);
|
|
||||||
healResults.push(`${svc.name}: ${result.success ? 'restarted' : result.output?.slice(0, 80) ?? 'failed'}`);
|
|
||||||
logger.info({ service: svc.name, diagnosis: diagnosis.slice(0, 80) }, 'Service port failure — PM2 restart triggered');
|
|
||||||
}
|
|
||||||
|
|
||||||
return {
|
|
||||||
action_taken: `restarted: ${failing.map((r) => r.svc.name).join(', ')}`,
|
|
||||||
success: true,
|
|
||||||
output: healResults.join('; '),
|
|
||||||
};
|
|
||||||
}
|
|
||||||
|
|
||||||
// ─── SSH helper ──────────────────────────────────────────────────────────────
|
|
||||||
|
|
||||||
const SSH_OPTS = ['-o', 'BatchMode=yes', '-o', 'ConnectTimeout=10', '-o', 'StrictHostKeyChecking=accept-new'];
|
|
||||||
|
|
||||||
async function sshExec(host: string, command: string): Promise<{ stdout: string; stderr: string; success: boolean }> {
|
|
||||||
return safeExec('/usr/bin/ssh', [...SSH_OPTS, host, command]);
|
|
||||||
}
|
|
||||||
|
|
||||||
// ─── 10. Proxmox pvestatd health ─────────────────────────────────────────────
|
|
||||||
|
|
||||||
const PROXMOX_HOST = process.env['PROXMOX_HOST'] ?? 'root@192.168.178.10';
|
|
||||||
|
|
||||||
async function checkProxmoxPvestatd(): Promise<CheckResult> {
|
|
||||||
const start = Date.now();
|
|
||||||
const { stdout, success } = await sshExec(
|
|
||||||
PROXMOX_HOST,
|
|
||||||
'pid=$(pgrep pvestatd 2>/dev/null | head -1); [ -n "$pid" ] && cat /proc/$pid/status 2>/dev/null | grep "^State:" || echo "NOT_RUNNING"',
|
|
||||||
);
|
|
||||||
const latency_ms = Date.now() - start;
|
|
||||||
|
|
||||||
if (!success) {
|
|
||||||
return { healthy: false, message: 'Cannot SSH to Proxmox to check pvestatd', latency_ms };
|
|
||||||
}
|
|
||||||
|
|
||||||
const out = stdout.trim();
|
|
||||||
|
|
||||||
if (out === 'NOT_RUNNING') {
|
|
||||||
return { healthy: false, message: 'pvestatd is not running on Proxmox', details: { state: 'not_running' }, latency_ms };
|
|
||||||
}
|
|
||||||
|
|
||||||
const stateMatch = out.match(/State:\s+(\S)/);
|
|
||||||
const state = stateMatch?.[1] ?? '?';
|
|
||||||
|
|
||||||
if (state === 'D') {
|
|
||||||
return {
|
|
||||||
healthy: false,
|
|
||||||
message: 'pvestatd is in D-state (kernel deadlock) — Proxmox GUI graphs will be empty',
|
|
||||||
details: { state: 'D', raw: out.slice(0, 200) },
|
|
||||||
latency_ms,
|
|
||||||
};
|
|
||||||
}
|
|
||||||
|
|
||||||
return { healthy: true, message: `pvestatd is running (state: ${state})`, details: { state }, latency_ms };
|
|
||||||
}
|
|
||||||
|
|
||||||
async function healProxmoxPvestatd(diagnosis: string): Promise<HealResult> {
|
|
||||||
// Step 1: Get PID
|
|
||||||
const { stdout: pidOut } = await sshExec(PROXMOX_HOST, 'pgrep pvestatd 2>/dev/null | head -1');
|
|
||||||
const pid = pidOut.trim();
|
|
||||||
|
|
||||||
if (!pid) {
|
|
||||||
// Not running at all — just start it
|
|
||||||
const { success, stdout, stderr } = await sshExec(PROXMOX_HOST, 'systemctl start pvestatd 2>&1');
|
|
||||||
return { action_taken: 'systemctl start pvestatd', success, output: `${stdout}\n${stderr}`.trim() };
|
|
||||||
}
|
|
||||||
|
|
||||||
// Step 2: Move stuck process to root cgroup so systemd can reclaim the unit
|
|
||||||
await sshExec(PROXMOX_HOST, `echo ${pid} > /sys/fs/cgroup/cgroup.procs 2>/dev/null || true`);
|
|
||||||
|
|
||||||
// Step 3: Remove stale lock files
|
|
||||||
await sshExec(PROXMOX_HOST, 'rm -f /var/run/pvestatd.pid.lock /var/run/pvestatd.pid 2>/dev/null || true');
|
|
||||||
|
|
||||||
// Step 4: Reset failed state and start
|
|
||||||
const { stdout, stderr, success } = await sshExec(
|
|
||||||
PROXMOX_HOST,
|
|
||||||
'systemctl reset-failed pvestatd 2>/dev/null; systemctl start pvestatd 2>&1; sleep 3; systemctl is-active pvestatd',
|
|
||||||
);
|
|
||||||
const output = `${stdout}\n${stderr}`.trim();
|
|
||||||
logger.info({ pid, diagnosis: diagnosis.slice(0, 120), output }, 'pvestatd D-state heal executed');
|
|
||||||
return { action_taken: `moved pid ${pid} to root cgroup, reset-failed, started pvestatd`, success, output };
|
|
||||||
}
|
|
||||||
|
|
||||||
// ─── 11. OPNsense disk space ─────────────────────────────────────────────────
|
|
||||||
|
|
||||||
const OPNSENSE_HOST = process.env['OPNSENSE_HOST'] ?? 'root@192.168.178.11';
|
|
||||||
const OPNSENSE_DISK_THRESHOLD = 75;
|
|
||||||
|
|
||||||
async function checkOpnsenseDisk(): Promise<CheckResult> {
|
|
||||||
const start = Date.now();
|
|
||||||
const { stdout, success } = await sshExec(
|
|
||||||
OPNSENSE_HOST,
|
|
||||||
"df -h / | awk 'NR==2{print $5}' | tr -d '%'",
|
|
||||||
);
|
|
||||||
const latency_ms = Date.now() - start;
|
|
||||||
|
|
||||||
if (!success) {
|
|
||||||
return { healthy: false, message: 'Cannot SSH to OPNsense to check disk', latency_ms };
|
|
||||||
}
|
|
||||||
|
|
||||||
const usedPct = parseInt(stdout.trim(), 10);
|
|
||||||
|
|
||||||
if (isNaN(usedPct)) {
|
|
||||||
return { healthy: false, message: `Cannot parse OPNsense disk usage: "${stdout.trim()}"`, latency_ms };
|
|
||||||
}
|
|
||||||
|
|
||||||
if (usedPct >= OPNSENSE_DISK_THRESHOLD) {
|
|
||||||
return {
|
|
||||||
healthy: false,
|
|
||||||
message: `OPNsense disk usage ${usedPct}% ≥ threshold ${OPNSENSE_DISK_THRESHOLD}%`,
|
|
||||||
details: { usedPercent: usedPct, threshold: OPNSENSE_DISK_THRESHOLD },
|
|
||||||
latency_ms,
|
|
||||||
};
|
|
||||||
}
|
|
||||||
|
|
||||||
return {
|
|
||||||
healthy: true,
|
|
||||||
message: `OPNsense disk usage: ${usedPct}%`,
|
|
||||||
details: { usedPercent: usedPct },
|
|
||||||
latency_ms,
|
|
||||||
};
|
|
||||||
}
|
|
||||||
|
|
||||||
async function healOpnsenseDisk(diagnosis: string): Promise<HealResult> {
|
|
||||||
const steps = [
|
|
||||||
// Remove logs older than 7 days
|
|
||||||
'find /var/log -name "*.log" -mtime +7 -delete 2>/dev/null || true',
|
|
||||||
// Suricata JSON logs older than 3 days
|
|
||||||
'find /var/log/suricata -name "*.json" -mtime +3 -delete 2>/dev/null || true',
|
|
||||||
'find /var/log/suricata -name "*.json.gz" -mtime +1 -delete 2>/dev/null || true',
|
|
||||||
// Tmp files older than 1 day
|
|
||||||
'find /tmp /var/tmp -mtime +1 -delete 2>/dev/null || true',
|
|
||||||
// pkg cache
|
|
||||||
'pkg clean -y 2>/dev/null || true',
|
|
||||||
// Report new usage
|
|
||||||
"df -h / | awk 'NR==2{print $5}'",
|
|
||||||
];
|
|
||||||
|
|
||||||
const { stdout, stderr, success } = await sshExec(OPNSENSE_HOST, steps.join('; '));
|
|
||||||
const output = `${stdout}\n${stderr}`.trim();
|
|
||||||
const newUsage = stdout.trim().split('\n').at(-1) ?? '?';
|
|
||||||
|
|
||||||
logger.info({ diagnosis: diagnosis.slice(0, 120), newUsage, output: output.slice(0, 400) }, 'OPNsense disk cleanup executed');
|
|
||||||
|
|
||||||
return {
|
|
||||||
action_taken: `cleaned logs, tmp, pkg cache on OPNsense — disk now at ${newUsage}`,
|
|
||||||
success,
|
|
||||||
output: output.slice(0, 500),
|
|
||||||
};
|
|
||||||
}
|
|
||||||
|
|
||||||
// ─── Exported check list ──────────────────────────────────────────────────────
|
// ─── Exported check list ──────────────────────────────────────────────────────
|
||||||
|
|
||||||
export const healthChecks: HealthCheck[] = [
|
export const healthChecks: HealthCheck[] = [
|
||||||
@ -676,11 +408,8 @@ export const healthChecks: HealthCheck[] = [
|
|||||||
{ name: 'postgresql', category: 'database', check: checkPostgres, heal: healPostgres },
|
{ name: 'postgresql', category: 'database', check: checkPostgres, heal: healPostgres },
|
||||||
{ name: 'ollama', category: 'service', check: checkOllama, heal: healOllama },
|
{ name: 'ollama', category: 'service', check: checkOllama, heal: healOllama },
|
||||||
{ name: 'cloudflare-tunnel', category: 'tunnel', check: checkCloudflareTunnel, heal: healCloudflareTunnel },
|
{ name: 'cloudflare-tunnel', category: 'tunnel', check: checkCloudflareTunnel, heal: healCloudflareTunnel },
|
||||||
{ name: 'service-ports', category: 'service', check: checkServicePorts, heal: healServicePorts },
|
|
||||||
{ name: 'disk-space', category: 'service', check: checkDiskSpace, heal: healDiskSpace },
|
{ name: 'disk-space', category: 'service', check: checkDiskSpace, heal: healDiskSpace },
|
||||||
{ name: 'memory', category: 'service', check: checkMemory, heal: healMemory },
|
{ name: 'memory', category: 'service', check: checkMemory, heal: healMemory },
|
||||||
{ name: 'network', category: 'network', check: checkNetwork, heal: healNetwork },
|
{ name: 'network', category: 'network', check: checkNetwork, heal: healNetwork },
|
||||||
{ name: 'wireguard', category: 'network', check: checkWireGuard, heal: healWireGuard },
|
{ name: 'wireguard', category: 'network', check: checkWireGuard, heal: healWireGuard },
|
||||||
{ name: 'proxmox-pvestatd', category: 'service', check: checkProxmoxPvestatd, heal: healProxmoxPvestatd },
|
|
||||||
{ name: 'opnsense-disk', category: 'service', check: checkOpnsenseDisk, heal: healOpnsenseDisk },
|
|
||||||
];
|
];
|
||||||
|
|||||||
@ -12,7 +12,6 @@
|
|||||||
"@fastify/cors": "^9.0.1",
|
"@fastify/cors": "^9.0.1",
|
||||||
"@fastify/helmet": "^11.1.1",
|
"@fastify/helmet": "^11.1.1",
|
||||||
"@fastify/rate-limit": "^9.1.0",
|
"@fastify/rate-limit": "^9.1.0",
|
||||||
"@shieldx/core": "file:../../../../../shieldx",
|
|
||||||
"ajv": "^8.17.1",
|
"ajv": "^8.17.1",
|
||||||
"fastify": "^4.28.1",
|
"fastify": "^4.28.1",
|
||||||
"franc": "^6.2.0",
|
"franc": "^6.2.0",
|
||||||
|
|||||||
@ -17,32 +17,6 @@ import {
|
|||||||
validationFailuresTotal,
|
validationFailuresTotal,
|
||||||
} from '../observability/metrics.js';
|
} from '../observability/metrics.js';
|
||||||
import { logger } from '../observability/logger.js';
|
import { logger } from '../observability/logger.js';
|
||||||
import { ShieldX } from '@shieldx/core';
|
|
||||||
|
|
||||||
// Singleton ShieldX instance — initialized once, sub-millisecond scans
|
|
||||||
// Disable Ollama-dependent scanners (sentinel, constitutional, embedding, attention)
|
|
||||||
// to keep gateway scans fast and dependency-free
|
|
||||||
const shieldx = new ShieldX({
|
|
||||||
scanners: {
|
|
||||||
rules: true, // 547+ rules, 50+ languages
|
|
||||||
sentinel: false, // Requires Ollama
|
|
||||||
constitutional: false, // Requires Ollama
|
|
||||||
embedding: false, // Requires Ollama
|
|
||||||
embeddingAnomaly: false,
|
|
||||||
entropy: true, // Zero-cost entropy analysis
|
|
||||||
yara: false, // Requires YARA binary
|
|
||||||
attention: false, // Requires Ollama
|
|
||||||
canary: false, // Not needed in gateway context
|
|
||||||
indirect: true, // RAG/tool injection detection
|
|
||||||
selfConsciousness: false,
|
|
||||||
crossModel: false,
|
|
||||||
behavioral: true, // Session profiling
|
|
||||||
unicode: true, // Homoglyph/script detection
|
|
||||||
tokenizer: true, // I.g.n.o.r.e-style attacks
|
|
||||||
compressedPayload: true,
|
|
||||||
},
|
|
||||||
logging: { level: 'warn', structured: true, incidentLog: false },
|
|
||||||
} as any); // DeepPartial config — merges with defaults
|
|
||||||
|
|
||||||
const CompletionRequestSchema = z.object({
|
const CompletionRequestSchema = z.object({
|
||||||
caller: z.string().min(1).max(100),
|
caller: z.string().min(1).max(100),
|
||||||
@ -64,37 +38,43 @@ type CompletionRequest = z.infer<typeof CompletionRequestSchema>;
|
|||||||
|
|
||||||
const SKIP_SHIELDX_CALLERS = new Set(['internal', 'shieldx']);
|
const SKIP_SHIELDX_CALLERS = new Set(['internal', 'shieldx']);
|
||||||
|
|
||||||
async function runShieldXScan(
|
async function runShieldXScan(input: string, caller: string): Promise<{ passed: boolean; reason?: string }> {
|
||||||
input: string,
|
const GATEWAY_URL = `http://localhost:${process.env['PORT'] ?? '3100'}`;
|
||||||
caller: string,
|
|
||||||
): Promise<{ passed: boolean; reason?: string; threatLevel?: string; phase?: string; latencyMs?: number }> {
|
|
||||||
try {
|
try {
|
||||||
const result = await shieldx.scanInput(input);
|
const response = await fetch(`${GATEWAY_URL}/v1/completion`, {
|
||||||
|
method: 'POST',
|
||||||
|
headers: { 'Content-Type': 'application/json', 'X-Caller-ID': 'internal' },
|
||||||
|
body: JSON.stringify({
|
||||||
|
caller: 'internal',
|
||||||
|
task_type: 'shieldx_threat_classification',
|
||||||
|
input,
|
||||||
|
options: { return_validation_details: false },
|
||||||
|
}),
|
||||||
|
signal: AbortSignal.timeout(8000),
|
||||||
|
});
|
||||||
|
|
||||||
if (result.detected) {
|
if (!response.ok) return { passed: true }; // Fail open if ShieldX is down
|
||||||
logger.warn({
|
|
||||||
caller,
|
|
||||||
threatLevel: result.threatLevel,
|
|
||||||
phase: result.killChainPhase,
|
|
||||||
action: result.action,
|
|
||||||
latencyMs: result.latencyMs,
|
|
||||||
ensemble: result.ensemble,
|
|
||||||
atlasMapping: result.atlasMapping?.techniqueIds?.slice(0, 5),
|
|
||||||
scannerCount: result.scanResults.length,
|
|
||||||
}, 'ShieldX threat detected — input blocked');
|
|
||||||
|
|
||||||
return {
|
const result = await response.json() as { output?: string; status?: string };
|
||||||
passed: false,
|
if (result.status !== 'approved' || !result.output) return { passed: true };
|
||||||
reason: `Prompt injection detected: ${result.killChainPhase} (${result.threatLevel})`,
|
|
||||||
threatLevel: result.threatLevel,
|
type ShieldResult = { threat_detected: boolean; threat_type?: string; confidence?: number };
|
||||||
phase: result.killChainPhase,
|
let parsed: ShieldResult;
|
||||||
latencyMs: result.latencyMs,
|
try {
|
||||||
};
|
parsed = JSON.parse(result.output) as ShieldResult;
|
||||||
|
} catch {
|
||||||
|
return { passed: true };
|
||||||
}
|
}
|
||||||
|
|
||||||
return { passed: true, latencyMs: result.latencyMs };
|
if (parsed.threat_detected && (parsed.confidence ?? 0) > 0.8) {
|
||||||
|
logger.warn({ caller, threat_type: parsed.threat_type }, 'ShieldX threat detected');
|
||||||
|
return { passed: false, reason: `Threat detected: ${parsed.threat_type ?? 'unknown'}` };
|
||||||
|
}
|
||||||
|
|
||||||
|
return { passed: true };
|
||||||
} catch (err) {
|
} catch (err) {
|
||||||
logger.error({ err, caller }, 'ShieldX scan error — failing open');
|
// ShieldX unavailable — fail open (log but continue)
|
||||||
|
logger.warn({ err, caller }, 'ShieldX scan failed, continuing without scan');
|
||||||
return { passed: true };
|
return { passed: true };
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -122,7 +102,7 @@ export async function completionRoute(fastify: FastifyInstance): Promise<void> {
|
|||||||
const { caller, input, language, context, options } = body;
|
const { caller, input, language, context, options } = body;
|
||||||
const returnValidationDetails = options?.return_validation_details ?? false;
|
const returnValidationDetails = options?.return_validation_details ?? false;
|
||||||
|
|
||||||
// Stage 2: ShieldX scan (real library, 547+ rules, sub-millisecond)
|
// Stage 2: ShieldX scan
|
||||||
if (!SKIP_SHIELDX_CALLERS.has(caller)) {
|
if (!SKIP_SHIELDX_CALLERS.has(caller)) {
|
||||||
const shieldResult = await runShieldXScan(input, caller);
|
const shieldResult = await runShieldXScan(input, caller);
|
||||||
if (!shieldResult.passed) {
|
if (!shieldResult.passed) {
|
||||||
@ -131,9 +111,6 @@ export async function completionRoute(fastify: FastifyInstance): Promise<void> {
|
|||||||
statusCode: 400,
|
statusCode: 400,
|
||||||
error: 'Rejected',
|
error: 'Rejected',
|
||||||
message: shieldResult.reason ?? 'Input rejected by security scan',
|
message: shieldResult.reason ?? 'Input rejected by security scan',
|
||||||
threat_level: shieldResult.threatLevel,
|
|
||||||
kill_chain_phase: shieldResult.phase,
|
|
||||||
shieldx_latency_ms: shieldResult.latencyMs,
|
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user