fix: catch non-zero exit in local train, return JSON instead of 500

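- `sh -lc "<command> <lane>"` replaced with `bash <command> <lane>` to avoid dash/profile.d syntax errors; the command is now passed to bash as a script path with the lane as a separate argument, not evaluated as a login-shell command string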
- runCommand errors now caught in local provider path
- stdout/stderr extracted from error object and returned as JSON
- No more HTTP 500 on script failure
This commit is contained in:
Rene Fichtmueller 2026-04-25 23:24:04 +02:00
parent 7935453073
commit d5be0ba43c

View File

@ -165,8 +165,23 @@ selflearningRouter.post("/train", async (req: Request, res: Response) => {
res.status(409).json({ success: false, error: "Local training command is not configured.", suggestion: "Set TIP_LOCAL_TRAIN_COMMAND; the lane name is appended automatically." });
return;
}
const out = await runCommand("sh", ["-lc", `${command} ${lane}`], 12 * 60 * 60 * 1000);
res.json({ success: true, provider, lane, stdout: out.stdout.slice(-6000), stderr: out.stderr.slice(-4000) });
try {
const out = await runCommand("bash", [command, lane], 12 * 60 * 60 * 1000);
res.json({ success: true, provider, lane, stdout: out.stdout.slice(-6000), stderr: out.stderr.slice(-4000) });
} catch (err: unknown) {
// execFileAsync throws on non-zero exit — stdout/stderr are still on the error object
const e = err as { stdout?: string; stderr?: string; message?: string };
const stdout = (e.stdout ?? "").slice(-6000);
const stderr = (e.stderr ?? "").slice(-4000);
res.json({
success: false,
provider,
lane,
error: e.message ?? String(err),
stdout,
stderr,
});
}
return;
}
const endpoint = process.env.TIP_RUNPOD_ENDPOINT_ID || process.env.RUNPOD_ENDPOINT_ID;