feat(research-robot): actionable recommendations + dispatch/pause/resume buttons

The Research Robot panel showed only an LLM assessment (info, no action). Now:

API (research-robot.ts):
- GET enriches response with recommendations[] computed from live pgboss state:
  classifies each persistently-failing job (auth/401, network, no-handler, other)
  into severity + concrete advice + offered actions.
- POST /action {action,job}: dispatch (enqueue one run), pause (remove from
  schedule with backup to research_robot_paused_schedules), resume (restore).
  All validated against the pgboss.queue whitelist.

Dashboard:
- Renders each recommendation as a card with severity colour, cause, last error,
  and action buttons (Jetzt auslösen / Pausieren / Fortsetzen / Token-Anleitung).
- Verified: sync:flexoptix-catalog -> critical auth (HTTP 401), offers
  token-help + pause. Dispatch/pause/resume roundtrip tested green.
This commit is contained in:
Rene Fichtmueller 2026-06-06 16:53:54 +00:00
parent 0cf607040f
commit a9f95fc552
2 changed files with 300 additions and 6 deletions

View File

@ -0,0 +1,210 @@
import { Router, Request, Response } from "express";
import { pool } from "../db/client";
export const researchRobotRouter = Router();
// Known job queues are validated against pgboss.queue before any action.
async function isKnownQueue(name: string): Promise<boolean> {
const r = await pool.query("SELECT 1 FROM pgboss.queue WHERE name = $1", [name]);
return (r.rowCount ?? 0) > 0;
}
type Recommendation = {
job: string;
severity: "critical" | "warning" | "info";
title: string;
detail: string;
cause: string;
// Which actions the UI should offer for this job
actions: Array<"dispatch" | "pause" | "resume" | "token_help">;
failed_count?: number;
last_error?: string;
};
// Classify a job error message into a human cause + recommended actions.
function classifyError(jobName: string, errorText: string): Omit<Recommendation, "job" | "failed_count" | "last_error"> {
const e = (errorText || "").toLowerCase();
if (/http 401|http 403|unauthorized|forbidden|invalid.*token|api.*key/.test(e)) {
return {
severity: "critical",
title: `${jobName}: Authentifizierung fehlgeschlagen`,
detail: "Der Job erreicht die Quelle, wird aber abgewiesen (401/403). Meist ein fehlender oder abgelaufener API-Token. Ohne gültigen Token reiht der Robot den Job endlos neu ein und er schlägt jedes Mal fehl.",
cause: "auth",
actions: ["token_help", "pause"],
};
}
if (/timeout|etimedout|econnrefused|enotfound|network|socket hang up|fetch failed/.test(e)) {
return {
severity: "warning",
title: `${jobName}: Quelle nicht erreichbar`,
detail: "Netzwerk-/Verbindungsfehler beim Abruf. Oft temporär (Ziel-Server kurz down, Rate-Limit). Manuell erneut auslösen oder pausieren, falls die Quelle dauerhaft weg ist.",
cause: "network",
actions: ["dispatch", "pause"],
};
}
if (/no handler|not registered|unknown job|kein handler/.test(e)) {
return {
severity: "critical",
title: `${jobName}: Kein Handler registriert`,
detail: "Der Job ist im Schedule, aber es gibt keinen Code-Handler dafür. Er kann nie laufen — entweder Handler nachrüsten oder den Job aus dem Schedule entfernen.",
cause: "no_handler",
actions: ["pause"],
};
}
return {
severity: "warning",
title: `${jobName}: Wiederholt fehlgeschlagen`,
detail: "Der Job schlägt regelmäßig fehl. Letzte Fehlermeldung siehe unten. Manuell auslösen zum erneuten Versuch, oder pausieren falls überflüssig.",
cause: "other",
actions: ["dispatch", "pause"],
};
}
// GET /api/research-robot — letzte Robot-Laeufe + abgeleitete Handlungsempfehlungen
researchRobotRouter.get("/", async (_req: Request, res: Response) => {
try {
const r = await pool.query(
"SELECT run_at, freshness, decision, dispatched, model FROM research_robot_runs ORDER BY run_at DESC LIMIT 20"
);
// Build recommendations from LIVE pgboss state (more accurate than the stored summary).
const recommendations: Recommendation[] = [];
try {
const failing = await pool.query<{ name: string; failed: string; last_error: string | null }>(`
SELECT j.name,
COUNT(*) FILTER (WHERE j.state = 'failed') AS failed,
(SELECT jj.output::text FROM pgboss.job jj
WHERE jj.name = j.name AND jj.state = 'failed' AND jj.output IS NOT NULL
ORDER BY jj.created_on DESC LIMIT 1) AS last_error
FROM pgboss.job j
WHERE j.created_on > NOW() - INTERVAL '3 days'
GROUP BY j.name
HAVING COUNT(*) FILTER (WHERE j.state = 'failed') >= 2
AND COUNT(*) FILTER (WHERE j.state = 'completed') = 0
ORDER BY failed DESC
LIMIT 20
`);
// Which jobs are currently scheduled (so we can offer pause vs resume correctly)
const scheduled = await pool.query<{ name: string }>("SELECT name FROM pgboss.schedule");
const scheduledSet = new Set(scheduled.rows.map((s) => s.name));
for (const row of failing.rows) {
let errMsg = "";
try {
const parsed = JSON.parse(row.last_error || "{}") as { stack?: string; message?: string };
errMsg = (parsed.stack || parsed.message || row.last_error || "").slice(0, 300);
} catch {
errMsg = (row.last_error || "").slice(0, 300);
}
const cls = classifyError(row.name, errMsg);
// If the job isn't scheduled, swap pause→resume in the offered actions
const actions = cls.actions.map((a) =>
a === "pause" && !scheduledSet.has(row.name) ? "resume" : a
) as Recommendation["actions"];
recommendations.push({
job: row.name,
severity: cls.severity,
title: cls.title,
detail: cls.detail,
cause: cls.cause,
actions: [...new Set(actions)],
failed_count: parseInt(row.failed, 10),
last_error: errMsg.split("\n")[0].slice(0, 160),
});
}
} catch { /* pgboss introspection best-effort */ }
if (recommendations.length === 0) {
recommendations.push({
job: "",
severity: "info",
title: "Alle Scraper-Jobs gesund",
detail: "Keine dauerhaft fehlschlagenden Jobs erkannt. Keine Aktion nötig.",
cause: "healthy",
actions: [],
});
}
res.json({ success: true, runs: r.rows, recommendations });
} catch (e) {
res.json({ success: true, runs: [], recommendations: [] });
}
});
// POST /api/research-robot/action — { action, job }
// dispatch → Job sofort in die Queue stellen; pause → aus Schedule entfernen (Backup);
// resume → aus Backup wiederherstellen.
researchRobotRouter.post("/action", async (req: Request, res: Response) => {
const action = String((req.body?.action ?? "")).trim();
const job = String((req.body?.job ?? "")).trim();
if (!["dispatch", "pause", "resume"].includes(action)) {
res.status(400).json({ success: false, error: "Unbekannte Aktion." });
return;
}
if (!job || !(await isKnownQueue(job))) {
res.status(400).json({ success: false, error: "Unbekannter oder ungültiger Job." });
return;
}
try {
if (action === "dispatch") {
// Enqueue one job run immediately (pg-boss v10 standard policy, mirrors a scheduled send)
await pool.query(
`INSERT INTO pgboss.job (id, name, data, priority, retry_limit, retry_delay, expire_in, keep_until, policy, start_after)
VALUES (gen_random_uuid(), $1, '{}'::jsonb, 0, 2, 0, INTERVAL '15 minutes', NOW() + INTERVAL '14 days', 'standard', NOW())`,
[job]
);
res.json({ success: true, action, job, message: `Job '${job}' wurde in die Queue gestellt und läuft beim nächsten Worker-Tick.` });
return;
}
// Ensure backup table for paused schedules exists
await pool.query(`
CREATE TABLE IF NOT EXISTS research_robot_paused_schedules (
name TEXT PRIMARY KEY,
cron TEXT, timezone TEXT, data JSONB, options JSONB,
paused_at TIMESTAMPTZ DEFAULT NOW()
)`);
if (action === "pause") {
const sched = await pool.query("SELECT name, cron, timezone, data, options FROM pgboss.schedule WHERE name = $1", [job]);
if (sched.rowCount === 0) {
res.json({ success: true, action, job, message: `Job '${job}' war nicht im Schedule — bereits pausiert.` });
return;
}
const s = sched.rows[0];
await pool.query(
`INSERT INTO research_robot_paused_schedules (name, cron, timezone, data, options, paused_at)
VALUES ($1,$2,$3,$4,$5,NOW())
ON CONFLICT (name) DO UPDATE SET cron=$2, timezone=$3, data=$4, options=$5, paused_at=NOW()`,
[s.name, s.cron, s.timezone, s.data, s.options]
);
await pool.query("DELETE FROM pgboss.schedule WHERE name = $1", [job]);
res.json({ success: true, action, job, message: `Job '${job}' pausiert (aus Schedule entfernt). Der Robot reiht ihn nicht mehr neu ein. Reversibel über 'Fortsetzen'.` });
return;
}
if (action === "resume") {
const bak = await pool.query("SELECT name, cron, timezone, data, options FROM research_robot_paused_schedules WHERE name = $1", [job]);
if (bak.rowCount === 0) {
res.status(404).json({ success: false, error: `Kein pausierter Schedule-Eintrag für '${job}' gefunden.` });
return;
}
const s = bak.rows[0];
await pool.query(
`INSERT INTO pgboss.schedule (name, cron, timezone, data, options, created_on, updated_on)
VALUES ($1,$2,$3,$4,$5,NOW(),NOW())
ON CONFLICT (name) DO UPDATE SET cron=$2, timezone=$3, data=$4, options=$5, updated_on=NOW()`,
[s.name, s.cron, s.timezone, s.data, s.options]
);
await pool.query("DELETE FROM research_robot_paused_schedules WHERE name = $1", [job]);
res.json({ success: true, action, job, message: `Job '${job}' wieder aktiviert (zurück im Schedule).` });
return;
}
} catch (e) {
res.status(500).json({ success: false, error: (e as Error).message });
}
});

View File

@ -10435,21 +10435,105 @@ async function loadResearchRobot() {
var r = d.runs[0], f = r.freshness || {}, dec = r.decision || {};
var card = el("research-robot-card"); if (!card) return;
var total = f.jobsTotal || 0, stale = f.stale || 0;
var escd = (dec.escalated && dec.escalated.length) ? dec.escalated : [];
var disp = r.dispatched || [];
var recs = d.recommendations || [];
var h = "<div class=\"card-label\">&#129302; Research Robot <span style=\"font-weight:400;color:var(--text-dim);font-size:0.72rem\">letzter Lauf " + esc(new Date(r.run_at).toLocaleString()) + " &middot; " + esc(r.model||"") + "</span></div>";
h += "<div class=\"mt\" style=\"display:flex;gap:1.6rem;flex-wrap:wrap;align-items:baseline;margin-bottom:0.5rem\">";
h += "<div class=\"mt\" style=\"display:flex;gap:1.6rem;flex-wrap:wrap;align-items:baseline;margin-bottom:0.6rem\">";
h += "<div><span style=\"font-size:1.5rem;font-weight:700;color:var(--green)\">" + (total-stale) + "</span> <span style=\"color:var(--text-dim);font-size:0.8rem\">/" + total + " Jobs frisch</span></div>";
h += "<div><span style=\"font-size:1.5rem;font-weight:700;color:" + (stale>0?"#f59e0b":"var(--green)") + "\">" + stale + "</span> <span style=\"color:var(--text-dim);font-size:0.8rem\">ueberfaellig</span></div>";
if (escd.length) h += "<div><span style=\"font-size:1.5rem;font-weight:700;color:#ef4444\">" + escd.length + "</span> <span style=\"color:var(--text-dim);font-size:0.8rem\">eskaliert</span></div>";
var critCount = recs.filter(function(x){return x.severity==="critical";}).length;
if (critCount) h += "<div><span style=\"font-size:1.5rem;font-weight:700;color:#ef4444\">" + critCount + "</span> <span style=\"color:var(--text-dim);font-size:0.8rem\">Handlungsbedarf</span></div>";
h += "</div>";
if (dec.assessment) h += "<div style=\"font-size:0.84rem;margin-bottom:0.4rem\"><b>KI-Urteil (lokales LLM):</b> " + esc(dec.assessment) + "</div>";
if (disp.length) h += "<div style=\"font-size:0.76rem;color:var(--text-dim)\">Neu eingereiht: " + disp.map(esc).join(", ") + "</div>";
if (escd.length) h += "<div style=\"font-size:0.76rem;color:#ef4444;margin-top:0.3rem\">&#9888; Dauerhaft fehlschlagend: " + escd.map(esc).join(", ") + "</div>";
if (dec.assessment) h += "<div style=\"font-size:0.8rem;margin-bottom:0.6rem;color:var(--text-dim)\"><b style=\"color:var(--text)\">KI-Urteil (lokales LLM):</b> " + esc(dec.assessment) + "</div>";
if (disp.length) h += "<div style=\"font-size:0.74rem;color:var(--text-dim);margin-bottom:0.5rem\">Automatisch neu eingereiht: " + disp.map(esc).join(", ") + "</div>";
// ── Recommendations with action buttons ─────────────────────────────────
var sevMeta = {
critical: { icon: "&#128308;", color: "#ef4444", bg: "rgba(239,68,68,0.07)", bd: "rgba(239,68,68,0.3)" },
warning: { icon: "&#9888;&#65039;", color: "#f59e0b", bg: "rgba(245,158,11,0.07)", bd: "rgba(245,158,11,0.3)" },
info: { icon: "&#9989;", color: "#22c55e", bg: "rgba(34,197,94,0.07)", bd: "rgba(34,197,94,0.3)" }
};
var actionMeta = {
dispatch: { label: "&#9654; Jetzt auslösen", color: "#6366f1", title: "Job sofort in die Queue stellen (einmaliger Lauf)" },
pause: { label: "&#10074;&#10074; Pausieren", color: "#f59e0b", title: "Aus dem Schedule nehmen — Robot reiht ihn nicht mehr neu ein (reversibel)" },
resume: { label: "&#9654; Fortsetzen", color: "#22c55e", title: "Wieder in den Schedule aufnehmen" },
token_help: { label: "&#128273; Token-Anleitung", color: "#0ea5e9", title: "Wie man den fehlenden API-Token setzt" }
};
h += "<div style=\"display:flex;flex-direction:column;gap:0.5rem;margin-top:0.4rem\">";
recs.forEach(function(rec) {
var m = sevMeta[rec.severity] || sevMeta.info;
h += "<div style=\"border:1px solid " + m.bd + ";background:" + m.bg + ";border-radius:8px;padding:0.6rem 0.75rem\">";
h += "<div style=\"display:flex;align-items:flex-start;gap:0.5rem;justify-content:space-between;flex-wrap:wrap\">";
h += "<div style=\"flex:1;min-width:200px\">";
h += "<div style=\"font-weight:700;font-size:0.84rem;color:" + m.color + "\">" + m.icon + " " + esc(rec.title) + "</div>";
h += "<div style=\"font-size:0.76rem;color:var(--text-dim);margin-top:0.25rem;line-height:1.45\">" + esc(rec.detail) + "</div>";
if (rec.last_error) h += "<div style=\"font-size:0.68rem;color:var(--text-dim);margin-top:0.3rem;font-family:var(--mono);opacity:0.8\">Letzter Fehler: " + esc(rec.last_error) + (rec.failed_count?" (" + rec.failed_count + "x fehlgeschlagen)":"") + "</div>";
h += "</div>";
// Action buttons
if (rec.actions && rec.actions.length) {
h += "<div style=\"display:flex;gap:0.4rem;flex-wrap:wrap;align-items:flex-start\">";
rec.actions.forEach(function(act) {
var am = actionMeta[act]; if (!am) return;
if (act === "token_help") {
h += "<button onclick=\"showTokenHelp('" + esc(rec.job) + "')\" title=\"" + am.title + "\" style=\"cursor:pointer;background:transparent;border:1px solid " + am.color + ";color:" + am.color + ";border-radius:6px;padding:4px 10px;font-size:0.72rem;font-weight:600;white-space:nowrap\">" + am.label + "</button>";
} else {
h += "<button onclick=\"researchRobotAction('" + act + "','" + esc(rec.job) + "',this)\" title=\"" + am.title + "\" style=\"cursor:pointer;background:transparent;border:1px solid " + am.color + ";color:" + am.color + ";border-radius:6px;padding:4px 10px;font-size:0.72rem;font-weight:600;white-space:nowrap\">" + am.label + "</button>";
}
});
h += "</div>";
}
h += "</div></div>";
});
h += "</div>";
card.innerHTML = h; card.style.display = "";
} catch(e) {}
}
// Execute a research-robot action (dispatch/pause/resume) and refresh the panel
async function researchRobotAction(action, job, btn) {
if (btn) { btn.disabled = true; btn.style.opacity = "0.5"; }
try {
var token = (window.loadToken ? window.loadToken() : "") || "";
var resp = await fetch("/api/research-robot/action", {
method: "POST",
headers: { "Authorization": "Bearer " + token, "Content-Type": "application/json" },
body: JSON.stringify({ action: action, job: job })
});
var d = await resp.json();
if (d.success) {
if (typeof showToast === "function") showToast("Aktion ausgeführt", d.message || (action + " für " + job));
} else {
if (typeof showToast === "function") showToast("Fehler", d.error || "Aktion fehlgeschlagen", true);
}
} catch(e) {
if (typeof showToast === "function") showToast("Fehler", e.message, true);
}
setTimeout(loadResearchRobot, 600);
}
// Show how to set a missing API token (e.g. FLEXOPTIX_API_TOKEN)
function showTokenHelp(job) {
var isFlexoptix = /flexoptix/i.test(job);
var msg;
if (isFlexoptix) {
msg = "Der Job '" + job + "' ruft die Flexoptix REST-API auf und bekommt HTTP 401 (kein gültiger Token).\n\n"
+ "So setzt du den Token:\n"
+ "1. Im Flexoptix-Portal einen REST-API-Token erzeugen (Account → API).\n"
+ "2. Auf Erik in /opt/tip/.env setzen: FLEXOPTIX_API_TOKEN=<dein-token>\n"
+ "3. API neu starten: pm2 restart tip-api\n\n"
+ "Bis der Token gesetzt ist, kannst du den Job pausieren, damit er nicht alle 15 Min erneut fehlschlägt.";
} else {
msg = "Der Job '" + job + "' wird von der Quelle mit 401/403 abgewiesen — meist fehlt ein API-Token oder er ist abgelaufen.\n\n"
+ "Token in /opt/tip/.env setzen und 'pm2 restart tip-api'. Bis dahin Job pausieren.";
}
if (typeof showModal === "function") { showModal("Token-Anleitung", "<pre style=\"white-space:pre-wrap;font-size:0.8rem;line-height:1.5\">" + esc(msg) + "</pre>"); }
else { alert(msg); }
}
async function loadProcurementPulse() {
var pulse = el('ov-proc-pulse');
var moversCard = el('ov-movers-card');