diff --git a/packages/api/src/routes/research-robot.ts b/packages/api/src/routes/research-robot.ts new file mode 100644 index 0000000..b857a08 --- /dev/null +++ b/packages/api/src/routes/research-robot.ts @@ -0,0 +1,210 @@ +import { Router, Request, Response } from "express"; +import { pool } from "../db/client"; + +export const researchRobotRouter = Router(); + +// Known job queues are validated against pgboss.queue before any action. +async function isKnownQueue(name: string): Promise { + const r = await pool.query("SELECT 1 FROM pgboss.queue WHERE name = $1", [name]); + return (r.rowCount ?? 0) > 0; +} + +type Recommendation = { + job: string; + severity: "critical" | "warning" | "info"; + title: string; + detail: string; + cause: string; + // Which actions the UI should offer for this job + actions: Array<"dispatch" | "pause" | "resume" | "token_help">; + failed_count?: number; + last_error?: string; +}; + +// Classify a job error message into a human cause + recommended actions. +function classifyError(jobName: string, errorText: string): Omit { + const e = (errorText || "").toLowerCase(); + + if (/http 401|http 403|unauthorized|forbidden|invalid.*token|api.*key/.test(e)) { + return { + severity: "critical", + title: `${jobName}: Authentifizierung fehlgeschlagen`, + detail: "Der Job erreicht die Quelle, wird aber abgewiesen (401/403). Meist ein fehlender oder abgelaufener API-Token. Ohne gültigen Token reiht der Robot den Job endlos neu ein und er schlägt jedes Mal fehl.", + cause: "auth", + actions: ["token_help", "pause"], + }; + } + if (/timeout|etimedout|econnrefused|enotfound|network|socket hang up|fetch failed/.test(e)) { + return { + severity: "warning", + title: `${jobName}: Quelle nicht erreichbar`, + detail: "Netzwerk-/Verbindungsfehler beim Abruf. Oft temporär (Ziel-Server kurz down, Rate-Limit). Manuell erneut auslösen oder pausieren, falls die Quelle dauerhaft weg ist.", + cause: "network", + actions: ["dispatch", "pause"], + }; + } + if (/no handler|not registered|unknown job|kein handler/.test(e)) { + return { + severity: "critical", + title: `${jobName}: Kein Handler registriert`, + detail: "Der Job ist im Schedule, aber es gibt keinen Code-Handler dafür. Er kann nie laufen — entweder Handler nachrüsten oder den Job aus dem Schedule entfernen.", + cause: "no_handler", + actions: ["pause"], + }; + } + return { + severity: "warning", + title: `${jobName}: Wiederholt fehlgeschlagen`, + detail: "Der Job schlägt regelmäßig fehl. Letzte Fehlermeldung siehe unten. Manuell auslösen zum erneuten Versuch, oder pausieren falls überflüssig.", + cause: "other", + actions: ["dispatch", "pause"], + }; +} + +// GET /api/research-robot — letzte Robot-Laeufe + abgeleitete Handlungsempfehlungen +researchRobotRouter.get("/", async (_req: Request, res: Response) => { + try { + const r = await pool.query( + "SELECT run_at, freshness, decision, dispatched, model FROM research_robot_runs ORDER BY run_at DESC LIMIT 20" + ); + + // Build recommendations from LIVE pgboss state (more accurate than the stored summary). + const recommendations: Recommendation[] = []; + try { + const failing = await pool.query<{ name: string; failed: string; last_error: string | null }>(` + SELECT j.name, + COUNT(*) FILTER (WHERE j.state = 'failed') AS failed, + (SELECT jj.output::text FROM pgboss.job jj + WHERE jj.name = j.name AND jj.state = 'failed' AND jj.output IS NOT NULL + ORDER BY jj.created_on DESC LIMIT 1) AS last_error + FROM pgboss.job j + WHERE j.created_on > NOW() - INTERVAL '3 days' + GROUP BY j.name + HAVING COUNT(*) FILTER (WHERE j.state = 'failed') >= 2 + AND COUNT(*) FILTER (WHERE j.state = 'completed') = 0 + ORDER BY failed DESC + LIMIT 20 + `); + + // Which jobs are currently scheduled (so we can offer pause vs resume correctly) + const scheduled = await pool.query<{ name: string }>("SELECT name FROM pgboss.schedule"); + const scheduledSet = new Set(scheduled.rows.map((s) => s.name)); + + for (const row of failing.rows) { + let errMsg = ""; + try { + const parsed = JSON.parse(row.last_error || "{}") as { stack?: string; message?: string }; + errMsg = (parsed.stack || parsed.message || row.last_error || "").slice(0, 300); + } catch { + errMsg = (row.last_error || "").slice(0, 300); + } + const cls = classifyError(row.name, errMsg); + // If the job isn't scheduled, swap pause→resume in the offered actions + const actions = cls.actions.map((a) => + a === "pause" && !scheduledSet.has(row.name) ? "resume" : a + ) as Recommendation["actions"]; + recommendations.push({ + job: row.name, + severity: cls.severity, + title: cls.title, + detail: cls.detail, + cause: cls.cause, + actions: [...new Set(actions)], + failed_count: parseInt(row.failed, 10), + last_error: errMsg.split("\n")[0].slice(0, 160), + }); + } + } catch { /* pgboss introspection best-effort */ } + + if (recommendations.length === 0) { + recommendations.push({ + job: "", + severity: "info", + title: "Alle Scraper-Jobs gesund", + detail: "Keine dauerhaft fehlschlagenden Jobs erkannt. Keine Aktion nötig.", + cause: "healthy", + actions: [], + }); + } + + res.json({ success: true, runs: r.rows, recommendations }); + } catch (e) { + res.json({ success: true, runs: [], recommendations: [] }); + } +}); + +// POST /api/research-robot/action — { action, job } +// dispatch → Job sofort in die Queue stellen; pause → aus Schedule entfernen (Backup); +// resume → aus Backup wiederherstellen. +researchRobotRouter.post("/action", async (req: Request, res: Response) => { + const action = String((req.body?.action ?? "")).trim(); + const job = String((req.body?.job ?? "")).trim(); + + if (!["dispatch", "pause", "resume"].includes(action)) { + res.status(400).json({ success: false, error: "Unbekannte Aktion." }); + return; + } + if (!job || !(await isKnownQueue(job))) { + res.status(400).json({ success: false, error: "Unbekannter oder ungültiger Job." }); + return; + } + + try { + if (action === "dispatch") { + // Enqueue one job run immediately (pg-boss v10 standard policy, mirrors a scheduled send) + await pool.query( + `INSERT INTO pgboss.job (id, name, data, priority, retry_limit, retry_delay, expire_in, keep_until, policy, start_after) + VALUES (gen_random_uuid(), $1, '{}'::jsonb, 0, 2, 0, INTERVAL '15 minutes', NOW() + INTERVAL '14 days', 'standard', NOW())`, + [job] + ); + res.json({ success: true, action, job, message: `Job '${job}' wurde in die Queue gestellt und läuft beim nächsten Worker-Tick.` }); + return; + } + + // Ensure backup table for paused schedules exists + await pool.query(` + CREATE TABLE IF NOT EXISTS research_robot_paused_schedules ( + name TEXT PRIMARY KEY, + cron TEXT, timezone TEXT, data JSONB, options JSONB, + paused_at TIMESTAMPTZ DEFAULT NOW() + )`); + + if (action === "pause") { + const sched = await pool.query("SELECT name, cron, timezone, data, options FROM pgboss.schedule WHERE name = $1", [job]); + if (sched.rowCount === 0) { + res.json({ success: true, action, job, message: `Job '${job}' war nicht im Schedule — bereits pausiert.` }); + return; + } + const s = sched.rows[0]; + await pool.query( + `INSERT INTO research_robot_paused_schedules (name, cron, timezone, data, options, paused_at) + VALUES ($1,$2,$3,$4,$5,NOW()) + ON CONFLICT (name) DO UPDATE SET cron=$2, timezone=$3, data=$4, options=$5, paused_at=NOW()`, + [s.name, s.cron, s.timezone, s.data, s.options] + ); + await pool.query("DELETE FROM pgboss.schedule WHERE name = $1", [job]); + res.json({ success: true, action, job, message: `Job '${job}' pausiert (aus Schedule entfernt). Der Robot reiht ihn nicht mehr neu ein. Reversibel über 'Fortsetzen'.` }); + return; + } + + if (action === "resume") { + const bak = await pool.query("SELECT name, cron, timezone, data, options FROM research_robot_paused_schedules WHERE name = $1", [job]); + if (bak.rowCount === 0) { + res.status(404).json({ success: false, error: `Kein pausierter Schedule-Eintrag für '${job}' gefunden.` }); + return; + } + const s = bak.rows[0]; + await pool.query( + `INSERT INTO pgboss.schedule (name, cron, timezone, data, options, created_on, updated_on) + VALUES ($1,$2,$3,$4,$5,NOW(),NOW()) + ON CONFLICT (name) DO UPDATE SET cron=$2, timezone=$3, data=$4, options=$5, updated_on=NOW()`, + [s.name, s.cron, s.timezone, s.data, s.options] + ); + await pool.query("DELETE FROM research_robot_paused_schedules WHERE name = $1", [job]); + res.json({ success: true, action, job, message: `Job '${job}' wieder aktiviert (zurück im Schedule).` }); + return; + } + } catch (e) { + res.status(500).json({ success: false, error: (e as Error).message }); + } +}); diff --git a/packages/dashboard/index.html b/packages/dashboard/index.html index 8708bc9..fe2bc22 100644 --- a/packages/dashboard/index.html +++ b/packages/dashboard/index.html @@ -10435,21 +10435,105 @@ async function loadResearchRobot() { var r = d.runs[0], f = r.freshness || {}, dec = r.decision || {}; var card = el("research-robot-card"); if (!card) return; var total = f.jobsTotal || 0, stale = f.stale || 0; - var escd = (dec.escalated && dec.escalated.length) ? dec.escalated : []; var disp = r.dispatched || []; + var recs = d.recommendations || []; + var h = "
🤖 Research Robot letzter Lauf " + esc(new Date(r.run_at).toLocaleString()) + " · " + esc(r.model||"") + "
"; - h += "
"; + h += "
"; h += "
" + (total-stale) + " /" + total + " Jobs frisch
"; h += "
0?"#f59e0b":"var(--green)") + "\">" + stale + " ueberfaellig
"; - if (escd.length) h += "
" + escd.length + " eskaliert
"; + var critCount = recs.filter(function(x){return x.severity==="critical";}).length; + if (critCount) h += "
" + critCount + " Handlungsbedarf
"; h += "
"; - if (dec.assessment) h += "
KI-Urteil (lokales LLM): " + esc(dec.assessment) + "
"; - if (disp.length) h += "
Neu eingereiht: " + disp.map(esc).join(", ") + "
"; - if (escd.length) h += "
⚠ Dauerhaft fehlschlagend: " + escd.map(esc).join(", ") + "
"; + + if (dec.assessment) h += "
KI-Urteil (lokales LLM): " + esc(dec.assessment) + "
"; + if (disp.length) h += "
Automatisch neu eingereiht: " + disp.map(esc).join(", ") + "
"; + + // ── Recommendations with action buttons ───────────────────────────────── + var sevMeta = { + critical: { icon: "🔴", color: "#ef4444", bg: "rgba(239,68,68,0.07)", bd: "rgba(239,68,68,0.3)" }, + warning: { icon: "⚠️", color: "#f59e0b", bg: "rgba(245,158,11,0.07)", bd: "rgba(245,158,11,0.3)" }, + info: { icon: "✅", color: "#22c55e", bg: "rgba(34,197,94,0.07)", bd: "rgba(34,197,94,0.3)" } + }; + var actionMeta = { + dispatch: { label: "▶ Jetzt auslösen", color: "#6366f1", title: "Job sofort in die Queue stellen (einmaliger Lauf)" }, + pause: { label: "❚❚ Pausieren", color: "#f59e0b", title: "Aus dem Schedule nehmen — Robot reiht ihn nicht mehr neu ein (reversibel)" }, + resume: { label: "▶ Fortsetzen", color: "#22c55e", title: "Wieder in den Schedule aufnehmen" }, + token_help: { label: "🔑 Token-Anleitung", color: "#0ea5e9", title: "Wie man den fehlenden API-Token setzt" } + }; + + h += "
"; + recs.forEach(function(rec) { + var m = sevMeta[rec.severity] || sevMeta.info; + h += "
"; + h += "
"; + h += "
"; + h += "
" + m.icon + " " + esc(rec.title) + "
"; + h += "
" + esc(rec.detail) + "
"; + if (rec.last_error) h += "
Letzter Fehler: " + esc(rec.last_error) + (rec.failed_count?" (" + rec.failed_count + "x fehlgeschlagen)":"") + "
"; + h += "
"; + // Action buttons + if (rec.actions && rec.actions.length) { + h += "
"; + rec.actions.forEach(function(act) { + var am = actionMeta[act]; if (!am) return; + if (act === "token_help") { + h += ""; + } else { + h += ""; + } + }); + h += "
"; + } + h += "
"; + }); + h += "
"; + card.innerHTML = h; card.style.display = ""; } catch(e) {} } +// Execute a research-robot action (dispatch/pause/resume) and refresh the panel +async function researchRobotAction(action, job, btn) { + if (btn) { btn.disabled = true; btn.style.opacity = "0.5"; } + try { + var token = (window.loadToken ? window.loadToken() : "") || ""; + var resp = await fetch("/api/research-robot/action", { + method: "POST", + headers: { "Authorization": "Bearer " + token, "Content-Type": "application/json" }, + body: JSON.stringify({ action: action, job: job }) + }); + var d = await resp.json(); + if (d.success) { + if (typeof showToast === "function") showToast("Aktion ausgeführt", d.message || (action + " für " + job)); + } else { + if (typeof showToast === "function") showToast("Fehler", d.error || "Aktion fehlgeschlagen", true); + } + } catch(e) { + if (typeof showToast === "function") showToast("Fehler", e.message, true); + } + setTimeout(loadResearchRobot, 600); +} + +// Show how to set a missing API token (e.g. FLEXOPTIX_API_TOKEN) +function showTokenHelp(job) { + var isFlexoptix = /flexoptix/i.test(job); + var msg; + if (isFlexoptix) { + msg = "Der Job '" + job + "' ruft die Flexoptix REST-API auf und bekommt HTTP 401 (kein gültiger Token).\n\n" + + "So setzt du den Token:\n" + + "1. Im Flexoptix-Portal einen REST-API-Token erzeugen (Account → API).\n" + + "2. Auf Erik in /opt/tip/.env setzen: FLEXOPTIX_API_TOKEN=\n" + + "3. API neu starten: pm2 restart tip-api\n\n" + + "Bis der Token gesetzt ist, kannst du den Job pausieren, damit er nicht alle 15 Min erneut fehlschlägt."; + } else { + msg = "Der Job '" + job + "' wird von der Quelle mit 401/403 abgewiesen — meist fehlt ein API-Token oder er ist abgelaufen.\n\n" + + "Token in /opt/tip/.env setzen und 'pm2 restart tip-api'. Bis dahin Job pausieren."; + } + if (typeof showModal === "function") { showModal("Token-Anleitung", "
" + esc(msg) + "
"); } + else { alert(msg); } +} + async function loadProcurementPulse() { var pulse = el('ov-proc-pulse'); var moversCard = el('ov-movers-card');