// transceiver-db/packages/api/src/routes/selflearning.ts

import { execFile } from "child_process";
import { existsSync, readFileSync } from "fs";
import { join } from "path";
import { promisify } from "util";
import { Router, Request, Response } from "express";

// Promise-returning wrapper around child_process.execFile; used by runCommand and keychain.
const execFileAsync = promisify(execFile);
// Router carrying the /status, /build, /publish-hf and /train endpoints registered in this file.
export const selflearningRouter = Router();

// The two training lanes accepted by the endpoints in this file.
type Lane = "tip_llm" | "blog_llm";
// Where a training run is executed; /train defaults to "runpod" when the request names no provider.
type Provider = "runpod" | "local";

// Directory four levels above this file's directory; runCommand uses it as the working directory.
const repoRoot = join(__dirname, "..", "..", "..", "..");
// Location of the manifest file that readManifest() loads.
const manifestPath = join(repoRoot, "training-data", "runpod", "manifest.json");

/**
 * Type guard for lane identifiers.
 *
 * @param value - Arbitrary (typically request-supplied) value.
 * @returns true only when `value` is exactly the string "tip_llm" or "blog_llm".
 */
function isLane(value: unknown): value is Lane {
  switch (value) {
    case "tip_llm":
    case "blog_llm":
      return true;
    default:
      return false;
  }
}

/**
 * Loads the manifest stored at `manifestPath`.
 *
 * @returns The parsed JSON content, or null when the file does not exist.
 * @throws Whatever readFileSync / JSON.parse throw for an unreadable or malformed file.
 */
function readManifest() {
  const manifestPresent = existsSync(manifestPath);
  if (manifestPresent) {
    const rawText = readFileSync(manifestPath, "utf8");
    return JSON.parse(rawText);
  }
  return null;
}

/**
 * Runs an executable (no shell) from `repoRoot` with the current process environment.
 *
 * @param command - Executable to launch.
 * @param args - Arguments passed to the executable.
 * @param timeoutMs - Timeout handed to execFile; defaults to 20 minutes.
 * @returns The captured stdout and stderr of the finished process.
 * @throws The execFile rejection (non-zero exit, timeout, output above 20 MiB, spawn failure).
 */
async function runCommand(command: string, args: string[], timeoutMs = 20 * 60 * 1000) {
  const maxOutputBytes = 20 * 1024 * 1024;
  const finished = await execFileAsync(command, args, {
    env: process.env,
    cwd: repoRoot,
    maxBuffer: maxOutputBytes,
    timeout: timeoutMs,
  });
  return { stdout: finished.stdout, stderr: finished.stderr };
}

/**
 * Looks up a secret by running `security find-generic-password -s <service> -w`.
 *
 * Only attempted when process.platform is "darwin". Any failure of the lookup
 * (non-zero exit, 5 s timeout, spawn error) is deliberately reported as "not found".
 *
 * @param service - Service name passed to the `-s` flag.
 * @returns The trimmed command output, or null when unavailable or empty.
 */
async function keychain(service: string): Promise<string | null> {
  const onMac = process.platform === "darwin";
  if (!onMac) {
    return null;
  }
  try {
    const lookup = await execFileAsync("security", ["find-generic-password", "-s", service, "-w"], { timeout: 5000 });
    const password = lookup.stdout.trim();
    return password.length > 0 ? password : null;
  } catch {
    return null;
  }
}

/**
 * Resolves a secret from the environment first, then from the keychain.
 *
 * @param envNames - Environment variable names, checked in order; empty values count as unset.
 * @param services - Keychain service names, tried one after another until one yields a value.
 * @returns The first non-empty value found, or null when every source is empty.
 */
async function secret(envNames: string[], services: string[]): Promise<string | null> {
  const fromEnvironment = envNames
    .map((name) => process.env[name])
    .find((candidate) => Boolean(candidate));
  if (fromEnvironment) {
    return fromEnvironment;
  }

  // Keychain lookups stay sequential so later services are only queried when earlier ones miss.
  for (const service of services) {
    const stored = await keychain(service);
    if (stored) {
      return stored;
    }
  }
  return null;
}

/**
 * Returns the dataset identifier used for a lane.
 *
 * The lane-specific environment variable (TIP_HF_DATASET_TIP_LLM or
 * TIP_HF_DATASET_BLOG_LLM) wins when it is set to a non-empty value; otherwise
 * the built-in default for that lane is returned.
 */
function dataset(lane: Lane): string {
  if (lane === "tip_llm") {
    return process.env.TIP_HF_DATASET_TIP_LLM || "renefichtmueller/tip-llm-sft";
  }
  return process.env.TIP_HF_DATASET_BLOG_LLM || "renefichtmueller/blog-llm-sft";
}

/**
 * Builds the target model repository name for a run: `<prefix>-<runId>`.
 *
 * The prefix is the lane's environment override (TIP_HF_MODEL_TIP_LLM or
 * TIP_HF_MODEL_BLOG_LLM) when non-empty, otherwise the lane's built-in default.
 */
function modelRepo(lane: Lane, runId: string): string {
  let prefix: string;
  if (lane === "tip_llm") {
    prefix = process.env.TIP_HF_MODEL_TIP_LLM || "renefichtmueller/TIP_LLM";
  } else {
    prefix = process.env.TIP_HF_MODEL_BLOG_LLM || "renefichtmueller/Blog_LLM";
  }
  return prefix + "-" + runId;
}

/**
 * Assembles the `input` object that the /train handler submits to RunPod.
 *
 * @param lane - Training lane; selects dataset, sequence length, LoRA sizes, epochs and learning rate.
 * @param seedOnly - When true: one epoch, checkpoints every 50 steps, and no push to the hub.
 * @param maxSteps - Value forwarded as `max_steps`.
 * @param runId - Run identifier; embedded in the model id, output directory and hub model id.
 * @param hfToken - Hugging Face token forwarded under `credentials.hf_token`.
 */
function runpodInput(lane: Lane, seedOnly: boolean, maxSteps: number, runId: string, hfToken: string) {
  // Per-lane hyperparameters, gathered in one place instead of repeated ternaries.
  const tuning =
    lane === "blog_llm"
      ? { sequenceLen: 4096, loraR: 48, loraAlpha: 96, fullEpochs: 2, learningRate: 0.00016 }
      : { sequenceLen: 3072, loraR: 32, loraAlpha: 64, fullEpochs: 3, learningRate: 0.00018 };
  const jobName = `${lane}-${runId}`;
  const baseModel = process.env.TIP_RUNPOD_BASE_MODEL || "Qwen/Qwen2.5-Coder-7B-Instruct";

  // Key order is kept stable because this object is serialised verbatim into the request body.
  const args = {
    base_model: baseModel,
    model_type: "AutoModelForCausalLM",
    tokenizer_type: "AutoTokenizer",
    load_in_4bit: true,
    strict: false,
    datasets: [{ path: dataset(lane), type: "chat_template", split: "train" }],
    val_set_size: 0.02,
    output_dir: `/workspace/outputs/${jobName}`,
    sequence_len: tuning.sequenceLen,
    sample_packing: true,
    eval_sample_packing: false,
    pad_to_sequence_len: true,
    adapter: "qlora",
    lora_r: tuning.loraR,
    lora_alpha: tuning.loraAlpha,
    lora_dropout: 0.05,
    lora_target_linear: true,
    lora_modules_to_save: ["embed_tokens", "lm_head"],
    gradient_accumulation_steps: 2,
    micro_batch_size: 1,
    num_epochs: seedOnly ? 1 : tuning.fullEpochs,
    optimizer: "adamw_torch_fused",
    lr_scheduler: "cosine",
    learning_rate: tuning.learningRate,
    train_on_inputs: false,
    bf16: "auto",
    tf32: true,
    gradient_checkpointing: true,
    flash_attention: true,
    logging_steps: 5,
    warmup_steps: 10,
    evals_per_epoch: 1,
    save_steps: seedOnly ? 50 : 250,
    max_steps: maxSteps,
    push_to_hub: !seedOnly,
    hub_model_id: modelRepo(lane, runId),
    hub_strategy: "end",
    hub_private_repo: true,
    hf_use_auth_token: true,
    special_tokens: { pad_token: "<|endoftext|>" },
  };

  return {
    user_id: "tip-selflearning",
    model_id: jobName,
    run_id: runId,
    credentials: { hf_token: hfToken },
    args,
  };
}

/**
 * GET /status — reports the manifest content, the per-lane dataset / model
 * prefixes, and whether RunPod, Hugging Face and local training are configured.
 * Secrets are only reported as booleans, never echoed.
 *
 * Responds 500 with `{ success: false, error }` when gathering the status fails
 * (for example when the manifest exists but cannot be read or parsed), matching
 * the error shape of the other routes instead of leaving the async handler's
 * promise rejected.
 */
selflearningRouter.get("/status", async (_req: Request, res: Response) => {
  try {
    const runpodEndpoint = process.env.TIP_RUNPOD_ENDPOINT_ID || process.env.RUNPOD_ENDPOINT_ID || null;
    const runpodToken = await secret(["RUNPOD_API_KEY", "TIP_RUNPOD_API_KEY"], ["magatama.runpod.api", "tip.runpod.api"]);
    const hfToken = await secret(["HF_TOKEN", "HUGGINGFACE_TOKEN"], ["magatama.huggingface.token", "tip.huggingface.token"]);
    res.json({
      success: true,
      // readManifest() throws on a malformed manifest; the surrounding try/catch turns that into a 500.
      manifest: readManifest(),
      lanes: {
        tip_llm: { dataset: dataset("tip_llm"), target_model_prefix: process.env.TIP_HF_MODEL_TIP_LLM || "renefichtmueller/TIP_LLM" },
        blog_llm: { dataset: dataset("blog_llm"), target_model_prefix: process.env.TIP_HF_MODEL_BLOG_LLM || "renefichtmueller/Blog_LLM" },
      },
      runpod: { endpoint_configured: Boolean(runpodEndpoint), api_key_configured: Boolean(runpodToken) },
      huggingface: { token_configured: Boolean(hfToken) },
      local: { command: process.env.TIP_LOCAL_TRAIN_COMMAND || "not configured", ready: Boolean(process.env.TIP_LOCAL_TRAIN_COMMAND) },
    });
  } catch (err) {
    res.status(500).json({ success: false, error: String(err) });
  }
});

/**
 * POST /build — runs `npm run learning-pool:build` and returns the manifest
 * together with the last 4000 characters of the command's stdout and stderr.
 * Any failure is reported as a 500 with the stringified error.
 */
selflearningRouter.post("/build", async (_req: Request, res: Response) => {
  try {
    const { stdout, stderr } = await runCommand("npm", ["run", "learning-pool:build"]);
    const manifest = readManifest();
    res.json({
      success: true,
      manifest,
      stdout: stdout.slice(-4000),
      stderr: stderr.slice(-4000),
    });
  } catch (failure) {
    res.status(500).json({ success: false, error: String(failure) });
  }
});

/**
 * POST /publish-hf — runs `npm run learning-pool:publish-hf` (30 minute timeout),
 * building the learning pool first when no manifest is present. Returns the
 * tail of the publish command's stdout (6000 chars) and stderr (4000 chars);
 * any failure is reported as a 500 with the stringified error.
 */
selflearningRouter.post("/publish-hf", async (_req: Request, res: Response) => {
  try {
    const manifestMissing = !readManifest();
    if (manifestMissing) {
      await runCommand("npm", ["run", "learning-pool:build"]);
    }
    const publishTimeoutMs = 30 * 60 * 1000;
    const { stdout, stderr } = await runCommand("npm", ["run", "learning-pool:publish-hf"], publishTimeoutMs);
    res.json({ success: true, stdout: stdout.slice(-6000), stderr: stderr.slice(-4000) });
  } catch (failure) {
    res.status(500).json({ success: false, error: String(failure) });
  }
});

/**
 * Reads a positive, finite number from an environment variable.
 * Unset, empty, non-numeric, zero or negative values yield `fallback`, so a
 * typo in the environment can never serialise as `null` in the request body.
 */
function trainPositiveEnvNumber(name: string, fallback: number): number {
  const parsed = Number(process.env[name]);
  return Number.isFinite(parsed) && parsed > 0 ? parsed : fallback;
}

/**
 * Interprets the request's `max_steps`.
 * Falsy input (missing, null, 0, "") selects `fallback`; otherwise the value
 * must be a number or numeric string denoting a positive integer.
 *
 * @returns The step count, or null when the supplied value is not usable.
 */
function trainParseMaxSteps(raw: unknown, fallback: number): number | null {
  if (!raw) return fallback;
  if (typeof raw !== "number" && typeof raw !== "string") return null;
  const steps = Number(raw);
  return Number.isInteger(steps) && steps > 0 ? steps : null;
}

/**
 * POST /train — starts a training run for one lane.
 *
 * Body fields: `lane` (required, "tip_llm" | "blog_llm"), `provider`
 * ("runpod" by default, or "local"), `seed_only` (anything other than the
 * boolean `false` means seed-only), `max_steps` (positive integer; defaults to
 * 200 for seed-only runs and 2000 otherwise; only used by the RunPod path).
 *
 * - 400 for an invalid lane, provider or max_steps.
 * - 409 when the chosen provider is not configured.
 * - local: runs `bash <TIP_LOCAL_TRAIN_COMMAND> <lane>` and returns its output.
 * - runpod: submits a job to the configured endpoint and relays RunPod's answer.
 */
selflearningRouter.post("/train", async (req: Request, res: Response) => {
  const lane: unknown = req.body?.lane;
  if (!isLane(lane)) {
    res.status(400).json({ success: false, error: "lane must be tip_llm or blog_llm" });
    return;
  }

  // Validate instead of casting: an unknown provider used to fall through to the RunPod path silently.
  const requestedProvider = req.body?.provider || "runpod";
  if (requestedProvider !== "runpod" && requestedProvider !== "local") {
    res.status(400).json({ success: false, error: "provider must be runpod or local" });
    return;
  }
  const provider: Provider = requestedProvider;
  const seedOnly = req.body?.seed_only !== false;

  if (provider === "local") {
    const command = process.env.TIP_LOCAL_TRAIN_COMMAND;
    if (!command) {
      res.status(409).json({ success: false, error: "Local training command is not configured.", suggestion: "Set TIP_LOCAL_TRAIN_COMMAND; the lane name is appended automatically." });
      return;
    }
    try {
      const out = await runCommand("bash", [command, lane], 12 * 60 * 60 * 1000);
      res.json({ success: true, provider, lane, stdout: out.stdout.slice(-6000), stderr: out.stderr.slice(-4000) });
    } catch (err: unknown) {
      // execFileAsync throws on non-zero exit — stdout/stderr are still on the error object
      const e = err as { stdout?: string; stderr?: string; message?: string };
      const stdout = (e.stdout ?? "").slice(-6000);
      const stderr = (e.stderr ?? "").slice(-4000);
      // NOTE(review): this failure is answered with HTTP 200 and success: false, unlike the
      // 500 used elsewhere in this file; left unchanged because clients may rely on it — confirm.
      res.json({
        success: false,
        provider,
        lane,
        error: e.message ?? String(err),
        stdout,
        stderr,
      });
    }
    return;
  }

  // Reject unusable step counts up front; NaN would otherwise be serialised as `null`.
  const maxSteps = trainParseMaxSteps(req.body?.max_steps, seedOnly ? 200 : 2000);
  if (maxSteps === null) {
    res.status(400).json({ success: false, error: "max_steps must be a positive integer" });
    return;
  }

  const endpoint = process.env.TIP_RUNPOD_ENDPOINT_ID || process.env.RUNPOD_ENDPOINT_ID;
  const runpodToken = await secret(["RUNPOD_API_KEY", "TIP_RUNPOD_API_KEY"], ["magatama.runpod.api", "tip.runpod.api"]);
  const hfToken = await secret(["HF_TOKEN", "HUGGINGFACE_TOKEN"], ["magatama.huggingface.token", "tip.huggingface.token"]);
  if (!endpoint || !runpodToken || !hfToken) {
    res.status(409).json({ success: false, error: "RunPod/Hugging Face credentials are incomplete.", runpod_endpoint_configured: Boolean(endpoint), runpod_api_key_configured: Boolean(runpodToken), hf_token_configured: Boolean(hfToken) });
    return;
  }
  try {
    if (!readManifest()) await runCommand("npm", ["run", "learning-pool:build"]);
    // Run id: lane plus a UTC timestamp truncated to the minute (YYYYMMDDHHMM).
    const runId = `${lane}-v${new Date().toISOString().replace(/[-:T.Z]/g, "").slice(0, 12)}`;
    const input = runpodInput(lane, seedOnly, maxSteps, runId, hfToken);
    // RunPod's execution policy documents executionTimeout and ttl in MILLISECONDS. The
    // previous defaults (12*60*60 and 24*60*60) were written as seconds and therefore
    // meant ~43 s / ~86 s. Defaults are now 12 h / 24 h in ms; environment overrides are
    // forwarded unchanged, so they must be given in milliseconds as well.
    const policy = {
      executionTimeout: trainPositiveEnvNumber("TIP_RUNPOD_EXECUTION_TIMEOUT", 12 * 60 * 60 * 1000),
      ttl: trainPositiveEnvNumber("TIP_RUNPOD_JOB_TTL", 24 * 60 * 60 * 1000),
    };
    const response = await fetch(`https://api.runpod.ai/v2/${endpoint}/run`, {
      method: "POST",
      headers: { "Content-Type": "application/json", Authorization: `Bearer ${runpodToken}` },
      body: JSON.stringify({ input, policy }),
    });
    // A non-JSON answer is tolerated and reported as an empty object.
    const body = await response.json().catch(() => ({}));
    if (!response.ok) {
      res.status(response.status).json({ success: false, error: "RunPod request failed", details: body });
      return;
    }
    res.json({ success: true, provider, lane, seed_only: seedOnly, run_id: runId, dataset: dataset(lane), target_model: input.args.hub_model_id, runpod: body });
  } catch (err) {
    res.status(500).json({ success: false, error: String(err) });
  }
});