// agent-r/test/orchestrator.test.ts

import { mkdtempSync, readFileSync } from "node:fs";
import { tmpdir } from "node:os";
import { DatabaseSync } from "node:sqlite";
import path from "node:path";
import { describe, expect, test, vi } from "vitest";
import { ArtifactManager } from "../src/artifacts.js";
import { loadConfig } from "../src/config.js";
import { AgentROrchestrator } from "../src/orchestrator.js";
import { strategyPlanSchema } from "../src/schema-catalog.js";
import { RunStore } from "../src/store.js";
import type { AgentInvocation, AgentInvocationResult, ProgressEvent, RawAgentRunner } from "../src/types.js";

/**
 * Test double for `RawAgentRunner` that replays a fixed queue of JSON-encoded
 * outputs, one per `invoke` call, while recording every prompt and progress
 * event it handles.
 */
class ScriptedRunner implements RawAgentRunner {
  /** Prompts received so far, in invocation order. */
  public readonly prompts: string[] = [];
  /** Synthetic `turn.started` events sent to requests that supplied `onProgress`. */
  public readonly progressEvents: ProgressEvent[] = [];
  /** Remaining scripted outputs; a private copy so the caller's array is never drained. */
  private readonly outputs: string[];

  /**
   * @param outputs JSON-encoded outputs, consumed front to back (one per `invoke`).
   */
  constructor(outputs: readonly string[]) {
    this.outputs = [...outputs];
  }

  /**
   * Consumes the next scripted output and returns it as the invocation result.
   *
   * @param request Invocation to answer; its prompt is recorded and its schema file is read.
   * @returns The parsed scripted output, with the request's artifacts passed through.
   * @throws Error when no scripted output remains (the prompt is not recorded in that case).
   */
  async invoke<T>(request: AgentInvocation<T>): Promise<AgentInvocationResult<T>> {
    const next = this.outputs.shift();
    // Compare against undefined so an (invalid) empty-string entry reaches JSON.parse
    // and fails there instead of being misreported as an exhausted script.
    if (next === undefined) {
      throw new Error("No scripted output remaining.");
    }

    this.prompts.push(request.prompt);
    // The content is discarded; the read only fails the turn if the schema file is unreadable.
    readFileSync(request.artifacts.schemaPath, "utf8");
    if (request.onProgress) {
      const progressEvent: ProgressEvent = {
        ts: new Date().toISOString(),
        runId: request.runId,
        source: request.role,
        kind: "turn.started",
        message: "Started scripted turn.",
        payload: {},
      };
      this.progressEvents.push(progressEvent);
      await request.onProgress(progressEvent);
    }
    const parsed = JSON.parse(next) as Record<string, unknown>;
    // An empty-string taskId is a placeholder: substitute the id quoted in the prompt, if any.
    if (typeof parsed.taskId === "string" && !parsed.taskId) {
      const match = request.prompt.match(/exact task id `([^`]+)`/);
      if (match) {
        parsed.taskId = match[1];
      }
    }
    return {
      // Reuse the caller's session when given; otherwise derive one from the turn count.
      sessionId: request.sessionId ?? `session-${this.prompts.length}`,
      output: parsed as T,
      rawMessage: JSON.stringify(parsed),
      rawEvents: ['{"type":"turn.completed"}'],
      stderr: "",
      artifacts: request.artifacts,
    };
  }
}

/**
 * Test double for `RawAgentRunner` whose `invoke` stays unresolved until the
 * test explicitly calls `complete`, letting a test hold a turn open.
 */
class DeferredRunner implements RawAgentRunner {
  /** The invocation currently awaiting `complete`, or null when none is outstanding. */
  private inFlight:
    | {
        request: AgentInvocation<unknown>;
        resolve: (value: AgentInvocationResult<unknown>) => void;
      }
    | null = null;

  /**
   * Parks the request and returns a promise that settles only once `complete` is called.
   *
   * @param request Invocation to hold; it replaces any previously parked one.
   */
  async invoke<T>(request: AgentInvocation<T>): Promise<AgentInvocationResult<T>> {
    const settled = new Promise<AgentInvocationResult<T>>((settle) => {
      // The executor runs synchronously, so the request is parked before invoke yields.
      this.inFlight = {
        request: request as AgentInvocation<unknown>,
        resolve: settle as (value: AgentInvocationResult<unknown>) => void,
      };
    });
    return await settled;
  }

  /**
   * Resolves the parked invocation with `output` and clears the slot.
   *
   * @param output Value returned as the invocation's `output` (and serialized as `rawMessage`).
   * @throws Error when no invocation is currently parked.
   */
  complete(output: Record<string, unknown>): void {
    const held = this.inFlight;
    if (held === null) {
      throw new Error("No pending invocation to complete.");
    }

    // Clear the slot before resolving so a second complete() fails fast.
    this.inFlight = null;
    held.resolve({
      sessionId: held.request.sessionId ?? "session-1",
      output,
      rawMessage: JSON.stringify(output),
      rawEvents: ['{"type":"turn.completed"}'],
      stderr: "",
      artifacts: held.request.artifacts,
    });
  }
}

describe("AgentROrchestrator", () => {
  // Happy path: the runner replays five scripted outputs in order, and the run is
  // expected to finish as "done" with one completed task and a trimmed checkpoint.
  test("runs through plan, implementation, self-check, and independent check", async () => {
    const repoPath = mkdtempSync(path.join(tmpdir(), "agent-r-orchestrator-"));
    const config = loadConfig(repoPath);
    const store = new RunStore(config.databasePath);
    const artifacts = new ArtifactManager(config.runsDir);
    const runner = new ScriptedRunner([
      // 1) Plan that continues with a single task.
      JSON.stringify({
        decision: "continue",
        summary: "Implement the CLI skeleton.",
        rationale: "The project is empty and needs a first vertical slice.",
        goalProgress: "No implementation exists yet.",
        risks: [],
        tasks: [
          {
            title: "Create CLI entrypoint",
            objective: "Build the initial command surface.",
            acceptanceCriteria: ["A run command exists."],
            verificationSteps: ["Run the help command."],
            allowedPaths: ["src", "package.json"],
            runtimeClass: "short",
          },
        ],
        blockedReason: null,
        blockerClass: null,
        requiredInputs: [],
        fallbackTasks: [],
      }),
      // 2) Task result; the empty taskId is filled in by ScriptedRunner from the prompt.
      JSON.stringify({
        taskId: "",
        status: "completed",
        summary: "Added the CLI skeleton.",
        changes: ["Created an entrypoint."],
        verification: [
          {
            command: "agent-r --help",
            outcome: "passed",
            details: "Help output rendered.",
            category: "test",
            artifacts: [],
            metrics: [],
          },
        ],
        followUps: [],
        touchedFiles: ["src/index.ts", "package.json"],
        blockers: [],
        blockerClass: null,
        requiredInputs: [],
        fallbackTasks: [],
      }),
      // 3) Follow-up plan declaring the goal done.
      JSON.stringify({
        decision: "done",
        summary: "The requested slice is complete.",
        rationale: "The only planned task is complete and verified.",
        goalProgress: "The vertical slice exists.",
        risks: [],
        tasks: [],
        blockedReason: null,
        blockerClass: null,
        requiredInputs: [],
        fallbackTasks: [],
      }),
      // 4) Self-check output marking the run ready for independent review.
      JSON.stringify({
        readyForIndependentCheck: true,
        summary: "The run is ready for independent review.",
        rationale: "Implementation and verification are present.",
        evidence: ["CLI entrypoint exists."],
        remainingGaps: [],
      }),
      // 5) Independent-check output approving the result.
      JSON.stringify({
        verdict: "approved",
        summary: "The goal is satisfied.",
        rationale: "The requested slice exists and is verified.",
        evidence: ["CLI entrypoint present."],
        remainingTasks: [],
      }),
    ]);

    const orchestrator = new AgentROrchestrator(config, store, artifacts, runner);
    const run = orchestrator.createRun("Create a CLI skeleton");

    const finalRun = await orchestrator.runUntilStable(run.id, 10);
    const snapshot = store.buildSnapshot(run.id);

    // Read the newest checkpoint row directly from SQLite on a separate handle,
    // and always release that handle, even when the query throws.
    const db = new DatabaseSync(config.databasePath);
    let checkpointRow: { payload_json: string } | undefined;
    try {
      checkpointRow = db
        .prepare("SELECT payload_json FROM checkpoints WHERE run_id = ? ORDER BY rowid DESC LIMIT 1")
        .get(run.id) as { payload_json: string } | undefined;
    } finally {
      db.close();
    }
    // Fail with a clear message rather than a TypeError when no checkpoint was written.
    if (checkpointRow === undefined) {
      throw new Error("Expected at least one checkpoint row for the run.");
    }
    const checkpoint = JSON.parse(checkpointRow.payload_json) as Record<string, unknown>;

    expect(finalRun.status).toBe("done");
    expect(snapshot.completedTasks).toHaveLength(1);
    expect(snapshot.approvals).toHaveLength(2);
    expect(snapshot.pendingTasks).toHaveLength(0);
    // The second prompt is the one answered by the task result above.
    expect(runner.prompts.at(1)).toContain("You must echo the exact task id");
    expect(snapshot.run.waitingInputs).toEqual([]);
    expect(checkpoint.counts).toMatchObject({
      completed: 1,
      pending: 0,
    });
    // The stored checkpoint must not carry artifact paths or event payloads.
    expect(checkpoint).not.toHaveProperty("artifacts.0.path");
    expect(checkpoint).not.toHaveProperty("recentEvents.0.payload");
  });

  // Checks the wording of the first prompt handed to the runner: it must allow
  // direct repository inspection and forbid edits. The scripted "blocked" plan
  // only serves to end the run after that single turn.
  test("strategy prompt explicitly permits read-only repository inspection", async () => {
    const workspace = mkdtempSync(path.join(tmpdir(), "agent-r-strategy-"));
    const settings = loadConfig(workspace);
    const runStore = new RunStore(settings.databasePath);
    const artifactManager = new ArtifactManager(settings.runsDir);

    const stopImmediately = {
      decision: "blocked",
      summary: "Cannot continue.",
      rationale: "Test stop.",
      goalProgress: "None.",
      risks: [],
      tasks: [],
      blockedReason: "Stop immediately.",
      blockerClass: "contradiction",
      requiredInputs: [],
      fallbackTasks: [],
    };
    const scripted = new ScriptedRunner([JSON.stringify(stopImmediately)]);

    const orchestrator = new AgentROrchestrator(settings, runStore, artifactManager, scripted);
    const { id: runId } = orchestrator.createRun("Inspect repo");
    await orchestrator.runUntilStable(runId, 1);

    const strategyPrompt = scripted.prompts[0];
    expect(strategyPrompt).toContain("You may inspect the repository directly");
    expect(strategyPrompt).toContain("Do not edit files");
  });

  // Pins the schema shape: blockedReason and blockerClass are listed as required,
  // and blockedReason is typed as string-or-null. The sample plan is round-tripped
  // through JSON to confirm a null blockedReason survives serialization.
  test("strategy plan schema requires blockedReason and accepts null", () => {
    const serializedPlan = JSON.stringify({
      decision: "continue",
      summary: "Keep going.",
      rationale: "Work remains.",
      goalProgress: "Partial.",
      risks: [],
      tasks: [],
      blockedReason: null,
      blockerClass: null,
      requiredInputs: [],
      fallbackTasks: [],
    });
    const roundTripped = JSON.parse(serializedPlan) as Record<string, unknown>;

    const requiredFields = strategyPlanSchema.required;
    expect(requiredFields).toContain("blockedReason");
    expect(requiredFields).toContain("blockerClass");

    const blockedReasonType = strategyPlanSchema.properties.blockedReason.type;
    expect(blockedReasonType).toEqual(["string", "null"]);

    expect(roundTripped.blockedReason).toBeNull();
  });

  // A task result that is blocked on an external resource and lists required
  // inputs should park the run in "waiting_input" and surface those inputs.
  test("moves to waiting_input when implementation reports missing external input", async () => {
    const workspace = mkdtempSync(path.join(tmpdir(), "agent-r-waiting-input-"));
    const settings = loadConfig(workspace);
    const runStore = new RunStore(settings.databasePath);
    const artifactManager = new ArtifactManager(settings.runsDir);

    // First scripted output: a plan that schedules one long-running hardware task.
    const hardwarePlan = {
      decision: "continue",
      summary: "Need a hardware proof task.",
      rationale: "Work remains.",
      goalProgress: "Partial.",
      risks: [],
      tasks: [
        {
          title: "Run hardware proof",
          objective: "Execute the physical node proof.",
          acceptanceCriteria: ["Proof succeeds."],
          verificationSteps: ["Run the hardware script."],
          allowedPaths: ["scripts/**"],
          runtimeClass: "long",
        },
      ],
      blockedReason: null,
      blockerClass: null,
      requiredInputs: [],
      fallbackTasks: [],
    };

    // Second scripted output: the task reports it cannot proceed without real hardware.
    // The empty taskId is a placeholder that ScriptedRunner replaces from the prompt.
    const blockedOnHardware = {
      taskId: "",
      status: "blocked",
      summary: "Waiting for real hardware.",
      changes: [],
      verification: [
        {
          command: "scripts/hardware-proof.sh",
          outcome: "not_run",
          details: "No accessible physical node was available.",
          category: "proof",
          artifacts: [],
          metrics: [],
        },
      ],
      followUps: [],
      touchedFiles: [],
      blockers: ["No accessible physical node."],
      blockerClass: "external_resource",
      requiredInputs: ["Reachable physical node with ISO boot path"],
      fallbackTasks: [],
    };

    const scripted = new ScriptedRunner([hardwarePlan, blockedOnHardware].map((entry) => JSON.stringify(entry)));

    const orchestrator = new AgentROrchestrator(settings, runStore, artifactManager, scripted);
    const { id: runId } = orchestrator.createRun("Prove the hardware path");
    const outcome = await orchestrator.runUntilStable(runId, 5);
    const state = runStore.buildSnapshot(runId);

    expect(outcome.status).toBe("waiting_input");
    expect(state.run.waitingInputs).toEqual(["Reachable physical node with ISO boot path"]);
    expect(state.blockedTasks).toHaveLength(1);
  });

  // With fake timers and a runner that never answers on its own, no heartbeat
  // may appear before 300_000 ms have elapsed and exactly one must exist at that mark.
  test("emits heartbeat only after five minutes of inactivity", async () => {
    vi.useFakeTimers();

    try {
      const workspace = mkdtempSync(path.join(tmpdir(), "agent-r-heartbeat-"));
      const settings = loadConfig(workspace);
      const runStore = new RunStore(settings.databasePath);
      const artifactManager = new ArtifactManager(settings.runsDir);
      const deferred = new DeferredRunner();

      const observed: ProgressEvent[] = [];
      const heartbeats = (): ProgressEvent[] => observed.filter((event) => event.kind === "heartbeat");

      const orchestrator = new AgentROrchestrator(settings, runStore, artifactManager, deferred, (event) => {
        observed.push(event);
      });

      const { id: runId } = orchestrator.createRun("Wait on a long-running strategy turn");
      // Intentionally not awaited yet: the turn stays open until deferred.complete() below.
      const inFlightRun = orchestrator.runUntilStable(runId, 1);

      // One millisecond short of five minutes: still silent.
      await vi.advanceTimersByTimeAsync(299_999);
      expect(heartbeats()).toHaveLength(0);

      // Crossing the five-minute mark yields exactly one heartbeat.
      await vi.advanceTimersByTimeAsync(1);
      expect(heartbeats()).toHaveLength(1);

      // Release the parked turn with a blocked plan so the run can settle.
      deferred.complete({
        decision: "blocked",
        summary: "Still waiting.",
        rationale: "Synthetic test stop.",
        goalProgress: "None.",
        risks: [],
        tasks: [],
        blockedReason: "Stop after heartbeat.",
        blockerClass: "tool_failure",
        requiredInputs: [],
        fallbackTasks: [],
      });

      const settledRun = await inFlightRun;
      expect(settledRun.status).toBe("blocked");
    } finally {
      // Always restore real timers so later tests are unaffected.
      vi.useRealTimers();
    }
  });
});