import { mkdtempSync, readFileSync } from "node:fs";
import { tmpdir } from "node:os";
import { DatabaseSync } from "node:sqlite";
import path from "node:path";

import { describe, expect, test, vi } from "vitest";

import { ArtifactManager } from "../src/artifacts.js";
import { loadConfig } from "../src/config.js";
import { AgentROrchestrator } from "../src/orchestrator.js";
import { strategyPlanSchema } from "../src/schema-catalog.js";
import { RunStore } from "../src/store.js";
import type {
  AgentInvocation,
  AgentInvocationResult,
  ProgressEvent,
  RawAgentRunner,
} from "../src/types.js";

// NOTE(review): the generic type arguments in this file (`<T>`, `<unknown>`,
// `<string, unknown>`) were restored from a copy in which they had been
// stripped. `AgentInvocationResult<T>` and `Record<string, unknown>` follow
// directly from the surviving text; `AgentInvocation<T>` is inferred from the
// casts in `DeferredRunner` — confirm against `../src/types.js`.

/**
 * Test double that replays a fixed queue of JSON-encoded agent outputs,
 * consuming one entry per `invoke` call.
 *
 * Every prompt received and every progress event emitted is recorded so that
 * tests can assert on them afterwards.
 */
class ScriptedRunner implements RawAgentRunner {
  /** Prompts received so far, in invocation order. */
  public readonly prompts: string[] = [];
  /** Progress events this runner has passed to `request.onProgress`. */
  public readonly progressEvents: ProgressEvent[] = [];

  /**
   * @param outputs JSON strings to replay. The array is consumed in place
   *   (`shift`) as invocations arrive.
   */
  constructor(private readonly outputs: string[]) {}

  /**
   * Returns the next scripted output as the result of the invocation.
   *
   * @param request The invocation issued by the orchestrator.
   * @returns A result whose `output` is the parsed scripted JSON.
   * @throws Error when the script is exhausted (or the next entry is empty).
   */
  async invoke<T>(request: AgentInvocation<T>): Promise<AgentInvocationResult<T>> {
    const next = this.outputs.shift();
    if (!next) {
      throw new Error("No scripted output remaining.");
    }

    this.prompts.push(request.prompt);

    // The content is discarded; this read only throws if the schema file
    // cannot be read from the path handed to the runner.
    readFileSync(request.artifacts.schemaPath, "utf8");

    if (request.onProgress) {
      const progressEvent: ProgressEvent = {
        ts: new Date().toISOString(),
        runId: request.runId,
        source: request.role,
        kind: "turn.started",
        message: "Started scripted turn.",
        payload: {},
      };
      this.progressEvents.push(progressEvent);
      await request.onProgress(progressEvent);
    }

    const parsed = JSON.parse(next) as Record<string, unknown>;

    // An empty-string `taskId` in the script is a placeholder: fill it with
    // the id the prompt asks the agent to echo, when the prompt contains one.
    if (typeof parsed.taskId === "string" && !parsed.taskId) {
      const match = request.prompt.match(/exact task id `([^`]+)`/);
      if (match) {
        parsed.taskId = match[1];
      }
    }

    return {
      sessionId: request.sessionId ?? `session-${this.prompts.length}`,
      output: parsed as T,
      rawMessage: JSON.stringify(parsed),
      rawEvents: ['{"type":"turn.completed"}'],
      stderr: "",
      artifacts: request.artifacts,
    };
  }
}

/**
 * Test double whose `invoke` promise stays pending until the test calls
 * `complete`, letting a test hold an agent turn open for as long as it needs.
 *
 * Only the most recent invocation is tracked; a second `invoke` before
 * `complete` overwrites the first.
 */
class DeferredRunner implements RawAgentRunner {
  private pending: {
    request: AgentInvocation<unknown>;
    resolve: (value: AgentInvocationResult<unknown>) => void;
  } | null = null;

  /**
   * Parks the invocation and returns a promise that settles on `complete`.
   *
   * @param request The invocation issued by the orchestrator.
   */
  async invoke<T>(request: AgentInvocation<T>): Promise<AgentInvocationResult<T>> {
    return await new Promise<AgentInvocationResult<T>>((resolve) => {
      this.pending = {
        request: request as AgentInvocation<unknown>,
        resolve: resolve as (value: AgentInvocationResult<unknown>) => void,
      };
    });
  }

  /**
   * Resolves the parked invocation with `output`.
   *
   * @param output Object returned to the caller as the agent's output.
   * @throws Error when no invocation is currently parked.
   */
  complete(output: Record<string, unknown>): void {
    if (!this.pending) {
      throw new Error("No pending invocation to complete.");
    }
    const { request, resolve } = this.pending;
    this.pending = null;
    resolve({
      sessionId: request.sessionId ?? "session-1",
      output,
      rawMessage: JSON.stringify(output),
      rawEvents: ['{"type":"turn.completed"}'],
      stderr: "",
      artifacts: request.artifacts,
    });
  }
}

describe("AgentROrchestrator", () => {
  test("runs through plan, implementation, self-check, and independent check", async () => {
    const repoPath = mkdtempSync(path.join(tmpdir(), "agent-r-orchestrator-"));
    const config = loadConfig(repoPath);
    const store = new RunStore(config.databasePath);
    const artifacts = new ArtifactManager(config.runsDir);

    // Scripted turns, in the order the runner hands them out: a plan with one
    // task, the task result, a "done" plan, a self-check, and a final verdict.
    const runner = new ScriptedRunner([
      JSON.stringify({
        decision: "continue",
        summary: "Implement the CLI skeleton.",
        rationale: "The project is empty and needs a first vertical slice.",
        goalProgress: "No implementation exists yet.",
        risks: [],
        tasks: [
          {
            title: "Create CLI entrypoint",
            objective: "Build the initial command surface.",
            acceptanceCriteria: ["A run command exists."],
            verificationSteps: ["Run the help command."],
            allowedPaths: ["src", "package.json"],
            runtimeClass: "short",
          },
        ],
        blockedReason: null,
        blockerClass: null,
        requiredInputs: [],
        fallbackTasks: [],
      }),
      JSON.stringify({
        taskId: "",
        status: "completed",
        summary: "Added the CLI skeleton.",
        changes: ["Created an entrypoint."],
        verification: [
          {
            command: "agent-r --help",
            outcome: "passed",
            details: "Help output rendered.",
            category: "test",
            artifacts: [],
            metrics: [],
          },
        ],
        followUps: [],
        touchedFiles: ["src/index.ts", "package.json"],
        blockers: [],
        blockerClass: null,
        requiredInputs: [],
        fallbackTasks: [],
      }),
      JSON.stringify({
        decision: "done",
        summary: "The requested slice is complete.",
        rationale: "The only planned task is complete and verified.",
        goalProgress: "The vertical slice exists.",
        risks: [],
        tasks: [],
        blockedReason: null,
        blockerClass: null,
        requiredInputs: [],
        fallbackTasks: [],
      }),
      JSON.stringify({
        readyForIndependentCheck: true,
        summary: "The run is ready for independent review.",
        rationale: "Implementation and verification are present.",
        evidence: ["CLI entrypoint exists."],
        remainingGaps: [],
      }),
      JSON.stringify({
        verdict: "approved",
        summary: "The goal is satisfied.",
        rationale: "The requested slice exists and is verified.",
        evidence: ["CLI entrypoint present."],
        remainingTasks: [],
      }),
    ]);

    const orchestrator = new AgentROrchestrator(config, store, artifacts, runner);
    const run = orchestrator.createRun("Create a CLI skeleton");
    const firstStatus = await orchestrator.runUntilStable(run.id, 10);
    const snapshot = store.buildSnapshot(run.id);

    // Read the newest checkpoint through a separate connection, and always
    // close that connection so the test does not leak a database handle.
    const db = new DatabaseSync(config.databasePath);
    let checkpointRow: { payload_json: string } | undefined;
    try {
      checkpointRow = db
        .prepare("SELECT payload_json FROM checkpoints WHERE run_id = ? ORDER BY rowid DESC LIMIT 1")
        .get(run.id) as { payload_json: string } | undefined;
    } finally {
      db.close();
    }
    if (!checkpointRow) {
      throw new Error("Expected at least one checkpoint row for the run.");
    }
    const checkpoint = JSON.parse(checkpointRow.payload_json) as Record<string, unknown>;

    expect(firstStatus.status).toBe("done");
    expect(snapshot.completedTasks).toHaveLength(1);
    expect(snapshot.approvals).toHaveLength(2);
    expect(snapshot.pendingTasks).toHaveLength(0);
    expect(runner.prompts.at(1)).toContain("You must echo the exact task id");
    expect(snapshot.run.waitingInputs).toEqual([]);
    expect(checkpoint.counts).toMatchObject({
      completed: 1,
      pending: 0,
    });
    expect(checkpoint).not.toHaveProperty("artifacts.0.path");
    expect(checkpoint).not.toHaveProperty("recentEvents.0.payload");
  });

  test("strategy prompt explicitly permits read-only repository inspection", async () => {
    const repoPath = mkdtempSync(path.join(tmpdir(), "agent-r-strategy-"));
    const config = loadConfig(repoPath);
    const store = new RunStore(config.databasePath);
    const artifacts = new ArtifactManager(config.runsDir);

    // A single "blocked" plan: only the first prompt is inspected below.
    const runner = new ScriptedRunner([
      JSON.stringify({
        decision: "blocked",
        summary: "Cannot continue.",
        rationale: "Test stop.",
        goalProgress: "None.",
        risks: [],
        tasks: [],
        blockedReason: "Stop immediately.",
        blockerClass: "contradiction",
        requiredInputs: [],
        fallbackTasks: [],
      }),
    ]);

    const orchestrator = new AgentROrchestrator(config, store, artifacts, runner);
    const run = orchestrator.createRun("Inspect repo");
    await orchestrator.runUntilStable(run.id, 1);

    expect(runner.prompts[0]).toContain("You may inspect the repository directly");
    expect(runner.prompts[0]).toContain("Do not edit files");
  });

  test("strategy plan schema requires blockedReason and accepts null", () => {
    // Round-trip through JSON so the sample is a plain parsed payload.
    const valid = JSON.parse(
      JSON.stringify({
        decision: "continue",
        summary: "Keep going.",
        rationale: "Work remains.",
        goalProgress: "Partial.",
        risks: [],
        tasks: [],
        blockedReason: null,
        blockerClass: null,
        requiredInputs: [],
        fallbackTasks: [],
      }),
    ) as Record<string, unknown>;

    expect(strategyPlanSchema.required).toContain("blockedReason");
    expect(strategyPlanSchema.required).toContain("blockerClass");
    expect(strategyPlanSchema.properties.blockedReason.type).toEqual(["string", "null"]);
    expect(valid.blockedReason).toBeNull();
  });

  test("moves to waiting_input when implementation reports missing external input", async () => {
    const repoPath = mkdtempSync(path.join(tmpdir(), "agent-r-waiting-input-"));
    const config = loadConfig(repoPath);
    const store = new RunStore(config.databasePath);
    const artifacts = new ArtifactManager(config.runsDir);

    // Plan one task, then report it blocked on an external resource with a
    // required input listed.
    const runner = new ScriptedRunner([
      JSON.stringify({
        decision: "continue",
        summary: "Need a hardware proof task.",
        rationale: "Work remains.",
        goalProgress: "Partial.",
        risks: [],
        tasks: [
          {
            title: "Run hardware proof",
            objective: "Execute the physical node proof.",
            acceptanceCriteria: ["Proof succeeds."],
            verificationSteps: ["Run the hardware script."],
            allowedPaths: ["scripts/**"],
            runtimeClass: "long",
          },
        ],
        blockedReason: null,
        blockerClass: null,
        requiredInputs: [],
        fallbackTasks: [],
      }),
      JSON.stringify({
        taskId: "",
        status: "blocked",
        summary: "Waiting for real hardware.",
        changes: [],
        verification: [
          {
            command: "scripts/hardware-proof.sh",
            outcome: "not_run",
            details: "No accessible physical node was available.",
            category: "proof",
            artifacts: [],
            metrics: [],
          },
        ],
        followUps: [],
        touchedFiles: [],
        blockers: ["No accessible physical node."],
        blockerClass: "external_resource",
        requiredInputs: ["Reachable physical node with ISO boot path"],
        fallbackTasks: [],
      }),
    ]);

    const orchestrator = new AgentROrchestrator(config, store, artifacts, runner);
    const run = orchestrator.createRun("Prove the hardware path");
    const finalRun = await orchestrator.runUntilStable(run.id, 5);
    const snapshot = store.buildSnapshot(run.id);

    expect(finalRun.status).toBe("waiting_input");
    expect(snapshot.run.waitingInputs).toEqual(["Reachable physical node with ISO boot path"]);
    expect(snapshot.blockedTasks).toHaveLength(1);
  });

  test("emits heartbeat only after five minutes of inactivity", async () => {
    vi.useFakeTimers();
    try {
      const repoPath = mkdtempSync(path.join(tmpdir(), "agent-r-heartbeat-"));
      const config = loadConfig(repoPath);
      const store = new RunStore(config.databasePath);
      const artifacts = new ArtifactManager(config.runsDir);
      const runner = new DeferredRunner();
      const progressEvents: ProgressEvent[] = [];
      const orchestrator = new AgentROrchestrator(config, store, artifacts, runner, (event) => {
        progressEvents.push(event);
      });

      const run = orchestrator.createRun("Wait on a long-running strategy turn");
      // Not awaited yet: the deferred runner keeps the turn open while the
      // fake clock is advanced.
      const runPromise = orchestrator.runUntilStable(run.id, 1);

      // One millisecond short of five minutes: no heartbeat yet.
      await vi.advanceTimersByTimeAsync(299_999);
      expect(progressEvents.filter((event) => event.kind === "heartbeat")).toHaveLength(0);

      // Exactly five minutes: exactly one heartbeat.
      await vi.advanceTimersByTimeAsync(1);
      expect(progressEvents.filter((event) => event.kind === "heartbeat")).toHaveLength(1);

      runner.complete({
        decision: "blocked",
        summary: "Still waiting.",
        rationale: "Synthetic test stop.",
        goalProgress: "None.",
        risks: [],
        tasks: [],
        blockedReason: "Stop after heartbeat.",
        blockerClass: "tool_failure",
        requiredInputs: [],
        fallbackTasks: [],
      });

      const finalRun = await runPromise;
      expect(finalRun.status).toBe("blocked");
    } finally {
      // Restore real timers even when an assertion above fails.
      vi.useRealTimers();
    }
  });
});