agent-r/test/orchestrator.test.ts
2026-04-11 17:06:06 +09:00

260 lines
9 KiB
TypeScript

import { mkdtempSync, readFileSync } from "node:fs";
import { tmpdir } from "node:os";
import path from "node:path";
import { describe, expect, test } from "vitest";
import { ArtifactManager } from "../src/artifacts.js";
import { loadConfig } from "../src/config.js";
import { AgentROrchestrator } from "../src/orchestrator.js";
import { strategyPlanSchema } from "../src/schema-catalog.js";
import { RunStore } from "../src/store.js";
import type { AgentInvocation, AgentInvocationResult, ProgressEvent, RawAgentRunner } from "../src/types.js";
/**
 * Test double for {@link RawAgentRunner} that replays pre-scripted JSON outputs.
 *
 * Every `invoke` call consumes the next scripted output in order, records the
 * prompt it received, and emits one "turn.started" progress event when the
 * request supplies an `onProgress` callback.
 */
class ScriptedRunner implements RawAgentRunner {
  /** Prompts received so far, in invocation order. */
  public readonly prompts: string[] = [];
  /** Progress events emitted by this runner, in emission order. */
  public readonly progressEvents: ProgressEvent[] = [];
  /** Index of the next scripted output to replay. */
  private cursor = 0;

  /**
   * @param outputs JSON-encoded outputs, replayed one per `invoke` call.
   *   The array is only read; the caller's copy is never consumed.
   */
  constructor(private readonly outputs: readonly string[]) {}

  /**
   * Replays the next scripted output as the result of an agent turn.
   *
   * @param request The invocation to answer; its prompt is recorded and its
   *   schema file is read from disk.
   * @returns A result whose `output` is the parsed scripted JSON.
   * @throws Error when every scripted output has already been replayed.
   * @throws SyntaxError when the scripted output is not valid JSON.
   */
  async invoke<T>(request: AgentInvocation<T>): Promise<AgentInvocationResult<T>> {
    // Compare against undefined explicitly so that an empty scripted string is
    // reported as a JSON error below rather than as "nothing left to replay".
    const next = this.outputs[this.cursor];
    if (next === undefined) {
      throw new Error("No scripted output remaining.");
    }
    this.cursor += 1;
    this.prompts.push(request.prompt);

    // The contents are discarded; the read throws if the schema file is missing.
    // NOTE(review): presumably this checks that the caller wrote the schema
    // before invoking the runner — confirm against the orchestrator.
    readFileSync(request.artifacts.schemaPath, "utf8");

    if (request.onProgress) {
      const progressEvent: ProgressEvent = {
        ts: new Date().toISOString(),
        runId: request.runId,
        source: request.role,
        kind: "turn.started",
        message: "Started scripted turn.",
        payload: {},
      };
      this.progressEvents.push(progressEvent);
      await request.onProgress(progressEvent);
    }

    const parsed: unknown = JSON.parse(next);
    if (typeof parsed === "object" && parsed !== null) {
      const record = parsed as Record<string, unknown>;
      // An empty-string taskId is a placeholder: fill it with the id the prompt
      // quotes in backticks after the phrase "exact task id".
      if (record.taskId === "") {
        const match = request.prompt.match(/exact task id `([^`]+)`/);
        if (match) {
          record.taskId = match[1];
        }
      }
    }

    return {
      // prompts.length already counts this call, so generated ids start at 1.
      sessionId: request.sessionId ?? `session-${this.prompts.length}`,
      output: parsed as T,
      rawMessage: JSON.stringify(parsed),
      rawEvents: ['{"type":"turn.completed"}'],
      stderr: "",
      artifacts: request.artifacts,
    };
  }
}
describe("AgentROrchestrator", () => {
test("runs through plan, implementation, self-check, and independent check", async () => {
  // Scripted turn 1: a "continue" plan containing a single short task.
  const initialPlan = {
    decision: "continue",
    summary: "Implement the CLI skeleton.",
    rationale: "The project is empty and needs a first vertical slice.",
    goalProgress: "No implementation exists yet.",
    risks: [],
    tasks: [
      {
        title: "Create CLI entrypoint",
        objective: "Build the initial command surface.",
        acceptanceCriteria: ["A run command exists."],
        verificationSteps: ["Run the help command."],
        allowedPaths: ["src", "package.json"],
        runtimeClass: "short",
      },
    ],
    blockedReason: null,
    blockerClass: null,
    requiredInputs: [],
    fallbackTasks: [],
  };

  // Scripted turn 2: the task report. The empty taskId is a placeholder that
  // ScriptedRunner replaces with the id quoted in the prompt.
  const implementationReport = {
    taskId: "",
    status: "completed",
    summary: "Added the CLI skeleton.",
    changes: ["Created an entrypoint."],
    verification: [
      {
        command: "agent-r --help",
        outcome: "passed",
        details: "Help output rendered.",
        category: "test",
        artifacts: [],
        metrics: {},
      },
    ],
    followUps: [],
    touchedFiles: ["src/index.ts", "package.json"],
    blockers: [],
    blockerClass: null,
    requiredInputs: [],
    fallbackTasks: [],
  };

  // Scripted turn 3: a second plan that declares the goal done.
  const closingPlan = {
    decision: "done",
    summary: "The requested slice is complete.",
    rationale: "The only planned task is complete and verified.",
    goalProgress: "The vertical slice exists.",
    risks: [],
    tasks: [],
    blockedReason: null,
    blockerClass: null,
    requiredInputs: [],
    fallbackTasks: [],
  };

  // Scripted turn 4: the self-check reports readiness for independent review.
  const selfCheck = {
    readyForIndependentCheck: true,
    summary: "The run is ready for independent review.",
    rationale: "Implementation and verification are present.",
    evidence: ["CLI entrypoint exists."],
    remainingGaps: [],
  };

  // Scripted turn 5: the independent check approves the result.
  const independentCheck = {
    verdict: "approved",
    summary: "The goal is satisfied.",
    rationale: "The requested slice exists and is verified.",
    evidence: ["CLI entrypoint present."],
    remainingTasks: [],
  };

  const scriptedTurns = [initialPlan, implementationReport, closingPlan, selfCheck, independentCheck].map(
    (turn) => JSON.stringify(turn),
  );

  // NOTE(review): the temporary directory created here is never removed.
  const workDir = mkdtempSync(path.join(tmpdir(), "agent-r-orchestrator-"));
  const config = loadConfig(workDir);
  const store = new RunStore(config.databasePath);
  const artifacts = new ArtifactManager(config.runsDir);
  const runner = new ScriptedRunner(scriptedTurns);
  const orchestrator = new AgentROrchestrator(config, store, artifacts, runner);

  const { id: runId } = orchestrator.createRun("Create a CLI skeleton");
  const finalRun = await orchestrator.runUntilStable(runId, 10);
  const snapshot = store.buildSnapshot(runId);

  expect(finalRun.status).toBe("done");
  expect(snapshot.completedTasks).toHaveLength(1);
  expect(snapshot.approvals).toHaveLength(2);
  expect(snapshot.pendingTasks).toHaveLength(0);
  // The second prompt is the one that received the task report above.
  expect(runner.prompts[1]).toContain("You must echo the exact task id");
  expect(snapshot.run.waitingInputs).toEqual([]);
});
test("strategy prompt explicitly permits read-only repository inspection", async () => {
  // A single "blocked" plan is scripted; only the first prompt is inspected.
  const blockedPlan = {
    decision: "blocked",
    summary: "Cannot continue.",
    rationale: "Test stop.",
    goalProgress: "None.",
    risks: [],
    tasks: [],
    blockedReason: "Stop immediately.",
    blockerClass: "contradiction",
    requiredInputs: [],
    fallbackTasks: [],
  };

  // NOTE(review): the temporary directory created here is never removed.
  const workDir = mkdtempSync(path.join(tmpdir(), "agent-r-strategy-"));
  const config = loadConfig(workDir);
  const store = new RunStore(config.databasePath);
  const artifacts = new ArtifactManager(config.runsDir);
  const runner = new ScriptedRunner([JSON.stringify(blockedPlan)]);
  const orchestrator = new AgentROrchestrator(config, store, artifacts, runner);

  const { id: runId } = orchestrator.createRun("Inspect repo");
  await orchestrator.runUntilStable(runId, 1);

  const [firstPrompt] = runner.prompts;
  expect(firstPrompt).toContain("You may inspect the repository directly");
  expect(firstPrompt).toContain("Do not edit files");
});
test("strategy plan schema requires blockedReason and accepts null", () => {
  // Sample plan with both nullable blocker fields set to null, passed through
  // a JSON round trip before it is inspected.
  const samplePlan = {
    decision: "continue",
    summary: "Keep going.",
    rationale: "Work remains.",
    goalProgress: "Partial.",
    risks: [],
    tasks: [],
    blockedReason: null,
    blockerClass: null,
    requiredInputs: [],
    fallbackTasks: [],
  };
  const serialized = JSON.stringify(samplePlan);
  const valid = JSON.parse(serialized) as Record<string, unknown>;

  // Both blocker fields must be listed as required by the schema.
  for (const field of ["blockedReason", "blockerClass"]) {
    expect(strategyPlanSchema.required).toContain(field);
  }
  // blockedReason is declared as either a string or null.
  expect(strategyPlanSchema.properties.blockedReason.type).toEqual(["string", "null"]);
  expect(valid.blockedReason).toBeNull();
});
test("moves to waiting_input when implementation reports missing external input", async () => {
  // Scripted turn 1: a "continue" plan with one long-running hardware task.
  const hardwarePlan = {
    decision: "continue",
    summary: "Need a hardware proof task.",
    rationale: "Work remains.",
    goalProgress: "Partial.",
    risks: [],
    tasks: [
      {
        title: "Run hardware proof",
        objective: "Execute the physical node proof.",
        acceptanceCriteria: ["Proof succeeds."],
        verificationSteps: ["Run the hardware script."],
        allowedPaths: ["scripts/**"],
        runtimeClass: "long",
      },
    ],
    blockedReason: null,
    blockerClass: null,
    requiredInputs: [],
    fallbackTasks: [],
  };

  // Scripted turn 2: the task reports itself blocked on an external resource
  // and names the input it needs. The empty taskId is a placeholder that
  // ScriptedRunner replaces with the id quoted in the prompt.
  const blockedReport = {
    taskId: "",
    status: "blocked",
    summary: "Waiting for real hardware.",
    changes: [],
    verification: [
      {
        command: "scripts/hardware-proof.sh",
        outcome: "not_run",
        details: "No accessible physical node was available.",
        category: "proof",
        artifacts: [],
        metrics: {},
      },
    ],
    followUps: [],
    touchedFiles: [],
    blockers: ["No accessible physical node."],
    blockerClass: "external_resource",
    requiredInputs: ["Reachable physical node with ISO boot path"],
    fallbackTasks: [],
  };

  const scriptedTurns = [hardwarePlan, blockedReport].map((turn) => JSON.stringify(turn));

  // NOTE(review): the temporary directory created here is never removed.
  const workDir = mkdtempSync(path.join(tmpdir(), "agent-r-waiting-input-"));
  const config = loadConfig(workDir);
  const store = new RunStore(config.databasePath);
  const artifacts = new ArtifactManager(config.runsDir);
  const runner = new ScriptedRunner(scriptedTurns);
  const orchestrator = new AgentROrchestrator(config, store, artifacts, runner);

  const { id: runId } = orchestrator.createRun("Prove the hardware path");
  const finalRun = await orchestrator.runUntilStable(runId, 5);
  const snapshot = store.buildSnapshot(runId);

  expect(finalRun.status).toBe("waiting_input");
  // The required input from the blocked report is surfaced on the run.
  expect(snapshot.run.waitingInputs).toEqual(["Reachable physical node with ISO boot path"]);
  expect(snapshot.blockedTasks).toHaveLength(1);
});
});