350 lines
12 KiB
TypeScript
350 lines
12 KiB
TypeScript
import { mkdtempSync, readFileSync } from "node:fs";
|
|
import { tmpdir } from "node:os";
|
|
import { DatabaseSync } from "node:sqlite";
|
|
import path from "node:path";
|
|
import { describe, expect, test, vi } from "vitest";
|
|
import { ArtifactManager } from "../src/artifacts.js";
|
|
import { loadConfig } from "../src/config.js";
|
|
import { AgentROrchestrator } from "../src/orchestrator.js";
|
|
import { strategyPlanSchema } from "../src/schema-catalog.js";
|
|
import { RunStore } from "../src/store.js";
|
|
import type { AgentInvocation, AgentInvocationResult, ProgressEvent, RawAgentRunner } from "../src/types.js";
|
|
|
|
/**
 * Test double for `RawAgentRunner` that replays a fixed script of JSON-encoded
 * agent outputs, handing back one entry per `invoke` call, in order.
 *
 * Prompts it receives are recorded in `prompts` and the progress events it
 * creates are recorded in `progressEvents`, so tests can assert on them.
 */
class ScriptedRunner implements RawAgentRunner {
  /** Prompt text of every invocation that was served a scripted output, in call order. */
  public readonly prompts: string[] = [];

  /** Every progress event this runner created, in emission order. */
  public readonly progressEvents: ProgressEvent[] = [];

  /** Outputs still to be replayed. */
  private readonly queue: string[];

  /**
   * @param outputs JSON strings to replay, first call first. The array is
   *   copied, so the caller's array is not drained as turns are consumed.
   */
  constructor(outputs: string[]) {
    this.queue = [...outputs];
  }

  /**
   * Serves the next scripted output as the result of this invocation.
   *
   * @param request Invocation to answer; its prompt is recorded and its schema
   *   file is read from disk.
   * @returns A result whose `output` is the parsed script entry. When the entry
   *   carries an empty-string `taskId`, it is replaced by the id quoted in the
   *   prompt after the words "exact task id" (if the prompt contains one).
   * @throws Error when every scripted output has already been consumed.
   */
  async invoke<T>(request: AgentInvocation<T>): Promise<AgentInvocationResult<T>> {
    const next = this.queue.shift();
    // Compare against undefined rather than testing truthiness, so only a
    // genuinely exhausted queue produces this error.
    if (next === undefined) {
      throw new Error("No scripted output remaining.");
    }

    this.prompts.push(request.prompt);
    // The contents are discarded; this call only throws if the schema file
    // cannot be read.
    readFileSync(request.artifacts.schemaPath, "utf8");

    if (request.onProgress) {
      const progressEvent: ProgressEvent = {
        ts: new Date().toISOString(),
        runId: request.runId,
        source: request.role,
        kind: "turn.started",
        message: "Started scripted turn.",
        payload: {},
      };
      this.progressEvents.push(progressEvent);
      await request.onProgress(progressEvent);
    }

    const parsed = JSON.parse(next) as Record<string, unknown>;
    // Fixtures leave taskId as "" — presumably because the real id is not
    // known when the script is written — so echo back the one in the prompt.
    if (parsed.taskId === "") {
      const match = /exact task id `([^`]+)`/.exec(request.prompt);
      if (match) {
        parsed.taskId = match[1];
      }
    }

    return {
      // Falls back to a counter-based id; prompts.length already includes this call.
      sessionId: request.sessionId ?? `session-${this.prompts.length}`,
      output: parsed as T,
      rawMessage: JSON.stringify(parsed),
      rawEvents: ['{"type":"turn.completed"}'],
      stderr: "",
      artifacts: request.artifacts,
    };
  }
}
|
|
|
|
/**
 * Test double for `RawAgentRunner` whose `invoke` promise stays unresolved
 * until the test calls `complete`, which lets a test hold a turn open (for
 * example while it advances fake timers).
 *
 * Only one invocation may be outstanding at a time.
 */
class DeferredRunner implements RawAgentRunner {
  /** The invocation awaiting `complete`, or null when none is outstanding. */
  private pending: {
    request: AgentInvocation<unknown>;
    resolve: (value: AgentInvocationResult<unknown>) => void;
  } | null = null;

  /**
   * Parks the invocation; the returned promise settles only when `complete`
   * is called.
   *
   * @throws Error (as a rejection) when an earlier invocation is still
   *   pending. Overwriting it would leave that earlier promise unresolvable
   *   and hang the test without any diagnostic.
   */
  async invoke<T>(request: AgentInvocation<T>): Promise<AgentInvocationResult<T>> {
    if (this.pending) {
      throw new Error("An invocation is already pending; call complete() before invoking again.");
    }

    return await new Promise<AgentInvocationResult<T>>((resolve) => {
      this.pending = {
        request: request as AgentInvocation<unknown>,
        resolve: resolve as (value: AgentInvocationResult<unknown>) => void,
      };
    });
  }

  /**
   * Resolves the parked invocation with `output` as the agent's result.
   *
   * @param output Value to return as the invocation's `output`; it is also
   *   serialized into `rawMessage`.
   * @throws Error when no invocation is currently pending.
   */
  complete(output: Record<string, unknown>): void {
    if (!this.pending) {
      throw new Error("No pending invocation to complete.");
    }

    const { request, resolve } = this.pending;
    // Clear the slot before resolving so a follow-up invoke can be parked.
    this.pending = null;
    resolve({
      sessionId: request.sessionId ?? "session-1",
      output,
      rawMessage: JSON.stringify(output),
      rawEvents: ['{"type":"turn.completed"}'],
      stderr: "",
      artifacts: request.artifacts,
    });
  }
}
|
|
|
|
describe("AgentROrchestrator", () => {
|
|
// Happy path: a scripted five-turn run should finish as "done" with one
// completed task, and its latest persisted checkpoint should stay compact.
test("runs through plan, implementation, self-check, and independent check", async () => {
  const repoPath = mkdtempSync(path.join(tmpdir(), "agent-r-orchestrator-"));
  const config = loadConfig(repoPath);
  const store = new RunStore(config.databasePath);
  const artifacts = new ArtifactManager(config.runsDir);
  // Scripted turns are replayed in order; the labels below follow the test
  // title and the shape of each payload.
  const runner = new ScriptedRunner([
    // Plan with a single task.
    JSON.stringify({
      decision: "continue",
      summary: "Implement the CLI skeleton.",
      rationale: "The project is empty and needs a first vertical slice.",
      goalProgress: "No implementation exists yet.",
      risks: [],
      tasks: [
        {
          title: "Create CLI entrypoint",
          objective: "Build the initial command surface.",
          acceptanceCriteria: ["A run command exists."],
          verificationSteps: ["Run the help command."],
          allowedPaths: ["src", "package.json"],
          runtimeClass: "short",
        },
      ],
      blockedReason: null,
      blockerClass: null,
      requiredInputs: [],
      fallbackTasks: [],
    }),
    // Implementation report; the empty taskId is filled in by ScriptedRunner
    // from the prompt.
    JSON.stringify({
      taskId: "",
      status: "completed",
      summary: "Added the CLI skeleton.",
      changes: ["Created an entrypoint."],
      verification: [
        {
          command: "agent-r --help",
          outcome: "passed",
          details: "Help output rendered.",
          category: "test",
          artifacts: [],
          metrics: [],
        },
      ],
      followUps: [],
      touchedFiles: ["src/index.ts", "package.json"],
      blockers: [],
      blockerClass: null,
      requiredInputs: [],
      fallbackTasks: [],
    }),
    // Follow-up plan declaring the goal done.
    JSON.stringify({
      decision: "done",
      summary: "The requested slice is complete.",
      rationale: "The only planned task is complete and verified.",
      goalProgress: "The vertical slice exists.",
      risks: [],
      tasks: [],
      blockedReason: null,
      blockerClass: null,
      requiredInputs: [],
      fallbackTasks: [],
    }),
    // Self-check.
    JSON.stringify({
      readyForIndependentCheck: true,
      summary: "The run is ready for independent review.",
      rationale: "Implementation and verification are present.",
      evidence: ["CLI entrypoint exists."],
      remainingGaps: [],
    }),
    // Independent check verdict.
    JSON.stringify({
      verdict: "approved",
      summary: "The goal is satisfied.",
      rationale: "The requested slice exists and is verified.",
      evidence: ["CLI entrypoint present."],
      remainingTasks: [],
    }),
  ]);

  const orchestrator = new AgentROrchestrator(config, store, artifacts, runner);
  const run = orchestrator.createRun("Create a CLI skeleton");

  const firstStatus = await orchestrator.runUntilStable(run.id, 10);
  const snapshot = store.buildSnapshot(run.id);

  // Read the newest checkpoint straight from SQLite, and always release the
  // handle so the test does not leak an open database connection.
  const db = new DatabaseSync(config.databasePath);
  let checkpointRow: { payload_json: string } | undefined;
  try {
    checkpointRow = db
      .prepare("SELECT payload_json FROM checkpoints WHERE run_id = ? ORDER BY rowid DESC LIMIT 1")
      .get(run.id) as { payload_json: string } | undefined;
  } finally {
    db.close();
  }
  // Fail with a readable message instead of a TypeError on `.payload_json`.
  if (checkpointRow === undefined) {
    throw new Error(`No checkpoint row found for run ${run.id}.`);
  }
  const checkpoint = JSON.parse(checkpointRow.payload_json) as Record<string, unknown>;

  expect(firstStatus.status).toBe("done");
  expect(snapshot.completedTasks).toHaveLength(1);
  expect(snapshot.approvals).toHaveLength(2);
  expect(snapshot.pendingTasks).toHaveLength(0);
  // The second prompt is the one that carries the task-id echo instruction.
  expect(runner.prompts.at(1)).toContain("You must echo the exact task id");
  expect(snapshot.run.waitingInputs).toEqual([]);
  expect(checkpoint.counts).toMatchObject({
    completed: 1,
    pending: 0,
  });
  // The checkpoint must not embed artifact paths or event payloads.
  expect(checkpoint).not.toHaveProperty("artifacts.0.path");
  expect(checkpoint).not.toHaveProperty("recentEvents.0.payload");
});
|
|
|
|
// Checks the wording of the first prompt: it must allow inspecting the
// repository and forbid editing files.
test("strategy prompt explicitly permits read-only repository inspection", async () => {
  // One "blocked" plan is enough: only the prompt of the first turn is inspected.
  const blockedPlan = {
    decision: "blocked",
    summary: "Cannot continue.",
    rationale: "Test stop.",
    goalProgress: "None.",
    risks: [],
    tasks: [],
    blockedReason: "Stop immediately.",
    blockerClass: "contradiction",
    requiredInputs: [],
    fallbackTasks: [],
  };

  const config = loadConfig(mkdtempSync(path.join(tmpdir(), "agent-r-strategy-")));
  const store = new RunStore(config.databasePath);
  const artifacts = new ArtifactManager(config.runsDir);
  const runner = new ScriptedRunner([JSON.stringify(blockedPlan)]);
  const orchestrator = new AgentROrchestrator(config, store, artifacts, runner);

  const { id: runId } = orchestrator.createRun("Inspect repo");
  await orchestrator.runUntilStable(runId, 1);

  const [firstPrompt] = runner.prompts;
  expect(firstPrompt).toContain("You may inspect the repository directly");
  expect(firstPrompt).toContain("Do not edit files");
});
|
|
|
|
// Pins two facts about the strategy plan schema: blockedReason and
// blockerClass are required, and blockedReason is typed string-or-null.
test("strategy plan schema requires blockedReason and accepts null", () => {
  const planWithNullBlocker = {
    decision: "continue",
    summary: "Keep going.",
    rationale: "Work remains.",
    goalProgress: "Partial.",
    risks: [],
    tasks: [],
    blockedReason: null,
    blockerClass: null,
    requiredInputs: [],
    fallbackTasks: [],
  };
  const roundTripped = JSON.parse(JSON.stringify(planWithNullBlocker)) as Record<string, unknown>;

  for (const requiredField of ["blockedReason", "blockerClass"]) {
    expect(strategyPlanSchema.required).toContain(requiredField);
  }
  expect(strategyPlanSchema.properties.blockedReason.type).toEqual(["string", "null"]);
  // Note: this only confirms null survives a JSON round trip of the fixture;
  // the fixture is not validated against the schema here.
  expect(roundTripped.blockedReason).toBeNull();
});
|
|
|
|
// A blocked implementation report that lists required inputs should park the
// run in "waiting_input" and expose those inputs on the snapshot.
test("moves to waiting_input when implementation reports missing external input", async () => {
  // First scripted turn: a plan containing one long-running hardware task.
  const hardwarePlan = {
    decision: "continue",
    summary: "Need a hardware proof task.",
    rationale: "Work remains.",
    goalProgress: "Partial.",
    risks: [],
    tasks: [
      {
        title: "Run hardware proof",
        objective: "Execute the physical node proof.",
        acceptanceCriteria: ["Proof succeeds."],
        verificationSteps: ["Run the hardware script."],
        allowedPaths: ["scripts/**"],
        runtimeClass: "long",
      },
    ],
    blockedReason: null,
    blockerClass: null,
    requiredInputs: [],
    fallbackTasks: [],
  };

  // Second scripted turn: the task reports it is blocked on an external
  // resource. The empty taskId is filled in by ScriptedRunner from the prompt.
  const blockedReport = {
    taskId: "",
    status: "blocked",
    summary: "Waiting for real hardware.",
    changes: [],
    verification: [
      {
        command: "scripts/hardware-proof.sh",
        outcome: "not_run",
        details: "No accessible physical node was available.",
        category: "proof",
        artifacts: [],
        metrics: [],
      },
    ],
    followUps: [],
    touchedFiles: [],
    blockers: ["No accessible physical node."],
    blockerClass: "external_resource",
    requiredInputs: ["Reachable physical node with ISO boot path"],
    fallbackTasks: [],
  };

  const config = loadConfig(mkdtempSync(path.join(tmpdir(), "agent-r-waiting-input-")));
  const store = new RunStore(config.databasePath);
  const artifacts = new ArtifactManager(config.runsDir);
  const runner = new ScriptedRunner([hardwarePlan, blockedReport].map((turn) => JSON.stringify(turn)));
  const orchestrator = new AgentROrchestrator(config, store, artifacts, runner);

  const { id: runId } = orchestrator.createRun("Prove the hardware path");
  const finalRun = await orchestrator.runUntilStable(runId, 5);
  const snapshot = store.buildSnapshot(runId);

  expect(finalRun.status).toBe("waiting_input");
  expect(snapshot.run.waitingInputs).toEqual(["Reachable physical node with ISO boot path"]);
  expect(snapshot.blockedTasks).toHaveLength(1);
});
|
|
|
|
// Holds a turn open with DeferredRunner under fake timers and checks that no
// heartbeat event is emitted at 299,999 ms but exactly one exists at 300,000 ms.
test("emits heartbeat only after five minutes of inactivity", async () => {
  const fiveMinutesMs = 5 * 60 * 1000;
  vi.useFakeTimers();

  try {
    const config = loadConfig(mkdtempSync(path.join(tmpdir(), "agent-r-heartbeat-")));
    const store = new RunStore(config.databasePath);
    const artifacts = new ArtifactManager(config.runsDir);
    const runner = new DeferredRunner();

    const progressEvents: ProgressEvent[] = [];
    const heartbeats = () => progressEvents.filter(({ kind }) => kind === "heartbeat");
    const orchestrator = new AgentROrchestrator(config, store, artifacts, runner, (event) => {
      progressEvents.push(event);
    });

    const { id: runId } = orchestrator.createRun("Wait on a long-running strategy turn");
    // Not awaited yet: the turn stays open until runner.complete() below.
    const runPromise = orchestrator.runUntilStable(runId, 1);

    // One millisecond short of five minutes: nothing yet.
    await vi.advanceTimersByTimeAsync(fiveMinutesMs - 1);
    expect(heartbeats()).toHaveLength(0);

    // Crossing the five-minute mark produces exactly one heartbeat.
    await vi.advanceTimersByTimeAsync(1);
    expect(heartbeats()).toHaveLength(1);

    // Release the parked turn with a "blocked" plan so the run settles.
    runner.complete({
      decision: "blocked",
      summary: "Still waiting.",
      rationale: "Synthetic test stop.",
      goalProgress: "None.",
      risks: [],
      tasks: [],
      blockedReason: "Stop after heartbeat.",
      blockerClass: "tool_failure",
      requiredInputs: [],
      fallbackTasks: [],
    });

    const { status } = await runPromise;
    expect(status).toBe("blocked");
  } finally {
    // Always restore real timers so later tests are unaffected.
    vi.useRealTimers();
  }
});
|
|
});
|