Initial agent-r orchestrator

This commit is contained in:
centra 2026-04-04 14:59:45 +09:00
commit a909a99310
Signed by: centra
GPG key ID: 0C09689D20B25ACA
17 changed files with 4530 additions and 0 deletions

5
.gitignore vendored Normal file
View file

@ -0,0 +1,5 @@
node_modules
dist
.agent-r
coverage
*.log

51
README.md Normal file
View file

@ -0,0 +1,51 @@
# agent-r
`agent-r` is a local TypeScript CLI that orchestrates three Codex roles:
- `strategy`: read-only planning, task breakdown, self-check
- `implementation`: workspace-write execution for one task at a time
- `checker`: read-only independent completion review
The CLI persists state in SQLite plus run artifacts under `.agent-r/`, so a long-running effort can be resumed across invocations.
## Commands
```bash
npm install
npm run build
agent-r run "Build a plugin system" --repo /path/to/repo
agent-r resume <run-id> --repo /path/to/repo
agent-r status <run-id> --repo /path/to/repo
agent-r inspect <run-id> --repo /path/to/repo
agent-r logs <run-id> --repo /path/to/repo --agent strategy
```
## Config
Place `agent-r.config.toml` in the target repo if you want to override defaults.
```toml
[state]
dir = ".agent-r"
max_task_attempts = 3
max_cycles_per_run = 40
max_replans = 12
max_tasks = 200
session_refresh_turns = 20
[codex]
command = "codex"
[roles.strategy]
sandbox = "read-only"
search = true
[roles.implementation]
sandbox = "workspace-write"
search = false
[roles.checker]
sandbox = "read-only"
search = false
```

2038
package-lock.json generated Normal file

File diff suppressed because it is too large Load diff

31
package.json Normal file
View file

@ -0,0 +1,31 @@
{
"name": "agent-r",
"version": "0.1.0",
"private": true,
"type": "module",
"description": "Autonomous multi-agent Codex orchestrator with strategy, implementation, and checking roles.",
"bin": {
"agent-r": "dist/index.js"
},
"engines": {
"node": ">=24.0.0"
},
"scripts": {
"build": "tsc -p tsconfig.json && chmod +x dist/index.js",
"dev": "NODE_OPTIONS=--disable-warning=ExperimentalWarning tsx src/index.ts",
"lint": "tsc -p tsconfig.json --noEmit",
"test": "NODE_OPTIONS=--disable-warning=ExperimentalWarning vitest run"
},
"dependencies": {
"@openai/codex-sdk": "^0.118.0",
"ajv": "^8.18.0",
"commander": "^14.0.3",
"smol-toml": "^1.6.1"
},
"devDependencies": {
"@types/node": "^24.7.2",
"tsx": "^4.21.0",
"typescript": "^5.9.3",
"vitest": "^4.1.2"
}
}

69
src/artifacts.ts Normal file
View file

@ -0,0 +1,69 @@
import path from "node:path";
import { ensureDir, safeSlug, timestampId, writeJsonFile, writeTextFile } from "./fs-utils.js";
import type { AgentRole, InvocationArtifacts, RunArtifactsIndex } from "./types.js";
export class ArtifactManager {
constructor(private readonly runsDir: string) {
ensureDir(this.runsDir);
}
getRunDir(runId: string): string {
const dir = path.join(this.runsDir, runId);
ensureDir(dir);
return dir;
}
getRoleDir(runId: string, role: AgentRole): string {
const dir = path.join(this.getRunDir(runId), role);
ensureDir(dir);
return dir;
}
createInvocationArtifacts(runId: string, role: AgentRole, schemaName: string): InvocationArtifacts {
const roleDir = this.getRoleDir(runId, role);
const baseName = `${timestampId()}-${safeSlug(schemaName)}`;
return {
promptPath: path.join(roleDir, `${baseName}.prompt.md`),
schemaPath: path.join(roleDir, `${baseName}.schema.json`),
rawEventsPath: path.join(roleDir, `${baseName}.events.jsonl`),
stderrPath: path.join(roleDir, `${baseName}.stderr.log`),
lastMessagePath: path.join(roleDir, `${baseName}.last.txt`),
responsePath: path.join(roleDir, `${baseName}.response.json`),
};
}
writePrompt(filePath: string, prompt: string): void {
writeTextFile(filePath, prompt);
}
writeSchema(filePath: string, schema: Record<string, unknown>): void {
writeJsonFile(filePath, schema);
}
writeRawEvents(filePath: string, rawEvents: string[]): void {
writeTextFile(filePath, rawEvents.join("\n"));
}
writeStderr(filePath: string, stderr: string): void {
writeTextFile(filePath, stderr);
}
writeLastMessage(filePath: string, message: string): void {
writeTextFile(filePath, message);
}
writeResponse(filePath: string, response: unknown): void {
writeJsonFile(filePath, response);
}
indexFromArtifacts(artifacts: InvocationArtifacts): RunArtifactsIndex {
return {
promptPath: artifacts.promptPath,
responsePath: artifacts.responsePath,
schemaPath: artifacts.schemaPath,
rawEventsPath: artifacts.rawEventsPath,
stderrPath: artifacts.stderrPath,
lastMessagePath: artifacts.lastMessagePath,
};
}
}

139
src/codex-runner.ts Normal file
View file

@ -0,0 +1,139 @@
import { Ajv } from "ajv";
import { Codex, type ThreadEvent, type ThreadOptions } from "@openai/codex-sdk";
import type { AgentInvocation, AgentInvocationResult, RawAgentRunner } from "./types.js";
function extractValidationError(message: string, errorText: string): string {
return [
"The previous response did not parse or validate.",
`Problem: ${errorText}`,
"Respond again with JSON only, matching the schema exactly.",
"Previous response:",
message,
].join("\n\n");
}
export class CodexSdkRunner implements RawAgentRunner {
private readonly ajv = new Ajv({ allErrors: true, strict: false });
private readonly codex: Codex;
constructor(codexCommand: string) {
this.codex = new Codex({
codexPathOverride: codexCommand,
});
}
async invoke<T>(request: AgentInvocation<T>): Promise<AgentInvocationResult<T>> {
let prompt = request.prompt;
let sessionId = request.sessionId;
const maxAttempts = request.maxValidationRetries ?? 2;
let lastFailure = "";
for (let attempt = 1; attempt <= maxAttempts; attempt += 1) {
const raw = await this.invokeOnce({
...request,
prompt,
sessionId,
});
sessionId = raw.sessionId;
try {
const parsed = JSON.parse(raw.rawMessage) as unknown;
const validate = this.ajv.compile(request.schema);
if (!validate(parsed)) {
const errorText = this.ajv.errorsText(validate.errors);
lastFailure = errorText;
if (attempt === maxAttempts) {
throw new Error(`Schema validation failed: ${errorText}`);
}
prompt = extractValidationError(raw.rawMessage, errorText);
continue;
}
return {
...raw,
output: parsed as T,
};
} catch (error) {
const errorText = error instanceof Error ? error.message : String(error);
lastFailure = errorText;
if (attempt === maxAttempts) {
throw new Error(`Structured output failed after ${attempt} attempt(s): ${lastFailure}`);
}
prompt = extractValidationError(raw.rawMessage, errorText);
}
}
throw new Error(`Structured output failed: ${lastFailure}`);
}
private async invokeOnce<T>(request: AgentInvocation<T>): Promise<Omit<AgentInvocationResult<T>, "output">> {
const threadOptions: ThreadOptions = {
sandboxMode: request.roleConfig.sandbox,
workingDirectory: request.cwd,
skipGitRepoCheck: request.roleConfig.skipGitRepoCheck,
approvalPolicy: "never",
networkAccessEnabled: request.roleConfig.search,
webSearchEnabled: request.roleConfig.search,
webSearchMode: request.roleConfig.search ? "live" : "disabled",
};
if (request.roleConfig.model) {
threadOptions.model = request.roleConfig.model;
}
const thread = request.sessionId
? this.codex.resumeThread(request.sessionId, threadOptions)
: this.codex.startThread(threadOptions);
const streamed = await thread.runStreamed(request.prompt, {
outputSchema: request.schema,
});
const rawEvents: string[] = [];
let sessionId = request.sessionId;
let rawMessage = "";
for await (const event of streamed.events) {
rawEvents.push(JSON.stringify(event));
sessionId = this.extractSessionId(sessionId, event);
rawMessage = this.extractLatestMessage(rawMessage, event);
this.throwIfFailed(event);
}
if (!rawMessage.trim()) {
throw new Error("Codex SDK completed without a final agent message.");
}
return {
sessionId: sessionId ?? thread.id,
rawMessage,
rawEvents,
stderr: "",
artifacts: request.artifacts,
};
}
private extractSessionId(current: string | null, event: ThreadEvent): string | null {
if (event.type === "thread.started") {
return event.thread_id;
}
return current;
}
private extractLatestMessage(current: string, event: ThreadEvent): string {
if (event.type === "item.completed" && event.item.type === "agent_message") {
return event.item.text;
}
return current;
}
private throwIfFailed(event: ThreadEvent): void {
if (event.type === "error") {
throw new Error(event.message);
}
if (event.type === "turn.failed") {
throw new Error(event.error.message);
}
}
}

138
src/config.ts Normal file
View file

@ -0,0 +1,138 @@
import { existsSync, readFileSync } from "node:fs";
import path from "node:path";
import { parse } from "smol-toml";
import type { AgentRole, ResolvedConfig, RoleConfig } from "./types.js";
interface ConfigRoleOverride {
model?: string;
sandbox?: RoleConfig["sandbox"];
search?: boolean;
skipGitRepoCheck?: boolean;
skip_git_repo_check?: boolean;
extraArgs?: string[];
extra_args?: string[];
}
interface ConfigFileShape {
state?: {
dir?: string;
max_task_attempts?: number;
max_cycles_per_run?: number;
max_replans?: number;
max_tasks?: number;
session_refresh_turns?: number;
};
codex?: {
command?: string;
};
roles?: Partial<Record<AgentRole, ConfigRoleOverride>>;
}
function defaultRoleConfig(role: AgentRole): RoleConfig {
if (role === "strategy") {
return {
sandbox: "read-only",
search: true,
skipGitRepoCheck: false,
extraArgs: [],
};
}
if (role === "checker") {
return {
sandbox: "read-only",
search: false,
skipGitRepoCheck: false,
extraArgs: [],
};
}
return {
sandbox: "workspace-write",
search: false,
skipGitRepoCheck: false,
extraArgs: [],
};
}
function mergeRoleConfig(role: AgentRole, override?: Partial<RoleConfig>): RoleConfig {
const base = defaultRoleConfig(role);
const merged: RoleConfig = {
sandbox: override?.sandbox ?? base.sandbox,
search: override?.search ?? base.search,
skipGitRepoCheck: override?.skipGitRepoCheck ?? base.skipGitRepoCheck,
extraArgs: override?.extraArgs ?? base.extraArgs,
};
const model = override?.model ?? base.model;
if (model) {
merged.model = model;
}
return merged;
}
export function loadConfig(repoPath: string, configPath?: string): ResolvedConfig {
const absoluteRepoPath = path.resolve(repoPath);
const effectiveConfigPath = configPath ? path.resolve(configPath) : path.join(absoluteRepoPath, "agent-r.config.toml");
let parsedConfig: ConfigFileShape = {};
if (existsSync(effectiveConfigPath)) {
parsedConfig = parse(readFileSync(effectiveConfigPath, "utf8")) as ConfigFileShape;
}
const stateDir = path.resolve(
absoluteRepoPath,
parsedConfig.state?.dir ?? ".agent-r",
);
return {
repoPath: absoluteRepoPath,
stateDir,
databasePath: path.join(stateDir, "state.sqlite"),
runsDir: path.join(stateDir, "runs"),
codexCommand: parsedConfig.codex?.command ?? "codex",
maxTaskAttempts: parsedConfig.state?.max_task_attempts ?? 3,
maxCyclesPerInvocation: parsedConfig.state?.max_cycles_per_run ?? 40,
maxReplans: parsedConfig.state?.max_replans ?? 12,
maxTasks: parsedConfig.state?.max_tasks ?? 200,
sessionRefreshTurns: parsedConfig.state?.session_refresh_turns ?? 20,
roles: {
strategy: mergeRoleConfig("strategy", normalizeRoleOverride(parsedConfig.roles?.strategy)),
implementation: mergeRoleConfig("implementation", normalizeRoleOverride(parsedConfig.roles?.implementation)),
checker: mergeRoleConfig("checker", normalizeRoleOverride(parsedConfig.roles?.checker)),
},
};
}
function normalizeRoleOverride(override?: ConfigRoleOverride): Partial<RoleConfig> | undefined {
if (!override) {
return undefined;
}
const normalized: Partial<RoleConfig> = {};
if (override.model) {
normalized.model = override.model;
}
if (override.sandbox) {
normalized.sandbox = override.sandbox;
}
if (override.search !== undefined) {
normalized.search = override.search;
}
const skipGitRepoCheck = override.skipGitRepoCheck ?? override.skip_git_repo_check;
if (skipGitRepoCheck !== undefined) {
normalized.skipGitRepoCheck = skipGitRepoCheck;
}
const extraArgs = override.extraArgs ?? override.extra_args;
if (extraArgs !== undefined) {
normalized.extraArgs = extraArgs;
}
return normalized;
}

42
src/fs-utils.ts Normal file
View file

@ -0,0 +1,42 @@
import { mkdirSync, readFileSync, readdirSync, writeFileSync } from "node:fs";
import path from "node:path";
export function ensureDir(dirPath: string): void {
mkdirSync(dirPath, { recursive: true });
}
export function writeTextFile(filePath: string, text: string): void {
ensureDir(path.dirname(filePath));
writeFileSync(filePath, text, "utf8");
}
export function writeJsonFile(filePath: string, value: unknown): void {
writeTextFile(filePath, `${JSON.stringify(value, null, 2)}\n`);
}
export function readTextFile(filePath: string): string {
return readFileSync(filePath, "utf8");
}
export function safeSlug(value: string): string {
return value
.toLowerCase()
.replace(/[^a-z0-9]+/g, "-")
.replace(/^-+|-+$/g, "")
.slice(0, 40) || "artifact";
}
export function timestampId(date = new Date()): string {
return date.toISOString().replace(/[:.]/g, "-");
}
export function listFilesRecursive(dirPath: string): string[] {
const entries = readdirSync(dirPath, { withFileTypes: true });
return entries.flatMap((entry) => {
const fullPath = path.join(dirPath, entry.name);
if (entry.isDirectory()) {
return listFilesRecursive(fullPath);
}
return [fullPath];
});
}

127
src/index.ts Normal file
View file

@ -0,0 +1,127 @@
#!/usr/bin/env -S node --disable-warning=ExperimentalWarning
import path from "node:path";
import { Command } from "commander";
import { ArtifactManager } from "./artifacts.js";
import { CodexSdkRunner } from "./codex-runner.js";
import { loadConfig } from "./config.js";
import { listFilesRecursive, readTextFile } from "./fs-utils.js";
import { AgentROrchestrator } from "./orchestrator.js";
import { RunStore } from "./store.js";
import type { AgentRole } from "./types.js";
function resolveRepoPath(repo?: string): string {
return path.resolve(repo ?? process.cwd());
}
function bootstrap(repo?: string, configPath?: string) {
const repoPath = resolveRepoPath(repo);
const config = loadConfig(repoPath, configPath);
const store = new RunStore(config.databasePath);
const artifacts = new ArtifactManager(config.runsDir);
const runner = new CodexSdkRunner(config.codexCommand);
const orchestrator = new AgentROrchestrator(config, store, artifacts, runner);
return { repoPath, config, store, artifacts, orchestrator };
}
function printRunStatus(label: string, run: {
id: string;
status: string;
summary: string | null;
goal: string;
cycleCount: number;
replanCount: number;
currentTaskId: string | null;
}): void {
console.log(`${label}: ${run.id}`);
console.log(`status: ${run.status}`);
console.log(`goal: ${run.goal}`);
console.log(`summary: ${run.summary ?? "none"}`);
console.log(`cycles: ${run.cycleCount}`);
console.log(`replans: ${run.replanCount}`);
console.log(`current task: ${run.currentTaskId ?? "none"}`);
}
const program = new Command();
program.name("agent-r").description("Autonomous Codex multi-agent orchestrator.");
program
.command("run")
.argument("<goal>", "high-level development goal")
.option("--repo <path>", "target repository path")
.option("--config <path>", "path to agent-r config file")
.option("--max-cycles <count>", "override max cycles for this invocation", Number)
.action(async (goal: string, options: { repo?: string; config?: string; maxCycles?: number }) => {
const { orchestrator, store } = bootstrap(options.repo, options.config);
const run = orchestrator.createRun(goal);
const finalRun = await orchestrator.runUntilStable(run.id, options.maxCycles);
printRunStatus("run", finalRun);
console.log(`run id: ${run.id}`);
console.log(`pending tasks: ${store.buildSnapshot(run.id).pendingTasks.length}`);
});
program
.command("resume")
.argument("<runId>", "run id")
.option("--repo <path>", "target repository path")
.option("--config <path>", "path to agent-r config file")
.option("--max-cycles <count>", "override max cycles for this invocation", Number)
.action(async (runId: string, options: { repo?: string; config?: string; maxCycles?: number }) => {
const { orchestrator } = bootstrap(options.repo, options.config);
const finalRun = await orchestrator.runUntilStable(runId, options.maxCycles);
printRunStatus("run", finalRun);
});
program
.command("status")
.argument("<runId>", "run id")
.option("--repo <path>", "target repository path")
.option("--config <path>", "path to agent-r config file")
.action((runId: string, options: { repo?: string; config?: string }) => {
const { store } = bootstrap(options.repo, options.config);
const snapshot = store.buildSnapshot(runId);
printRunStatus("run", snapshot.run);
console.log(`pending: ${snapshot.pendingTasks.length}`);
console.log(`completed: ${snapshot.completedTasks.length}`);
console.log(`blocked/abandoned: ${snapshot.blockedTasks.length}`);
});
program
.command("inspect")
.argument("<runId>", "run id")
.option("--repo <path>", "target repository path")
.option("--config <path>", "path to agent-r config file")
.action((runId: string, options: { repo?: string; config?: string }) => {
const { store } = bootstrap(options.repo, options.config);
console.log(JSON.stringify(store.buildSnapshot(runId), null, 2));
});
program
.command("logs")
.argument("<runId>", "run id")
.option("--repo <path>", "target repository path")
.option("--config <path>", "path to agent-r config file")
.option("--agent <role>", "limit to a single role")
.action((runId: string, options: { repo?: string; config?: string; agent?: AgentRole }) => {
const { artifacts, store } = bootstrap(options.repo, options.config);
const snapshot = store.buildSnapshot(runId);
const artifactFiles = options.agent
? store.listArtifacts(runId, options.agent).map((artifact) => artifact.path)
: listFilesRecursive(artifacts.getRunDir(runId));
console.log(`run: ${runId}`);
console.log(`status: ${snapshot.run.status}`);
console.log("");
for (const filePath of artifactFiles) {
console.log(`# ${filePath}`);
console.log(readTextFile(filePath));
console.log("");
}
});
program.parseAsync(process.argv).catch((error: unknown) => {
const message = error instanceof Error ? error.stack ?? error.message : String(error);
console.error(message);
process.exitCode = 1;
});

423
src/orchestrator.ts Normal file
View file

@ -0,0 +1,423 @@
import type {
AgentRole,
AgentStateRecord,
CheckVerdictOutput,
ImplementationResultOutput,
RawAgentRunner,
ResolvedConfig,
RunRecord,
RunSnapshot,
StrategyPlanOutput,
StrategySelfCheckOutput,
TaskDraft,
TaskRecord,
} from "./types.js";
import { ArtifactManager } from "./artifacts.js";
import {
buildCheckPrompt,
buildImplementationPrompt,
buildStrategyPlanPrompt,
buildStrategySelfCheckPrompt,
formatCheckVerdictSummary,
formatImplementationSummary,
formatSelfCheckSummary,
formatStrategyPlanSummary,
} from "./prompts.js";
import { checkVerdictSchema, implementationResultSchema, strategyPlanSchema, strategySelfCheckSchema } from "./schema-catalog.js";
import { RunStore } from "./store.js";
function nowIso(): string {
return new Date().toISOString();
}
function blockerSignature(blockers: string[]): string | null {
const compact = blockers.map((value) => value.trim().toLowerCase()).filter(Boolean).sort().join("|");
return compact || null;
}
function uniqueTaskDrafts(tasks: TaskDraft[]): TaskDraft[] {
const seen = new Set<string>();
const result: TaskDraft[] = [];
for (const task of tasks) {
const key = JSON.stringify(task);
if (seen.has(key)) {
continue;
}
seen.add(key);
result.push(task);
}
return result;
}
export class AgentROrchestrator {
constructor(
private readonly config: ResolvedConfig,
private readonly store: RunStore,
private readonly artifacts: ArtifactManager,
private readonly runner: RawAgentRunner,
) {}
createRun(goal: string): RunRecord {
return this.store.createRun(goal, this.config.repoPath);
}
async runUntilStable(runId: string, maxCycles = this.config.maxCyclesPerInvocation): Promise<RunRecord> {
this.store.requeueInProgressTasks(runId);
for (let cycle = 0; cycle < maxCycles; cycle += 1) {
const snapshot = this.store.buildSnapshot(runId);
if (snapshot.run.status === "done" || snapshot.run.status === "blocked") {
return snapshot.run;
}
this.store.incrementCycle(runId);
switch (snapshot.run.status) {
case "planning":
if (snapshot.pendingTasks.length > 0) {
await this.dispatchNextTask(runId);
} else {
await this.runStrategyPlan(runId);
}
break;
case "implementing":
await this.dispatchNextTask(runId);
break;
case "self_check":
await this.runStrategySelfCheck(runId);
break;
case "independent_check":
await this.runIndependentCheck(runId);
break;
default:
throw new Error(`Unsupported run state: ${snapshot.run.status}`);
}
}
this.store.addEvent(runId, "system", "cycle_budget_exhausted", "Reached max cycles for this invocation.", { maxCycles });
return this.store.getRun(runId);
}
private async runStrategyPlan(runId: string): Promise<void> {
const snapshot = this.store.buildSnapshot(runId);
const response = await this.invokeRole<StrategyPlanOutput>({
runId,
role: "strategy",
schemaName: "strategy-plan",
prompt: buildStrategyPlanPrompt(snapshot),
schema: strategyPlanSchema,
});
const plan = response.output;
this.store.addEvent(runId, "strategy", "strategy_plan", formatStrategyPlanSummary(plan), plan);
this.store.updateRun(runId, { summary: plan.summary, currentTaskId: null });
this.store.incrementReplanCount(runId);
if (this.store.getRun(runId).replanCount > this.config.maxReplans) {
this.store.updateRun(runId, {
status: "blocked",
summary: `Exceeded replan limit (${this.config.maxReplans}).`,
currentTaskId: null,
});
this.store.addEvent(runId, "system", "blocked", "Run blocked because the replan limit was exceeded.", {});
return;
}
if (plan.decision === "blocked") {
this.store.updateRun(runId, {
status: "blocked",
summary: plan.blockedReason ?? plan.summary,
currentTaskId: null,
});
this.store.addEvent(runId, "strategy", "blocked", "Strategy declared the run blocked.", plan);
this.checkpoint(runId);
return;
}
if (plan.decision === "done") {
this.store.updateRun(runId, {
status: "self_check",
summary: plan.summary,
currentTaskId: null,
});
this.checkpoint(runId);
return;
}
const tasks = uniqueTaskDrafts(plan.tasks).slice(0, this.config.maxTasks);
if (!tasks.length) {
this.store.updateRun(runId, {
status: "blocked",
summary: "Strategy returned continue without any tasks.",
currentTaskId: null,
});
this.store.addEvent(runId, "system", "blocked", "Run blocked because no tasks were returned.", plan);
this.checkpoint(runId);
return;
}
this.store.replacePendingTasks(runId, tasks);
this.store.updateRun(runId, {
status: "planning",
summary: plan.summary,
currentTaskId: null,
});
this.checkpoint(runId);
}
private async dispatchNextTask(runId: string): Promise<void> {
const claimedTask = this.store.claimNextPendingTask(runId);
if (!claimedTask) {
this.store.updateRun(runId, { status: "planning", currentTaskId: null });
return;
}
this.store.updateRun(runId, {
status: "implementing",
currentTaskId: claimedTask.id,
summary: `Implementing ${claimedTask.title}`,
});
this.store.addEvent(runId, "system", "task_dispatched", `Dispatched task ${claimedTask.id}.`, { taskId: claimedTask.id });
const snapshot = this.store.buildSnapshot(runId);
const response = await this.invokeRole<ImplementationResultOutput>({
runId,
role: "implementation",
schemaName: "implementation-result",
prompt: buildImplementationPrompt(snapshot, claimedTask),
schema: implementationResultSchema,
});
const result = response.output;
if (result.taskId !== claimedTask.id) {
throw new Error(`Implementation agent returned mismatched task id: expected ${claimedTask.id}, got ${result.taskId}`);
}
const signature = blockerSignature(result.blockers);
const previousAttempt = this.store.latestTaskAttempt(claimedTask.id);
const latestTask = this.store.getTask(claimedTask.id);
this.store.addTaskAttempt(
runId,
claimedTask.id,
latestTask.attemptCount,
result.status,
result.summary,
result,
signature,
);
this.store.addEvent(runId, "implementation", "implementation_result", formatImplementationSummary(result), result);
if (result.status === "completed") {
this.store.completeTask(claimedTask.id, result.summary);
this.store.updateRun(runId, {
status: "planning",
currentTaskId: null,
summary: result.summary,
});
this.checkpoint(runId);
return;
}
const repeatedBlocker =
previousAttempt &&
previousAttempt.blockerSignature &&
previousAttempt.blockerSignature === signature;
if (result.status === "blocked") {
this.store.blockTask(claimedTask.id, result.summary, signature);
if (repeatedBlocker) {
this.store.updateRun(runId, {
status: "blocked",
currentTaskId: null,
summary: `Repeated blocker for task ${claimedTask.title}.`,
});
this.store.addEvent(runId, "system", "blocked", "Run blocked because the same blocker repeated.", {
taskId: claimedTask.id,
blockerSignature: signature,
});
this.checkpoint(runId);
return;
}
this.store.updateRun(runId, {
status: "planning",
currentTaskId: null,
summary: result.summary,
});
this.checkpoint(runId);
return;
}
if (latestTask.attemptCount >= this.config.maxTaskAttempts) {
this.store.blockTask(claimedTask.id, result.summary, signature);
this.store.updateRun(runId, {
status: "blocked",
currentTaskId: null,
summary: `Task ${claimedTask.title} exceeded the retry limit.`,
});
this.store.addEvent(runId, "system", "blocked", "Run blocked because task retry limit was exceeded.", {
taskId: claimedTask.id,
attempts: latestTask.attemptCount,
});
this.checkpoint(runId);
return;
}
this.store.abandonTask(claimedTask.id, result.summary, signature);
this.store.updateRun(runId, {
status: "planning",
currentTaskId: null,
summary: result.summary,
});
this.checkpoint(runId);
}
private async runStrategySelfCheck(runId: string): Promise<void> {
const snapshot = this.store.buildSnapshot(runId);
const lastPlan = snapshot.recentEvents.findLast((event) => event.kind === "strategy_plan")?.payload as
| StrategyPlanOutput
| undefined;
if (!lastPlan) {
this.store.updateRun(runId, { status: "planning" });
return;
}
const response = await this.invokeRole<StrategySelfCheckOutput>({
runId,
role: "strategy",
schemaName: "strategy-self-check",
prompt: buildStrategySelfCheckPrompt(snapshot, lastPlan),
schema: strategySelfCheckSchema,
});
const selfCheck = response.output;
this.store.addApproval(
runId,
"strategy_self_check",
selfCheck.readyForIndependentCheck ? "approved" : "rejected",
selfCheck.rationale,
selfCheck,
);
this.store.addEvent(runId, "strategy", "strategy_self_check", formatSelfCheckSummary(selfCheck), selfCheck);
this.store.updateRun(runId, {
status: selfCheck.readyForIndependentCheck ? "independent_check" : "planning",
summary: selfCheck.summary,
currentTaskId: null,
});
this.checkpoint(runId);
}
private async runIndependentCheck(runId: string): Promise<void> {
const snapshot = this.store.buildSnapshot(runId);
const latestSelfCheck = this.store.latestApproval(runId, "strategy_self_check")?.payload as
| StrategySelfCheckOutput
| undefined;
if (!latestSelfCheck) {
this.store.updateRun(runId, { status: "planning" });
return;
}
const response = await this.invokeRole<CheckVerdictOutput>({
runId,
role: "checker",
schemaName: "independent-check",
prompt: buildCheckPrompt(snapshot, latestSelfCheck),
schema: checkVerdictSchema,
});
const verdict = response.output;
this.store.addApproval(
runId,
"checker",
verdict.verdict === "approved" ? "approved" : "rejected",
verdict.rationale,
verdict,
);
this.store.addEvent(runId, "checker", "check_verdict", formatCheckVerdictSummary(verdict), verdict);
if (verdict.verdict === "approved") {
this.store.updateRun(runId, {
status: "done",
summary: verdict.summary,
currentTaskId: null,
});
this.checkpoint(runId);
return;
}
this.store.updateRun(runId, {
status: "planning",
summary: verdict.summary,
currentTaskId: null,
});
this.checkpoint(runId);
}
private async invokeRole<T>(options: {
runId: string;
role: AgentRole;
schemaName: string;
prompt: string;
schema: Record<string, unknown>;
}): Promise<{ output: T }> {
const state = this.store.getAgentState(options.runId, options.role);
const nextSession = this.shouldRotate(state) ? null : state?.sessionId ?? null;
const prompt = this.shouldRotate(state)
? `Session refresh for role ${options.role}. Continue the run using this compressed state.\n\n${options.prompt}`
: options.prompt;
const artifacts = this.artifacts.createInvocationArtifacts(options.runId, options.role, options.schemaName);
this.artifacts.writePrompt(artifacts.promptPath, prompt);
this.artifacts.writeSchema(artifacts.schemaPath, options.schema);
const result = await this.runner.invoke<T>({
runId: options.runId,
role: options.role,
sessionId: nextSession,
prompt,
schemaName: options.schemaName,
schema: options.schema,
cwd: this.config.repoPath,
roleConfig: this.config.roles[options.role],
artifacts,
maxValidationRetries: 2,
});
this.artifacts.writeRawEvents(artifacts.rawEventsPath, result.rawEvents);
this.artifacts.writeStderr(artifacts.stderrPath, result.stderr);
this.artifacts.writeLastMessage(artifacts.lastMessagePath, result.rawMessage);
this.artifacts.writeResponse(artifacts.responsePath, result.output);
this.store.addArtifact(options.runId, options.role, `${options.schemaName}.prompt`, artifacts.promptPath, {});
this.store.addArtifact(options.runId, options.role, `${options.schemaName}.schema`, artifacts.schemaPath, {});
this.store.addArtifact(options.runId, options.role, `${options.schemaName}.events`, artifacts.rawEventsPath, {});
this.store.addArtifact(options.runId, options.role, `${options.schemaName}.stderr`, artifacts.stderrPath, {});
this.store.addArtifact(options.runId, options.role, `${options.schemaName}.last`, artifacts.lastMessagePath, {});
this.store.addArtifact(options.runId, options.role, `${options.schemaName}.response`, artifacts.responsePath, {});
const nextState: AgentStateRecord = {
runId: options.runId,
role: options.role,
sessionId: result.sessionId,
turns: (this.shouldRotate(state) ? 0 : state?.turns ?? 0) + 1,
rotationCount: state ? state.rotationCount + (this.shouldRotate(state) ? 1 : 0) : 0,
lastPromptPath: artifacts.promptPath,
lastResponsePath: artifacts.responsePath,
updatedAt: nowIso(),
};
this.store.saveAgentState(nextState);
return { output: result.output };
}
private shouldRotate(state: AgentStateRecord | null): boolean {
return Boolean(state?.sessionId && state.turns >= this.config.sessionRefreshTurns);
}
private checkpoint(runId: string): void {
const snapshot = this.store.buildSnapshot(runId);
this.store.addCheckpoint(runId, snapshot.run.status, snapshot);
}
}

250
src/prompts.ts Normal file
View file

@ -0,0 +1,250 @@
import type {
CheckVerdictOutput,
RunSnapshot,
StrategyPlanOutput,
StrategySelfCheckOutput,
TaskDraft,
TaskRecord,
} from "./types.js";
function bulletList(values: string[]): string {
return values.length ? values.map((value) => `- ${value}`).join("\n") : "- none";
}
function renderTask(task: TaskDraft | TaskRecord): string {
return [
`Title: ${task.title}`,
`Objective: ${task.objective}`,
`Acceptance:`,
bulletList(task.acceptanceCriteria),
`Verification:`,
bulletList(task.verificationSteps),
`Allowed paths:`,
bulletList(task.allowedPaths),
].join("\n");
}
function renderTasks(tasks: TaskDraft[] | TaskRecord[]): string {
return tasks.length
? tasks.map((task, index) => `Task ${index + 1}\n${renderTask(task)}`).join("\n\n")
: "No tasks.";
}
function renderApprovals(snapshot: RunSnapshot): string {
if (!snapshot.approvals.length) {
return "No approvals yet.";
}
return snapshot.approvals
.map(
(approval) =>
`- ${approval.createdAt} ${approval.source} ${approval.verdict}: ${approval.rationale}`,
)
.join("\n");
}
function renderRecentAttempts(snapshot: RunSnapshot): string {
if (!snapshot.recentAttempts.length) {
return "No implementation attempts yet.";
}
return snapshot.recentAttempts
.map((attempt) => {
const payload = JSON.parse(attempt.resultJson) as { changes?: string[]; followUps?: string[]; blockers?: string[] };
return [
`- Task ${attempt.taskId} attempt ${attempt.attemptNumber}: ${attempt.status}`,
` Summary: ${attempt.summary}`,
` Changes: ${(payload.changes ?? []).join("; ") || "none"}`,
` Follow-ups: ${(payload.followUps ?? []).join("; ") || "none"}`,
` Blockers: ${(payload.blockers ?? []).join("; ") || "none"}`,
].join("\n");
})
.join("\n");
}
function renderEvents(snapshot: RunSnapshot): string {
if (!snapshot.recentEvents.length) {
return "No events yet.";
}
return snapshot.recentEvents
.map((event) => `- ${event.ts} ${event.kind}: ${event.message}`)
.join("\n");
}
function renderRunState(snapshot: RunSnapshot): string {
return [
`Run ID: ${snapshot.run.id}`,
`Goal: ${snapshot.run.goal}`,
`Status: ${snapshot.run.status}`,
`Repo: ${snapshot.run.repoPath}`,
`Cycles executed: ${snapshot.run.cycleCount}`,
`Replans: ${snapshot.run.replanCount}`,
`Current task: ${snapshot.run.currentTaskId ?? "none"}`,
].join("\n");
}
export function buildStrategyPlanPrompt(snapshot: RunSnapshot): string {
const checkerApproval = snapshot.approvals.findLast((approval) => approval.source === "checker");
return `
You are the strategy agent for an autonomous development system.
Role rules:
- You may inspect the repository directly, but treat that as research only.
- Do not edit files and do not implement tasks yourself.
- Break work into concrete tasks for the implementation agent.
- Lean on the provided summary first. Read more of the repo only when it materially improves strategy.
- Only declare the project done when the user goal is actually satisfied, with evidence from completed work.
Return JSON that matches the provided schema exactly.
Current run state:
${renderRunState(snapshot)}
Pending tasks:
${renderTasks(snapshot.pendingTasks)}
Completed tasks:
${renderTasks(snapshot.completedTasks)}
Blocked or abandoned tasks:
${renderTasks(snapshot.blockedTasks)}
Recent implementation attempts:
${renderRecentAttempts(snapshot)}
Recent events:
${renderEvents(snapshot)}
Latest approvals:
${renderApprovals(snapshot)}
Latest checker verdict:
${checkerApproval ? JSON.stringify(checkerApproval.payload, null, 2) : "No checker verdict yet."}
Decision policy:
- Use \`decision: "continue"\` when there is meaningful work left. Provide the complete next backlog.
- Use \`decision: "done"\` only when no substantial work remains.
- Use \`decision: "blocked"\` only for external blockers or unresolved contradictions.
- Keep the backlog concise. Prefer a few substantive tasks over many trivial tasks.
`.trim();
}
export function buildImplementationPrompt(snapshot: RunSnapshot, task: TaskRecord): string {
return `
You are the implementation agent for an autonomous development system.
Role rules:
- You are responsible for executing exactly one task.
- You may modify files in the repository.
- Stay within the task's allowed paths unless the task itself truly requires broader changes, and call that out if it happens.
- Run relevant verification when possible.
- If you cannot complete the task, return \`needs_replan\` or \`blocked\` with concrete blockers.
Return JSON that matches the provided schema exactly.
Overall goal:
${snapshot.run.goal}
Task:
${renderTask(task)}
Recent completed tasks:
${renderTasks(snapshot.completedTasks.slice(-5))}
Recent blocked tasks:
${renderTasks(snapshot.blockedTasks.slice(-5))}
Recent approvals:
${renderApprovals(snapshot)}
You must echo the exact task id \`${task.id}\`.
`.trim();
}
export function buildStrategySelfCheckPrompt(
snapshot: RunSnapshot,
lastPlan: StrategyPlanOutput,
): string {
return `
You are the strategy agent performing a self-check before independent review.
Role rules:
- Re-evaluate the original goal against the repository and the execution history.
- You may inspect the repo, but you still must not edit files.
- Be strict. If meaningful work remains, do not send the run to the checker.
Return JSON that matches the provided schema exactly.
Original goal:
${snapshot.run.goal}
Latest strategy decision:
${JSON.stringify(lastPlan, null, 2)}
Completed tasks:
${renderTasks(snapshot.completedTasks)}
Blocked or abandoned tasks:
${renderTasks(snapshot.blockedTasks)}
Recent implementation attempts:
${renderRecentAttempts(snapshot)}
`.trim();
}
export function buildCheckPrompt(
snapshot: RunSnapshot,
selfCheck: StrategySelfCheckOutput,
): string {
return `
You are the independent check agent for an autonomous development system.
Role rules:
- You are independent from the strategy agent.
- You may inspect the repository directly.
- You must not edit files.
- Evaluate whether the original user goal is actually complete.
- Reject completion when there are still meaningful tasks, risks, or missing evidence.
Return JSON that matches the provided schema exactly.
Original goal:
${snapshot.run.goal}
Current run state:
${renderRunState(snapshot)}
Strategy self-check:
${JSON.stringify(selfCheck, null, 2)}
Completed tasks:
${renderTasks(snapshot.completedTasks)}
Blocked or abandoned tasks:
${renderTasks(snapshot.blockedTasks)}
Recent implementation attempts:
${renderRecentAttempts(snapshot)}
`.trim();
}
export function formatStrategyPlanSummary(plan: StrategyPlanOutput): string {
return `${plan.decision}: ${plan.summary}`;
}
export function formatImplementationSummary(result: {
status: string;
summary: string;
touchedFiles: string[];
}): string {
return `${result.status}: ${result.summary} | touched: ${result.touchedFiles.join(", ") || "none"}`;
}
export function formatSelfCheckSummary(result: StrategySelfCheckOutput): string {
return `${result.readyForIndependentCheck ? "ready" : "not ready"}: ${result.summary}`;
}
export function formatCheckVerdictSummary(result: CheckVerdictOutput): string {
return `${result.verdict}: ${result.summary}`;
}

141
src/schema-catalog.ts Normal file
View file

@ -0,0 +1,141 @@
export const taskDraftSchema = {
type: "object",
additionalProperties: false,
required: ["title", "objective", "acceptanceCriteria", "verificationSteps", "allowedPaths"],
properties: {
title: { type: "string", minLength: 1 },
objective: { type: "string", minLength: 1 },
acceptanceCriteria: {
type: "array",
items: { type: "string", minLength: 1 },
},
verificationSteps: {
type: "array",
items: { type: "string", minLength: 1 },
},
allowedPaths: {
type: "array",
items: { type: "string", minLength: 1 },
},
},
} as const;
export const strategyPlanSchema = {
type: "object",
additionalProperties: false,
required: ["decision", "summary", "rationale", "goalProgress", "risks", "tasks"],
properties: {
decision: {
type: "string",
enum: ["continue", "done", "blocked"],
},
summary: { type: "string", minLength: 1 },
rationale: { type: "string", minLength: 1 },
goalProgress: { type: "string", minLength: 1 },
risks: {
type: "array",
items: { type: "string", minLength: 1 },
},
tasks: {
type: "array",
items: taskDraftSchema,
},
blockedReason: { type: "string" },
},
} as const;
export const strategySelfCheckSchema = {
type: "object",
additionalProperties: false,
required: ["readyForIndependentCheck", "summary", "rationale", "evidence", "remainingGaps"],
properties: {
readyForIndependentCheck: { type: "boolean" },
summary: { type: "string", minLength: 1 },
rationale: { type: "string", minLength: 1 },
evidence: {
type: "array",
items: { type: "string", minLength: 1 },
},
remainingGaps: {
type: "array",
items: { type: "string", minLength: 1 },
},
},
} as const;
export const implementationResultSchema = {
type: "object",
additionalProperties: false,
required: [
"taskId",
"status",
"summary",
"changes",
"verification",
"followUps",
"touchedFiles",
"blockers",
],
properties: {
taskId: { type: "string", minLength: 1 },
status: {
type: "string",
enum: ["completed", "needs_replan", "blocked"],
},
summary: { type: "string", minLength: 1 },
changes: {
type: "array",
items: { type: "string", minLength: 1 },
},
verification: {
type: "array",
items: {
type: "object",
additionalProperties: false,
required: ["command", "outcome", "details"],
properties: {
command: { type: "string", minLength: 1 },
outcome: {
type: "string",
enum: ["passed", "failed", "not_run"],
},
details: { type: "string", minLength: 1 },
},
},
},
followUps: {
type: "array",
items: { type: "string", minLength: 1 },
},
touchedFiles: {
type: "array",
items: { type: "string", minLength: 1 },
},
blockers: {
type: "array",
items: { type: "string", minLength: 1 },
},
},
} as const;
export const checkVerdictSchema = {
type: "object",
additionalProperties: false,
required: ["verdict", "summary", "rationale", "evidence", "remainingTasks"],
properties: {
verdict: {
type: "string",
enum: ["approved", "rejected"],
},
summary: { type: "string", minLength: 1 },
rationale: { type: "string", minLength: 1 },
evidence: {
type: "array",
items: { type: "string", minLength: 1 },
},
remainingTasks: {
type: "array",
items: taskDraftSchema,
},
},
} as const;

647
src/store.ts Normal file
View file

@ -0,0 +1,647 @@
import { randomUUID } from "node:crypto";
import { DatabaseSync } from "node:sqlite";
import path from "node:path";
import { ensureDir } from "./fs-utils.js";
import type {
AgentRole,
AgentStateRecord,
ApprovalRecord,
ApprovalSource,
ApprovalVerdict,
ArtifactRecord,
AttemptStatus,
CheckpointRecord,
EventRecord,
RunRecord,
RunSnapshot,
RunStatus,
TaskAttemptRecord,
TaskDraft,
TaskRecord,
TaskStatus,
} from "./types.js";
function nowIso(): string {
return new Date().toISOString();
}
function parseJson<T>(value: string | null): T {
if (!value) {
return [] as T;
}
return JSON.parse(value) as T;
}
function toJson(value: unknown): string {
return JSON.stringify(value);
}
function mapTaskRow(row: Record<string, unknown>): TaskRecord {
return {
id: String(row.id),
runId: String(row.run_id),
title: String(row.title),
objective: String(row.objective),
acceptanceCriteria: parseJson<string[]>(String(row.acceptance_criteria_json)),
verificationSteps: parseJson<string[]>(String(row.verification_steps_json)),
allowedPaths: parseJson<string[]>(String(row.allowed_paths_json)),
status: row.status as TaskStatus,
attemptCount: Number(row.attempt_count),
implementationSummary: row.implementation_summary ? String(row.implementation_summary) : null,
blockerSignature: row.blocker_signature ? String(row.blocker_signature) : null,
createdAt: String(row.created_at),
updatedAt: String(row.updated_at),
};
}
function mapAttemptRow(row: Record<string, unknown>): TaskAttemptRecord {
return {
id: String(row.id),
runId: String(row.run_id),
taskId: String(row.task_id),
attemptNumber: Number(row.attempt_number),
status: row.status as AttemptStatus,
summary: String(row.summary),
blockerSignature: row.blocker_signature ? String(row.blocker_signature) : null,
resultJson: String(row.result_json),
createdAt: String(row.created_at),
};
}
function mapRunRow(row: Record<string, unknown>): RunRecord {
return {
id: String(row.id),
goal: String(row.goal),
repoPath: String(row.repo_path),
status: row.status as RunStatus,
summary: row.summary ? String(row.summary) : null,
cycleCount: Number(row.cycle_count),
replanCount: Number(row.replan_count),
currentTaskId: row.current_task_id ? String(row.current_task_id) : null,
createdAt: String(row.created_at),
updatedAt: String(row.updated_at),
};
}
function mapAgentRow(row: Record<string, unknown>): AgentStateRecord {
return {
runId: String(row.run_id),
role: row.role as AgentRole,
sessionId: row.session_id ? String(row.session_id) : null,
turns: Number(row.turns),
rotationCount: Number(row.rotation_count),
lastPromptPath: row.last_prompt_path ? String(row.last_prompt_path) : null,
lastResponsePath: row.last_response_path ? String(row.last_response_path) : null,
updatedAt: String(row.updated_at),
};
}
export class RunStore {
private readonly db: DatabaseSync;
constructor(dbPath: string) {
ensureDir(path.dirname(dbPath));
this.db = new DatabaseSync(dbPath);
this.db.exec("PRAGMA journal_mode = WAL;");
this.db.exec(`
CREATE TABLE IF NOT EXISTS runs (
id TEXT PRIMARY KEY,
goal TEXT NOT NULL,
repo_path TEXT NOT NULL,
status TEXT NOT NULL,
summary TEXT,
cycle_count INTEGER NOT NULL DEFAULT 0,
replan_count INTEGER NOT NULL DEFAULT 0,
current_task_id TEXT,
created_at TEXT NOT NULL,
updated_at TEXT NOT NULL
);
CREATE TABLE IF NOT EXISTS agents (
run_id TEXT NOT NULL,
role TEXT NOT NULL,
session_id TEXT,
turns INTEGER NOT NULL DEFAULT 0,
rotation_count INTEGER NOT NULL DEFAULT 0,
last_prompt_path TEXT,
last_response_path TEXT,
updated_at TEXT NOT NULL,
PRIMARY KEY (run_id, role)
);
CREATE TABLE IF NOT EXISTS tasks (
id TEXT PRIMARY KEY,
run_id TEXT NOT NULL,
title TEXT NOT NULL,
objective TEXT NOT NULL,
acceptance_criteria_json TEXT NOT NULL,
verification_steps_json TEXT NOT NULL,
allowed_paths_json TEXT NOT NULL,
status TEXT NOT NULL,
attempt_count INTEGER NOT NULL DEFAULT 0,
implementation_summary TEXT,
blocker_signature TEXT,
created_at TEXT NOT NULL,
updated_at TEXT NOT NULL
);
CREATE TABLE IF NOT EXISTS task_attempts (
id TEXT PRIMARY KEY,
run_id TEXT NOT NULL,
task_id TEXT NOT NULL,
attempt_number INTEGER NOT NULL,
status TEXT NOT NULL,
summary TEXT NOT NULL,
blocker_signature TEXT,
result_json TEXT NOT NULL,
created_at TEXT NOT NULL
);
CREATE TABLE IF NOT EXISTS events (
id INTEGER PRIMARY KEY AUTOINCREMENT,
run_id TEXT NOT NULL,
ts TEXT NOT NULL,
source TEXT NOT NULL,
kind TEXT NOT NULL,
message TEXT NOT NULL,
payload_json TEXT NOT NULL
);
CREATE TABLE IF NOT EXISTS artifacts (
id TEXT PRIMARY KEY,
run_id TEXT NOT NULL,
role TEXT,
kind TEXT NOT NULL,
path TEXT NOT NULL,
created_at TEXT NOT NULL,
metadata_json TEXT NOT NULL
);
CREATE TABLE IF NOT EXISTS checkpoints (
id TEXT PRIMARY KEY,
run_id TEXT NOT NULL,
status TEXT NOT NULL,
payload_json TEXT NOT NULL,
created_at TEXT NOT NULL
);
CREATE TABLE IF NOT EXISTS approvals (
id TEXT PRIMARY KEY,
run_id TEXT NOT NULL,
source TEXT NOT NULL,
verdict TEXT NOT NULL,
rationale TEXT NOT NULL,
payload_json TEXT NOT NULL,
created_at TEXT NOT NULL
);
`);
}
createRun(goal: string, repoPath: string): RunRecord {
const timestamp = nowIso();
const id = randomUUID();
this.db
.prepare(`
INSERT INTO runs (id, goal, repo_path, status, summary, cycle_count, replan_count, current_task_id, created_at, updated_at)
VALUES (?, ?, ?, 'planning', NULL, 0, 0, NULL, ?, ?)
`)
.run(id, goal, repoPath, timestamp, timestamp);
this.addEvent(id, "system", "run_created", "Run created.", { goal, repoPath });
return this.getRun(id);
}
getRun(runId: string): RunRecord {
const row = this.db.prepare("SELECT * FROM runs WHERE id = ?").get(runId) as Record<string, unknown> | undefined;
if (!row) {
throw new Error(`Run not found: ${runId}`);
}
return mapRunRow(row);
}
listRuns(): RunRecord[] {
const rows = this.db.prepare("SELECT * FROM runs ORDER BY created_at DESC").all() as Record<string, unknown>[];
return rows.map(mapRunRow);
}
updateRun(runId: string, updates: Partial<Pick<RunRecord, "status" | "summary" | "currentTaskId">>): RunRecord {
const current = this.getRun(runId);
const next: RunRecord = {
...current,
status: updates.status ?? current.status,
summary: updates.summary ?? current.summary,
currentTaskId: updates.currentTaskId === undefined ? current.currentTaskId : updates.currentTaskId,
updatedAt: nowIso(),
};
this.db
.prepare(`
UPDATE runs
SET status = ?, summary = ?, current_task_id = ?, updated_at = ?
WHERE id = ?
`)
.run(next.status, next.summary, next.currentTaskId, next.updatedAt, runId);
return this.getRun(runId);
}
incrementCycle(runId: string): void {
this.db.prepare("UPDATE runs SET cycle_count = cycle_count + 1, updated_at = ? WHERE id = ?").run(nowIso(), runId);
}
incrementReplanCount(runId: string): void {
this.db.prepare("UPDATE runs SET replan_count = replan_count + 1, updated_at = ? WHERE id = ?").run(nowIso(), runId);
}
getAgentState(runId: string, role: AgentRole): AgentStateRecord | null {
const row = this.db.prepare("SELECT * FROM agents WHERE run_id = ? AND role = ?").get(runId, role) as
| Record<string, unknown>
| undefined;
return row ? mapAgentRow(row) : null;
}
saveAgentState(record: AgentStateRecord): void {
this.db
.prepare(`
INSERT INTO agents (run_id, role, session_id, turns, rotation_count, last_prompt_path, last_response_path, updated_at)
VALUES (?, ?, ?, ?, ?, ?, ?, ?)
ON CONFLICT(run_id, role) DO UPDATE SET
session_id = excluded.session_id,
turns = excluded.turns,
rotation_count = excluded.rotation_count,
last_prompt_path = excluded.last_prompt_path,
last_response_path = excluded.last_response_path,
updated_at = excluded.updated_at
`)
.run(
record.runId,
record.role,
record.sessionId,
record.turns,
record.rotationCount,
record.lastPromptPath,
record.lastResponsePath,
record.updatedAt,
);
}
requeueInProgressTasks(runId: string): number {
const timestamp = nowIso();
const result = this.db
.prepare(`
UPDATE tasks
SET status = 'pending', updated_at = ?
WHERE run_id = ? AND status = 'in_progress'
`)
.run(timestamp, runId);
const changes = Number(result.changes ?? 0);
if (changes > 0) {
this.addEvent(runId, "system", "requeue_in_progress", "Requeued in-progress tasks on resume.", { changes });
}
return changes;
}
replacePendingTasks(runId: string, tasks: TaskDraft[]): TaskRecord[] {
const now = nowIso();
this.db
.prepare(`
UPDATE tasks
SET status = 'abandoned', updated_at = ?
WHERE run_id = ? AND status IN ('pending', 'in_progress')
`)
.run(now, runId);
const insertStmt = this.db.prepare(`
INSERT INTO tasks (
id, run_id, title, objective, acceptance_criteria_json, verification_steps_json,
allowed_paths_json, status, attempt_count, implementation_summary, blocker_signature, created_at, updated_at
) VALUES (?, ?, ?, ?, ?, ?, ?, 'pending', 0, NULL, NULL, ?, ?)
`);
const records: TaskRecord[] = [];
for (const task of tasks) {
const id = randomUUID();
insertStmt.run(
id,
runId,
task.title,
task.objective,
toJson(task.acceptanceCriteria),
toJson(task.verificationSteps),
toJson(task.allowedPaths),
now,
now,
);
records.push(this.getTask(id));
}
return records;
}
listTasks(runId: string, statuses?: TaskStatus[]): TaskRecord[] {
if (!statuses?.length) {
const rows = this.db
.prepare("SELECT * FROM tasks WHERE run_id = ? ORDER BY created_at ASC")
.all(runId) as Record<string, unknown>[];
return rows.map(mapTaskRow);
}
const placeholders = statuses.map(() => "?").join(", ");
const rows = this.db
.prepare(`SELECT * FROM tasks WHERE run_id = ? AND status IN (${placeholders}) ORDER BY created_at ASC`)
.all(runId, ...statuses) as Record<string, unknown>[];
return rows.map(mapTaskRow);
}
getTask(taskId: string): TaskRecord {
const row = this.db.prepare("SELECT * FROM tasks WHERE id = ?").get(taskId) as Record<string, unknown> | undefined;
if (!row) {
throw new Error(`Task not found: ${taskId}`);
}
return mapTaskRow(row);
}
claimNextPendingTask(runId: string): TaskRecord | null {
const row = this.db
.prepare(`
SELECT * FROM tasks
WHERE run_id = ? AND status = 'pending'
ORDER BY created_at ASC
LIMIT 1
`)
.get(runId) as Record<string, unknown> | undefined;
if (!row) {
return null;
}
const taskId = String(row.id);
this.db
.prepare(`
UPDATE tasks
SET status = 'in_progress', attempt_count = attempt_count + 1, updated_at = ?
WHERE id = ?
`)
.run(nowIso(), taskId);
return this.getTask(taskId);
}
completeTask(taskId: string, summary: string): TaskRecord {
this.db
.prepare(`
UPDATE tasks
SET status = 'completed', implementation_summary = ?, blocker_signature = NULL, updated_at = ?
WHERE id = ?
`)
.run(summary, nowIso(), taskId);
return this.getTask(taskId);
}
abandonTask(taskId: string, summary: string | null, blockerSignature: string | null): TaskRecord {
this.db
.prepare(`
UPDATE tasks
SET status = 'abandoned', implementation_summary = ?, blocker_signature = ?, updated_at = ?
WHERE id = ?
`)
.run(summary, blockerSignature, nowIso(), taskId);
return this.getTask(taskId);
}
blockTask(taskId: string, summary: string | null, blockerSignature: string | null): TaskRecord {
this.db
.prepare(`
UPDATE tasks
SET status = 'blocked', implementation_summary = ?, blocker_signature = ?, updated_at = ?
WHERE id = ?
`)
.run(summary, blockerSignature, nowIso(), taskId);
return this.getTask(taskId);
}
latestTaskAttempt(taskId: string): TaskAttemptRecord | null {
const row = this.db
.prepare("SELECT * FROM task_attempts WHERE task_id = ? ORDER BY created_at DESC LIMIT 1")
.get(taskId) as Record<string, unknown> | undefined;
return row ? mapAttemptRow(row) : null;
}
addTaskAttempt(
runId: string,
taskId: string,
attemptNumber: number,
status: AttemptStatus,
summary: string,
result: unknown,
blockerSignature: string | null,
): TaskAttemptRecord {
const id = randomUUID();
const createdAt = nowIso();
this.db
.prepare(`
INSERT INTO task_attempts (id, run_id, task_id, attempt_number, status, summary, blocker_signature, result_json, created_at)
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)
`)
.run(id, runId, taskId, attemptNumber, status, summary, blockerSignature, toJson(result), createdAt);
return this.latestTaskAttempt(taskId) as TaskAttemptRecord;
}
addEvent(runId: string, source: string, kind: string, message: string, payload: unknown): void {
this.db
.prepare(`
INSERT INTO events (run_id, ts, source, kind, message, payload_json)
VALUES (?, ?, ?, ?, ?, ?)
`)
.run(runId, nowIso(), source, kind, message, toJson(payload));
}
listEvents(runId: string, limit = 50): EventRecord[] {
const rows = this.db
.prepare(`
SELECT * FROM events
WHERE run_id = ?
ORDER BY id DESC
LIMIT ?
`)
.all(runId, limit) as Record<string, unknown>[];
return rows
.map((row) => ({
id: Number(row.id),
runId: String(row.run_id),
ts: String(row.ts),
source: String(row.source),
kind: String(row.kind),
message: String(row.message),
payload: JSON.parse(String(row.payload_json)),
}))
.reverse();
}
addArtifact(runId: string, role: AgentRole | null, kind: string, filePath: string, metadata: unknown): ArtifactRecord {
const id = randomUUID();
const createdAt = nowIso();
this.db
.prepare(`
INSERT INTO artifacts (id, run_id, role, kind, path, created_at, metadata_json)
VALUES (?, ?, ?, ?, ?, ?, ?)
`)
.run(id, runId, role, kind, filePath, createdAt, toJson(metadata));
return {
id,
runId,
role,
kind,
path: filePath,
createdAt,
metadata,
};
}
listArtifacts(runId: string, role?: AgentRole): ArtifactRecord[] {
const rows = role
? (this.db
.prepare(`
SELECT * FROM artifacts
WHERE run_id = ? AND role = ?
ORDER BY created_at ASC
`)
.all(runId, role) as Record<string, unknown>[])
: (this.db
.prepare(`
SELECT * FROM artifacts
WHERE run_id = ?
ORDER BY created_at ASC
`)
.all(runId) as Record<string, unknown>[]);
return rows.map((row) => ({
id: String(row.id),
runId: String(row.run_id),
role: row.role ? (String(row.role) as AgentRole) : null,
kind: String(row.kind),
path: String(row.path),
createdAt: String(row.created_at),
metadata: JSON.parse(String(row.metadata_json)),
}));
}
addApproval(
runId: string,
source: ApprovalSource,
verdict: ApprovalVerdict,
rationale: string,
payload: unknown,
): ApprovalRecord {
const id = randomUUID();
const createdAt = nowIso();
this.db
.prepare(`
INSERT INTO approvals (id, run_id, source, verdict, rationale, payload_json, created_at)
VALUES (?, ?, ?, ?, ?, ?, ?)
`)
.run(id, runId, source, verdict, rationale, toJson(payload), createdAt);
return {
id,
runId,
source,
verdict,
rationale,
payload,
createdAt,
};
}
listApprovals(runId: string): ApprovalRecord[] {
const rows = this.db
.prepare("SELECT * FROM approvals WHERE run_id = ? ORDER BY created_at ASC")
.all(runId) as Record<string, unknown>[];
return rows.map((row) => ({
id: String(row.id),
runId: String(row.run_id),
source: row.source as ApprovalSource,
verdict: row.verdict as ApprovalVerdict,
rationale: String(row.rationale),
payload: JSON.parse(String(row.payload_json)),
createdAt: String(row.created_at),
}));
}
latestApproval(runId: string, source: ApprovalSource): ApprovalRecord | null {
const row = this.db
.prepare(`
SELECT * FROM approvals
WHERE run_id = ? AND source = ?
ORDER BY created_at DESC
LIMIT 1
`)
.get(runId, source) as Record<string, unknown> | undefined;
if (!row) {
return null;
}
return {
id: String(row.id),
runId: String(row.run_id),
source: row.source as ApprovalSource,
verdict: row.verdict as ApprovalVerdict,
rationale: String(row.rationale),
payload: JSON.parse(String(row.payload_json)),
createdAt: String(row.created_at),
};
}
addCheckpoint(runId: string, status: RunStatus, payload: unknown): CheckpointRecord {
const id = randomUUID();
const createdAt = nowIso();
this.db
.prepare(`
INSERT INTO checkpoints (id, run_id, status, payload_json, created_at)
VALUES (?, ?, ?, ?, ?)
`)
.run(id, runId, status, toJson(payload), createdAt);
return {
id,
runId,
status,
payload,
createdAt,
};
}
buildSnapshot(runId: string): RunSnapshot {
const run = this.getRun(runId);
const agents = (["strategy", "implementation", "checker"] as const).reduce<RunSnapshot["agents"]>((acc, role) => {
const agent = this.getAgentState(runId, role);
if (agent) {
acc[role] = agent;
}
return acc;
}, {});
return {
run,
agents,
pendingTasks: this.listTasks(runId, ["pending"]),
inProgressTasks: this.listTasks(runId, ["in_progress"]),
completedTasks: this.listTasks(runId, ["completed"]),
blockedTasks: this.listTasks(runId, ["blocked", "abandoned"]),
recentAttempts: this.listRecentAttempts(runId),
recentEvents: this.listEvents(runId),
approvals: this.listApprovals(runId),
artifacts: this.listArtifacts(runId),
};
}
listRecentAttempts(runId: string, limit = 12): TaskAttemptRecord[] {
const rows = this.db
.prepare(`
SELECT * FROM task_attempts
WHERE run_id = ?
ORDER BY created_at DESC
LIMIT ?
`)
.all(runId, limit) as Record<string, unknown>[];
return rows.map(mapAttemptRow).reverse();
}
}

230
src/types.ts Normal file
View file

@ -0,0 +1,230 @@
export const agentRoles = ["strategy", "implementation", "checker"] as const;
export type AgentRole = (typeof agentRoles)[number];
export type SandboxMode = "read-only" | "workspace-write" | "danger-full-access";
export type RunStatus =
| "planning"
| "implementing"
| "self_check"
| "independent_check"
| "blocked"
| "done";
export type TaskStatus = "pending" | "in_progress" | "completed" | "blocked" | "abandoned";
export type AttemptStatus = "completed" | "needs_replan" | "blocked";
export type ApprovalSource = "strategy_self_check" | "checker";
export type ApprovalVerdict = "approved" | "rejected";
export interface RoleConfig {
model?: string;
sandbox: SandboxMode;
search: boolean;
skipGitRepoCheck: boolean;
extraArgs: string[];
}
export interface ResolvedConfig {
repoPath: string;
stateDir: string;
databasePath: string;
runsDir: string;
codexCommand: string;
maxTaskAttempts: number;
maxCyclesPerInvocation: number;
maxReplans: number;
maxTasks: number;
sessionRefreshTurns: number;
roles: Record<AgentRole, RoleConfig>;
}
export interface RunRecord {
id: string;
goal: string;
repoPath: string;
status: RunStatus;
summary: string | null;
cycleCount: number;
replanCount: number;
currentTaskId: string | null;
createdAt: string;
updatedAt: string;
}
export interface AgentStateRecord {
runId: string;
role: AgentRole;
sessionId: string | null;
turns: number;
rotationCount: number;
lastPromptPath: string | null;
lastResponsePath: string | null;
updatedAt: string;
}
export interface TaskDraft {
title: string;
objective: string;
acceptanceCriteria: string[];
verificationSteps: string[];
allowedPaths: string[];
}
export interface TaskRecord extends TaskDraft {
id: string;
runId: string;
status: TaskStatus;
attemptCount: number;
implementationSummary: string | null;
blockerSignature: string | null;
createdAt: string;
updatedAt: string;
}
export interface TaskAttemptRecord {
id: string;
runId: string;
taskId: string;
attemptNumber: number;
status: AttemptStatus;
summary: string;
blockerSignature: string | null;
resultJson: string;
createdAt: string;
}
export interface EventRecord {
id: number;
runId: string;
ts: string;
source: string;
kind: string;
message: string;
payload: unknown;
}
export interface ArtifactRecord {
id: string;
runId: string;
role: AgentRole | null;
kind: string;
path: string;
createdAt: string;
metadata: unknown;
}
export interface ApprovalRecord {
id: string;
runId: string;
source: ApprovalSource;
verdict: ApprovalVerdict;
rationale: string;
payload: unknown;
createdAt: string;
}
export interface CheckpointRecord {
id: string;
runId: string;
status: RunStatus;
payload: unknown;
createdAt: string;
}
export interface VerificationResult {
command: string;
outcome: "passed" | "failed" | "not_run";
details: string;
}
export interface StrategyPlanOutput {
decision: "continue" | "done" | "blocked";
summary: string;
rationale: string;
goalProgress: string;
risks: string[];
tasks: TaskDraft[];
blockedReason?: string;
}
export interface StrategySelfCheckOutput {
readyForIndependentCheck: boolean;
summary: string;
rationale: string;
evidence: string[];
remainingGaps: string[];
}
export interface ImplementationResultOutput {
taskId: string;
status: AttemptStatus;
summary: string;
changes: string[];
verification: VerificationResult[];
followUps: string[];
touchedFiles: string[];
blockers: string[];
}
export interface CheckVerdictOutput {
verdict: "approved" | "rejected";
summary: string;
rationale: string;
evidence: string[];
remainingTasks: TaskDraft[];
}
export interface RunSnapshot {
run: RunRecord;
agents: Partial<Record<AgentRole, AgentStateRecord>>;
pendingTasks: TaskRecord[];
inProgressTasks: TaskRecord[];
completedTasks: TaskRecord[];
blockedTasks: TaskRecord[];
recentAttempts: TaskAttemptRecord[];
recentEvents: EventRecord[];
approvals: ApprovalRecord[];
artifacts: ArtifactRecord[];
}
export interface InvocationArtifacts {
promptPath: string;
schemaPath: string;
rawEventsPath: string;
stderrPath: string;
lastMessagePath: string;
responsePath: string;
}
export interface AgentInvocation<T> {
runId: string;
role: AgentRole;
sessionId: string | null;
prompt: string;
schemaName: string;
schema: Record<string, unknown>;
cwd: string;
roleConfig: RoleConfig;
artifacts: InvocationArtifacts;
maxValidationRetries?: number;
}
export interface AgentInvocationResult<T> {
sessionId: string | null;
output: T;
rawMessage: string;
rawEvents: string[];
stderr: string;
artifacts: InvocationArtifacts;
}
export interface RawAgentRunner {
invoke<T>(request: AgentInvocation<T>): Promise<AgentInvocationResult<T>>;
}
export interface RunArtifactsIndex {
promptPath: string;
responsePath: string;
schemaPath: string;
rawEventsPath: string;
stderrPath: string;
lastMessagePath: string;
}

138
test/orchestrator.test.ts Normal file
View file

@ -0,0 +1,138 @@
import { mkdtempSync, readFileSync } from "node:fs";
import { tmpdir } from "node:os";
import path from "node:path";
import { describe, expect, test } from "vitest";
import { ArtifactManager } from "../src/artifacts.js";
import { loadConfig } from "../src/config.js";
import { AgentROrchestrator } from "../src/orchestrator.js";
import { RunStore } from "../src/store.js";
import type { AgentInvocation, AgentInvocationResult, RawAgentRunner } from "../src/types.js";
class ScriptedRunner implements RawAgentRunner {
public readonly prompts: string[] = [];
constructor(
private readonly outputs: string[],
) {}
async invoke<T>(request: AgentInvocation<T>): Promise<AgentInvocationResult<T>> {
const next = this.outputs.shift();
if (!next) {
throw new Error("No scripted output remaining.");
}
this.prompts.push(request.prompt);
readFileSync(request.artifacts.schemaPath, "utf8");
const parsed = JSON.parse(next) as Record<string, unknown>;
if (typeof parsed.taskId === "string" && !parsed.taskId) {
const match = request.prompt.match(/exact task id `([^`]+)`/);
if (match) {
parsed.taskId = match[1];
}
}
return {
sessionId: request.sessionId ?? `session-${this.prompts.length}`,
output: parsed as T,
rawMessage: JSON.stringify(parsed),
rawEvents: ['{"type":"turn.completed"}'],
stderr: "",
artifacts: request.artifacts,
};
}
}
describe("AgentROrchestrator", () => {
test("runs through plan, implementation, self-check, and independent check", async () => {
const repoPath = mkdtempSync(path.join(tmpdir(), "agent-r-orchestrator-"));
const config = loadConfig(repoPath);
const store = new RunStore(config.databasePath);
const artifacts = new ArtifactManager(config.runsDir);
const runner = new ScriptedRunner([
JSON.stringify({
decision: "continue",
summary: "Implement the CLI skeleton.",
rationale: "The project is empty and needs a first vertical slice.",
goalProgress: "No implementation exists yet.",
risks: [],
tasks: [
{
title: "Create CLI entrypoint",
objective: "Build the initial command surface.",
acceptanceCriteria: ["A run command exists."],
verificationSteps: ["Run the help command."],
allowedPaths: ["src", "package.json"],
},
],
}),
JSON.stringify({
taskId: "",
status: "completed",
summary: "Added the CLI skeleton.",
changes: ["Created an entrypoint."],
verification: [{ command: "agent-r --help", outcome: "passed", details: "Help output rendered." }],
followUps: [],
touchedFiles: ["src/index.ts", "package.json"],
blockers: [],
}),
JSON.stringify({
decision: "done",
summary: "The requested slice is complete.",
rationale: "The only planned task is complete and verified.",
goalProgress: "The vertical slice exists.",
risks: [],
tasks: [],
}),
JSON.stringify({
readyForIndependentCheck: true,
summary: "The run is ready for independent review.",
rationale: "Implementation and verification are present.",
evidence: ["CLI entrypoint exists."],
remainingGaps: [],
}),
JSON.stringify({
verdict: "approved",
summary: "The goal is satisfied.",
rationale: "The requested slice exists and is verified.",
evidence: ["CLI entrypoint present."],
remainingTasks: [],
}),
]);
const orchestrator = new AgentROrchestrator(config, store, artifacts, runner);
const run = orchestrator.createRun("Create a CLI skeleton");
const firstStatus = await orchestrator.runUntilStable(run.id, 10);
const snapshot = store.buildSnapshot(run.id);
expect(firstStatus.status).toBe("done");
expect(snapshot.completedTasks).toHaveLength(1);
expect(snapshot.approvals).toHaveLength(2);
expect(snapshot.pendingTasks).toHaveLength(0);
expect(runner.prompts.at(1)).toContain("You must echo the exact task id");
});
test("strategy prompt explicitly permits read-only repository inspection", async () => {
const repoPath = mkdtempSync(path.join(tmpdir(), "agent-r-strategy-"));
const config = loadConfig(repoPath);
const store = new RunStore(config.databasePath);
const artifacts = new ArtifactManager(config.runsDir);
const runner = new ScriptedRunner([
JSON.stringify({
decision: "blocked",
summary: "Cannot continue.",
rationale: "Test stop.",
goalProgress: "None.",
risks: [],
tasks: [],
blockedReason: "Stop immediately.",
}),
]);
const orchestrator = new AgentROrchestrator(config, store, artifacts, runner);
const run = orchestrator.createRun("Inspect repo");
await orchestrator.runUntilStable(run.id, 1);
expect(runner.prompts[0]).toContain("You may inspect the repository directly");
expect(runner.prompts[0]).toContain("Do not edit files");
});
});

40
test/store.test.ts Normal file
View file

@ -0,0 +1,40 @@
import { mkdtempSync } from "node:fs";
import { tmpdir } from "node:os";
import path from "node:path";
import { describe, expect, test } from "vitest";
import { RunStore } from "../src/store.js";
describe("RunStore", () => {
test("creates runs, tasks, and snapshots", () => {
const tempDir = mkdtempSync(path.join(tmpdir(), "agent-r-store-"));
const store = new RunStore(path.join(tempDir, "state.sqlite"));
const run = store.createRun("Build something", tempDir);
store.replacePendingTasks(run.id, [
{
title: "Task 1",
objective: "Do work",
acceptanceCriteria: ["It works"],
verificationSteps: ["Run tests"],
allowedPaths: ["src"],
},
{
title: "Task 2",
objective: "Do more work",
acceptanceCriteria: ["It still works"],
verificationSteps: ["Run tests again"],
allowedPaths: ["tests"],
},
]);
const claimed = store.claimNextPendingTask(run.id);
expect(claimed?.status).toBe("in_progress");
store.completeTask(claimed!.id, "Finished");
store.addEvent(run.id, "test", "custom", "Recorded a test event.", { ok: true });
const snapshot = store.buildSnapshot(run.id);
expect(snapshot.pendingTasks).toHaveLength(1);
expect(snapshot.completedTasks).toHaveLength(1);
expect(snapshot.recentEvents.at(-1)?.kind).toBe("custom");
});
});

21
tsconfig.json Normal file
View file

@ -0,0 +1,21 @@
{
"compilerOptions": {
"target": "ES2023",
"module": "NodeNext",
"moduleResolution": "NodeNext",
"lib": ["ES2023"],
"types": ["node"],
"strict": true,
"noUncheckedIndexedAccess": true,
"exactOptionalPropertyTypes": true,
"esModuleInterop": true,
"forceConsistentCasingInFileNames": true,
"resolveJsonModule": true,
"rootDir": "src",
"outDir": "dist",
"declaration": true,
"sourceMap": true,
"skipLibCheck": true
},
"include": ["src/**/*.ts"]
}