Initial agent-r orchestrator
This commit is contained in:
commit
a909a99310
17 changed files with 4530 additions and 0 deletions
5
.gitignore
vendored
Normal file
5
.gitignore
vendored
Normal file
|
|
@ -0,0 +1,5 @@
|
|||
node_modules
|
||||
dist
|
||||
.agent-r
|
||||
coverage
|
||||
*.log
|
||||
51
README.md
Normal file
51
README.md
Normal file
|
|
@ -0,0 +1,51 @@
|
|||
# agent-r
|
||||
|
||||
`agent-r` is a local TypeScript CLI that orchestrates three Codex roles:
|
||||
|
||||
- `strategy`: read-only planning, task breakdown, self-check
|
||||
- `implementation`: workspace-write execution for one task at a time
|
||||
- `checker`: read-only independent completion review
|
||||
|
||||
The CLI persists state in SQLite plus run artifacts under `.agent-r/`, so a long-running effort can be resumed across invocations.
|
||||
|
||||
## Commands
|
||||
|
||||
```bash
|
||||
npm install
|
||||
npm run build
|
||||
|
||||
agent-r run "Build a plugin system" --repo /path/to/repo
|
||||
agent-r resume <run-id> --repo /path/to/repo
|
||||
agent-r status <run-id> --repo /path/to/repo
|
||||
agent-r inspect <run-id> --repo /path/to/repo
|
||||
agent-r logs <run-id> --repo /path/to/repo --agent strategy
|
||||
```
|
||||
|
||||
## Config
|
||||
|
||||
Place `agent-r.config.toml` in the target repo if you want to override defaults.
|
||||
|
||||
```toml
|
||||
[state]
|
||||
dir = ".agent-r"
|
||||
max_task_attempts = 3
|
||||
max_cycles_per_run = 40
|
||||
max_replans = 12
|
||||
max_tasks = 200
|
||||
session_refresh_turns = 20
|
||||
|
||||
[codex]
|
||||
command = "codex"
|
||||
|
||||
[roles.strategy]
|
||||
sandbox = "read-only"
|
||||
search = true
|
||||
|
||||
[roles.implementation]
|
||||
sandbox = "workspace-write"
|
||||
search = false
|
||||
|
||||
[roles.checker]
|
||||
sandbox = "read-only"
|
||||
search = false
|
||||
```
|
||||
2038
package-lock.json
generated
Normal file
2038
package-lock.json
generated
Normal file
File diff suppressed because it is too large
Load diff
31
package.json
Normal file
31
package.json
Normal file
|
|
@ -0,0 +1,31 @@
|
|||
{
|
||||
"name": "agent-r",
|
||||
"version": "0.1.0",
|
||||
"private": true,
|
||||
"type": "module",
|
||||
"description": "Autonomous multi-agent Codex orchestrator with strategy, implementation, and checking roles.",
|
||||
"bin": {
|
||||
"agent-r": "dist/index.js"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">=24.0.0"
|
||||
},
|
||||
"scripts": {
|
||||
"build": "tsc -p tsconfig.json && chmod +x dist/index.js",
|
||||
"dev": "NODE_OPTIONS=--disable-warning=ExperimentalWarning tsx src/index.ts",
|
||||
"lint": "tsc -p tsconfig.json --noEmit",
|
||||
"test": "NODE_OPTIONS=--disable-warning=ExperimentalWarning vitest run"
|
||||
},
|
||||
"dependencies": {
|
||||
"@openai/codex-sdk": "^0.118.0",
|
||||
"ajv": "^8.18.0",
|
||||
"commander": "^14.0.3",
|
||||
"smol-toml": "^1.6.1"
|
||||
},
|
||||
"devDependencies": {
|
||||
"@types/node": "^24.7.2",
|
||||
"tsx": "^4.21.0",
|
||||
"typescript": "^5.9.3",
|
||||
"vitest": "^4.1.2"
|
||||
}
|
||||
}
|
||||
69
src/artifacts.ts
Normal file
69
src/artifacts.ts
Normal file
|
|
@ -0,0 +1,69 @@
|
|||
import path from "node:path";
|
||||
import { ensureDir, safeSlug, timestampId, writeJsonFile, writeTextFile } from "./fs-utils.js";
|
||||
import type { AgentRole, InvocationArtifacts, RunArtifactsIndex } from "./types.js";
|
||||
|
||||
export class ArtifactManager {
|
||||
constructor(private readonly runsDir: string) {
|
||||
ensureDir(this.runsDir);
|
||||
}
|
||||
|
||||
getRunDir(runId: string): string {
|
||||
const dir = path.join(this.runsDir, runId);
|
||||
ensureDir(dir);
|
||||
return dir;
|
||||
}
|
||||
|
||||
getRoleDir(runId: string, role: AgentRole): string {
|
||||
const dir = path.join(this.getRunDir(runId), role);
|
||||
ensureDir(dir);
|
||||
return dir;
|
||||
}
|
||||
|
||||
createInvocationArtifacts(runId: string, role: AgentRole, schemaName: string): InvocationArtifacts {
|
||||
const roleDir = this.getRoleDir(runId, role);
|
||||
const baseName = `${timestampId()}-${safeSlug(schemaName)}`;
|
||||
return {
|
||||
promptPath: path.join(roleDir, `${baseName}.prompt.md`),
|
||||
schemaPath: path.join(roleDir, `${baseName}.schema.json`),
|
||||
rawEventsPath: path.join(roleDir, `${baseName}.events.jsonl`),
|
||||
stderrPath: path.join(roleDir, `${baseName}.stderr.log`),
|
||||
lastMessagePath: path.join(roleDir, `${baseName}.last.txt`),
|
||||
responsePath: path.join(roleDir, `${baseName}.response.json`),
|
||||
};
|
||||
}
|
||||
|
||||
writePrompt(filePath: string, prompt: string): void {
|
||||
writeTextFile(filePath, prompt);
|
||||
}
|
||||
|
||||
writeSchema(filePath: string, schema: Record<string, unknown>): void {
|
||||
writeJsonFile(filePath, schema);
|
||||
}
|
||||
|
||||
writeRawEvents(filePath: string, rawEvents: string[]): void {
|
||||
writeTextFile(filePath, rawEvents.join("\n"));
|
||||
}
|
||||
|
||||
writeStderr(filePath: string, stderr: string): void {
|
||||
writeTextFile(filePath, stderr);
|
||||
}
|
||||
|
||||
writeLastMessage(filePath: string, message: string): void {
|
||||
writeTextFile(filePath, message);
|
||||
}
|
||||
|
||||
writeResponse(filePath: string, response: unknown): void {
|
||||
writeJsonFile(filePath, response);
|
||||
}
|
||||
|
||||
indexFromArtifacts(artifacts: InvocationArtifacts): RunArtifactsIndex {
|
||||
return {
|
||||
promptPath: artifacts.promptPath,
|
||||
responsePath: artifacts.responsePath,
|
||||
schemaPath: artifacts.schemaPath,
|
||||
rawEventsPath: artifacts.rawEventsPath,
|
||||
stderrPath: artifacts.stderrPath,
|
||||
lastMessagePath: artifacts.lastMessagePath,
|
||||
};
|
||||
}
|
||||
}
|
||||
139
src/codex-runner.ts
Normal file
139
src/codex-runner.ts
Normal file
|
|
@ -0,0 +1,139 @@
|
|||
import { Ajv } from "ajv";
|
||||
import { Codex, type ThreadEvent, type ThreadOptions } from "@openai/codex-sdk";
|
||||
import type { AgentInvocation, AgentInvocationResult, RawAgentRunner } from "./types.js";
|
||||
|
||||
function extractValidationError(message: string, errorText: string): string {
|
||||
return [
|
||||
"The previous response did not parse or validate.",
|
||||
`Problem: ${errorText}`,
|
||||
"Respond again with JSON only, matching the schema exactly.",
|
||||
"Previous response:",
|
||||
message,
|
||||
].join("\n\n");
|
||||
}
|
||||
|
||||
export class CodexSdkRunner implements RawAgentRunner {
|
||||
private readonly ajv = new Ajv({ allErrors: true, strict: false });
|
||||
private readonly codex: Codex;
|
||||
|
||||
constructor(codexCommand: string) {
|
||||
this.codex = new Codex({
|
||||
codexPathOverride: codexCommand,
|
||||
});
|
||||
}
|
||||
|
||||
async invoke<T>(request: AgentInvocation<T>): Promise<AgentInvocationResult<T>> {
|
||||
let prompt = request.prompt;
|
||||
let sessionId = request.sessionId;
|
||||
const maxAttempts = request.maxValidationRetries ?? 2;
|
||||
let lastFailure = "";
|
||||
|
||||
for (let attempt = 1; attempt <= maxAttempts; attempt += 1) {
|
||||
const raw = await this.invokeOnce({
|
||||
...request,
|
||||
prompt,
|
||||
sessionId,
|
||||
});
|
||||
|
||||
sessionId = raw.sessionId;
|
||||
|
||||
try {
|
||||
const parsed = JSON.parse(raw.rawMessage) as unknown;
|
||||
const validate = this.ajv.compile(request.schema);
|
||||
if (!validate(parsed)) {
|
||||
const errorText = this.ajv.errorsText(validate.errors);
|
||||
lastFailure = errorText;
|
||||
if (attempt === maxAttempts) {
|
||||
throw new Error(`Schema validation failed: ${errorText}`);
|
||||
}
|
||||
prompt = extractValidationError(raw.rawMessage, errorText);
|
||||
continue;
|
||||
}
|
||||
|
||||
return {
|
||||
...raw,
|
||||
output: parsed as T,
|
||||
};
|
||||
} catch (error) {
|
||||
const errorText = error instanceof Error ? error.message : String(error);
|
||||
lastFailure = errorText;
|
||||
if (attempt === maxAttempts) {
|
||||
throw new Error(`Structured output failed after ${attempt} attempt(s): ${lastFailure}`);
|
||||
}
|
||||
prompt = extractValidationError(raw.rawMessage, errorText);
|
||||
}
|
||||
}
|
||||
|
||||
throw new Error(`Structured output failed: ${lastFailure}`);
|
||||
}
|
||||
|
||||
private async invokeOnce<T>(request: AgentInvocation<T>): Promise<Omit<AgentInvocationResult<T>, "output">> {
|
||||
const threadOptions: ThreadOptions = {
|
||||
sandboxMode: request.roleConfig.sandbox,
|
||||
workingDirectory: request.cwd,
|
||||
skipGitRepoCheck: request.roleConfig.skipGitRepoCheck,
|
||||
approvalPolicy: "never",
|
||||
networkAccessEnabled: request.roleConfig.search,
|
||||
webSearchEnabled: request.roleConfig.search,
|
||||
webSearchMode: request.roleConfig.search ? "live" : "disabled",
|
||||
};
|
||||
|
||||
if (request.roleConfig.model) {
|
||||
threadOptions.model = request.roleConfig.model;
|
||||
}
|
||||
|
||||
const thread = request.sessionId
|
||||
? this.codex.resumeThread(request.sessionId, threadOptions)
|
||||
: this.codex.startThread(threadOptions);
|
||||
|
||||
const streamed = await thread.runStreamed(request.prompt, {
|
||||
outputSchema: request.schema,
|
||||
});
|
||||
|
||||
const rawEvents: string[] = [];
|
||||
let sessionId = request.sessionId;
|
||||
let rawMessage = "";
|
||||
|
||||
for await (const event of streamed.events) {
|
||||
rawEvents.push(JSON.stringify(event));
|
||||
sessionId = this.extractSessionId(sessionId, event);
|
||||
rawMessage = this.extractLatestMessage(rawMessage, event);
|
||||
this.throwIfFailed(event);
|
||||
}
|
||||
|
||||
if (!rawMessage.trim()) {
|
||||
throw new Error("Codex SDK completed without a final agent message.");
|
||||
}
|
||||
|
||||
return {
|
||||
sessionId: sessionId ?? thread.id,
|
||||
rawMessage,
|
||||
rawEvents,
|
||||
stderr: "",
|
||||
artifacts: request.artifacts,
|
||||
};
|
||||
}
|
||||
|
||||
private extractSessionId(current: string | null, event: ThreadEvent): string | null {
|
||||
if (event.type === "thread.started") {
|
||||
return event.thread_id;
|
||||
}
|
||||
return current;
|
||||
}
|
||||
|
||||
private extractLatestMessage(current: string, event: ThreadEvent): string {
|
||||
if (event.type === "item.completed" && event.item.type === "agent_message") {
|
||||
return event.item.text;
|
||||
}
|
||||
return current;
|
||||
}
|
||||
|
||||
private throwIfFailed(event: ThreadEvent): void {
|
||||
if (event.type === "error") {
|
||||
throw new Error(event.message);
|
||||
}
|
||||
if (event.type === "turn.failed") {
|
||||
throw new Error(event.error.message);
|
||||
}
|
||||
}
|
||||
}
|
||||
138
src/config.ts
Normal file
138
src/config.ts
Normal file
|
|
@ -0,0 +1,138 @@
|
|||
import { existsSync, readFileSync } from "node:fs";
|
||||
import path from "node:path";
|
||||
import { parse } from "smol-toml";
|
||||
import type { AgentRole, ResolvedConfig, RoleConfig } from "./types.js";
|
||||
|
||||
interface ConfigRoleOverride {
|
||||
model?: string;
|
||||
sandbox?: RoleConfig["sandbox"];
|
||||
search?: boolean;
|
||||
skipGitRepoCheck?: boolean;
|
||||
skip_git_repo_check?: boolean;
|
||||
extraArgs?: string[];
|
||||
extra_args?: string[];
|
||||
}
|
||||
|
||||
interface ConfigFileShape {
|
||||
state?: {
|
||||
dir?: string;
|
||||
max_task_attempts?: number;
|
||||
max_cycles_per_run?: number;
|
||||
max_replans?: number;
|
||||
max_tasks?: number;
|
||||
session_refresh_turns?: number;
|
||||
};
|
||||
codex?: {
|
||||
command?: string;
|
||||
};
|
||||
roles?: Partial<Record<AgentRole, ConfigRoleOverride>>;
|
||||
}
|
||||
|
||||
function defaultRoleConfig(role: AgentRole): RoleConfig {
|
||||
if (role === "strategy") {
|
||||
return {
|
||||
sandbox: "read-only",
|
||||
search: true,
|
||||
skipGitRepoCheck: false,
|
||||
extraArgs: [],
|
||||
};
|
||||
}
|
||||
|
||||
if (role === "checker") {
|
||||
return {
|
||||
sandbox: "read-only",
|
||||
search: false,
|
||||
skipGitRepoCheck: false,
|
||||
extraArgs: [],
|
||||
};
|
||||
}
|
||||
|
||||
return {
|
||||
sandbox: "workspace-write",
|
||||
search: false,
|
||||
skipGitRepoCheck: false,
|
||||
extraArgs: [],
|
||||
};
|
||||
}
|
||||
|
||||
function mergeRoleConfig(role: AgentRole, override?: Partial<RoleConfig>): RoleConfig {
|
||||
const base = defaultRoleConfig(role);
|
||||
const merged: RoleConfig = {
|
||||
sandbox: override?.sandbox ?? base.sandbox,
|
||||
search: override?.search ?? base.search,
|
||||
skipGitRepoCheck: override?.skipGitRepoCheck ?? base.skipGitRepoCheck,
|
||||
extraArgs: override?.extraArgs ?? base.extraArgs,
|
||||
};
|
||||
|
||||
const model = override?.model ?? base.model;
|
||||
if (model) {
|
||||
merged.model = model;
|
||||
}
|
||||
|
||||
return merged;
|
||||
}
|
||||
|
||||
export function loadConfig(repoPath: string, configPath?: string): ResolvedConfig {
|
||||
const absoluteRepoPath = path.resolve(repoPath);
|
||||
const effectiveConfigPath = configPath ? path.resolve(configPath) : path.join(absoluteRepoPath, "agent-r.config.toml");
|
||||
let parsedConfig: ConfigFileShape = {};
|
||||
|
||||
if (existsSync(effectiveConfigPath)) {
|
||||
parsedConfig = parse(readFileSync(effectiveConfigPath, "utf8")) as ConfigFileShape;
|
||||
}
|
||||
|
||||
const stateDir = path.resolve(
|
||||
absoluteRepoPath,
|
||||
parsedConfig.state?.dir ?? ".agent-r",
|
||||
);
|
||||
|
||||
return {
|
||||
repoPath: absoluteRepoPath,
|
||||
stateDir,
|
||||
databasePath: path.join(stateDir, "state.sqlite"),
|
||||
runsDir: path.join(stateDir, "runs"),
|
||||
codexCommand: parsedConfig.codex?.command ?? "codex",
|
||||
maxTaskAttempts: parsedConfig.state?.max_task_attempts ?? 3,
|
||||
maxCyclesPerInvocation: parsedConfig.state?.max_cycles_per_run ?? 40,
|
||||
maxReplans: parsedConfig.state?.max_replans ?? 12,
|
||||
maxTasks: parsedConfig.state?.max_tasks ?? 200,
|
||||
sessionRefreshTurns: parsedConfig.state?.session_refresh_turns ?? 20,
|
||||
roles: {
|
||||
strategy: mergeRoleConfig("strategy", normalizeRoleOverride(parsedConfig.roles?.strategy)),
|
||||
implementation: mergeRoleConfig("implementation", normalizeRoleOverride(parsedConfig.roles?.implementation)),
|
||||
checker: mergeRoleConfig("checker", normalizeRoleOverride(parsedConfig.roles?.checker)),
|
||||
},
|
||||
};
|
||||
}
|
||||
|
||||
function normalizeRoleOverride(override?: ConfigRoleOverride): Partial<RoleConfig> | undefined {
|
||||
if (!override) {
|
||||
return undefined;
|
||||
}
|
||||
|
||||
const normalized: Partial<RoleConfig> = {};
|
||||
|
||||
if (override.model) {
|
||||
normalized.model = override.model;
|
||||
}
|
||||
|
||||
if (override.sandbox) {
|
||||
normalized.sandbox = override.sandbox;
|
||||
}
|
||||
|
||||
if (override.search !== undefined) {
|
||||
normalized.search = override.search;
|
||||
}
|
||||
|
||||
const skipGitRepoCheck = override.skipGitRepoCheck ?? override.skip_git_repo_check;
|
||||
if (skipGitRepoCheck !== undefined) {
|
||||
normalized.skipGitRepoCheck = skipGitRepoCheck;
|
||||
}
|
||||
|
||||
const extraArgs = override.extraArgs ?? override.extra_args;
|
||||
if (extraArgs !== undefined) {
|
||||
normalized.extraArgs = extraArgs;
|
||||
}
|
||||
|
||||
return normalized;
|
||||
}
|
||||
42
src/fs-utils.ts
Normal file
42
src/fs-utils.ts
Normal file
|
|
@ -0,0 +1,42 @@
|
|||
import { mkdirSync, readFileSync, readdirSync, writeFileSync } from "node:fs";
|
||||
import path from "node:path";
|
||||
|
||||
export function ensureDir(dirPath: string): void {
|
||||
mkdirSync(dirPath, { recursive: true });
|
||||
}
|
||||
|
||||
export function writeTextFile(filePath: string, text: string): void {
|
||||
ensureDir(path.dirname(filePath));
|
||||
writeFileSync(filePath, text, "utf8");
|
||||
}
|
||||
|
||||
export function writeJsonFile(filePath: string, value: unknown): void {
|
||||
writeTextFile(filePath, `${JSON.stringify(value, null, 2)}\n`);
|
||||
}
|
||||
|
||||
export function readTextFile(filePath: string): string {
|
||||
return readFileSync(filePath, "utf8");
|
||||
}
|
||||
|
||||
export function safeSlug(value: string): string {
|
||||
return value
|
||||
.toLowerCase()
|
||||
.replace(/[^a-z0-9]+/g, "-")
|
||||
.replace(/^-+|-+$/g, "")
|
||||
.slice(0, 40) || "artifact";
|
||||
}
|
||||
|
||||
export function timestampId(date = new Date()): string {
|
||||
return date.toISOString().replace(/[:.]/g, "-");
|
||||
}
|
||||
|
||||
export function listFilesRecursive(dirPath: string): string[] {
|
||||
const entries = readdirSync(dirPath, { withFileTypes: true });
|
||||
return entries.flatMap((entry) => {
|
||||
const fullPath = path.join(dirPath, entry.name);
|
||||
if (entry.isDirectory()) {
|
||||
return listFilesRecursive(fullPath);
|
||||
}
|
||||
return [fullPath];
|
||||
});
|
||||
}
|
||||
127
src/index.ts
Normal file
127
src/index.ts
Normal file
|
|
@ -0,0 +1,127 @@
|
|||
#!/usr/bin/env -S node --disable-warning=ExperimentalWarning
|
||||
import path from "node:path";
|
||||
import { Command } from "commander";
|
||||
import { ArtifactManager } from "./artifacts.js";
|
||||
import { CodexSdkRunner } from "./codex-runner.js";
|
||||
import { loadConfig } from "./config.js";
|
||||
import { listFilesRecursive, readTextFile } from "./fs-utils.js";
|
||||
import { AgentROrchestrator } from "./orchestrator.js";
|
||||
import { RunStore } from "./store.js";
|
||||
import type { AgentRole } from "./types.js";
|
||||
|
||||
function resolveRepoPath(repo?: string): string {
|
||||
return path.resolve(repo ?? process.cwd());
|
||||
}
|
||||
|
||||
function bootstrap(repo?: string, configPath?: string) {
|
||||
const repoPath = resolveRepoPath(repo);
|
||||
const config = loadConfig(repoPath, configPath);
|
||||
const store = new RunStore(config.databasePath);
|
||||
const artifacts = new ArtifactManager(config.runsDir);
|
||||
const runner = new CodexSdkRunner(config.codexCommand);
|
||||
const orchestrator = new AgentROrchestrator(config, store, artifacts, runner);
|
||||
return { repoPath, config, store, artifacts, orchestrator };
|
||||
}
|
||||
|
||||
function printRunStatus(label: string, run: {
|
||||
id: string;
|
||||
status: string;
|
||||
summary: string | null;
|
||||
goal: string;
|
||||
cycleCount: number;
|
||||
replanCount: number;
|
||||
currentTaskId: string | null;
|
||||
}): void {
|
||||
console.log(`${label}: ${run.id}`);
|
||||
console.log(`status: ${run.status}`);
|
||||
console.log(`goal: ${run.goal}`);
|
||||
console.log(`summary: ${run.summary ?? "none"}`);
|
||||
console.log(`cycles: ${run.cycleCount}`);
|
||||
console.log(`replans: ${run.replanCount}`);
|
||||
console.log(`current task: ${run.currentTaskId ?? "none"}`);
|
||||
}
|
||||
|
||||
const program = new Command();
|
||||
|
||||
program.name("agent-r").description("Autonomous Codex multi-agent orchestrator.");
|
||||
|
||||
program
|
||||
.command("run")
|
||||
.argument("<goal>", "high-level development goal")
|
||||
.option("--repo <path>", "target repository path")
|
||||
.option("--config <path>", "path to agent-r config file")
|
||||
.option("--max-cycles <count>", "override max cycles for this invocation", Number)
|
||||
.action(async (goal: string, options: { repo?: string; config?: string; maxCycles?: number }) => {
|
||||
const { orchestrator, store } = bootstrap(options.repo, options.config);
|
||||
const run = orchestrator.createRun(goal);
|
||||
const finalRun = await orchestrator.runUntilStable(run.id, options.maxCycles);
|
||||
printRunStatus("run", finalRun);
|
||||
console.log(`run id: ${run.id}`);
|
||||
console.log(`pending tasks: ${store.buildSnapshot(run.id).pendingTasks.length}`);
|
||||
});
|
||||
|
||||
program
|
||||
.command("resume")
|
||||
.argument("<runId>", "run id")
|
||||
.option("--repo <path>", "target repository path")
|
||||
.option("--config <path>", "path to agent-r config file")
|
||||
.option("--max-cycles <count>", "override max cycles for this invocation", Number)
|
||||
.action(async (runId: string, options: { repo?: string; config?: string; maxCycles?: number }) => {
|
||||
const { orchestrator } = bootstrap(options.repo, options.config);
|
||||
const finalRun = await orchestrator.runUntilStable(runId, options.maxCycles);
|
||||
printRunStatus("run", finalRun);
|
||||
});
|
||||
|
||||
program
|
||||
.command("status")
|
||||
.argument("<runId>", "run id")
|
||||
.option("--repo <path>", "target repository path")
|
||||
.option("--config <path>", "path to agent-r config file")
|
||||
.action((runId: string, options: { repo?: string; config?: string }) => {
|
||||
const { store } = bootstrap(options.repo, options.config);
|
||||
const snapshot = store.buildSnapshot(runId);
|
||||
printRunStatus("run", snapshot.run);
|
||||
console.log(`pending: ${snapshot.pendingTasks.length}`);
|
||||
console.log(`completed: ${snapshot.completedTasks.length}`);
|
||||
console.log(`blocked/abandoned: ${snapshot.blockedTasks.length}`);
|
||||
});
|
||||
|
||||
program
|
||||
.command("inspect")
|
||||
.argument("<runId>", "run id")
|
||||
.option("--repo <path>", "target repository path")
|
||||
.option("--config <path>", "path to agent-r config file")
|
||||
.action((runId: string, options: { repo?: string; config?: string }) => {
|
||||
const { store } = bootstrap(options.repo, options.config);
|
||||
console.log(JSON.stringify(store.buildSnapshot(runId), null, 2));
|
||||
});
|
||||
|
||||
program
|
||||
.command("logs")
|
||||
.argument("<runId>", "run id")
|
||||
.option("--repo <path>", "target repository path")
|
||||
.option("--config <path>", "path to agent-r config file")
|
||||
.option("--agent <role>", "limit to a single role")
|
||||
.action((runId: string, options: { repo?: string; config?: string; agent?: AgentRole }) => {
|
||||
const { artifacts, store } = bootstrap(options.repo, options.config);
|
||||
const snapshot = store.buildSnapshot(runId);
|
||||
const artifactFiles = options.agent
|
||||
? store.listArtifacts(runId, options.agent).map((artifact) => artifact.path)
|
||||
: listFilesRecursive(artifacts.getRunDir(runId));
|
||||
|
||||
console.log(`run: ${runId}`);
|
||||
console.log(`status: ${snapshot.run.status}`);
|
||||
console.log("");
|
||||
|
||||
for (const filePath of artifactFiles) {
|
||||
console.log(`# ${filePath}`);
|
||||
console.log(readTextFile(filePath));
|
||||
console.log("");
|
||||
}
|
||||
});
|
||||
|
||||
program.parseAsync(process.argv).catch((error: unknown) => {
|
||||
const message = error instanceof Error ? error.stack ?? error.message : String(error);
|
||||
console.error(message);
|
||||
process.exitCode = 1;
|
||||
});
|
||||
423
src/orchestrator.ts
Normal file
423
src/orchestrator.ts
Normal file
|
|
@ -0,0 +1,423 @@
|
|||
import type {
|
||||
AgentRole,
|
||||
AgentStateRecord,
|
||||
CheckVerdictOutput,
|
||||
ImplementationResultOutput,
|
||||
RawAgentRunner,
|
||||
ResolvedConfig,
|
||||
RunRecord,
|
||||
RunSnapshot,
|
||||
StrategyPlanOutput,
|
||||
StrategySelfCheckOutput,
|
||||
TaskDraft,
|
||||
TaskRecord,
|
||||
} from "./types.js";
|
||||
import { ArtifactManager } from "./artifacts.js";
|
||||
import {
|
||||
buildCheckPrompt,
|
||||
buildImplementationPrompt,
|
||||
buildStrategyPlanPrompt,
|
||||
buildStrategySelfCheckPrompt,
|
||||
formatCheckVerdictSummary,
|
||||
formatImplementationSummary,
|
||||
formatSelfCheckSummary,
|
||||
formatStrategyPlanSummary,
|
||||
} from "./prompts.js";
|
||||
import { checkVerdictSchema, implementationResultSchema, strategyPlanSchema, strategySelfCheckSchema } from "./schema-catalog.js";
|
||||
import { RunStore } from "./store.js";
|
||||
|
||||
function nowIso(): string {
|
||||
return new Date().toISOString();
|
||||
}
|
||||
|
||||
function blockerSignature(blockers: string[]): string | null {
|
||||
const compact = blockers.map((value) => value.trim().toLowerCase()).filter(Boolean).sort().join("|");
|
||||
return compact || null;
|
||||
}
|
||||
|
||||
function uniqueTaskDrafts(tasks: TaskDraft[]): TaskDraft[] {
|
||||
const seen = new Set<string>();
|
||||
const result: TaskDraft[] = [];
|
||||
for (const task of tasks) {
|
||||
const key = JSON.stringify(task);
|
||||
if (seen.has(key)) {
|
||||
continue;
|
||||
}
|
||||
seen.add(key);
|
||||
result.push(task);
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
export class AgentROrchestrator {
|
||||
constructor(
|
||||
private readonly config: ResolvedConfig,
|
||||
private readonly store: RunStore,
|
||||
private readonly artifacts: ArtifactManager,
|
||||
private readonly runner: RawAgentRunner,
|
||||
) {}
|
||||
|
||||
createRun(goal: string): RunRecord {
|
||||
return this.store.createRun(goal, this.config.repoPath);
|
||||
}
|
||||
|
||||
async runUntilStable(runId: string, maxCycles = this.config.maxCyclesPerInvocation): Promise<RunRecord> {
|
||||
this.store.requeueInProgressTasks(runId);
|
||||
|
||||
for (let cycle = 0; cycle < maxCycles; cycle += 1) {
|
||||
const snapshot = this.store.buildSnapshot(runId);
|
||||
if (snapshot.run.status === "done" || snapshot.run.status === "blocked") {
|
||||
return snapshot.run;
|
||||
}
|
||||
|
||||
this.store.incrementCycle(runId);
|
||||
|
||||
switch (snapshot.run.status) {
|
||||
case "planning":
|
||||
if (snapshot.pendingTasks.length > 0) {
|
||||
await this.dispatchNextTask(runId);
|
||||
} else {
|
||||
await this.runStrategyPlan(runId);
|
||||
}
|
||||
break;
|
||||
case "implementing":
|
||||
await this.dispatchNextTask(runId);
|
||||
break;
|
||||
case "self_check":
|
||||
await this.runStrategySelfCheck(runId);
|
||||
break;
|
||||
case "independent_check":
|
||||
await this.runIndependentCheck(runId);
|
||||
break;
|
||||
default:
|
||||
throw new Error(`Unsupported run state: ${snapshot.run.status}`);
|
||||
}
|
||||
}
|
||||
|
||||
this.store.addEvent(runId, "system", "cycle_budget_exhausted", "Reached max cycles for this invocation.", { maxCycles });
|
||||
return this.store.getRun(runId);
|
||||
}
|
||||
|
||||
private async runStrategyPlan(runId: string): Promise<void> {
|
||||
const snapshot = this.store.buildSnapshot(runId);
|
||||
const response = await this.invokeRole<StrategyPlanOutput>({
|
||||
runId,
|
||||
role: "strategy",
|
||||
schemaName: "strategy-plan",
|
||||
prompt: buildStrategyPlanPrompt(snapshot),
|
||||
schema: strategyPlanSchema,
|
||||
});
|
||||
|
||||
const plan = response.output;
|
||||
this.store.addEvent(runId, "strategy", "strategy_plan", formatStrategyPlanSummary(plan), plan);
|
||||
this.store.updateRun(runId, { summary: plan.summary, currentTaskId: null });
|
||||
this.store.incrementReplanCount(runId);
|
||||
|
||||
if (this.store.getRun(runId).replanCount > this.config.maxReplans) {
|
||||
this.store.updateRun(runId, {
|
||||
status: "blocked",
|
||||
summary: `Exceeded replan limit (${this.config.maxReplans}).`,
|
||||
currentTaskId: null,
|
||||
});
|
||||
this.store.addEvent(runId, "system", "blocked", "Run blocked because the replan limit was exceeded.", {});
|
||||
return;
|
||||
}
|
||||
|
||||
if (plan.decision === "blocked") {
|
||||
this.store.updateRun(runId, {
|
||||
status: "blocked",
|
||||
summary: plan.blockedReason ?? plan.summary,
|
||||
currentTaskId: null,
|
||||
});
|
||||
this.store.addEvent(runId, "strategy", "blocked", "Strategy declared the run blocked.", plan);
|
||||
this.checkpoint(runId);
|
||||
return;
|
||||
}
|
||||
|
||||
if (plan.decision === "done") {
|
||||
this.store.updateRun(runId, {
|
||||
status: "self_check",
|
||||
summary: plan.summary,
|
||||
currentTaskId: null,
|
||||
});
|
||||
this.checkpoint(runId);
|
||||
return;
|
||||
}
|
||||
|
||||
const tasks = uniqueTaskDrafts(plan.tasks).slice(0, this.config.maxTasks);
|
||||
if (!tasks.length) {
|
||||
this.store.updateRun(runId, {
|
||||
status: "blocked",
|
||||
summary: "Strategy returned continue without any tasks.",
|
||||
currentTaskId: null,
|
||||
});
|
||||
this.store.addEvent(runId, "system", "blocked", "Run blocked because no tasks were returned.", plan);
|
||||
this.checkpoint(runId);
|
||||
return;
|
||||
}
|
||||
|
||||
this.store.replacePendingTasks(runId, tasks);
|
||||
this.store.updateRun(runId, {
|
||||
status: "planning",
|
||||
summary: plan.summary,
|
||||
currentTaskId: null,
|
||||
});
|
||||
this.checkpoint(runId);
|
||||
}
|
||||
|
||||
private async dispatchNextTask(runId: string): Promise<void> {
|
||||
const claimedTask = this.store.claimNextPendingTask(runId);
|
||||
if (!claimedTask) {
|
||||
this.store.updateRun(runId, { status: "planning", currentTaskId: null });
|
||||
return;
|
||||
}
|
||||
|
||||
this.store.updateRun(runId, {
|
||||
status: "implementing",
|
||||
currentTaskId: claimedTask.id,
|
||||
summary: `Implementing ${claimedTask.title}`,
|
||||
});
|
||||
this.store.addEvent(runId, "system", "task_dispatched", `Dispatched task ${claimedTask.id}.`, { taskId: claimedTask.id });
|
||||
|
||||
const snapshot = this.store.buildSnapshot(runId);
|
||||
const response = await this.invokeRole<ImplementationResultOutput>({
|
||||
runId,
|
||||
role: "implementation",
|
||||
schemaName: "implementation-result",
|
||||
prompt: buildImplementationPrompt(snapshot, claimedTask),
|
||||
schema: implementationResultSchema,
|
||||
});
|
||||
|
||||
const result = response.output;
|
||||
if (result.taskId !== claimedTask.id) {
|
||||
throw new Error(`Implementation agent returned mismatched task id: expected ${claimedTask.id}, got ${result.taskId}`);
|
||||
}
|
||||
|
||||
const signature = blockerSignature(result.blockers);
|
||||
const previousAttempt = this.store.latestTaskAttempt(claimedTask.id);
|
||||
const latestTask = this.store.getTask(claimedTask.id);
|
||||
|
||||
this.store.addTaskAttempt(
|
||||
runId,
|
||||
claimedTask.id,
|
||||
latestTask.attemptCount,
|
||||
result.status,
|
||||
result.summary,
|
||||
result,
|
||||
signature,
|
||||
);
|
||||
this.store.addEvent(runId, "implementation", "implementation_result", formatImplementationSummary(result), result);
|
||||
|
||||
if (result.status === "completed") {
|
||||
this.store.completeTask(claimedTask.id, result.summary);
|
||||
this.store.updateRun(runId, {
|
||||
status: "planning",
|
||||
currentTaskId: null,
|
||||
summary: result.summary,
|
||||
});
|
||||
this.checkpoint(runId);
|
||||
return;
|
||||
}
|
||||
|
||||
const repeatedBlocker =
|
||||
previousAttempt &&
|
||||
previousAttempt.blockerSignature &&
|
||||
previousAttempt.blockerSignature === signature;
|
||||
|
||||
if (result.status === "blocked") {
|
||||
this.store.blockTask(claimedTask.id, result.summary, signature);
|
||||
if (repeatedBlocker) {
|
||||
this.store.updateRun(runId, {
|
||||
status: "blocked",
|
||||
currentTaskId: null,
|
||||
summary: `Repeated blocker for task ${claimedTask.title}.`,
|
||||
});
|
||||
this.store.addEvent(runId, "system", "blocked", "Run blocked because the same blocker repeated.", {
|
||||
taskId: claimedTask.id,
|
||||
blockerSignature: signature,
|
||||
});
|
||||
this.checkpoint(runId);
|
||||
return;
|
||||
}
|
||||
this.store.updateRun(runId, {
|
||||
status: "planning",
|
||||
currentTaskId: null,
|
||||
summary: result.summary,
|
||||
});
|
||||
this.checkpoint(runId);
|
||||
return;
|
||||
}
|
||||
|
||||
if (latestTask.attemptCount >= this.config.maxTaskAttempts) {
|
||||
this.store.blockTask(claimedTask.id, result.summary, signature);
|
||||
this.store.updateRun(runId, {
|
||||
status: "blocked",
|
||||
currentTaskId: null,
|
||||
summary: `Task ${claimedTask.title} exceeded the retry limit.`,
|
||||
});
|
||||
this.store.addEvent(runId, "system", "blocked", "Run blocked because task retry limit was exceeded.", {
|
||||
taskId: claimedTask.id,
|
||||
attempts: latestTask.attemptCount,
|
||||
});
|
||||
this.checkpoint(runId);
|
||||
return;
|
||||
}
|
||||
|
||||
this.store.abandonTask(claimedTask.id, result.summary, signature);
|
||||
this.store.updateRun(runId, {
|
||||
status: "planning",
|
||||
currentTaskId: null,
|
||||
summary: result.summary,
|
||||
});
|
||||
this.checkpoint(runId);
|
||||
}
|
||||
|
||||
private async runStrategySelfCheck(runId: string): Promise<void> {
|
||||
const snapshot = this.store.buildSnapshot(runId);
|
||||
const lastPlan = snapshot.recentEvents.findLast((event) => event.kind === "strategy_plan")?.payload as
|
||||
| StrategyPlanOutput
|
||||
| undefined;
|
||||
if (!lastPlan) {
|
||||
this.store.updateRun(runId, { status: "planning" });
|
||||
return;
|
||||
}
|
||||
|
||||
const response = await this.invokeRole<StrategySelfCheckOutput>({
|
||||
runId,
|
||||
role: "strategy",
|
||||
schemaName: "strategy-self-check",
|
||||
prompt: buildStrategySelfCheckPrompt(snapshot, lastPlan),
|
||||
schema: strategySelfCheckSchema,
|
||||
});
|
||||
|
||||
const selfCheck = response.output;
|
||||
this.store.addApproval(
|
||||
runId,
|
||||
"strategy_self_check",
|
||||
selfCheck.readyForIndependentCheck ? "approved" : "rejected",
|
||||
selfCheck.rationale,
|
||||
selfCheck,
|
||||
);
|
||||
this.store.addEvent(runId, "strategy", "strategy_self_check", formatSelfCheckSummary(selfCheck), selfCheck);
|
||||
|
||||
this.store.updateRun(runId, {
|
||||
status: selfCheck.readyForIndependentCheck ? "independent_check" : "planning",
|
||||
summary: selfCheck.summary,
|
||||
currentTaskId: null,
|
||||
});
|
||||
this.checkpoint(runId);
|
||||
}
|
||||
|
||||
private async runIndependentCheck(runId: string): Promise<void> {
|
||||
const snapshot = this.store.buildSnapshot(runId);
|
||||
const latestSelfCheck = this.store.latestApproval(runId, "strategy_self_check")?.payload as
|
||||
| StrategySelfCheckOutput
|
||||
| undefined;
|
||||
|
||||
if (!latestSelfCheck) {
|
||||
this.store.updateRun(runId, { status: "planning" });
|
||||
return;
|
||||
}
|
||||
|
||||
const response = await this.invokeRole<CheckVerdictOutput>({
|
||||
runId,
|
||||
role: "checker",
|
||||
schemaName: "independent-check",
|
||||
prompt: buildCheckPrompt(snapshot, latestSelfCheck),
|
||||
schema: checkVerdictSchema,
|
||||
});
|
||||
|
||||
const verdict = response.output;
|
||||
this.store.addApproval(
|
||||
runId,
|
||||
"checker",
|
||||
verdict.verdict === "approved" ? "approved" : "rejected",
|
||||
verdict.rationale,
|
||||
verdict,
|
||||
);
|
||||
this.store.addEvent(runId, "checker", "check_verdict", formatCheckVerdictSummary(verdict), verdict);
|
||||
|
||||
if (verdict.verdict === "approved") {
|
||||
this.store.updateRun(runId, {
|
||||
status: "done",
|
||||
summary: verdict.summary,
|
||||
currentTaskId: null,
|
||||
});
|
||||
this.checkpoint(runId);
|
||||
return;
|
||||
}
|
||||
|
||||
this.store.updateRun(runId, {
|
||||
status: "planning",
|
||||
summary: verdict.summary,
|
||||
currentTaskId: null,
|
||||
});
|
||||
this.checkpoint(runId);
|
||||
}
|
||||
|
||||
private async invokeRole<T>(options: {
|
||||
runId: string;
|
||||
role: AgentRole;
|
||||
schemaName: string;
|
||||
prompt: string;
|
||||
schema: Record<string, unknown>;
|
||||
}): Promise<{ output: T }> {
|
||||
const state = this.store.getAgentState(options.runId, options.role);
|
||||
const nextSession = this.shouldRotate(state) ? null : state?.sessionId ?? null;
|
||||
const prompt = this.shouldRotate(state)
|
||||
? `Session refresh for role ${options.role}. Continue the run using this compressed state.\n\n${options.prompt}`
|
||||
: options.prompt;
|
||||
const artifacts = this.artifacts.createInvocationArtifacts(options.runId, options.role, options.schemaName);
|
||||
|
||||
this.artifacts.writePrompt(artifacts.promptPath, prompt);
|
||||
this.artifacts.writeSchema(artifacts.schemaPath, options.schema);
|
||||
|
||||
const result = await this.runner.invoke<T>({
|
||||
runId: options.runId,
|
||||
role: options.role,
|
||||
sessionId: nextSession,
|
||||
prompt,
|
||||
schemaName: options.schemaName,
|
||||
schema: options.schema,
|
||||
cwd: this.config.repoPath,
|
||||
roleConfig: this.config.roles[options.role],
|
||||
artifacts,
|
||||
maxValidationRetries: 2,
|
||||
});
|
||||
|
||||
this.artifacts.writeRawEvents(artifacts.rawEventsPath, result.rawEvents);
|
||||
this.artifacts.writeStderr(artifacts.stderrPath, result.stderr);
|
||||
this.artifacts.writeLastMessage(artifacts.lastMessagePath, result.rawMessage);
|
||||
this.artifacts.writeResponse(artifacts.responsePath, result.output);
|
||||
|
||||
this.store.addArtifact(options.runId, options.role, `${options.schemaName}.prompt`, artifacts.promptPath, {});
|
||||
this.store.addArtifact(options.runId, options.role, `${options.schemaName}.schema`, artifacts.schemaPath, {});
|
||||
this.store.addArtifact(options.runId, options.role, `${options.schemaName}.events`, artifacts.rawEventsPath, {});
|
||||
this.store.addArtifact(options.runId, options.role, `${options.schemaName}.stderr`, artifacts.stderrPath, {});
|
||||
this.store.addArtifact(options.runId, options.role, `${options.schemaName}.last`, artifacts.lastMessagePath, {});
|
||||
this.store.addArtifact(options.runId, options.role, `${options.schemaName}.response`, artifacts.responsePath, {});
|
||||
|
||||
const nextState: AgentStateRecord = {
|
||||
runId: options.runId,
|
||||
role: options.role,
|
||||
sessionId: result.sessionId,
|
||||
turns: (this.shouldRotate(state) ? 0 : state?.turns ?? 0) + 1,
|
||||
rotationCount: state ? state.rotationCount + (this.shouldRotate(state) ? 1 : 0) : 0,
|
||||
lastPromptPath: artifacts.promptPath,
|
||||
lastResponsePath: artifacts.responsePath,
|
||||
updatedAt: nowIso(),
|
||||
};
|
||||
this.store.saveAgentState(nextState);
|
||||
|
||||
return { output: result.output };
|
||||
}
|
||||
|
||||
private shouldRotate(state: AgentStateRecord | null): boolean {
|
||||
return Boolean(state?.sessionId && state.turns >= this.config.sessionRefreshTurns);
|
||||
}
|
||||
|
||||
private checkpoint(runId: string): void {
|
||||
const snapshot = this.store.buildSnapshot(runId);
|
||||
this.store.addCheckpoint(runId, snapshot.run.status, snapshot);
|
||||
}
|
||||
}
|
||||
250
src/prompts.ts
Normal file
250
src/prompts.ts
Normal file
|
|
@ -0,0 +1,250 @@
|
|||
import type {
|
||||
CheckVerdictOutput,
|
||||
RunSnapshot,
|
||||
StrategyPlanOutput,
|
||||
StrategySelfCheckOutput,
|
||||
TaskDraft,
|
||||
TaskRecord,
|
||||
} from "./types.js";
|
||||
|
||||
function bulletList(values: string[]): string {
|
||||
return values.length ? values.map((value) => `- ${value}`).join("\n") : "- none";
|
||||
}
|
||||
|
||||
function renderTask(task: TaskDraft | TaskRecord): string {
|
||||
return [
|
||||
`Title: ${task.title}`,
|
||||
`Objective: ${task.objective}`,
|
||||
`Acceptance:`,
|
||||
bulletList(task.acceptanceCriteria),
|
||||
`Verification:`,
|
||||
bulletList(task.verificationSteps),
|
||||
`Allowed paths:`,
|
||||
bulletList(task.allowedPaths),
|
||||
].join("\n");
|
||||
}
|
||||
|
||||
function renderTasks(tasks: TaskDraft[] | TaskRecord[]): string {
|
||||
return tasks.length
|
||||
? tasks.map((task, index) => `Task ${index + 1}\n${renderTask(task)}`).join("\n\n")
|
||||
: "No tasks.";
|
||||
}
|
||||
|
||||
function renderApprovals(snapshot: RunSnapshot): string {
|
||||
if (!snapshot.approvals.length) {
|
||||
return "No approvals yet.";
|
||||
}
|
||||
|
||||
return snapshot.approvals
|
||||
.map(
|
||||
(approval) =>
|
||||
`- ${approval.createdAt} ${approval.source} ${approval.verdict}: ${approval.rationale}`,
|
||||
)
|
||||
.join("\n");
|
||||
}
|
||||
|
||||
function renderRecentAttempts(snapshot: RunSnapshot): string {
|
||||
if (!snapshot.recentAttempts.length) {
|
||||
return "No implementation attempts yet.";
|
||||
}
|
||||
|
||||
return snapshot.recentAttempts
|
||||
.map((attempt) => {
|
||||
const payload = JSON.parse(attempt.resultJson) as { changes?: string[]; followUps?: string[]; blockers?: string[] };
|
||||
return [
|
||||
`- Task ${attempt.taskId} attempt ${attempt.attemptNumber}: ${attempt.status}`,
|
||||
` Summary: ${attempt.summary}`,
|
||||
` Changes: ${(payload.changes ?? []).join("; ") || "none"}`,
|
||||
` Follow-ups: ${(payload.followUps ?? []).join("; ") || "none"}`,
|
||||
` Blockers: ${(payload.blockers ?? []).join("; ") || "none"}`,
|
||||
].join("\n");
|
||||
})
|
||||
.join("\n");
|
||||
}
|
||||
|
||||
function renderEvents(snapshot: RunSnapshot): string {
|
||||
if (!snapshot.recentEvents.length) {
|
||||
return "No events yet.";
|
||||
}
|
||||
|
||||
return snapshot.recentEvents
|
||||
.map((event) => `- ${event.ts} ${event.kind}: ${event.message}`)
|
||||
.join("\n");
|
||||
}
|
||||
|
||||
function renderRunState(snapshot: RunSnapshot): string {
|
||||
return [
|
||||
`Run ID: ${snapshot.run.id}`,
|
||||
`Goal: ${snapshot.run.goal}`,
|
||||
`Status: ${snapshot.run.status}`,
|
||||
`Repo: ${snapshot.run.repoPath}`,
|
||||
`Cycles executed: ${snapshot.run.cycleCount}`,
|
||||
`Replans: ${snapshot.run.replanCount}`,
|
||||
`Current task: ${snapshot.run.currentTaskId ?? "none"}`,
|
||||
].join("\n");
|
||||
}
|
||||
|
||||
export function buildStrategyPlanPrompt(snapshot: RunSnapshot): string {
|
||||
const checkerApproval = snapshot.approvals.findLast((approval) => approval.source === "checker");
|
||||
return `
|
||||
You are the strategy agent for an autonomous development system.
|
||||
|
||||
Role rules:
|
||||
- You may inspect the repository directly, but treat that as research only.
|
||||
- Do not edit files and do not implement tasks yourself.
|
||||
- Break work into concrete tasks for the implementation agent.
|
||||
- Lean on the provided summary first. Read more of the repo only when it materially improves strategy.
|
||||
- Only declare the project done when the user goal is actually satisfied, with evidence from completed work.
|
||||
|
||||
Return JSON that matches the provided schema exactly.
|
||||
|
||||
Current run state:
|
||||
${renderRunState(snapshot)}
|
||||
|
||||
Pending tasks:
|
||||
${renderTasks(snapshot.pendingTasks)}
|
||||
|
||||
Completed tasks:
|
||||
${renderTasks(snapshot.completedTasks)}
|
||||
|
||||
Blocked or abandoned tasks:
|
||||
${renderTasks(snapshot.blockedTasks)}
|
||||
|
||||
Recent implementation attempts:
|
||||
${renderRecentAttempts(snapshot)}
|
||||
|
||||
Recent events:
|
||||
${renderEvents(snapshot)}
|
||||
|
||||
Latest approvals:
|
||||
${renderApprovals(snapshot)}
|
||||
|
||||
Latest checker verdict:
|
||||
${checkerApproval ? JSON.stringify(checkerApproval.payload, null, 2) : "No checker verdict yet."}
|
||||
|
||||
Decision policy:
|
||||
- Use \`decision: "continue"\` when there is meaningful work left. Provide the complete next backlog.
|
||||
- Use \`decision: "done"\` only when no substantial work remains.
|
||||
- Use \`decision: "blocked"\` only for external blockers or unresolved contradictions.
|
||||
- Keep the backlog concise. Prefer a few substantive tasks over many trivial tasks.
|
||||
`.trim();
|
||||
}
|
||||
|
||||
export function buildImplementationPrompt(snapshot: RunSnapshot, task: TaskRecord): string {
|
||||
return `
|
||||
You are the implementation agent for an autonomous development system.
|
||||
|
||||
Role rules:
|
||||
- You are responsible for executing exactly one task.
|
||||
- You may modify files in the repository.
|
||||
- Stay within the task's allowed paths unless the task itself truly requires broader changes, and call that out if it happens.
|
||||
- Run relevant verification when possible.
|
||||
- If you cannot complete the task, return \`needs_replan\` or \`blocked\` with concrete blockers.
|
||||
|
||||
Return JSON that matches the provided schema exactly.
|
||||
|
||||
Overall goal:
|
||||
${snapshot.run.goal}
|
||||
|
||||
Task:
|
||||
${renderTask(task)}
|
||||
|
||||
Recent completed tasks:
|
||||
${renderTasks(snapshot.completedTasks.slice(-5))}
|
||||
|
||||
Recent blocked tasks:
|
||||
${renderTasks(snapshot.blockedTasks.slice(-5))}
|
||||
|
||||
Recent approvals:
|
||||
${renderApprovals(snapshot)}
|
||||
|
||||
You must echo the exact task id \`${task.id}\`.
|
||||
`.trim();
|
||||
}
|
||||
|
||||
export function buildStrategySelfCheckPrompt(
|
||||
snapshot: RunSnapshot,
|
||||
lastPlan: StrategyPlanOutput,
|
||||
): string {
|
||||
return `
|
||||
You are the strategy agent performing a self-check before independent review.
|
||||
|
||||
Role rules:
|
||||
- Re-evaluate the original goal against the repository and the execution history.
|
||||
- You may inspect the repo, but you still must not edit files.
|
||||
- Be strict. If meaningful work remains, do not send the run to the checker.
|
||||
|
||||
Return JSON that matches the provided schema exactly.
|
||||
|
||||
Original goal:
|
||||
${snapshot.run.goal}
|
||||
|
||||
Latest strategy decision:
|
||||
${JSON.stringify(lastPlan, null, 2)}
|
||||
|
||||
Completed tasks:
|
||||
${renderTasks(snapshot.completedTasks)}
|
||||
|
||||
Blocked or abandoned tasks:
|
||||
${renderTasks(snapshot.blockedTasks)}
|
||||
|
||||
Recent implementation attempts:
|
||||
${renderRecentAttempts(snapshot)}
|
||||
`.trim();
|
||||
}
|
||||
|
||||
export function buildCheckPrompt(
|
||||
snapshot: RunSnapshot,
|
||||
selfCheck: StrategySelfCheckOutput,
|
||||
): string {
|
||||
return `
|
||||
You are the independent check agent for an autonomous development system.
|
||||
|
||||
Role rules:
|
||||
- You are independent from the strategy agent.
|
||||
- You may inspect the repository directly.
|
||||
- You must not edit files.
|
||||
- Evaluate whether the original user goal is actually complete.
|
||||
- Reject completion when there are still meaningful tasks, risks, or missing evidence.
|
||||
|
||||
Return JSON that matches the provided schema exactly.
|
||||
|
||||
Original goal:
|
||||
${snapshot.run.goal}
|
||||
|
||||
Current run state:
|
||||
${renderRunState(snapshot)}
|
||||
|
||||
Strategy self-check:
|
||||
${JSON.stringify(selfCheck, null, 2)}
|
||||
|
||||
Completed tasks:
|
||||
${renderTasks(snapshot.completedTasks)}
|
||||
|
||||
Blocked or abandoned tasks:
|
||||
${renderTasks(snapshot.blockedTasks)}
|
||||
|
||||
Recent implementation attempts:
|
||||
${renderRecentAttempts(snapshot)}
|
||||
`.trim();
|
||||
}
|
||||
|
||||
export function formatStrategyPlanSummary(plan: StrategyPlanOutput): string {
|
||||
return `${plan.decision}: ${plan.summary}`;
|
||||
}
|
||||
|
||||
export function formatImplementationSummary(result: {
|
||||
status: string;
|
||||
summary: string;
|
||||
touchedFiles: string[];
|
||||
}): string {
|
||||
return `${result.status}: ${result.summary} | touched: ${result.touchedFiles.join(", ") || "none"}`;
|
||||
}
|
||||
|
||||
export function formatSelfCheckSummary(result: StrategySelfCheckOutput): string {
|
||||
return `${result.readyForIndependentCheck ? "ready" : "not ready"}: ${result.summary}`;
|
||||
}
|
||||
|
||||
export function formatCheckVerdictSummary(result: CheckVerdictOutput): string {
|
||||
return `${result.verdict}: ${result.summary}`;
|
||||
}
|
||||
141
src/schema-catalog.ts
Normal file
141
src/schema-catalog.ts
Normal file
|
|
@ -0,0 +1,141 @@
|
|||
export const taskDraftSchema = {
|
||||
type: "object",
|
||||
additionalProperties: false,
|
||||
required: ["title", "objective", "acceptanceCriteria", "verificationSteps", "allowedPaths"],
|
||||
properties: {
|
||||
title: { type: "string", minLength: 1 },
|
||||
objective: { type: "string", minLength: 1 },
|
||||
acceptanceCriteria: {
|
||||
type: "array",
|
||||
items: { type: "string", minLength: 1 },
|
||||
},
|
||||
verificationSteps: {
|
||||
type: "array",
|
||||
items: { type: "string", minLength: 1 },
|
||||
},
|
||||
allowedPaths: {
|
||||
type: "array",
|
||||
items: { type: "string", minLength: 1 },
|
||||
},
|
||||
},
|
||||
} as const;
|
||||
|
||||
export const strategyPlanSchema = {
|
||||
type: "object",
|
||||
additionalProperties: false,
|
||||
required: ["decision", "summary", "rationale", "goalProgress", "risks", "tasks"],
|
||||
properties: {
|
||||
decision: {
|
||||
type: "string",
|
||||
enum: ["continue", "done", "blocked"],
|
||||
},
|
||||
summary: { type: "string", minLength: 1 },
|
||||
rationale: { type: "string", minLength: 1 },
|
||||
goalProgress: { type: "string", minLength: 1 },
|
||||
risks: {
|
||||
type: "array",
|
||||
items: { type: "string", minLength: 1 },
|
||||
},
|
||||
tasks: {
|
||||
type: "array",
|
||||
items: taskDraftSchema,
|
||||
},
|
||||
blockedReason: { type: "string" },
|
||||
},
|
||||
} as const;
|
||||
|
||||
export const strategySelfCheckSchema = {
|
||||
type: "object",
|
||||
additionalProperties: false,
|
||||
required: ["readyForIndependentCheck", "summary", "rationale", "evidence", "remainingGaps"],
|
||||
properties: {
|
||||
readyForIndependentCheck: { type: "boolean" },
|
||||
summary: { type: "string", minLength: 1 },
|
||||
rationale: { type: "string", minLength: 1 },
|
||||
evidence: {
|
||||
type: "array",
|
||||
items: { type: "string", minLength: 1 },
|
||||
},
|
||||
remainingGaps: {
|
||||
type: "array",
|
||||
items: { type: "string", minLength: 1 },
|
||||
},
|
||||
},
|
||||
} as const;
|
||||
|
||||
export const implementationResultSchema = {
|
||||
type: "object",
|
||||
additionalProperties: false,
|
||||
required: [
|
||||
"taskId",
|
||||
"status",
|
||||
"summary",
|
||||
"changes",
|
||||
"verification",
|
||||
"followUps",
|
||||
"touchedFiles",
|
||||
"blockers",
|
||||
],
|
||||
properties: {
|
||||
taskId: { type: "string", minLength: 1 },
|
||||
status: {
|
||||
type: "string",
|
||||
enum: ["completed", "needs_replan", "blocked"],
|
||||
},
|
||||
summary: { type: "string", minLength: 1 },
|
||||
changes: {
|
||||
type: "array",
|
||||
items: { type: "string", minLength: 1 },
|
||||
},
|
||||
verification: {
|
||||
type: "array",
|
||||
items: {
|
||||
type: "object",
|
||||
additionalProperties: false,
|
||||
required: ["command", "outcome", "details"],
|
||||
properties: {
|
||||
command: { type: "string", minLength: 1 },
|
||||
outcome: {
|
||||
type: "string",
|
||||
enum: ["passed", "failed", "not_run"],
|
||||
},
|
||||
details: { type: "string", minLength: 1 },
|
||||
},
|
||||
},
|
||||
},
|
||||
followUps: {
|
||||
type: "array",
|
||||
items: { type: "string", minLength: 1 },
|
||||
},
|
||||
touchedFiles: {
|
||||
type: "array",
|
||||
items: { type: "string", minLength: 1 },
|
||||
},
|
||||
blockers: {
|
||||
type: "array",
|
||||
items: { type: "string", minLength: 1 },
|
||||
},
|
||||
},
|
||||
} as const;
|
||||
|
||||
export const checkVerdictSchema = {
|
||||
type: "object",
|
||||
additionalProperties: false,
|
||||
required: ["verdict", "summary", "rationale", "evidence", "remainingTasks"],
|
||||
properties: {
|
||||
verdict: {
|
||||
type: "string",
|
||||
enum: ["approved", "rejected"],
|
||||
},
|
||||
summary: { type: "string", minLength: 1 },
|
||||
rationale: { type: "string", minLength: 1 },
|
||||
evidence: {
|
||||
type: "array",
|
||||
items: { type: "string", minLength: 1 },
|
||||
},
|
||||
remainingTasks: {
|
||||
type: "array",
|
||||
items: taskDraftSchema,
|
||||
},
|
||||
},
|
||||
} as const;
|
||||
647
src/store.ts
Normal file
647
src/store.ts
Normal file
|
|
@ -0,0 +1,647 @@
|
|||
import { randomUUID } from "node:crypto";
|
||||
import { DatabaseSync } from "node:sqlite";
|
||||
import path from "node:path";
|
||||
import { ensureDir } from "./fs-utils.js";
|
||||
import type {
|
||||
AgentRole,
|
||||
AgentStateRecord,
|
||||
ApprovalRecord,
|
||||
ApprovalSource,
|
||||
ApprovalVerdict,
|
||||
ArtifactRecord,
|
||||
AttemptStatus,
|
||||
CheckpointRecord,
|
||||
EventRecord,
|
||||
RunRecord,
|
||||
RunSnapshot,
|
||||
RunStatus,
|
||||
TaskAttemptRecord,
|
||||
TaskDraft,
|
||||
TaskRecord,
|
||||
TaskStatus,
|
||||
} from "./types.js";
|
||||
|
||||
function nowIso(): string {
|
||||
return new Date().toISOString();
|
||||
}
|
||||
|
||||
function parseJson<T>(value: string | null): T {
|
||||
if (!value) {
|
||||
return [] as T;
|
||||
}
|
||||
return JSON.parse(value) as T;
|
||||
}
|
||||
|
||||
function toJson(value: unknown): string {
|
||||
return JSON.stringify(value);
|
||||
}
|
||||
|
||||
function mapTaskRow(row: Record<string, unknown>): TaskRecord {
|
||||
return {
|
||||
id: String(row.id),
|
||||
runId: String(row.run_id),
|
||||
title: String(row.title),
|
||||
objective: String(row.objective),
|
||||
acceptanceCriteria: parseJson<string[]>(String(row.acceptance_criteria_json)),
|
||||
verificationSteps: parseJson<string[]>(String(row.verification_steps_json)),
|
||||
allowedPaths: parseJson<string[]>(String(row.allowed_paths_json)),
|
||||
status: row.status as TaskStatus,
|
||||
attemptCount: Number(row.attempt_count),
|
||||
implementationSummary: row.implementation_summary ? String(row.implementation_summary) : null,
|
||||
blockerSignature: row.blocker_signature ? String(row.blocker_signature) : null,
|
||||
createdAt: String(row.created_at),
|
||||
updatedAt: String(row.updated_at),
|
||||
};
|
||||
}
|
||||
|
||||
function mapAttemptRow(row: Record<string, unknown>): TaskAttemptRecord {
|
||||
return {
|
||||
id: String(row.id),
|
||||
runId: String(row.run_id),
|
||||
taskId: String(row.task_id),
|
||||
attemptNumber: Number(row.attempt_number),
|
||||
status: row.status as AttemptStatus,
|
||||
summary: String(row.summary),
|
||||
blockerSignature: row.blocker_signature ? String(row.blocker_signature) : null,
|
||||
resultJson: String(row.result_json),
|
||||
createdAt: String(row.created_at),
|
||||
};
|
||||
}
|
||||
|
||||
function mapRunRow(row: Record<string, unknown>): RunRecord {
|
||||
return {
|
||||
id: String(row.id),
|
||||
goal: String(row.goal),
|
||||
repoPath: String(row.repo_path),
|
||||
status: row.status as RunStatus,
|
||||
summary: row.summary ? String(row.summary) : null,
|
||||
cycleCount: Number(row.cycle_count),
|
||||
replanCount: Number(row.replan_count),
|
||||
currentTaskId: row.current_task_id ? String(row.current_task_id) : null,
|
||||
createdAt: String(row.created_at),
|
||||
updatedAt: String(row.updated_at),
|
||||
};
|
||||
}
|
||||
|
||||
function mapAgentRow(row: Record<string, unknown>): AgentStateRecord {
|
||||
return {
|
||||
runId: String(row.run_id),
|
||||
role: row.role as AgentRole,
|
||||
sessionId: row.session_id ? String(row.session_id) : null,
|
||||
turns: Number(row.turns),
|
||||
rotationCount: Number(row.rotation_count),
|
||||
lastPromptPath: row.last_prompt_path ? String(row.last_prompt_path) : null,
|
||||
lastResponsePath: row.last_response_path ? String(row.last_response_path) : null,
|
||||
updatedAt: String(row.updated_at),
|
||||
};
|
||||
}
|
||||
|
||||
export class RunStore {
|
||||
private readonly db: DatabaseSync;
|
||||
|
||||
constructor(dbPath: string) {
|
||||
ensureDir(path.dirname(dbPath));
|
||||
this.db = new DatabaseSync(dbPath);
|
||||
this.db.exec("PRAGMA journal_mode = WAL;");
|
||||
this.db.exec(`
|
||||
CREATE TABLE IF NOT EXISTS runs (
|
||||
id TEXT PRIMARY KEY,
|
||||
goal TEXT NOT NULL,
|
||||
repo_path TEXT NOT NULL,
|
||||
status TEXT NOT NULL,
|
||||
summary TEXT,
|
||||
cycle_count INTEGER NOT NULL DEFAULT 0,
|
||||
replan_count INTEGER NOT NULL DEFAULT 0,
|
||||
current_task_id TEXT,
|
||||
created_at TEXT NOT NULL,
|
||||
updated_at TEXT NOT NULL
|
||||
);
|
||||
|
||||
CREATE TABLE IF NOT EXISTS agents (
|
||||
run_id TEXT NOT NULL,
|
||||
role TEXT NOT NULL,
|
||||
session_id TEXT,
|
||||
turns INTEGER NOT NULL DEFAULT 0,
|
||||
rotation_count INTEGER NOT NULL DEFAULT 0,
|
||||
last_prompt_path TEXT,
|
||||
last_response_path TEXT,
|
||||
updated_at TEXT NOT NULL,
|
||||
PRIMARY KEY (run_id, role)
|
||||
);
|
||||
|
||||
CREATE TABLE IF NOT EXISTS tasks (
|
||||
id TEXT PRIMARY KEY,
|
||||
run_id TEXT NOT NULL,
|
||||
title TEXT NOT NULL,
|
||||
objective TEXT NOT NULL,
|
||||
acceptance_criteria_json TEXT NOT NULL,
|
||||
verification_steps_json TEXT NOT NULL,
|
||||
allowed_paths_json TEXT NOT NULL,
|
||||
status TEXT NOT NULL,
|
||||
attempt_count INTEGER NOT NULL DEFAULT 0,
|
||||
implementation_summary TEXT,
|
||||
blocker_signature TEXT,
|
||||
created_at TEXT NOT NULL,
|
||||
updated_at TEXT NOT NULL
|
||||
);
|
||||
|
||||
CREATE TABLE IF NOT EXISTS task_attempts (
|
||||
id TEXT PRIMARY KEY,
|
||||
run_id TEXT NOT NULL,
|
||||
task_id TEXT NOT NULL,
|
||||
attempt_number INTEGER NOT NULL,
|
||||
status TEXT NOT NULL,
|
||||
summary TEXT NOT NULL,
|
||||
blocker_signature TEXT,
|
||||
result_json TEXT NOT NULL,
|
||||
created_at TEXT NOT NULL
|
||||
);
|
||||
|
||||
CREATE TABLE IF NOT EXISTS events (
|
||||
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
||||
run_id TEXT NOT NULL,
|
||||
ts TEXT NOT NULL,
|
||||
source TEXT NOT NULL,
|
||||
kind TEXT NOT NULL,
|
||||
message TEXT NOT NULL,
|
||||
payload_json TEXT NOT NULL
|
||||
);
|
||||
|
||||
CREATE TABLE IF NOT EXISTS artifacts (
|
||||
id TEXT PRIMARY KEY,
|
||||
run_id TEXT NOT NULL,
|
||||
role TEXT,
|
||||
kind TEXT NOT NULL,
|
||||
path TEXT NOT NULL,
|
||||
created_at TEXT NOT NULL,
|
||||
metadata_json TEXT NOT NULL
|
||||
);
|
||||
|
||||
CREATE TABLE IF NOT EXISTS checkpoints (
|
||||
id TEXT PRIMARY KEY,
|
||||
run_id TEXT NOT NULL,
|
||||
status TEXT NOT NULL,
|
||||
payload_json TEXT NOT NULL,
|
||||
created_at TEXT NOT NULL
|
||||
);
|
||||
|
||||
CREATE TABLE IF NOT EXISTS approvals (
|
||||
id TEXT PRIMARY KEY,
|
||||
run_id TEXT NOT NULL,
|
||||
source TEXT NOT NULL,
|
||||
verdict TEXT NOT NULL,
|
||||
rationale TEXT NOT NULL,
|
||||
payload_json TEXT NOT NULL,
|
||||
created_at TEXT NOT NULL
|
||||
);
|
||||
`);
|
||||
}
|
||||
|
||||
createRun(goal: string, repoPath: string): RunRecord {
|
||||
const timestamp = nowIso();
|
||||
const id = randomUUID();
|
||||
this.db
|
||||
.prepare(`
|
||||
INSERT INTO runs (id, goal, repo_path, status, summary, cycle_count, replan_count, current_task_id, created_at, updated_at)
|
||||
VALUES (?, ?, ?, 'planning', NULL, 0, 0, NULL, ?, ?)
|
||||
`)
|
||||
.run(id, goal, repoPath, timestamp, timestamp);
|
||||
|
||||
this.addEvent(id, "system", "run_created", "Run created.", { goal, repoPath });
|
||||
return this.getRun(id);
|
||||
}
|
||||
|
||||
getRun(runId: string): RunRecord {
|
||||
const row = this.db.prepare("SELECT * FROM runs WHERE id = ?").get(runId) as Record<string, unknown> | undefined;
|
||||
if (!row) {
|
||||
throw new Error(`Run not found: ${runId}`);
|
||||
}
|
||||
return mapRunRow(row);
|
||||
}
|
||||
|
||||
listRuns(): RunRecord[] {
|
||||
const rows = this.db.prepare("SELECT * FROM runs ORDER BY created_at DESC").all() as Record<string, unknown>[];
|
||||
return rows.map(mapRunRow);
|
||||
}
|
||||
|
||||
updateRun(runId: string, updates: Partial<Pick<RunRecord, "status" | "summary" | "currentTaskId">>): RunRecord {
|
||||
const current = this.getRun(runId);
|
||||
const next: RunRecord = {
|
||||
...current,
|
||||
status: updates.status ?? current.status,
|
||||
summary: updates.summary ?? current.summary,
|
||||
currentTaskId: updates.currentTaskId === undefined ? current.currentTaskId : updates.currentTaskId,
|
||||
updatedAt: nowIso(),
|
||||
};
|
||||
this.db
|
||||
.prepare(`
|
||||
UPDATE runs
|
||||
SET status = ?, summary = ?, current_task_id = ?, updated_at = ?
|
||||
WHERE id = ?
|
||||
`)
|
||||
.run(next.status, next.summary, next.currentTaskId, next.updatedAt, runId);
|
||||
return this.getRun(runId);
|
||||
}
|
||||
|
||||
incrementCycle(runId: string): void {
|
||||
this.db.prepare("UPDATE runs SET cycle_count = cycle_count + 1, updated_at = ? WHERE id = ?").run(nowIso(), runId);
|
||||
}
|
||||
|
||||
incrementReplanCount(runId: string): void {
|
||||
this.db.prepare("UPDATE runs SET replan_count = replan_count + 1, updated_at = ? WHERE id = ?").run(nowIso(), runId);
|
||||
}
|
||||
|
||||
getAgentState(runId: string, role: AgentRole): AgentStateRecord | null {
|
||||
const row = this.db.prepare("SELECT * FROM agents WHERE run_id = ? AND role = ?").get(runId, role) as
|
||||
| Record<string, unknown>
|
||||
| undefined;
|
||||
return row ? mapAgentRow(row) : null;
|
||||
}
|
||||
|
||||
saveAgentState(record: AgentStateRecord): void {
|
||||
this.db
|
||||
.prepare(`
|
||||
INSERT INTO agents (run_id, role, session_id, turns, rotation_count, last_prompt_path, last_response_path, updated_at)
|
||||
VALUES (?, ?, ?, ?, ?, ?, ?, ?)
|
||||
ON CONFLICT(run_id, role) DO UPDATE SET
|
||||
session_id = excluded.session_id,
|
||||
turns = excluded.turns,
|
||||
rotation_count = excluded.rotation_count,
|
||||
last_prompt_path = excluded.last_prompt_path,
|
||||
last_response_path = excluded.last_response_path,
|
||||
updated_at = excluded.updated_at
|
||||
`)
|
||||
.run(
|
||||
record.runId,
|
||||
record.role,
|
||||
record.sessionId,
|
||||
record.turns,
|
||||
record.rotationCount,
|
||||
record.lastPromptPath,
|
||||
record.lastResponsePath,
|
||||
record.updatedAt,
|
||||
);
|
||||
}
|
||||
|
||||
requeueInProgressTasks(runId: string): number {
|
||||
const timestamp = nowIso();
|
||||
const result = this.db
|
||||
.prepare(`
|
||||
UPDATE tasks
|
||||
SET status = 'pending', updated_at = ?
|
||||
WHERE run_id = ? AND status = 'in_progress'
|
||||
`)
|
||||
.run(timestamp, runId);
|
||||
const changes = Number(result.changes ?? 0);
|
||||
if (changes > 0) {
|
||||
this.addEvent(runId, "system", "requeue_in_progress", "Requeued in-progress tasks on resume.", { changes });
|
||||
}
|
||||
return changes;
|
||||
}
|
||||
|
||||
replacePendingTasks(runId: string, tasks: TaskDraft[]): TaskRecord[] {
|
||||
const now = nowIso();
|
||||
this.db
|
||||
.prepare(`
|
||||
UPDATE tasks
|
||||
SET status = 'abandoned', updated_at = ?
|
||||
WHERE run_id = ? AND status IN ('pending', 'in_progress')
|
||||
`)
|
||||
.run(now, runId);
|
||||
|
||||
const insertStmt = this.db.prepare(`
|
||||
INSERT INTO tasks (
|
||||
id, run_id, title, objective, acceptance_criteria_json, verification_steps_json,
|
||||
allowed_paths_json, status, attempt_count, implementation_summary, blocker_signature, created_at, updated_at
|
||||
) VALUES (?, ?, ?, ?, ?, ?, ?, 'pending', 0, NULL, NULL, ?, ?)
|
||||
`);
|
||||
|
||||
const records: TaskRecord[] = [];
|
||||
for (const task of tasks) {
|
||||
const id = randomUUID();
|
||||
insertStmt.run(
|
||||
id,
|
||||
runId,
|
||||
task.title,
|
||||
task.objective,
|
||||
toJson(task.acceptanceCriteria),
|
||||
toJson(task.verificationSteps),
|
||||
toJson(task.allowedPaths),
|
||||
now,
|
||||
now,
|
||||
);
|
||||
records.push(this.getTask(id));
|
||||
}
|
||||
|
||||
return records;
|
||||
}
|
||||
|
||||
listTasks(runId: string, statuses?: TaskStatus[]): TaskRecord[] {
|
||||
if (!statuses?.length) {
|
||||
const rows = this.db
|
||||
.prepare("SELECT * FROM tasks WHERE run_id = ? ORDER BY created_at ASC")
|
||||
.all(runId) as Record<string, unknown>[];
|
||||
return rows.map(mapTaskRow);
|
||||
}
|
||||
|
||||
const placeholders = statuses.map(() => "?").join(", ");
|
||||
const rows = this.db
|
||||
.prepare(`SELECT * FROM tasks WHERE run_id = ? AND status IN (${placeholders}) ORDER BY created_at ASC`)
|
||||
.all(runId, ...statuses) as Record<string, unknown>[];
|
||||
return rows.map(mapTaskRow);
|
||||
}
|
||||
|
||||
getTask(taskId: string): TaskRecord {
|
||||
const row = this.db.prepare("SELECT * FROM tasks WHERE id = ?").get(taskId) as Record<string, unknown> | undefined;
|
||||
if (!row) {
|
||||
throw new Error(`Task not found: ${taskId}`);
|
||||
}
|
||||
return mapTaskRow(row);
|
||||
}
|
||||
|
||||
claimNextPendingTask(runId: string): TaskRecord | null {
|
||||
const row = this.db
|
||||
.prepare(`
|
||||
SELECT * FROM tasks
|
||||
WHERE run_id = ? AND status = 'pending'
|
||||
ORDER BY created_at ASC
|
||||
LIMIT 1
|
||||
`)
|
||||
.get(runId) as Record<string, unknown> | undefined;
|
||||
|
||||
if (!row) {
|
||||
return null;
|
||||
}
|
||||
|
||||
const taskId = String(row.id);
|
||||
this.db
|
||||
.prepare(`
|
||||
UPDATE tasks
|
||||
SET status = 'in_progress', attempt_count = attempt_count + 1, updated_at = ?
|
||||
WHERE id = ?
|
||||
`)
|
||||
.run(nowIso(), taskId);
|
||||
return this.getTask(taskId);
|
||||
}
|
||||
|
||||
completeTask(taskId: string, summary: string): TaskRecord {
|
||||
this.db
|
||||
.prepare(`
|
||||
UPDATE tasks
|
||||
SET status = 'completed', implementation_summary = ?, blocker_signature = NULL, updated_at = ?
|
||||
WHERE id = ?
|
||||
`)
|
||||
.run(summary, nowIso(), taskId);
|
||||
return this.getTask(taskId);
|
||||
}
|
||||
|
||||
abandonTask(taskId: string, summary: string | null, blockerSignature: string | null): TaskRecord {
|
||||
this.db
|
||||
.prepare(`
|
||||
UPDATE tasks
|
||||
SET status = 'abandoned', implementation_summary = ?, blocker_signature = ?, updated_at = ?
|
||||
WHERE id = ?
|
||||
`)
|
||||
.run(summary, blockerSignature, nowIso(), taskId);
|
||||
return this.getTask(taskId);
|
||||
}
|
||||
|
||||
blockTask(taskId: string, summary: string | null, blockerSignature: string | null): TaskRecord {
|
||||
this.db
|
||||
.prepare(`
|
||||
UPDATE tasks
|
||||
SET status = 'blocked', implementation_summary = ?, blocker_signature = ?, updated_at = ?
|
||||
WHERE id = ?
|
||||
`)
|
||||
.run(summary, blockerSignature, nowIso(), taskId);
|
||||
return this.getTask(taskId);
|
||||
}
|
||||
|
||||
latestTaskAttempt(taskId: string): TaskAttemptRecord | null {
|
||||
const row = this.db
|
||||
.prepare("SELECT * FROM task_attempts WHERE task_id = ? ORDER BY created_at DESC LIMIT 1")
|
||||
.get(taskId) as Record<string, unknown> | undefined;
|
||||
return row ? mapAttemptRow(row) : null;
|
||||
}
|
||||
|
||||
addTaskAttempt(
|
||||
runId: string,
|
||||
taskId: string,
|
||||
attemptNumber: number,
|
||||
status: AttemptStatus,
|
||||
summary: string,
|
||||
result: unknown,
|
||||
blockerSignature: string | null,
|
||||
): TaskAttemptRecord {
|
||||
const id = randomUUID();
|
||||
const createdAt = nowIso();
|
||||
this.db
|
||||
.prepare(`
|
||||
INSERT INTO task_attempts (id, run_id, task_id, attempt_number, status, summary, blocker_signature, result_json, created_at)
|
||||
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)
|
||||
`)
|
||||
.run(id, runId, taskId, attemptNumber, status, summary, blockerSignature, toJson(result), createdAt);
|
||||
return this.latestTaskAttempt(taskId) as TaskAttemptRecord;
|
||||
}
|
||||
|
||||
addEvent(runId: string, source: string, kind: string, message: string, payload: unknown): void {
|
||||
this.db
|
||||
.prepare(`
|
||||
INSERT INTO events (run_id, ts, source, kind, message, payload_json)
|
||||
VALUES (?, ?, ?, ?, ?, ?)
|
||||
`)
|
||||
.run(runId, nowIso(), source, kind, message, toJson(payload));
|
||||
}
|
||||
|
||||
listEvents(runId: string, limit = 50): EventRecord[] {
|
||||
const rows = this.db
|
||||
.prepare(`
|
||||
SELECT * FROM events
|
||||
WHERE run_id = ?
|
||||
ORDER BY id DESC
|
||||
LIMIT ?
|
||||
`)
|
||||
.all(runId, limit) as Record<string, unknown>[];
|
||||
return rows
|
||||
.map((row) => ({
|
||||
id: Number(row.id),
|
||||
runId: String(row.run_id),
|
||||
ts: String(row.ts),
|
||||
source: String(row.source),
|
||||
kind: String(row.kind),
|
||||
message: String(row.message),
|
||||
payload: JSON.parse(String(row.payload_json)),
|
||||
}))
|
||||
.reverse();
|
||||
}
|
||||
|
||||
addArtifact(runId: string, role: AgentRole | null, kind: string, filePath: string, metadata: unknown): ArtifactRecord {
|
||||
const id = randomUUID();
|
||||
const createdAt = nowIso();
|
||||
this.db
|
||||
.prepare(`
|
||||
INSERT INTO artifacts (id, run_id, role, kind, path, created_at, metadata_json)
|
||||
VALUES (?, ?, ?, ?, ?, ?, ?)
|
||||
`)
|
||||
.run(id, runId, role, kind, filePath, createdAt, toJson(metadata));
|
||||
|
||||
return {
|
||||
id,
|
||||
runId,
|
||||
role,
|
||||
kind,
|
||||
path: filePath,
|
||||
createdAt,
|
||||
metadata,
|
||||
};
|
||||
}
|
||||
|
||||
listArtifacts(runId: string, role?: AgentRole): ArtifactRecord[] {
|
||||
const rows = role
|
||||
? (this.db
|
||||
.prepare(`
|
||||
SELECT * FROM artifacts
|
||||
WHERE run_id = ? AND role = ?
|
||||
ORDER BY created_at ASC
|
||||
`)
|
||||
.all(runId, role) as Record<string, unknown>[])
|
||||
: (this.db
|
||||
.prepare(`
|
||||
SELECT * FROM artifacts
|
||||
WHERE run_id = ?
|
||||
ORDER BY created_at ASC
|
||||
`)
|
||||
.all(runId) as Record<string, unknown>[]);
|
||||
|
||||
return rows.map((row) => ({
|
||||
id: String(row.id),
|
||||
runId: String(row.run_id),
|
||||
role: row.role ? (String(row.role) as AgentRole) : null,
|
||||
kind: String(row.kind),
|
||||
path: String(row.path),
|
||||
createdAt: String(row.created_at),
|
||||
metadata: JSON.parse(String(row.metadata_json)),
|
||||
}));
|
||||
}
|
||||
|
||||
addApproval(
|
||||
runId: string,
|
||||
source: ApprovalSource,
|
||||
verdict: ApprovalVerdict,
|
||||
rationale: string,
|
||||
payload: unknown,
|
||||
): ApprovalRecord {
|
||||
const id = randomUUID();
|
||||
const createdAt = nowIso();
|
||||
this.db
|
||||
.prepare(`
|
||||
INSERT INTO approvals (id, run_id, source, verdict, rationale, payload_json, created_at)
|
||||
VALUES (?, ?, ?, ?, ?, ?, ?)
|
||||
`)
|
||||
.run(id, runId, source, verdict, rationale, toJson(payload), createdAt);
|
||||
|
||||
return {
|
||||
id,
|
||||
runId,
|
||||
source,
|
||||
verdict,
|
||||
rationale,
|
||||
payload,
|
||||
createdAt,
|
||||
};
|
||||
}
|
||||
|
||||
listApprovals(runId: string): ApprovalRecord[] {
|
||||
const rows = this.db
|
||||
.prepare("SELECT * FROM approvals WHERE run_id = ? ORDER BY created_at ASC")
|
||||
.all(runId) as Record<string, unknown>[];
|
||||
return rows.map((row) => ({
|
||||
id: String(row.id),
|
||||
runId: String(row.run_id),
|
||||
source: row.source as ApprovalSource,
|
||||
verdict: row.verdict as ApprovalVerdict,
|
||||
rationale: String(row.rationale),
|
||||
payload: JSON.parse(String(row.payload_json)),
|
||||
createdAt: String(row.created_at),
|
||||
}));
|
||||
}
|
||||
|
||||
latestApproval(runId: string, source: ApprovalSource): ApprovalRecord | null {
|
||||
const row = this.db
|
||||
.prepare(`
|
||||
SELECT * FROM approvals
|
||||
WHERE run_id = ? AND source = ?
|
||||
ORDER BY created_at DESC
|
||||
LIMIT 1
|
||||
`)
|
||||
.get(runId, source) as Record<string, unknown> | undefined;
|
||||
if (!row) {
|
||||
return null;
|
||||
}
|
||||
|
||||
return {
|
||||
id: String(row.id),
|
||||
runId: String(row.run_id),
|
||||
source: row.source as ApprovalSource,
|
||||
verdict: row.verdict as ApprovalVerdict,
|
||||
rationale: String(row.rationale),
|
||||
payload: JSON.parse(String(row.payload_json)),
|
||||
createdAt: String(row.created_at),
|
||||
};
|
||||
}
|
||||
|
||||
addCheckpoint(runId: string, status: RunStatus, payload: unknown): CheckpointRecord {
|
||||
const id = randomUUID();
|
||||
const createdAt = nowIso();
|
||||
this.db
|
||||
.prepare(`
|
||||
INSERT INTO checkpoints (id, run_id, status, payload_json, created_at)
|
||||
VALUES (?, ?, ?, ?, ?)
|
||||
`)
|
||||
.run(id, runId, status, toJson(payload), createdAt);
|
||||
|
||||
return {
|
||||
id,
|
||||
runId,
|
||||
status,
|
||||
payload,
|
||||
createdAt,
|
||||
};
|
||||
}
|
||||
|
||||
buildSnapshot(runId: string): RunSnapshot {
|
||||
const run = this.getRun(runId);
|
||||
const agents = (["strategy", "implementation", "checker"] as const).reduce<RunSnapshot["agents"]>((acc, role) => {
|
||||
const agent = this.getAgentState(runId, role);
|
||||
if (agent) {
|
||||
acc[role] = agent;
|
||||
}
|
||||
return acc;
|
||||
}, {});
|
||||
|
||||
return {
|
||||
run,
|
||||
agents,
|
||||
pendingTasks: this.listTasks(runId, ["pending"]),
|
||||
inProgressTasks: this.listTasks(runId, ["in_progress"]),
|
||||
completedTasks: this.listTasks(runId, ["completed"]),
|
||||
blockedTasks: this.listTasks(runId, ["blocked", "abandoned"]),
|
||||
recentAttempts: this.listRecentAttempts(runId),
|
||||
recentEvents: this.listEvents(runId),
|
||||
approvals: this.listApprovals(runId),
|
||||
artifacts: this.listArtifacts(runId),
|
||||
};
|
||||
}
|
||||
|
||||
listRecentAttempts(runId: string, limit = 12): TaskAttemptRecord[] {
|
||||
const rows = this.db
|
||||
.prepare(`
|
||||
SELECT * FROM task_attempts
|
||||
WHERE run_id = ?
|
||||
ORDER BY created_at DESC
|
||||
LIMIT ?
|
||||
`)
|
||||
.all(runId, limit) as Record<string, unknown>[];
|
||||
return rows.map(mapAttemptRow).reverse();
|
||||
}
|
||||
}
|
||||
230
src/types.ts
Normal file
230
src/types.ts
Normal file
|
|
@ -0,0 +1,230 @@
|
|||
export const agentRoles = ["strategy", "implementation", "checker"] as const;
|
||||
|
||||
export type AgentRole = (typeof agentRoles)[number];
|
||||
export type SandboxMode = "read-only" | "workspace-write" | "danger-full-access";
|
||||
export type RunStatus =
|
||||
| "planning"
|
||||
| "implementing"
|
||||
| "self_check"
|
||||
| "independent_check"
|
||||
| "blocked"
|
||||
| "done";
|
||||
export type TaskStatus = "pending" | "in_progress" | "completed" | "blocked" | "abandoned";
|
||||
export type AttemptStatus = "completed" | "needs_replan" | "blocked";
|
||||
export type ApprovalSource = "strategy_self_check" | "checker";
|
||||
export type ApprovalVerdict = "approved" | "rejected";
|
||||
|
||||
export interface RoleConfig {
|
||||
model?: string;
|
||||
sandbox: SandboxMode;
|
||||
search: boolean;
|
||||
skipGitRepoCheck: boolean;
|
||||
extraArgs: string[];
|
||||
}
|
||||
|
||||
export interface ResolvedConfig {
|
||||
repoPath: string;
|
||||
stateDir: string;
|
||||
databasePath: string;
|
||||
runsDir: string;
|
||||
codexCommand: string;
|
||||
maxTaskAttempts: number;
|
||||
maxCyclesPerInvocation: number;
|
||||
maxReplans: number;
|
||||
maxTasks: number;
|
||||
sessionRefreshTurns: number;
|
||||
roles: Record<AgentRole, RoleConfig>;
|
||||
}
|
||||
|
||||
export interface RunRecord {
|
||||
id: string;
|
||||
goal: string;
|
||||
repoPath: string;
|
||||
status: RunStatus;
|
||||
summary: string | null;
|
||||
cycleCount: number;
|
||||
replanCount: number;
|
||||
currentTaskId: string | null;
|
||||
createdAt: string;
|
||||
updatedAt: string;
|
||||
}
|
||||
|
||||
export interface AgentStateRecord {
|
||||
runId: string;
|
||||
role: AgentRole;
|
||||
sessionId: string | null;
|
||||
turns: number;
|
||||
rotationCount: number;
|
||||
lastPromptPath: string | null;
|
||||
lastResponsePath: string | null;
|
||||
updatedAt: string;
|
||||
}
|
||||
|
||||
export interface TaskDraft {
|
||||
title: string;
|
||||
objective: string;
|
||||
acceptanceCriteria: string[];
|
||||
verificationSteps: string[];
|
||||
allowedPaths: string[];
|
||||
}
|
||||
|
||||
export interface TaskRecord extends TaskDraft {
|
||||
id: string;
|
||||
runId: string;
|
||||
status: TaskStatus;
|
||||
attemptCount: number;
|
||||
implementationSummary: string | null;
|
||||
blockerSignature: string | null;
|
||||
createdAt: string;
|
||||
updatedAt: string;
|
||||
}
|
||||
|
||||
export interface TaskAttemptRecord {
|
||||
id: string;
|
||||
runId: string;
|
||||
taskId: string;
|
||||
attemptNumber: number;
|
||||
status: AttemptStatus;
|
||||
summary: string;
|
||||
blockerSignature: string | null;
|
||||
resultJson: string;
|
||||
createdAt: string;
|
||||
}
|
||||
|
||||
export interface EventRecord {
|
||||
id: number;
|
||||
runId: string;
|
||||
ts: string;
|
||||
source: string;
|
||||
kind: string;
|
||||
message: string;
|
||||
payload: unknown;
|
||||
}
|
||||
|
||||
export interface ArtifactRecord {
|
||||
id: string;
|
||||
runId: string;
|
||||
role: AgentRole | null;
|
||||
kind: string;
|
||||
path: string;
|
||||
createdAt: string;
|
||||
metadata: unknown;
|
||||
}
|
||||
|
||||
export interface ApprovalRecord {
|
||||
id: string;
|
||||
runId: string;
|
||||
source: ApprovalSource;
|
||||
verdict: ApprovalVerdict;
|
||||
rationale: string;
|
||||
payload: unknown;
|
||||
createdAt: string;
|
||||
}
|
||||
|
||||
export interface CheckpointRecord {
|
||||
id: string;
|
||||
runId: string;
|
||||
status: RunStatus;
|
||||
payload: unknown;
|
||||
createdAt: string;
|
||||
}
|
||||
|
||||
export interface VerificationResult {
|
||||
command: string;
|
||||
outcome: "passed" | "failed" | "not_run";
|
||||
details: string;
|
||||
}
|
||||
|
||||
export interface StrategyPlanOutput {
|
||||
decision: "continue" | "done" | "blocked";
|
||||
summary: string;
|
||||
rationale: string;
|
||||
goalProgress: string;
|
||||
risks: string[];
|
||||
tasks: TaskDraft[];
|
||||
blockedReason?: string;
|
||||
}
|
||||
|
||||
export interface StrategySelfCheckOutput {
|
||||
readyForIndependentCheck: boolean;
|
||||
summary: string;
|
||||
rationale: string;
|
||||
evidence: string[];
|
||||
remainingGaps: string[];
|
||||
}
|
||||
|
||||
export interface ImplementationResultOutput {
|
||||
taskId: string;
|
||||
status: AttemptStatus;
|
||||
summary: string;
|
||||
changes: string[];
|
||||
verification: VerificationResult[];
|
||||
followUps: string[];
|
||||
touchedFiles: string[];
|
||||
blockers: string[];
|
||||
}
|
||||
|
||||
export interface CheckVerdictOutput {
|
||||
verdict: "approved" | "rejected";
|
||||
summary: string;
|
||||
rationale: string;
|
||||
evidence: string[];
|
||||
remainingTasks: TaskDraft[];
|
||||
}
|
||||
|
||||
export interface RunSnapshot {
|
||||
run: RunRecord;
|
||||
agents: Partial<Record<AgentRole, AgentStateRecord>>;
|
||||
pendingTasks: TaskRecord[];
|
||||
inProgressTasks: TaskRecord[];
|
||||
completedTasks: TaskRecord[];
|
||||
blockedTasks: TaskRecord[];
|
||||
recentAttempts: TaskAttemptRecord[];
|
||||
recentEvents: EventRecord[];
|
||||
approvals: ApprovalRecord[];
|
||||
artifacts: ArtifactRecord[];
|
||||
}
|
||||
|
||||
export interface InvocationArtifacts {
|
||||
promptPath: string;
|
||||
schemaPath: string;
|
||||
rawEventsPath: string;
|
||||
stderrPath: string;
|
||||
lastMessagePath: string;
|
||||
responsePath: string;
|
||||
}
|
||||
|
||||
export interface AgentInvocation<T> {
|
||||
runId: string;
|
||||
role: AgentRole;
|
||||
sessionId: string | null;
|
||||
prompt: string;
|
||||
schemaName: string;
|
||||
schema: Record<string, unknown>;
|
||||
cwd: string;
|
||||
roleConfig: RoleConfig;
|
||||
artifacts: InvocationArtifacts;
|
||||
maxValidationRetries?: number;
|
||||
}
|
||||
|
||||
export interface AgentInvocationResult<T> {
|
||||
sessionId: string | null;
|
||||
output: T;
|
||||
rawMessage: string;
|
||||
rawEvents: string[];
|
||||
stderr: string;
|
||||
artifacts: InvocationArtifacts;
|
||||
}
|
||||
|
||||
export interface RawAgentRunner {
|
||||
invoke<T>(request: AgentInvocation<T>): Promise<AgentInvocationResult<T>>;
|
||||
}
|
||||
|
||||
export interface RunArtifactsIndex {
|
||||
promptPath: string;
|
||||
responsePath: string;
|
||||
schemaPath: string;
|
||||
rawEventsPath: string;
|
||||
stderrPath: string;
|
||||
lastMessagePath: string;
|
||||
}
|
||||
138
test/orchestrator.test.ts
Normal file
138
test/orchestrator.test.ts
Normal file
|
|
@ -0,0 +1,138 @@
|
|||
import { mkdtempSync, readFileSync } from "node:fs";
|
||||
import { tmpdir } from "node:os";
|
||||
import path from "node:path";
|
||||
import { describe, expect, test } from "vitest";
|
||||
import { ArtifactManager } from "../src/artifacts.js";
|
||||
import { loadConfig } from "../src/config.js";
|
||||
import { AgentROrchestrator } from "../src/orchestrator.js";
|
||||
import { RunStore } from "../src/store.js";
|
||||
import type { AgentInvocation, AgentInvocationResult, RawAgentRunner } from "../src/types.js";
|
||||
|
||||
class ScriptedRunner implements RawAgentRunner {
|
||||
public readonly prompts: string[] = [];
|
||||
|
||||
constructor(
|
||||
private readonly outputs: string[],
|
||||
) {}
|
||||
|
||||
async invoke<T>(request: AgentInvocation<T>): Promise<AgentInvocationResult<T>> {
|
||||
const next = this.outputs.shift();
|
||||
if (!next) {
|
||||
throw new Error("No scripted output remaining.");
|
||||
}
|
||||
|
||||
this.prompts.push(request.prompt);
|
||||
readFileSync(request.artifacts.schemaPath, "utf8");
|
||||
const parsed = JSON.parse(next) as Record<string, unknown>;
|
||||
if (typeof parsed.taskId === "string" && !parsed.taskId) {
|
||||
const match = request.prompt.match(/exact task id `([^`]+)`/);
|
||||
if (match) {
|
||||
parsed.taskId = match[1];
|
||||
}
|
||||
}
|
||||
return {
|
||||
sessionId: request.sessionId ?? `session-${this.prompts.length}`,
|
||||
output: parsed as T,
|
||||
rawMessage: JSON.stringify(parsed),
|
||||
rawEvents: ['{"type":"turn.completed"}'],
|
||||
stderr: "",
|
||||
artifacts: request.artifacts,
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
describe("AgentROrchestrator", () => {
|
||||
test("runs through plan, implementation, self-check, and independent check", async () => {
|
||||
const repoPath = mkdtempSync(path.join(tmpdir(), "agent-r-orchestrator-"));
|
||||
const config = loadConfig(repoPath);
|
||||
const store = new RunStore(config.databasePath);
|
||||
const artifacts = new ArtifactManager(config.runsDir);
|
||||
const runner = new ScriptedRunner([
|
||||
JSON.stringify({
|
||||
decision: "continue",
|
||||
summary: "Implement the CLI skeleton.",
|
||||
rationale: "The project is empty and needs a first vertical slice.",
|
||||
goalProgress: "No implementation exists yet.",
|
||||
risks: [],
|
||||
tasks: [
|
||||
{
|
||||
title: "Create CLI entrypoint",
|
||||
objective: "Build the initial command surface.",
|
||||
acceptanceCriteria: ["A run command exists."],
|
||||
verificationSteps: ["Run the help command."],
|
||||
allowedPaths: ["src", "package.json"],
|
||||
},
|
||||
],
|
||||
}),
|
||||
JSON.stringify({
|
||||
taskId: "",
|
||||
status: "completed",
|
||||
summary: "Added the CLI skeleton.",
|
||||
changes: ["Created an entrypoint."],
|
||||
verification: [{ command: "agent-r --help", outcome: "passed", details: "Help output rendered." }],
|
||||
followUps: [],
|
||||
touchedFiles: ["src/index.ts", "package.json"],
|
||||
blockers: [],
|
||||
}),
|
||||
JSON.stringify({
|
||||
decision: "done",
|
||||
summary: "The requested slice is complete.",
|
||||
rationale: "The only planned task is complete and verified.",
|
||||
goalProgress: "The vertical slice exists.",
|
||||
risks: [],
|
||||
tasks: [],
|
||||
}),
|
||||
JSON.stringify({
|
||||
readyForIndependentCheck: true,
|
||||
summary: "The run is ready for independent review.",
|
||||
rationale: "Implementation and verification are present.",
|
||||
evidence: ["CLI entrypoint exists."],
|
||||
remainingGaps: [],
|
||||
}),
|
||||
JSON.stringify({
|
||||
verdict: "approved",
|
||||
summary: "The goal is satisfied.",
|
||||
rationale: "The requested slice exists and is verified.",
|
||||
evidence: ["CLI entrypoint present."],
|
||||
remainingTasks: [],
|
||||
}),
|
||||
]);
|
||||
|
||||
const orchestrator = new AgentROrchestrator(config, store, artifacts, runner);
|
||||
const run = orchestrator.createRun("Create a CLI skeleton");
|
||||
|
||||
const firstStatus = await orchestrator.runUntilStable(run.id, 10);
|
||||
const snapshot = store.buildSnapshot(run.id);
|
||||
|
||||
expect(firstStatus.status).toBe("done");
|
||||
expect(snapshot.completedTasks).toHaveLength(1);
|
||||
expect(snapshot.approvals).toHaveLength(2);
|
||||
expect(snapshot.pendingTasks).toHaveLength(0);
|
||||
expect(runner.prompts.at(1)).toContain("You must echo the exact task id");
|
||||
});
|
||||
|
||||
test("strategy prompt explicitly permits read-only repository inspection", async () => {
|
||||
const repoPath = mkdtempSync(path.join(tmpdir(), "agent-r-strategy-"));
|
||||
const config = loadConfig(repoPath);
|
||||
const store = new RunStore(config.databasePath);
|
||||
const artifacts = new ArtifactManager(config.runsDir);
|
||||
const runner = new ScriptedRunner([
|
||||
JSON.stringify({
|
||||
decision: "blocked",
|
||||
summary: "Cannot continue.",
|
||||
rationale: "Test stop.",
|
||||
goalProgress: "None.",
|
||||
risks: [],
|
||||
tasks: [],
|
||||
blockedReason: "Stop immediately.",
|
||||
}),
|
||||
]);
|
||||
|
||||
const orchestrator = new AgentROrchestrator(config, store, artifacts, runner);
|
||||
const run = orchestrator.createRun("Inspect repo");
|
||||
await orchestrator.runUntilStable(run.id, 1);
|
||||
|
||||
expect(runner.prompts[0]).toContain("You may inspect the repository directly");
|
||||
expect(runner.prompts[0]).toContain("Do not edit files");
|
||||
});
|
||||
});
|
||||
40
test/store.test.ts
Normal file
40
test/store.test.ts
Normal file
|
|
@ -0,0 +1,40 @@
|
|||
import { mkdtempSync } from "node:fs";
|
||||
import { tmpdir } from "node:os";
|
||||
import path from "node:path";
|
||||
import { describe, expect, test } from "vitest";
|
||||
import { RunStore } from "../src/store.js";
|
||||
|
||||
describe("RunStore", () => {
|
||||
test("creates runs, tasks, and snapshots", () => {
|
||||
const tempDir = mkdtempSync(path.join(tmpdir(), "agent-r-store-"));
|
||||
const store = new RunStore(path.join(tempDir, "state.sqlite"));
|
||||
const run = store.createRun("Build something", tempDir);
|
||||
|
||||
store.replacePendingTasks(run.id, [
|
||||
{
|
||||
title: "Task 1",
|
||||
objective: "Do work",
|
||||
acceptanceCriteria: ["It works"],
|
||||
verificationSteps: ["Run tests"],
|
||||
allowedPaths: ["src"],
|
||||
},
|
||||
{
|
||||
title: "Task 2",
|
||||
objective: "Do more work",
|
||||
acceptanceCriteria: ["It still works"],
|
||||
verificationSteps: ["Run tests again"],
|
||||
allowedPaths: ["tests"],
|
||||
},
|
||||
]);
|
||||
|
||||
const claimed = store.claimNextPendingTask(run.id);
|
||||
expect(claimed?.status).toBe("in_progress");
|
||||
store.completeTask(claimed!.id, "Finished");
|
||||
store.addEvent(run.id, "test", "custom", "Recorded a test event.", { ok: true });
|
||||
|
||||
const snapshot = store.buildSnapshot(run.id);
|
||||
expect(snapshot.pendingTasks).toHaveLength(1);
|
||||
expect(snapshot.completedTasks).toHaveLength(1);
|
||||
expect(snapshot.recentEvents.at(-1)?.kind).toBe("custom");
|
||||
});
|
||||
});
|
||||
21
tsconfig.json
Normal file
21
tsconfig.json
Normal file
|
|
@ -0,0 +1,21 @@
|
|||
{
|
||||
"compilerOptions": {
|
||||
"target": "ES2023",
|
||||
"module": "NodeNext",
|
||||
"moduleResolution": "NodeNext",
|
||||
"lib": ["ES2023"],
|
||||
"types": ["node"],
|
||||
"strict": true,
|
||||
"noUncheckedIndexedAccess": true,
|
||||
"exactOptionalPropertyTypes": true,
|
||||
"esModuleInterop": true,
|
||||
"forceConsistentCasingInFileNames": true,
|
||||
"resolveJsonModule": true,
|
||||
"rootDir": "src",
|
||||
"outDir": "dist",
|
||||
"declaration": true,
|
||||
"sourceMap": true,
|
||||
"skipLibCheck": true
|
||||
},
|
||||
"include": ["src/**/*.ts"]
|
||||
}
|
||||
Loading…
Add table
Reference in a new issue