Initial agent-r orchestrator
This commit is contained in:
commit
a909a99310
17 changed files with 4530 additions and 0 deletions
5
.gitignore
vendored
Normal file
5
.gitignore
vendored
Normal file
|
|
@ -0,0 +1,5 @@
|
||||||
|
node_modules
|
||||||
|
dist
|
||||||
|
.agent-r
|
||||||
|
coverage
|
||||||
|
*.log
|
||||||
51
README.md
Normal file
51
README.md
Normal file
|
|
@ -0,0 +1,51 @@
|
||||||
|
# agent-r
|
||||||
|
|
||||||
|
`agent-r` is a local TypeScript CLI that orchestrates three Codex roles:
|
||||||
|
|
||||||
|
- `strategy`: read-only planning, task breakdown, self-check
|
||||||
|
- `implementation`: workspace-write execution for one task at a time
|
||||||
|
- `checker`: read-only independent completion review
|
||||||
|
|
||||||
|
The CLI persists state in SQLite plus run artifacts under `.agent-r/`, so a long-running effort can be resumed across invocations.
|
||||||
|
|
||||||
|
## Commands
|
||||||
|
|
||||||
|
```bash
|
||||||
|
npm install
|
||||||
|
npm run build
|
||||||
|
|
||||||
|
agent-r run "Build a plugin system" --repo /path/to/repo
|
||||||
|
agent-r resume <run-id> --repo /path/to/repo
|
||||||
|
agent-r status <run-id> --repo /path/to/repo
|
||||||
|
agent-r inspect <run-id> --repo /path/to/repo
|
||||||
|
agent-r logs <run-id> --repo /path/to/repo --agent strategy
|
||||||
|
```
|
||||||
|
|
||||||
|
## Config
|
||||||
|
|
||||||
|
Place `agent-r.config.toml` in the target repo if you want to override defaults.
|
||||||
|
|
||||||
|
```toml
|
||||||
|
[state]
|
||||||
|
dir = ".agent-r"
|
||||||
|
max_task_attempts = 3
|
||||||
|
max_cycles_per_run = 40
|
||||||
|
max_replans = 12
|
||||||
|
max_tasks = 200
|
||||||
|
session_refresh_turns = 20
|
||||||
|
|
||||||
|
[codex]
|
||||||
|
command = "codex"
|
||||||
|
|
||||||
|
[roles.strategy]
|
||||||
|
sandbox = "read-only"
|
||||||
|
search = true
|
||||||
|
|
||||||
|
[roles.implementation]
|
||||||
|
sandbox = "workspace-write"
|
||||||
|
search = false
|
||||||
|
|
||||||
|
[roles.checker]
|
||||||
|
sandbox = "read-only"
|
||||||
|
search = false
|
||||||
|
```
|
||||||
2038
package-lock.json
generated
Normal file
2038
package-lock.json
generated
Normal file
File diff suppressed because it is too large
Load diff
31
package.json
Normal file
31
package.json
Normal file
|
|
@ -0,0 +1,31 @@
|
||||||
|
{
|
||||||
|
"name": "agent-r",
|
||||||
|
"version": "0.1.0",
|
||||||
|
"private": true,
|
||||||
|
"type": "module",
|
||||||
|
"description": "Autonomous multi-agent Codex orchestrator with strategy, implementation, and checking roles.",
|
||||||
|
"bin": {
|
||||||
|
"agent-r": "dist/index.js"
|
||||||
|
},
|
||||||
|
"engines": {
|
||||||
|
"node": ">=24.0.0"
|
||||||
|
},
|
||||||
|
"scripts": {
|
||||||
|
"build": "tsc -p tsconfig.json && chmod +x dist/index.js",
|
||||||
|
"dev": "NODE_OPTIONS=--disable-warning=ExperimentalWarning tsx src/index.ts",
|
||||||
|
"lint": "tsc -p tsconfig.json --noEmit",
|
||||||
|
"test": "NODE_OPTIONS=--disable-warning=ExperimentalWarning vitest run"
|
||||||
|
},
|
||||||
|
"dependencies": {
|
||||||
|
"@openai/codex-sdk": "^0.118.0",
|
||||||
|
"ajv": "^8.18.0",
|
||||||
|
"commander": "^14.0.3",
|
||||||
|
"smol-toml": "^1.6.1"
|
||||||
|
},
|
||||||
|
"devDependencies": {
|
||||||
|
"@types/node": "^24.7.2",
|
||||||
|
"tsx": "^4.21.0",
|
||||||
|
"typescript": "^5.9.3",
|
||||||
|
"vitest": "^4.1.2"
|
||||||
|
}
|
||||||
|
}
|
||||||
69
src/artifacts.ts
Normal file
69
src/artifacts.ts
Normal file
|
|
@ -0,0 +1,69 @@
|
||||||
|
import path from "node:path";
|
||||||
|
import { ensureDir, safeSlug, timestampId, writeJsonFile, writeTextFile } from "./fs-utils.js";
|
||||||
|
import type { AgentRole, InvocationArtifacts, RunArtifactsIndex } from "./types.js";
|
||||||
|
|
||||||
|
export class ArtifactManager {
|
||||||
|
constructor(private readonly runsDir: string) {
|
||||||
|
ensureDir(this.runsDir);
|
||||||
|
}
|
||||||
|
|
||||||
|
getRunDir(runId: string): string {
|
||||||
|
const dir = path.join(this.runsDir, runId);
|
||||||
|
ensureDir(dir);
|
||||||
|
return dir;
|
||||||
|
}
|
||||||
|
|
||||||
|
getRoleDir(runId: string, role: AgentRole): string {
|
||||||
|
const dir = path.join(this.getRunDir(runId), role);
|
||||||
|
ensureDir(dir);
|
||||||
|
return dir;
|
||||||
|
}
|
||||||
|
|
||||||
|
createInvocationArtifacts(runId: string, role: AgentRole, schemaName: string): InvocationArtifacts {
|
||||||
|
const roleDir = this.getRoleDir(runId, role);
|
||||||
|
const baseName = `${timestampId()}-${safeSlug(schemaName)}`;
|
||||||
|
return {
|
||||||
|
promptPath: path.join(roleDir, `${baseName}.prompt.md`),
|
||||||
|
schemaPath: path.join(roleDir, `${baseName}.schema.json`),
|
||||||
|
rawEventsPath: path.join(roleDir, `${baseName}.events.jsonl`),
|
||||||
|
stderrPath: path.join(roleDir, `${baseName}.stderr.log`),
|
||||||
|
lastMessagePath: path.join(roleDir, `${baseName}.last.txt`),
|
||||||
|
responsePath: path.join(roleDir, `${baseName}.response.json`),
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
writePrompt(filePath: string, prompt: string): void {
|
||||||
|
writeTextFile(filePath, prompt);
|
||||||
|
}
|
||||||
|
|
||||||
|
writeSchema(filePath: string, schema: Record<string, unknown>): void {
|
||||||
|
writeJsonFile(filePath, schema);
|
||||||
|
}
|
||||||
|
|
||||||
|
writeRawEvents(filePath: string, rawEvents: string[]): void {
|
||||||
|
writeTextFile(filePath, rawEvents.join("\n"));
|
||||||
|
}
|
||||||
|
|
||||||
|
writeStderr(filePath: string, stderr: string): void {
|
||||||
|
writeTextFile(filePath, stderr);
|
||||||
|
}
|
||||||
|
|
||||||
|
writeLastMessage(filePath: string, message: string): void {
|
||||||
|
writeTextFile(filePath, message);
|
||||||
|
}
|
||||||
|
|
||||||
|
writeResponse(filePath: string, response: unknown): void {
|
||||||
|
writeJsonFile(filePath, response);
|
||||||
|
}
|
||||||
|
|
||||||
|
indexFromArtifacts(artifacts: InvocationArtifacts): RunArtifactsIndex {
|
||||||
|
return {
|
||||||
|
promptPath: artifacts.promptPath,
|
||||||
|
responsePath: artifacts.responsePath,
|
||||||
|
schemaPath: artifacts.schemaPath,
|
||||||
|
rawEventsPath: artifacts.rawEventsPath,
|
||||||
|
stderrPath: artifacts.stderrPath,
|
||||||
|
lastMessagePath: artifacts.lastMessagePath,
|
||||||
|
};
|
||||||
|
}
|
||||||
|
}
|
||||||
139
src/codex-runner.ts
Normal file
139
src/codex-runner.ts
Normal file
|
|
@ -0,0 +1,139 @@
|
||||||
|
import { Ajv } from "ajv";
|
||||||
|
import { Codex, type ThreadEvent, type ThreadOptions } from "@openai/codex-sdk";
|
||||||
|
import type { AgentInvocation, AgentInvocationResult, RawAgentRunner } from "./types.js";
|
||||||
|
|
||||||
|
function extractValidationError(message: string, errorText: string): string {
|
||||||
|
return [
|
||||||
|
"The previous response did not parse or validate.",
|
||||||
|
`Problem: ${errorText}`,
|
||||||
|
"Respond again with JSON only, matching the schema exactly.",
|
||||||
|
"Previous response:",
|
||||||
|
message,
|
||||||
|
].join("\n\n");
|
||||||
|
}
|
||||||
|
|
||||||
|
export class CodexSdkRunner implements RawAgentRunner {
|
||||||
|
private readonly ajv = new Ajv({ allErrors: true, strict: false });
|
||||||
|
private readonly codex: Codex;
|
||||||
|
|
||||||
|
constructor(codexCommand: string) {
|
||||||
|
this.codex = new Codex({
|
||||||
|
codexPathOverride: codexCommand,
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
async invoke<T>(request: AgentInvocation<T>): Promise<AgentInvocationResult<T>> {
|
||||||
|
let prompt = request.prompt;
|
||||||
|
let sessionId = request.sessionId;
|
||||||
|
const maxAttempts = request.maxValidationRetries ?? 2;
|
||||||
|
let lastFailure = "";
|
||||||
|
|
||||||
|
for (let attempt = 1; attempt <= maxAttempts; attempt += 1) {
|
||||||
|
const raw = await this.invokeOnce({
|
||||||
|
...request,
|
||||||
|
prompt,
|
||||||
|
sessionId,
|
||||||
|
});
|
||||||
|
|
||||||
|
sessionId = raw.sessionId;
|
||||||
|
|
||||||
|
try {
|
||||||
|
const parsed = JSON.parse(raw.rawMessage) as unknown;
|
||||||
|
const validate = this.ajv.compile(request.schema);
|
||||||
|
if (!validate(parsed)) {
|
||||||
|
const errorText = this.ajv.errorsText(validate.errors);
|
||||||
|
lastFailure = errorText;
|
||||||
|
if (attempt === maxAttempts) {
|
||||||
|
throw new Error(`Schema validation failed: ${errorText}`);
|
||||||
|
}
|
||||||
|
prompt = extractValidationError(raw.rawMessage, errorText);
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
return {
|
||||||
|
...raw,
|
||||||
|
output: parsed as T,
|
||||||
|
};
|
||||||
|
} catch (error) {
|
||||||
|
const errorText = error instanceof Error ? error.message : String(error);
|
||||||
|
lastFailure = errorText;
|
||||||
|
if (attempt === maxAttempts) {
|
||||||
|
throw new Error(`Structured output failed after ${attempt} attempt(s): ${lastFailure}`);
|
||||||
|
}
|
||||||
|
prompt = extractValidationError(raw.rawMessage, errorText);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
throw new Error(`Structured output failed: ${lastFailure}`);
|
||||||
|
}
|
||||||
|
|
||||||
|
private async invokeOnce<T>(request: AgentInvocation<T>): Promise<Omit<AgentInvocationResult<T>, "output">> {
|
||||||
|
const threadOptions: ThreadOptions = {
|
||||||
|
sandboxMode: request.roleConfig.sandbox,
|
||||||
|
workingDirectory: request.cwd,
|
||||||
|
skipGitRepoCheck: request.roleConfig.skipGitRepoCheck,
|
||||||
|
approvalPolicy: "never",
|
||||||
|
networkAccessEnabled: request.roleConfig.search,
|
||||||
|
webSearchEnabled: request.roleConfig.search,
|
||||||
|
webSearchMode: request.roleConfig.search ? "live" : "disabled",
|
||||||
|
};
|
||||||
|
|
||||||
|
if (request.roleConfig.model) {
|
||||||
|
threadOptions.model = request.roleConfig.model;
|
||||||
|
}
|
||||||
|
|
||||||
|
const thread = request.sessionId
|
||||||
|
? this.codex.resumeThread(request.sessionId, threadOptions)
|
||||||
|
: this.codex.startThread(threadOptions);
|
||||||
|
|
||||||
|
const streamed = await thread.runStreamed(request.prompt, {
|
||||||
|
outputSchema: request.schema,
|
||||||
|
});
|
||||||
|
|
||||||
|
const rawEvents: string[] = [];
|
||||||
|
let sessionId = request.sessionId;
|
||||||
|
let rawMessage = "";
|
||||||
|
|
||||||
|
for await (const event of streamed.events) {
|
||||||
|
rawEvents.push(JSON.stringify(event));
|
||||||
|
sessionId = this.extractSessionId(sessionId, event);
|
||||||
|
rawMessage = this.extractLatestMessage(rawMessage, event);
|
||||||
|
this.throwIfFailed(event);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!rawMessage.trim()) {
|
||||||
|
throw new Error("Codex SDK completed without a final agent message.");
|
||||||
|
}
|
||||||
|
|
||||||
|
return {
|
||||||
|
sessionId: sessionId ?? thread.id,
|
||||||
|
rawMessage,
|
||||||
|
rawEvents,
|
||||||
|
stderr: "",
|
||||||
|
artifacts: request.artifacts,
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
private extractSessionId(current: string | null, event: ThreadEvent): string | null {
|
||||||
|
if (event.type === "thread.started") {
|
||||||
|
return event.thread_id;
|
||||||
|
}
|
||||||
|
return current;
|
||||||
|
}
|
||||||
|
|
||||||
|
private extractLatestMessage(current: string, event: ThreadEvent): string {
|
||||||
|
if (event.type === "item.completed" && event.item.type === "agent_message") {
|
||||||
|
return event.item.text;
|
||||||
|
}
|
||||||
|
return current;
|
||||||
|
}
|
||||||
|
|
||||||
|
private throwIfFailed(event: ThreadEvent): void {
|
||||||
|
if (event.type === "error") {
|
||||||
|
throw new Error(event.message);
|
||||||
|
}
|
||||||
|
if (event.type === "turn.failed") {
|
||||||
|
throw new Error(event.error.message);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
138
src/config.ts
Normal file
138
src/config.ts
Normal file
|
|
@ -0,0 +1,138 @@
|
||||||
|
import { existsSync, readFileSync } from "node:fs";
|
||||||
|
import path from "node:path";
|
||||||
|
import { parse } from "smol-toml";
|
||||||
|
import type { AgentRole, ResolvedConfig, RoleConfig } from "./types.js";
|
||||||
|
|
||||||
|
interface ConfigRoleOverride {
|
||||||
|
model?: string;
|
||||||
|
sandbox?: RoleConfig["sandbox"];
|
||||||
|
search?: boolean;
|
||||||
|
skipGitRepoCheck?: boolean;
|
||||||
|
skip_git_repo_check?: boolean;
|
||||||
|
extraArgs?: string[];
|
||||||
|
extra_args?: string[];
|
||||||
|
}
|
||||||
|
|
||||||
|
interface ConfigFileShape {
|
||||||
|
state?: {
|
||||||
|
dir?: string;
|
||||||
|
max_task_attempts?: number;
|
||||||
|
max_cycles_per_run?: number;
|
||||||
|
max_replans?: number;
|
||||||
|
max_tasks?: number;
|
||||||
|
session_refresh_turns?: number;
|
||||||
|
};
|
||||||
|
codex?: {
|
||||||
|
command?: string;
|
||||||
|
};
|
||||||
|
roles?: Partial<Record<AgentRole, ConfigRoleOverride>>;
|
||||||
|
}
|
||||||
|
|
||||||
|
function defaultRoleConfig(role: AgentRole): RoleConfig {
|
||||||
|
if (role === "strategy") {
|
||||||
|
return {
|
||||||
|
sandbox: "read-only",
|
||||||
|
search: true,
|
||||||
|
skipGitRepoCheck: false,
|
||||||
|
extraArgs: [],
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
if (role === "checker") {
|
||||||
|
return {
|
||||||
|
sandbox: "read-only",
|
||||||
|
search: false,
|
||||||
|
skipGitRepoCheck: false,
|
||||||
|
extraArgs: [],
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
return {
|
||||||
|
sandbox: "workspace-write",
|
||||||
|
search: false,
|
||||||
|
skipGitRepoCheck: false,
|
||||||
|
extraArgs: [],
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
function mergeRoleConfig(role: AgentRole, override?: Partial<RoleConfig>): RoleConfig {
|
||||||
|
const base = defaultRoleConfig(role);
|
||||||
|
const merged: RoleConfig = {
|
||||||
|
sandbox: override?.sandbox ?? base.sandbox,
|
||||||
|
search: override?.search ?? base.search,
|
||||||
|
skipGitRepoCheck: override?.skipGitRepoCheck ?? base.skipGitRepoCheck,
|
||||||
|
extraArgs: override?.extraArgs ?? base.extraArgs,
|
||||||
|
};
|
||||||
|
|
||||||
|
const model = override?.model ?? base.model;
|
||||||
|
if (model) {
|
||||||
|
merged.model = model;
|
||||||
|
}
|
||||||
|
|
||||||
|
return merged;
|
||||||
|
}
|
||||||
|
|
||||||
|
export function loadConfig(repoPath: string, configPath?: string): ResolvedConfig {
|
||||||
|
const absoluteRepoPath = path.resolve(repoPath);
|
||||||
|
const effectiveConfigPath = configPath ? path.resolve(configPath) : path.join(absoluteRepoPath, "agent-r.config.toml");
|
||||||
|
let parsedConfig: ConfigFileShape = {};
|
||||||
|
|
||||||
|
if (existsSync(effectiveConfigPath)) {
|
||||||
|
parsedConfig = parse(readFileSync(effectiveConfigPath, "utf8")) as ConfigFileShape;
|
||||||
|
}
|
||||||
|
|
||||||
|
const stateDir = path.resolve(
|
||||||
|
absoluteRepoPath,
|
||||||
|
parsedConfig.state?.dir ?? ".agent-r",
|
||||||
|
);
|
||||||
|
|
||||||
|
return {
|
||||||
|
repoPath: absoluteRepoPath,
|
||||||
|
stateDir,
|
||||||
|
databasePath: path.join(stateDir, "state.sqlite"),
|
||||||
|
runsDir: path.join(stateDir, "runs"),
|
||||||
|
codexCommand: parsedConfig.codex?.command ?? "codex",
|
||||||
|
maxTaskAttempts: parsedConfig.state?.max_task_attempts ?? 3,
|
||||||
|
maxCyclesPerInvocation: parsedConfig.state?.max_cycles_per_run ?? 40,
|
||||||
|
maxReplans: parsedConfig.state?.max_replans ?? 12,
|
||||||
|
maxTasks: parsedConfig.state?.max_tasks ?? 200,
|
||||||
|
sessionRefreshTurns: parsedConfig.state?.session_refresh_turns ?? 20,
|
||||||
|
roles: {
|
||||||
|
strategy: mergeRoleConfig("strategy", normalizeRoleOverride(parsedConfig.roles?.strategy)),
|
||||||
|
implementation: mergeRoleConfig("implementation", normalizeRoleOverride(parsedConfig.roles?.implementation)),
|
||||||
|
checker: mergeRoleConfig("checker", normalizeRoleOverride(parsedConfig.roles?.checker)),
|
||||||
|
},
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
function normalizeRoleOverride(override?: ConfigRoleOverride): Partial<RoleConfig> | undefined {
|
||||||
|
if (!override) {
|
||||||
|
return undefined;
|
||||||
|
}
|
||||||
|
|
||||||
|
const normalized: Partial<RoleConfig> = {};
|
||||||
|
|
||||||
|
if (override.model) {
|
||||||
|
normalized.model = override.model;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (override.sandbox) {
|
||||||
|
normalized.sandbox = override.sandbox;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (override.search !== undefined) {
|
||||||
|
normalized.search = override.search;
|
||||||
|
}
|
||||||
|
|
||||||
|
const skipGitRepoCheck = override.skipGitRepoCheck ?? override.skip_git_repo_check;
|
||||||
|
if (skipGitRepoCheck !== undefined) {
|
||||||
|
normalized.skipGitRepoCheck = skipGitRepoCheck;
|
||||||
|
}
|
||||||
|
|
||||||
|
const extraArgs = override.extraArgs ?? override.extra_args;
|
||||||
|
if (extraArgs !== undefined) {
|
||||||
|
normalized.extraArgs = extraArgs;
|
||||||
|
}
|
||||||
|
|
||||||
|
return normalized;
|
||||||
|
}
|
||||||
42
src/fs-utils.ts
Normal file
42
src/fs-utils.ts
Normal file
|
|
@ -0,0 +1,42 @@
|
||||||
|
import { mkdirSync, readFileSync, readdirSync, writeFileSync } from "node:fs";
|
||||||
|
import path from "node:path";
|
||||||
|
|
||||||
|
export function ensureDir(dirPath: string): void {
|
||||||
|
mkdirSync(dirPath, { recursive: true });
|
||||||
|
}
|
||||||
|
|
||||||
|
export function writeTextFile(filePath: string, text: string): void {
|
||||||
|
ensureDir(path.dirname(filePath));
|
||||||
|
writeFileSync(filePath, text, "utf8");
|
||||||
|
}
|
||||||
|
|
||||||
|
export function writeJsonFile(filePath: string, value: unknown): void {
|
||||||
|
writeTextFile(filePath, `${JSON.stringify(value, null, 2)}\n`);
|
||||||
|
}
|
||||||
|
|
||||||
|
export function readTextFile(filePath: string): string {
|
||||||
|
return readFileSync(filePath, "utf8");
|
||||||
|
}
|
||||||
|
|
||||||
|
export function safeSlug(value: string): string {
|
||||||
|
return value
|
||||||
|
.toLowerCase()
|
||||||
|
.replace(/[^a-z0-9]+/g, "-")
|
||||||
|
.replace(/^-+|-+$/g, "")
|
||||||
|
.slice(0, 40) || "artifact";
|
||||||
|
}
|
||||||
|
|
||||||
|
export function timestampId(date = new Date()): string {
|
||||||
|
return date.toISOString().replace(/[:.]/g, "-");
|
||||||
|
}
|
||||||
|
|
||||||
|
export function listFilesRecursive(dirPath: string): string[] {
|
||||||
|
const entries = readdirSync(dirPath, { withFileTypes: true });
|
||||||
|
return entries.flatMap((entry) => {
|
||||||
|
const fullPath = path.join(dirPath, entry.name);
|
||||||
|
if (entry.isDirectory()) {
|
||||||
|
return listFilesRecursive(fullPath);
|
||||||
|
}
|
||||||
|
return [fullPath];
|
||||||
|
});
|
||||||
|
}
|
||||||
127
src/index.ts
Normal file
127
src/index.ts
Normal file
|
|
@ -0,0 +1,127 @@
|
||||||
|
#!/usr/bin/env -S node --disable-warning=ExperimentalWarning
|
||||||
|
import path from "node:path";
|
||||||
|
import { Command } from "commander";
|
||||||
|
import { ArtifactManager } from "./artifacts.js";
|
||||||
|
import { CodexSdkRunner } from "./codex-runner.js";
|
||||||
|
import { loadConfig } from "./config.js";
|
||||||
|
import { listFilesRecursive, readTextFile } from "./fs-utils.js";
|
||||||
|
import { AgentROrchestrator } from "./orchestrator.js";
|
||||||
|
import { RunStore } from "./store.js";
|
||||||
|
import type { AgentRole } from "./types.js";
|
||||||
|
|
||||||
|
function resolveRepoPath(repo?: string): string {
|
||||||
|
return path.resolve(repo ?? process.cwd());
|
||||||
|
}
|
||||||
|
|
||||||
|
function bootstrap(repo?: string, configPath?: string) {
|
||||||
|
const repoPath = resolveRepoPath(repo);
|
||||||
|
const config = loadConfig(repoPath, configPath);
|
||||||
|
const store = new RunStore(config.databasePath);
|
||||||
|
const artifacts = new ArtifactManager(config.runsDir);
|
||||||
|
const runner = new CodexSdkRunner(config.codexCommand);
|
||||||
|
const orchestrator = new AgentROrchestrator(config, store, artifacts, runner);
|
||||||
|
return { repoPath, config, store, artifacts, orchestrator };
|
||||||
|
}
|
||||||
|
|
||||||
|
function printRunStatus(label: string, run: {
|
||||||
|
id: string;
|
||||||
|
status: string;
|
||||||
|
summary: string | null;
|
||||||
|
goal: string;
|
||||||
|
cycleCount: number;
|
||||||
|
replanCount: number;
|
||||||
|
currentTaskId: string | null;
|
||||||
|
}): void {
|
||||||
|
console.log(`${label}: ${run.id}`);
|
||||||
|
console.log(`status: ${run.status}`);
|
||||||
|
console.log(`goal: ${run.goal}`);
|
||||||
|
console.log(`summary: ${run.summary ?? "none"}`);
|
||||||
|
console.log(`cycles: ${run.cycleCount}`);
|
||||||
|
console.log(`replans: ${run.replanCount}`);
|
||||||
|
console.log(`current task: ${run.currentTaskId ?? "none"}`);
|
||||||
|
}
|
||||||
|
|
||||||
|
const program = new Command();
|
||||||
|
|
||||||
|
program.name("agent-r").description("Autonomous Codex multi-agent orchestrator.");
|
||||||
|
|
||||||
|
program
|
||||||
|
.command("run")
|
||||||
|
.argument("<goal>", "high-level development goal")
|
||||||
|
.option("--repo <path>", "target repository path")
|
||||||
|
.option("--config <path>", "path to agent-r config file")
|
||||||
|
.option("--max-cycles <count>", "override max cycles for this invocation", Number)
|
||||||
|
.action(async (goal: string, options: { repo?: string; config?: string; maxCycles?: number }) => {
|
||||||
|
const { orchestrator, store } = bootstrap(options.repo, options.config);
|
||||||
|
const run = orchestrator.createRun(goal);
|
||||||
|
const finalRun = await orchestrator.runUntilStable(run.id, options.maxCycles);
|
||||||
|
printRunStatus("run", finalRun);
|
||||||
|
console.log(`run id: ${run.id}`);
|
||||||
|
console.log(`pending tasks: ${store.buildSnapshot(run.id).pendingTasks.length}`);
|
||||||
|
});
|
||||||
|
|
||||||
|
program
|
||||||
|
.command("resume")
|
||||||
|
.argument("<runId>", "run id")
|
||||||
|
.option("--repo <path>", "target repository path")
|
||||||
|
.option("--config <path>", "path to agent-r config file")
|
||||||
|
.option("--max-cycles <count>", "override max cycles for this invocation", Number)
|
||||||
|
.action(async (runId: string, options: { repo?: string; config?: string; maxCycles?: number }) => {
|
||||||
|
const { orchestrator } = bootstrap(options.repo, options.config);
|
||||||
|
const finalRun = await orchestrator.runUntilStable(runId, options.maxCycles);
|
||||||
|
printRunStatus("run", finalRun);
|
||||||
|
});
|
||||||
|
|
||||||
|
program
|
||||||
|
.command("status")
|
||||||
|
.argument("<runId>", "run id")
|
||||||
|
.option("--repo <path>", "target repository path")
|
||||||
|
.option("--config <path>", "path to agent-r config file")
|
||||||
|
.action((runId: string, options: { repo?: string; config?: string }) => {
|
||||||
|
const { store } = bootstrap(options.repo, options.config);
|
||||||
|
const snapshot = store.buildSnapshot(runId);
|
||||||
|
printRunStatus("run", snapshot.run);
|
||||||
|
console.log(`pending: ${snapshot.pendingTasks.length}`);
|
||||||
|
console.log(`completed: ${snapshot.completedTasks.length}`);
|
||||||
|
console.log(`blocked/abandoned: ${snapshot.blockedTasks.length}`);
|
||||||
|
});
|
||||||
|
|
||||||
|
program
|
||||||
|
.command("inspect")
|
||||||
|
.argument("<runId>", "run id")
|
||||||
|
.option("--repo <path>", "target repository path")
|
||||||
|
.option("--config <path>", "path to agent-r config file")
|
||||||
|
.action((runId: string, options: { repo?: string; config?: string }) => {
|
||||||
|
const { store } = bootstrap(options.repo, options.config);
|
||||||
|
console.log(JSON.stringify(store.buildSnapshot(runId), null, 2));
|
||||||
|
});
|
||||||
|
|
||||||
|
program
|
||||||
|
.command("logs")
|
||||||
|
.argument("<runId>", "run id")
|
||||||
|
.option("--repo <path>", "target repository path")
|
||||||
|
.option("--config <path>", "path to agent-r config file")
|
||||||
|
.option("--agent <role>", "limit to a single role")
|
||||||
|
.action((runId: string, options: { repo?: string; config?: string; agent?: AgentRole }) => {
|
||||||
|
const { artifacts, store } = bootstrap(options.repo, options.config);
|
||||||
|
const snapshot = store.buildSnapshot(runId);
|
||||||
|
const artifactFiles = options.agent
|
||||||
|
? store.listArtifacts(runId, options.agent).map((artifact) => artifact.path)
|
||||||
|
: listFilesRecursive(artifacts.getRunDir(runId));
|
||||||
|
|
||||||
|
console.log(`run: ${runId}`);
|
||||||
|
console.log(`status: ${snapshot.run.status}`);
|
||||||
|
console.log("");
|
||||||
|
|
||||||
|
for (const filePath of artifactFiles) {
|
||||||
|
console.log(`# ${filePath}`);
|
||||||
|
console.log(readTextFile(filePath));
|
||||||
|
console.log("");
|
||||||
|
}
|
||||||
|
});
|
||||||
|
|
||||||
|
program.parseAsync(process.argv).catch((error: unknown) => {
|
||||||
|
const message = error instanceof Error ? error.stack ?? error.message : String(error);
|
||||||
|
console.error(message);
|
||||||
|
process.exitCode = 1;
|
||||||
|
});
|
||||||
423
src/orchestrator.ts
Normal file
423
src/orchestrator.ts
Normal file
|
|
@ -0,0 +1,423 @@
|
||||||
|
import type {
|
||||||
|
AgentRole,
|
||||||
|
AgentStateRecord,
|
||||||
|
CheckVerdictOutput,
|
||||||
|
ImplementationResultOutput,
|
||||||
|
RawAgentRunner,
|
||||||
|
ResolvedConfig,
|
||||||
|
RunRecord,
|
||||||
|
RunSnapshot,
|
||||||
|
StrategyPlanOutput,
|
||||||
|
StrategySelfCheckOutput,
|
||||||
|
TaskDraft,
|
||||||
|
TaskRecord,
|
||||||
|
} from "./types.js";
|
||||||
|
import { ArtifactManager } from "./artifacts.js";
|
||||||
|
import {
|
||||||
|
buildCheckPrompt,
|
||||||
|
buildImplementationPrompt,
|
||||||
|
buildStrategyPlanPrompt,
|
||||||
|
buildStrategySelfCheckPrompt,
|
||||||
|
formatCheckVerdictSummary,
|
||||||
|
formatImplementationSummary,
|
||||||
|
formatSelfCheckSummary,
|
||||||
|
formatStrategyPlanSummary,
|
||||||
|
} from "./prompts.js";
|
||||||
|
import { checkVerdictSchema, implementationResultSchema, strategyPlanSchema, strategySelfCheckSchema } from "./schema-catalog.js";
|
||||||
|
import { RunStore } from "./store.js";
|
||||||
|
|
||||||
|
function nowIso(): string {
|
||||||
|
return new Date().toISOString();
|
||||||
|
}
|
||||||
|
|
||||||
|
function blockerSignature(blockers: string[]): string | null {
|
||||||
|
const compact = blockers.map((value) => value.trim().toLowerCase()).filter(Boolean).sort().join("|");
|
||||||
|
return compact || null;
|
||||||
|
}
|
||||||
|
|
||||||
|
function uniqueTaskDrafts(tasks: TaskDraft[]): TaskDraft[] {
|
||||||
|
const seen = new Set<string>();
|
||||||
|
const result: TaskDraft[] = [];
|
||||||
|
for (const task of tasks) {
|
||||||
|
const key = JSON.stringify(task);
|
||||||
|
if (seen.has(key)) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
seen.add(key);
|
||||||
|
result.push(task);
|
||||||
|
}
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
export class AgentROrchestrator {
|
||||||
|
constructor(
|
||||||
|
private readonly config: ResolvedConfig,
|
||||||
|
private readonly store: RunStore,
|
||||||
|
private readonly artifacts: ArtifactManager,
|
||||||
|
private readonly runner: RawAgentRunner,
|
||||||
|
) {}
|
||||||
|
|
||||||
|
createRun(goal: string): RunRecord {
|
||||||
|
return this.store.createRun(goal, this.config.repoPath);
|
||||||
|
}
|
||||||
|
|
||||||
|
async runUntilStable(runId: string, maxCycles = this.config.maxCyclesPerInvocation): Promise<RunRecord> {
|
||||||
|
this.store.requeueInProgressTasks(runId);
|
||||||
|
|
||||||
|
for (let cycle = 0; cycle < maxCycles; cycle += 1) {
|
||||||
|
const snapshot = this.store.buildSnapshot(runId);
|
||||||
|
if (snapshot.run.status === "done" || snapshot.run.status === "blocked") {
|
||||||
|
return snapshot.run;
|
||||||
|
}
|
||||||
|
|
||||||
|
this.store.incrementCycle(runId);
|
||||||
|
|
||||||
|
switch (snapshot.run.status) {
|
||||||
|
case "planning":
|
||||||
|
if (snapshot.pendingTasks.length > 0) {
|
||||||
|
await this.dispatchNextTask(runId);
|
||||||
|
} else {
|
||||||
|
await this.runStrategyPlan(runId);
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
case "implementing":
|
||||||
|
await this.dispatchNextTask(runId);
|
||||||
|
break;
|
||||||
|
case "self_check":
|
||||||
|
await this.runStrategySelfCheck(runId);
|
||||||
|
break;
|
||||||
|
case "independent_check":
|
||||||
|
await this.runIndependentCheck(runId);
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
throw new Error(`Unsupported run state: ${snapshot.run.status}`);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
this.store.addEvent(runId, "system", "cycle_budget_exhausted", "Reached max cycles for this invocation.", { maxCycles });
|
||||||
|
return this.store.getRun(runId);
|
||||||
|
}
|
||||||
|
|
||||||
|
private async runStrategyPlan(runId: string): Promise<void> {
|
||||||
|
const snapshot = this.store.buildSnapshot(runId);
|
||||||
|
const response = await this.invokeRole<StrategyPlanOutput>({
|
||||||
|
runId,
|
||||||
|
role: "strategy",
|
||||||
|
schemaName: "strategy-plan",
|
||||||
|
prompt: buildStrategyPlanPrompt(snapshot),
|
||||||
|
schema: strategyPlanSchema,
|
||||||
|
});
|
||||||
|
|
||||||
|
const plan = response.output;
|
||||||
|
this.store.addEvent(runId, "strategy", "strategy_plan", formatStrategyPlanSummary(plan), plan);
|
||||||
|
this.store.updateRun(runId, { summary: plan.summary, currentTaskId: null });
|
||||||
|
this.store.incrementReplanCount(runId);
|
||||||
|
|
||||||
|
if (this.store.getRun(runId).replanCount > this.config.maxReplans) {
|
||||||
|
this.store.updateRun(runId, {
|
||||||
|
status: "blocked",
|
||||||
|
summary: `Exceeded replan limit (${this.config.maxReplans}).`,
|
||||||
|
currentTaskId: null,
|
||||||
|
});
|
||||||
|
this.store.addEvent(runId, "system", "blocked", "Run blocked because the replan limit was exceeded.", {});
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (plan.decision === "blocked") {
|
||||||
|
this.store.updateRun(runId, {
|
||||||
|
status: "blocked",
|
||||||
|
summary: plan.blockedReason ?? plan.summary,
|
||||||
|
currentTaskId: null,
|
||||||
|
});
|
||||||
|
this.store.addEvent(runId, "strategy", "blocked", "Strategy declared the run blocked.", plan);
|
||||||
|
this.checkpoint(runId);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (plan.decision === "done") {
|
||||||
|
this.store.updateRun(runId, {
|
||||||
|
status: "self_check",
|
||||||
|
summary: plan.summary,
|
||||||
|
currentTaskId: null,
|
||||||
|
});
|
||||||
|
this.checkpoint(runId);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
const tasks = uniqueTaskDrafts(plan.tasks).slice(0, this.config.maxTasks);
|
||||||
|
if (!tasks.length) {
|
||||||
|
this.store.updateRun(runId, {
|
||||||
|
status: "blocked",
|
||||||
|
summary: "Strategy returned continue without any tasks.",
|
||||||
|
currentTaskId: null,
|
||||||
|
});
|
||||||
|
this.store.addEvent(runId, "system", "blocked", "Run blocked because no tasks were returned.", plan);
|
||||||
|
this.checkpoint(runId);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
this.store.replacePendingTasks(runId, tasks);
|
||||||
|
this.store.updateRun(runId, {
|
||||||
|
status: "planning",
|
||||||
|
summary: plan.summary,
|
||||||
|
currentTaskId: null,
|
||||||
|
});
|
||||||
|
this.checkpoint(runId);
|
||||||
|
}
|
||||||
|
|
||||||
|
private async dispatchNextTask(runId: string): Promise<void> {
|
||||||
|
const claimedTask = this.store.claimNextPendingTask(runId);
|
||||||
|
if (!claimedTask) {
|
||||||
|
this.store.updateRun(runId, { status: "planning", currentTaskId: null });
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
this.store.updateRun(runId, {
|
||||||
|
status: "implementing",
|
||||||
|
currentTaskId: claimedTask.id,
|
||||||
|
summary: `Implementing ${claimedTask.title}`,
|
||||||
|
});
|
||||||
|
this.store.addEvent(runId, "system", "task_dispatched", `Dispatched task ${claimedTask.id}.`, { taskId: claimedTask.id });
|
||||||
|
|
||||||
|
const snapshot = this.store.buildSnapshot(runId);
|
||||||
|
const response = await this.invokeRole<ImplementationResultOutput>({
|
||||||
|
runId,
|
||||||
|
role: "implementation",
|
||||||
|
schemaName: "implementation-result",
|
||||||
|
prompt: buildImplementationPrompt(snapshot, claimedTask),
|
||||||
|
schema: implementationResultSchema,
|
||||||
|
});
|
||||||
|
|
||||||
|
const result = response.output;
|
||||||
|
if (result.taskId !== claimedTask.id) {
|
||||||
|
throw new Error(`Implementation agent returned mismatched task id: expected ${claimedTask.id}, got ${result.taskId}`);
|
||||||
|
}
|
||||||
|
|
||||||
|
const signature = blockerSignature(result.blockers);
|
||||||
|
const previousAttempt = this.store.latestTaskAttempt(claimedTask.id);
|
||||||
|
const latestTask = this.store.getTask(claimedTask.id);
|
||||||
|
|
||||||
|
this.store.addTaskAttempt(
|
||||||
|
runId,
|
||||||
|
claimedTask.id,
|
||||||
|
latestTask.attemptCount,
|
||||||
|
result.status,
|
||||||
|
result.summary,
|
||||||
|
result,
|
||||||
|
signature,
|
||||||
|
);
|
||||||
|
this.store.addEvent(runId, "implementation", "implementation_result", formatImplementationSummary(result), result);
|
||||||
|
|
||||||
|
if (result.status === "completed") {
|
||||||
|
this.store.completeTask(claimedTask.id, result.summary);
|
||||||
|
this.store.updateRun(runId, {
|
||||||
|
status: "planning",
|
||||||
|
currentTaskId: null,
|
||||||
|
summary: result.summary,
|
||||||
|
});
|
||||||
|
this.checkpoint(runId);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
const repeatedBlocker =
|
||||||
|
previousAttempt &&
|
||||||
|
previousAttempt.blockerSignature &&
|
||||||
|
previousAttempt.blockerSignature === signature;
|
||||||
|
|
||||||
|
if (result.status === "blocked") {
|
||||||
|
this.store.blockTask(claimedTask.id, result.summary, signature);
|
||||||
|
if (repeatedBlocker) {
|
||||||
|
this.store.updateRun(runId, {
|
||||||
|
status: "blocked",
|
||||||
|
currentTaskId: null,
|
||||||
|
summary: `Repeated blocker for task ${claimedTask.title}.`,
|
||||||
|
});
|
||||||
|
this.store.addEvent(runId, "system", "blocked", "Run blocked because the same blocker repeated.", {
|
||||||
|
taskId: claimedTask.id,
|
||||||
|
blockerSignature: signature,
|
||||||
|
});
|
||||||
|
this.checkpoint(runId);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
this.store.updateRun(runId, {
|
||||||
|
status: "planning",
|
||||||
|
currentTaskId: null,
|
||||||
|
summary: result.summary,
|
||||||
|
});
|
||||||
|
this.checkpoint(runId);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (latestTask.attemptCount >= this.config.maxTaskAttempts) {
|
||||||
|
this.store.blockTask(claimedTask.id, result.summary, signature);
|
||||||
|
this.store.updateRun(runId, {
|
||||||
|
status: "blocked",
|
||||||
|
currentTaskId: null,
|
||||||
|
summary: `Task ${claimedTask.title} exceeded the retry limit.`,
|
||||||
|
});
|
||||||
|
this.store.addEvent(runId, "system", "blocked", "Run blocked because task retry limit was exceeded.", {
|
||||||
|
taskId: claimedTask.id,
|
||||||
|
attempts: latestTask.attemptCount,
|
||||||
|
});
|
||||||
|
this.checkpoint(runId);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
this.store.abandonTask(claimedTask.id, result.summary, signature);
|
||||||
|
this.store.updateRun(runId, {
|
||||||
|
status: "planning",
|
||||||
|
currentTaskId: null,
|
||||||
|
summary: result.summary,
|
||||||
|
});
|
||||||
|
this.checkpoint(runId);
|
||||||
|
}
|
||||||
|
|
||||||
|
private async runStrategySelfCheck(runId: string): Promise<void> {
|
||||||
|
const snapshot = this.store.buildSnapshot(runId);
|
||||||
|
const lastPlan = snapshot.recentEvents.findLast((event) => event.kind === "strategy_plan")?.payload as
|
||||||
|
| StrategyPlanOutput
|
||||||
|
| undefined;
|
||||||
|
if (!lastPlan) {
|
||||||
|
this.store.updateRun(runId, { status: "planning" });
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
const response = await this.invokeRole<StrategySelfCheckOutput>({
|
||||||
|
runId,
|
||||||
|
role: "strategy",
|
||||||
|
schemaName: "strategy-self-check",
|
||||||
|
prompt: buildStrategySelfCheckPrompt(snapshot, lastPlan),
|
||||||
|
schema: strategySelfCheckSchema,
|
||||||
|
});
|
||||||
|
|
||||||
|
const selfCheck = response.output;
|
||||||
|
this.store.addApproval(
|
||||||
|
runId,
|
||||||
|
"strategy_self_check",
|
||||||
|
selfCheck.readyForIndependentCheck ? "approved" : "rejected",
|
||||||
|
selfCheck.rationale,
|
||||||
|
selfCheck,
|
||||||
|
);
|
||||||
|
this.store.addEvent(runId, "strategy", "strategy_self_check", formatSelfCheckSummary(selfCheck), selfCheck);
|
||||||
|
|
||||||
|
this.store.updateRun(runId, {
|
||||||
|
status: selfCheck.readyForIndependentCheck ? "independent_check" : "planning",
|
||||||
|
summary: selfCheck.summary,
|
||||||
|
currentTaskId: null,
|
||||||
|
});
|
||||||
|
this.checkpoint(runId);
|
||||||
|
}
|
||||||
|
|
||||||
|
private async runIndependentCheck(runId: string): Promise<void> {
|
||||||
|
const snapshot = this.store.buildSnapshot(runId);
|
||||||
|
const latestSelfCheck = this.store.latestApproval(runId, "strategy_self_check")?.payload as
|
||||||
|
| StrategySelfCheckOutput
|
||||||
|
| undefined;
|
||||||
|
|
||||||
|
if (!latestSelfCheck) {
|
||||||
|
this.store.updateRun(runId, { status: "planning" });
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
const response = await this.invokeRole<CheckVerdictOutput>({
|
||||||
|
runId,
|
||||||
|
role: "checker",
|
||||||
|
schemaName: "independent-check",
|
||||||
|
prompt: buildCheckPrompt(snapshot, latestSelfCheck),
|
||||||
|
schema: checkVerdictSchema,
|
||||||
|
});
|
||||||
|
|
||||||
|
const verdict = response.output;
|
||||||
|
this.store.addApproval(
|
||||||
|
runId,
|
||||||
|
"checker",
|
||||||
|
verdict.verdict === "approved" ? "approved" : "rejected",
|
||||||
|
verdict.rationale,
|
||||||
|
verdict,
|
||||||
|
);
|
||||||
|
this.store.addEvent(runId, "checker", "check_verdict", formatCheckVerdictSummary(verdict), verdict);
|
||||||
|
|
||||||
|
if (verdict.verdict === "approved") {
|
||||||
|
this.store.updateRun(runId, {
|
||||||
|
status: "done",
|
||||||
|
summary: verdict.summary,
|
||||||
|
currentTaskId: null,
|
||||||
|
});
|
||||||
|
this.checkpoint(runId);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
this.store.updateRun(runId, {
|
||||||
|
status: "planning",
|
||||||
|
summary: verdict.summary,
|
||||||
|
currentTaskId: null,
|
||||||
|
});
|
||||||
|
this.checkpoint(runId);
|
||||||
|
}
|
||||||
|
|
||||||
|
private async invokeRole<T>(options: {
|
||||||
|
runId: string;
|
||||||
|
role: AgentRole;
|
||||||
|
schemaName: string;
|
||||||
|
prompt: string;
|
||||||
|
schema: Record<string, unknown>;
|
||||||
|
}): Promise<{ output: T }> {
|
||||||
|
const state = this.store.getAgentState(options.runId, options.role);
|
||||||
|
const nextSession = this.shouldRotate(state) ? null : state?.sessionId ?? null;
|
||||||
|
const prompt = this.shouldRotate(state)
|
||||||
|
? `Session refresh for role ${options.role}. Continue the run using this compressed state.\n\n${options.prompt}`
|
||||||
|
: options.prompt;
|
||||||
|
const artifacts = this.artifacts.createInvocationArtifacts(options.runId, options.role, options.schemaName);
|
||||||
|
|
||||||
|
this.artifacts.writePrompt(artifacts.promptPath, prompt);
|
||||||
|
this.artifacts.writeSchema(artifacts.schemaPath, options.schema);
|
||||||
|
|
||||||
|
const result = await this.runner.invoke<T>({
|
||||||
|
runId: options.runId,
|
||||||
|
role: options.role,
|
||||||
|
sessionId: nextSession,
|
||||||
|
prompt,
|
||||||
|
schemaName: options.schemaName,
|
||||||
|
schema: options.schema,
|
||||||
|
cwd: this.config.repoPath,
|
||||||
|
roleConfig: this.config.roles[options.role],
|
||||||
|
artifacts,
|
||||||
|
maxValidationRetries: 2,
|
||||||
|
});
|
||||||
|
|
||||||
|
this.artifacts.writeRawEvents(artifacts.rawEventsPath, result.rawEvents);
|
||||||
|
this.artifacts.writeStderr(artifacts.stderrPath, result.stderr);
|
||||||
|
this.artifacts.writeLastMessage(artifacts.lastMessagePath, result.rawMessage);
|
||||||
|
this.artifacts.writeResponse(artifacts.responsePath, result.output);
|
||||||
|
|
||||||
|
this.store.addArtifact(options.runId, options.role, `${options.schemaName}.prompt`, artifacts.promptPath, {});
|
||||||
|
this.store.addArtifact(options.runId, options.role, `${options.schemaName}.schema`, artifacts.schemaPath, {});
|
||||||
|
this.store.addArtifact(options.runId, options.role, `${options.schemaName}.events`, artifacts.rawEventsPath, {});
|
||||||
|
this.store.addArtifact(options.runId, options.role, `${options.schemaName}.stderr`, artifacts.stderrPath, {});
|
||||||
|
this.store.addArtifact(options.runId, options.role, `${options.schemaName}.last`, artifacts.lastMessagePath, {});
|
||||||
|
this.store.addArtifact(options.runId, options.role, `${options.schemaName}.response`, artifacts.responsePath, {});
|
||||||
|
|
||||||
|
const nextState: AgentStateRecord = {
|
||||||
|
runId: options.runId,
|
||||||
|
role: options.role,
|
||||||
|
sessionId: result.sessionId,
|
||||||
|
turns: (this.shouldRotate(state) ? 0 : state?.turns ?? 0) + 1,
|
||||||
|
rotationCount: state ? state.rotationCount + (this.shouldRotate(state) ? 1 : 0) : 0,
|
||||||
|
lastPromptPath: artifacts.promptPath,
|
||||||
|
lastResponsePath: artifacts.responsePath,
|
||||||
|
updatedAt: nowIso(),
|
||||||
|
};
|
||||||
|
this.store.saveAgentState(nextState);
|
||||||
|
|
||||||
|
return { output: result.output };
|
||||||
|
}
|
||||||
|
|
||||||
|
private shouldRotate(state: AgentStateRecord | null): boolean {
|
||||||
|
return Boolean(state?.sessionId && state.turns >= this.config.sessionRefreshTurns);
|
||||||
|
}
|
||||||
|
|
||||||
|
private checkpoint(runId: string): void {
|
||||||
|
const snapshot = this.store.buildSnapshot(runId);
|
||||||
|
this.store.addCheckpoint(runId, snapshot.run.status, snapshot);
|
||||||
|
}
|
||||||
|
}
|
||||||
250
src/prompts.ts
Normal file
250
src/prompts.ts
Normal file
|
|
@ -0,0 +1,250 @@
|
||||||
|
import type {
|
||||||
|
CheckVerdictOutput,
|
||||||
|
RunSnapshot,
|
||||||
|
StrategyPlanOutput,
|
||||||
|
StrategySelfCheckOutput,
|
||||||
|
TaskDraft,
|
||||||
|
TaskRecord,
|
||||||
|
} from "./types.js";
|
||||||
|
|
||||||
|
function bulletList(values: string[]): string {
|
||||||
|
return values.length ? values.map((value) => `- ${value}`).join("\n") : "- none";
|
||||||
|
}
|
||||||
|
|
||||||
|
function renderTask(task: TaskDraft | TaskRecord): string {
|
||||||
|
return [
|
||||||
|
`Title: ${task.title}`,
|
||||||
|
`Objective: ${task.objective}`,
|
||||||
|
`Acceptance:`,
|
||||||
|
bulletList(task.acceptanceCriteria),
|
||||||
|
`Verification:`,
|
||||||
|
bulletList(task.verificationSteps),
|
||||||
|
`Allowed paths:`,
|
||||||
|
bulletList(task.allowedPaths),
|
||||||
|
].join("\n");
|
||||||
|
}
|
||||||
|
|
||||||
|
function renderTasks(tasks: TaskDraft[] | TaskRecord[]): string {
|
||||||
|
return tasks.length
|
||||||
|
? tasks.map((task, index) => `Task ${index + 1}\n${renderTask(task)}`).join("\n\n")
|
||||||
|
: "No tasks.";
|
||||||
|
}
|
||||||
|
|
||||||
|
function renderApprovals(snapshot: RunSnapshot): string {
|
||||||
|
if (!snapshot.approvals.length) {
|
||||||
|
return "No approvals yet.";
|
||||||
|
}
|
||||||
|
|
||||||
|
return snapshot.approvals
|
||||||
|
.map(
|
||||||
|
(approval) =>
|
||||||
|
`- ${approval.createdAt} ${approval.source} ${approval.verdict}: ${approval.rationale}`,
|
||||||
|
)
|
||||||
|
.join("\n");
|
||||||
|
}
|
||||||
|
|
||||||
|
function renderRecentAttempts(snapshot: RunSnapshot): string {
|
||||||
|
if (!snapshot.recentAttempts.length) {
|
||||||
|
return "No implementation attempts yet.";
|
||||||
|
}
|
||||||
|
|
||||||
|
return snapshot.recentAttempts
|
||||||
|
.map((attempt) => {
|
||||||
|
const payload = JSON.parse(attempt.resultJson) as { changes?: string[]; followUps?: string[]; blockers?: string[] };
|
||||||
|
return [
|
||||||
|
`- Task ${attempt.taskId} attempt ${attempt.attemptNumber}: ${attempt.status}`,
|
||||||
|
` Summary: ${attempt.summary}`,
|
||||||
|
` Changes: ${(payload.changes ?? []).join("; ") || "none"}`,
|
||||||
|
` Follow-ups: ${(payload.followUps ?? []).join("; ") || "none"}`,
|
||||||
|
` Blockers: ${(payload.blockers ?? []).join("; ") || "none"}`,
|
||||||
|
].join("\n");
|
||||||
|
})
|
||||||
|
.join("\n");
|
||||||
|
}
|
||||||
|
|
||||||
|
function renderEvents(snapshot: RunSnapshot): string {
|
||||||
|
if (!snapshot.recentEvents.length) {
|
||||||
|
return "No events yet.";
|
||||||
|
}
|
||||||
|
|
||||||
|
return snapshot.recentEvents
|
||||||
|
.map((event) => `- ${event.ts} ${event.kind}: ${event.message}`)
|
||||||
|
.join("\n");
|
||||||
|
}
|
||||||
|
|
||||||
|
function renderRunState(snapshot: RunSnapshot): string {
|
||||||
|
return [
|
||||||
|
`Run ID: ${snapshot.run.id}`,
|
||||||
|
`Goal: ${snapshot.run.goal}`,
|
||||||
|
`Status: ${snapshot.run.status}`,
|
||||||
|
`Repo: ${snapshot.run.repoPath}`,
|
||||||
|
`Cycles executed: ${snapshot.run.cycleCount}`,
|
||||||
|
`Replans: ${snapshot.run.replanCount}`,
|
||||||
|
`Current task: ${snapshot.run.currentTaskId ?? "none"}`,
|
||||||
|
].join("\n");
|
||||||
|
}
|
||||||
|
|
||||||
|
export function buildStrategyPlanPrompt(snapshot: RunSnapshot): string {
|
||||||
|
const checkerApproval = snapshot.approvals.findLast((approval) => approval.source === "checker");
|
||||||
|
return `
|
||||||
|
You are the strategy agent for an autonomous development system.
|
||||||
|
|
||||||
|
Role rules:
|
||||||
|
- You may inspect the repository directly, but treat that as research only.
|
||||||
|
- Do not edit files and do not implement tasks yourself.
|
||||||
|
- Break work into concrete tasks for the implementation agent.
|
||||||
|
- Lean on the provided summary first. Read more of the repo only when it materially improves strategy.
|
||||||
|
- Only declare the project done when the user goal is actually satisfied, with evidence from completed work.
|
||||||
|
|
||||||
|
Return JSON that matches the provided schema exactly.
|
||||||
|
|
||||||
|
Current run state:
|
||||||
|
${renderRunState(snapshot)}
|
||||||
|
|
||||||
|
Pending tasks:
|
||||||
|
${renderTasks(snapshot.pendingTasks)}
|
||||||
|
|
||||||
|
Completed tasks:
|
||||||
|
${renderTasks(snapshot.completedTasks)}
|
||||||
|
|
||||||
|
Blocked or abandoned tasks:
|
||||||
|
${renderTasks(snapshot.blockedTasks)}
|
||||||
|
|
||||||
|
Recent implementation attempts:
|
||||||
|
${renderRecentAttempts(snapshot)}
|
||||||
|
|
||||||
|
Recent events:
|
||||||
|
${renderEvents(snapshot)}
|
||||||
|
|
||||||
|
Latest approvals:
|
||||||
|
${renderApprovals(snapshot)}
|
||||||
|
|
||||||
|
Latest checker verdict:
|
||||||
|
${checkerApproval ? JSON.stringify(checkerApproval.payload, null, 2) : "No checker verdict yet."}
|
||||||
|
|
||||||
|
Decision policy:
|
||||||
|
- Use \`decision: "continue"\` when there is meaningful work left. Provide the complete next backlog.
|
||||||
|
- Use \`decision: "done"\` only when no substantial work remains.
|
||||||
|
- Use \`decision: "blocked"\` only for external blockers or unresolved contradictions.
|
||||||
|
- Keep the backlog concise. Prefer a few substantive tasks over many trivial tasks.
|
||||||
|
`.trim();
|
||||||
|
}
|
||||||
|
|
||||||
|
export function buildImplementationPrompt(snapshot: RunSnapshot, task: TaskRecord): string {
|
||||||
|
return `
|
||||||
|
You are the implementation agent for an autonomous development system.
|
||||||
|
|
||||||
|
Role rules:
|
||||||
|
- You are responsible for executing exactly one task.
|
||||||
|
- You may modify files in the repository.
|
||||||
|
- Stay within the task's allowed paths unless the task itself truly requires broader changes, and call that out if it happens.
|
||||||
|
- Run relevant verification when possible.
|
||||||
|
- If you cannot complete the task, return \`needs_replan\` or \`blocked\` with concrete blockers.
|
||||||
|
|
||||||
|
Return JSON that matches the provided schema exactly.
|
||||||
|
|
||||||
|
Overall goal:
|
||||||
|
${snapshot.run.goal}
|
||||||
|
|
||||||
|
Task:
|
||||||
|
${renderTask(task)}
|
||||||
|
|
||||||
|
Recent completed tasks:
|
||||||
|
${renderTasks(snapshot.completedTasks.slice(-5))}
|
||||||
|
|
||||||
|
Recent blocked tasks:
|
||||||
|
${renderTasks(snapshot.blockedTasks.slice(-5))}
|
||||||
|
|
||||||
|
Recent approvals:
|
||||||
|
${renderApprovals(snapshot)}
|
||||||
|
|
||||||
|
You must echo the exact task id \`${task.id}\`.
|
||||||
|
`.trim();
|
||||||
|
}
|
||||||
|
|
||||||
|
export function buildStrategySelfCheckPrompt(
|
||||||
|
snapshot: RunSnapshot,
|
||||||
|
lastPlan: StrategyPlanOutput,
|
||||||
|
): string {
|
||||||
|
return `
|
||||||
|
You are the strategy agent performing a self-check before independent review.
|
||||||
|
|
||||||
|
Role rules:
|
||||||
|
- Re-evaluate the original goal against the repository and the execution history.
|
||||||
|
- You may inspect the repo, but you still must not edit files.
|
||||||
|
- Be strict. If meaningful work remains, do not send the run to the checker.
|
||||||
|
|
||||||
|
Return JSON that matches the provided schema exactly.
|
||||||
|
|
||||||
|
Original goal:
|
||||||
|
${snapshot.run.goal}
|
||||||
|
|
||||||
|
Latest strategy decision:
|
||||||
|
${JSON.stringify(lastPlan, null, 2)}
|
||||||
|
|
||||||
|
Completed tasks:
|
||||||
|
${renderTasks(snapshot.completedTasks)}
|
||||||
|
|
||||||
|
Blocked or abandoned tasks:
|
||||||
|
${renderTasks(snapshot.blockedTasks)}
|
||||||
|
|
||||||
|
Recent implementation attempts:
|
||||||
|
${renderRecentAttempts(snapshot)}
|
||||||
|
`.trim();
|
||||||
|
}
|
||||||
|
|
||||||
|
export function buildCheckPrompt(
|
||||||
|
snapshot: RunSnapshot,
|
||||||
|
selfCheck: StrategySelfCheckOutput,
|
||||||
|
): string {
|
||||||
|
return `
|
||||||
|
You are the independent check agent for an autonomous development system.
|
||||||
|
|
||||||
|
Role rules:
|
||||||
|
- You are independent from the strategy agent.
|
||||||
|
- You may inspect the repository directly.
|
||||||
|
- You must not edit files.
|
||||||
|
- Evaluate whether the original user goal is actually complete.
|
||||||
|
- Reject completion when there are still meaningful tasks, risks, or missing evidence.
|
||||||
|
|
||||||
|
Return JSON that matches the provided schema exactly.
|
||||||
|
|
||||||
|
Original goal:
|
||||||
|
${snapshot.run.goal}
|
||||||
|
|
||||||
|
Current run state:
|
||||||
|
${renderRunState(snapshot)}
|
||||||
|
|
||||||
|
Strategy self-check:
|
||||||
|
${JSON.stringify(selfCheck, null, 2)}
|
||||||
|
|
||||||
|
Completed tasks:
|
||||||
|
${renderTasks(snapshot.completedTasks)}
|
||||||
|
|
||||||
|
Blocked or abandoned tasks:
|
||||||
|
${renderTasks(snapshot.blockedTasks)}
|
||||||
|
|
||||||
|
Recent implementation attempts:
|
||||||
|
${renderRecentAttempts(snapshot)}
|
||||||
|
`.trim();
|
||||||
|
}
|
||||||
|
|
||||||
|
export function formatStrategyPlanSummary(plan: StrategyPlanOutput): string {
|
||||||
|
return `${plan.decision}: ${plan.summary}`;
|
||||||
|
}
|
||||||
|
|
||||||
|
export function formatImplementationSummary(result: {
|
||||||
|
status: string;
|
||||||
|
summary: string;
|
||||||
|
touchedFiles: string[];
|
||||||
|
}): string {
|
||||||
|
return `${result.status}: ${result.summary} | touched: ${result.touchedFiles.join(", ") || "none"}`;
|
||||||
|
}
|
||||||
|
|
||||||
|
export function formatSelfCheckSummary(result: StrategySelfCheckOutput): string {
|
||||||
|
return `${result.readyForIndependentCheck ? "ready" : "not ready"}: ${result.summary}`;
|
||||||
|
}
|
||||||
|
|
||||||
|
export function formatCheckVerdictSummary(result: CheckVerdictOutput): string {
|
||||||
|
return `${result.verdict}: ${result.summary}`;
|
||||||
|
}
|
||||||
141
src/schema-catalog.ts
Normal file
141
src/schema-catalog.ts
Normal file
|
|
@ -0,0 +1,141 @@
|
||||||
|
export const taskDraftSchema = {
|
||||||
|
type: "object",
|
||||||
|
additionalProperties: false,
|
||||||
|
required: ["title", "objective", "acceptanceCriteria", "verificationSteps", "allowedPaths"],
|
||||||
|
properties: {
|
||||||
|
title: { type: "string", minLength: 1 },
|
||||||
|
objective: { type: "string", minLength: 1 },
|
||||||
|
acceptanceCriteria: {
|
||||||
|
type: "array",
|
||||||
|
items: { type: "string", minLength: 1 },
|
||||||
|
},
|
||||||
|
verificationSteps: {
|
||||||
|
type: "array",
|
||||||
|
items: { type: "string", minLength: 1 },
|
||||||
|
},
|
||||||
|
allowedPaths: {
|
||||||
|
type: "array",
|
||||||
|
items: { type: "string", minLength: 1 },
|
||||||
|
},
|
||||||
|
},
|
||||||
|
} as const;
|
||||||
|
|
||||||
|
export const strategyPlanSchema = {
|
||||||
|
type: "object",
|
||||||
|
additionalProperties: false,
|
||||||
|
required: ["decision", "summary", "rationale", "goalProgress", "risks", "tasks"],
|
||||||
|
properties: {
|
||||||
|
decision: {
|
||||||
|
type: "string",
|
||||||
|
enum: ["continue", "done", "blocked"],
|
||||||
|
},
|
||||||
|
summary: { type: "string", minLength: 1 },
|
||||||
|
rationale: { type: "string", minLength: 1 },
|
||||||
|
goalProgress: { type: "string", minLength: 1 },
|
||||||
|
risks: {
|
||||||
|
type: "array",
|
||||||
|
items: { type: "string", minLength: 1 },
|
||||||
|
},
|
||||||
|
tasks: {
|
||||||
|
type: "array",
|
||||||
|
items: taskDraftSchema,
|
||||||
|
},
|
||||||
|
blockedReason: { type: "string" },
|
||||||
|
},
|
||||||
|
} as const;
|
||||||
|
|
||||||
|
export const strategySelfCheckSchema = {
|
||||||
|
type: "object",
|
||||||
|
additionalProperties: false,
|
||||||
|
required: ["readyForIndependentCheck", "summary", "rationale", "evidence", "remainingGaps"],
|
||||||
|
properties: {
|
||||||
|
readyForIndependentCheck: { type: "boolean" },
|
||||||
|
summary: { type: "string", minLength: 1 },
|
||||||
|
rationale: { type: "string", minLength: 1 },
|
||||||
|
evidence: {
|
||||||
|
type: "array",
|
||||||
|
items: { type: "string", minLength: 1 },
|
||||||
|
},
|
||||||
|
remainingGaps: {
|
||||||
|
type: "array",
|
||||||
|
items: { type: "string", minLength: 1 },
|
||||||
|
},
|
||||||
|
},
|
||||||
|
} as const;
|
||||||
|
|
||||||
|
export const implementationResultSchema = {
|
||||||
|
type: "object",
|
||||||
|
additionalProperties: false,
|
||||||
|
required: [
|
||||||
|
"taskId",
|
||||||
|
"status",
|
||||||
|
"summary",
|
||||||
|
"changes",
|
||||||
|
"verification",
|
||||||
|
"followUps",
|
||||||
|
"touchedFiles",
|
||||||
|
"blockers",
|
||||||
|
],
|
||||||
|
properties: {
|
||||||
|
taskId: { type: "string", minLength: 1 },
|
||||||
|
status: {
|
||||||
|
type: "string",
|
||||||
|
enum: ["completed", "needs_replan", "blocked"],
|
||||||
|
},
|
||||||
|
summary: { type: "string", minLength: 1 },
|
||||||
|
changes: {
|
||||||
|
type: "array",
|
||||||
|
items: { type: "string", minLength: 1 },
|
||||||
|
},
|
||||||
|
verification: {
|
||||||
|
type: "array",
|
||||||
|
items: {
|
||||||
|
type: "object",
|
||||||
|
additionalProperties: false,
|
||||||
|
required: ["command", "outcome", "details"],
|
||||||
|
properties: {
|
||||||
|
command: { type: "string", minLength: 1 },
|
||||||
|
outcome: {
|
||||||
|
type: "string",
|
||||||
|
enum: ["passed", "failed", "not_run"],
|
||||||
|
},
|
||||||
|
details: { type: "string", minLength: 1 },
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
followUps: {
|
||||||
|
type: "array",
|
||||||
|
items: { type: "string", minLength: 1 },
|
||||||
|
},
|
||||||
|
touchedFiles: {
|
||||||
|
type: "array",
|
||||||
|
items: { type: "string", minLength: 1 },
|
||||||
|
},
|
||||||
|
blockers: {
|
||||||
|
type: "array",
|
||||||
|
items: { type: "string", minLength: 1 },
|
||||||
|
},
|
||||||
|
},
|
||||||
|
} as const;
|
||||||
|
|
||||||
|
export const checkVerdictSchema = {
|
||||||
|
type: "object",
|
||||||
|
additionalProperties: false,
|
||||||
|
required: ["verdict", "summary", "rationale", "evidence", "remainingTasks"],
|
||||||
|
properties: {
|
||||||
|
verdict: {
|
||||||
|
type: "string",
|
||||||
|
enum: ["approved", "rejected"],
|
||||||
|
},
|
||||||
|
summary: { type: "string", minLength: 1 },
|
||||||
|
rationale: { type: "string", minLength: 1 },
|
||||||
|
evidence: {
|
||||||
|
type: "array",
|
||||||
|
items: { type: "string", minLength: 1 },
|
||||||
|
},
|
||||||
|
remainingTasks: {
|
||||||
|
type: "array",
|
||||||
|
items: taskDraftSchema,
|
||||||
|
},
|
||||||
|
},
|
||||||
|
} as const;
|
||||||
647
src/store.ts
Normal file
647
src/store.ts
Normal file
|
|
@ -0,0 +1,647 @@
|
||||||
|
import { randomUUID } from "node:crypto";
|
||||||
|
import { DatabaseSync } from "node:sqlite";
|
||||||
|
import path from "node:path";
|
||||||
|
import { ensureDir } from "./fs-utils.js";
|
||||||
|
import type {
|
||||||
|
AgentRole,
|
||||||
|
AgentStateRecord,
|
||||||
|
ApprovalRecord,
|
||||||
|
ApprovalSource,
|
||||||
|
ApprovalVerdict,
|
||||||
|
ArtifactRecord,
|
||||||
|
AttemptStatus,
|
||||||
|
CheckpointRecord,
|
||||||
|
EventRecord,
|
||||||
|
RunRecord,
|
||||||
|
RunSnapshot,
|
||||||
|
RunStatus,
|
||||||
|
TaskAttemptRecord,
|
||||||
|
TaskDraft,
|
||||||
|
TaskRecord,
|
||||||
|
TaskStatus,
|
||||||
|
} from "./types.js";
|
||||||
|
|
||||||
|
function nowIso(): string {
|
||||||
|
return new Date().toISOString();
|
||||||
|
}
|
||||||
|
|
||||||
|
function parseJson<T>(value: string | null): T {
|
||||||
|
if (!value) {
|
||||||
|
return [] as T;
|
||||||
|
}
|
||||||
|
return JSON.parse(value) as T;
|
||||||
|
}
|
||||||
|
|
||||||
|
function toJson(value: unknown): string {
|
||||||
|
return JSON.stringify(value);
|
||||||
|
}
|
||||||
|
|
||||||
|
function mapTaskRow(row: Record<string, unknown>): TaskRecord {
|
||||||
|
return {
|
||||||
|
id: String(row.id),
|
||||||
|
runId: String(row.run_id),
|
||||||
|
title: String(row.title),
|
||||||
|
objective: String(row.objective),
|
||||||
|
acceptanceCriteria: parseJson<string[]>(String(row.acceptance_criteria_json)),
|
||||||
|
verificationSteps: parseJson<string[]>(String(row.verification_steps_json)),
|
||||||
|
allowedPaths: parseJson<string[]>(String(row.allowed_paths_json)),
|
||||||
|
status: row.status as TaskStatus,
|
||||||
|
attemptCount: Number(row.attempt_count),
|
||||||
|
implementationSummary: row.implementation_summary ? String(row.implementation_summary) : null,
|
||||||
|
blockerSignature: row.blocker_signature ? String(row.blocker_signature) : null,
|
||||||
|
createdAt: String(row.created_at),
|
||||||
|
updatedAt: String(row.updated_at),
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
function mapAttemptRow(row: Record<string, unknown>): TaskAttemptRecord {
|
||||||
|
return {
|
||||||
|
id: String(row.id),
|
||||||
|
runId: String(row.run_id),
|
||||||
|
taskId: String(row.task_id),
|
||||||
|
attemptNumber: Number(row.attempt_number),
|
||||||
|
status: row.status as AttemptStatus,
|
||||||
|
summary: String(row.summary),
|
||||||
|
blockerSignature: row.blocker_signature ? String(row.blocker_signature) : null,
|
||||||
|
resultJson: String(row.result_json),
|
||||||
|
createdAt: String(row.created_at),
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
function mapRunRow(row: Record<string, unknown>): RunRecord {
|
||||||
|
return {
|
||||||
|
id: String(row.id),
|
||||||
|
goal: String(row.goal),
|
||||||
|
repoPath: String(row.repo_path),
|
||||||
|
status: row.status as RunStatus,
|
||||||
|
summary: row.summary ? String(row.summary) : null,
|
||||||
|
cycleCount: Number(row.cycle_count),
|
||||||
|
replanCount: Number(row.replan_count),
|
||||||
|
currentTaskId: row.current_task_id ? String(row.current_task_id) : null,
|
||||||
|
createdAt: String(row.created_at),
|
||||||
|
updatedAt: String(row.updated_at),
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
function mapAgentRow(row: Record<string, unknown>): AgentStateRecord {
|
||||||
|
return {
|
||||||
|
runId: String(row.run_id),
|
||||||
|
role: row.role as AgentRole,
|
||||||
|
sessionId: row.session_id ? String(row.session_id) : null,
|
||||||
|
turns: Number(row.turns),
|
||||||
|
rotationCount: Number(row.rotation_count),
|
||||||
|
lastPromptPath: row.last_prompt_path ? String(row.last_prompt_path) : null,
|
||||||
|
lastResponsePath: row.last_response_path ? String(row.last_response_path) : null,
|
||||||
|
updatedAt: String(row.updated_at),
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
export class RunStore {
|
||||||
|
private readonly db: DatabaseSync;
|
||||||
|
|
||||||
|
constructor(dbPath: string) {
|
||||||
|
ensureDir(path.dirname(dbPath));
|
||||||
|
this.db = new DatabaseSync(dbPath);
|
||||||
|
this.db.exec("PRAGMA journal_mode = WAL;");
|
||||||
|
this.db.exec(`
|
||||||
|
CREATE TABLE IF NOT EXISTS runs (
|
||||||
|
id TEXT PRIMARY KEY,
|
||||||
|
goal TEXT NOT NULL,
|
||||||
|
repo_path TEXT NOT NULL,
|
||||||
|
status TEXT NOT NULL,
|
||||||
|
summary TEXT,
|
||||||
|
cycle_count INTEGER NOT NULL DEFAULT 0,
|
||||||
|
replan_count INTEGER NOT NULL DEFAULT 0,
|
||||||
|
current_task_id TEXT,
|
||||||
|
created_at TEXT NOT NULL,
|
||||||
|
updated_at TEXT NOT NULL
|
||||||
|
);
|
||||||
|
|
||||||
|
CREATE TABLE IF NOT EXISTS agents (
|
||||||
|
run_id TEXT NOT NULL,
|
||||||
|
role TEXT NOT NULL,
|
||||||
|
session_id TEXT,
|
||||||
|
turns INTEGER NOT NULL DEFAULT 0,
|
||||||
|
rotation_count INTEGER NOT NULL DEFAULT 0,
|
||||||
|
last_prompt_path TEXT,
|
||||||
|
last_response_path TEXT,
|
||||||
|
updated_at TEXT NOT NULL,
|
||||||
|
PRIMARY KEY (run_id, role)
|
||||||
|
);
|
||||||
|
|
||||||
|
CREATE TABLE IF NOT EXISTS tasks (
|
||||||
|
id TEXT PRIMARY KEY,
|
||||||
|
run_id TEXT NOT NULL,
|
||||||
|
title TEXT NOT NULL,
|
||||||
|
objective TEXT NOT NULL,
|
||||||
|
acceptance_criteria_json TEXT NOT NULL,
|
||||||
|
verification_steps_json TEXT NOT NULL,
|
||||||
|
allowed_paths_json TEXT NOT NULL,
|
||||||
|
status TEXT NOT NULL,
|
||||||
|
attempt_count INTEGER NOT NULL DEFAULT 0,
|
||||||
|
implementation_summary TEXT,
|
||||||
|
blocker_signature TEXT,
|
||||||
|
created_at TEXT NOT NULL,
|
||||||
|
updated_at TEXT NOT NULL
|
||||||
|
);
|
||||||
|
|
||||||
|
CREATE TABLE IF NOT EXISTS task_attempts (
|
||||||
|
id TEXT PRIMARY KEY,
|
||||||
|
run_id TEXT NOT NULL,
|
||||||
|
task_id TEXT NOT NULL,
|
||||||
|
attempt_number INTEGER NOT NULL,
|
||||||
|
status TEXT NOT NULL,
|
||||||
|
summary TEXT NOT NULL,
|
||||||
|
blocker_signature TEXT,
|
||||||
|
result_json TEXT NOT NULL,
|
||||||
|
created_at TEXT NOT NULL
|
||||||
|
);
|
||||||
|
|
||||||
|
CREATE TABLE IF NOT EXISTS events (
|
||||||
|
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
||||||
|
run_id TEXT NOT NULL,
|
||||||
|
ts TEXT NOT NULL,
|
||||||
|
source TEXT NOT NULL,
|
||||||
|
kind TEXT NOT NULL,
|
||||||
|
message TEXT NOT NULL,
|
||||||
|
payload_json TEXT NOT NULL
|
||||||
|
);
|
||||||
|
|
||||||
|
CREATE TABLE IF NOT EXISTS artifacts (
|
||||||
|
id TEXT PRIMARY KEY,
|
||||||
|
run_id TEXT NOT NULL,
|
||||||
|
role TEXT,
|
||||||
|
kind TEXT NOT NULL,
|
||||||
|
path TEXT NOT NULL,
|
||||||
|
created_at TEXT NOT NULL,
|
||||||
|
metadata_json TEXT NOT NULL
|
||||||
|
);
|
||||||
|
|
||||||
|
CREATE TABLE IF NOT EXISTS checkpoints (
|
||||||
|
id TEXT PRIMARY KEY,
|
||||||
|
run_id TEXT NOT NULL,
|
||||||
|
status TEXT NOT NULL,
|
||||||
|
payload_json TEXT NOT NULL,
|
||||||
|
created_at TEXT NOT NULL
|
||||||
|
);
|
||||||
|
|
||||||
|
CREATE TABLE IF NOT EXISTS approvals (
|
||||||
|
id TEXT PRIMARY KEY,
|
||||||
|
run_id TEXT NOT NULL,
|
||||||
|
source TEXT NOT NULL,
|
||||||
|
verdict TEXT NOT NULL,
|
||||||
|
rationale TEXT NOT NULL,
|
||||||
|
payload_json TEXT NOT NULL,
|
||||||
|
created_at TEXT NOT NULL
|
||||||
|
);
|
||||||
|
`);
|
||||||
|
}
|
||||||
|
|
||||||
|
createRun(goal: string, repoPath: string): RunRecord {
|
||||||
|
const timestamp = nowIso();
|
||||||
|
const id = randomUUID();
|
||||||
|
this.db
|
||||||
|
.prepare(`
|
||||||
|
INSERT INTO runs (id, goal, repo_path, status, summary, cycle_count, replan_count, current_task_id, created_at, updated_at)
|
||||||
|
VALUES (?, ?, ?, 'planning', NULL, 0, 0, NULL, ?, ?)
|
||||||
|
`)
|
||||||
|
.run(id, goal, repoPath, timestamp, timestamp);
|
||||||
|
|
||||||
|
this.addEvent(id, "system", "run_created", "Run created.", { goal, repoPath });
|
||||||
|
return this.getRun(id);
|
||||||
|
}
|
||||||
|
|
||||||
|
getRun(runId: string): RunRecord {
|
||||||
|
const row = this.db.prepare("SELECT * FROM runs WHERE id = ?").get(runId) as Record<string, unknown> | undefined;
|
||||||
|
if (!row) {
|
||||||
|
throw new Error(`Run not found: ${runId}`);
|
||||||
|
}
|
||||||
|
return mapRunRow(row);
|
||||||
|
}
|
||||||
|
|
||||||
|
listRuns(): RunRecord[] {
|
||||||
|
const rows = this.db.prepare("SELECT * FROM runs ORDER BY created_at DESC").all() as Record<string, unknown>[];
|
||||||
|
return rows.map(mapRunRow);
|
||||||
|
}
|
||||||
|
|
||||||
|
updateRun(runId: string, updates: Partial<Pick<RunRecord, "status" | "summary" | "currentTaskId">>): RunRecord {
|
||||||
|
const current = this.getRun(runId);
|
||||||
|
const next: RunRecord = {
|
||||||
|
...current,
|
||||||
|
status: updates.status ?? current.status,
|
||||||
|
summary: updates.summary ?? current.summary,
|
||||||
|
currentTaskId: updates.currentTaskId === undefined ? current.currentTaskId : updates.currentTaskId,
|
||||||
|
updatedAt: nowIso(),
|
||||||
|
};
|
||||||
|
this.db
|
||||||
|
.prepare(`
|
||||||
|
UPDATE runs
|
||||||
|
SET status = ?, summary = ?, current_task_id = ?, updated_at = ?
|
||||||
|
WHERE id = ?
|
||||||
|
`)
|
||||||
|
.run(next.status, next.summary, next.currentTaskId, next.updatedAt, runId);
|
||||||
|
return this.getRun(runId);
|
||||||
|
}
|
||||||
|
|
||||||
|
incrementCycle(runId: string): void {
|
||||||
|
this.db.prepare("UPDATE runs SET cycle_count = cycle_count + 1, updated_at = ? WHERE id = ?").run(nowIso(), runId);
|
||||||
|
}
|
||||||
|
|
||||||
|
incrementReplanCount(runId: string): void {
|
||||||
|
this.db.prepare("UPDATE runs SET replan_count = replan_count + 1, updated_at = ? WHERE id = ?").run(nowIso(), runId);
|
||||||
|
}
|
||||||
|
|
||||||
|
getAgentState(runId: string, role: AgentRole): AgentStateRecord | null {
|
||||||
|
const row = this.db.prepare("SELECT * FROM agents WHERE run_id = ? AND role = ?").get(runId, role) as
|
||||||
|
| Record<string, unknown>
|
||||||
|
| undefined;
|
||||||
|
return row ? mapAgentRow(row) : null;
|
||||||
|
}
|
||||||
|
|
||||||
|
saveAgentState(record: AgentStateRecord): void {
|
||||||
|
this.db
|
||||||
|
.prepare(`
|
||||||
|
INSERT INTO agents (run_id, role, session_id, turns, rotation_count, last_prompt_path, last_response_path, updated_at)
|
||||||
|
VALUES (?, ?, ?, ?, ?, ?, ?, ?)
|
||||||
|
ON CONFLICT(run_id, role) DO UPDATE SET
|
||||||
|
session_id = excluded.session_id,
|
||||||
|
turns = excluded.turns,
|
||||||
|
rotation_count = excluded.rotation_count,
|
||||||
|
last_prompt_path = excluded.last_prompt_path,
|
||||||
|
last_response_path = excluded.last_response_path,
|
||||||
|
updated_at = excluded.updated_at
|
||||||
|
`)
|
||||||
|
.run(
|
||||||
|
record.runId,
|
||||||
|
record.role,
|
||||||
|
record.sessionId,
|
||||||
|
record.turns,
|
||||||
|
record.rotationCount,
|
||||||
|
record.lastPromptPath,
|
||||||
|
record.lastResponsePath,
|
||||||
|
record.updatedAt,
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
requeueInProgressTasks(runId: string): number {
|
||||||
|
const timestamp = nowIso();
|
||||||
|
const result = this.db
|
||||||
|
.prepare(`
|
||||||
|
UPDATE tasks
|
||||||
|
SET status = 'pending', updated_at = ?
|
||||||
|
WHERE run_id = ? AND status = 'in_progress'
|
||||||
|
`)
|
||||||
|
.run(timestamp, runId);
|
||||||
|
const changes = Number(result.changes ?? 0);
|
||||||
|
if (changes > 0) {
|
||||||
|
this.addEvent(runId, "system", "requeue_in_progress", "Requeued in-progress tasks on resume.", { changes });
|
||||||
|
}
|
||||||
|
return changes;
|
||||||
|
}
|
||||||
|
|
||||||
|
replacePendingTasks(runId: string, tasks: TaskDraft[]): TaskRecord[] {
|
||||||
|
const now = nowIso();
|
||||||
|
this.db
|
||||||
|
.prepare(`
|
||||||
|
UPDATE tasks
|
||||||
|
SET status = 'abandoned', updated_at = ?
|
||||||
|
WHERE run_id = ? AND status IN ('pending', 'in_progress')
|
||||||
|
`)
|
||||||
|
.run(now, runId);
|
||||||
|
|
||||||
|
const insertStmt = this.db.prepare(`
|
||||||
|
INSERT INTO tasks (
|
||||||
|
id, run_id, title, objective, acceptance_criteria_json, verification_steps_json,
|
||||||
|
allowed_paths_json, status, attempt_count, implementation_summary, blocker_signature, created_at, updated_at
|
||||||
|
) VALUES (?, ?, ?, ?, ?, ?, ?, 'pending', 0, NULL, NULL, ?, ?)
|
||||||
|
`);
|
||||||
|
|
||||||
|
const records: TaskRecord[] = [];
|
||||||
|
for (const task of tasks) {
|
||||||
|
const id = randomUUID();
|
||||||
|
insertStmt.run(
|
||||||
|
id,
|
||||||
|
runId,
|
||||||
|
task.title,
|
||||||
|
task.objective,
|
||||||
|
toJson(task.acceptanceCriteria),
|
||||||
|
toJson(task.verificationSteps),
|
||||||
|
toJson(task.allowedPaths),
|
||||||
|
now,
|
||||||
|
now,
|
||||||
|
);
|
||||||
|
records.push(this.getTask(id));
|
||||||
|
}
|
||||||
|
|
||||||
|
return records;
|
||||||
|
}
|
||||||
|
|
||||||
|
listTasks(runId: string, statuses?: TaskStatus[]): TaskRecord[] {
|
||||||
|
if (!statuses?.length) {
|
||||||
|
const rows = this.db
|
||||||
|
.prepare("SELECT * FROM tasks WHERE run_id = ? ORDER BY created_at ASC")
|
||||||
|
.all(runId) as Record<string, unknown>[];
|
||||||
|
return rows.map(mapTaskRow);
|
||||||
|
}
|
||||||
|
|
||||||
|
const placeholders = statuses.map(() => "?").join(", ");
|
||||||
|
const rows = this.db
|
||||||
|
.prepare(`SELECT * FROM tasks WHERE run_id = ? AND status IN (${placeholders}) ORDER BY created_at ASC`)
|
||||||
|
.all(runId, ...statuses) as Record<string, unknown>[];
|
||||||
|
return rows.map(mapTaskRow);
|
||||||
|
}
|
||||||
|
|
||||||
|
getTask(taskId: string): TaskRecord {
|
||||||
|
const row = this.db.prepare("SELECT * FROM tasks WHERE id = ?").get(taskId) as Record<string, unknown> | undefined;
|
||||||
|
if (!row) {
|
||||||
|
throw new Error(`Task not found: ${taskId}`);
|
||||||
|
}
|
||||||
|
return mapTaskRow(row);
|
||||||
|
}
|
||||||
|
|
||||||
|
claimNextPendingTask(runId: string): TaskRecord | null {
|
||||||
|
const row = this.db
|
||||||
|
.prepare(`
|
||||||
|
SELECT * FROM tasks
|
||||||
|
WHERE run_id = ? AND status = 'pending'
|
||||||
|
ORDER BY created_at ASC
|
||||||
|
LIMIT 1
|
||||||
|
`)
|
||||||
|
.get(runId) as Record<string, unknown> | undefined;
|
||||||
|
|
||||||
|
if (!row) {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
|
const taskId = String(row.id);
|
||||||
|
this.db
|
||||||
|
.prepare(`
|
||||||
|
UPDATE tasks
|
||||||
|
SET status = 'in_progress', attempt_count = attempt_count + 1, updated_at = ?
|
||||||
|
WHERE id = ?
|
||||||
|
`)
|
||||||
|
.run(nowIso(), taskId);
|
||||||
|
return this.getTask(taskId);
|
||||||
|
}
|
||||||
|
|
||||||
|
completeTask(taskId: string, summary: string): TaskRecord {
|
||||||
|
this.db
|
||||||
|
.prepare(`
|
||||||
|
UPDATE tasks
|
||||||
|
SET status = 'completed', implementation_summary = ?, blocker_signature = NULL, updated_at = ?
|
||||||
|
WHERE id = ?
|
||||||
|
`)
|
||||||
|
.run(summary, nowIso(), taskId);
|
||||||
|
return this.getTask(taskId);
|
||||||
|
}
|
||||||
|
|
||||||
|
abandonTask(taskId: string, summary: string | null, blockerSignature: string | null): TaskRecord {
|
||||||
|
this.db
|
||||||
|
.prepare(`
|
||||||
|
UPDATE tasks
|
||||||
|
SET status = 'abandoned', implementation_summary = ?, blocker_signature = ?, updated_at = ?
|
||||||
|
WHERE id = ?
|
||||||
|
`)
|
||||||
|
.run(summary, blockerSignature, nowIso(), taskId);
|
||||||
|
return this.getTask(taskId);
|
||||||
|
}
|
||||||
|
|
||||||
|
blockTask(taskId: string, summary: string | null, blockerSignature: string | null): TaskRecord {
|
||||||
|
this.db
|
||||||
|
.prepare(`
|
||||||
|
UPDATE tasks
|
||||||
|
SET status = 'blocked', implementation_summary = ?, blocker_signature = ?, updated_at = ?
|
||||||
|
WHERE id = ?
|
||||||
|
`)
|
||||||
|
.run(summary, blockerSignature, nowIso(), taskId);
|
||||||
|
return this.getTask(taskId);
|
||||||
|
}
|
||||||
|
|
||||||
|
latestTaskAttempt(taskId: string): TaskAttemptRecord | null {
|
||||||
|
const row = this.db
|
||||||
|
.prepare("SELECT * FROM task_attempts WHERE task_id = ? ORDER BY created_at DESC LIMIT 1")
|
||||||
|
.get(taskId) as Record<string, unknown> | undefined;
|
||||||
|
return row ? mapAttemptRow(row) : null;
|
||||||
|
}
|
||||||
|
|
||||||
|
addTaskAttempt(
|
||||||
|
runId: string,
|
||||||
|
taskId: string,
|
||||||
|
attemptNumber: number,
|
||||||
|
status: AttemptStatus,
|
||||||
|
summary: string,
|
||||||
|
result: unknown,
|
||||||
|
blockerSignature: string | null,
|
||||||
|
): TaskAttemptRecord {
|
||||||
|
const id = randomUUID();
|
||||||
|
const createdAt = nowIso();
|
||||||
|
this.db
|
||||||
|
.prepare(`
|
||||||
|
INSERT INTO task_attempts (id, run_id, task_id, attempt_number, status, summary, blocker_signature, result_json, created_at)
|
||||||
|
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)
|
||||||
|
`)
|
||||||
|
.run(id, runId, taskId, attemptNumber, status, summary, blockerSignature, toJson(result), createdAt);
|
||||||
|
return this.latestTaskAttempt(taskId) as TaskAttemptRecord;
|
||||||
|
}
|
||||||
|
|
||||||
|
addEvent(runId: string, source: string, kind: string, message: string, payload: unknown): void {
|
||||||
|
this.db
|
||||||
|
.prepare(`
|
||||||
|
INSERT INTO events (run_id, ts, source, kind, message, payload_json)
|
||||||
|
VALUES (?, ?, ?, ?, ?, ?)
|
||||||
|
`)
|
||||||
|
.run(runId, nowIso(), source, kind, message, toJson(payload));
|
||||||
|
}
|
||||||
|
|
||||||
|
listEvents(runId: string, limit = 50): EventRecord[] {
|
||||||
|
const rows = this.db
|
||||||
|
.prepare(`
|
||||||
|
SELECT * FROM events
|
||||||
|
WHERE run_id = ?
|
||||||
|
ORDER BY id DESC
|
||||||
|
LIMIT ?
|
||||||
|
`)
|
||||||
|
.all(runId, limit) as Record<string, unknown>[];
|
||||||
|
return rows
|
||||||
|
.map((row) => ({
|
||||||
|
id: Number(row.id),
|
||||||
|
runId: String(row.run_id),
|
||||||
|
ts: String(row.ts),
|
||||||
|
source: String(row.source),
|
||||||
|
kind: String(row.kind),
|
||||||
|
message: String(row.message),
|
||||||
|
payload: JSON.parse(String(row.payload_json)),
|
||||||
|
}))
|
||||||
|
.reverse();
|
||||||
|
}
|
||||||
|
|
||||||
|
addArtifact(runId: string, role: AgentRole | null, kind: string, filePath: string, metadata: unknown): ArtifactRecord {
|
||||||
|
const id = randomUUID();
|
||||||
|
const createdAt = nowIso();
|
||||||
|
this.db
|
||||||
|
.prepare(`
|
||||||
|
INSERT INTO artifacts (id, run_id, role, kind, path, created_at, metadata_json)
|
||||||
|
VALUES (?, ?, ?, ?, ?, ?, ?)
|
||||||
|
`)
|
||||||
|
.run(id, runId, role, kind, filePath, createdAt, toJson(metadata));
|
||||||
|
|
||||||
|
return {
|
||||||
|
id,
|
||||||
|
runId,
|
||||||
|
role,
|
||||||
|
kind,
|
||||||
|
path: filePath,
|
||||||
|
createdAt,
|
||||||
|
metadata,
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
listArtifacts(runId: string, role?: AgentRole): ArtifactRecord[] {
|
||||||
|
const rows = role
|
||||||
|
? (this.db
|
||||||
|
.prepare(`
|
||||||
|
SELECT * FROM artifacts
|
||||||
|
WHERE run_id = ? AND role = ?
|
||||||
|
ORDER BY created_at ASC
|
||||||
|
`)
|
||||||
|
.all(runId, role) as Record<string, unknown>[])
|
||||||
|
: (this.db
|
||||||
|
.prepare(`
|
||||||
|
SELECT * FROM artifacts
|
||||||
|
WHERE run_id = ?
|
||||||
|
ORDER BY created_at ASC
|
||||||
|
`)
|
||||||
|
.all(runId) as Record<string, unknown>[]);
|
||||||
|
|
||||||
|
return rows.map((row) => ({
|
||||||
|
id: String(row.id),
|
||||||
|
runId: String(row.run_id),
|
||||||
|
role: row.role ? (String(row.role) as AgentRole) : null,
|
||||||
|
kind: String(row.kind),
|
||||||
|
path: String(row.path),
|
||||||
|
createdAt: String(row.created_at),
|
||||||
|
metadata: JSON.parse(String(row.metadata_json)),
|
||||||
|
}));
|
||||||
|
}
|
||||||
|
|
||||||
|
addApproval(
|
||||||
|
runId: string,
|
||||||
|
source: ApprovalSource,
|
||||||
|
verdict: ApprovalVerdict,
|
||||||
|
rationale: string,
|
||||||
|
payload: unknown,
|
||||||
|
): ApprovalRecord {
|
||||||
|
const id = randomUUID();
|
||||||
|
const createdAt = nowIso();
|
||||||
|
this.db
|
||||||
|
.prepare(`
|
||||||
|
INSERT INTO approvals (id, run_id, source, verdict, rationale, payload_json, created_at)
|
||||||
|
VALUES (?, ?, ?, ?, ?, ?, ?)
|
||||||
|
`)
|
||||||
|
.run(id, runId, source, verdict, rationale, toJson(payload), createdAt);
|
||||||
|
|
||||||
|
return {
|
||||||
|
id,
|
||||||
|
runId,
|
||||||
|
source,
|
||||||
|
verdict,
|
||||||
|
rationale,
|
||||||
|
payload,
|
||||||
|
createdAt,
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
listApprovals(runId: string): ApprovalRecord[] {
|
||||||
|
const rows = this.db
|
||||||
|
.prepare("SELECT * FROM approvals WHERE run_id = ? ORDER BY created_at ASC")
|
||||||
|
.all(runId) as Record<string, unknown>[];
|
||||||
|
return rows.map((row) => ({
|
||||||
|
id: String(row.id),
|
||||||
|
runId: String(row.run_id),
|
||||||
|
source: row.source as ApprovalSource,
|
||||||
|
verdict: row.verdict as ApprovalVerdict,
|
||||||
|
rationale: String(row.rationale),
|
||||||
|
payload: JSON.parse(String(row.payload_json)),
|
||||||
|
createdAt: String(row.created_at),
|
||||||
|
}));
|
||||||
|
}
|
||||||
|
|
||||||
|
latestApproval(runId: string, source: ApprovalSource): ApprovalRecord | null {
|
||||||
|
const row = this.db
|
||||||
|
.prepare(`
|
||||||
|
SELECT * FROM approvals
|
||||||
|
WHERE run_id = ? AND source = ?
|
||||||
|
ORDER BY created_at DESC
|
||||||
|
LIMIT 1
|
||||||
|
`)
|
||||||
|
.get(runId, source) as Record<string, unknown> | undefined;
|
||||||
|
if (!row) {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
|
return {
|
||||||
|
id: String(row.id),
|
||||||
|
runId: String(row.run_id),
|
||||||
|
source: row.source as ApprovalSource,
|
||||||
|
verdict: row.verdict as ApprovalVerdict,
|
||||||
|
rationale: String(row.rationale),
|
||||||
|
payload: JSON.parse(String(row.payload_json)),
|
||||||
|
createdAt: String(row.created_at),
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
addCheckpoint(runId: string, status: RunStatus, payload: unknown): CheckpointRecord {
|
||||||
|
const id = randomUUID();
|
||||||
|
const createdAt = nowIso();
|
||||||
|
this.db
|
||||||
|
.prepare(`
|
||||||
|
INSERT INTO checkpoints (id, run_id, status, payload_json, created_at)
|
||||||
|
VALUES (?, ?, ?, ?, ?)
|
||||||
|
`)
|
||||||
|
.run(id, runId, status, toJson(payload), createdAt);
|
||||||
|
|
||||||
|
return {
|
||||||
|
id,
|
||||||
|
runId,
|
||||||
|
status,
|
||||||
|
payload,
|
||||||
|
createdAt,
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
buildSnapshot(runId: string): RunSnapshot {
|
||||||
|
const run = this.getRun(runId);
|
||||||
|
const agents = (["strategy", "implementation", "checker"] as const).reduce<RunSnapshot["agents"]>((acc, role) => {
|
||||||
|
const agent = this.getAgentState(runId, role);
|
||||||
|
if (agent) {
|
||||||
|
acc[role] = agent;
|
||||||
|
}
|
||||||
|
return acc;
|
||||||
|
}, {});
|
||||||
|
|
||||||
|
return {
|
||||||
|
run,
|
||||||
|
agents,
|
||||||
|
pendingTasks: this.listTasks(runId, ["pending"]),
|
||||||
|
inProgressTasks: this.listTasks(runId, ["in_progress"]),
|
||||||
|
completedTasks: this.listTasks(runId, ["completed"]),
|
||||||
|
blockedTasks: this.listTasks(runId, ["blocked", "abandoned"]),
|
||||||
|
recentAttempts: this.listRecentAttempts(runId),
|
||||||
|
recentEvents: this.listEvents(runId),
|
||||||
|
approvals: this.listApprovals(runId),
|
||||||
|
artifacts: this.listArtifacts(runId),
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
listRecentAttempts(runId: string, limit = 12): TaskAttemptRecord[] {
|
||||||
|
const rows = this.db
|
||||||
|
.prepare(`
|
||||||
|
SELECT * FROM task_attempts
|
||||||
|
WHERE run_id = ?
|
||||||
|
ORDER BY created_at DESC
|
||||||
|
LIMIT ?
|
||||||
|
`)
|
||||||
|
.all(runId, limit) as Record<string, unknown>[];
|
||||||
|
return rows.map(mapAttemptRow).reverse();
|
||||||
|
}
|
||||||
|
}
|
||||||
230
src/types.ts
Normal file
230
src/types.ts
Normal file
|
|
@ -0,0 +1,230 @@
|
||||||
|
export const agentRoles = ["strategy", "implementation", "checker"] as const;
|
||||||
|
|
||||||
|
export type AgentRole = (typeof agentRoles)[number];
|
||||||
|
export type SandboxMode = "read-only" | "workspace-write" | "danger-full-access";
|
||||||
|
export type RunStatus =
|
||||||
|
| "planning"
|
||||||
|
| "implementing"
|
||||||
|
| "self_check"
|
||||||
|
| "independent_check"
|
||||||
|
| "blocked"
|
||||||
|
| "done";
|
||||||
|
export type TaskStatus = "pending" | "in_progress" | "completed" | "blocked" | "abandoned";
|
||||||
|
export type AttemptStatus = "completed" | "needs_replan" | "blocked";
|
||||||
|
export type ApprovalSource = "strategy_self_check" | "checker";
|
||||||
|
export type ApprovalVerdict = "approved" | "rejected";
|
||||||
|
|
||||||
|
export interface RoleConfig {
|
||||||
|
model?: string;
|
||||||
|
sandbox: SandboxMode;
|
||||||
|
search: boolean;
|
||||||
|
skipGitRepoCheck: boolean;
|
||||||
|
extraArgs: string[];
|
||||||
|
}
|
||||||
|
|
||||||
|
export interface ResolvedConfig {
|
||||||
|
repoPath: string;
|
||||||
|
stateDir: string;
|
||||||
|
databasePath: string;
|
||||||
|
runsDir: string;
|
||||||
|
codexCommand: string;
|
||||||
|
maxTaskAttempts: number;
|
||||||
|
maxCyclesPerInvocation: number;
|
||||||
|
maxReplans: number;
|
||||||
|
maxTasks: number;
|
||||||
|
sessionRefreshTurns: number;
|
||||||
|
roles: Record<AgentRole, RoleConfig>;
|
||||||
|
}
|
||||||
|
|
||||||
|
export interface RunRecord {
|
||||||
|
id: string;
|
||||||
|
goal: string;
|
||||||
|
repoPath: string;
|
||||||
|
status: RunStatus;
|
||||||
|
summary: string | null;
|
||||||
|
cycleCount: number;
|
||||||
|
replanCount: number;
|
||||||
|
currentTaskId: string | null;
|
||||||
|
createdAt: string;
|
||||||
|
updatedAt: string;
|
||||||
|
}
|
||||||
|
|
||||||
|
export interface AgentStateRecord {
|
||||||
|
runId: string;
|
||||||
|
role: AgentRole;
|
||||||
|
sessionId: string | null;
|
||||||
|
turns: number;
|
||||||
|
rotationCount: number;
|
||||||
|
lastPromptPath: string | null;
|
||||||
|
lastResponsePath: string | null;
|
||||||
|
updatedAt: string;
|
||||||
|
}
|
||||||
|
|
||||||
|
export interface TaskDraft {
|
||||||
|
title: string;
|
||||||
|
objective: string;
|
||||||
|
acceptanceCriteria: string[];
|
||||||
|
verificationSteps: string[];
|
||||||
|
allowedPaths: string[];
|
||||||
|
}
|
||||||
|
|
||||||
|
export interface TaskRecord extends TaskDraft {
|
||||||
|
id: string;
|
||||||
|
runId: string;
|
||||||
|
status: TaskStatus;
|
||||||
|
attemptCount: number;
|
||||||
|
implementationSummary: string | null;
|
||||||
|
blockerSignature: string | null;
|
||||||
|
createdAt: string;
|
||||||
|
updatedAt: string;
|
||||||
|
}
|
||||||
|
|
||||||
|
export interface TaskAttemptRecord {
|
||||||
|
id: string;
|
||||||
|
runId: string;
|
||||||
|
taskId: string;
|
||||||
|
attemptNumber: number;
|
||||||
|
status: AttemptStatus;
|
||||||
|
summary: string;
|
||||||
|
blockerSignature: string | null;
|
||||||
|
resultJson: string;
|
||||||
|
createdAt: string;
|
||||||
|
}
|
||||||
|
|
||||||
|
export interface EventRecord {
|
||||||
|
id: number;
|
||||||
|
runId: string;
|
||||||
|
ts: string;
|
||||||
|
source: string;
|
||||||
|
kind: string;
|
||||||
|
message: string;
|
||||||
|
payload: unknown;
|
||||||
|
}
|
||||||
|
|
||||||
|
export interface ArtifactRecord {
|
||||||
|
id: string;
|
||||||
|
runId: string;
|
||||||
|
role: AgentRole | null;
|
||||||
|
kind: string;
|
||||||
|
path: string;
|
||||||
|
createdAt: string;
|
||||||
|
metadata: unknown;
|
||||||
|
}
|
||||||
|
|
||||||
|
export interface ApprovalRecord {
|
||||||
|
id: string;
|
||||||
|
runId: string;
|
||||||
|
source: ApprovalSource;
|
||||||
|
verdict: ApprovalVerdict;
|
||||||
|
rationale: string;
|
||||||
|
payload: unknown;
|
||||||
|
createdAt: string;
|
||||||
|
}
|
||||||
|
|
||||||
|
export interface CheckpointRecord {
|
||||||
|
id: string;
|
||||||
|
runId: string;
|
||||||
|
status: RunStatus;
|
||||||
|
payload: unknown;
|
||||||
|
createdAt: string;
|
||||||
|
}
|
||||||
|
|
||||||
|
export interface VerificationResult {
|
||||||
|
command: string;
|
||||||
|
outcome: "passed" | "failed" | "not_run";
|
||||||
|
details: string;
|
||||||
|
}
|
||||||
|
|
||||||
|
export interface StrategyPlanOutput {
|
||||||
|
decision: "continue" | "done" | "blocked";
|
||||||
|
summary: string;
|
||||||
|
rationale: string;
|
||||||
|
goalProgress: string;
|
||||||
|
risks: string[];
|
||||||
|
tasks: TaskDraft[];
|
||||||
|
blockedReason?: string;
|
||||||
|
}
|
||||||
|
|
||||||
|
export interface StrategySelfCheckOutput {
|
||||||
|
readyForIndependentCheck: boolean;
|
||||||
|
summary: string;
|
||||||
|
rationale: string;
|
||||||
|
evidence: string[];
|
||||||
|
remainingGaps: string[];
|
||||||
|
}
|
||||||
|
|
||||||
|
export interface ImplementationResultOutput {
|
||||||
|
taskId: string;
|
||||||
|
status: AttemptStatus;
|
||||||
|
summary: string;
|
||||||
|
changes: string[];
|
||||||
|
verification: VerificationResult[];
|
||||||
|
followUps: string[];
|
||||||
|
touchedFiles: string[];
|
||||||
|
blockers: string[];
|
||||||
|
}
|
||||||
|
|
||||||
|
export interface CheckVerdictOutput {
|
||||||
|
verdict: "approved" | "rejected";
|
||||||
|
summary: string;
|
||||||
|
rationale: string;
|
||||||
|
evidence: string[];
|
||||||
|
remainingTasks: TaskDraft[];
|
||||||
|
}
|
||||||
|
|
||||||
|
export interface RunSnapshot {
|
||||||
|
run: RunRecord;
|
||||||
|
agents: Partial<Record<AgentRole, AgentStateRecord>>;
|
||||||
|
pendingTasks: TaskRecord[];
|
||||||
|
inProgressTasks: TaskRecord[];
|
||||||
|
completedTasks: TaskRecord[];
|
||||||
|
blockedTasks: TaskRecord[];
|
||||||
|
recentAttempts: TaskAttemptRecord[];
|
||||||
|
recentEvents: EventRecord[];
|
||||||
|
approvals: ApprovalRecord[];
|
||||||
|
artifacts: ArtifactRecord[];
|
||||||
|
}
|
||||||
|
|
||||||
|
export interface InvocationArtifacts {
|
||||||
|
promptPath: string;
|
||||||
|
schemaPath: string;
|
||||||
|
rawEventsPath: string;
|
||||||
|
stderrPath: string;
|
||||||
|
lastMessagePath: string;
|
||||||
|
responsePath: string;
|
||||||
|
}
|
||||||
|
|
||||||
|
export interface AgentInvocation<T> {
|
||||||
|
runId: string;
|
||||||
|
role: AgentRole;
|
||||||
|
sessionId: string | null;
|
||||||
|
prompt: string;
|
||||||
|
schemaName: string;
|
||||||
|
schema: Record<string, unknown>;
|
||||||
|
cwd: string;
|
||||||
|
roleConfig: RoleConfig;
|
||||||
|
artifacts: InvocationArtifacts;
|
||||||
|
maxValidationRetries?: number;
|
||||||
|
}
|
||||||
|
|
||||||
|
export interface AgentInvocationResult<T> {
|
||||||
|
sessionId: string | null;
|
||||||
|
output: T;
|
||||||
|
rawMessage: string;
|
||||||
|
rawEvents: string[];
|
||||||
|
stderr: string;
|
||||||
|
artifacts: InvocationArtifacts;
|
||||||
|
}
|
||||||
|
|
||||||
|
export interface RawAgentRunner {
|
||||||
|
invoke<T>(request: AgentInvocation<T>): Promise<AgentInvocationResult<T>>;
|
||||||
|
}
|
||||||
|
|
||||||
|
export interface RunArtifactsIndex {
|
||||||
|
promptPath: string;
|
||||||
|
responsePath: string;
|
||||||
|
schemaPath: string;
|
||||||
|
rawEventsPath: string;
|
||||||
|
stderrPath: string;
|
||||||
|
lastMessagePath: string;
|
||||||
|
}
|
||||||
138
test/orchestrator.test.ts
Normal file
138
test/orchestrator.test.ts
Normal file
|
|
@ -0,0 +1,138 @@
|
||||||
|
import { mkdtempSync, readFileSync } from "node:fs";
|
||||||
|
import { tmpdir } from "node:os";
|
||||||
|
import path from "node:path";
|
||||||
|
import { describe, expect, test } from "vitest";
|
||||||
|
import { ArtifactManager } from "../src/artifacts.js";
|
||||||
|
import { loadConfig } from "../src/config.js";
|
||||||
|
import { AgentROrchestrator } from "../src/orchestrator.js";
|
||||||
|
import { RunStore } from "../src/store.js";
|
||||||
|
import type { AgentInvocation, AgentInvocationResult, RawAgentRunner } from "../src/types.js";
|
||||||
|
|
||||||
|
class ScriptedRunner implements RawAgentRunner {
|
||||||
|
public readonly prompts: string[] = [];
|
||||||
|
|
||||||
|
constructor(
|
||||||
|
private readonly outputs: string[],
|
||||||
|
) {}
|
||||||
|
|
||||||
|
async invoke<T>(request: AgentInvocation<T>): Promise<AgentInvocationResult<T>> {
|
||||||
|
const next = this.outputs.shift();
|
||||||
|
if (!next) {
|
||||||
|
throw new Error("No scripted output remaining.");
|
||||||
|
}
|
||||||
|
|
||||||
|
this.prompts.push(request.prompt);
|
||||||
|
readFileSync(request.artifacts.schemaPath, "utf8");
|
||||||
|
const parsed = JSON.parse(next) as Record<string, unknown>;
|
||||||
|
if (typeof parsed.taskId === "string" && !parsed.taskId) {
|
||||||
|
const match = request.prompt.match(/exact task id `([^`]+)`/);
|
||||||
|
if (match) {
|
||||||
|
parsed.taskId = match[1];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return {
|
||||||
|
sessionId: request.sessionId ?? `session-${this.prompts.length}`,
|
||||||
|
output: parsed as T,
|
||||||
|
rawMessage: JSON.stringify(parsed),
|
||||||
|
rawEvents: ['{"type":"turn.completed"}'],
|
||||||
|
stderr: "",
|
||||||
|
artifacts: request.artifacts,
|
||||||
|
};
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
describe("AgentROrchestrator", () => {
|
||||||
|
test("runs through plan, implementation, self-check, and independent check", async () => {
|
||||||
|
const repoPath = mkdtempSync(path.join(tmpdir(), "agent-r-orchestrator-"));
|
||||||
|
const config = loadConfig(repoPath);
|
||||||
|
const store = new RunStore(config.databasePath);
|
||||||
|
const artifacts = new ArtifactManager(config.runsDir);
|
||||||
|
const runner = new ScriptedRunner([
|
||||||
|
JSON.stringify({
|
||||||
|
decision: "continue",
|
||||||
|
summary: "Implement the CLI skeleton.",
|
||||||
|
rationale: "The project is empty and needs a first vertical slice.",
|
||||||
|
goalProgress: "No implementation exists yet.",
|
||||||
|
risks: [],
|
||||||
|
tasks: [
|
||||||
|
{
|
||||||
|
title: "Create CLI entrypoint",
|
||||||
|
objective: "Build the initial command surface.",
|
||||||
|
acceptanceCriteria: ["A run command exists."],
|
||||||
|
verificationSteps: ["Run the help command."],
|
||||||
|
allowedPaths: ["src", "package.json"],
|
||||||
|
},
|
||||||
|
],
|
||||||
|
}),
|
||||||
|
JSON.stringify({
|
||||||
|
taskId: "",
|
||||||
|
status: "completed",
|
||||||
|
summary: "Added the CLI skeleton.",
|
||||||
|
changes: ["Created an entrypoint."],
|
||||||
|
verification: [{ command: "agent-r --help", outcome: "passed", details: "Help output rendered." }],
|
||||||
|
followUps: [],
|
||||||
|
touchedFiles: ["src/index.ts", "package.json"],
|
||||||
|
blockers: [],
|
||||||
|
}),
|
||||||
|
JSON.stringify({
|
||||||
|
decision: "done",
|
||||||
|
summary: "The requested slice is complete.",
|
||||||
|
rationale: "The only planned task is complete and verified.",
|
||||||
|
goalProgress: "The vertical slice exists.",
|
||||||
|
risks: [],
|
||||||
|
tasks: [],
|
||||||
|
}),
|
||||||
|
JSON.stringify({
|
||||||
|
readyForIndependentCheck: true,
|
||||||
|
summary: "The run is ready for independent review.",
|
||||||
|
rationale: "Implementation and verification are present.",
|
||||||
|
evidence: ["CLI entrypoint exists."],
|
||||||
|
remainingGaps: [],
|
||||||
|
}),
|
||||||
|
JSON.stringify({
|
||||||
|
verdict: "approved",
|
||||||
|
summary: "The goal is satisfied.",
|
||||||
|
rationale: "The requested slice exists and is verified.",
|
||||||
|
evidence: ["CLI entrypoint present."],
|
||||||
|
remainingTasks: [],
|
||||||
|
}),
|
||||||
|
]);
|
||||||
|
|
||||||
|
const orchestrator = new AgentROrchestrator(config, store, artifacts, runner);
|
||||||
|
const run = orchestrator.createRun("Create a CLI skeleton");
|
||||||
|
|
||||||
|
const firstStatus = await orchestrator.runUntilStable(run.id, 10);
|
||||||
|
const snapshot = store.buildSnapshot(run.id);
|
||||||
|
|
||||||
|
expect(firstStatus.status).toBe("done");
|
||||||
|
expect(snapshot.completedTasks).toHaveLength(1);
|
||||||
|
expect(snapshot.approvals).toHaveLength(2);
|
||||||
|
expect(snapshot.pendingTasks).toHaveLength(0);
|
||||||
|
expect(runner.prompts.at(1)).toContain("You must echo the exact task id");
|
||||||
|
});
|
||||||
|
|
||||||
|
test("strategy prompt explicitly permits read-only repository inspection", async () => {
|
||||||
|
const repoPath = mkdtempSync(path.join(tmpdir(), "agent-r-strategy-"));
|
||||||
|
const config = loadConfig(repoPath);
|
||||||
|
const store = new RunStore(config.databasePath);
|
||||||
|
const artifacts = new ArtifactManager(config.runsDir);
|
||||||
|
const runner = new ScriptedRunner([
|
||||||
|
JSON.stringify({
|
||||||
|
decision: "blocked",
|
||||||
|
summary: "Cannot continue.",
|
||||||
|
rationale: "Test stop.",
|
||||||
|
goalProgress: "None.",
|
||||||
|
risks: [],
|
||||||
|
tasks: [],
|
||||||
|
blockedReason: "Stop immediately.",
|
||||||
|
}),
|
||||||
|
]);
|
||||||
|
|
||||||
|
const orchestrator = new AgentROrchestrator(config, store, artifacts, runner);
|
||||||
|
const run = orchestrator.createRun("Inspect repo");
|
||||||
|
await orchestrator.runUntilStable(run.id, 1);
|
||||||
|
|
||||||
|
expect(runner.prompts[0]).toContain("You may inspect the repository directly");
|
||||||
|
expect(runner.prompts[0]).toContain("Do not edit files");
|
||||||
|
});
|
||||||
|
});
|
||||||
40
test/store.test.ts
Normal file
40
test/store.test.ts
Normal file
|
|
@ -0,0 +1,40 @@
|
||||||
|
import { mkdtempSync } from "node:fs";
|
||||||
|
import { tmpdir } from "node:os";
|
||||||
|
import path from "node:path";
|
||||||
|
import { describe, expect, test } from "vitest";
|
||||||
|
import { RunStore } from "../src/store.js";
|
||||||
|
|
||||||
|
describe("RunStore", () => {
|
||||||
|
test("creates runs, tasks, and snapshots", () => {
|
||||||
|
const tempDir = mkdtempSync(path.join(tmpdir(), "agent-r-store-"));
|
||||||
|
const store = new RunStore(path.join(tempDir, "state.sqlite"));
|
||||||
|
const run = store.createRun("Build something", tempDir);
|
||||||
|
|
||||||
|
store.replacePendingTasks(run.id, [
|
||||||
|
{
|
||||||
|
title: "Task 1",
|
||||||
|
objective: "Do work",
|
||||||
|
acceptanceCriteria: ["It works"],
|
||||||
|
verificationSteps: ["Run tests"],
|
||||||
|
allowedPaths: ["src"],
|
||||||
|
},
|
||||||
|
{
|
||||||
|
title: "Task 2",
|
||||||
|
objective: "Do more work",
|
||||||
|
acceptanceCriteria: ["It still works"],
|
||||||
|
verificationSteps: ["Run tests again"],
|
||||||
|
allowedPaths: ["tests"],
|
||||||
|
},
|
||||||
|
]);
|
||||||
|
|
||||||
|
const claimed = store.claimNextPendingTask(run.id);
|
||||||
|
expect(claimed?.status).toBe("in_progress");
|
||||||
|
store.completeTask(claimed!.id, "Finished");
|
||||||
|
store.addEvent(run.id, "test", "custom", "Recorded a test event.", { ok: true });
|
||||||
|
|
||||||
|
const snapshot = store.buildSnapshot(run.id);
|
||||||
|
expect(snapshot.pendingTasks).toHaveLength(1);
|
||||||
|
expect(snapshot.completedTasks).toHaveLength(1);
|
||||||
|
expect(snapshot.recentEvents.at(-1)?.kind).toBe("custom");
|
||||||
|
});
|
||||||
|
});
|
||||||
21
tsconfig.json
Normal file
21
tsconfig.json
Normal file
|
|
@ -0,0 +1,21 @@
|
||||||
|
{
|
||||||
|
"compilerOptions": {
|
||||||
|
"target": "ES2023",
|
||||||
|
"module": "NodeNext",
|
||||||
|
"moduleResolution": "NodeNext",
|
||||||
|
"lib": ["ES2023"],
|
||||||
|
"types": ["node"],
|
||||||
|
"strict": true,
|
||||||
|
"noUncheckedIndexedAccess": true,
|
||||||
|
"exactOptionalPropertyTypes": true,
|
||||||
|
"esModuleInterop": true,
|
||||||
|
"forceConsistentCasingInFileNames": true,
|
||||||
|
"resolveJsonModule": true,
|
||||||
|
"rootDir": "src",
|
||||||
|
"outDir": "dist",
|
||||||
|
"declaration": true,
|
||||||
|
"sourceMap": true,
|
||||||
|
"skipLibCheck": true
|
||||||
|
},
|
||||||
|
"include": ["src/**/*.ts"]
|
||||||
|
}
|
||||||
Loading…
Add table
Reference in a new issue