EntityProcess · christso · Dec 31, 2025 · Dec 31, 2025 · Dec 31, 2025 · Dec 31, 2025
diff --git a/.changeset/add-pi-coding-agent-provider.md b/.changeset/add-pi-coding-agent-provider.md
@@ -0,0 +1,14 @@
+---
+"@agentv/core": minor
+"agentv": minor
+---
+
+Add Pi Coding Agent provider and default system prompts for agent evaluations
+
+- New `pi-coding-agent` provider for the Pi Coding Agent CLI from pi-mono
+- Support file attachments using Pi's native `@path` syntax
+- Extract tool trajectory/traces from Pi's JSONL output
+- Display log file paths in console during eval runs
+- Add `log_format` option ('summary' or 'json') for log verbosity
+- Add default system prompt for Pi and Codex providers instructing agents to include code in response using git diff format
+- Add `system_prompt` config option to override default behavior via targets.yaml
diff --git a/README.md b/README.md
@@ -1,6 +1,6 @@
 # AgentV
 
-A TypeScript-based AI agent evaluation and optimization framework using YAML specifications to score task completion. Built for modern development workflows with first-class support for VS Code Copilot, OpenAI Codex CLI and Azure OpenAI.
+A TypeScript-based AI agent evaluation and optimization framework using YAML specifications to score task completion. Built for modern development workflows with first-class support for VS Code Copilot, OpenAI Codex CLI, Pi Coding Agent, and Azure OpenAI.
 
 ## Installation and Setup
 
@@ -162,7 +162,7 @@ Execution targets in `.agentv/targets.yaml` decouple evals from providers/settin
 Each target specifies:
 
 - `name`: Unique identifier for the target
-- `provider`: The model provider (`azure`, `anthropic`, `gemini`, `codex`, `vscode`, `vscode-insiders`, `cli`, or `mock`)
+- `provider`: The model provider (`azure`, `anthropic`, `gemini`, `codex`, `pi-coding-agent`, `vscode`, `vscode-insiders`, `cli`, or `mock`)
 - Provider-specific configuration fields at the top level (no `settings` wrapper needed)
 - Optional fields: `judge_target`, `workers`, `provider_batching`
 
@@ -240,6 +240,27 @@ Note: Environment variables are referenced using `${{ VARIABLE_NAME }}` syntax.
 Codex targets require the standalone `codex` CLI and a configured profile (via `codex configure`) so credentials are stored in `~/.codex/config` (or whatever path the CLI already uses). AgentV mirrors all guideline and attachment files into a fresh scratch workspace, so the `file://` preread links remain valid even when the CLI runs outside your repo tree.
 Confirm the CLI works by running `codex exec --json --profile <name> "ping"` (or any supported dry run) before starting an eval. This prints JSONL events; seeing `item.completed` messages indicates the CLI is healthy.
 
+**Pi Coding Agent targets:**
+
+```yaml
+- name: pi
+  provider: pi-coding-agent
+  judge_target: gemini_base
+  executable: ${{ PI_CLI_PATH }}            # Optional: defaults to `pi` if omitted
+  pi_provider: google                       # google, anthropic, openai, groq, xai, openrouter
+  model: ${{ GEMINI_MODEL_NAME }}
+  api_key: ${{ GOOGLE_GENERATIVE_AI_API_KEY }}
+  tools: read,bash,edit,write               # Available tools for the agent
+  timeout_seconds: 180
+  cwd: ${{ PI_WORKSPACE_DIR }}              # Optional: run in specific directory
+  log_format: json                          # 'summary' (default) or 'json' for full logs
+  # system_prompt: optional override for the default system prompt
+```
+
+Pi Coding Agent is an autonomous coding CLI from [pi-mono](https://github.com/badlogic/pi-mono). Install it globally with `npm install -g @mariozechner/pi-coding-agent` (or use a local path via `executable`). It supports multiple LLM providers and outputs JSONL events. AgentV extracts tool trajectories from the output for trace-based evaluation. File attachments are passed using Pi's native `@path` syntax.
+
+By default, a system prompt instructs the agent to include code in its response (required for evaluation scoring). Use `system_prompt` to override this behavior.
+
 ## Writing Custom Evaluators
 
 ### Code Evaluator I/O Contract

diff --git a/apps/cli/README.md b/apps/cli/README.md
@@ -1,6 +1,6 @@
 # AgentV
 
-A TypeScript-based AI agent evaluation and optimization framework using YAML specifications to score task completion. Built for modern development workflows with first-class support for VS Code Copilot, OpenAI Codex CLI and Azure OpenAI.
+A TypeScript-based AI agent evaluation and optimization framework using YAML specifications to score task completion. Built for modern development workflows with first-class support for VS Code Copilot, OpenAI Codex CLI, Pi Coding Agent, and Azure OpenAI.
 
 ## Installation and Setup
 
@@ -162,7 +162,7 @@ Execution targets in `.agentv/targets.yaml` decouple evals from providers/settin
 Each target specifies:
 
 - `name`: Unique identifier for the target
-- `provider`: The model provider (`azure`, `anthropic`, `gemini`, `codex`, `vscode`, `vscode-insiders`, `cli`, or `mock`)
+- `provider`: The model provider (`azure`, `anthropic`, `gemini`, `codex`, `pi-coding-agent`, `vscode`, `vscode-insiders`, `cli`, or `mock`)
 - Provider-specific configuration fields at the top level (no `settings` wrapper needed)
 - Optional fields: `judge_target`, `workers`, `provider_batching`
 
@@ -240,6 +240,27 @@ Note: Environment variables are referenced using `${{ VARIABLE_NAME }}` syntax.
 Codex targets require the standalone `codex` CLI and a configured profile (via `codex configure`) so credentials are stored in `~/.codex/config` (or whatever path the CLI already uses). AgentV mirrors all guideline and attachment files into a fresh scratch workspace, so the `file://` preread links remain valid even when the CLI runs outside your repo tree.
 Confirm the CLI works by running `codex exec --json --profile <name> "ping"` (or any supported dry run) before starting an eval. This prints JSONL events; seeing `item.completed` messages indicates the CLI is healthy.
 
+**Pi Coding Agent targets:**
+
+```yaml
+- name: pi
+  provider: pi-coding-agent
+  judge_target: gemini_base
+  executable: ${{ PI_CLI_PATH }}            # Optional: defaults to `pi` if omitted
+  pi_provider: google                       # google, anthropic, openai, groq, xai, openrouter
+  model: ${{ GEMINI_MODEL_NAME }}
+  api_key: ${{ GOOGLE_GENERATIVE_AI_API_KEY }}
+  tools: read,bash,edit,write               # Available tools for the agent
+  timeout_seconds: 180
+  cwd: ${{ PI_WORKSPACE_DIR }}              # Optional: run in specific directory
+  log_format: json                          # 'summary' (default) or 'json' for full logs
+  # system_prompt: optional override for the default system prompt
+```
+
+Pi Coding Agent is an autonomous coding CLI from [pi-mono](https://github.com/badlogic/pi-mono). Install it globally with `npm install -g @mariozechner/pi-coding-agent` (or use a local path via `executable`). It supports multiple LLM providers and outputs JSONL events. AgentV extracts tool trajectories from the output for trace-based evaluation. File attachments are passed using Pi's native `@path` syntax.
+
+By default, a system prompt instructs the agent to include code in its response (required for evaluation scoring). Use `system_prompt` to override this behavior.
+
 ## Writing Custom Evaluators
 
 ### Code Evaluator I/O Contract

diff --git a/apps/cli/package.json b/apps/cli/package.json
@@ -14,10 +14,7 @@
   "bin": {
     "agentv": "./dist/cli.js"
   },
-  "files": [
-    "dist",
-    "README.md"
-  ],
+  "files": ["dist", "README.md"],
   "scripts": {
     "dev": "bun --watch src/index.ts",
     "build": "tsup && bun run copy-readme",

diff --git a/apps/cli/src/commands/eval/progress-display.ts b/apps/cli/src/commands/eval/progress-display.ts
@@ -78,7 +78,7 @@ export class ProgressDisplay {
     }
   }
 
-  addLogPaths(paths: readonly string[]): void {
+  addLogPaths(paths: readonly string[], provider?: 'codex' | 'pi'): void {
     const newPaths: string[] = [];
     for (const path of paths) {
       if (this.logPathSet.has(path)) {
@@ -96,7 +96,8 @@ export class ProgressDisplay {
 
     if (!this.hasPrintedLogHeader) {
       console.log('');
-      console.log('Codex CLI logs:');
+      const label = provider === 'pi' ? 'Pi Coding Agent' : 'Codex CLI';
+      console.log(`${label} logs:`);
       this.hasPrintedLogHeader = true;
     }
 

diff --git a/apps/cli/src/commands/eval/run-eval.ts b/apps/cli/src/commands/eval/run-eval.ts
@@ -11,6 +11,7 @@ import {
   ensureVSCodeSubagents,
   loadEvalCases,
   subscribeToCodexLogEntries,
+  subscribeToPiLogEntries,
 } from '@agentv/core';
 
 import { loadEnvFromHierarchy } from './env.js';
@@ -170,7 +171,7 @@ type ProgressReporter = {
   setTotal(total: number): void;
   update(workerId: number, progress: WorkerProgress): void;
   finish(): void;
-  addLogPaths(paths: readonly string[]): void;
+  addLogPaths(paths: readonly string[], provider?: 'codex' | 'pi'): void;
 };
 
 function createProgressReporter(
@@ -185,7 +186,8 @@ function createProgressReporter(
     update: (workerId: number, progress: WorkerProgress) =>
       display.updateWorker({ ...progress, workerId }),
     finish: () => display.finish(),
-    addLogPaths: (paths: readonly string[]) => display.addLogPaths(paths),
+    addLogPaths: (paths: readonly string[], provider?: 'codex' | 'pi') =>
+      display.addLogPaths(paths, provider),
   };
 }
 
@@ -494,7 +496,15 @@ export async function runEvalCommand(input: RunEvalCommandInput): Promise<void>
       return;
     }
     seenCodexLogPaths.add(entry.filePath);
-    progressReporter.addLogPaths([entry.filePath]);
+    progressReporter.addLogPaths([entry.filePath], 'codex');
+  });
+  const seenPiLogPaths = new Set<string>();
+  const unsubscribePiLogs = subscribeToPiLogEntries((entry) => {
+    if (!entry.filePath || seenPiLogPaths.has(entry.filePath)) {
+      return;
+    }
+    seenPiLogPaths.add(entry.filePath);
+    progressReporter.addLogPaths([entry.filePath], 'pi');
   });
   for (const [testFilePath, meta] of fileMetadata.entries()) {
     for (const evalId of meta.evalIds) {
@@ -553,6 +563,7 @@ export async function runEvalCommand(input: RunEvalCommandInput): Promise<void>
     }
   } finally {
     unsubscribeCodexLogs();
+    unsubscribePiLogs();
     await outputWriter.close().catch(() => undefined);
   }
 }

diff --git a/examples/features/.agentv/targets.yaml b/examples/features/.agentv/targets.yaml
@@ -55,6 +55,19 @@ targets:
     api_key: ${{ GOOGLE_GENERATIVE_AI_API_KEY }}
     model: ${{ GEMINI_MODEL_NAME }}
 
+  # Pi Coding Agent - autonomous coding CLI from pi-mono
+  - name: pi
+    provider: pi-coding-agent
+    executable: ${{ PI_CLI_PATH }}  # Optional: defaults to `pi` if omitted
+    pi_provider: google
+    model: ${{ GEMINI_MODEL_NAME }}
+    api_key: ${{ GOOGLE_GENERATIVE_AI_API_KEY }}
+    judge_target: gemini_base
+    timeout_seconds: 180
+    tools: read,bash,edit,write  # Default tools for coding tasks
+    log_format: json  # 'summary' (default) or 'json' for raw event logs
+    # system_prompt: optional override (default instructs agent to include code in response)
+
   - name: local_cli
     provider: cli
     judge_target: azure_base

diff --git a/packages/core/package.json b/packages/core/package.json
@@ -36,10 +36,7 @@
     "test:watch": "bun test --watch",
     "diagnostics:azure": "bun src/diagnostics/azure-deployment-diag.ts"
   },
-  "files": [
-    "dist",
-    "README.md"
-  ],
+  "files": ["dist", "README.md"],
   "dependencies": {
     "@ai-sdk/anthropic": "^2.0.53",
     "@ai-sdk/azure": "^2.0.78",

diff --git a/packages/core/src/evaluation/providers/codex.ts b/packages/core/src/evaluation/providers/codex.ts
@@ -17,6 +17,16 @@ const WORKSPACE_PREFIX = 'agentv-codex-';
 const PROMPT_FILENAME = 'prompt.md';
 const JSONL_TYPE_ITEM_COMPLETED = 'item.completed';
 
+/**
+ * Default system prompt for Codex CLI evaluations.
+ * Ensures the agent returns code in its response rather than just writing files.
+ */
+const DEFAULT_SYSTEM_PROMPT = `**IMPORTANT**: Follow these instructions for your response:
+- Do NOT create any additional output files in the workspace.
+- All intended file outputs/changes MUST be written in your response.
+- For each intended file, include the relative path and unified git diff following the convention \`diff --git ...\`.
+This is required for evaluation scoring.`;
+
 interface CodexRunOptions {
   readonly executable: string;
   readonly args: readonly string[];
@@ -72,7 +82,9 @@ export class CodexProvider implements Provider {
     const workspaceRoot = await this.createWorkspace();
     const logger = await this.createStreamLogger(request).catch(() => undefined);
     try {
-      const promptContent = buildPromptDocument(request, inputFiles);
+      const basePrompt = buildPromptDocument(request, inputFiles);
+      const systemPrompt = this.config.systemPrompt ?? DEFAULT_SYSTEM_PROMPT;
+      const promptContent = `${systemPrompt}\n\n${basePrompt}`;
       const promptFile = path.join(workspaceRoot, PROMPT_FILENAME);
       await writeFile(promptFile, promptContent, 'utf8');
 

diff --git a/packages/core/src/evaluation/providers/index.ts b/packages/core/src/evaluation/providers/index.ts
@@ -2,6 +2,7 @@ import { AnthropicProvider, AzureProvider, GeminiProvider } from './ai-sdk.js';
 import { CliProvider } from './cli.js';
 import { CodexProvider } from './codex.js';
 import { MockProvider } from './mock.js';
+import { PiCodingAgentProvider } from './pi-coding-agent.js';
 import type { ResolvedTarget } from './targets.js';
 import { resolveTargetDefinition } from './targets.js';
 import type { EnvLookup, Provider, TargetDefinition } from './types.js';
@@ -22,6 +23,7 @@ export type {
   CliResolvedConfig,
   GeminiResolvedConfig,
   MockResolvedConfig,
+  PiCodingAgentResolvedConfig,
   ResolvedTarget,
   VSCodeResolvedConfig,
 } from './targets.js';
@@ -34,6 +36,7 @@ export {
   type EnsureSubagentsResult,
 } from './vscode.js';
 export { consumeCodexLogEntries, subscribeToCodexLogEntries } from './codex-log-tracker.js';
+export { consumePiLogEntries, subscribeToPiLogEntries } from './pi-log-tracker.js';
 
 export function createProvider(target: ResolvedTarget): Provider {
   switch (target.kind) {
@@ -47,6 +50,8 @@ export function createProvider(target: ResolvedTarget): Provider {
       return new CliProvider(target.name, target.config);
     case 'codex':
       return new CodexProvider(target.name, target.config);
+    case 'pi-coding-agent':
+      return new PiCodingAgentProvider(target.name, target.config);
     case 'mock':
       return new MockProvider(target.name, target.config);
     case 'vscode':