Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions .github/workflows/e2e.yml
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,14 @@ jobs:
- uses: actions/setup-node@v6
with:
node-version: '24'
- name: Condfigure sandboxing
run: |
sudo apt-get update
sudo apt-get install --yes bubblewrap
sudo sysctl -w kernel.unprivileged_userns_clone=1
if [ -f /proc/sys/kernel/apparmor_restrict_unprivileged_userns ]; then
sudo sysctl -w kernel.apparmor_restrict_unprivileged_userns=0
fi
- name: Install dependencies
run: npm ci
- name: Run e2e tests
Expand Down
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -3,3 +3,4 @@ dist/
*.log
.DS_Store
.idea/
tmp/
105 changes: 78 additions & 27 deletions src/__tests__/CodexACPAgent/e2e/acp-e2e-file-approval.test.ts
Original file line number Diff line number Diff line change
@@ -1,57 +1,108 @@
import fs from "node:fs";
import path from "node:path";
import {afterEach, beforeEach, expect, it} from "vitest";
import {afterEach, beforeEach, expect, it, onTestFinished} from "vitest";
import {AgentMode} from "../../../AgentMode";
import {ApprovalOptionId} from "../../../ApprovalOptionId";
import {
createAuthenticatedFixture,
createPermissionResponder,
describeE2E,
expectNoPermissionRequests,
expectPermissionRequests,
generateFileNameForTest,
type SpawnedAgentFixture,
} from "./acp-e2e-test-utils";

const FILE_NAME = "approval-file.txt";
const FILE_CONTENT = "file approval e2e";

describeE2E("E2E file approval tests", () => {
let fixture: SpawnedAgentFixture;
let sessionId: string;

beforeEach(async () => {
fixture = await createAuthenticatedFixture(AgentMode.ReadOnly);
sessionId = (await fixture.createSession()).sessionId;
});

afterEach(async () => {
await fixture.dispose();
});

async function expectFileApproval(
optionId: ApprovalOptionId,
expectedStopReason: "end_turn" | "cancelled",
): Promise<void> {
fixture.setPermissionResponder(createPermissionResponder("edit", optionId));
const response = await fixture.connection.prompt({
sessionId,
prompt: [{
type: "text",
text: `Create ${FILE_NAME} by editing files directly. Content must be exactly: ${FILE_CONTENT}. Do not use shell commands, and stop if the edit is rejected.`,
}],
});
expect(response.stopReason).toBe(expectedStopReason);
expect(fixture.readPermissionRequests(sessionId, "edit").length).toBe(1);
expect(fixture.readPermissionRequests(sessionId, "execute").length).toBe(0);
}

it("applies approved file edits", async () => {
await expectFileApproval(ApprovalOptionId.AllowOnce, "end_turn");
const filePath = path.join(fixture.workspaceDir, FILE_NAME);
expect(fs.existsSync(filePath)).toBe(true);
expect(fs.readFileSync(filePath, "utf8").trim()).toBe(FILE_CONTENT);
fixture.setPermissionResponder(createPermissionResponder("edit", ApprovalOptionId.AllowOnce));
const sessionId = await editFileDirectly(fixture, path.join(fixture.workspaceDir, generateFileNameForTest()), true);
expectPermissionRequests(fixture, sessionId, {edit: 1, execute: 0});
});

it("does not apply rejected file edits", async () => {
await expectFileApproval(ApprovalOptionId.RejectOnce, "cancelled");
expect(fs.existsSync(path.join(fixture.workspaceDir, FILE_NAME))).toBe(false);
fixture.setPermissionResponder(createPermissionResponder("edit", ApprovalOptionId.RejectOnce));
const sessionId = await editFileDirectly(fixture, path.join(fixture.workspaceDir, generateFileNameForTest()), false);
expectPermissionRequests(fixture, sessionId, {edit: 1, execute: 0});
});
});

describeE2E("E2E Agent mode file permission tests", () => {
let fixture: SpawnedAgentFixture;

beforeEach(async () => {
fixture = await createAuthenticatedFixture(AgentMode.Agent);
});

afterEach(async () => {
await fixture.dispose();
});

it("edits a workspace file without prompting for permission", async () => {
const sessionId = await editFileDirectly(fixture, path.join(fixture.workspaceDir, generateFileNameForTest()), true);
expectNoPermissionRequests(fixture, sessionId);
});

it("can't edit file outside workspace", async () => {
const dir = createDirOutsideWorkspace(fixture);
await editFileDirectly(fixture, path.join(dir, generateFileNameForTest()), false);
});
});

describeE2E("E2E Agent with full access file permission tests", () => {
let fixture: SpawnedAgentFixture;

beforeEach(async () => {
fixture = await createAuthenticatedFixture(AgentMode.AgentFullAccess);
});

afterEach(async () => {
await fixture.dispose();
});

it("edits a file outside workspace without prompting for permission", async () => {
const dir = createDirOutsideWorkspace(fixture);
const sessionId = await editFileDirectly(fixture, path.join(dir, generateFileNameForTest()), true);
expectNoPermissionRequests(fixture, sessionId);
});
});

async function editFileDirectly(
fixture: SpawnedAgentFixture,
filePath: string,
expectSuccess: boolean,
): Promise<string> {
const sessionId = (await fixture.createSession()).sessionId;
await fixture.connection.prompt({
sessionId,
prompt: [{
type: "text",
text: `Create ${filePath} by editing files directly. Content must be exactly: ${FILE_CONTENT}. Do not use shell commands.`,
}],
});
if (expectSuccess) {
expect(fs.readFileSync(filePath, "utf8").trim()).toBe(FILE_CONTENT);
} else {
expect(fs.existsSync(filePath)).toBe(false);
}
return sessionId;
}

function createDirOutsideWorkspace(fixture: SpawnedAgentFixture): string {
const outsideWorkspaceDir = path.join(path.dirname(fixture.workspaceDir), "outside-workspace");
onTestFinished(() => fs.rmSync(outsideWorkspaceDir, {recursive: true, force: true}));
fs.mkdirSync(outsideWorkspaceDir);
return outsideWorkspaceDir;
}
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
import {afterEach, expect, it} from "vitest";
import {AgentMode} from "../../../AgentMode";
import {
createAuthenticatedFixture,
describeE2E,
Expand Down
83 changes: 75 additions & 8 deletions src/__tests__/CodexACPAgent/e2e/acp-e2e-shell-approval.test.ts
Original file line number Diff line number Diff line change
@@ -1,14 +1,17 @@
import fs from "node:fs";
import path from "node:path";
import {afterEach, beforeEach, expect, it, vi} from "vitest";
import {afterEach, beforeEach, expect, it, onTestFinished, vi} from "vitest";
import {AgentMode} from "../../../AgentMode";
import {ApprovalOptionId} from "../../../ApprovalOptionId";
import {
createAuthenticatedFixture,
createPermissionResponse,
createPermissionResponder,
createPermissionResponse,
describeE2E,
expectEndTurn,
expectNoPermissionRequests,
expectPermissionRequests,
generateFileNameForTest,
type SpawnedAgentFixture,
} from "./acp-e2e-test-utils";

Expand Down Expand Up @@ -51,23 +54,66 @@ describeE2E("E2E shell approval tests", () => {
await promptShellCommandTwice();
expect(fs.existsSync(path.join(fixture.workspaceDir, FIRST_FILE_NAME))).toBe(true);
expect(fs.existsSync(path.join(fixture.workspaceDir, SECOND_FILE_NAME))).toBe(false);
expect(fixture.readPermissionRequests(sessionId, "execute").length).toBe(2);
expectPermissionRequests(fixture, sessionId, {execute: 2, edit: 0});
});

it("skips subsequent approvals when allow_always is selected", async () => {
fixture.setPermissionResponder(createPermissionResponder("execute", ApprovalOptionId.AllowAlways));
await promptShellCommandTwice();
expect(fs.existsSync(path.join(fixture.workspaceDir, FIRST_FILE_NAME))).toBe(true);
expect(fs.existsSync(path.join(fixture.workspaceDir, SECOND_FILE_NAME))).toBe(true);
expect(fixture.readPermissionRequests(sessionId, "execute").length).toBe(1);
expectPermissionRequests(fixture, sessionId, {execute: 1, edit: 0});
});

it("prompts for every command when reject_once is selected", async () => {
fixture.setPermissionResponder(createPermissionResponder("execute", ApprovalOptionId.RejectOnce));
await promptShellCommandTwice();
expect(fs.existsSync(path.join(fixture.workspaceDir, FIRST_FILE_NAME))).toBe(false);
expect(fs.existsSync(path.join(fixture.workspaceDir, SECOND_FILE_NAME))).toBe(false);
expect(fixture.readPermissionRequests(sessionId, "execute").length).toBe(2);
expectPermissionRequests(fixture, sessionId, {execute: 2, edit: 0});
});
});

describeE2E("E2E Agent mode shell permission tests", () => {
let fixture: SpawnedAgentFixture;

beforeEach(async () => {
fixture = await createAuthenticatedFixture(AgentMode.Agent);
});

afterEach(async () => {
await fixture.dispose();
});

it("runs a workspace command without prompting for permission", async () => {
const sessionId = await writeToFile(fixture, path.join(fixture.workspaceDir, generateFileNameForTest()));

expectNoPermissionRequests(fixture, sessionId);
});

it("requests permission for a command that writes outside the workspace", async () => {
const dir = createDirOutsideWorkspace(fixture);
fixture.setPermissionResponder(createPermissionResponder("execute", ApprovalOptionId.AllowOnce));
const sessionId = await writeToFile(fixture, path.join(dir, generateFileNameForTest()));
expectPermissionRequests(fixture, sessionId, {execute: 1, edit: 0});
});
});

describeE2E("E2E Agent with full access shell permission tests", () => {
let fixture: SpawnedAgentFixture;

beforeEach(async () => {
fixture = await createAuthenticatedFixture(AgentMode.AgentFullAccess);
});

afterEach(async () => {
await fixture.dispose();
});

it("runs a command outside workspace without prompting for permission", async () => {
const dir = createDirOutsideWorkspace(fixture);
const sessionId = await writeToFile(fixture, path.join(dir, generateFileNameForTest()));
expectNoPermissionRequests(fixture, sessionId);
});
});

Expand All @@ -89,9 +135,7 @@ describeE2E("E2E shell cancellation tests", () => {
}

it("cancels a running shell command", async () => {
fixture = await createAuthenticatedFixture();
fixture.setPermissionResponder(createPermissionResponder("execute", ApprovalOptionId.AllowOnce));

fixture = await createAuthenticatedFixture(AgentMode.AgentFullAccess);
const sessionId = (await fixture.createSession()).sessionId;
const pidFilePath = path.join(fixture.workspaceDir, "cancel-command.pid");
const command = `/bin/sh -c 'echo $$ > "${pidFilePath}"; exec sleep 100'`;
Expand All @@ -116,3 +160,26 @@ describeE2E("E2E shell cancellation tests", () => {
}, {timeout: 5_000});
});
});

async function writeToFile(fixture: SpawnedAgentFixture, filePath: string): Promise<string> {
const content = "hello from e2e";
const command = `printf '${content}' > '${filePath}'`;
const sessionId = (await fixture.createSession()).sessionId;
const response = await fixture.connection.prompt({
sessionId,
prompt: [{
type: "text",
text: `Use your shell tool to run exactly \`${command}\`. Do not modify files any other way.`,
}],
});
expectEndTurn(response);
expect(fs.readFileSync(filePath, "utf8").trim()).toBe(content);
return sessionId;
}

function createDirOutsideWorkspace(fixture: SpawnedAgentFixture): string {
const outsideWorkspaceDir = path.join(path.dirname(fixture.workspaceDir), "outside-workspace");
onTestFinished(() => fs.rmSync(outsideWorkspaceDir, {recursive: true, force: true}));
fs.mkdirSync(outsideWorkspaceDir);
return outsideWorkspaceDir;
}
20 changes: 20 additions & 0 deletions src/__tests__/CodexACPAgent/e2e/acp-e2e-test-utils.ts
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,26 @@ export function expectEndTurn(response: acp.PromptResponse): void {
expect(response.stopReason).toBe("end_turn");
}

export function expectCancelled(response: acp.PromptResponse): void {
expect(response.stopReason).toBe("cancelled");
}

export function generateFileNameForTest(): string {
return `test-file-${crypto.randomUUID()}.txt`;
}

export function expectPermissionRequests(fixture: SpawnedAgentFixture, sessionId: string, requests: {
edit: number,
execute: number,
}): void {
expect(fixture.readPermissionRequests(sessionId, "edit").length).toBe(requests.edit);
expect(fixture.readPermissionRequests(sessionId, "execute").length).toBe(requests.execute);
}

export function expectNoPermissionRequests(fixture: SpawnedAgentFixture, sessionId: string): void {
expectPermissionRequests(fixture, sessionId, { edit: 0, execute: 0 });
}

export async function createAuthenticatedFixture(initialMode?: AgentMode): Promise<SpawnedAgentFixture> {
const apiKey = requireLiveApiKey();
const extraEnv = initialMode ? {INITIAL_AGENT_MODE: initialMode.id} : undefined;
Expand Down
5 changes: 2 additions & 3 deletions src/__tests__/CodexACPAgent/e2e/spawned-agent-fixture.ts
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
import * as acp from "@agentclientprotocol/sdk";
import {type ChildProcessWithoutNullStreams, spawn} from "node:child_process";
import fs from "node:fs";
import os from "node:os";
import path from "node:path";
import {Readable, Writable} from "node:stream";
import {expect, vi} from "vitest";
Expand Down Expand Up @@ -80,9 +79,9 @@ class RuntimePaths {
}

static createTemporary(): RuntimePaths {
const rootDir = fs.mkdtempSync(path.join(os.tmpdir(), "codex-acp-integration-"));
const rootDir = path.join(process.cwd(), "tmp", crypto.randomUUID());
const paths = new RuntimePaths(rootDir);
for (const dir of [paths.codexHome, paths.workspaceDir, paths.appServerLogsDir]) {
for (const dir of [paths.rootDir, paths.codexHome, paths.workspaceDir, paths.appServerLogsDir]) {
fs.mkdirSync(dir, {recursive: true});
}
writeCodexHomeConfig(paths.codexHome, {
Expand Down
Loading