diff --git a/.github/workflows/e2e.yml b/.github/workflows/e2e.yml index 5ecbb3c..219fb61 100644 --- a/.github/workflows/e2e.yml +++ b/.github/workflows/e2e.yml @@ -17,6 +17,14 @@ jobs: - uses: actions/setup-node@v6 with: node-version: '24' + - name: Condfigure sandboxing + run: | + sudo apt-get update + sudo apt-get install --yes bubblewrap + sudo sysctl -w kernel.unprivileged_userns_clone=1 + if [ -f /proc/sys/kernel/apparmor_restrict_unprivileged_userns ]; then + sudo sysctl -w kernel.apparmor_restrict_unprivileged_userns=0 + fi - name: Install dependencies run: npm ci - name: Run e2e tests diff --git a/.gitignore b/.gitignore index 40830d6..2ea6f0d 100644 --- a/.gitignore +++ b/.gitignore @@ -3,3 +3,4 @@ dist/ *.log .DS_Store .idea/ +tmp/ \ No newline at end of file diff --git a/src/__tests__/CodexACPAgent/e2e/acp-e2e-file-approval.test.ts b/src/__tests__/CodexACPAgent/e2e/acp-e2e-file-approval.test.ts index a5b639e..c2a6fee 100644 --- a/src/__tests__/CodexACPAgent/e2e/acp-e2e-file-approval.test.ts +++ b/src/__tests__/CodexACPAgent/e2e/acp-e2e-file-approval.test.ts @@ -1,57 +1,108 @@ import fs from "node:fs"; import path from "node:path"; -import {afterEach, beforeEach, expect, it} from "vitest"; +import {afterEach, beforeEach, expect, it, onTestFinished} from "vitest"; import {AgentMode} from "../../../AgentMode"; import {ApprovalOptionId} from "../../../ApprovalOptionId"; import { createAuthenticatedFixture, createPermissionResponder, describeE2E, + expectNoPermissionRequests, + expectPermissionRequests, + generateFileNameForTest, type SpawnedAgentFixture, } from "./acp-e2e-test-utils"; -const FILE_NAME = "approval-file.txt"; const FILE_CONTENT = "file approval e2e"; describeE2E("E2E file approval tests", () => { let fixture: SpawnedAgentFixture; - let sessionId: string; beforeEach(async () => { fixture = await createAuthenticatedFixture(AgentMode.ReadOnly); - sessionId = (await fixture.createSession()).sessionId; }); afterEach(async () => { await fixture.dispose(); }); - async function expectFileApproval( - optionId: ApprovalOptionId, - expectedStopReason: "end_turn" | "cancelled", - ): Promise { - fixture.setPermissionResponder(createPermissionResponder("edit", optionId)); - const response = await fixture.connection.prompt({ - sessionId, - prompt: [{ - type: "text", - text: `Create ${FILE_NAME} by editing files directly. Content must be exactly: ${FILE_CONTENT}. Do not use shell commands, and stop if the edit is rejected.`, - }], - }); - expect(response.stopReason).toBe(expectedStopReason); - expect(fixture.readPermissionRequests(sessionId, "edit").length).toBe(1); - expect(fixture.readPermissionRequests(sessionId, "execute").length).toBe(0); - } - it("applies approved file edits", async () => { - await expectFileApproval(ApprovalOptionId.AllowOnce, "end_turn"); - const filePath = path.join(fixture.workspaceDir, FILE_NAME); - expect(fs.existsSync(filePath)).toBe(true); - expect(fs.readFileSync(filePath, "utf8").trim()).toBe(FILE_CONTENT); + fixture.setPermissionResponder(createPermissionResponder("edit", ApprovalOptionId.AllowOnce)); + const sessionId = await editFileDirectly(fixture, path.join(fixture.workspaceDir, generateFileNameForTest()), true); + expectPermissionRequests(fixture, sessionId, {edit: 1, execute: 0}); }); it("does not apply rejected file edits", async () => { - await expectFileApproval(ApprovalOptionId.RejectOnce, "cancelled"); - expect(fs.existsSync(path.join(fixture.workspaceDir, FILE_NAME))).toBe(false); + fixture.setPermissionResponder(createPermissionResponder("edit", ApprovalOptionId.RejectOnce)); + const sessionId = await editFileDirectly(fixture, path.join(fixture.workspaceDir, generateFileNameForTest()), false); + expectPermissionRequests(fixture, sessionId, {edit: 1, execute: 0}); + }); +}); + +describeE2E("E2E Agent mode file permission tests", () => { + let fixture: SpawnedAgentFixture; + + beforeEach(async () => { + fixture = await createAuthenticatedFixture(AgentMode.Agent); + }); + + afterEach(async () => { + await fixture.dispose(); + }); + + it("edits a workspace file without prompting for permission", async () => { + const sessionId = await editFileDirectly(fixture, path.join(fixture.workspaceDir, generateFileNameForTest()), true); + expectNoPermissionRequests(fixture, sessionId); + }); + + it("can't edit file outside workspace", async () => { + const dir = createDirOutsideWorkspace(fixture); + await editFileDirectly(fixture, path.join(dir, generateFileNameForTest()), false); }); }); + +describeE2E("E2E Agent with full access file permission tests", () => { + let fixture: SpawnedAgentFixture; + + beforeEach(async () => { + fixture = await createAuthenticatedFixture(AgentMode.AgentFullAccess); + }); + + afterEach(async () => { + await fixture.dispose(); + }); + + it("edits a file outside workspace without prompting for permission", async () => { + const dir = createDirOutsideWorkspace(fixture); + const sessionId = await editFileDirectly(fixture, path.join(dir, generateFileNameForTest()), true); + expectNoPermissionRequests(fixture, sessionId); + }); +}); + +async function editFileDirectly( + fixture: SpawnedAgentFixture, + filePath: string, + expectSuccess: boolean, +): Promise { + const sessionId = (await fixture.createSession()).sessionId; + await fixture.connection.prompt({ + sessionId, + prompt: [{ + type: "text", + text: `Create ${filePath} by editing files directly. Content must be exactly: ${FILE_CONTENT}. Do not use shell commands.`, + }], + }); + if (expectSuccess) { + expect(fs.readFileSync(filePath, "utf8").trim()).toBe(FILE_CONTENT); + } else { + expect(fs.existsSync(filePath)).toBe(false); + } + return sessionId; +} + +function createDirOutsideWorkspace(fixture: SpawnedAgentFixture): string { + const outsideWorkspaceDir = path.join(path.dirname(fixture.workspaceDir), "outside-workspace"); + onTestFinished(() => fs.rmSync(outsideWorkspaceDir, {recursive: true, force: true})); + fs.mkdirSync(outsideWorkspaceDir); + return outsideWorkspaceDir; +} diff --git a/src/__tests__/CodexACPAgent/e2e/acp-e2e-session-persistence.test.ts b/src/__tests__/CodexACPAgent/e2e/acp-e2e-session-persistence.test.ts index c295380..c76ed05 100644 --- a/src/__tests__/CodexACPAgent/e2e/acp-e2e-session-persistence.test.ts +++ b/src/__tests__/CodexACPAgent/e2e/acp-e2e-session-persistence.test.ts @@ -1,5 +1,4 @@ import {afterEach, expect, it} from "vitest"; -import {AgentMode} from "../../../AgentMode"; import { createAuthenticatedFixture, describeE2E, diff --git a/src/__tests__/CodexACPAgent/e2e/acp-e2e-shell-approval.test.ts b/src/__tests__/CodexACPAgent/e2e/acp-e2e-shell-approval.test.ts index fedc87c..fdfcc55 100644 --- a/src/__tests__/CodexACPAgent/e2e/acp-e2e-shell-approval.test.ts +++ b/src/__tests__/CodexACPAgent/e2e/acp-e2e-shell-approval.test.ts @@ -1,14 +1,17 @@ import fs from "node:fs"; import path from "node:path"; -import {afterEach, beforeEach, expect, it, vi} from "vitest"; +import {afterEach, beforeEach, expect, it, onTestFinished, vi} from "vitest"; import {AgentMode} from "../../../AgentMode"; import {ApprovalOptionId} from "../../../ApprovalOptionId"; import { createAuthenticatedFixture, - createPermissionResponse, createPermissionResponder, + createPermissionResponse, describeE2E, expectEndTurn, + expectNoPermissionRequests, + expectPermissionRequests, + generateFileNameForTest, type SpawnedAgentFixture, } from "./acp-e2e-test-utils"; @@ -51,7 +54,7 @@ describeE2E("E2E shell approval tests", () => { await promptShellCommandTwice(); expect(fs.existsSync(path.join(fixture.workspaceDir, FIRST_FILE_NAME))).toBe(true); expect(fs.existsSync(path.join(fixture.workspaceDir, SECOND_FILE_NAME))).toBe(false); - expect(fixture.readPermissionRequests(sessionId, "execute").length).toBe(2); + expectPermissionRequests(fixture, sessionId, {execute: 2, edit: 0}); }); it("skips subsequent approvals when allow_always is selected", async () => { @@ -59,7 +62,7 @@ describeE2E("E2E shell approval tests", () => { await promptShellCommandTwice(); expect(fs.existsSync(path.join(fixture.workspaceDir, FIRST_FILE_NAME))).toBe(true); expect(fs.existsSync(path.join(fixture.workspaceDir, SECOND_FILE_NAME))).toBe(true); - expect(fixture.readPermissionRequests(sessionId, "execute").length).toBe(1); + expectPermissionRequests(fixture, sessionId, {execute: 1, edit: 0}); }); it("prompts for every command when reject_once is selected", async () => { @@ -67,7 +70,50 @@ describeE2E("E2E shell approval tests", () => { await promptShellCommandTwice(); expect(fs.existsSync(path.join(fixture.workspaceDir, FIRST_FILE_NAME))).toBe(false); expect(fs.existsSync(path.join(fixture.workspaceDir, SECOND_FILE_NAME))).toBe(false); - expect(fixture.readPermissionRequests(sessionId, "execute").length).toBe(2); + expectPermissionRequests(fixture, sessionId, {execute: 2, edit: 0}); + }); +}); + +describeE2E("E2E Agent mode shell permission tests", () => { + let fixture: SpawnedAgentFixture; + + beforeEach(async () => { + fixture = await createAuthenticatedFixture(AgentMode.Agent); + }); + + afterEach(async () => { + await fixture.dispose(); + }); + + it("runs a workspace command without prompting for permission", async () => { + const sessionId = await writeToFile(fixture, path.join(fixture.workspaceDir, generateFileNameForTest())); + + expectNoPermissionRequests(fixture, sessionId); + }); + + it("requests permission for a command that writes outside the workspace", async () => { + const dir = createDirOutsideWorkspace(fixture); + fixture.setPermissionResponder(createPermissionResponder("execute", ApprovalOptionId.AllowOnce)); + const sessionId = await writeToFile(fixture, path.join(dir, generateFileNameForTest())); + expectPermissionRequests(fixture, sessionId, {execute: 1, edit: 0}); + }); +}); + +describeE2E("E2E Agent with full access shell permission tests", () => { + let fixture: SpawnedAgentFixture; + + beforeEach(async () => { + fixture = await createAuthenticatedFixture(AgentMode.AgentFullAccess); + }); + + afterEach(async () => { + await fixture.dispose(); + }); + + it("runs a command outside workspace without prompting for permission", async () => { + const dir = createDirOutsideWorkspace(fixture); + const sessionId = await writeToFile(fixture, path.join(dir, generateFileNameForTest())); + expectNoPermissionRequests(fixture, sessionId); }); }); @@ -89,9 +135,7 @@ describeE2E("E2E shell cancellation tests", () => { } it("cancels a running shell command", async () => { - fixture = await createAuthenticatedFixture(); - fixture.setPermissionResponder(createPermissionResponder("execute", ApprovalOptionId.AllowOnce)); - + fixture = await createAuthenticatedFixture(AgentMode.AgentFullAccess); const sessionId = (await fixture.createSession()).sessionId; const pidFilePath = path.join(fixture.workspaceDir, "cancel-command.pid"); const command = `/bin/sh -c 'echo $$ > "${pidFilePath}"; exec sleep 100'`; @@ -116,3 +160,26 @@ describeE2E("E2E shell cancellation tests", () => { }, {timeout: 5_000}); }); }); + +async function writeToFile(fixture: SpawnedAgentFixture, filePath: string): Promise { + const content = "hello from e2e"; + const command = `printf '${content}' > '${filePath}'`; + const sessionId = (await fixture.createSession()).sessionId; + const response = await fixture.connection.prompt({ + sessionId, + prompt: [{ + type: "text", + text: `Use your shell tool to run exactly \`${command}\`. Do not modify files any other way.`, + }], + }); + expectEndTurn(response); + expect(fs.readFileSync(filePath, "utf8").trim()).toBe(content); + return sessionId; +} + +function createDirOutsideWorkspace(fixture: SpawnedAgentFixture): string { + const outsideWorkspaceDir = path.join(path.dirname(fixture.workspaceDir), "outside-workspace"); + onTestFinished(() => fs.rmSync(outsideWorkspaceDir, {recursive: true, force: true})); + fs.mkdirSync(outsideWorkspaceDir); + return outsideWorkspaceDir; +} diff --git a/src/__tests__/CodexACPAgent/e2e/acp-e2e-test-utils.ts b/src/__tests__/CodexACPAgent/e2e/acp-e2e-test-utils.ts index fad44b3..429e79d 100644 --- a/src/__tests__/CodexACPAgent/e2e/acp-e2e-test-utils.ts +++ b/src/__tests__/CodexACPAgent/e2e/acp-e2e-test-utils.ts @@ -26,6 +26,26 @@ export function expectEndTurn(response: acp.PromptResponse): void { expect(response.stopReason).toBe("end_turn"); } +export function expectCancelled(response: acp.PromptResponse): void { + expect(response.stopReason).toBe("cancelled"); +} + +export function generateFileNameForTest(): string { + return `test-file-${crypto.randomUUID()}.txt`; +} + +export function expectPermissionRequests(fixture: SpawnedAgentFixture, sessionId: string, requests: { + edit: number, + execute: number, +}): void { + expect(fixture.readPermissionRequests(sessionId, "edit").length).toBe(requests.edit); + expect(fixture.readPermissionRequests(sessionId, "execute").length).toBe(requests.execute); +} + +export function expectNoPermissionRequests(fixture: SpawnedAgentFixture, sessionId: string): void { + expectPermissionRequests(fixture, sessionId, { edit: 0, execute: 0 }); +} + export async function createAuthenticatedFixture(initialMode?: AgentMode): Promise { const apiKey = requireLiveApiKey(); const extraEnv = initialMode ? {INITIAL_AGENT_MODE: initialMode.id} : undefined; diff --git a/src/__tests__/CodexACPAgent/e2e/spawned-agent-fixture.ts b/src/__tests__/CodexACPAgent/e2e/spawned-agent-fixture.ts index 17935e4..217c112 100644 --- a/src/__tests__/CodexACPAgent/e2e/spawned-agent-fixture.ts +++ b/src/__tests__/CodexACPAgent/e2e/spawned-agent-fixture.ts @@ -1,7 +1,6 @@ import * as acp from "@agentclientprotocol/sdk"; import {type ChildProcessWithoutNullStreams, spawn} from "node:child_process"; import fs from "node:fs"; -import os from "node:os"; import path from "node:path"; import {Readable, Writable} from "node:stream"; import {expect, vi} from "vitest"; @@ -80,9 +79,9 @@ class RuntimePaths { } static createTemporary(): RuntimePaths { - const rootDir = fs.mkdtempSync(path.join(os.tmpdir(), "codex-acp-integration-")); + const rootDir = path.join(process.cwd(), "tmp", crypto.randomUUID()); const paths = new RuntimePaths(rootDir); - for (const dir of [paths.codexHome, paths.workspaceDir, paths.appServerLogsDir]) { + for (const dir of [paths.rootDir, paths.codexHome, paths.workspaceDir, paths.appServerLogsDir]) { fs.mkdirSync(dir, {recursive: true}); } writeCodexHomeConfig(paths.codexHome, {