Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
34 changes: 34 additions & 0 deletions src/controller/service.ts
Original file line number Diff line number Diff line change
Expand Up @@ -288,6 +288,22 @@ const validateAndFixJavaScript = (code: string): string => {
(_match, selector: string) => `.matches(\`${selector}\`)`
);

// Auto-wrap bare async function expressions in IIFE so they actually
// execute. The LLM commonly emits `async () => { ... }` (or
// `async function () { ... }`) without trailing `()`, which evaluates
// to a function reference and silently does nothing — this is the
// root cause behind credential-extraction failures where
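// `clipboard.writeText` etc. never runs. Heuristic: when the trimmed code
// starts with an async arrow or `async function` and does not end in `)`
// (optionally followed by `;`), wrap it in `(...)()`. Note that an
// expression-bodied arrow ending in a call, e.g. `async () => foo()`,
// also ends in `)` and is therefore left unwrapped by this check.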
const trimmed = fixedCode.trim();
const startsAsyncArrow = /^async\s*(?:\([^)]*\)|[A-Za-z_$][\w$]*)\s*=>/.test(
trimmed
);
const startsAsyncFn = /^async\s+function\b/.test(trimmed);
if ((startsAsyncArrow || startsAsyncFn) && !/\)\s*;?\s*$/.test(trimmed)) {
fixedCode = `(${trimmed})()`;
}

return fixedCode;
};

Expand Down Expand Up @@ -2591,6 +2607,7 @@ You will be given a query and the markdown of a webpage that has been filtered t
});

type EvaluateAction = z.infer<typeof EvaluateActionSchema>;
const evaluateLogger = this.logger;
this.registry.action(
'Execute browser JavaScript on the current page and return the result.',
{ param_model: EvaluateActionSchema }
Expand All @@ -2606,6 +2623,11 @@ You will be given a query and the markdown of a webpage that has been filtered t
}

const validatedCode = validateAndFixJavaScript(params.code);
if (validatedCode !== params.code) {
evaluateLogger.info(
`🛠 evaluate: auto-fixed input (len ${params.code.length} → ${validatedCode.length})`
);
}

const payload = (await page.evaluate(
async ({ code }: { code: string }) => {
Expand Down Expand Up @@ -2671,6 +2693,18 @@ You will be given a query and the markdown of a webpage that has been filtered t
rendered = `${rendered.slice(0, maxChars - 50)}\n... [Truncated after 20000 characters]`;
}

// Log the evaluate return value so callers (and trace consumers) can
// see whether the executed code actually produced a result. Truncate
// aggressively — the agent already has the full value via
// extracted_content; this log is for debugging the "code ran but did
// nothing" failure mode (e.g. unwrapped async fn expressions).
const previewMax = 500;
const preview =
rendered.length > previewMax
? `${rendered.slice(0, previewMax)}... [+${rendered.length - previewMax} chars]`
: rendered;
evaluateLogger.info(`🧪 evaluate result (${rendered.length} chars): ${preview}`);

const maxMemoryChars = 10000;
const includeExtractedContentOnlyOnce = rendered.length >= maxMemoryChars;
const longTermMemory = includeExtractedContentOnlyOnce
Expand Down
53 changes: 53 additions & 0 deletions test/controller.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2308,6 +2308,59 @@ describe('Regression Coverage', () => {
expect(result.error).toBeNull();
});

// Regression suite for the evaluate action's input fix-up: a bare async
// function expression must reach the page wrapped as an immediately-invoked
// expression, while any other input must be forwarded unchanged.
describe('evaluate auto-wraps bare function expressions in IIFE', () => {
  /**
   * Builds a page double whose `evaluate` mock resolves with the code it
   * was given, so each test can inspect exactly what was forwarded.
   */
  function buildPage() {
    return {
      evaluate: vi.fn(async (_handler: unknown, args: { code: string }) => {
        return { ok: true, result: args.code };
      }),
      url: vi.fn(() => 'https://example.com'),
    };
  }

  /**
   * Executes the `evaluate` action on a fresh Controller against a stubbed
   * browser session.
   *
   * @param code - JavaScript source passed as the action's `code` param.
   * @returns The action result plus the `code` value captured from the
   *   first `page.evaluate` call (undefined if it was never called).
   */
  async function runEvaluate(code: string) {
    const controller = new Controller();
    const page = buildPage();
    const sessionStub = {
      get_current_page: vi.fn(async () => page),
    };

    const result = await controller.registry.execute_action(
      'evaluate',
      { code },
      { browser_session: sessionStub as any }
    );

    // The second positional argument of page.evaluate carries `{ code }`.
    const firstCallArgs = page.evaluate.mock.calls[0];
    const forwarded = firstCallArgs?.[1] as { code: string } | undefined;
    return { result, evaluatedCode: forwarded?.code };
  }

  it('wraps a bare async arrow expression in an IIFE', async () => {
    const source = `async () => {\n return { username: document.querySelector('input[type=email]')?.value };\n}`;
    const outcome = await runEvaluate(source);

    expect(outcome.result.error).toBeNull();
    expect(outcome.evaluatedCode?.startsWith('(async () => {')).toBe(true);
    expect(outcome.evaluatedCode?.endsWith('})()')).toBe(true);
  });

  // The remaining cases share one shape: feed `input`, expect the forwarded
  // code to equal `expected` exactly. Registration order matches the list.
  const exactCases: ReadonlyArray<{
    title: string;
    input: string;
    expected: string;
  }> = [
    {
      title: 'wraps a bare async function expression in an IIFE',
      input: `async function () { return 1; }`,
      expected: `(async function () { return 1; })()`,
    },
    {
      title: 'leaves a non-function expression untouched',
      input: `document.title`,
      expected: 'document.title',
    },
    {
      title: 'does not double-wrap an existing IIFE',
      input: `(async () => 1)()`,
      expected: `(async () => 1)()`,
    },
  ];

  for (const { title, input, expected } of exactCases) {
    it(title, async () => {
      const { evaluatedCode } = await runEvaluate(input);
      expect(evaluatedCode).toBe(expected);
    });
  }
});

it('evaluate returns action error on JavaScript failure', async () => {
const controller = new Controller();
const page = {
Expand Down