diff --git a/src/controller/service.ts b/src/controller/service.ts
index 1f840d7..4edb347 100644
--- a/src/controller/service.ts
+++ b/src/controller/service.ts
@@ -288,6 +288,30 @@ const validateAndFixJavaScript = (code: string): string => {
     (_match, selector: string) => `.matches(\`${selector}\`)`
   );
 
+  // Auto-wrap bare async function expressions in IIFE so they actually
+  // execute. The LLM commonly emits `async () => { ... }` (or
+  // `async function () { ... }`) without trailing `()`, which evaluates
+  // to a function reference and silently does nothing — this is the
+  // root cause behind credential-extraction failures where
+  // `clipboard.writeText` etc. never runs. Detect that the trimmed code
+  // is exactly one such expression with no trailing call and wrap it.
+  //
+  // Trailing semicolons are stripped before the check and the wrap:
+  // `async () => { ... };` must become `(async () => { ... })()`, because
+  // `(async () => { ... };)()` is a syntax error.
+  //
+  // Known limitation: an expression-bodied arrow ending in a call
+  // (`async () => foo()`) is left as-is, since this textual check cannot
+  // tell its trailing `)` from an existing invocation.
+  const candidate = fixedCode.trim().replace(/[;\s]+$/, '');
+  const startsAsyncArrow = /^async\s*(?:\([^)]*\)|[A-Za-z_$][\w$]*)\s*=>/.test(
+    candidate
+  );
+  const startsAsyncFn = /^async\s+function\b/.test(candidate);
+  if ((startsAsyncArrow || startsAsyncFn) && !candidate.endsWith(')')) {
+    fixedCode = `(${candidate})()`;
+  }
+
   return fixedCode;
 };
 
@@ -2591,6 +2615,7 @@ You will be given a query and the markdown of a webpage that has been filtered t
     });
     type EvaluateAction = z.infer;
 
+    const evaluateLogger = this.logger;
     this.registry.action(
       'Execute browser JavaScript on the current page and return the result.',
       { param_model: EvaluateActionSchema }
@@ -2606,6 +2631,11 @@ You will be given a query and the markdown of a webpage that has been filtered t
       }
 
       const validatedCode = validateAndFixJavaScript(params.code);
+      if (validatedCode !== params.code) {
+        evaluateLogger.info(
+          `🛠 evaluate: auto-fixed input (len ${params.code.length} → ${validatedCode.length})`
+        );
+      }
 
       const payload = (await page.evaluate(
         async ({ code }: { code: string }) => {
@@ -2671,6 +2701,18 @@ You will be given a query and the markdown of a webpage that has been filtered t
         rendered = `${rendered.slice(0, maxChars - 50)}\n... [Truncated after 20000 characters]`;
       }
 
+      // Log the evaluate return value so callers (and trace consumers) can
+      // see whether the executed code actually produced a result. Truncate
+      // aggressively — the agent already has the full value via
+      // extracted_content; this log is for debugging the "code ran but did
+      // nothing" failure mode (e.g. unwrapped async fn expressions).
+      const previewMax = 500;
+      const preview =
+        rendered.length > previewMax
+          ? `${rendered.slice(0, previewMax)}... [+${rendered.length - previewMax} chars]`
+          : rendered;
+      evaluateLogger.info(`🧪 evaluate result (${rendered.length} chars): ${preview}`);
+
       const maxMemoryChars = 10000;
       const includeExtractedContentOnlyOnce = rendered.length >= maxMemoryChars;
       const longTermMemory = includeExtractedContentOnlyOnce
diff --git a/test/controller.test.ts b/test/controller.test.ts
index aef01ad..36dcba3 100644
--- a/test/controller.test.ts
+++ b/test/controller.test.ts
@@ -2308,6 +2308,66 @@ describe('Regression Coverage', () => {
     expect(result.error).toBeNull();
   });
 
+  describe('evaluate auto-wraps bare function expressions in IIFE', () => {
+    const buildPage = () => ({
+      evaluate: vi.fn(async (_handler: unknown, args: { code: string }) => ({
+        ok: true,
+        result: args.code,
+      })),
+      url: vi.fn(() => 'https://example.com'),
+    });
+
+    const runEvaluate = async (code: string) => {
+      const controller = new Controller();
+      const page = buildPage();
+      const browserSession = {
+        get_current_page: vi.fn(async () => page),
+      };
+      const result = await controller.registry.execute_action(
+        'evaluate',
+        { code },
+        { browser_session: browserSession as any }
+      );
+      const evaluatedCode = (page.evaluate.mock.calls[0]?.[1] as {
+        code: string;
+      })?.code;
+      return { result, evaluatedCode };
+    };
+
+    it('wraps a bare async arrow expression in an IIFE', async () => {
+      const { result, evaluatedCode } = await runEvaluate(
+        `async () => {\n return { username: document.querySelector('input[type=email]')?.value };\n}`
+      );
+      expect(result.error).toBeNull();
+      expect(evaluatedCode?.startsWith('(async () => {')).toBe(true);
+      expect(evaluatedCode?.endsWith('})()')).toBe(true);
+    });
+
+    it('wraps a bare async function expression in an IIFE', async () => {
+      const { evaluatedCode } = await runEvaluate(
+        `async function () { return 1; }`
+      );
+      expect(evaluatedCode).toBe(`(async function () { return 1; })()`);
+    });
+
+    it('wraps a bare async arrow that ends with a semicolon', async () => {
+      const { evaluatedCode } = await runEvaluate(
+        `async () => { return 1; };`
+      );
+      expect(evaluatedCode).toBe(`(async () => { return 1; })()`);
+    });
+
+    it('leaves a non-function expression untouched', async () => {
+      const { evaluatedCode } = await runEvaluate(`document.title`);
+      expect(evaluatedCode).toBe('document.title');
+    });
+
+    it('does not double-wrap an existing IIFE', async () => {
+      const { evaluatedCode } = await runEvaluate(`(async () => 1)()`);
+      expect(evaluatedCode).toBe(`(async () => 1)()`);
+    });
+  });
+
   it('evaluate returns action error on JavaScript failure', async () => {
     const controller = new Controller();
     const page = {