From aa5b57e3e89ad73945de213493ee93953a3ccdd5 Mon Sep 17 00:00:00 2001 From: Aleksey Bykhun Date: Tue, 19 May 2026 12:27:48 -0700 Subject: [PATCH 1/2] =?UTF-8?q?fix(controller):=20credential-extract=20?= =?UTF-8?q?=E2=80=94=20auto-wrap=20bare=20arrow=20fns=20in=20IIFE=20+=20lo?= =?UTF-8?q?g=20eval=20result?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The `credential_extract` action passes the LLM-generated JS string to `page.evaluate()`. Models commonly emit bare arrow/async-function expressions like `async () => { ... }`, which evaluate as expressions that just produce a function value and get discarded — the action returns undefined and the agent thinks the page has no credential fields. 1. validateAndFixJavaScript now auto-wraps a leading bare `async () => {...}` / `() => {...}` / `async function () {...}` expression in `()()` before passing to page.evaluate(). The regex is conservative: only fires on a clear leading pattern with no trailing call paren, so self-invoking IIFEs and bare statements pass through unchanged. 2. The evaluate handler logs the rendered return value (truncated to 500 chars) so operators can see what came back when debugging the "code ran but did nothing" failure mode. Co-Authored-By: Claude Opus 4.7 (1M context) --- src/controller/service.ts | 34 ++++++++++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) diff --git a/src/controller/service.ts b/src/controller/service.ts index 1f840d7..4edb347 100644 --- a/src/controller/service.ts +++ b/src/controller/service.ts @@ -288,6 +288,22 @@ const validateAndFixJavaScript = (code: string): string => { (_match, selector: string) => `.matches(\`${selector}\`)` ); + // Auto-wrap bare async function expressions in IIFE so they actually + // execute. The LLM commonly emits `async () => { ... }` (or + // `async function () { ... 
}`) without trailing `()`, which evaluates + // to a function reference and silently does nothing — this is the + // root cause behind credential-extraction failures where + // `clipboard.writeText` etc. never runs. Detect that the trimmed code + // is exactly one such expression with no trailing call and wrap it. + const trimmed = fixedCode.trim(); + const startsAsyncArrow = /^async\s*(?:\([^)]*\)|[A-Za-z_$][\w$]*)\s*=>/.test( + trimmed + ); + const startsAsyncFn = /^async\s+function\b/.test(trimmed); + if ((startsAsyncArrow || startsAsyncFn) && !/\)\s*;?\s*$/.test(trimmed)) { + fixedCode = `(${trimmed})()`; + } + return fixedCode; }; @@ -2591,6 +2607,7 @@ You will be given a query and the markdown of a webpage that has been filtered t }); type EvaluateAction = z.infer; + const evaluateLogger = this.logger; this.registry.action( 'Execute browser JavaScript on the current page and return the result.', { param_model: EvaluateActionSchema } @@ -2606,6 +2623,11 @@ You will be given a query and the markdown of a webpage that has been filtered t } const validatedCode = validateAndFixJavaScript(params.code); + if (validatedCode !== params.code) { + evaluateLogger.info( + `🛠 evaluate: auto-fixed input (len ${params.code.length} → ${validatedCode.length})` + ); + } const payload = (await page.evaluate( async ({ code }: { code: string }) => { @@ -2671,6 +2693,18 @@ You will be given a query and the markdown of a webpage that has been filtered t rendered = `${rendered.slice(0, maxChars - 50)}\n... [Truncated after 20000 characters]`; } + // Log the evaluate return value so callers (and trace consumers) can + // see whether the executed code actually produced a result. Truncate + // aggressively — the agent already has the full value via + // extracted_content; this log is for debugging the "code ran but did + // nothing" failure mode (e.g. unwrapped async fn expressions). + const previewMax = 500; + const preview = + rendered.length > previewMax + ? 
`${rendered.slice(0, previewMax)}... [+${rendered.length - previewMax} chars]` + : rendered; + evaluateLogger.info(`🧪 evaluate result (${rendered.length} chars): ${preview}`); + const maxMemoryChars = 10000; const includeExtractedContentOnlyOnce = rendered.length >= maxMemoryChars; const longTermMemory = includeExtractedContentOnlyOnce From 64423dc0736f48f79996451372da6e11969b7a24 Mon Sep 17 00:00:00 2001 From: Aleksey Bykhun Date: Tue, 19 May 2026 21:17:35 -0700 Subject: [PATCH 2/2] test(controller): verify credential-extract IIFE auto-wrap handles bare arrow/async-fn payloads MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Covers the regression behind the credential-extract failure mode where the LLM emits a bare `async () => { ... }` or `async function () {...}` expression. Without the wrap, `page.evaluate()` produces a function value that is silently discarded. Tests assert the actual code handed to `page.evaluate` after `validateAndFixJavaScript` runs: - bare async arrow → wrapped in (...)() - bare async function → wrapped in (...)() - non-function expression (document.title) → passes through unchanged - already-IIFE (`(async () => 1)()`) → not double-wrapped Verified to fail on upstream/main src/controller/service.ts and pass on the fix commit. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- test/controller.test.ts | 53 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 53 insertions(+) diff --git a/test/controller.test.ts b/test/controller.test.ts index aef01ad..36dcba3 100644 --- a/test/controller.test.ts +++ b/test/controller.test.ts @@ -2308,6 +2308,59 @@ describe('Regression Coverage', () => { expect(result.error).toBeNull(); }); + describe('evaluate auto-wraps bare function expressions in IIFE', () => { + const buildPage = () => ({ + evaluate: vi.fn(async (_handler: unknown, args: { code: string }) => ({ + ok: true, + result: args.code, + })), + url: vi.fn(() => 'https://example.com'), + }); + + const runEvaluate = async (code: string) => { + const controller = new Controller(); + const page = buildPage(); + const browserSession = { + get_current_page: vi.fn(async () => page), + }; + const result = await controller.registry.execute_action( + 'evaluate', + { code }, + { browser_session: browserSession as any } + ); + const evaluatedCode = (page.evaluate.mock.calls[0]?.[1] as { + code: string; + })?.code; + return { result, evaluatedCode }; + }; + + it('wraps a bare async arrow expression in an IIFE', async () => { + const { result, evaluatedCode } = await runEvaluate( + `async () => {\n return { username: document.querySelector('input[type=email]')?.value };\n}` + ); + expect(result.error).toBeNull(); + expect(evaluatedCode?.startsWith('(async () => {')).toBe(true); + expect(evaluatedCode?.endsWith('})()')).toBe(true); + }); + + it('wraps a bare async function expression in an IIFE', async () => { + const { evaluatedCode } = await runEvaluate( + `async function () { return 1; }` + ); + expect(evaluatedCode).toBe(`(async function () { return 1; })()`); + }); + + it('leaves a non-function expression untouched', async () => { + const { evaluatedCode } = await runEvaluate(`document.title`); + expect(evaluatedCode).toBe('document.title'); + }); + + it('does not double-wrap an existing IIFE', async 
() => { + const { evaluatedCode } = await runEvaluate(`(async () => 1)()`); + expect(evaluatedCode).toBe(`(async () => 1)()`); + }); + }); + it('evaluate returns action error on JavaScript failure', async () => { const controller = new Controller(); const page = {