diff --git a/src/agents/planner-executor/plan-utils.ts b/src/agents/planner-executor/plan-utils.ts index 5d1c3ffe..b74ed78b 100644 --- a/src/agents/planner-executor/plan-utils.ts +++ b/src/agents/planner-executor/plan-utils.ts @@ -31,6 +31,11 @@ export function parseAction(text: string): ParsedAction { // Strip ... tags (Qwen/DeepSeek reasoning output) cleaned = cleaned.replace(/[\s\S]*?<\/think>/gi, '').trim(); + // Some local models leak reasoning without the opening tag but still close it before the answer. + const closingThinkIndex = cleaned.toLowerCase().lastIndexOf(''); + if (closingThinkIndex !== -1) { + cleaned = cleaned.slice(closingThinkIndex + ''.length).trim(); + } // If never closed, strip from first to end cleaned = cleaned.replace(/[\s\S]*$/gi, '').trim(); diff --git a/src/agents/planner-executor/planner-executor-agent.ts b/src/agents/planner-executor/planner-executor-agent.ts index bbf456d7..1465add8 100644 --- a/src/agents/planner-executor/planner-executor-agent.ts +++ b/src/agents/planner-executor/planner-executor-agent.ts @@ -1063,6 +1063,14 @@ export class PlannerExecutorAgent { finalOutcome.status === StepStatus.SKIPPED || finalOutcome.status === StepStatus.VISION_FALLBACK ) { + if ( + !success && + finalOutcome.status === StepStatus.SUCCESS && + (await this.isCartAdditionTerminal(runtime, task, plannerAction)) + ) { + success = true; + } + if (this.recoveryState && this.config.recovery.trackSuccessfulUrls && urlAfter) { this.recoveryState.recordCheckpoint({ url: urlAfter, @@ -1084,6 +1092,10 @@ export class PlannerExecutorAgent { break; } + if (success) { + break; + } + if (shouldContinue) { continue; } @@ -2254,6 +2266,60 @@ export class PlannerExecutorAgent { return false; } + private async isCartAdditionTerminal( + runtime: AgentRuntime, + task: string, + plannerAction: StepwisePlannerResponse + ): Promise { + const taskText = task.toLowerCase(); + if ( + !/\badd(?:ed)?\b[\s\S]*\bcart\b|\bcart[_\s-]?addition\b/.test(taskText) || + /\bcheckout\b|\bcheck out\b|\bpayment\b|\bplace order\b|\bbuy now\b/.test(taskText) + ) { + return false; + } + + const actionText = [ + plannerAction.intent, + plannerAction.input, + plannerAction.goal, + plannerAction.action, + ] + .filter((value): value is string => typeof value === 'string') + .join(' ') + .toLowerCase() + .replace(/[_-]+/g, ' '); + + if (!/\badd(?:ed)?\b[\s\S]*\bcart\b|\bcart contains\b/.test(actionText)) { + return false; + } + + try { + const snap = await runtime.snapshot({ + limit: this.config.snapshot.limitBase, + screenshot: false, + goal: 'cart addition confirmation', + }); + if (!snap) { + return false; + } + + return (snap.elements || []).some(element => { + const label = [element.text, element.ariaLabel, element.name] + .filter((value): value is string => typeof value === 'string') + .join(' ') + .toLowerCase(); + return ( + /\badded to (?:cart|bag|basket)\b/.test(label) || + /\bcart contains\s+[1-9]\d*\s+items?\b/.test(label) || + /\b[1-9]\d*\s+items?\s+in (?:your )?(?:cart|bag|basket)\b/.test(label) + ); + }); + } catch { + return false; + } + } + private async attemptRecovery(runtime: AgentRuntime): Promise { if (!this.recoveryState) { return false; diff --git a/src/utils/trace-file-manager.ts b/src/utils/trace-file-manager.ts index 9265ffbd..fd9dfb98 100644 --- a/src/utils/trace-file-manager.ts +++ b/src/utils/trace-file-manager.ts @@ -104,21 +104,43 @@ export class TraceFileManager { return; } - stream.end(() => { - resolve(); - }); - - stream.once('error', error => { - reject(error); - }); - - // Timeout after 5 seconds - setTimeout(() => { - if (!stream.destroyed) { + let settled = false; + const timeout = setTimeout(() => { + if (!settled) { + settled = true; stream.destroy(); resolve(); } }, 5000); + timeout.unref?.(); + + const cleanup = () => { + clearTimeout(timeout); + stream.removeListener('error', onError); + stream.removeListener('close', onClose); + }; + + const onClose = () => { + if (settled) { + return; + } + settled = true; + cleanup(); + resolve(); + }; + + const onError = (error: Error) => { + if (settled) { + return; + } + settled = true; + cleanup(); + reject(error); + }; + + stream.once('close', onClose); + stream.once('error', onError); + stream.end(); }); } diff --git a/tests/actions.test.ts b/tests/actions.test.ts index a992c05b..dc844bf4 100644 --- a/tests/actions.test.ts +++ b/tests/actions.test.ts @@ -183,7 +183,7 @@ describe('Actions', () => { await page.goto('https://example.com'); await page.waitForLoadState('networkidle', { timeout: 10000 }); - patchSearchEnginePages(page); + await patchSearchEnginePages(page); const result = await search(browser, 'sentience sdk', 'duckduckgo'); expect(result.success).toBe(true); @@ -233,7 +233,7 @@ describe('Actions', () => { try { await browser.start(); const page = getPageOrThrow(browser); - patchExampleDotCom(page); + await patchExampleDotCom(page); await page.goto('https://example.com'); await expect(search(browser, 'sentience sdk', 'duckduckgo')).rejects.toThrow( diff --git a/tests/agents/planner-executor/modal-flow.test.ts b/tests/agents/planner-executor/modal-flow.test.ts index d450cf77..64b398b1 100644 --- a/tests/agents/planner-executor/modal-flow.test.ts +++ b/tests/agents/planner-executor/modal-flow.test.ts @@ -7,6 +7,7 @@ import { class ProviderStub extends LLMProvider { private responses: string[]; + public generateCalls = 0; constructor(responses: string[] = []) { super(); @@ -22,6 +23,7 @@ class ProviderStub extends LLMProvider { } async generate(): Promise { + this.generateCalls += 1; const content = this.responses.length ? this.responses.shift()! : JSON.stringify({ action: 'DONE' }); @@ -200,6 +202,57 @@ describe('PlannerExecutorAgent modal flow parity', () => { expect(runtime.currentUrl).toContain('/checkout'); }); + it('finishes an add-to-cart task when the cart count confirms success', async () => { + const planner = new ProviderStub([ + JSON.stringify({ + action: 'CLICK', + intent: 'add_to_cart', + input: 'Add to Cart', + verify: [], + required: true, + }), + ]); + const executor = new ProviderStub(['CLICK(1)']); + let stage: 'product' | 'cart-confirmed' = 'product'; + const runtime = new RuntimeStub( + 'https://shop.test/product', + () => { + if (stage === 'cart-confirmed') { + return makeSnapshot('https://shop.test/product', [ + { id: 1, role: 'button', text: 'Add to Cart', clickable: true, importance: 100 }, + { + id: 9, + role: 'button', + text: 'Cart contains 1 item Total $59.99', + clickable: true, + importance: 110, + }, + { id: 10, role: 'text', text: 'Added to cart', importance: 90 }, + ]); + } + return makeSnapshot('https://shop.test/product', [ + { id: 1, role: 'button', text: 'Add to Cart', clickable: true, importance: 100 }, + ]); + }, + { + onClick: elementId => { + if (elementId === 1) { + stage = 'cart-confirmed'; + } + }, + } + ); + + const agent = new PlannerExecutorAgent({ planner, executor }); + const result = await agent.runStepwise(runtime, { + task: 'Search for running shoes and add the item to cart', + }); + + expect(result.success).toBe(true); + expect(runtime.clickCalls).toEqual([1]); + expect(planner.generateCalls).toBe(1); + }); + it('does not dismiss or auto-continue drawers with checkout or cart controls for unrelated clicks', async () => { const planner = new ProviderStub([ JSON.stringify({ action: 'CLICK', intent: 'open shipping info', verify: [] }), diff --git a/tests/agents/planner-executor/plan-utils.test.ts b/tests/agents/planner-executor/plan-utils.test.ts index 8131cb8c..b77f99b3 100644 --- a/tests/agents/planner-executor/plan-utils.test.ts +++ b/tests/agents/planner-executor/plan-utils.test.ts @@ -34,6 +34,26 @@ describe('parseAction', () => { }); }); + it('parses the final action after leaked thinking output', () => { + expect( + parseAction( + [ + 'So we output exactly: TYPE(168, "noise cancelling earbuds")', + '', + 'However, the problem says: "Return ONLY ONE line: TYPE(, "text")"', + '', + 'Output: TYPE(168, "noise cancelling earbuds")', + '', + '', + 'TYPE(168, "noise cancelling earbuds")', + ].join('\n') + ) + ).toEqual({ + action: 'TYPE', + args: [168, 'noise cancelling earbuds'], + }); + }); + it('does not treat action examples inside prose as executable output', () => { expect( parseAction( diff --git a/tests/browser.test.ts b/tests/browser.test.ts index 0b448d2e..625aea94 100644 --- a/tests/browser.test.ts +++ b/tests/browser.test.ts @@ -229,7 +229,7 @@ describe('Browser Proxy Support', () => { if (!page) { throw new Error('Browser page is not available'); } - patchExampleDotCom(page); + await patchExampleDotCom(page); await page.goto('https://example.com', { waitUntil: 'domcontentloaded', timeout: 20000 }); const viewportSize = await page.evaluate(() => ({ @@ -295,7 +295,7 @@ describe('Browser Proxy Support', () => { expect(sentienceBrowser.getContext()).toBe(context); // Test that we can use it - patchExampleDotCom(page); + await patchExampleDotCom(page); await page.goto('https://example.com'); await page.waitForLoadState('networkidle', { timeout: 10000 }); diff --git a/tests/test-utils.ts b/tests/test-utils.ts index 7fca6572..b8dcfd80 100644 --- a/tests/test-utils.ts +++ b/tests/test-utils.ts @@ -15,7 +15,7 @@ export async function createTestBrowser(headless?: boolean): Promise { +export async function patchExampleDotCom(page: Page): Promise { + await page.route(/https?:\/\/example\.com\/?.*/, async route => { await route.fulfill({ status: 200, contentType: 'text/html', @@ -88,8 +88,8 @@ const SEARCH_RESULTS_HTML = ` `; -export function patchSearchEnginePages(page: Page): void { - void page.route( +export async function patchSearchEnginePages(page: Page): Promise { + await page.route( /https?:\/\/(duckduckgo\.com|www\.google\.com|www\.bing\.com)\/.*/, async route => { await route.fulfill({ diff --git a/tests/utils/trace-file-manager.test.ts b/tests/utils/trace-file-manager.test.ts index 5e554332..5dee5379 100644 --- a/tests/utils/trace-file-manager.test.ts +++ b/tests/utils/trace-file-manager.test.ts @@ -5,6 +5,7 @@ import * as fs from 'fs'; import * as path from 'path'; import * as os from 'os'; +import { EventEmitter } from 'events'; import { TraceFileManager } from '../../src/utils/trace-file-manager'; import { TraceEvent } from '../../src/tracing/types'; @@ -99,6 +100,37 @@ describe('TraceFileManager', () => { await expect(TraceFileManager.closeStream(stream)).resolves.not.toThrow(); expect(stream.destroyed).toBe(true); }); + + it('should wait for the close event before resolving', async () => { + class DelayedCloseStream extends EventEmitter { + destroyed = false; + + end(callback?: () => void): void { + callback?.(); + } + + destroy(): void { + this.destroyed = true; + this.emit('close'); + } + } + + const stream = new DelayedCloseStream(); + let resolved = false; + const closePromise = TraceFileManager.closeStream(stream as unknown as fs.WriteStream).then( + () => { + resolved = true; + } + ); + + await Promise.resolve(); + expect(resolved).toBe(false); + + stream.destroy(); + await closePromise; + + expect(resolved).toBe(true); + }); }); describe('fileExists', () => {