diff --git a/src/agents/planner-executor/plan-utils.ts b/src/agents/planner-executor/plan-utils.ts
index 5d1c3ffe..b74ed78b 100644
--- a/src/agents/planner-executor/plan-utils.ts
+++ b/src/agents/planner-executor/plan-utils.ts
@@ -31,6 +31,11 @@ export function parseAction(text: string): ParsedAction {
// Strip ... tags (Qwen/DeepSeek reasoning output)
cleaned = cleaned.replace(/[\s\S]*?<\/think>/gi, '').trim();
+  // Some local models leak reasoning without the opening <think> tag but still close it before
+  // the answer. The greedy prefix match runs through the LAST </think>, and it is applied to the
+  // original string rather than a lowercased copy, so the cut position cannot drift when
+  // lowercasing changes the string length.
+  cleaned = cleaned.replace(/^[\s\S]*<\/think>/i, '').trim();
// If never closed, strip from first to end
cleaned = cleaned.replace(/[\s\S]*$/gi, '').trim();
diff --git a/src/agents/planner-executor/planner-executor-agent.ts b/src/agents/planner-executor/planner-executor-agent.ts
index bbf456d7..1465add8 100644
--- a/src/agents/planner-executor/planner-executor-agent.ts
+++ b/src/agents/planner-executor/planner-executor-agent.ts
@@ -1063,6 +1063,14 @@ export class PlannerExecutorAgent {
finalOutcome.status === StepStatus.SKIPPED ||
finalOutcome.status === StepStatus.VISION_FALLBACK
) {
+ if (
+ !success &&
+ finalOutcome.status === StepStatus.SUCCESS &&
+ (await this.isCartAdditionTerminal(runtime, task, plannerAction))
+ ) {
+ success = true;
+ }
+
if (this.recoveryState && this.config.recovery.trackSuccessfulUrls && urlAfter) {
this.recoveryState.recordCheckpoint({
url: urlAfter,
@@ -1084,6 +1092,10 @@ export class PlannerExecutorAgent {
break;
}
+ if (success) {
+ break;
+ }
+
if (shouldContinue) {
continue;
}
@@ -2254,6 +2266,79 @@ export class PlannerExecutorAgent {
return false;
}
+  /**
+   * Decide whether an add-to-cart task can stop after the step that just ran.
+   *
+   * All three checks must pass: the task text asks to add something to the cart and mentions no
+   * checkout, payment, order placement, or "buy now"; the planner action also refers to adding
+   * to the cart; and a fresh snapshot has an element whose text, ariaLabel, or name reads like a
+   * cart confirmation.
+   *
+   * @param runtime - Runtime used to take the confirmation snapshot.
+   * @param task - Task description; matched case-insensitively.
+   * @param plannerAction - Planner response for the step that was just executed.
+   * @returns True when the snapshot confirms the addition; false otherwise, including when the
+   *   snapshot is missing or taking it throws.
+   */
+  private async isCartAdditionTerminal(
+    runtime: AgentRuntime,
+    task: string,
+    plannerAction: StepwisePlannerResponse
+  ): Promise<boolean> {
+    const taskText = task.toLowerCase();
+    // NOTE(review): unlike actionText below, taskText keeps "_" and "-", so a task spelled
+    // "add_to_cart" does not satisfy \badd\b — confirm whether that is intended.
+    if (
+      !/\badd(?:ed)?\b[\s\S]*\bcart\b|\bcart[_\s-]?addition\b/.test(taskText) ||
+      /\bcheckout\b|\bcheck out\b|\bpayment\b|\bplace order\b|\bbuy now\b/.test(taskText)
+    ) {
+      return false;
+    }
+
+    // Fold the planner's string fields into one lowercase string; "_" and "-" become spaces so
+    // identifiers such as "add_to_cart" read as words.
+    const actionText = [
+      plannerAction.intent,
+      plannerAction.input,
+      plannerAction.goal,
+      plannerAction.action,
+    ]
+      .filter((value): value is string => typeof value === 'string')
+      .join(' ')
+      .toLowerCase()
+      .replace(/[_-]+/g, ' ');
+
+    if (!/\badd(?:ed)?\b[\s\S]*\bcart\b|\bcart contains\b/.test(actionText)) {
+      return false;
+    }
+
+    try {
+      const snap = await runtime.snapshot({
+        limit: this.config.snapshot.limitBase,
+        screenshot: false,
+        goal: 'cart addition confirmation',
+      });
+      if (!snap) {
+        return false;
+      }
+
+      return (snap.elements || []).some(element => {
+        const label = [element.text, element.ariaLabel, element.name]
+          .filter((value): value is string => typeof value === 'string')
+          .join(' ')
+          .toLowerCase();
+        return (
+          /\badded to (?:cart|bag|basket)\b/.test(label) ||
+          /\bcart contains\s+[1-9]\d*\s+items?\b/.test(label) ||
+          /\b[1-9]\d*\s+items?\s+in (?:your )?(?:cart|bag|basket)\b/.test(label)
+        );
+      });
+    } catch {
+      // Best effort: a failed snapshot only means the step is not treated as terminal.
+      return false;
+    }
+  }
+
private async attemptRecovery(runtime: AgentRuntime): Promise {
if (!this.recoveryState) {
return false;
diff --git a/src/utils/trace-file-manager.ts b/src/utils/trace-file-manager.ts
index 9265ffbd..fd9dfb98 100644
--- a/src/utils/trace-file-manager.ts
+++ b/src/utils/trace-file-manager.ts
@@ -104,21 +104,43 @@ export class TraceFileManager {
return;
}
- stream.end(() => {
- resolve();
- });
-
- stream.once('error', error => {
- reject(error);
- });
-
- // Timeout after 5 seconds
- setTimeout(() => {
- if (!stream.destroyed) {
+ let settled = false;
+ const timeout = setTimeout(() => {
+ if (!settled) {
+ settled = true;
stream.destroy();
resolve();
}
}, 5000);
+ timeout.unref?.();
+
+ const cleanup = () => {
+ clearTimeout(timeout);
+ stream.removeListener('error', onError);
+ stream.removeListener('close', onClose);
+ };
+
+ const onClose = () => {
+ if (settled) {
+ return;
+ }
+ settled = true;
+ cleanup();
+ resolve();
+ };
+
+ const onError = (error: Error) => {
+ if (settled) {
+ return;
+ }
+ settled = true;
+ cleanup();
+ reject(error);
+ };
+
+ stream.once('close', onClose);
+ stream.once('error', onError);
+ stream.end();
});
}
diff --git a/tests/actions.test.ts b/tests/actions.test.ts
index a992c05b..dc844bf4 100644
--- a/tests/actions.test.ts
+++ b/tests/actions.test.ts
@@ -183,7 +183,7 @@ describe('Actions', () => {
await page.goto('https://example.com');
await page.waitForLoadState('networkidle', { timeout: 10000 });
- patchSearchEnginePages(page);
+ await patchSearchEnginePages(page);
const result = await search(browser, 'sentience sdk', 'duckduckgo');
expect(result.success).toBe(true);
@@ -233,7 +233,7 @@ describe('Actions', () => {
try {
await browser.start();
const page = getPageOrThrow(browser);
- patchExampleDotCom(page);
+ await patchExampleDotCom(page);
await page.goto('https://example.com');
await expect(search(browser, 'sentience sdk', 'duckduckgo')).rejects.toThrow(
diff --git a/tests/agents/planner-executor/modal-flow.test.ts b/tests/agents/planner-executor/modal-flow.test.ts
index d450cf77..64b398b1 100644
--- a/tests/agents/planner-executor/modal-flow.test.ts
+++ b/tests/agents/planner-executor/modal-flow.test.ts
@@ -7,6 +7,7 @@ import {
class ProviderStub extends LLMProvider {
private responses: string[];
+ public generateCalls = 0;
constructor(responses: string[] = []) {
super();
@@ -22,6 +23,7 @@ class ProviderStub extends LLMProvider {
}
async generate(): Promise {
+ this.generateCalls += 1;
const content = this.responses.length
? this.responses.shift()!
: JSON.stringify({ action: 'DONE' });
@@ -200,6 +202,57 @@ describe('PlannerExecutorAgent modal flow parity', () => {
expect(runtime.currentUrl).toContain('/checkout');
});
+  // A confirmed cart addition should end the run after a single planner step.
+  it('finishes an add-to-cart task when the cart count confirms success', async () => {
+    const planner = new ProviderStub([
+      JSON.stringify({
+        action: 'CLICK',
+        intent: 'add_to_cart',
+        input: 'Add to Cart',
+        verify: [],
+        required: true,
+      }),
+    ]);
+    const executor = new ProviderStub(['CLICK(1)']);
+    let stage: 'product' | 'cart-confirmed' = 'product';
+    const runtime = new RuntimeStub(
+      'https://shop.test/product',
+      () => {
+        if (stage === 'cart-confirmed') {
+          return makeSnapshot('https://shop.test/product', [
+            { id: 1, role: 'button', text: 'Add to Cart', clickable: true, importance: 100 },
+            {
+              id: 9,
+              role: 'button',
+              text: 'Cart contains 1 item Total $59.99',
+              clickable: true,
+              importance: 110,
+            },
+            { id: 10, role: 'text', text: 'Added to cart', importance: 90 },
+          ]);
+        }
+        return makeSnapshot('https://shop.test/product', [
+          { id: 1, role: 'button', text: 'Add to Cart', clickable: true, importance: 100 },
+        ]);
+      },
+      {
+        onClick: elementId => {
+          if (elementId === 1) {
+            stage = 'cart-confirmed';
+          }
+        },
+      }
+    );
+    const agent = new PlannerExecutorAgent({ planner, executor });
+    const result = await agent.runStepwise(runtime, {
+      task: 'Search for running shoes and add the item to cart',
+    });
+    // One click and one planner call show that the run stopped as soon as the cart confirmed.
+    expect(result.success).toBe(true);
+    expect(runtime.clickCalls).toEqual([1]);
+    expect(planner.generateCalls).toBe(1);
+  });
+
it('does not dismiss or auto-continue drawers with checkout or cart controls for unrelated clicks', async () => {
const planner = new ProviderStub([
JSON.stringify({ action: 'CLICK', intent: 'open shipping info', verify: [] }),
diff --git a/tests/agents/planner-executor/plan-utils.test.ts b/tests/agents/planner-executor/plan-utils.test.ts
index 8131cb8c..b77f99b3 100644
--- a/tests/agents/planner-executor/plan-utils.test.ts
+++ b/tests/agents/planner-executor/plan-utils.test.ts
@@ -34,6 +34,26 @@ describe('parseAction', () => {
});
});
+  it('parses the final action after leaked thinking output', () => {
+    expect(
+      parseAction(
+        [
+          'So we output exactly: TYPE(168, "noise cancelling earbuds")',
+          '',
+          'However, the problem says: "Return ONLY ONE line: TYPE(, "text")"',
+          '',
+          'Output: TYPE(168, "noise cancelling earbuds")',
+          '</think>',
+          '',
+          'TYPE(168, "noise cancelling earbuds")',
+        ].join('\n')
+      )
+    ).toEqual({
+      action: 'TYPE',
+      args: [168, 'noise cancelling earbuds'],
+    });
+  });
+
it('does not treat action examples inside prose as executable output', () => {
expect(
parseAction(
diff --git a/tests/browser.test.ts b/tests/browser.test.ts
index 0b448d2e..625aea94 100644
--- a/tests/browser.test.ts
+++ b/tests/browser.test.ts
@@ -229,7 +229,7 @@ describe('Browser Proxy Support', () => {
if (!page) {
throw new Error('Browser page is not available');
}
- patchExampleDotCom(page);
+ await patchExampleDotCom(page);
await page.goto('https://example.com', { waitUntil: 'domcontentloaded', timeout: 20000 });
const viewportSize = await page.evaluate(() => ({
@@ -295,7 +295,7 @@ describe('Browser Proxy Support', () => {
expect(sentienceBrowser.getContext()).toBe(context);
// Test that we can use it
- patchExampleDotCom(page);
+ await patchExampleDotCom(page);
await page.goto('https://example.com');
await page.waitForLoadState('networkidle', { timeout: 10000 });
diff --git a/tests/test-utils.ts b/tests/test-utils.ts
index 7fca6572..b8dcfd80 100644
--- a/tests/test-utils.ts
+++ b/tests/test-utils.ts
@@ -15,7 +15,7 @@ export async function createTestBrowser(headless?: boolean): Promise {
+export async function patchExampleDotCom(page: Page): Promise {
+ await page.route(/https?:\/\/example\.com\/?.*/, async route => {
await route.fulfill({
status: 200,
contentType: 'text/html',
@@ -88,8 +88,8 @@ const SEARCH_RESULTS_HTML = `