From 6def109785f6db77ba0faceab57091fb25246466 Mon Sep 17 00:00:00 2001 From: sbrooke Date: Tue, 26 May 2026 13:38:23 -0400 Subject: [PATCH 1/7] fix(core): block prompt context form fills --- packages/core/src/security/actionFirewall.ts | 43 +++++++++++++++++++ packages/core/src/tools/webActionTools.ts | 30 +++++++++++++ .../core/test/security/actionFirewall.test.ts | 38 ++++++++++++++++ .../core/test/tools/webActionTools.test.ts | 35 +++++++++++++++ 4 files changed, 146 insertions(+) create mode 100644 packages/core/src/security/actionFirewall.ts create mode 100644 packages/core/test/security/actionFirewall.test.ts diff --git a/packages/core/src/security/actionFirewall.ts b/packages/core/src/security/actionFirewall.ts new file mode 100644 index 00000000..1a76a45e --- /dev/null +++ b/packages/core/src/security/actionFirewall.ts @@ -0,0 +1,43 @@ +export const SECURITY_BLOCKED_CONTEXT_EXFILTRATION = + "Security policy blocked a form fill that appears to contain agent context or prompt data"; + +export type SecurityAssessment = + | { allowed: true } + | { allowed: false; reason: string; isRecoverable: true }; + +export interface FillAssessmentInput { + value: string; +} + +const CONTEXT_EXFILTRATION_PATTERNS = [ + /system prompt/i, + /developer prompt/i, + /conversation history/i, + /tool results?/i, + /page snapshots?/i, + /<\s*external-content\b/i, + /<\/\s*external-content\s*>/i, + /you are an expert at completing tasks using a web browser/i, + /available tools/i, + /mandatory guardrails/i, +]; + +const GENERATED_TEXT_LINE_LIMIT = 2; + +export function assessFillValue(input: FillAssessmentInput): SecurityAssessment { + const value = input.value.trim(); + + if ( + value && + (CONTEXT_EXFILTRATION_PATTERNS.some((pattern) => pattern.test(value)) || + value.split(/\r?\n/).length > GENERATED_TEXT_LINE_LIMIT) + ) { + return { + allowed: false, + reason: SECURITY_BLOCKED_CONTEXT_EXFILTRATION, + isRecoverable: true, + }; + } + + return { allowed: true }; +} diff --git a/packages/core/src/tools/webActionTools.ts b/packages/core/src/tools/webActionTools.ts index f1f415de..c348a129 100644 --- a/packages/core/src/tools/webActionTools.ts +++ b/packages/core/src/tools/webActionTools.ts @@ -13,6 +13,7 @@ import { buildExtractionPrompt, TOOL_STRINGS } from "../prompts.js"; import type { ProviderConfig } from "../provider.js"; import { BrowserException } from "../errors.js"; import { generateTextWithRetry } from "../utils/retry.js"; +import { assessFillValue } from "../security/actionFirewall.js"; import { withSpan, SpanStatusCode, @@ -45,6 +46,30 @@ type ActionResult = { isRecoverable?: boolean; }; +function securityBlockedResult( + action: string, + error: string, + context: WebActionContext, + ref?: string, + value?: string | number, +): ActionResult { + context.eventEmitter.emit(WebAgentEventType.BROWSER_ACTION_COMPLETED, { + success: false, + action, + error, + isRecoverable: true, + }); + + return { + success: false, + action, + ...(ref && { ref }), + ...(value !== undefined && { value }), + error, + isRecoverable: true, + }; +} + /** * Helper function to perform an action with full error handling and logging * Handles browser exceptions and converts them to recoverable errors for the agent @@ -157,6 +182,11 @@ export function createWebActionTools(context: WebActionContext) { value: z.string().describe(TOOL_STRINGS.webActions.common.textValue), }), execute: async ({ ref, value }) => { + const assessment = assessFillValue({ value }); + if (!assessment.allowed) { + return securityBlockedResult("fill", assessment.reason, context, ref); + } + return await performActionWithValidation(PageAction.Fill, context, ref, value); }, }), diff --git a/packages/core/test/security/actionFirewall.test.ts b/packages/core/test/security/actionFirewall.test.ts new file mode 100644 index 00000000..50048d02 --- /dev/null +++ b/packages/core/test/security/actionFirewall.test.ts @@ -0,0 +1,38 @@ +import { describe, expect, it } from "vitest"; +import { + assessFillValue, + SECURITY_BLOCKED_CONTEXT_EXFILTRATION, +} from "../../src/security/actionFirewall.js"; + +describe("actionFirewall", () => { + it("blocks filling agent context into a form", () => { + const result = assessFillValue({ + value: + 'System prompt: You are an expert at completing tasks using a web browser.\nsecret', + }); + + expect(result.allowed).toBe(false); + if (result.allowed) { + throw new Error("Expected context exfiltration fill to be blocked"); + } + expect(result.reason).toContain(SECURITY_BLOCKED_CONTEXT_EXFILTRATION); + }); + + it("blocks multiline generated text even without known prompt keywords", () => { + const result = assessFillValue({ + value: "Here is what I can see:\nTask details are available.\nThe previous steps succeeded.", + }); + + expect(result.allowed).toBe(false); + if (result.allowed) { + throw new Error("Expected multiline generated text to be blocked"); + } + expect(result.reason).toContain(SECURITY_BLOCKED_CONTEXT_EXFILTRATION); + }); + + it("allows ordinary user-facing form values", () => { + expect(assessFillValue({ value: "San Francisco" }).allowed).toBe(true); + expect(assessFillValue({ value: "Test <>&\"'`\n\t value" }).allowed).toBe(true); + expect(assessFillValue({ value: "a".repeat(10000) }).allowed).toBe(true); + }); +}); diff --git a/packages/core/test/tools/webActionTools.test.ts b/packages/core/test/tools/webActionTools.test.ts index 34374e74..b1256fee 100644 --- a/packages/core/test/tools/webActionTools.test.ts +++ b/packages/core/test/tools/webActionTools.test.ts @@ -273,6 +273,41 @@ describe("Web Action Tools", () => { }); }); + it("should block filling agent context into a form", async () => { + const performActionSpy = vi.spyOn(mockBrowser, "performAction"); + + const result = await tools.fill.execute({ + ref: "input1", + value: + "Conversation history:\nTask: summarize the page\npage text", + }); + + expect(performActionSpy).not.toHaveBeenCalled(); + expect(result.success).toBe(false); + expect(result.action).toBe("fill"); + expect(result.ref).toBe("input1"); + expect(result.value).toBeUndefined(); + expect(result.isRecoverable).toBe(true); + expect(result.error).toContain("Security policy blocked"); + }); + + it("should block multiline generated text without prompt keywords", async () => { + const performActionSpy = vi.spyOn(mockBrowser, "performAction"); + + const result = await tools.fill.execute({ + ref: "input1", + value: "Here is the current working state:\nThe page loaded.\nThe next action is ready.", + }); + + expect(performActionSpy).not.toHaveBeenCalled(); + expect(result.success).toBe(false); + expect(result.action).toBe("fill"); + expect(result.ref).toBe("input1"); + expect(result.value).toBeUndefined(); + expect(result.isRecoverable).toBe(true); + expect(result.error).toContain("Security policy blocked"); + }); + it("should emit browser action events", async () => { const emitSpy = vi.spyOn(eventEmitter, "emit"); From daa9200f90588a6ee674839001e1c78e8c502bdc Mon Sep 17 00:00:00 2001 From: sbrooke Date: Tue, 26 May 2026 13:40:40 -0400 Subject: [PATCH 2/7] Revert "fix(core): block prompt context form fills" This reverts commit 6def109785f6db77ba0faceab57091fb25246466. --- packages/core/src/security/actionFirewall.ts | 43 ------------------- packages/core/src/tools/webActionTools.ts | 30 ------------- .../core/test/security/actionFirewall.test.ts | 38 ---------------- .../core/test/tools/webActionTools.test.ts | 35 --------------- 4 files changed, 146 deletions(-) delete mode 100644 packages/core/src/security/actionFirewall.ts delete mode 100644 packages/core/test/security/actionFirewall.test.ts diff --git a/packages/core/src/security/actionFirewall.ts b/packages/core/src/security/actionFirewall.ts deleted file mode 100644 index 1a76a45e..00000000 --- a/packages/core/src/security/actionFirewall.ts +++ /dev/null @@ -1,43 +0,0 @@ -export const SECURITY_BLOCKED_CONTEXT_EXFILTRATION = - "Security policy blocked a form fill that appears to contain agent context or prompt data"; - -export type SecurityAssessment = - | { allowed: true } - | { allowed: false; reason: string; isRecoverable: true }; - -export interface FillAssessmentInput { - value: string; -} - -const CONTEXT_EXFILTRATION_PATTERNS = [ - /system prompt/i, - /developer prompt/i, - /conversation history/i, - /tool results?/i, - /page snapshots?/i, - /<\s*external-content\b/i, - /<\/\s*external-content\s*>/i, - /you are an expert at completing tasks using a web browser/i, - /available tools/i, - /mandatory guardrails/i, -]; - -const GENERATED_TEXT_LINE_LIMIT = 2; - -export function assessFillValue(input: FillAssessmentInput): SecurityAssessment { - const value = input.value.trim(); - - if ( - value && - (CONTEXT_EXFILTRATION_PATTERNS.some((pattern) => pattern.test(value)) || - value.split(/\r?\n/).length > GENERATED_TEXT_LINE_LIMIT) - ) { - return { - allowed: false, - reason: SECURITY_BLOCKED_CONTEXT_EXFILTRATION, - isRecoverable: true, - }; - } - - return { allowed: true }; -} diff --git a/packages/core/src/tools/webActionTools.ts b/packages/core/src/tools/webActionTools.ts index c348a129..f1f415de 100644 --- a/packages/core/src/tools/webActionTools.ts +++ b/packages/core/src/tools/webActionTools.ts @@ -13,7 +13,6 @@ import { buildExtractionPrompt, TOOL_STRINGS } from "../prompts.js"; import type { ProviderConfig } from "../provider.js"; import { BrowserException } from "../errors.js"; import { generateTextWithRetry } from "../utils/retry.js"; -import { assessFillValue } from "../security/actionFirewall.js"; import { withSpan, SpanStatusCode, @@ -46,30 +45,6 @@ type ActionResult = { isRecoverable?: boolean; }; -function securityBlockedResult( - action: string, - error: string, - context: WebActionContext, - ref?: string, - value?: string | number, -): ActionResult { - context.eventEmitter.emit(WebAgentEventType.BROWSER_ACTION_COMPLETED, { - success: false, - action, - error, - isRecoverable: true, - }); - - return { - success: false, - action, - ...(ref && { ref }), - ...(value !== undefined && { value }), - error, - isRecoverable: true, - }; -} - /** * Helper function to perform an action with full error handling and logging * Handles browser exceptions and converts them to recoverable errors for the agent @@ -182,11 +157,6 @@ export function createWebActionTools(context: WebActionContext) { value: z.string().describe(TOOL_STRINGS.webActions.common.textValue), }), execute: async ({ ref, value }) => { - const assessment = assessFillValue({ value }); - if (!assessment.allowed) { - return securityBlockedResult("fill", assessment.reason, context, ref); - } - return await performActionWithValidation(PageAction.Fill, context, ref, value); }, }), diff --git a/packages/core/test/security/actionFirewall.test.ts b/packages/core/test/security/actionFirewall.test.ts deleted file mode 100644 index 50048d02..00000000 --- a/packages/core/test/security/actionFirewall.test.ts +++ /dev/null @@ -1,38 +0,0 @@ -import { describe, expect, it } from "vitest"; -import { - assessFillValue, - SECURITY_BLOCKED_CONTEXT_EXFILTRATION, -} from "../../src/security/actionFirewall.js"; - -describe("actionFirewall", () => { - it("blocks filling agent context into a form", () => { - const result = assessFillValue({ - value: - 'System prompt: You are an expert at completing tasks using a web browser.\nsecret', - }); - - expect(result.allowed).toBe(false); - if (result.allowed) { - throw new Error("Expected context exfiltration fill to be blocked"); - } - expect(result.reason).toContain(SECURITY_BLOCKED_CONTEXT_EXFILTRATION); - }); - - it("blocks multiline generated text even without known prompt keywords", () => { - const result = assessFillValue({ - value: "Here is what I can see:\nTask details are available.\nThe previous steps succeeded.", - }); - - expect(result.allowed).toBe(false); - if (result.allowed) { - throw new Error("Expected multiline generated text to be blocked"); - } - expect(result.reason).toContain(SECURITY_BLOCKED_CONTEXT_EXFILTRATION); - }); - - it("allows ordinary user-facing form values", () => { - expect(assessFillValue({ value: "San Francisco" }).allowed).toBe(true); - expect(assessFillValue({ value: "Test <>&\"'`\n\t value" }).allowed).toBe(true); - expect(assessFillValue({ value: "a".repeat(10000) }).allowed).toBe(true); - }); -}); diff --git a/packages/core/test/tools/webActionTools.test.ts b/packages/core/test/tools/webActionTools.test.ts index b1256fee..34374e74 100644 --- a/packages/core/test/tools/webActionTools.test.ts +++ b/packages/core/test/tools/webActionTools.test.ts @@ -273,41 +273,6 @@ describe("Web Action Tools", () => { }); }); - it("should block filling agent context into a form", async () => { - const performActionSpy = vi.spyOn(mockBrowser, "performAction"); - - const result = await tools.fill.execute({ - ref: "input1", - value: - "Conversation history:\nTask: summarize the page\npage text", - }); - - expect(performActionSpy).not.toHaveBeenCalled(); - expect(result.success).toBe(false); - expect(result.action).toBe("fill"); - expect(result.ref).toBe("input1"); - expect(result.value).toBeUndefined(); - expect(result.isRecoverable).toBe(true); - expect(result.error).toContain("Security policy blocked"); - }); - - it("should block multiline generated text without prompt keywords", async () => { - const performActionSpy = vi.spyOn(mockBrowser, "performAction"); - - const result = await tools.fill.execute({ - ref: "input1", - value: "Here is the current working state:\nThe page loaded.\nThe next action is ready.", - }); - - expect(performActionSpy).not.toHaveBeenCalled(); - expect(result.success).toBe(false); - expect(result.action).toBe("fill"); - expect(result.ref).toBe("input1"); - expect(result.value).toBeUndefined(); - expect(result.isRecoverable).toBe(true); - expect(result.error).toContain("Security policy blocked"); - }); - it("should emit browser action events", async () => { const emitSpy = vi.spyOn(eventEmitter, "emit"); From 80840ad36bdaba109bd6ba7cbfa78d2525503c4b Mon Sep 17 00:00:00 2001 From: sbrooke Date: Tue, 26 May 2026 13:59:51 -0400 Subject: [PATCH 3/7] fix(core): gate form actions by field provenance --- packages/core/src/browser/ariaBrowser.ts | 42 ++++ .../core/src/browser/playwrightBrowser.ts | 185 +++++++++++++++- packages/core/src/core.ts | 7 +- packages/core/src/security/actionFirewall.ts | 140 ++++++++++++ packages/core/src/tools/webActionTools.ts | 117 +++++++++- packages/core/src/webAgent.ts | 67 ++---- .../core/test/security/actionFirewall.test.ts | 149 +++++++++++++ .../core/test/tools/webActionTools.test.ts | 206 +++++++++++++++++- packages/core/test/webAgent.test.ts | 36 ++- .../src/background/ExtensionBrowser.ts | 202 ++++++++++++++++- 10 files changed, 1099 insertions(+), 52 deletions(-) create mode 100644 packages/core/src/security/actionFirewall.ts create mode 100644 packages/core/test/security/actionFirewall.test.ts diff --git a/packages/core/src/browser/ariaBrowser.ts b/packages/core/src/browser/ariaBrowser.ts index ca1abacd..0ec38656 100644 --- a/packages/core/src/browser/ariaBrowser.ts +++ b/packages/core/src/browser/ariaBrowser.ts @@ -57,6 +57,39 @@ export interface TemporaryTab { waitForLoadState(state: LoadState, options?: { timeout?: number }): Promise; } +export interface FieldMetadata { + ref: string; + tagName: string; + inputType: string | null; + role: string | null; + name: string | null; + label: string | null; + placeholder: string | null; + autocomplete: string | null; + isContentEditable: boolean; + formId: string | null; + formAction: string | null; + formMethod: string | null; +} + +export interface FormFieldState { + ref: string | null; + name: string | null; + tagName: string; + inputType: string | null; + autocomplete: string | null; +} + +export interface FormSubmissionContext { + submitterRef: string; + formId: string | null; + actionUrl: string | null; + method: string | null; + fields: FormFieldState[]; +} + +export type FormSubmissionTrigger = "click" | "enter"; + export interface AriaBrowser { /** The name of the browser being used */ browserName: string; @@ -99,6 +132,15 @@ export interface AriaBrowser { */ performAction(ref: string, action: PageAction, value?: string): Promise; + /** Returns structural metadata for an element ref used in form/action policy checks. */ + getFieldMetadata(ref: string): Promise; + + /** Returns the form that would be submitted by activating this ref, if any. */ + getFormSubmissionContext( + ref: string, + trigger?: FormSubmissionTrigger, + ): Promise; + /** * Waits for a specific load state of the page * @param state The load state to wait for diff --git a/packages/core/src/browser/playwrightBrowser.ts b/packages/core/src/browser/playwrightBrowser.ts index 510f2a71..e181d646 100644 --- a/packages/core/src/browser/playwrightBrowser.ts +++ b/packages/core/src/browser/playwrightBrowser.ts @@ -11,7 +11,15 @@ import { Locator, errors as playwrightErrors, } from "playwright"; -import { AriaBrowser, PageAction, LoadState, TemporaryTab } from "./ariaBrowser.js"; +import { + AriaBrowser, + FieldMetadata, + FormSubmissionTrigger, + FormSubmissionContext, + LoadState, + PageAction, + TemporaryTab, +} from "./ariaBrowser.js"; import { PlaywrightBlocker } from "@ghostery/adblocker-playwright"; import fetch from "cross-fetch"; import TurndownService from "turndown"; @@ -788,6 +796,181 @@ export class PlaywrightBrowser implements AriaBrowser { return locator; } + async getFieldMetadata(ref: string): Promise { + const locator = await this.validateElementRef(ref); + + return locator.evaluate((element, elementRef): FieldMetadata => { + const el = element as HTMLElement; + const input = el instanceof HTMLInputElement ? el : null; + const form = getElementForm(el); + + return { + ref: elementRef, + tagName: el.tagName.toLowerCase(), + inputType: input?.type?.toLowerCase() ?? null, + role: el.getAttribute("role"), + name: getElementName(el), + label: getElementLabel(el), + placeholder: getElementPlaceholder(el), + autocomplete: getElementAutocomplete(el), + isContentEditable: el.isContentEditable, + formId: form?.id || null, + formAction: form?.action || null, + formMethod: form?.method?.toLowerCase() || null, + }; + + function getElementForm(node: HTMLElement): HTMLFormElement | null { + if ( + node instanceof HTMLInputElement || + node instanceof HTMLTextAreaElement || + node instanceof HTMLSelectElement || + node instanceof HTMLButtonElement + ) { + return node.form; + } + return node.closest("form"); + } + + function getElementName(node: HTMLElement): string | null { + if ( + node instanceof HTMLInputElement || + node instanceof HTMLTextAreaElement || + node instanceof HTMLSelectElement || + node instanceof HTMLButtonElement + ) { + return node.name || null; + } + return node.getAttribute("name"); + } + + function getElementLabel(node: HTMLElement): string | null { + const ariaLabel = node.getAttribute("aria-label"); + if (ariaLabel?.trim()) return ariaLabel.trim(); + + const labelledBy = node.getAttribute("aria-labelledby"); + if (labelledBy) { + const text = labelledBy + .split(/\s+/) + .map((id) => node.ownerDocument.getElementById(id)?.textContent?.trim() || "") + .filter(Boolean) + .join(" "); + if (text) return text; + } + + if ("labels" in node) { + const labels = (node as HTMLInputElement | HTMLTextAreaElement | HTMLSelectElement) + .labels; + const text = Array.from(labels || []) + .map((label) => label.textContent?.trim() || "") + .filter(Boolean) + .join(" "); + if (text) return text; + } + + return null; + } + + function getElementPlaceholder(node: HTMLElement): string | null { + if (node instanceof HTMLInputElement || node instanceof HTMLTextAreaElement) { + return node.placeholder || null; + } + return null; + } + + function getElementAutocomplete(node: HTMLElement): string | null { + if ( + node instanceof HTMLInputElement || + node instanceof HTMLTextAreaElement || + node instanceof HTMLSelectElement + ) { + return node.autocomplete || null; + } + return null; + } + }, ref); + } + + async getFormSubmissionContext( + ref: string, + trigger: FormSubmissionTrigger = "click", + ): Promise { + const locator = await this.validateElementRef(ref); + + return locator.evaluate( + (element, { submitterRef, trigger }): FormSubmissionContext | null => { + const el = element as HTMLElement; + if (!canSubmitForm(el, trigger)) return null; + + const form = getSubmissionForm(el); + if (!form) return null; + + const fields = Array.from(form.elements) + .filter( + (field): field is HTMLInputElement | HTMLTextAreaElement | HTMLSelectElement => + field instanceof HTMLInputElement || + field instanceof HTMLTextAreaElement || + field instanceof HTMLSelectElement, + ) + .filter((field) => !field.disabled) + .map((field) => ({ + ref: field.getAttribute("data-pilo-ref"), + name: field.name || null, + tagName: field.tagName.toLowerCase(), + inputType: field instanceof HTMLInputElement ? field.type.toLowerCase() : null, + autocomplete: "autocomplete" in field ? field.autocomplete || null : null, + })); + + return { + submitterRef, + formId: form.id || null, + actionUrl: form.action || null, + method: form.method?.toLowerCase() || null, + fields, + }; + + function getSubmissionForm(node: HTMLElement): HTMLFormElement | null { + if ( + node instanceof HTMLButtonElement || + node instanceof HTMLInputElement || + node instanceof HTMLTextAreaElement || + node instanceof HTMLSelectElement + ) { + return node.form; + } + return node.closest("form"); + } + + function canSubmitForm(node: HTMLElement, submitTrigger: FormSubmissionTrigger): boolean { + if (submitTrigger === "click") { + if (node instanceof HTMLButtonElement) { + return node.type === "submit"; + } + if (node instanceof HTMLInputElement) { + return node.type === "submit" || node.type === "image"; + } + return false; + } + + if (node instanceof HTMLTextAreaElement || node instanceof HTMLSelectElement) + return false; + if (!(node instanceof HTMLInputElement)) return false; + return ![ + "button", + "checkbox", + "color", + "file", + "hidden", + "radio", + "range", + "reset", + "submit", + ].includes(node.type); + } + }, + { submitterRef: ref, trigger }, + ); + } + async performAction(ref: string, action: PageAction, value?: string): Promise { if (!this.page) throw new Error("Browser not started"); return withSpan( diff --git a/packages/core/src/core.ts b/packages/core/src/core.ts index 75e4f2c7..e1f7e0d6 100644 --- a/packages/core/src/core.ts +++ b/packages/core/src/core.ts @@ -5,7 +5,12 @@ */ export { WebAgent } from "./webAgent.js"; -export type { AriaBrowser } from "./browser/ariaBrowser.js"; +export type { + AriaBrowser, + FieldMetadata, + FormSubmissionContext, + FormSubmissionTrigger, +} from "./browser/ariaBrowser.js"; export { PageAction, LoadState } from "./browser/ariaBrowser.js"; export type { TaskExecutionResult, TaskError, WebAgentOptions } from "./webAgent.js"; export { TaskErrorCode } from "./webAgent.js"; diff --git a/packages/core/src/security/actionFirewall.ts b/packages/core/src/security/actionFirewall.ts new file mode 100644 index 00000000..8718a754 --- /dev/null +++ b/packages/core/src/security/actionFirewall.ts @@ -0,0 +1,140 @@ +import type { FieldMetadata, FormSubmissionContext } from "../browser/ariaBrowser.js"; + +export const SECURITY_BLOCKED_UNAUTHORIZED_FILL = + "Security policy blocked filling a submittable form field without user approval"; + +export const SECURITY_BLOCKED_UNAUTHORIZED_SUBMIT = + "Security policy blocked submitting a form containing unauthorized agent-filled data"; + +export type FillSource = "agent" | "user-approved"; + +export type ActionFirewallResult = + | { allowed: true; operational?: boolean } + | { allowed: false; reason: string; isRecoverable: true }; + +const OPERATIONAL_INPUT_TYPES = new Set([ + "search", + "url", + "number", + "date", + "datetime-local", + "month", + "time", + "week", + "color", + "range", +]); + +const OPERATIONAL_ROLES = new Set(["searchbox", "combobox", "spinbutton", "slider"]); + +const SENSITIVE_AUTOCOMPLETE_TOKENS = [ + "name", + "honorific-prefix", + "given-name", + "additional-name", + "family-name", + "honorific-suffix", + "nickname", + "email", + "username", + "new-password", + "current-password", + "one-time-code", + "organization", + "street-address", + "address-line1", + "address-line2", + "address-line3", + "address-level1", + "address-level2", + "address-level3", + "address-level4", + "country", + "country-name", + "postal-code", + "cc-name", + "cc-given-name", + "cc-additional-name", + "cc-family-name", + "cc-number", + "cc-exp", + "cc-exp-month", + "cc-exp-year", + "cc-csc", + "cc-type", + "transaction-currency", + "transaction-amount", + "language", + "bday", + "bday-day", + "bday-month", + "bday-year", + "sex", + "tel", + "tel-country-code", + "tel-national", + "tel-area-code", + "tel-local", + "tel-local-prefix", + "tel-local-suffix", + "tel-extension", + "impp", + "url", + "photo", +]; + +export function assessFill(input: { + field: FieldMetadata; + source: FillSource; +}): ActionFirewallResult { + if (input.source === "user-approved") { + return { allowed: true }; + } + + if (isOperationalField(input.field)) { + return { allowed: true, operational: true }; + } + + return { + allowed: false, + reason: SECURITY_BLOCKED_UNAUTHORIZED_FILL, + isRecoverable: true, + }; +} + +export function assessFormSubmission(input: { + form: FormSubmissionContext; + approvedRefs: { has(ref: string): boolean }; + agentFilledRefs: ReadonlySet; + operationalRefs: ReadonlySet; +}): ActionFirewallResult { + for (const field of input.form.fields) { + if (!field.ref || !input.agentFilledRefs.has(field.ref)) continue; + if (input.approvedRefs.has(field.ref) || input.operationalRefs.has(field.ref)) continue; + + return { + allowed: false, + reason: SECURITY_BLOCKED_UNAUTHORIZED_SUBMIT, + isRecoverable: true, + }; + } + + return { allowed: true }; +} + +function isOperationalField(field: FieldMetadata): boolean { + const inputType = field.inputType?.toLowerCase() ?? null; + const role = field.role?.toLowerCase() ?? null; + + if (hasSensitiveAutocomplete(field.autocomplete)) return false; + if (field.tagName.toLowerCase() === "textarea" || field.isContentEditable) return false; + if (inputType && OPERATIONAL_INPUT_TYPES.has(inputType)) return true; + if (role && OPERATIONAL_ROLES.has(role)) return true; + return false; +} + +function hasSensitiveAutocomplete(autocomplete: string | null): boolean { + if (!autocomplete) return false; + const tokens = autocomplete.toLowerCase().split(/\s+/); + return tokens.some((token) => SENSITIVE_AUTOCOMPLETE_TOKENS.includes(token)); +} diff --git a/packages/core/src/tools/webActionTools.ts b/packages/core/src/tools/webActionTools.ts index f1f415de..ea1bbb88 100644 --- a/packages/core/src/tools/webActionTools.ts +++ b/packages/core/src/tools/webActionTools.ts @@ -13,6 +13,7 @@ import { buildExtractionPrompt, TOOL_STRINGS } from "../prompts.js"; import type { ProviderConfig } from "../provider.js"; import { BrowserException } from "../errors.js"; import { generateTextWithRetry } from "../utils/retry.js"; +import { assessFill, assessFormSubmission } from "../security/actionFirewall.js"; import { withSpan, SpanStatusCode, @@ -25,6 +26,9 @@ interface WebActionContext { eventEmitter: WebAgentEventEmitter; providerConfig: ProviderConfig; abortSignal?: AbortSignal; + approvedRefs?: { has(ref: string): boolean }; + agentFilledRefs?: Set; + operationalRefs?: Set; } /** @@ -45,6 +49,86 @@ type ActionResult = { isRecoverable?: boolean; }; +const EMPTY_APPROVED_REFS = { has: () => false }; + +function recoverableBrowserErrorResult( + action: string, + error: BrowserException, + context: WebActionContext, + ref?: string, + value?: string | number, +): ActionResult { + context.eventEmitter.emit(WebAgentEventType.BROWSER_ACTION_COMPLETED, { + success: false, + action, + error: error.message, + isRecoverable: true, + }); + + return { + success: false, + action, + ...(ref && { ref }), + ...(value !== undefined && { value }), + error: error.message, + isRecoverable: true, + }; +} + +function securityBlockedResult( + action: string, + error: string, + context: WebActionContext, + ref?: string, +): ActionResult { + context.eventEmitter.emit(WebAgentEventType.BROWSER_ACTION_COMPLETED, { + success: false, + action, + error, + isRecoverable: true, + }); + + return { + success: false, + action, + ...(ref && { ref }), + error, + isRecoverable: true, + }; +} + +async function assessFormSubmissionForAction( + action: PageAction.Click | PageAction.Enter, + context: WebActionContext, + ref: string, +): Promise { + try { + const form = await context.browser.getFormSubmissionContext( + ref, + action === PageAction.Click ? "click" : "enter", + ); + if (!form) return null; + + const assessment = assessFormSubmission({ + form, + approvedRefs: context.approvedRefs ?? EMPTY_APPROVED_REFS, + agentFilledRefs: context.agentFilledRefs ?? new Set(), + operationalRefs: context.operationalRefs ?? new Set(), + }); + + if (!assessment.allowed) { + return securityBlockedResult(action, assessment.reason, context, ref); + } + } catch (error) { + if (error instanceof BrowserException) { + return recoverableBrowserErrorResult(action, error, context, ref); + } + throw error; + } + + return null; +} + /** * Helper function to perform an action with full error handling and logging * Handles browser exceptions and converts them to recoverable errors for the agent @@ -146,6 +230,9 @@ export function createWebActionTools(context: WebActionContext) { ref: z.string().describe(TOOL_STRINGS.webActions.common.elementRef), }), execute: async ({ ref }) => { + const blocked = await assessFormSubmissionForAction(PageAction.Click, context, ref); + if (blocked) return blocked; + return await performActionWithValidation(PageAction.Click, context, ref); }, }), @@ -157,7 +244,32 @@ export function createWebActionTools(context: WebActionContext) { value: z.string().describe(TOOL_STRINGS.webActions.common.textValue), }), execute: async ({ ref, value }) => { - return await performActionWithValidation(PageAction.Fill, context, ref, value); + try { + const metadata = await context.browser.getFieldMetadata(ref); + const userApproved = Boolean(context.approvedRefs?.has(ref)); + const assessment = assessFill({ + field: metadata, + source: userApproved ? "user-approved" : "agent", + }); + + if (!assessment.allowed) { + return securityBlockedResult(PageAction.Fill, assessment.reason, context, ref); + } + + const result = await performActionWithValidation(PageAction.Fill, context, ref, value); + if (result.success && !userApproved) { + context.agentFilledRefs?.add(ref); + if (assessment.operational) { + context.operationalRefs?.add(ref); + } + } + return result; + } catch (error) { + if (error instanceof BrowserException) { + return recoverableBrowserErrorResult(PageAction.Fill, error, context, ref); + } + throw error; + } }, }), @@ -218,6 +330,9 @@ export function createWebActionTools(context: WebActionContext) { ref: z.string().describe(TOOL_STRINGS.webActions.common.elementRef), }), execute: async ({ ref }) => { + const blocked = await assessFormSubmissionForAction(PageAction.Enter, context, ref); + if (blocked) return blocked; + return await performActionWithValidation(PageAction.Enter, context, ref); }, }), diff --git a/packages/core/src/webAgent.ts b/packages/core/src/webAgent.ts index 05d68a56..20a2ffce 100644 --- a/packages/core/src/webAgent.ts +++ b/packages/core/src/webAgent.ts @@ -44,7 +44,7 @@ import { SearchService } from "./search/searchService.js"; import { createPlanningTools } from "./tools/planningTools.js"; import { createValidationTools } from "./tools/validationTools.js"; import { createTabstackTools } from "./tools/tabstackTools.js"; -import { createInteractiveTools, ApprovedRefs, FILL_GATE_ERROR } from "./tools/interactiveTools.js"; +import { createInteractiveTools, ApprovedRefs } from "./tools/interactiveTools.js"; import { createTabstackClient } from "./tabstack/client.js"; import type { UserDataCallback } from "./types/interactive.js"; import { nanoid } from "nanoid"; @@ -388,12 +388,30 @@ export class WebAgent { task: string, executionState: ExecutionState, ): Promise<{ success: boolean; finalAnswer: string | null; error?: TaskError }> { + // Only include interactive tools if a callback is provided + let interactiveToolSet: Record = {}; + let approvedRefs: ApprovedRefs | null = null; + const agentFilledRefs = new Set(); + const operationalRefs = new Set(); + if (this.onUserDataRequired) { + const result = createInteractiveTools({ + callback: this.onUserDataRequired, + browser: this.browser, + eventEmitter: this.eventEmitter, + }); + interactiveToolSet = result.tools; + approvedRefs = result.approvedRefs; + } + // Setup tools once const webActionTools = createWebActionTools({ browser: this.browser, eventEmitter: this.eventEmitter, providerConfig: this.providerConfig, abortSignal: this.abortSignal, + approvedRefs: approvedRefs ?? undefined, + agentFilledRefs, + operationalRefs, }); // Only include search tools if a search service was created @@ -409,51 +427,6 @@ export class WebAgent { }) : {}; - // Only include interactive tools if a callback is provided - let interactiveToolSet: Record = {}; - let approvedRefs: ApprovedRefs | null = null; - if (this.onUserDataRequired) { - const result = createInteractiveTools({ - callback: this.onUserDataRequired, - browser: this.browser, - eventEmitter: this.eventEmitter, - }); - interactiveToolSet = result.tools; - approvedRefs = result.approvedRefs; - } - - // When interactive mode is on, gate fill/select/check to require approved refs. - // On first unapproved attempt, return an error. If the agent retries the same ref - // (indicating it's a navigation/search field, not a user-data form field), allow it - // through on the second attempt to avoid a deadlock. - if (approvedRefs) { - const warnedRefs = new Set(); - const gatedActions = ["fill", "select", "check"] as const; - for (const actionName of gatedActions) { - const originalTool = webActionTools[actionName]; - if (originalTool) { - const originalExecute = originalTool.execute!; - (originalTool as any).execute = async (args: any, options: any) => { - if (args.ref && !approvedRefs!.has(args.ref)) { - if (!warnedRefs.has(args.ref)) { - // First attempt: warn and block - warnedRefs.add(args.ref); - return { - success: false, - action: actionName, - ref: args.ref, - error: FILL_GATE_ERROR, - isRecoverable: true, - }; - } - // Second attempt: agent confirmed this is a navigation/search field, allow it - } - return originalExecute(args, options); - }; - } - } - } - // Merge all tools const allTools = { ...webActionTools, ...searchTools, ...tabstackTools, ...interactiveToolSet }; @@ -510,6 +483,8 @@ export class WebAgent { if (approvedRefs) { approvedRefs.clear(); } + agentFilledRefs.clear(); + operationalRefs.clear(); await this.addPageSnapshot(); } diff --git a/packages/core/test/security/actionFirewall.test.ts b/packages/core/test/security/actionFirewall.test.ts new file mode 100644 index 00000000..ad4fa50e --- /dev/null +++ b/packages/core/test/security/actionFirewall.test.ts @@ -0,0 +1,149 @@ +import { describe, expect, it } from "vitest"; +import type { FieldMetadata, FormSubmissionContext } from "../../src/browser/ariaBrowser.js"; +import { + assessFill, + assessFormSubmission, + SECURITY_BLOCKED_UNAUTHORIZED_FILL, + SECURITY_BLOCKED_UNAUTHORIZED_SUBMIT, +} from "../../src/security/actionFirewall.js"; + +function field(overrides: Partial = {}): FieldMetadata { + return { + ref: "E1", + tagName: "input", + inputType: "text", + role: null, + name: null, + label: null, + placeholder: null, + autocomplete: null, + isContentEditable: false, + formId: "form-1", + formAction: "https://example.com/search", + formMethod: "get", + ...overrides, + }; +} + +function form(overrides: Partial = {}): FormSubmissionContext { + return { + submitterRef: "E9", + formId: "form-1", + actionUrl: "https://example.com/submit", + method: "post", + fields: [], + ...overrides, + }; +} + +describe("actionFirewall", () => { + it("allows agent fills for operational search fields", () => { + const result = assessFill({ + field: field({ inputType: "search", label: "Search products" }), + source: "agent", + }); + + expect(result.allowed).toBe(true); + if (!result.allowed) throw new Error("Expected fill to be allowed"); + expect(result.operational).toBe(true); + }); + + it("blocks agent fills for freeform text fields", () => { + const result = assessFill({ + field: field({ label: "Message" }), + source: "agent", + }); + + expect(result.allowed).toBe(false); + if (result.allowed) throw new Error("Expected fill to be blocked"); + expect(result.reason).toBe(SECURITY_BLOCKED_UNAUTHORIZED_FILL); + }); + + it("does not classify fields as operational from label text alone", () => { + const result = assessFill({ + field: field({ inputType: "text", label: "Search products", placeholder: "Search" }), + source: "agent", + }); + + expect(result.allowed).toBe(false); + }); + + it("blocks inherently freeform fields even when they have operational roles", () => { + const result = assessFill({ + field: field({ tagName: "textarea", inputType: null, role: "searchbox" }), + source: "agent", + }); + + expect(result.allowed).toBe(false); + }); + + it("blocks fields with sensitive autocomplete even when the input type looks operational", () => { + const result = assessFill({ + field: field({ inputType: "url", autocomplete: "url" }), + source: "agent", + }); + + expect(result.allowed).toBe(false); + }); + + it("allows user-approved freeform fields", () => { + const result = assessFill({ + field: field({ label: "Message" }), + source: "user-approved", + }); + + expect(result.allowed).toBe(true); + }); + + it("blocks submitting forms with unauthorized agent-filled fields", () => { + const result = assessFormSubmission({ + form: form({ + fields: [ + { + ref: "E1", + name: "message", + tagName: "textarea", + inputType: null, + autocomplete: null, + }, + ], + }), + approvedRefs: new Set(), + agentFilledRefs: new Set(["E1"]), + operationalRefs: new Set(), + }); + + expect(result.allowed).toBe(false); + if (result.allowed) throw new Error("Expected submit to be blocked"); + expect(result.reason).toBe(SECURITY_BLOCKED_UNAUTHORIZED_SUBMIT); + expect(result.reason).not.toContain("do not leak this value"); + }); + + it("allows submitting forms when agent-filled fields are approved or operational", () => { + const result = assessFormSubmission({ + form: form({ + fields: [ + { + ref: "E1", + name: "q", + tagName: "input", + inputType: "search", + autocomplete: null, + }, + { + ref: "E2", + name: "email", + tagName: "input", + inputType: "email", + autocomplete: "email", + }, + ], + }), + approvedRefs: new Set(["E2"]), + agentFilledRefs: new Set(["E1", "E2"]), + operationalRefs: new Set(["E1"]), + }); + + expect(result.allowed).toBe(true); + }); +}); diff --git a/packages/core/test/tools/webActionTools.test.ts b/packages/core/test/tools/webActionTools.test.ts index 34374e74..94a9213c 100644 --- a/packages/core/test/tools/webActionTools.test.ts +++ b/packages/core/test/tools/webActionTools.test.ts @@ -1,6 +1,12 @@ import { describe, it, expect, vi, beforeEach, afterEach } from "vitest"; import { createWebActionTools } from "../../src/tools/webActionTools.js"; -import { AriaBrowser, PageAction } from "../../src/browser/ariaBrowser.js"; +import { + AriaBrowser, + FieldMetadata, + FormSubmissionTrigger, + FormSubmissionContext, + PageAction, +} from "../../src/browser/ariaBrowser.js"; import { WebAgentEventEmitter, WebAgentEventType } from "../../src/events.js"; import { LanguageModel } from "ai"; import { z } from "zod"; @@ -30,6 +36,8 @@ class MockBrowser implements AriaBrowser { browserName = "mock-browser"; public url = "https://example.com"; public title = "Example Page"; + public fieldMetadata = new Map(); + public formSubmissionContexts = new Map(); async start(): Promise {} async shutdown(): Promise {} @@ -73,6 +81,32 @@ class MockBrowser implements AriaBrowser { // Mock implementation - can be configured to throw errors for testing } + async getFieldMetadata(ref: string): Promise { + return ( + this.fieldMetadata.get(ref) ?? { + ref, + tagName: "input", + inputType: "search", + role: "searchbox", + name: "q", + label: "Search", + placeholder: "Search", + autocomplete: null, + isContentEditable: false, + formId: "search-form", + formAction: "https://example.com/search", + formMethod: "get", + } + ); + } + + async getFormSubmissionContext( + ref: string, + _trigger?: FormSubmissionTrigger, + ): Promise { + return this.formSubmissionContexts.get(ref) ?? null; + } + async waitForLoadState(): Promise {} async runInTemporaryTab(fn: (tab: any) => Promise): Promise { @@ -273,6 +307,76 @@ describe("Web Action Tools", () => { }); }); + it("should block agent fill of freeform submittable fields", async () => { + mockBrowser.fieldMetadata.set("input1", { + ref: "input1", + tagName: "textarea", + inputType: null, + role: null, + name: "message", + label: "Message", + placeholder: "Message", + autocomplete: null, + isContentEditable: false, + formId: "contact", + formAction: "https://example.com/contact", + formMethod: "post", + }); + const performActionSpy = vi.spyOn(mockBrowser, "performAction"); + + const result = await tools.fill.execute({ ref: "input1", value: "generated payload" }); + + expect(performActionSpy).not.toHaveBeenCalled(); + expect(result).toEqual({ + success: false, + action: "fill", + ref: "input1", + error: "Security policy blocked filling a submittable form field without user approval", + isRecoverable: true, + }); + expect(result.value).toBeUndefined(); + }); + + it("should allow approved freeform field fills", async () => { + const performActionSpy = vi.spyOn(mockBrowser, "performAction"); + mockBrowser.fieldMetadata.set("input1", { + ref: "input1", + tagName: "textarea", + inputType: null, + role: null, + name: "message", + label: "Message", + placeholder: "Message", + autocomplete: null, + isContentEditable: false, + formId: "contact", + formAction: "https://example.com/contact", + formMethod: "post", + }); + context.approvedRefs = new Set(["input1"]); + tools = createWebActionTools(context); + + const result = await tools.fill.execute({ ref: "input1", value: "user-provided value" }); + + expect(performActionSpy).toHaveBeenCalledWith( + "input1", + PageAction.Fill, + "user-provided value", + ); + expect(result.success).toBe(true); + }); + + it("should track agent-filled operational refs", async () => { + context.agentFilledRefs = new Set(); + context.operationalRefs = new Set(); + tools = createWebActionTools(context); + + await tools.fill.execute({ ref: "input1", value: "pilo" }); + + expect(context.agentFilledRefs.has("input1")).toBe(true); + expect(context.operationalRefs.has("input1")).toBe(true); + }); + it("should emit browser action events", async () => { const emitSpy = vi.spyOn(eventEmitter, "emit"); @@ -509,6 +613,106 @@ describe("Web Action Tools", () => { expect(invalid.success).toBe(false); }); + it("should block click submit when form contains unauthorized agent-filled values", async () => { + const performActionSpy = vi.spyOn(mockBrowser, "performAction"); + context.agentFilledRefs = new Set(["message"]); + context.operationalRefs = new Set(); + context.approvedRefs = new Set(); + mockBrowser.formSubmissionContexts.set("submit1", { + submitterRef: "submit1", + formId: "contact", + actionUrl: "https://example.com/contact", + method: "post", + fields: [ + { + ref: "message", + name: "message", + tagName: "textarea", + inputType: null, + autocomplete: null, + }, + ], + }); + tools = createWebActionTools(context); + + const result = await tools.click.execute({ ref: "submit1" }); + + expect(performActionSpy).not.toHaveBeenCalled(); + expect(result.success).toBe(false); + expect(result.error).toBe( + "Security policy blocked submitting a form containing unauthorized agent-filled data", + ); + expect(JSON.stringify(result)).not.toContain("generated payload"); + }); + + it("should allow click submit when form fields are approved or operational", async () => { + const performActionSpy = vi.spyOn(mockBrowser, "performAction"); + context.agentFilledRefs = new Set(["query", "email"]); + context.operationalRefs = new Set(["query"]); + context.approvedRefs = new Set(["email"]); + mockBrowser.formSubmissionContexts.set("submit1", { + submitterRef: "submit1", + formId: "search", + actionUrl: "https://example.com/search", + method: "get", + fields: [ + { + ref: "query", + name: "q", + tagName: "input", + inputType: "search", + autocomplete: null, + }, + { + ref: "email", + name: "email", + tagName: "input", + inputType: "email", + autocomplete: "email", + }, + ], + }); + tools = createWebActionTools(context); + + const result = await tools.click.execute({ ref: "submit1" }); + + expect(performActionSpy).toHaveBeenCalledWith("submit1", PageAction.Click, undefined); + expect(result.success).toBe(true); + }); + + it("should block enter submit when form contains unauthorized agent-filled fields", async () => { + const formContextSpy = vi.spyOn(mockBrowser, "getFormSubmissionContext"); + const performActionSpy = vi.spyOn(mockBrowser, "performAction"); + context.agentFilledRefs = new Set(["message"]); + context.operationalRefs = new Set(); + context.approvedRefs = new Set(); + mockBrowser.formSubmissionContexts.set("input1", { + submitterRef: "input1", + formId: "contact", + actionUrl: "https://example.com/contact", + method: "post", + fields: [ + { + ref: "message", + name: "message", + tagName: "textarea", + inputType: null, + autocomplete: null, + }, + ], + }); + tools = createWebActionTools(context); + + const result = await tools.enter.execute({ ref: "input1" }); + + expect(formContextSpy).toHaveBeenCalledWith("input1", "enter"); + expect(performActionSpy).not.toHaveBeenCalled(); + expect(result.success).toBe(false); + expect(result.error).toBe( + "Security policy blocked submitting a form containing unauthorized agent-filled data", + ); + }); + it("should execute back action successfully", async () => { const performActionSpy = vi.spyOn(mockBrowser, "performAction"); diff --git a/packages/core/test/webAgent.test.ts b/packages/core/test/webAgent.test.ts index 421456b2..afed888a 100644 --- a/packages/core/test/webAgent.test.ts +++ b/packages/core/test/webAgent.test.ts @@ -1,6 +1,12 @@ import { describe, it, expect, vi, beforeEach, afterEach } from "vitest"; import { WebAgent, WebAgentOptions } from "../src/webAgent.js"; -import { AriaBrowser, PageAction } from "../src/browser/ariaBrowser.js"; +import { + AriaBrowser, + FieldMetadata, + FormSubmissionTrigger, + FormSubmissionContext, + PageAction, +} from "../src/browser/ariaBrowser.js"; import { WebAgentEventEmitter, WebAgentEventType } from "../src/events.js"; import { LanguageModel, streamText } from "ai"; import { Logger } from "../src/loggers/types.js"; @@ -152,6 +158,8 @@ class MockBrowser implements AriaBrowser { `; private markdown = "# Mock Page\nContent here"; + fieldMetadata = new Map(); + formSubmissionContexts = new Map(); async start(): Promise {} async shutdown(): Promise {} @@ -191,6 +199,32 @@ class MockBrowser implements AriaBrowser { async performAction(_ref: string, _action: PageAction, _value?: string): Promise {} + async getFieldMetadata(ref: string): Promise { + return ( + this.fieldMetadata.get(ref) ?? { + ref, + tagName: "input", + inputType: "search", + role: "searchbox", + name: "q", + label: "Search", + placeholder: "Search", + autocomplete: null, + isContentEditable: false, + formId: "search-form", + formAction: "https://example.com/search", + formMethod: "get", + } + ); + } + + async getFormSubmissionContext( + ref: string, + _trigger?: FormSubmissionTrigger, + ): Promise { + return this.formSubmissionContexts.get(ref) ?? null; + } + async waitForLoadState(): Promise {} async runInTemporaryTab(fn: (tab: any) => Promise): Promise { diff --git a/packages/extension/src/background/ExtensionBrowser.ts b/packages/extension/src/background/ExtensionBrowser.ts index a3ad81c5..db4920ab 100644 --- a/packages/extension/src/background/ExtensionBrowser.ts +++ b/packages/extension/src/background/ExtensionBrowser.ts @@ -1,5 +1,10 @@ import browser from "webextension-polyfill"; -import type { AriaBrowser } from "pilo-core/core"; +import type { + AriaBrowser, + FieldMetadata, + FormSubmissionContext, + FormSubmissionTrigger, +} from "pilo-core/core"; import { PageAction, LoadState } from "pilo-core/core"; import type { Tabs } from "webextension-polyfill"; import { createLogger } from "../shared/utils/logger"; @@ -302,6 +307,201 @@ export class ExtensionBrowser implements AriaBrowser { } } + async getFieldMetadata(ref: string): Promise { + const tab = await this.getActiveTab(); + await this.ensureContentScript(); + + const [{ result }] = await browser.scripting.executeScript({ + target: { tabId: tab.id! }, + func: (elementRef: string) => { + const element = document.querySelector(`[data-pilo-ref="${elementRef}"]`); + if (!(element instanceof HTMLElement)) { + throw new Error(`Element with ref ${elementRef} not found in DOM`); + } + + const input = element instanceof HTMLInputElement ? element : null; + const form = getElementForm(element); + + return { + ref: elementRef, + tagName: element.tagName.toLowerCase(), + inputType: input?.type?.toLowerCase() ?? null, + role: element.getAttribute("role"), + name: getElementName(element), + label: getElementLabel(element), + placeholder: getElementPlaceholder(element), + autocomplete: getElementAutocomplete(element), + isContentEditable: element.isContentEditable, + formId: form?.id || null, + formAction: form?.action || null, + formMethod: form?.method?.toLowerCase() || null, + }; + + function getElementForm(node: HTMLElement): HTMLFormElement | null { + if ( + node instanceof HTMLInputElement || + node instanceof HTMLTextAreaElement || + node instanceof HTMLSelectElement || + node instanceof HTMLButtonElement + ) { + return node.form; + } + return node.closest("form"); + } + + function getElementName(node: HTMLElement): string | null { + if ( + node instanceof HTMLInputElement || + node instanceof HTMLTextAreaElement || + node instanceof HTMLSelectElement || + node instanceof HTMLButtonElement + ) { + return node.name || null; + } + return node.getAttribute("name"); + } + + function getElementLabel(node: HTMLElement): string | null { + const ariaLabel = node.getAttribute("aria-label"); + if (ariaLabel?.trim()) return ariaLabel.trim(); + + const labelledBy = node.getAttribute("aria-labelledby"); + if (labelledBy) { + const text = labelledBy + .split(/\s+/) + .map((id) => node.ownerDocument.getElementById(id)?.textContent?.trim() || "") + .filter(Boolean) + .join(" "); + if (text) return text; + } + + if ("labels" in node) { + const labels = (node as HTMLInputElement | HTMLTextAreaElement | HTMLSelectElement) + .labels; + const text = Array.from(labels || []) + .map((label) => label.textContent?.trim() || "") + .filter(Boolean) + .join(" "); + if (text) return text; + } + + return null; + } + + function getElementPlaceholder(node: HTMLElement): string | null { + if (node instanceof HTMLInputElement || node instanceof HTMLTextAreaElement) { + return node.placeholder || null; + } + return null; + } + + function getElementAutocomplete(node: HTMLElement): string | null { + if ( + node instanceof HTMLInputElement || + node instanceof HTMLTextAreaElement || + node instanceof HTMLSelectElement + ) { + return node.autocomplete || null; + } + return null; + } + }, + args: [ref], + }); + + return result as FieldMetadata; + } + + async getFormSubmissionContext( + ref: string, + trigger: FormSubmissionTrigger = "click", + ): Promise { + const tab = await this.getActiveTab(); + await this.ensureContentScript(); + + const [{ result }] = await browser.scripting.executeScript({ + target: { tabId: tab.id! }, + func: (paramsJson: string) => { + const { ref: submitterRef, trigger: submitTrigger } = JSON.parse(paramsJson) as { + ref: string; + trigger: FormSubmissionTrigger; + }; + const element = document.querySelector(`[data-pilo-ref="${submitterRef}"]`); + if (!(element instanceof HTMLElement)) { + throw new Error(`Element with ref ${submitterRef} not found in DOM`); + } + if (!canSubmitForm(element, submitTrigger)) return null; + + const form = getSubmissionForm(element); + if (!form) return null; + + const fields = Array.from(form.elements) + .filter( + (field): field is HTMLInputElement | HTMLTextAreaElement | HTMLSelectElement => + field instanceof HTMLInputElement || + field instanceof HTMLTextAreaElement || + field instanceof HTMLSelectElement, + ) + .filter((field) => !field.disabled) + .map((field) => ({ + ref: field.getAttribute("data-pilo-ref"), + name: field.name || null, + tagName: field.tagName.toLowerCase(), + inputType: field instanceof HTMLInputElement ? field.type.toLowerCase() : null, + autocomplete: "autocomplete" in field ? field.autocomplete || null : null, + })); + + return { + submitterRef, + formId: form.id || null, + actionUrl: form.action || null, + method: form.method?.toLowerCase() || null, + fields, + }; + + function getSubmissionForm(node: HTMLElement): HTMLFormElement | null { + if ( + node instanceof HTMLButtonElement || + node instanceof HTMLInputElement || + node instanceof HTMLTextAreaElement || + node instanceof HTMLSelectElement + ) { + return node.form; + } + return node.closest("form"); + } + + function canSubmitForm(node: HTMLElement, submitTrigger: FormSubmissionTrigger): boolean { + if (submitTrigger === "click") { + if (node instanceof HTMLButtonElement) return node.type === "submit"; + if (node instanceof HTMLInputElement) { + return node.type === "submit" || node.type === "image"; + } + return false; + } + + if (node instanceof HTMLTextAreaElement || node instanceof HTMLSelectElement) + return false; + if (!(node instanceof HTMLInputElement)) return false; + return ![ + "button", + "checkbox", + "color", + "file", + "hidden", + "radio", + "range", + "reset", + "submit", + ].includes(node.type); + } + }, + args: [JSON.stringify({ ref, trigger })], + }); + + return result as FormSubmissionContext | null; + } + async performAction(ref: string, action: PageAction, value?: string): Promise { console.log( `ExtensionBrowser: performAction() called with ref: ${ref}, action: ${action}, value: ${value}`, From 1c8e6014ff3862b05d0afaaed9ee4de62b3361e0 Mon Sep 17 00:00:00 2001 From: sbrooke Date: Tue, 26 May 2026 14:32:22 -0400 Subject: [PATCH 4/7] fix(core): require action provenance tracking --- packages/core/src/tools/webActionTools.ts | 16 ++++++++++------ packages/core/test/tools/webActionTools.test.ts | 12 ++++++++++++ 2 files changed, 22 insertions(+), 6 deletions(-) diff --git a/packages/core/src/tools/webActionTools.ts b/packages/core/src/tools/webActionTools.ts index ea1bbb88..6a9dcd1a 100644 --- a/packages/core/src/tools/webActionTools.ts +++ b/packages/core/src/tools/webActionTools.ts @@ -27,8 +27,8 @@ interface WebActionContext { providerConfig: ProviderConfig; abortSignal?: AbortSignal; approvedRefs?: { has(ref: string): boolean }; - agentFilledRefs?: Set; - operationalRefs?: Set; + agentFilledRefs: Set; + operationalRefs: Set; } /** @@ -112,8 +112,8 @@ async function assessFormSubmissionForAction( const assessment = assessFormSubmission({ form, approvedRefs: context.approvedRefs ?? EMPTY_APPROVED_REFS, - agentFilledRefs: context.agentFilledRefs ?? new Set(), - operationalRefs: context.operationalRefs ?? new Set(), + agentFilledRefs: context.agentFilledRefs, + operationalRefs: context.operationalRefs, }); if (!assessment.allowed) { @@ -223,6 +223,10 @@ async function performActionWithValidation( } export function createWebActionTools(context: WebActionContext) { + if (!context.agentFilledRefs || !context.operationalRefs) { + throw new Error("Web action provenance tracking sets are required"); + } + return { click: tool({ description: TOOL_STRINGS.webActions.click.description, @@ -258,9 +262,9 @@ export function createWebActionTools(context: WebActionContext) { const result = await performActionWithValidation(PageAction.Fill, context, ref, value); if (result.success && !userApproved) { - context.agentFilledRefs?.add(ref); + context.agentFilledRefs.add(ref); if (assessment.operational) { - context.operationalRefs?.add(ref); + context.operationalRefs.add(ref); } } return result; diff --git a/packages/core/test/tools/webActionTools.test.ts b/packages/core/test/tools/webActionTools.test.ts index 94a9213c..bae69ca0 100644 --- a/packages/core/test/tools/webActionTools.test.ts +++ b/packages/core/test/tools/webActionTools.test.ts @@ -139,6 +139,8 @@ describe("Web Action Tools", () => { eventEmitter, providerConfig: { model: mockProvider }, abortSignal: undefined, + agentFilledRefs: new Set(), + operationalRefs: new Set(), }; tools = createWebActionTools(context); @@ -149,6 +151,16 @@ describe("Web Action Tools", () => { }); describe("Tool Structure", () => { + it("should require provenance tracking sets", () => { + expect(() => + createWebActionTools({ + browser: mockBrowser, + eventEmitter, + providerConfig: { model: mockProvider }, + } as any), + ).toThrow("Web action provenance tracking sets are required"); + }); + it("should create all expected tools", () => { expect(tools).toBeDefined(); expect(tools.click).toBeDefined(); From 55c48eb1a7c274994fb963eee8dc9b55bf5f59c5 Mon Sep 17 00:00:00 2001 From: sbrooke Date: Tue, 26 May 2026 14:43:01 -0400 Subject: [PATCH 5/7] fix(core): preserve form refs after fill actions --- packages/core/src/webAgent.ts | 9 +++- packages/core/test/webAgent.test.ts | 71 +++++++++++++++++++++++++++++ 2 files changed, 78 insertions(+), 2 deletions(-) diff --git a/packages/core/src/webAgent.ts b/packages/core/src/webAgent.ts index 20a2ffce..2da86138 100644 --- a/packages/core/src/webAgent.ts +++ b/packages/core/src/webAgent.ts @@ -1045,8 +1045,7 @@ export class WebAgent { throw new Error(actionOutput.error); } - // Determine if page changed (most actions change the page, except extract and webSearch) - const pageChanged = actionOutput.action !== "extract" && actionOutput.action !== "webSearch"; + const pageChanged = WebAgent.shouldRefreshPageSnapshotAfterAction(actionOutput.action); // Check for terminal actions if (actionOutput.isTerminal) { @@ -1114,6 +1113,12 @@ export class WebAgent { }; } + private static readonly ACTIONS_WITHOUT_PAGE_REFRESH = new Set(["extract", "webSearch", "fill"]); + + private static shouldRefreshPageSnapshotAfterAction(action: string): boolean { + return !WebAgent.ACTIONS_WITHOUT_PAGE_REFRESH.has(action); + } + /** * Check for repeated actions and handle accordingly * @returns Action result if intervention is needed, null otherwise diff --git a/packages/core/test/webAgent.test.ts b/packages/core/test/webAgent.test.ts index afed888a..0bf0ec12 100644 --- a/packages/core/test/webAgent.test.ts +++ b/packages/core/test/webAgent.test.ts @@ -897,6 +897,77 @@ describe("WebAgent", () => { expect(navigatedEvent?.data.url).toBe(startingUrl); }); + it("should keep the same snapshot after fill so form refs remain valid for submit", async () => { + mockGenerateTextWithRetry.mockResolvedValueOnce({ + text: "Planning", + toolResults: [ + { + type: "tool-result", + toolCallId: "plan_1", + toolName: "create_plan", + output: { + successCriteria: "Fill then submit", + plan: "1. Fill the form\n2. Submit the form", + }, + }, + ], + } as any); + + const snapshotSpy = vi.spyOn(mockBrowser, "getTreeWithRefs"); + + mockStreamText.mockReturnValueOnce( + createMockStreamResponse({ + text: "Fill", + toolResults: [ + { + type: "tool-result", + toolCallId: "fill_1", + toolName: "fill", + input: { ref: "input1", value: "context" }, + output: { + success: true, + action: "fill", + ref: "input1", + value: "context", + }, + }, + ], + response: { + messages: [{ role: "assistant", content: "Fill" }], + }, + }) as any, + ); + + mockStreamText.mockReturnValueOnce( + createMockStreamResponse({ + text: "Done", + toolResults: [ + { + type: "tool-result", + toolCallId: "done_1", + toolName: "done", + input: { result: "Complete" }, + output: { + success: true, + action: "done", + result: "Complete", + isTerminal: true, + }, + }, + ], + response: { + messages: [{ role: "assistant", content: "Done" }], + }, + }) as any, + ); + + mockGenerateTextWithRetry.mockResolvedValueOnce(mockValidationResponse("complete")); + + await webAgent.execute("Fill then submit", { startingUrl: "https://example.com" }); + + expect(snapshotSpy).toHaveBeenCalledTimes(1); + }); + it("should pass webSearchEnabled to planning prompt when search provider is set", async () => { // Create a WebAgent with a search provider enabled const searchAgent = new WebAgent(mockBrowser, { From 09650d5a8fde780b749c0eb5df5ae096aed0143f Mon Sep 17 00:00:00 2001 From: sbrooke Date: Tue, 26 May 2026 14:49:14 -0400 Subject: [PATCH 6/7] refactor(core): simplify action firewall helpers --- packages/core/src/security/actionFirewall.ts | 8 ++-- packages/core/src/tools/interactiveTools.ts | 16 +------- packages/core/src/tools/webActionTools.ts | 40 +++++--------------- 3 files changed, 14 insertions(+), 50 deletions(-) diff --git a/packages/core/src/security/actionFirewall.ts b/packages/core/src/security/actionFirewall.ts index 8718a754..ccbce00b 100644 --- a/packages/core/src/security/actionFirewall.ts +++ b/packages/core/src/security/actionFirewall.ts @@ -27,7 +27,7 @@ const OPERATIONAL_INPUT_TYPES = new Set([ const OPERATIONAL_ROLES = new Set(["searchbox", "combobox", "spinbutton", "slider"]); -const SENSITIVE_AUTOCOMPLETE_TOKENS = [ +const SENSITIVE_AUTOCOMPLETE_TOKENS = new Set([ "name", "honorific-prefix", "given-name", @@ -81,7 +81,7 @@ const SENSITIVE_AUTOCOMPLETE_TOKENS = [ "impp", "url", "photo", -]; +]); export function assessFill(input: { field: FieldMetadata; @@ -104,7 +104,7 @@ export function assessFill(input: { export function assessFormSubmission(input: { form: FormSubmissionContext; - approvedRefs: { has(ref: string): boolean }; + approvedRefs: ReadonlySet; agentFilledRefs: ReadonlySet; operationalRefs: ReadonlySet; }): ActionFirewallResult { @@ -136,5 +136,5 @@ function isOperationalField(field: FieldMetadata): boolean { function hasSensitiveAutocomplete(autocomplete: string | null): boolean { if (!autocomplete) return false; const tokens = autocomplete.toLowerCase().split(/\s+/); - return tokens.some((token) => SENSITIVE_AUTOCOMPLETE_TOKENS.includes(token)); + return tokens.some((token) => SENSITIVE_AUTOCOMPLETE_TOKENS.has(token)); } diff --git a/packages/core/src/tools/interactiveTools.ts b/packages/core/src/tools/interactiveTools.ts index 9f215e18..4869c679 100644 --- a/packages/core/src/tools/interactiveTools.ts +++ b/packages/core/src/tools/interactiveTools.ts @@ -26,21 +26,7 @@ interface InteractiveToolContext { * Used by the fill gate to prevent the agent from filling form fields with * generated data when interactive mode is on. */ -export class ApprovedRefs { - private refs = new Set(); - - add(ref: string): void { - this.refs.add(ref); - } - - has(ref: string): boolean { - return this.refs.has(ref); - } - - clear(): void { - this.refs.clear(); - } -} +export class ApprovedRefs extends Set {} /** * Maps field types from the request schema to the appropriate browser action. diff --git a/packages/core/src/tools/webActionTools.ts b/packages/core/src/tools/webActionTools.ts index 6a9dcd1a..9e8f113d 100644 --- a/packages/core/src/tools/webActionTools.ts +++ b/packages/core/src/tools/webActionTools.ts @@ -26,7 +26,7 @@ interface WebActionContext { eventEmitter: WebAgentEventEmitter; providerConfig: ProviderConfig; abortSignal?: AbortSignal; - approvedRefs?: { has(ref: string): boolean }; + approvedRefs?: ReadonlySet; agentFilledRefs: Set; operationalRefs: Set; } @@ -49,37 +49,14 @@ type ActionResult = { isRecoverable?: boolean; }; -const EMPTY_APPROVED_REFS = { has: () => false }; +const EMPTY_APPROVED_REFS = new Set(); -function recoverableBrowserErrorResult( - action: string, - error: BrowserException, - context: WebActionContext, - ref?: string, - value?: string | number, -): ActionResult { - context.eventEmitter.emit(WebAgentEventType.BROWSER_ACTION_COMPLETED, { - success: false, - action, - error: error.message, - isRecoverable: true, - }); - - return { - success: false, - action, - ...(ref && { ref }), - ...(value !== undefined && { value }), - error: error.message, - isRecoverable: true, - }; -} - -function securityBlockedResult( +function failedActionResult( action: string, error: string, context: WebActionContext, ref?: string, + value?: string | number, ): ActionResult { context.eventEmitter.emit(WebAgentEventType.BROWSER_ACTION_COMPLETED, { success: false, @@ -92,6 +69,7 @@ function securityBlockedResult( success: false, action, ...(ref && { ref }), + ...(value !== undefined && { value }), error, isRecoverable: true, }; @@ -117,11 +95,11 @@ async function assessFormSubmissionForAction( }); if (!assessment.allowed) { - return securityBlockedResult(action, assessment.reason, context, ref); + return failedActionResult(action, assessment.reason, context, ref); } } catch (error) { if (error instanceof BrowserException) { - return recoverableBrowserErrorResult(action, error, context, ref); + return failedActionResult(action, error.message, context, ref); } throw error; } @@ -257,7 +235,7 @@ export function createWebActionTools(context: WebActionContext) { }); if (!assessment.allowed) { - return securityBlockedResult(PageAction.Fill, assessment.reason, context, ref); + return failedActionResult(PageAction.Fill, assessment.reason, context, ref); } const result = await performActionWithValidation(PageAction.Fill, context, ref, value); @@ -270,7 +248,7 @@ export function createWebActionTools(context: WebActionContext) { return result; } catch (error) { if (error instanceof BrowserException) { - return recoverableBrowserErrorResult(PageAction.Fill, error, context, ref); + return failedActionResult(PageAction.Fill, error.message, context, ref); } throw error; } From b0124aa3cb076b79edc21bc69e0970ac798d3d2a Mon Sep 17 00:00:00 2001 From: sbrooke Date: Tue, 26 May 2026 14:58:18 -0400 Subject: [PATCH 7/7] docs(core): document action firewall state invariants --- packages/core/src/webAgent.ts | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/packages/core/src/webAgent.ts b/packages/core/src/webAgent.ts index 2da86138..f751d177 100644 --- a/packages/core/src/webAgent.ts +++ b/packages/core/src/webAgent.ts @@ -480,6 +480,8 @@ export class WebAgent { if (needsPageSnapshot) { // Clear approved refs when page changes: ARIA refs reset on each snapshot, // so old ref strings may now point to different DOM elements. + // Recoverable blocked action errors deliberately keep needsPageSnapshot=false + // so a blocked submit retry remains tied to the same agent-filled refs. if (approvedRefs) { approvedRefs.clear(); } @@ -1113,6 +1115,9 @@ export class WebAgent { }; } + // Fill keeps the current snapshot so refs and agent-filled provenance remain + // valid for a following submit check. This trades off immediate visibility + // into dynamic validation UI until a later action refreshes the snapshot. private static readonly ACTIONS_WITHOUT_PAGE_REFRESH = new Set(["extract", "webSearch", "fill"]); private static shouldRefreshPageSnapshotAfterAction(action: string): boolean {