diff --git a/.gitleaksignore b/.gitleaksignore index d5c038bc..d782fc0b 100644 --- a/.gitleaksignore +++ b/.gitleaksignore @@ -4,3 +4,11 @@ # False positive: test value in historical commit (fixed in current code) 12323684c2a470321a34fea845a9556eb8b644d1:test/cli/provider.test.ts:generic-api-key:223 + +# False positives: OpenRouter test values in historical commits +6d7d33837971d7976864be4ab0642c2f5938997e:packages/cli/test/extensionConfig.test.ts:generic-api-key:74 +6d7d33837971d7976864be4ab0642c2f5938997e:packages/cli/test/extensionConfig.test.ts:generic-api-key:80 +6d7d33837971d7976864be4ab0642c2f5938997e:packages/cli/test/extensionConfig.test.ts:generic-api-key:106 +cbd8a7a9fb3fd1bba93b68f888a1a4246a243405:packages/cli/test/extensionConfig.test.ts:generic-api-key:74 +cbd8a7a9fb3fd1bba93b68f888a1a4246a243405:packages/cli/test/extensionConfig.test.ts:generic-api-key:80 +cbd8a7a9fb3fd1bba93b68f888a1a4246a243405:packages/cli/test/extensionConfig.test.ts:generic-api-key:106 diff --git a/packages/core/src/browser/ariaBrowser.ts b/packages/core/src/browser/ariaBrowser.ts index ca1abacd..0ec38656 100644 --- a/packages/core/src/browser/ariaBrowser.ts +++ b/packages/core/src/browser/ariaBrowser.ts @@ -57,6 +57,39 @@ export interface TemporaryTab { waitForLoadState(state: LoadState, options?: { timeout?: number }): Promise; } +export interface FieldMetadata { + ref: string; + tagName: string; + inputType: string | null; + role: string | null; + name: string | null; + label: string | null; + placeholder: string | null; + autocomplete: string | null; + isContentEditable: boolean; + formId: string | null; + formAction: string | null; + formMethod: string | null; +} + +export interface FormFieldState { + ref: string | null; + name: string | null; + tagName: string; + inputType: string | null; + autocomplete: string | null; +} + +export interface FormSubmissionContext { + submitterRef: string; + formId: string | null; + actionUrl: string | null; + method: string | null; + 
/**
 * Returns structural metadata for the element behind `ref`, for use by the
 * form/action policy checks.
 *
 * The snapshot is computed inside the page via `locator.evaluate`. Form-level
 * values (`formId`, `formAction`, `formMethod`) are read through the prototype
 * accessors instead of plain property access: a form control named `id`,
 * `action` or `method` shadows the same-named property on its `<form>`, so
 * `form.method` can be an element rather than a string and calling
 * `.toLowerCase()` on it throws.
 *
 * @param ref Element ref; resolved through `validateElementRef`, whose errors
 *   propagate unchanged because the call sits outside the `try` block.
 * @returns Metadata for the element and its owning form (form fields are
 *   `null` when the element has no form).
 * @throws BrowserActionException when the in-page evaluation fails.
 */
async getFieldMetadata(ref: string): Promise<FieldMetadata> {
  const locator = await this.validateElementRef(ref);

  try {
    return await locator.evaluate((element, elementRef): FieldMetadata => {
      const el = element as HTMLElement;
      const input = el instanceof HTMLInputElement ? el : null;
      const form = getElementForm(el);

      return {
        ref: elementRef,
        tagName: el.tagName.toLowerCase(),
        inputType: input?.type?.toLowerCase() ?? null,
        role: el.getAttribute("role"),
        name: getElementName(el),
        label: getElementLabel(el),
        placeholder: getElementPlaceholder(el),
        autocomplete: getElementAutocomplete(el),
        isContentEditable: el.isContentEditable,
        formId: form ? readFormString(form, "id") : null,
        formAction: form ? readFormString(form, "action") : null,
        formMethod: form ? (readFormString(form, "method")?.toLowerCase() ?? null) : null,
      };

      // Reads a string property of a <form> through its prototype getter so a
      // same-named form control cannot shadow it. Empty values become null.
      function readFormString(
        target: HTMLFormElement,
        key: "id" | "action" | "method",
      ): string | null {
        const owner = key === "id" ? Element.prototype : HTMLFormElement.prototype;
        const getter = Object.getOwnPropertyDescriptor(owner, key)?.get;
        const value: unknown = getter ? getter.call(target) : null;
        return typeof value === "string" && value !== "" ? value : null;
      }

      // Form owner of the element: the `form` property for form controls,
      // otherwise the nearest <form> ancestor.
      function getElementForm(node: HTMLElement): HTMLFormElement | null {
        if (
          node instanceof HTMLInputElement ||
          node instanceof HTMLTextAreaElement ||
          node instanceof HTMLSelectElement ||
          node instanceof HTMLButtonElement
        ) {
          return node.form;
        }
        return node.closest("form");
      }

      // Submission name of the element; falls back to the raw `name` attribute
      // for elements that are not form controls.
      function getElementName(node: HTMLElement): string | null {
        if (
          node instanceof HTMLInputElement ||
          node instanceof HTMLTextAreaElement ||
          node instanceof HTMLSelectElement ||
          node instanceof HTMLButtonElement
        ) {
          return node.name || null;
        }
        return node.getAttribute("name");
      }

      // Label text, in priority order: aria-label, aria-labelledby targets,
      // then associated <label> elements.
      function getElementLabel(node: HTMLElement): string | null {
        const ariaLabel = node.getAttribute("aria-label");
        if (ariaLabel?.trim()) return ariaLabel.trim();

        const labelledBy = node.getAttribute("aria-labelledby");
        if (labelledBy) {
          const text = labelledBy
            .split(/\s+/)
            .map((id) => node.ownerDocument.getElementById(id)?.textContent?.trim() || "")
            .filter(Boolean)
            .join(" ");
          if (text) return text;
        }

        if ("labels" in node) {
          const labels = (node as HTMLInputElement | HTMLTextAreaElement | HTMLSelectElement)
            .labels;
          const text = Array.from(labels || [])
            .map((label) => label.textContent?.trim() || "")
            .filter(Boolean)
            .join(" ");
          if (text) return text;
        }

        return null;
      }

      // Placeholder text; only inputs and textareas carry one.
      function getElementPlaceholder(node: HTMLElement): string | null {
        if (node instanceof HTMLInputElement || node instanceof HTMLTextAreaElement) {
          return node.placeholder || null;
        }
        return null;
      }

      // Autocomplete hint for inputs, textareas and selects; null otherwise.
      function getElementAutocomplete(node: HTMLElement): string | null {
        if (
          node instanceof HTMLInputElement ||
          node instanceof HTMLTextAreaElement ||
          node instanceof HTMLSelectElement
        ) {
          return node.autocomplete || null;
        }
        return null;
      }
    }, ref);
  } catch (error) {
    throw new BrowserActionException(
      "getFieldMetadata",
      `Failed to get field metadata: ${error instanceof Error ? error.message : String(error)}`,
      { ref, originalError: error },
    );
  }
}
/**
 * Returns the form that activating `ref` would submit, or `null` when the
 * activation does not submit a form.
 *
 * Hardening over a naive DOM read, because the page content is untrusted:
 * - Pressing Enter on a focused submit control activates it exactly like a
 *   click, so submit buttons count as submitters for both triggers.
 * - A click whose target sits inside a `<button>` is attributed to that button.
 * - `elements`, `id`, `action` and `method` are read through prototype
 *   accessors: a form control with one of those names shadows the property on
 *   its `<form>`, which could otherwise hide every field from the policy check.
 *
 * @param ref Element ref being activated; resolved through
 *   `validateElementRef`, whose errors propagate unchanged.
 * @param trigger How the element is activated; defaults to `"click"`.
 * @returns The submission context listing the form's enabled input, textarea
 *   and select controls, or `null` if no form would be submitted.
 * @throws BrowserActionException when the in-page evaluation fails.
 */
async getFormSubmissionContext(
  ref: string,
  trigger: FormSubmissionTrigger = "click",
): Promise<FormSubmissionContext | null> {
  const locator = await this.validateElementRef(ref);

  try {
    return await locator.evaluate(
      (element, { submitterRef, trigger }): FormSubmissionContext | null => {
        const node = element as HTMLElement;
        const control = trigger === "click" ? resolveClickTarget(node) : node;
        if (!canSubmitForm(control, trigger)) return null;

        const form = getSubmissionForm(control);
        if (!form) return null;

        const fields = readFormControls(form)
          .filter(
            (field): field is HTMLInputElement | HTMLTextAreaElement | HTMLSelectElement =>
              field instanceof HTMLInputElement ||
              field instanceof HTMLTextAreaElement ||
              field instanceof HTMLSelectElement,
          )
          .filter((field) => !field.disabled)
          .map((field) => ({
            ref: field.getAttribute("data-pilo-ref"),
            name: field.name || null,
            tagName: field.tagName.toLowerCase(),
            inputType: field instanceof HTMLInputElement ? field.type.toLowerCase() : null,
            autocomplete: "autocomplete" in field ? field.autocomplete || null : null,
          }));

        return {
          submitterRef,
          formId: readFormString(form, "id"),
          actionUrl: readFormString(form, "action"),
          method: readFormString(form, "method")?.toLowerCase() ?? null,
          fields,
        };

        // Lists the form's controls via the prototype getter. Throwing (rather
        // than returning []) keeps the check fail-closed if the accessor is missing.
        function readFormControls(target: HTMLFormElement): Element[] {
          const getter = Object.getOwnPropertyDescriptor(
            HTMLFormElement.prototype,
            "elements",
          )?.get;
          if (!getter) throw new Error("HTMLFormElement.elements accessor is unavailable");
          return Array.from(getter.call(target) as HTMLFormControlsCollection);
        }

        // Reads a string property of a <form> through its prototype getter so a
        // same-named form control cannot shadow it. Empty values become null.
        function readFormString(
          target: HTMLFormElement,
          key: "id" | "action" | "method",
        ): string | null {
          const owner = key === "id" ? Element.prototype : HTMLFormElement.prototype;
          const getter = Object.getOwnPropertyDescriptor(owner, key)?.get;
          const value: unknown = getter ? getter.call(target) : null;
          return typeof value === "string" && value !== "" ? value : null;
        }

        // A click on content nested inside a <button> activates that button.
        function resolveClickTarget(target: HTMLElement): HTMLElement {
          if (
            target instanceof HTMLButtonElement ||
            target instanceof HTMLInputElement ||
            target instanceof HTMLTextAreaElement ||
            target instanceof HTMLSelectElement
          ) {
            return target;
          }
          return target.closest("button") ?? target;
        }

        // Form owner of the activated element.
        function getSubmissionForm(target: HTMLElement): HTMLFormElement | null {
          if (
            target instanceof HTMLButtonElement ||
            target instanceof HTMLInputElement ||
            target instanceof HTMLTextAreaElement ||
            target instanceof HTMLSelectElement
          ) {
            return target.form;
          }
          return target.closest("form");
        }

        // True for controls whose activation submits their form.
        function isSubmitControl(target: HTMLElement): boolean {
          if (target instanceof HTMLButtonElement) return target.type === "submit";
          if (target instanceof HTMLInputElement) {
            return target.type === "submit" || target.type === "image";
          }
          return false;
        }

        function canSubmitForm(
          target: HTMLElement,
          submitTrigger: FormSubmissionTrigger,
        ): boolean {
          // Click and Enter both activate a focused submit control.
          if (isSubmitControl(target)) return true;
          if (submitTrigger === "click") return false;

          // Enter elsewhere: only inputs may trigger implicit submission, and
          // not the non-text kinds listed here.
          if (!(target instanceof HTMLInputElement)) return false;
          return ![
            "button",
            "checkbox",
            "color",
            "file",
            "hidden",
            "radio",
            "range",
            "reset",
          ].includes(target.type);
        }
      },
      { submitterRef: ref, trigger },
    );
  } catch (error) {
    throw new BrowserActionException(
      "getFormSubmissionContext",
      `Failed to get form submission context: ${
        error instanceof Error ? error.message : String(error)
      }`,
      { ref, trigger, originalError: error },
    );
  }
}
/** Reason returned when an agent-sourced fill targets a non-operational field. */
export const SECURITY_BLOCKED_UNAUTHORIZED_FILL =
  "Security policy blocked filling a submittable form field without user approval";

/** Reason returned when a submit would send agent-filled data that was never approved. */
export const SECURITY_BLOCKED_UNAUTHORIZED_SUBMIT =
  "Security policy blocked submitting a form containing unauthorized agent-filled data";

/** Who supplied the value being filled. */
export type FillSource = "agent" | "user-approved";

/**
 * Outcome of a firewall check. Allowed fills may be flagged `operational`;
 * blocked results always carry a reason and are marked recoverable.
 */
export type ActionFirewallResult =
  | { allowed: true; operational?: boolean }
  | { allowed: false; reason: string; isRecoverable: true };

/** Input types the agent may fill without user approval. */
const OPERATIONAL_INPUT_TYPES = new Set<string>([
  "search",
  "url",
  "number",
  "date",
  "datetime-local",
  "month",
  "time",
  "week",
  "color",
  "range",
]);

/** ARIA roles the agent may fill without user approval. */
const OPERATIONAL_ROLES = new Set<string>(["searchbox", "combobox", "spinbutton", "slider"]);

/** Autocomplete tokens that mark a field as sensitive, overriding type/role. */
const SENSITIVE_AUTOCOMPLETE_TOKENS = new Set<string>([
  "name",
  "honorific-prefix",
  "given-name",
  "additional-name",
  "family-name",
  "honorific-suffix",
  "nickname",
  "email",
  "username",
  "new-password",
  "current-password",
  "one-time-code",
  "organization",
  "street-address",
  "address-line1",
  "address-line2",
  "address-line3",
  "address-level1",
  "address-level2",
  "address-level3",
  "address-level4",
  "country",
  "country-name",
  "postal-code",
  "cc-name",
  "cc-given-name",
  "cc-additional-name",
  "cc-family-name",
  "cc-number",
  "cc-exp",
  "cc-exp-month",
  "cc-exp-year",
  "cc-csc",
  "cc-type",
  "transaction-currency",
  "transaction-amount",
  "language",
  "bday",
  "bday-day",
  "bday-month",
  "bday-year",
  "sex",
  "tel",
  "tel-country-code",
  "tel-national",
  "tel-area-code",
  "tel-local",
  "tel-local-prefix",
  "tel-local-suffix",
  "tel-extension",
  "impp",
  "url",
  "photo",
]);

/**
 * Decides whether a fill may proceed.
 *
 * User-approved fills are always allowed. Agent fills are allowed only for
 * operational fields, and the result is then flagged `operational: true`.
 *
 * @param input.field Metadata of the target element.
 * @param input.source Origin of the value being filled.
 * @returns The allow/block decision; blocks are recoverable.
 */
export function assessFill(input: {
  field: FieldMetadata;
  source: FillSource;
}): ActionFirewallResult {
  if (input.source === "user-approved") {
    return { allowed: true };
  }

  if (isOperationalField(input.field)) {
    return { allowed: true, operational: true };
  }

  return {
    allowed: false,
    reason: SECURITY_BLOCKED_UNAUTHORIZED_FILL,
    isRecoverable: true,
  };
}

/**
 * Decides whether a form may be submitted.
 *
 * The submit is blocked as soon as one field was filled by the agent and is
 * neither user-approved nor operational. Fields without a ref are skipped.
 *
 * @param input.form The form about to be submitted.
 * @param input.approvedRefs Refs the user approved.
 * @param input.agentFilledRefs Refs the agent filled.
 * @param input.operationalRefs Agent-filled refs classified as operational.
 * @returns The allow/block decision; blocks are recoverable.
 */
export function assessFormSubmission(input: {
  form: FormSubmissionContext;
  approvedRefs: ReadonlySet<string>;
  agentFilledRefs: ReadonlySet<string>;
  operationalRefs: ReadonlySet<string>;
}): ActionFirewallResult {
  for (const field of input.form.fields) {
    if (!field.ref || !input.agentFilledRefs.has(field.ref)) continue;
    if (input.approvedRefs.has(field.ref) || input.operationalRefs.has(field.ref)) continue;

    return {
      allowed: false,
      reason: SECURITY_BLOCKED_UNAUTHORIZED_SUBMIT,
      isRecoverable: true,
    };
  }

  return { allowed: true };
}

/**
 * Classifies a field as operational. Sensitive autocomplete tokens, textareas
 * and content-editable elements are rejected before type or role is consulted.
 */
function isOperationalField(field: FieldMetadata): boolean {
  const inputType = field.inputType?.toLowerCase() ?? null;
  const role = field.role?.toLowerCase() ?? null;

  if (hasSensitiveAutocomplete(field.autocomplete)) return false;
  if (field.tagName.toLowerCase() === "textarea" || field.isContentEditable) return false;
  if (inputType && OPERATIONAL_INPUT_TYPES.has(inputType)) return true;
  if (role && OPERATIONAL_ROLES.has(role)) return true;
  return false;
}

/** True when any whitespace-separated autocomplete token is a sensitive one (case-insensitive). */
function hasSensitiveAutocomplete(autocomplete: string | null): boolean {
  if (!autocomplete) return false;
  const tokens = autocomplete.toLowerCase().split(/\s+/);
  return tokens.some((token) => SENSITIVE_AUTOCOMPLETE_TOKENS.has(token));
}
/**
 * Fallback passed to the firewall when `context.approvedRefs` is not provided.
 * Typed read-only because the instance is shared by every call.
 */
const EMPTY_APPROVED_REFS: ReadonlySet<string> = new Set<string>();

/**
 * Builds a recoverable failure result for `action` and emits the matching
 * BROWSER_ACTION_COMPLETED event.
 *
 * @param action Name of the action that failed.
 * @param error Message reported in both the event and the result.
 * @param context Tool context supplying the event emitter.
 * @param ref Element ref; included in the result only when truthy.
 * @param value Action value; included in the result only when defined.
 * @returns A failed, recoverable ActionResult.
 */
function failedActionResult(
  action: string,
  error: string,
  context: WebActionContext,
  ref?: string,
  value?: string | number,
): ActionResult {
  context.eventEmitter.emit(WebAgentEventType.BROWSER_ACTION_COMPLETED, {
    success: false,
    action,
    error,
    isRecoverable: true,
  });

  return {
    success: false,
    action,
    ...(ref && { ref }),
    ...(value !== undefined && { value }),
    error,
    isRecoverable: true,
  };
}

/**
 * Runs the form-submission firewall before a click or Enter action.
 *
 * @param action The action about to be performed.
 * @param context Tool context with the browser and provenance sets.
 * @param ref Element ref being activated.
 * @returns A failed ActionResult when the action must not proceed (policy block
 *   or BrowserException while inspecting the form); `null` when the action may
 *   proceed, including when the ref submits no form.
 * @throws Any error that is not a BrowserException, unchanged.
 */
async function assessFormSubmissionForAction(
  action: PageAction.Click | PageAction.Enter,
  context: WebActionContext,
  ref: string,
): Promise<ActionResult | null> {
  try {
    const form = await context.browser.getFormSubmissionContext(
      ref,
      action === PageAction.Click ? "click" : "enter",
    );
    if (!form) return null;

    const assessment = assessFormSubmission({
      form,
      approvedRefs: context.approvedRefs ?? EMPTY_APPROVED_REFS,
      agentFilledRefs: context.agentFilledRefs,
      operationalRefs: context.operationalRefs,
    });

    if (!assessment.allowed) {
      return failedActionResult(action, assessment.reason, context, ref);
    }
  } catch (error) {
    if (error instanceof BrowserException) {
      return failedActionResult(action, error.message, context, ref);
    }
    throw error;
  }

  return null;
}
"user-approved" : "agent", + }); + + if (!assessment.allowed) { + return failedActionResult(PageAction.Fill, assessment.reason, context, ref); + } + + const result = await performActionWithValidation(PageAction.Fill, context, ref, value); + if (result.success && !userApproved) { + context.agentFilledRefs.add(ref); + if (assessment.operational) { + context.operationalRefs.add(ref); + } + } + return result; + } catch (error) { + if (error instanceof BrowserException) { + return failedActionResult(PageAction.Fill, error.message, context, ref); + } + throw error; + } }, }), @@ -218,6 +312,9 @@ export function createWebActionTools(context: WebActionContext) { ref: z.string().describe(TOOL_STRINGS.webActions.common.elementRef), }), execute: async ({ ref }) => { + const blocked = await assessFormSubmissionForAction(PageAction.Enter, context, ref); + if (blocked) return blocked; + return await performActionWithValidation(PageAction.Enter, context, ref); }, }), diff --git a/packages/core/src/webAgent.ts b/packages/core/src/webAgent.ts index 05d68a56..f751d177 100644 --- a/packages/core/src/webAgent.ts +++ b/packages/core/src/webAgent.ts @@ -44,7 +44,7 @@ import { SearchService } from "./search/searchService.js"; import { createPlanningTools } from "./tools/planningTools.js"; import { createValidationTools } from "./tools/validationTools.js"; import { createTabstackTools } from "./tools/tabstackTools.js"; -import { createInteractiveTools, ApprovedRefs, FILL_GATE_ERROR } from "./tools/interactiveTools.js"; +import { createInteractiveTools, ApprovedRefs } from "./tools/interactiveTools.js"; import { createTabstackClient } from "./tabstack/client.js"; import type { UserDataCallback } from "./types/interactive.js"; import { nanoid } from "nanoid"; @@ -388,12 +388,30 @@ export class WebAgent { task: string, executionState: ExecutionState, ): Promise<{ success: boolean; finalAnswer: string | null; error?: TaskError }> { + // Only include interactive tools if a callback is provided 
+ let interactiveToolSet: Record = {}; + let approvedRefs: ApprovedRefs | null = null; + const agentFilledRefs = new Set(); + const operationalRefs = new Set(); + if (this.onUserDataRequired) { + const result = createInteractiveTools({ + callback: this.onUserDataRequired, + browser: this.browser, + eventEmitter: this.eventEmitter, + }); + interactiveToolSet = result.tools; + approvedRefs = result.approvedRefs; + } + // Setup tools once const webActionTools = createWebActionTools({ browser: this.browser, eventEmitter: this.eventEmitter, providerConfig: this.providerConfig, abortSignal: this.abortSignal, + approvedRefs: approvedRefs ?? undefined, + agentFilledRefs, + operationalRefs, }); // Only include search tools if a search service was created @@ -409,51 +427,6 @@ export class WebAgent { }) : {}; - // Only include interactive tools if a callback is provided - let interactiveToolSet: Record = {}; - let approvedRefs: ApprovedRefs | null = null; - if (this.onUserDataRequired) { - const result = createInteractiveTools({ - callback: this.onUserDataRequired, - browser: this.browser, - eventEmitter: this.eventEmitter, - }); - interactiveToolSet = result.tools; - approvedRefs = result.approvedRefs; - } - - // When interactive mode is on, gate fill/select/check to require approved refs. - // On first unapproved attempt, return an error. If the agent retries the same ref - // (indicating it's a navigation/search field, not a user-data form field), allow it - // through on the second attempt to avoid a deadlock. 
/**
 * Actions after which the current page snapshot is kept.
 *
 * `fill` is included so element refs and the agent-filled provenance stay valid
 * for a submit check that follows. The cost is that dynamic validation UI
 * caused by the fill is not visible until a later action refreshes the snapshot.
 */
private static readonly ACTIONS_WITHOUT_PAGE_REFRESH = new Set(["extract", "webSearch", "fill"]);

/** Returns true unless `action` is one of the snapshot-preserving actions. */
private static shouldRefreshPageSnapshotAfterAction(action: string): boolean {
  const keepsCurrentSnapshot = WebAgent.ACTIONS_WITHOUT_PAGE_REFRESH.has(action);
  return !keepsCurrentSnapshot;
}
/**
 * Test fixture: a plain text input inside a GET search form.
 *
 * @param overrides Properties to replace on the default fixture.
 * @returns A complete FieldMetadata object.
 */
function field(overrides: Partial<FieldMetadata> = {}): FieldMetadata {
  return {
    ref: "E1",
    tagName: "input",
    inputType: "text",
    role: null,
    name: null,
    label: null,
    placeholder: null,
    autocomplete: null,
    isContentEditable: false,
    formId: "form-1",
    formAction: "https://example.com/search",
    formMethod: "get",
    ...overrides,
  };
}

/**
 * Test fixture: a POST form with no fields, submitted by ref "E9".
 *
 * @param overrides Properties to replace on the default fixture.
 * @returns A complete FormSubmissionContext object.
 */
function form(overrides: Partial<FormSubmissionContext> = {}): FormSubmissionContext {
  return {
    submitterRef: "E9",
    formId: "form-1",
    actionUrl: "https://example.com/submit",
    method: "post",
    fields: [],
    ...overrides,
  };
}
expect(result.reason).toBe(SECURITY_BLOCKED_UNAUTHORIZED_FILL); + }); + + it("does not classify fields as operational from label text alone", () => { + const result = assessFill({ + field: field({ inputType: "text", label: "Search products", placeholder: "Search" }), + source: "agent", + }); + + expect(result.allowed).toBe(false); + }); + + it("blocks inherently freeform fields even when they have operational roles", () => { + const result = assessFill({ + field: field({ tagName: "textarea", inputType: null, role: "searchbox" }), + source: "agent", + }); + + expect(result.allowed).toBe(false); + }); + + it("blocks fields with sensitive autocomplete even when the input type looks operational", () => { + const result = assessFill({ + field: field({ inputType: "url", autocomplete: "url" }), + source: "agent", + }); + + expect(result.allowed).toBe(false); + }); + + it("allows user-approved freeform fields", () => { + const result = assessFill({ + field: field({ label: "Message" }), + source: "user-approved", + }); + + expect(result.allowed).toBe(true); + }); + + it("blocks submitting forms with unauthorized agent-filled fields", () => { + const result = assessFormSubmission({ + form: form({ + fields: [ + { + ref: "E1", + name: "message", + tagName: "textarea", + inputType: null, + autocomplete: null, + }, + ], + }), + approvedRefs: new Set(), + agentFilledRefs: new Set(["E1"]), + operationalRefs: new Set(), + }); + + expect(result.allowed).toBe(false); + if (result.allowed) throw new Error("Expected submit to be blocked"); + expect(result.reason).toBe(SECURITY_BLOCKED_UNAUTHORIZED_SUBMIT); + expect(result.reason).not.toContain("do not leak this value"); + }); + + it("allows submitting forms when agent-filled fields are approved or operational", () => { + const result = assessFormSubmission({ + form: form({ + fields: [ + { + ref: "E1", + name: "q", + tagName: "input", + inputType: "search", + autocomplete: null, + }, + { + ref: "E2", + name: "email", + tagName: "input", 
+ inputType: "email", + autocomplete: "email", + }, + ], + }), + approvedRefs: new Set(["E2"]), + agentFilledRefs: new Set(["E1", "E2"]), + operationalRefs: new Set(["E1"]), + }); + + expect(result.allowed).toBe(true); + }); +}); diff --git a/packages/core/test/tools/webActionTools.test.ts b/packages/core/test/tools/webActionTools.test.ts index 34374e74..bae69ca0 100644 --- a/packages/core/test/tools/webActionTools.test.ts +++ b/packages/core/test/tools/webActionTools.test.ts @@ -1,6 +1,12 @@ import { describe, it, expect, vi, beforeEach, afterEach } from "vitest"; import { createWebActionTools } from "../../src/tools/webActionTools.js"; -import { AriaBrowser, PageAction } from "../../src/browser/ariaBrowser.js"; +import { + AriaBrowser, + FieldMetadata, + FormSubmissionTrigger, + FormSubmissionContext, + PageAction, +} from "../../src/browser/ariaBrowser.js"; import { WebAgentEventEmitter, WebAgentEventType } from "../../src/events.js"; import { LanguageModel } from "ai"; import { z } from "zod"; @@ -30,6 +36,8 @@ class MockBrowser implements AriaBrowser { browserName = "mock-browser"; public url = "https://example.com"; public title = "Example Page"; + public fieldMetadata = new Map(); + public formSubmissionContexts = new Map(); async start(): Promise {} async shutdown(): Promise {} @@ -73,6 +81,32 @@ class MockBrowser implements AriaBrowser { // Mock implementation - can be configured to throw errors for testing } + async getFieldMetadata(ref: string): Promise { + return ( + this.fieldMetadata.get(ref) ?? { + ref, + tagName: "input", + inputType: "search", + role: "searchbox", + name: "q", + label: "Search", + placeholder: "Search", + autocomplete: null, + isContentEditable: false, + formId: "search-form", + formAction: "https://example.com/search", + formMethod: "get", + } + ); + } + + async getFormSubmissionContext( + ref: string, + _trigger?: FormSubmissionTrigger, + ): Promise { + return this.formSubmissionContexts.get(ref) ?? 
null; + } + async waitForLoadState(): Promise {} async runInTemporaryTab(fn: (tab: any) => Promise): Promise { @@ -105,6 +139,8 @@ describe("Web Action Tools", () => { eventEmitter, providerConfig: { model: mockProvider }, abortSignal: undefined, + agentFilledRefs: new Set(), + operationalRefs: new Set(), }; tools = createWebActionTools(context); @@ -115,6 +151,16 @@ describe("Web Action Tools", () => { }); describe("Tool Structure", () => { + it("should require provenance tracking sets", () => { + expect(() => + createWebActionTools({ + browser: mockBrowser, + eventEmitter, + providerConfig: { model: mockProvider }, + } as any), + ).toThrow("Web action provenance tracking sets are required"); + }); + it("should create all expected tools", () => { expect(tools).toBeDefined(); expect(tools.click).toBeDefined(); @@ -273,6 +319,76 @@ describe("Web Action Tools", () => { }); }); + it("should block agent fill of freeform submittable fields", async () => { + mockBrowser.fieldMetadata.set("input1", { + ref: "input1", + tagName: "textarea", + inputType: null, + role: null, + name: "message", + label: "Message", + placeholder: "Message", + autocomplete: null, + isContentEditable: false, + formId: "contact", + formAction: "https://example.com/contact", + formMethod: "post", + }); + const performActionSpy = vi.spyOn(mockBrowser, "performAction"); + + const result = await tools.fill.execute({ ref: "input1", value: "generated payload" }); + + expect(performActionSpy).not.toHaveBeenCalled(); + expect(result).toEqual({ + success: false, + action: "fill", + ref: "input1", + error: "Security policy blocked filling a submittable form field without user approval", + isRecoverable: true, + }); + expect(result.value).toBeUndefined(); + }); + + it("should allow approved freeform field fills", async () => { + const performActionSpy = vi.spyOn(mockBrowser, "performAction"); + mockBrowser.fieldMetadata.set("input1", { + ref: "input1", + tagName: "textarea", + inputType: null, + role: null, 
+ name: "message", + label: "Message", + placeholder: "Message", + autocomplete: null, + isContentEditable: false, + formId: "contact", + formAction: "https://example.com/contact", + formMethod: "post", + }); + context.approvedRefs = new Set(["input1"]); + tools = createWebActionTools(context); + + const result = await tools.fill.execute({ ref: "input1", value: "user-provided value" }); + + expect(performActionSpy).toHaveBeenCalledWith( + "input1", + PageAction.Fill, + "user-provided value", + ); + expect(result.success).toBe(true); + }); + + it("should track agent-filled operational refs", async () => { + context.agentFilledRefs = new Set(); + context.operationalRefs = new Set(); + tools = createWebActionTools(context); + + await tools.fill.execute({ ref: "input1", value: "pilo" }); + + expect(context.agentFilledRefs.has("input1")).toBe(true); + expect(context.operationalRefs.has("input1")).toBe(true); + }); + it("should emit browser action events", async () => { const emitSpy = vi.spyOn(eventEmitter, "emit"); @@ -509,6 +625,106 @@ describe("Web Action Tools", () => { expect(invalid.success).toBe(false); }); + it("should block click submit when form contains unauthorized agent-filled values", async () => { + const performActionSpy = vi.spyOn(mockBrowser, "performAction"); + context.agentFilledRefs = new Set(["message"]); + context.operationalRefs = new Set(); + context.approvedRefs = new Set(); + mockBrowser.formSubmissionContexts.set("submit1", { + submitterRef: "submit1", + formId: "contact", + actionUrl: "https://example.com/contact", + method: "post", + fields: [ + { + ref: "message", + name: "message", + tagName: "textarea", + inputType: null, + autocomplete: null, + }, + ], + }); + tools = createWebActionTools(context); + + const result = await tools.click.execute({ ref: "submit1" }); + + expect(performActionSpy).not.toHaveBeenCalled(); + expect(result.success).toBe(false); + expect(result.error).toBe( + "Security policy blocked submitting a form containing 
unauthorized agent-filled data", + ); + expect(JSON.stringify(result)).not.toContain("generated payload"); + }); + + it("should allow click submit when form fields are approved or operational", async () => { + const performActionSpy = vi.spyOn(mockBrowser, "performAction"); + context.agentFilledRefs = new Set(["query", "email"]); + context.operationalRefs = new Set(["query"]); + context.approvedRefs = new Set(["email"]); + mockBrowser.formSubmissionContexts.set("submit1", { + submitterRef: "submit1", + formId: "search", + actionUrl: "https://example.com/search", + method: "get", + fields: [ + { + ref: "query", + name: "q", + tagName: "input", + inputType: "search", + autocomplete: null, + }, + { + ref: "email", + name: "email", + tagName: "input", + inputType: "email", + autocomplete: "email", + }, + ], + }); + tools = createWebActionTools(context); + + const result = await tools.click.execute({ ref: "submit1" }); + + expect(performActionSpy).toHaveBeenCalledWith("submit1", PageAction.Click, undefined); + expect(result.success).toBe(true); + }); + + it("should block enter submit when form contains unauthorized agent-filled fields", async () => { + const formContextSpy = vi.spyOn(mockBrowser, "getFormSubmissionContext"); + const performActionSpy = vi.spyOn(mockBrowser, "performAction"); + context.agentFilledRefs = new Set(["message"]); + context.operationalRefs = new Set(); + context.approvedRefs = new Set(); + mockBrowser.formSubmissionContexts.set("input1", { + submitterRef: "input1", + formId: "contact", + actionUrl: "https://example.com/contact", + method: "post", + fields: [ + { + ref: "message", + name: "message", + tagName: "textarea", + inputType: null, + autocomplete: null, + }, + ], + }); + tools = createWebActionTools(context); + + const result = await tools.enter.execute({ ref: "input1" }); + + expect(formContextSpy).toHaveBeenCalledWith("input1", "enter"); + expect(performActionSpy).not.toHaveBeenCalled(); + expect(result.success).toBe(false); + 
expect(result.error).toBe( + "Security policy blocked submitting a form containing unauthorized agent-filled data", + ); + }); + it("should execute back action successfully", async () => { const performActionSpy = vi.spyOn(mockBrowser, "performAction"); diff --git a/packages/core/test/webAgent.test.ts b/packages/core/test/webAgent.test.ts index 421456b2..0bf0ec12 100644 --- a/packages/core/test/webAgent.test.ts +++ b/packages/core/test/webAgent.test.ts @@ -1,6 +1,12 @@ import { describe, it, expect, vi, beforeEach, afterEach } from "vitest"; import { WebAgent, WebAgentOptions } from "../src/webAgent.js"; -import { AriaBrowser, PageAction } from "../src/browser/ariaBrowser.js"; +import { + AriaBrowser, + FieldMetadata, + FormSubmissionTrigger, + FormSubmissionContext, + PageAction, +} from "../src/browser/ariaBrowser.js"; import { WebAgentEventEmitter, WebAgentEventType } from "../src/events.js"; import { LanguageModel, streamText } from "ai"; import { Logger } from "../src/loggers/types.js"; @@ -152,6 +158,8 @@ class MockBrowser implements AriaBrowser { `; private markdown = "# Mock Page\nContent here"; + fieldMetadata = new Map(); + formSubmissionContexts = new Map(); async start(): Promise {} async shutdown(): Promise {} @@ -191,6 +199,32 @@ class MockBrowser implements AriaBrowser { async performAction(_ref: string, _action: PageAction, _value?: string): Promise {} + async getFieldMetadata(ref: string): Promise { + return ( + this.fieldMetadata.get(ref) ?? { + ref, + tagName: "input", + inputType: "search", + role: "searchbox", + name: "q", + label: "Search", + placeholder: "Search", + autocomplete: null, + isContentEditable: false, + formId: "search-form", + formAction: "https://example.com/search", + formMethod: "get", + } + ); + } + + async getFormSubmissionContext( + ref: string, + _trigger?: FormSubmissionTrigger, + ): Promise { + return this.formSubmissionContexts.get(ref) ?? 
null; + } + async waitForLoadState(): Promise {} async runInTemporaryTab(fn: (tab: any) => Promise): Promise { @@ -863,6 +897,77 @@ describe("WebAgent", () => { expect(navigatedEvent?.data.url).toBe(startingUrl); }); + it("should keep the same snapshot after fill so form refs remain valid for submit", async () => { + mockGenerateTextWithRetry.mockResolvedValueOnce({ + text: "Planning", + toolResults: [ + { + type: "tool-result", + toolCallId: "plan_1", + toolName: "create_plan", + output: { + successCriteria: "Fill then submit", + plan: "1. Fill the form\n2. Submit the form", + }, + }, + ], + } as any); + + const snapshotSpy = vi.spyOn(mockBrowser, "getTreeWithRefs"); + + mockStreamText.mockReturnValueOnce( + createMockStreamResponse({ + text: "Fill", + toolResults: [ + { + type: "tool-result", + toolCallId: "fill_1", + toolName: "fill", + input: { ref: "input1", value: "context" }, + output: { + success: true, + action: "fill", + ref: "input1", + value: "context", + }, + }, + ], + response: { + messages: [{ role: "assistant", content: "Fill" }], + }, + }) as any, + ); + + mockStreamText.mockReturnValueOnce( + createMockStreamResponse({ + text: "Done", + toolResults: [ + { + type: "tool-result", + toolCallId: "done_1", + toolName: "done", + input: { result: "Complete" }, + output: { + success: true, + action: "done", + result: "Complete", + isTerminal: true, + }, + }, + ], + response: { + messages: [{ role: "assistant", content: "Done" }], + }, + }) as any, + ); + + mockGenerateTextWithRetry.mockResolvedValueOnce(mockValidationResponse("complete")); + + await webAgent.execute("Fill then submit", { startingUrl: "https://example.com" }); + + expect(snapshotSpy).toHaveBeenCalledTimes(1); + }); + it("should pass webSearchEnabled to planning prompt when search provider is set", async () => { // Create a WebAgent with a search provider enabled const searchAgent = new WebAgent(mockBrowser, { diff --git a/packages/extension/src/background/ExtensionBrowser.ts 
b/packages/extension/src/background/ExtensionBrowser.ts index a3ad81c5..65cdd47e 100644 --- a/packages/extension/src/background/ExtensionBrowser.ts +++ b/packages/extension/src/background/ExtensionBrowser.ts @@ -1,6 +1,11 @@ import browser from "webextension-polyfill"; -import type { AriaBrowser } from "pilo-core/core"; -import { PageAction, LoadState } from "pilo-core/core"; +import type { + AriaBrowser, + FieldMetadata, + FormSubmissionContext, + FormSubmissionTrigger, +} from "pilo-core/core"; +import { BrowserActionException, InvalidRefException, PageAction, LoadState } from "pilo-core/core"; import type { Tabs } from "webextension-polyfill"; import { createLogger } from "../shared/utils/logger"; import TurndownService from "turndown"; @@ -11,6 +16,10 @@ interface ActionResult { message?: string; } +type MetadataScriptResult = + | { success: true; data: T } + | { success: false; error: string; errorType?: "invalid-ref" }; + interface AriaSnapshotWindow { generateAndRenderAriaTree: (root: Element, counter?: { value: number }) => string; } @@ -302,6 +311,268 @@ export class ExtensionBrowser implements AriaBrowser { } } + async getFieldMetadata(ref: string): Promise { + try { + const tab = await this.getActiveTab(); + await this.ensureContentScript(); + + const [{ result }] = await browser.scripting.executeScript({ + target: { tabId: tab.id! }, + func: (elementRef: string): MetadataScriptResult => { + const element = document.querySelector(`[data-pilo-ref="${elementRef}"]`); + if (!(element instanceof HTMLElement)) { + return { + success: false, + error: `Element with ref ${elementRef} not found in DOM`, + errorType: "invalid-ref", + }; + } + + const input = element instanceof HTMLInputElement ? element : null; + const form = getElementForm(element); + + return { + success: true, + data: { + ref: elementRef, + tagName: element.tagName.toLowerCase(), + inputType: input?.type?.toLowerCase() ?? 
null, + role: element.getAttribute("role"), + name: getElementName(element), + label: getElementLabel(element), + placeholder: getElementPlaceholder(element), + autocomplete: getElementAutocomplete(element), + isContentEditable: element.isContentEditable, + formId: form?.id || null, + formAction: form?.action || null, + formMethod: form?.method?.toLowerCase() || null, + }, + }; + + function getElementForm(node: HTMLElement): HTMLFormElement | null { + if ( + node instanceof HTMLInputElement || + node instanceof HTMLTextAreaElement || + node instanceof HTMLSelectElement || + node instanceof HTMLButtonElement + ) { + return node.form; + } + return node.closest("form"); + } + + function getElementName(node: HTMLElement): string | null { + if ( + node instanceof HTMLInputElement || + node instanceof HTMLTextAreaElement || + node instanceof HTMLSelectElement || + node instanceof HTMLButtonElement + ) { + return node.name || null; + } + return node.getAttribute("name"); + } + + function getElementLabel(node: HTMLElement): string | null { + const ariaLabel = node.getAttribute("aria-label"); + if (ariaLabel?.trim()) return ariaLabel.trim(); + + const labelledBy = node.getAttribute("aria-labelledby"); + if (labelledBy) { + const text = labelledBy + .split(/\s+/) + .map((id) => node.ownerDocument.getElementById(id)?.textContent?.trim() || "") + .filter(Boolean) + .join(" "); + if (text) return text; + } + + if ("labels" in node) { + const labels = (node as HTMLInputElement | HTMLTextAreaElement | HTMLSelectElement) + .labels; + const text = Array.from(labels || []) + .map((label) => label.textContent?.trim() || "") + .filter(Boolean) + .join(" "); + if (text) return text; + } + + return null; + } + + function getElementPlaceholder(node: HTMLElement): string | null { + if (node instanceof HTMLInputElement || node instanceof HTMLTextAreaElement) { + return node.placeholder || null; + } + return null; + } + + function getElementAutocomplete(node: HTMLElement): string | null { + 
if ( + node instanceof HTMLInputElement || + node instanceof HTMLTextAreaElement || + node instanceof HTMLSelectElement + ) { + return node.autocomplete || null; + } + return null; + } + }, + args: [ref], + }); + + return this.unwrapMetadataResult( + result as MetadataScriptResult | undefined, + ref, + "getFieldMetadata", + ); + } catch (error) { + if (error instanceof InvalidRefException || error instanceof BrowserActionException) { + throw error; + } + throw new BrowserActionException( + "getFieldMetadata", + `Failed to get field metadata: ${error instanceof Error ? error.message : String(error)}`, + { ref, originalError: error }, + ); + } + } + + async getFormSubmissionContext( + ref: string, + trigger: FormSubmissionTrigger = "click", + ): Promise { + try { + const tab = await this.getActiveTab(); + await this.ensureContentScript(); + + const [{ result }] = await browser.scripting.executeScript({ + target: { tabId: tab.id! }, + func: (paramsJson: string): MetadataScriptResult => { + const { ref: submitterRef, trigger: submitTrigger } = JSON.parse(paramsJson) as { + ref: string; + trigger: FormSubmissionTrigger; + }; + const element = document.querySelector(`[data-pilo-ref="${submitterRef}"]`); + if (!(element instanceof HTMLElement)) { + return { + success: false, + error: `Element with ref ${submitterRef} not found in DOM`, + errorType: "invalid-ref", + }; + } + if (!canSubmitForm(element, submitTrigger)) return { success: true, data: null }; + + const form = getSubmissionForm(element); + if (!form) return { success: true, data: null }; + + const fields = Array.from(form.elements) + .filter( + (field): field is HTMLInputElement | HTMLTextAreaElement | HTMLSelectElement => + field instanceof HTMLInputElement || + field instanceof HTMLTextAreaElement || + field instanceof HTMLSelectElement, + ) + .filter((field) => !field.disabled) + .map((field) => ({ + ref: field.getAttribute("data-pilo-ref"), + name: field.name || null, + tagName: field.tagName.toLowerCase(), 
+ inputType: field instanceof HTMLInputElement ? field.type.toLowerCase() : null, + autocomplete: "autocomplete" in field ? field.autocomplete || null : null, + })); + + return { + success: true, + data: { + submitterRef, + formId: form.id || null, + actionUrl: form.action || null, + method: form.method?.toLowerCase() || null, + fields, + }, + }; + + function getSubmissionForm(node: HTMLElement): HTMLFormElement | null { + if ( + node instanceof HTMLButtonElement || + node instanceof HTMLInputElement || + node instanceof HTMLTextAreaElement || + node instanceof HTMLSelectElement + ) { + return node.form; + } + return node.closest("form"); + } + + function canSubmitForm(node: HTMLElement, submitTrigger: FormSubmissionTrigger): boolean { + if (submitTrigger === "click") { + if (node instanceof HTMLButtonElement) return node.type === "submit"; + if (node instanceof HTMLInputElement) { + return node.type === "submit" || node.type === "image"; + } + return false; + } + + if (node instanceof HTMLTextAreaElement || node instanceof HTMLSelectElement) + return false; + if (!(node instanceof HTMLInputElement)) return false; + return ![ + "button", + "checkbox", + "color", + "file", + "hidden", + "radio", + "range", + "reset", + "submit", + ].includes(node.type); + } + }, + args: [JSON.stringify({ ref, trigger })], + }); + + return this.unwrapMetadataResult( + result as MetadataScriptResult | undefined, + ref, + "getFormSubmissionContext", + ); + } catch (error) { + if (error instanceof InvalidRefException || error instanceof BrowserActionException) { + throw error; + } + throw new BrowserActionException( + "getFormSubmissionContext", + `Failed to get form submission context: ${ + error instanceof Error ? 
error.message : String(error) + }`, + { ref, trigger, originalError: error }, + ); + } + } + + private unwrapMetadataResult( + result: MetadataScriptResult | undefined, + ref: string, + action: "getFieldMetadata" | "getFormSubmissionContext", + ): T { + if (!result) { + throw new BrowserActionException(action, `Failed to ${action}: script returned no result`, { + ref, + }); + } + + if (!result.success) { + if (result.errorType === "invalid-ref") { + throw new InvalidRefException(ref, result.error); + } + throw new BrowserActionException(action, result.error, { ref }); + } + + return result.data; + } + async performAction(ref: string, action: PageAction, value?: string): Promise { console.log( `ExtensionBrowser: performAction() called with ref: ${ref}, action: ${action}, value: ${value}`, diff --git a/packages/extension/test/ExtensionBrowser.test.ts b/packages/extension/test/ExtensionBrowser.test.ts index 9b48d7d1..455d37c0 100644 --- a/packages/extension/test/ExtensionBrowser.test.ts +++ b/packages/extension/test/ExtensionBrowser.test.ts @@ -1,6 +1,7 @@ import { describe, it, expect, beforeEach, vi } from "vitest"; import { ExtensionBrowser } from "../src/background/ExtensionBrowser"; import browser from "webextension-polyfill"; +import { BrowserActionException, InvalidRefException } from "pilo-core/core"; vi.mock("webextension-polyfill", () => ({ default: { @@ -94,4 +95,62 @@ describe("ExtensionBrowser", () => { expect(browser.scripting.executeScript).toHaveBeenCalled(); }); }); + + describe("metadata error handling", () => { + it("should translate missing field metadata refs into InvalidRefException", async () => { + vi.mocked(browser.scripting.executeScript).mockResolvedValue([ + { + result: { + success: false, + error: "Element with ref missing-input not found in DOM", + errorType: "invalid-ref", + }, + } as any, + ]); + + await expect(extensionBrowser.getFieldMetadata("missing-input")).rejects.toThrow( + InvalidRefException, + ); + }); + + it("should translate 
missing form submission refs into InvalidRefException", async () => { + vi.mocked(browser.scripting.executeScript).mockResolvedValue([ + { + result: { + success: false, + error: "Element with ref missing-submit not found in DOM", + errorType: "invalid-ref", + }, + } as any, + ]); + + await expect(extensionBrowser.getFormSubmissionContext("missing-submit")).rejects.toThrow( + InvalidRefException, + ); + }); + + it("should wrap field metadata script failures in BrowserActionException", async () => { + vi.mocked(browser.scripting.executeScript) + .mockResolvedValueOnce([{ result: true } as any]) + .mockRejectedValueOnce(new Error("Cannot access contents of url")); + + const error = await extensionBrowser.getFieldMetadata("input1").catch((err) => err); + expect(error).toBeInstanceOf(BrowserActionException); + expect(error.message).toContain( + "Failed to get field metadata: Cannot access contents of url", + ); + }); + + it("should wrap form submission script failures in BrowserActionException", async () => { + vi.mocked(browser.scripting.executeScript) + .mockResolvedValueOnce([{ result: true } as any]) + .mockRejectedValueOnce(new Error("Cannot access contents of url")); + + const error = await extensionBrowser.getFormSubmissionContext("submit1").catch((err) => err); + expect(error).toBeInstanceOf(BrowserActionException); + expect(error.message).toContain( + "Failed to get form submission context: Cannot access contents of url", + ); + }); + }); });