From eeeca9b71a48a2fe6b4b41094ed53911c174f4f7 Mon Sep 17 00:00:00 2001 From: JeffreyChen Date: Fri, 19 Jun 2026 03:39:29 +0800 Subject: [PATCH 1/2] Add native UI control driver (object-level desktop automation) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The accessibility layer could only list/find/click elements. Add control-pattern actions so it can read and drive native controls by name/role/app/AutomationId — far more reliable than pixel/OCR for native apps and the #1 gap vs object-aware desktop tools (pywinauto / FlaUI / WinAppDriver / UiPath), surfaced by web research of competitors and practitioner pain points. - Backend interface: get_value / set_value / invoke / toggle, with a clear AccessibilityNotAvailableError for backends that can't act. - Windows UIAutomation backend implements all four via the Value / Invoke / Toggle control patterns (comtypes), degrading gracefully on COM errors. - Facade re-exports control_get_value / control_set_value / control_invoke / control_toggle; AC_control_* executor commands; ac_control_* MCP tools; Script Builder "Native UI" entries. - Headless tests inject a fake backend (no GUI/comtypes needed) and verify the API, executor, MCP wiring and graceful degradation. - v7 reference page (EN + Traditional Chinese) + README sections. 
--- README.md | 9 ++ README/README_zh-CN.md | 9 ++ README/README_zh-TW.md | 9 ++ .../Eng/doc/new_features/v7_features_doc.rst | 79 ++++++++++++ docs/source/Eng/eng_index.rst | 1 + .../Zh/doc/new_features/v7_features_doc.rst | 73 +++++++++++ docs/source/Zh/zh_index.rst | 1 + je_auto_control/__init__.py | 5 +- .../gui/script_builder/command_schema.py | 30 +++++ .../utils/accessibility/__init__.py | 5 +- .../utils/accessibility/accessibility_api.py | 35 +++++ .../utils/accessibility/backends/base.py | 44 ++++++- .../accessibility/backends/windows_backend.py | 92 ++++++++++++- .../utils/executor/action_executor.py | 40 ++++++ .../utils/mcp_server/tools/_factories.py | 50 ++++++- .../utils/mcp_server/tools/_handlers.py | 26 ++++ .../unit_test/headless/test_native_control.py | 122 ++++++++++++++++++ 17 files changed, 624 insertions(+), 6 deletions(-) create mode 100644 docs/source/Eng/doc/new_features/v7_features_doc.rst create mode 100644 docs/source/Zh/doc/new_features/v7_features_doc.rst create mode 100644 test/unit_test/headless/test_native_control.py diff --git a/README.md b/README.md index ed1010c1..c9c52663 100644 --- a/README.md +++ b/README.md @@ -13,6 +13,7 @@ ## Table of Contents +- [What's new (2026-06-19) — Native UI Control](#whats-new-2026-06-19--native-ui-control) - [What's new (2026-06-19)](#whats-new-2026-06-19) - [What's new (2026-06-18)](#whats-new-2026-06-18) - [What's new (2026-06-17)](#whats-new-2026-06-17) @@ -59,6 +60,14 @@ --- +## What's new (2026-06-19) — Native UI Control + +Object-level desktop automation: read and drive native controls through the OS accessibility API (by name / role / app / **AutomationId**) instead of clicking pixels or OCR-ing text — far more reliable for native apps. The accessibility layer previously only listed/found/clicked; it now also acts. Ships through the full stack (facade, `AC_*`, MCP, Script Builder) with a Windows UIAutomation backend; unsupported backends raise a clear error. 
Full reference: [`docs/source/Eng/doc/new_features/v7_features_doc.rst`](docs/source/Eng/doc/new_features/v7_features_doc.rst). + +- **Read / set value** — `control_get_value` / `control_set_value` (`AC_control_get_value` / `AC_control_set_value`): read a textbox/combo value (no OCR) and set it in one call (no per-key typing). +- **Invoke / toggle** — `control_invoke` / `control_toggle` (`AC_control_invoke` / `AC_control_toggle`): press a button or flip a checkbox via its control pattern. +- Targets a control by `name` / `role` / `app_name` / `automation_id` (the stable Windows identifier), so it survives layout/localization changes. + ## What's new (2026-06-19) Two headless cores that shipped without the rest of their stack are now diff --git a/README/README_zh-CN.md b/README/README_zh-CN.md index f8b5f6e9..21aa5d0f 100644 --- a/README/README_zh-CN.md +++ b/README/README_zh-CN.md @@ -12,6 +12,7 @@ ## 目录 +- [本次更新 (2026-06-19) — 原生 UI 控制](#本次更新-2026-06-19--原生-ui-控制) - [本次更新 (2026-06-19)](#本次更新-2026-06-19) - [本次更新 (2026-06-18)](#本次更新-2026-06-18) - [本次更新 (2026-06-17)](#本次更新-2026-06-17) @@ -58,6 +59,14 @@ --- +## 本次更新 (2026-06-19) — 原生 UI 控制 + +对象级桌面自动化:通过 OS 无障碍 API(以 name / role / app / **AutomationId** 定位)读取与操作原生控件,而非点像素或 OCR——对原生 app 可靠得多。无障碍层先前只能 list/find/click,现在还能操作。走完整五层(facade、`AC_*`、MCP、Script Builder),提供 Windows UIAutomation 后端;不支持的后端会抛清楚错误。完整参考:[`docs/source/Eng/doc/new_features/v7_features_doc.rst`](../docs/source/Eng/doc/new_features/v7_features_doc.rst)。 + +- **读取 / 设置值** — `control_get_value` / `control_set_value`(`AC_control_get_value` / `AC_control_set_value`):读 textbox/combo 值(不用 OCR),一次设置值(不必逐键输入)。 +- **调用 / 切换** — `control_invoke` / `control_toggle`(`AC_control_invoke` / `AC_control_toggle`):通过控件模式按按钮或切换复选框。 +- 以 `name` / `role` / `app_name` / `automation_id`(Windows 稳定标识符)定位,版面/本地化改变也不坏。 + ## 本次更新 (2026-06-19) 两个早已存在、却没接上其余各层的 headless 核心,现在成为一级功能。两者都新增 facade re-export、`AC_*` 执行器指令、MCP 工具与 Script Builder 项目,并有 headless 测试。完整参考: diff --git 
a/README/README_zh-TW.md b/README/README_zh-TW.md index 3dee822a..66a2af11 100644 --- a/README/README_zh-TW.md +++ b/README/README_zh-TW.md @@ -12,6 +12,7 @@ ## 目錄 +- [本次更新 (2026-06-19) — 原生 UI 控制](#本次更新-2026-06-19--原生-ui-控制) - [本次更新 (2026-06-19)](#本次更新-2026-06-19) - [本次更新 (2026-06-18)](#本次更新-2026-06-18) - [本次更新 (2026-06-17)](#本次更新-2026-06-17) @@ -58,6 +59,14 @@ --- +## 本次更新 (2026-06-19) — 原生 UI 控制 + +物件級桌面自動化:透過 OS 無障礙 API(以 name / role / app / **AutomationId** 定位)讀取與操作原生控制項,而非點像素或 OCR——對原生 app 可靠得多。無障礙層先前只能 list/find/click,現在還能操作。走完整五層(facade、`AC_*`、MCP、Script Builder),提供 Windows UIAutomation 後端;不支援的後端會拋清楚錯誤。完整參考:[`docs/source/Zh/doc/new_features/v7_features_doc.rst`](../docs/source/Zh/doc/new_features/v7_features_doc.rst)。 + +- **讀取 / 設定值** — `control_get_value` / `control_set_value`(`AC_control_get_value` / `AC_control_set_value`):讀 textbox/combo 值(不用 OCR),一次設定值(不必逐鍵輸入)。 +- **呼叫 / 切換** — `control_invoke` / `control_toggle`(`AC_control_invoke` / `AC_control_toggle`):透過控制模式按按鈕或切換核取方塊。 +- 以 `name` / `role` / `app_name` / `automation_id`(Windows 穩定識別碼)定位,版面/在地化改變也不壞。 + ## 本次更新 (2026-06-19) 兩個早已存在、卻沒接上其餘各層的 headless 核心,現在成為一級功能。兩者都新增 facade re-export、`AC_*` 執行器指令、MCP 工具與 Script Builder 項目,並有 headless 測試。完整參考: diff --git a/docs/source/Eng/doc/new_features/v7_features_doc.rst b/docs/source/Eng/doc/new_features/v7_features_doc.rst new file mode 100644 index 00000000..ad0cff6b --- /dev/null +++ b/docs/source/Eng/doc/new_features/v7_features_doc.rst @@ -0,0 +1,79 @@ +============================================== +New Features (2026-06-19) — Native UI Control +============================================== + +Object-level desktop automation: read and drive native controls through +the OS accessibility API instead of clicking pixels or OCR-ing text. This +is far more reliable than coordinate/image automation for native apps — +the controls are addressed by name / role / app / **AutomationId**, so +they survive layout changes. 
+ +The accessibility layer previously only *listed*, *found*, and *clicked* +elements; it now also *acts* on them via their control patterns. Ships +through the full stack (facade, ``AC_*`` executor commands, MCP tools, +Script Builder), with a Windows UIAutomation backend; backends that can't +perform an action raise a clear ``AccessibilityNotAvailableError``. + +.. contents:: + :local: + :depth: 2 + + +Reading and setting values +========================== + +:: + + from je_auto_control import control_get_value, control_set_value + + # Read a textbox / combo value directly (no OCR). + user = control_get_value(name="Username", app_name="myapp.exe") + + # Set a value in one call (no per-key typing / focus dance). + control_set_value("alice@example.com", automation_id="emailField") + +``control_get_value`` returns the control's value text (or ``None`` when +no match); ``control_set_value`` writes it via the Value pattern and +returns ``True`` on success. + +Executor commands: ``AC_control_get_value``, ``AC_control_set_value``. + + +Invoking and toggling +===================== + +:: + + from je_auto_control import control_invoke, control_toggle + + control_invoke(name="Sign in") # press a button + control_toggle(name="Remember me") # flip a checkbox / switch + +``control_invoke`` triggers a control's default action (Invoke pattern); +``control_toggle`` flips a checkbox/switch (Toggle pattern). Both return +``True`` on success. + +Executor commands: ``AC_control_invoke``, ``AC_control_toggle``. + + +Targeting controls +================== + +Every call accepts the same matchers — provide whichever uniquely +identify the control: + +* ``name`` — the control's accessible name / label. +* ``role`` — the control type. +* ``app_name`` — the owning application (e.g. ``notepad.exe``). +* ``automation_id`` — the most stable identifier (Windows AutomationId), + unaffected by layout or localization.
+ + +Platforms +========= + +A Windows UIAutomation backend (via ``comtypes``) implements all four +actions. On platforms / backends without a control driver yet, the calls +raise ``AccessibilityNotAvailableError`` with a clear message rather than +silently failing. The backend is swappable, so the logic is unit-tested +with an injected fake — no real GUI required. diff --git a/docs/source/Eng/eng_index.rst b/docs/source/Eng/eng_index.rst index 2e5be35f..88c5ae08 100644 --- a/docs/source/Eng/eng_index.rst +++ b/docs/source/Eng/eng_index.rst @@ -29,6 +29,7 @@ Comprehensive guides for all AutoControl features. doc/new_features/v4_features_doc doc/new_features/v5_features_doc doc/new_features/v6_features_doc + doc/new_features/v7_features_doc doc/ocr_backends/ocr_backends_doc doc/observability/observability_doc doc/operations_layer/operations_layer_doc diff --git a/docs/source/Zh/doc/new_features/v7_features_doc.rst b/docs/source/Zh/doc/new_features/v7_features_doc.rst new file mode 100644 index 00000000..04e1388e --- /dev/null +++ b/docs/source/Zh/doc/new_features/v7_features_doc.rst @@ -0,0 +1,73 @@ +==================================== +新功能 (2026-06-19) — 原生 UI 控制 +==================================== + +物件級桌面自動化:透過 OS 無障礙 API 讀取與操作原生控制項,而非點像素或 +OCR 文字。對原生 app 而言,這比座標/影像自動化**可靠得多**——控制項以 +name / role / app / **AutomationId** 定位,因此版面改變也不會壞。 + +無障礙層先前只能 *列出*、*尋找*、*點擊* 元素;現在還能透過控制模式 +*操作* 它們。走完整五層(facade、``AC_*`` 執行器指令、MCP 工具、Script +Builder),並提供 Windows UIAutomation 後端;無法執行該動作的後端會拋出 +清楚的 ``AccessibilityNotAvailableError``。 + +.. 
contents:: + :local: + :depth: 2 + + +讀取與設定值 +============ + +:: + + from je_auto_control import control_get_value, control_set_value + + # 直接讀 textbox / combo 的值(不用 OCR)。 + user = control_get_value(name="Username", app_name="myapp.exe") + + # 一次設定值(不必逐鍵輸入 / 處理焦點)。 + control_set_value("alice@example.com", automation_id="emailField") + +``control_get_value`` 回傳控制項的值(無相符時回傳 ``None``); +``control_set_value`` 透過 Value pattern 寫入,成功回傳 ``True``。 + +執行器指令:``AC_control_get_value``、``AC_control_set_value``。 + + +呼叫與切換 +========== + +:: + + from je_auto_control import control_invoke, control_toggle + + control_invoke(name="Sign in") # 按下按鈕 + control_toggle(name="Remember me") # 切換核取方塊 / 開關 + +``control_invoke`` 觸發控制項的預設動作(Invoke pattern); +``control_toggle`` 切換核取方塊/開關(Toggle pattern)。兩者成功皆回傳 +``True``。 + +執行器指令:``AC_control_invoke``、``AC_control_toggle``。 + + +定位控制項 +========== + +每個呼叫都接受相同的比對條件——提供能唯一辨識控制項的任意組合: + +* ``name`` — 控制項的無障礙名稱 / 標籤。 +* ``role`` — 控制項型別。 +* ``app_name`` — 所屬應用程式(例如 ``notepad.exe``)。 +* ``automation_id`` — 最穩定的識別碼(Windows AutomationId),不受版面或 + 在地化影響。 + + +平台 +==== + +Windows UIAutomation 後端(透過 ``comtypes``)實作全部四個動作。在尚無 +控制驅動的平台/後端上,呼叫會拋出帶清楚訊息的 +``AccessibilityNotAvailableError``,而非默默失敗。後端可抽換,因此邏輯以 +注入的 fake 後端做單元測試——不需真實 GUI。 diff --git a/docs/source/Zh/zh_index.rst b/docs/source/Zh/zh_index.rst index 8d9b8988..5d5a335c 100644 --- a/docs/source/Zh/zh_index.rst +++ b/docs/source/Zh/zh_index.rst @@ -29,6 +29,7 @@ AutoControl 所有功能的完整使用指南。 doc/new_features/v4_features_doc doc/new_features/v5_features_doc doc/new_features/v6_features_doc + doc/new_features/v7_features_doc doc/ocr_backends/ocr_backends_doc doc/observability/observability_doc doc/operations_layer/operations_layer_doc diff --git a/je_auto_control/__init__.py b/je_auto_control/__init__.py index 891abc62..382731d9 100644 --- a/je_auto_control/__init__.py +++ b/je_auto_control/__init__.py @@ -43,7 +43,8 @@ from je_auto_control.utils.accessibility import ( AccessibilityElement, 
AccessibilityNotAvailableError, AccessibilityRecorder, AXRecorderEvent, AXTreeNode, - click_accessibility_element, dump_accessibility_tree, + click_accessibility_element, control_get_value, control_invoke, + control_set_value, control_toggle, dump_accessibility_tree, find_accessibility_element, list_accessibility_elements, ) # VLM element locator (headless) @@ -544,6 +545,8 @@ def start_autocontrol_gui(*args, **kwargs): "AccessibilityRecorder", "AXRecorderEvent", "AXTreeNode", "click_accessibility_element", "dump_accessibility_tree", "find_accessibility_element", "list_accessibility_elements", + "control_get_value", "control_set_value", "control_invoke", + "control_toggle", # VLM locator "VLMNotAvailableError", "locate_by_description", "click_by_description", "verify_description", diff --git a/je_auto_control/gui/script_builder/command_schema.py b/je_auto_control/gui/script_builder/command_schema.py index c4dd4f7c..b2915788 100644 --- a/je_auto_control/gui/script_builder/command_schema.py +++ b/je_auto_control/gui/script_builder/command_schema.py @@ -569,7 +569,37 @@ def _add_flow_specs(specs: List[CommandSpec]) -> None: )) +def _add_native_control_specs(specs: List[CommandSpec]) -> None: + fields = ( + FieldSpec("name", FieldType.STRING, optional=True), + FieldSpec("role", FieldType.STRING, optional=True), + FieldSpec("app_name", FieldType.STRING, optional=True), + FieldSpec("automation_id", FieldType.STRING, optional=True), + ) + specs.append(CommandSpec( + "AC_control_get_value", "Native UI", "Get Control Value", + fields=fields, + description="Read a native control's value via the accessibility API.", + )) + specs.append(CommandSpec( + "AC_control_set_value", "Native UI", "Set Control Value", + fields=(FieldSpec("value", FieldType.STRING),) + fields, + description="Set a native control's value directly (no per-key typing).", + )) + specs.append(CommandSpec( + "AC_control_invoke", "Native UI", "Invoke Control", + fields=fields, + description="Invoke a native 
control (e.g. press a button).", + )) + specs.append(CommandSpec( + "AC_control_toggle", "Native UI", "Toggle Control", + fields=fields, + description="Toggle a native control (e.g. a checkbox).", + )) + + def _add_misc_specs(specs: List[CommandSpec]) -> None: + _add_native_control_specs(specs) specs.append(CommandSpec( "AC_shell_command", "Shell", "Shell Command", fields=(FieldSpec("shell_command", FieldType.STRING),), diff --git a/je_auto_control/utils/accessibility/__init__.py b/je_auto_control/utils/accessibility/__init__.py index 01b78d50..c0231870 100644 --- a/je_auto_control/utils/accessibility/__init__.py +++ b/je_auto_control/utils/accessibility/__init__.py @@ -1,7 +1,8 @@ """Cross-platform accessibility-tree widget location + recording.""" from je_auto_control.utils.accessibility.accessibility_api import ( AccessibilityElement, AccessibilityNotAvailableError, AXTreeNode, - click_accessibility_element, dump_accessibility_tree, + click_accessibility_element, control_get_value, control_invoke, + control_set_value, control_toggle, dump_accessibility_tree, find_accessibility_element, list_accessibility_elements, ) from je_auto_control.utils.accessibility.recorder import ( @@ -18,4 +19,6 @@ "AXTreeWalker", "click_accessibility_element", "count_nodes", "dump_accessibility_tree", "find_accessibility_element", "list_accessibility_elements", "max_depth", + "control_get_value", "control_set_value", "control_invoke", + "control_toggle", ] diff --git a/je_auto_control/utils/accessibility/accessibility_api.py b/je_auto_control/utils/accessibility/accessibility_api.py index daff401f..452cd8ae 100644 --- a/je_auto_control/utils/accessibility/accessibility_api.py +++ b/je_auto_control/utils/accessibility/accessibility_api.py @@ -93,9 +93,44 @@ def dump_accessibility_tree(app_name: Optional[str] = None, ) +def control_get_value(name: Optional[str] = None, role: Optional[str] = None, + app_name: Optional[str] = None, + automation_id: Optional[str] = None) -> Optional[str]: 
+ """Read a control's value (e.g. a textbox/combo), or None if not found.""" + return get_backend().get_value( + name=name, role=role, app_name=app_name, automation_id=automation_id) + + +def control_set_value(value: str, name: Optional[str] = None, + role: Optional[str] = None, app_name: Optional[str] = None, + automation_id: Optional[str] = None) -> bool: + """Set a control's value directly (no per-key typing). True on success.""" + return get_backend().set_value( + value, name=name, role=role, app_name=app_name, + automation_id=automation_id) + + +def control_invoke(name: Optional[str] = None, role: Optional[str] = None, + app_name: Optional[str] = None, + automation_id: Optional[str] = None) -> bool: + """Invoke a control's default action (e.g. press a button).""" + return get_backend().invoke( + name=name, role=role, app_name=app_name, automation_id=automation_id) + + +def control_toggle(name: Optional[str] = None, role: Optional[str] = None, + app_name: Optional[str] = None, + automation_id: Optional[str] = None) -> bool: + """Toggle a control (e.g. 
a checkbox / switch).""" + return get_backend().toggle( + name=name, role=role, app_name=app_name, automation_id=automation_id) + + __all__ = [ "AccessibilityElement", "AccessibilityNotAvailableError", "AXTreeNode", "click_accessibility_element", "dump_accessibility_tree", "find_accessibility_element", "list_accessibility_elements", + "control_get_value", "control_set_value", "control_invoke", + "control_toggle", ] diff --git a/je_auto_control/utils/accessibility/backends/base.py b/je_auto_control/utils/accessibility/backends/base.py index 9af687c1..a43a599d 100644 --- a/je_auto_control/utils/accessibility/backends/base.py +++ b/je_auto_control/utils/accessibility/backends/base.py @@ -1,11 +1,19 @@ """Abstract accessibility backend.""" from typing import List, Optional -from je_auto_control.utils.accessibility.element import AccessibilityElement +from je_auto_control.utils.accessibility.element import ( + AccessibilityElement, AccessibilityNotAvailableError, +) class AccessibilityBackend: - """Each backend exposes the platform's accessibility tree as flat lists.""" + """Each backend exposes the platform's accessibility tree as flat lists. + + Beyond listing, a backend may *act* on a control via its native + control patterns (read/set a value, invoke, toggle). Backends that + don't implement these raise :class:`AccessibilityNotAvailableError` + through :meth:`_unsupported`. 
+ """ name: str = "abstract" available: bool = False @@ -14,3 +22,35 @@ def list_elements(self, app_name: Optional[str] = None, max_results: int = 200, ) -> List[AccessibilityElement]: raise NotImplementedError + + # --- control patterns (object-level actions) --------------------------- + + def get_value(self, name: Optional[str] = None, role: Optional[str] = None, + app_name: Optional[str] = None, + automation_id: Optional[str] = None) -> Optional[str]: + """Return the matched control's value text, or None if not found.""" + self._unsupported("get_value") + + def set_value(self, value: str, name: Optional[str] = None, + role: Optional[str] = None, app_name: Optional[str] = None, + automation_id: Optional[str] = None) -> bool: + """Set the matched control's value; return True on success.""" + self._unsupported("set_value") + + def invoke(self, name: Optional[str] = None, role: Optional[str] = None, + app_name: Optional[str] = None, + automation_id: Optional[str] = None) -> bool: + """Invoke the matched control (e.g. press a button).""" + self._unsupported("invoke") + + def toggle(self, name: Optional[str] = None, role: Optional[str] = None, + app_name: Optional[str] = None, + automation_id: Optional[str] = None) -> bool: + """Toggle the matched control (e.g. 
a checkbox).""" + self._unsupported("toggle") + + def _unsupported(self, operation: str): + """Raise a clear error for an action this backend can't perform.""" + raise AccessibilityNotAvailableError( + f"{operation} is not supported by the {self.name} backend", + ) diff --git a/je_auto_control/utils/accessibility/backends/windows_backend.py b/je_auto_control/utils/accessibility/backends/windows_backend.py index 41b1c417..20fcbbb7 100644 --- a/je_auto_control/utils/accessibility/backends/windows_backend.py +++ b/je_auto_control/utils/accessibility/backends/windows_backend.py @@ -14,13 +14,16 @@ AccessibilityBackend, ) from je_auto_control.utils.accessibility.element import ( - AccessibilityElement, AccessibilityNotAvailableError, + AccessibilityElement, AccessibilityNotAvailableError, element_matches, ) from je_auto_control.utils.logging.logging_instance import autocontrol_logger _TREE_SCOPE_DESCENDANTS = 4 _UIA_IS_CONTROL_ELEMENT_PROPERTY = 30016 _UIA_NAME_PROPERTY = 30005 +_UIA_VALUE_PATTERN_ID = 10002 +_UIA_INVOKE_PATTERN_ID = 10000 +_UIA_TOGGLE_PATTERN_ID = 10015 def _is_available() -> bool: @@ -39,6 +42,7 @@ class WindowsAccessibilityBackend(AccessibilityBackend): def __init__(self) -> None: self.available = _is_available() self._automation = None + self._uia_module = None def _ensure_automation(self): if self._automation is not None: @@ -61,6 +65,7 @@ def _ensure_automation(self): interface=uia_module.IUIAutomation, ) self._automation = automation + self._uia_module = uia_module return automation def list_elements(self, app_name: Optional[str] = None, @@ -87,6 +92,91 @@ def list_elements(self, app_name: Optional[str] = None, results.append(element) return results + def _find_raw(self, name, role, app_name, automation_id): + """Re-walk the tree and return the first matching raw UIA element.""" + automation = self._ensure_automation() + try: + root = automation.GetRootElement() + condition = automation.CreatePropertyCondition( + _UIA_IS_CONTROL_ELEMENT_PROPERTY, 
True, + ) + found = root.FindAll(_TREE_SCOPE_DESCENDANTS, condition) + except (OSError, AttributeError) as error: + autocontrol_logger.error("UIA FindAll failed: %r", error) + return None + for idx in range(int(found.Length or 0)): + raw = found.GetElement(idx) + element = _convert_uia(raw) + if element is None: + continue + if automation_id is not None and element.native_id != automation_id: + continue + if element_matches(element, name=name, role=role, app_name=app_name): + return raw + return None + + def _pattern(self, raw, pattern_id, interface_name): + """Return a queried control pattern interface, or None.""" + try: + unknown = raw.GetCurrentPattern(pattern_id) + if not unknown: + return None + interface = getattr(self._uia_module, interface_name) + return unknown.QueryInterface(interface) + except (OSError, AttributeError, ValueError): + return None + + def get_value(self, name=None, role=None, app_name=None, + automation_id=None) -> Optional[str]: + raw = self._find_raw(name, role, app_name, automation_id) + pattern = self._pattern(raw, _UIA_VALUE_PATTERN_ID, + "IUIAutomationValuePattern") if raw else None + if pattern is None: + return None + try: + return str(pattern.CurrentValue or "") + except (OSError, AttributeError): + return None + + def set_value(self, value, name=None, role=None, app_name=None, + automation_id=None) -> bool: + raw = self._find_raw(name, role, app_name, automation_id) + pattern = self._pattern(raw, _UIA_VALUE_PATTERN_ID, + "IUIAutomationValuePattern") if raw else None + if pattern is None: + return False + try: + pattern.SetValue(str(value)) + return True + except (OSError, AttributeError): + return False + + def invoke(self, name=None, role=None, app_name=None, + automation_id=None) -> bool: + raw = self._find_raw(name, role, app_name, automation_id) + pattern = self._pattern(raw, _UIA_INVOKE_PATTERN_ID, + "IUIAutomationInvokePattern") if raw else None + if pattern is None: + return False + try: + pattern.Invoke() + return True + 
except (OSError, AttributeError): + return False + + def toggle(self, name=None, role=None, app_name=None, + automation_id=None) -> bool: + raw = self._find_raw(name, role, app_name, automation_id) + pattern = self._pattern(raw, _UIA_TOGGLE_PATTERN_ID, + "IUIAutomationTogglePattern") if raw else None + if pattern is None: + return False + try: + pattern.Toggle() + return True + except (OSError, AttributeError): + return False + def _convert_uia(raw) -> Optional[AccessibilityElement]: try: diff --git a/je_auto_control/utils/executor/action_executor.py b/je_auto_control/utils/executor/action_executor.py index 3cffe055..baa8cc9e 100644 --- a/je_auto_control/utils/executor/action_executor.py +++ b/je_auto_control/utils/executor/action_executor.py @@ -2212,6 +2212,42 @@ def _run_state_machine(spec: Any) -> Dict[str, Any]: return run_state_machine(spec) +def _control_get_value(name: Optional[str] = None, role: Optional[str] = None, + app_name: Optional[str] = None, + automation_id: Optional[str] = None) -> Optional[str]: + """Adapter: read a native control's value via the accessibility backend.""" + from je_auto_control.utils.accessibility import control_get_value + return control_get_value(name=name, role=role, app_name=app_name, + automation_id=automation_id) + + +def _control_set_value(value: str, name: Optional[str] = None, + role: Optional[str] = None, app_name: Optional[str] = None, + automation_id: Optional[str] = None) -> bool: + """Adapter: set a native control's value via the accessibility backend.""" + from je_auto_control.utils.accessibility import control_set_value + return control_set_value(value, name=name, role=role, app_name=app_name, + automation_id=automation_id) + + +def _control_invoke(name: Optional[str] = None, role: Optional[str] = None, + app_name: Optional[str] = None, + automation_id: Optional[str] = None) -> bool: + """Adapter: invoke a native control (e.g. 
press a button).""" + from je_auto_control.utils.accessibility import control_invoke + return control_invoke(name=name, role=role, app_name=app_name, + automation_id=automation_id) + + +def _control_toggle(name: Optional[str] = None, role: Optional[str] = None, + app_name: Optional[str] = None, + automation_id: Optional[str] = None) -> bool: + """Adapter: toggle a native control (e.g. a checkbox).""" + from je_auto_control.utils.accessibility import control_toggle + return control_toggle(name=name, role=role, app_name=app_name, + automation_id=automation_id) + + class Executor: """ Executor @@ -2359,6 +2395,10 @@ def __init__(self): "AC_a11y_find": _a11y_find_as_dict, "AC_a11y_click": click_accessibility_element, "AC_a11y_dump": _a11y_dump, + "AC_control_get_value": _control_get_value, + "AC_control_set_value": _control_set_value, + "AC_control_invoke": _control_invoke, + "AC_control_toggle": _control_toggle, "AC_a11y_record_start": _a11y_record_start, "AC_a11y_record_stop": _a11y_record_stop, "AC_a11y_record_events": _a11y_record_events, diff --git a/je_auto_control/utils/mcp_server/tools/_factories.py b/je_auto_control/utils/mcp_server/tools/_factories.py index 6a6ee3f3..8e688373 100644 --- a/je_auto_control/utils/mcp_server/tools/_factories.py +++ b/je_auto_control/utils/mcp_server/tools/_factories.py @@ -1045,6 +1045,53 @@ def webrunner_tools() -> List[MCPTool]: ] +def a11y_control_tools() -> List[MCPTool]: + _M = { + "name": {"type": "string"}, + "role": {"type": "string"}, + "app_name": {"type": "string"}, + "automation_id": {"type": "string"}, + } + return [ + MCPTool( + name="ac_control_get_value", + description=("Read a native control's value (textbox/combo/etc.) " + "via the OS accessibility API, located by name/role/" + "app_name/automation_id. Far more reliable than OCR. 
" + "Returns the value string or null."), + input_schema=schema(dict(_M)), + handler=h.control_get_value, + annotations=READ_ONLY, + ), + MCPTool( + name="ac_control_set_value", + description=("Set a native control's value directly (no per-key " + "typing). Located by name/role/app_name/automation_id. " + "Returns true on success."), + input_schema=schema({"value": {"type": "string"}, **_M}, + required=["value"]), + handler=h.control_set_value, + annotations=DESTRUCTIVE, + ), + MCPTool( + name="ac_control_invoke", + description=("Invoke a native control's default action (e.g. press " + "a button) via the accessibility API."), + input_schema=schema(dict(_M)), + handler=h.control_invoke, + annotations=DESTRUCTIVE, + ), + MCPTool( + name="ac_control_toggle", + description=("Toggle a native control (e.g. a checkbox/switch) via " + "the accessibility API."), + input_schema=schema(dict(_M)), + handler=h.control_toggle, + annotations=DESTRUCTIVE, + ), + ] + + def a11y_tree_tools() -> List[MCPTool]: return [ MCPTool( @@ -2547,7 +2594,8 @@ def media_assert_tools() -> List[MCPTool]: mouse_tools, keyboard_tools, screen_tools, image_and_ocr_tools, window_tools, system_tools, recording_tools, drag_and_send_tools, semantic_locator_tools, self_healing_tools, anchor_locator_tools, - ab_locator_tools, a11y_tree_tools, ocr_structure_tools, + ab_locator_tools, a11y_tree_tools, a11y_control_tools, + ocr_structure_tools, smart_wait_tools, cost_telemetry_tools, failure_hook_tools, computer_use_tools, dag_tools, presence_tools, chatops_tools, redaction_tools, android_widget_tools, ios_tools, webrunner_tools, diff --git a/je_auto_control/utils/mcp_server/tools/_handlers.py b/je_auto_control/utils/mcp_server/tools/_handlers.py index f3b08086..9407a4e8 100644 --- a/je_auto_control/utils/mcp_server/tools/_handlers.py +++ b/je_auto_control/utils/mcp_server/tools/_handlers.py @@ -719,6 +719,32 @@ def a11y_click(name: Optional[str] = None, app_name=app_name)) +def control_get_value(name=None, 
role=None, app_name=None, + automation_id=None): + from je_auto_control.utils.accessibility import control_get_value as _g + return _g(name=name, role=role, app_name=app_name, + automation_id=automation_id) + + +def control_set_value(value, name=None, role=None, app_name=None, + automation_id=None): + from je_auto_control.utils.accessibility import control_set_value as _s + return _s(value, name=name, role=role, app_name=app_name, + automation_id=automation_id) + + +def control_invoke(name=None, role=None, app_name=None, automation_id=None): + from je_auto_control.utils.accessibility import control_invoke as _i + return _i(name=name, role=role, app_name=app_name, + automation_id=automation_id) + + +def control_toggle(name=None, role=None, app_name=None, automation_id=None): + from je_auto_control.utils.accessibility import control_toggle as _t + return _t(name=name, role=role, app_name=app_name, + automation_id=automation_id) + + def vlm_locate(description: str, screen_region: Optional[List[int]] = None, model: Optional[str] = None) -> Optional[List[int]]: diff --git a/test/unit_test/headless/test_native_control.py b/test/unit_test/headless/test_native_control.py new file mode 100644 index 00000000..d7367b85 --- /dev/null +++ b/test/unit_test/headless/test_native_control.py @@ -0,0 +1,122 @@ +"""Headless tests for native UI control actions (get/set/invoke/toggle). + +A fake accessibility backend is injected, so the tests exercise the API, +executor commands and MCP wiring without any real UIAutomation/AX/comtypes +or a live GUI. The real Windows UIA path is platform code, validated the +same way the rest of the backend is. 
+""" +import pytest + +import je_auto_control as ac +from je_auto_control.utils.accessibility import accessibility_api as api +from je_auto_control.utils.accessibility.backends.base import ( + AccessibilityBackend, +) +from je_auto_control.utils.accessibility.backends.null_backend import ( + NullAccessibilityBackend, +) +from je_auto_control.utils.accessibility.element import ( + AccessibilityElement, AccessibilityNotAvailableError, +) + + +class _FakeBackend(AccessibilityBackend): + name = "fake" + available = True + + def __init__(self): + self.values = {"Username": "alice"} + self.invoked = [] + self.toggled = [] + + def list_elements(self, app_name=None, max_results=200): + return [AccessibilityElement(name="Username", role="edit", + bounds=(0, 0, 10, 10))] + + def get_value(self, name=None, role=None, app_name=None, + automation_id=None): + return self.values.get(name) + + def set_value(self, value, name=None, role=None, app_name=None, + automation_id=None): + self.values[name] = value + return True + + def invoke(self, name=None, role=None, app_name=None, automation_id=None): + self.invoked.append(name) + return True + + def toggle(self, name=None, role=None, app_name=None, automation_id=None): + self.toggled.append(name) + return True + + +@pytest.fixture() +def fake(monkeypatch): + backend = _FakeBackend() + monkeypatch.setattr(api, "get_backend", lambda: backend) + return backend + + +def test_get_value(fake): + assert ac.control_get_value(name="Username") == "alice" + assert ac.control_get_value(name="Missing") is None + + +def test_set_value(fake): + assert ac.control_set_value("bob", name="Username") is True + assert fake.values["Username"] == "bob" + + +def test_invoke(fake): + assert ac.control_invoke(name="OK") is True + assert "OK" in fake.invoked + + +def test_toggle(fake): + assert ac.control_toggle(name="Remember me") is True + assert "Remember me" in fake.toggled + + +def test_executor_commands(fake): + ac.execute_action([ + 
["AC_control_set_value", {"value": "zoe", "name": "Username"}], + ["AC_control_invoke", {"name": "Login"}], + ["AC_control_toggle", {"name": "Stay signed in"}], + ]) + assert fake.values["Username"] == "zoe" + assert "Login" in fake.invoked + assert "Stay signed in" in fake.toggled + + +def test_facade_and_executor_registered(fake): + assert ac.control_get_value is api.control_get_value + assert {"AC_control_get_value", "AC_control_set_value", + "AC_control_invoke", "AC_control_toggle"} <= ac.executor.known_commands() + + +def test_mcp_tools_registered(): + from je_auto_control.utils.mcp_server.tools import build_default_tool_registry + names = {t.name for t in build_default_tool_registry()} + assert {"ac_control_get_value", "ac_control_set_value", + "ac_control_invoke", "ac_control_toggle"} <= names + + +def test_unsupported_backend_raises_clearly(): + backend = NullAccessibilityBackend() + for call in (lambda: backend.get_value(name="x"), + lambda: backend.set_value("v", name="x"), + lambda: backend.invoke(name="x"), + lambda: backend.toggle(name="x")): + with pytest.raises(AccessibilityNotAvailableError): + call() + + +def test_builder_specs_present_and_wired(): + from je_auto_control.gui.script_builder.command_schema import _build_specs + known = ac.executor.known_commands() + cmds = {s.command for s in _build_specs()} + assert {"AC_control_get_value", "AC_control_set_value", + "AC_control_invoke", "AC_control_toggle"} <= cmds + assert {"AC_control_get_value", "AC_control_set_value", + "AC_control_invoke", "AC_control_toggle"} <= known From 8a213d439026fcd0ee23804346b4fb8ea4dd264e Mon Sep 17 00:00:00 2001 From: JeffreyChen Date: Fri, 19 Jun 2026 04:05:00 +0800 Subject: [PATCH 2/2] Add AC_read_table: read grid/table/list controls as rows MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Complete the native control driver with table reading — the agent research's other top desktop gap (data scraping). 
Adds read_table to the backend interface, a Windows UIAutomation Grid-pattern implementation, read_control_table in the facade, the AC_read_table executor command, the ac_read_table MCP tool, a Script Builder entry, fake-backend tests, and the v7 docs / README sections. --- README.md | 1 + README/README_zh-CN.md | 1 + README/README_zh-TW.md | 1 + .../Eng/doc/new_features/v7_features_doc.rst | 16 +++++++++++ .../Zh/doc/new_features/v7_features_doc.rst | 16 +++++++++++ je_auto_control/__init__.py | 3 +- .../gui/script_builder/command_schema.py | 5 ++++ .../utils/accessibility/__init__.py | 3 +- .../utils/accessibility/accessibility_api.py | 11 +++++++- .../utils/accessibility/backends/base.py | 7 +++++ .../accessibility/backends/windows_backend.py | 27 ++++++++++++++++++ .../utils/executor/action_executor.py | 10 +++++++ .../utils/mcp_server/tools/_factories.py | 10 +++++++ .../utils/mcp_server/tools/_handlers.py | 6 ++++ .../unit_test/headless/test_native_control.py | 28 ++++++++++++++----- 15 files changed, 135 insertions(+), 10 deletions(-) diff --git a/README.md b/README.md index c9c52663..4f109776 100644 --- a/README.md +++ b/README.md @@ -66,6 +66,7 @@ Object-level desktop automation: read and drive native controls through the OS a - **Read / set value** — `control_get_value` / `control_set_value` (`AC_control_get_value` / `AC_control_set_value`): read a textbox/combo value (no OCR) and set it in one call (no per-key typing). - **Invoke / toggle** — `control_invoke` / `control_toggle` (`AC_control_invoke` / `AC_control_toggle`): press a button or flip a checkbox via its control pattern. +- **Read a table/grid** — `read_control_table` (`AC_read_table`): scrape a grid/list/table control into rows of cell strings — desktop data extraction without OCR. - Targets a control by `name` / `role` / `app_name` / `automation_id` (the stable Windows identifier), so it survives layout/localization changes. 
## What's new (2026-06-19) diff --git a/README/README_zh-CN.md b/README/README_zh-CN.md index 21aa5d0f..c9b77da3 100644 --- a/README/README_zh-CN.md +++ b/README/README_zh-CN.md @@ -65,6 +65,7 @@ - **读取 / 设置值** — `control_get_value` / `control_set_value`(`AC_control_get_value` / `AC_control_set_value`):读 textbox/combo 值(不用 OCR),一次设置值(不必逐键输入)。 - **调用 / 切换** — `control_invoke` / `control_toggle`(`AC_control_invoke` / `AC_control_toggle`):通过控件模式按按钮或切换复选框。 +- **读取表格/列表** — `read_control_table`(`AC_read_table`):把 grid/list/table 控件抓成逐行单元格字符串——不用 OCR 的桌面数据提取。 - 以 `name` / `role` / `app_name` / `automation_id`(Windows 稳定标识符)定位,版面/本地化改变也不坏。 ## 本次更新 (2026-06-19) diff --git a/README/README_zh-TW.md b/README/README_zh-TW.md index 66a2af11..6d885604 100644 --- a/README/README_zh-TW.md +++ b/README/README_zh-TW.md @@ -65,6 +65,7 @@ - **讀取 / 設定值** — `control_get_value` / `control_set_value`(`AC_control_get_value` / `AC_control_set_value`):讀 textbox/combo 值(不用 OCR),一次設定值(不必逐鍵輸入)。 - **呼叫 / 切換** — `control_invoke` / `control_toggle`(`AC_control_invoke` / `AC_control_toggle`):透過控制模式按按鈕或切換核取方塊。 +- **讀取表格/清單** — `read_control_table`(`AC_read_table`):把 grid/list/table 控制項抓成逐列儲存格字串——不用 OCR 的桌面資料擷取。 - 以 `name` / `role` / `app_name` / `automation_id`(Windows 穩定識別碼)定位,版面/在地化改變也不壞。 ## 本次更新 (2026-06-19) diff --git a/docs/source/Eng/doc/new_features/v7_features_doc.rst b/docs/source/Eng/doc/new_features/v7_features_doc.rst index ad0cff6b..8ae3706b 100644 --- a/docs/source/Eng/doc/new_features/v7_features_doc.rst +++ b/docs/source/Eng/doc/new_features/v7_features_doc.rst @@ -56,6 +56,22 @@ Invoking and toggling Executor commands: ``AC_control_invoke``, ``AC_control_toggle``. +Reading tables / grids +====================== +:: + + from je_auto_control import read_control_table + + rows = read_control_table(name="Results", app_name="myapp.exe") + # -> [["Sam", "30"], ["Lee", "25"], ...] 
+ +``read_control_table`` reads a grid/table/list control into rows of cell +strings via the Grid pattern — reliable desktop data scraping without OCR. + +Executor command: ``AC_read_table``. + + Targeting controls ================= diff --git a/docs/source/Zh/doc/new_features/v7_features_doc.rst b/docs/source/Zh/doc/new_features/v7_features_doc.rst index 04e1388e..628aab5a 100644 --- a/docs/source/Zh/doc/new_features/v7_features_doc.rst +++ b/docs/source/Zh/doc/new_features/v7_features_doc.rst @@ -52,6 +52,22 @@ Builder),並提供 Windows UIAutomation 後端;無法執行該動作的後端會 執行器指令:``AC_control_invoke``、``AC_control_toggle``。 +讀取表格 / 清單 +================ + +:: + + from je_auto_control import read_control_table + + rows = read_control_table(name="Results", app_name="myapp.exe") + # -> [["Sam", "30"], ["Lee", "25"], ...] + +``read_control_table`` 透過 Grid pattern 把 grid/table/list 控制項讀成 +逐列的儲存格字串——不用 OCR 的可靠桌面資料抓取。 + +執行器指令:``AC_read_table``。 + + 定位控制項 ========== diff --git a/je_auto_control/__init__.py b/je_auto_control/__init__.py index 382731d9..0c53342b 100644 --- a/je_auto_control/__init__.py +++ b/je_auto_control/__init__.py @@ -46,6 +46,7 @@ click_accessibility_element, control_get_value, control_invoke, control_set_value, control_toggle, dump_accessibility_tree, find_accessibility_element, list_accessibility_elements, + read_control_table, ) # VLM element locator (headless) from je_auto_control.utils.vision import ( @@ -546,7 +547,7 @@ def start_autocontrol_gui(*args, **kwargs): "click_accessibility_element", "dump_accessibility_tree", "find_accessibility_element", "list_accessibility_elements", "control_get_value", "control_set_value", "control_invoke", - "control_toggle", + "control_toggle", "read_control_table", # VLM locator "VLMNotAvailableError", "locate_by_description", "click_by_description", "verify_description", diff --git a/je_auto_control/gui/script_builder/command_schema.py b/je_auto_control/gui/script_builder/command_schema.py index b2915788..f68b0ab0 100644 
--- a/je_auto_control/gui/script_builder/command_schema.py +++ b/je_auto_control/gui/script_builder/command_schema.py @@ -596,6 +596,11 @@ def _add_native_control_specs(specs: List[CommandSpec]) -> None: fields=fields, description="Toggle a native control (e.g. a checkbox).", )) + specs.append(CommandSpec( + "AC_read_table", "Native UI", "Read Table / Grid", + fields=fields, + description="Read a grid/table/list control as rows of cell strings.", + )) def _add_misc_specs(specs: List[CommandSpec]) -> None: diff --git a/je_auto_control/utils/accessibility/__init__.py b/je_auto_control/utils/accessibility/__init__.py index c0231870..e6bf4378 100644 --- a/je_auto_control/utils/accessibility/__init__.py +++ b/je_auto_control/utils/accessibility/__init__.py @@ -4,6 +4,7 @@ click_accessibility_element, control_get_value, control_invoke, control_set_value, control_toggle, dump_accessibility_tree, find_accessibility_element, list_accessibility_elements, + read_control_table, ) from je_auto_control.utils.accessibility.recorder import ( AXRecorderEvent, AccessibilityRecorder, @@ -20,5 +21,5 @@ "dump_accessibility_tree", "find_accessibility_element", "list_accessibility_elements", "max_depth", "control_get_value", "control_set_value", "control_invoke", - "control_toggle", + "control_toggle", "read_control_table", ] diff --git a/je_auto_control/utils/accessibility/accessibility_api.py b/je_auto_control/utils/accessibility/accessibility_api.py index 452cd8ae..39816b13 100644 --- a/je_auto_control/utils/accessibility/accessibility_api.py +++ b/je_auto_control/utils/accessibility/accessibility_api.py @@ -126,11 +126,20 @@ def control_toggle(name: Optional[str] = None, role: Optional[str] = None, name=name, role=role, app_name=app_name, automation_id=automation_id) +def read_control_table(name: Optional[str] = None, role: Optional[str] = None, + app_name: Optional[str] = None, + automation_id: Optional[str] = None, + ) -> List[List[str]]: + """Read a grid/table/list control as rows 
of cell strings.""" + return get_backend().read_table( + name=name, role=role, app_name=app_name, automation_id=automation_id) + + __all__ = [ "AccessibilityElement", "AccessibilityNotAvailableError", "AXTreeNode", "click_accessibility_element", "dump_accessibility_tree", "find_accessibility_element", "list_accessibility_elements", "control_get_value", "control_set_value", "control_invoke", - "control_toggle", + "control_toggle", "read_control_table", ] diff --git a/je_auto_control/utils/accessibility/backends/base.py b/je_auto_control/utils/accessibility/backends/base.py index a43a599d..2ca8ef7e 100644 --- a/je_auto_control/utils/accessibility/backends/base.py +++ b/je_auto_control/utils/accessibility/backends/base.py @@ -49,6 +49,13 @@ def toggle(self, name: Optional[str] = None, role: Optional[str] = None, """Toggle the matched control (e.g. a checkbox).""" self._unsupported("toggle") + def read_table(self, name: Optional[str] = None, role: Optional[str] = None, + app_name: Optional[str] = None, + automation_id: Optional[str] = None, + ) -> List[List[str]]: + """Read a grid/table/list control as rows of cell strings.""" + self._unsupported("read_table") + def _unsupported(self, operation: str): """Raise a clear error for an action this backend can't perform.""" raise AccessibilityNotAvailableError( diff --git a/je_auto_control/utils/accessibility/backends/windows_backend.py b/je_auto_control/utils/accessibility/backends/windows_backend.py index 20fcbbb7..be8c3c41 100644 --- a/je_auto_control/utils/accessibility/backends/windows_backend.py +++ b/je_auto_control/utils/accessibility/backends/windows_backend.py @@ -24,6 +24,7 @@ _UIA_VALUE_PATTERN_ID = 10002 _UIA_INVOKE_PATTERN_ID = 10000 _UIA_TOGGLE_PATTERN_ID = 10015 +_UIA_GRID_PATTERN_ID = 10006 def _is_available() -> bool: @@ -177,6 +178,32 @@ def toggle(self, name=None, role=None, app_name=None, except (OSError, AttributeError): return False + def read_table(self, name=None, role=None, app_name=None, + 
automation_id=None): + raw = self._find_raw(name, role, app_name, automation_id) + pattern = self._pattern(raw, _UIA_GRID_PATTERN_ID, + "IUIAutomationGridPattern") if raw else None + if pattern is None: + return [] + try: + rows = int(pattern.CurrentRowCount or 0) + cols = int(pattern.CurrentColumnCount or 0) + except (OSError, AttributeError): + return [] + return [self._read_row(pattern, r, cols) for r in range(rows)] + + @staticmethod + def _read_row(pattern, row: int, cols: int): + """Read one grid row into a list of cell strings.""" + cells = [] + for col in range(cols): + try: + cell = pattern.GetItem(row, col) + cells.append(str(cell.CurrentName or "") if cell else "") + except (OSError, AttributeError): + cells.append("") + return cells + def _convert_uia(raw) -> Optional[AccessibilityElement]: try: diff --git a/je_auto_control/utils/executor/action_executor.py b/je_auto_control/utils/executor/action_executor.py index baa8cc9e..80ca461b 100644 --- a/je_auto_control/utils/executor/action_executor.py +++ b/je_auto_control/utils/executor/action_executor.py @@ -2248,6 +2248,15 @@ def _control_toggle(name: Optional[str] = None, role: Optional[str] = None, automation_id=automation_id) +def _read_table(name: Optional[str] = None, role: Optional[str] = None, + app_name: Optional[str] = None, + automation_id: Optional[str] = None) -> List[List[str]]: + """Adapter: read a grid/table/list control as rows of cell strings.""" + from je_auto_control.utils.accessibility import read_control_table + return read_control_table(name=name, role=role, app_name=app_name, + automation_id=automation_id) + + class Executor: """ Executor @@ -2399,6 +2408,7 @@ def __init__(self): "AC_control_set_value": _control_set_value, "AC_control_invoke": _control_invoke, "AC_control_toggle": _control_toggle, + "AC_read_table": _read_table, "AC_a11y_record_start": _a11y_record_start, "AC_a11y_record_stop": _a11y_record_stop, "AC_a11y_record_events": _a11y_record_events, diff --git 
a/je_auto_control/utils/mcp_server/tools/_factories.py b/je_auto_control/utils/mcp_server/tools/_factories.py index 8e688373..f095f898 100644 --- a/je_auto_control/utils/mcp_server/tools/_factories.py +++ b/je_auto_control/utils/mcp_server/tools/_factories.py @@ -1089,6 +1089,16 @@ def a11y_control_tools() -> List[MCPTool]: handler=h.control_toggle, annotations=DESTRUCTIVE, ), + MCPTool( + name="ac_read_table", + description=("Read a grid/table/list control as rows of cell " + "strings via the accessibility Grid pattern. Located " + "by name/role/app_name/automation_id. Reliable " + "desktop data scraping without OCR."), + input_schema=schema(dict(_M)), + handler=h.read_table, + annotations=READ_ONLY, + ), ] diff --git a/je_auto_control/utils/mcp_server/tools/_handlers.py b/je_auto_control/utils/mcp_server/tools/_handlers.py index 9407a4e8..4a3963d1 100644 --- a/je_auto_control/utils/mcp_server/tools/_handlers.py +++ b/je_auto_control/utils/mcp_server/tools/_handlers.py @@ -745,6 +745,12 @@ def control_toggle(name=None, role=None, app_name=None, automation_id=None): automation_id=automation_id) +def read_table(name=None, role=None, app_name=None, automation_id=None): + from je_auto_control.utils.accessibility import read_control_table as _r + return _r(name=name, role=role, app_name=app_name, + automation_id=automation_id) + + def vlm_locate(description: str, screen_region: Optional[List[int]] = None, model: Optional[str] = None) -> Optional[List[int]]: diff --git a/test/unit_test/headless/test_native_control.py b/test/unit_test/headless/test_native_control.py index d7367b85..37294f5a 100644 --- a/test/unit_test/headless/test_native_control.py +++ b/test/unit_test/headless/test_native_control.py @@ -50,6 +50,10 @@ def toggle(self, name=None, role=None, app_name=None, automation_id=None): self.toggled.append(name) return True + def read_table(self, name=None, role=None, app_name=None, + automation_id=None): + return [["Sam", "30"], ["Lee", "25"]] + @pytest.fixture() 
def fake(monkeypatch): @@ -89,17 +93,26 @@ def test_executor_commands(fake): assert "Stay signed in" in fake.toggled +def test_read_table(fake): + rows = ac.read_control_table(name="Grid") + assert rows == [["Sam", "30"], ["Lee", "25"]] + record = ac.execute_action([["AC_read_table", {"name": "Grid"}]]) + assert any("Sam" in str(v) for v in record.values()) + + def test_facade_and_executor_registered(fake): assert ac.control_get_value is api.control_get_value assert {"AC_control_get_value", "AC_control_set_value", - "AC_control_invoke", "AC_control_toggle"} <= ac.executor.known_commands() + "AC_control_invoke", "AC_control_toggle", + "AC_read_table"} <= ac.executor.known_commands() def test_mcp_tools_registered(): from je_auto_control.utils.mcp_server.tools import build_default_tool_registry names = {t.name for t in build_default_tool_registry()} assert {"ac_control_get_value", "ac_control_set_value", - "ac_control_invoke", "ac_control_toggle"} <= names + "ac_control_invoke", "ac_control_toggle", + "ac_read_table"} <= names def test_unsupported_backend_raises_clearly(): @@ -107,7 +120,8 @@ def test_unsupported_backend_raises_clearly(): for call in (lambda: backend.get_value(name="x"), lambda: backend.set_value("v", name="x"), lambda: backend.invoke(name="x"), - lambda: backend.toggle(name="x")): + lambda: backend.toggle(name="x"), + lambda: backend.read_table(name="x")): with pytest.raises(AccessibilityNotAvailableError): call() @@ -116,7 +130,7 @@ def test_builder_specs_present_and_wired(): from je_auto_control.gui.script_builder.command_schema import _build_specs known = ac.executor.known_commands() cmds = {s.command for s in _build_specs()} - assert {"AC_control_get_value", "AC_control_set_value", - "AC_control_invoke", "AC_control_toggle"} <= cmds - assert {"AC_control_get_value", "AC_control_set_value", - "AC_control_invoke", "AC_control_toggle"} <= known + wanted = {"AC_control_get_value", "AC_control_set_value", + "AC_control_invoke", "AC_control_toggle", 
"AC_read_table"} + assert wanted <= cmds + assert wanted <= known