From 818a1c719513af126329394fe428f24043b60527 Mon Sep 17 00:00:00 2001 From: JeffreyChen Date: Fri, 19 Jun 2026 10:56:37 +0800 Subject: [PATCH 1/2] Add agent episodic memory store and deterministic-run harness --- README.md | 8 + README/README_zh-CN.md | 8 + README/README_zh-TW.md | 8 + .../Eng/doc/new_features/v15_features_doc.rst | 61 ++++++++ docs/source/Eng/eng_index.rst | 1 + .../Zh/doc/new_features/v15_features_doc.rst | 54 +++++++ docs/source/Zh/zh_index.rst | 1 + je_auto_control/__init__.py | 8 + .../gui/script_builder/command_schema.py | 39 +++++ .../utils/agent_memory/__init__.py | 6 + .../utils/agent_memory/agent_memory.py | 140 ++++++++++++++++++ .../utils/deterministic/__init__.py | 6 + .../utils/deterministic/deterministic.py | 90 +++++++++++ .../utils/executor/action_executor.py | 53 +++++++ .../utils/mcp_server/tools/_factories.py | 68 +++++++++ .../utils/mcp_server/tools/_handlers.py | 39 +++++ .../headless/test_agent_memory_batch.py | 114 ++++++++++++++ 17 files changed, 704 insertions(+) create mode 100644 docs/source/Eng/doc/new_features/v15_features_doc.rst create mode 100644 docs/source/Zh/doc/new_features/v15_features_doc.rst create mode 100644 je_auto_control/utils/agent_memory/__init__.py create mode 100644 je_auto_control/utils/agent_memory/agent_memory.py create mode 100644 je_auto_control/utils/deterministic/__init__.py create mode 100644 je_auto_control/utils/deterministic/deterministic.py create mode 100644 test/unit_test/headless/test_agent_memory_batch.py diff --git a/README.md b/README.md index c596c4dd..ccd4401a 100644 --- a/README.md +++ b/README.md @@ -13,6 +13,7 @@ ## Table of Contents +- [What's new (2026-06-19) — Memory & Determinism](#whats-new-2026-06-19--memory--determinism) - [What's new (2026-06-19) — Office I/O](#whats-new-2026-06-19--office-io) - [What's new (2026-06-19) — Agent Toolkit](#whats-new-2026-06-19--agent-toolkit) - [What's new (2026-06-19) — Authoring & Debugging](#whats-new-2026-06-19--authoring--debugging) @@ -67,6 +68,13 @@ --- +## What's new (2026-06-19) — Memory & Determinism + +Two pure-stdlib tools from the agent/QA research round, full stack (facade, `AC_*`, MCP, Script Builder). Full reference: [`docs/source/Eng/doc/new_features/v15_features_doc.rst`](docs/source/Eng/doc/new_features/v15_features_doc.rst). + +- **Agent episodic memory** — `AgentMemory` (`AC_memory_remember` / `AC_memory_recall` / `AC_memory_recent` / `AC_memory_forget` / `AC_memory_stats`, `ac_memory_*`): SQLite store of `(goal → trajectory → outcome)` episodes with keyword recall to inject past experience into the planner's context — cross-run learning, no embedding dependency. +- **Deterministic run** — `DeterministicRun` / `seed_everything` (`AC_seed_everything`, `ac_seed_everything`): pin the RNG seed and freeze `time.time` for a `with` block (recording the choices for replay) to kill time/randomness flakiness; `time.monotonic` left intact so timeouts still work. + ## What's new (2026-06-19) — Office I/O Headless read/write for Excel/Word/PowerPoint, full stack (facade, `AC_*`, MCP, Script Builder). Optional extra: `pip install je_auto_control[office]`. Full reference: [`docs/source/Eng/doc/new_features/v14_features_doc.rst`](docs/source/Eng/doc/new_features/v14_features_doc.rst). diff --git a/README/README_zh-CN.md b/README/README_zh-CN.md index a81d8af9..61bee8bc 100644 --- a/README/README_zh-CN.md +++ b/README/README_zh-CN.md @@ -12,6 +12,7 @@ ## 目录 +- [本次更新 (2026-06-19) — 记忆与确定性](#本次更新-2026-06-19--记忆与确定性) - [本次更新 (2026-06-19) — Office 读写](#本次更新-2026-06-19--office-读写) - [本次更新 (2026-06-19) — Agent 工具组](#本次更新-2026-06-19--agent-工具组) - [本次更新 (2026-06-19) — 编写与调试](#本次更新-2026-06-19--编写与调试) @@ -66,6 +67,13 @@ --- +## 本次更新 (2026-06-19) — 记忆与确定性 + +由 agent/QA 研究轮找出的两项纯标准库工具,走完整五层(facade、`AC_*`、MCP、Script Builder)。完整参考:[`docs/source/Zh/doc/new_features/v15_features_doc.rst`](../docs/source/Zh/doc/new_features/v15_features_doc.rst)。 + +- **Agent 情节记忆** — `AgentMemory`(`AC_memory_remember` / `AC_memory_recall` / `AC_memory_recent` / `AC_memory_forget` / `AC_memory_stats`、`ac_memory_*`):以 SQLite 存储 `(目标 → 轨迹 → 结果)` 情节,依关键字召回过往经验注入规划器上下文——跨执行学习,免向量依赖。 +- **确定性执行** — `DeterministicRun` / `seed_everything`(`AC_seed_everything`、`ac_seed_everything`):在 `with` 块内固定 RNG 种子并冻结 `time.time`(记录选择以便重现),消除时间/随机造成的不稳定;`time.monotonic` 保持不变,超时仍正常。 + ## 本次更新 (2026-06-19) — Office 读写 Excel/Word/PowerPoint 的 headless 读写,走完整五层(facade、`AC_*`、MCP、Script Builder)。可选 extra:`pip install je_auto_control[office]`。完整参考:[`docs/source/Zh/doc/new_features/v14_features_doc.rst`](../docs/source/Zh/doc/new_features/v14_features_doc.rst)。 diff --git a/README/README_zh-TW.md b/README/README_zh-TW.md index f5e0ad8e..f1acb529 100644 --- a/README/README_zh-TW.md +++ b/README/README_zh-TW.md @@ -12,6 +12,7 @@ ## 目錄 +- [本次更新 (2026-06-19) — 記憶與決定性](#本次更新-2026-06-19--記憶與決定性) - [本次更新 (2026-06-19) — Office 讀寫](#本次更新-2026-06-19--office-讀寫) - [本次更新 (2026-06-19) — Agent 工具組](#本次更新-2026-06-19--agent-工具組) - [本次更新 (2026-06-19) — 編寫與除錯](#本次更新-2026-06-19--編寫與除錯) @@ -66,6 +67,13 @@ --- +## 本次更新 (2026-06-19) — 記憶與決定性 + +由 agent/QA 研究輪找出的兩項純標準庫工具,走完整五層(facade、`AC_*`、MCP、Script Builder)。完整參考:[`docs/source/Zh/doc/new_features/v15_features_doc.rst`](../docs/source/Zh/doc/new_features/v15_features_doc.rst)。 + +- **Agent 情節記憶** — `AgentMemory`(`AC_memory_remember` / `AC_memory_recall` / `AC_memory_recent` / `AC_memory_forget` / `AC_memory_stats`、`ac_memory_*`):以 SQLite 儲存 `(目標 → 軌跡 → 結果)` 情節,依關鍵字召回過往經驗注入規劃器脈絡——跨執行學習,免向量相依。 +- **決定性執行** — `DeterministicRun` / `seed_everything`(`AC_seed_everything`、`ac_seed_everything`):在 `with` 區塊內固定 RNG 種子並凍結 `time.time`(記錄選擇以便重現),消除時間/隨機造成的不穩定;`time.monotonic` 保持不變,逾時仍正常。 + ## 本次更新 (2026-06-19) — Office 讀寫 Excel/Word/PowerPoint 的 headless 讀寫,走完整五層(facade、`AC_*`、MCP、Script Builder)。可選 extra:`pip install je_auto_control[office]`。完整參考:[`docs/source/Zh/doc/new_features/v14_features_doc.rst`](../docs/source/Zh/doc/new_features/v14_features_doc.rst)。 diff --git a/docs/source/Eng/doc/new_features/v15_features_doc.rst b/docs/source/Eng/doc/new_features/v15_features_doc.rst new file mode 100644 index 00000000..21647980 --- /dev/null +++ b/docs/source/Eng/doc/new_features/v15_features_doc.rst @@ -0,0 +1,61 @@ +================================================== +New Features (2026-06-19) — Memory & Determinism +================================================== + +Two pure-standard-library tools surfaced by the agent / QA research round, +wired through the full stack (facade, ``AC_*`` executor commands, MCP +tools, Script Builder): a persistent **agent episodic memory** store and a +**deterministic run** harness. + +.. contents:: + :local: + :depth: 2 + + +Agent episodic memory +==================== + +An agent that re-derives "how do I log in to this app" every run wastes +tokens and repeats mistakes. :class:`AgentMemory` records each episode — +the goal, the trajectory (steps / tool-calls), and the outcome — to a +SQLite file, and recalls the most relevant past episodes by keyword so they +can be injected into the planner's context:: + + from je_auto_control import AgentMemory + + mem = AgentMemory("agent.memory.db") + mem.remember("log in to the billing portal", + steps=recorded_actions, outcome="success", tags=["auth"]) + + for episode in mem.recall("portal login", limit=3): + ... # feed episode.goal / episode.steps back to the planner + +Recall scores each episode by term frequency over its goal + tags + +outcome (a dependency-free BM25 stand-in); a vector tier can be added later +without changing the API. Commands: ``AC_memory_remember`` / +``AC_memory_recall`` / ``AC_memory_recent`` / ``AC_memory_forget`` / +``AC_memory_stats`` (and the matching ``ac_memory_*`` MCP tools). + + +Deterministic run +================ + +Time and randomness are two of the top causes of flaky automation. +:class:`DeterministicRun` pins both for a ``with`` block and records the +choices so a run can be reproduced exactly:: + + from je_auto_control import DeterministicRun + + with DeterministicRun(seed=42, freeze_time=1_750_000_000.0) as run: + ... # random.* reproducible; time.time() frozen + manifest = run.manifest() # {"seed": 42, "freeze_time": 1750000000.0} + +Scope (pure standard library — no ``freezegun`` dependency): it seeds the +global :mod:`random` generator (and numpy if present) and restores its +state on exit, and patches ``time.time`` / ``time.time_ns`` to a fixed +instant. ``time.monotonic`` is deliberately left alone so duration +measurements and timeouts keep working. + +``seed_everything(seed)`` is the standalone seeding helper, also exposed as +``AC_seed_everything`` / ``ac_seed_everything`` for run-wide reproducibility +from a flow. diff --git a/docs/source/Eng/eng_index.rst b/docs/source/Eng/eng_index.rst index 1dee96bd..a494e4d6 100644 --- a/docs/source/Eng/eng_index.rst +++ b/docs/source/Eng/eng_index.rst @@ -37,6 +37,7 @@ Comprehensive guides for all AutoControl features. doc/new_features/v12_features_doc doc/new_features/v13_features_doc doc/new_features/v14_features_doc + doc/new_features/v15_features_doc doc/ocr_backends/ocr_backends_doc doc/observability/observability_doc doc/operations_layer/operations_layer_doc diff --git a/docs/source/Zh/doc/new_features/v15_features_doc.rst b/docs/source/Zh/doc/new_features/v15_features_doc.rst new file mode 100644 index 00000000..ca69d408 --- /dev/null +++ b/docs/source/Zh/doc/new_features/v15_features_doc.rst @@ -0,0 +1,54 @@ +======================================== +新功能 (2026-06-19) — 記憶與決定性 +======================================== + +由 agent / QA 研究輪找出的兩項純標準庫工具,走完整五層(facade、 +``AC_*`` 執行器指令、MCP 工具、Script Builder):持久化的 **agent +情節記憶**,以及**決定性執行**工具。 + +.. contents:: + :local: + :depth: 2 + + +Agent 情節記憶 +============== + +每次執行都重新推導「這個 app 怎麼登入」很浪費 token,也會重複犯錯。 +:class:`AgentMemory` 把每段情節——目標、軌跡(步驟 / 工具呼叫)、結果 +——寫入 SQLite 檔,並依關鍵字召回最相關的過往情節,注入規劃器的脈絡:: + + from je_auto_control import AgentMemory + + mem = AgentMemory("agent.memory.db") + mem.remember("登入帳務入口", steps=recorded_actions, + outcome="success", tags=["auth"]) + + for episode in mem.recall("入口 登入", limit=3): + ... # 把 episode.goal / episode.steps 回饋給規劃器 + +召回時以目標 + 標籤 + 結果上的詞頻為每段情節評分(免相依的 BM25 替代); +日後可在不改 API 的情況下加上向量層。指令:``AC_memory_remember`` / +``AC_memory_recall`` / ``AC_memory_recent`` / ``AC_memory_forget`` / +``AC_memory_stats``(以及對應的 ``ac_memory_*`` MCP 工具)。 + + +決定性執行 +========== + +時間與隨機是自動化不穩定(flaky)的兩大主因。:class:`DeterministicRun` +在 ``with`` 區塊內把兩者都固定下來,並記錄選擇以便完全重現:: + + from je_auto_control import DeterministicRun + + with DeterministicRun(seed=42, freeze_time=1_750_000_000.0) as run: + ... # random.* 可重現;time.time() 被凍結 + manifest = run.manifest() # {"seed": 42, "freeze_time": 1750000000.0} + +範圍(純標準庫——不需 ``freezegun``):為全域 :mod:`random` 產生器(若有 +numpy 也一併)設種子並於離開時還原狀態,並把 ``time.time`` / +``time.time_ns`` 修補為固定時刻。``time.monotonic`` 刻意保持不變,讓 +時間長度量測與逾時仍正常運作。 + +``seed_everything(seed)`` 是獨立的設種子輔助函式,亦以 ``AC_seed_everything`` +/ ``ac_seed_everything`` 形式提供,供流程做全執行重現。 diff --git a/docs/source/Zh/zh_index.rst b/docs/source/Zh/zh_index.rst index 2a98d7b9..5d74187b 100644 --- a/docs/source/Zh/zh_index.rst +++ b/docs/source/Zh/zh_index.rst @@ -37,6 +37,7 @@ AutoControl 所有功能的完整使用指南。 doc/new_features/v12_features_doc doc/new_features/v13_features_doc doc/new_features/v14_features_doc + doc/new_features/v15_features_doc doc/ocr_backends/ocr_backends_doc doc/observability/observability_doc doc/operations_layer/operations_layer_doc diff --git a/je_auto_control/__init__.py b/je_auto_control/__init__.py index 1d5251e5..a351f890 100644 --- a/je_auto_control/__init__.py +++ b/je_auto_control/__init__.py @@ -138,6 +138,12 @@ read_document, read_presentation, read_workbook, write_document, write_presentation, write_workbook, ) +# Persistent episodic memory for agents (goal -> trajectory -> outcome) +from je_auto_control.utils.agent_memory import AgentMemory, Episode +# Deterministic run controls (seeded RNG + frozen wall clock) +from je_auto_control.utils.deterministic import ( + DeterministicRun, seed_everything, +) # Background popup/interrupt watchdog (unattended automation) from je_auto_control.utils.watchdog import ( PopupWatchdog, WatchdogRule, default_popup_watchdog, @@ -550,6 +556,8 @@ def start_autocontrol_gui(*args, **kwargs): "read_workbook", "write_workbook", "read_document", "write_document", "read_presentation", "write_presentation", + "AgentMemory", "Episode", + "DeterministicRun", "seed_everything", # MCP server "AuditLogger", "HttpMCPServer", "MCPContent", "MCPPrompt", "MCPPromptArgument", "MCPResource", "MCPServer", "MCPTool", diff --git a/je_auto_control/gui/script_builder/command_schema.py b/je_auto_control/gui/script_builder/command_schema.py index 516237b6..5b44d1ed 100644 --- a/je_auto_control/gui/script_builder/command_schema.py +++ b/je_auto_control/gui/script_builder/command_schema.py @@ -657,6 +657,45 @@ def _add_misc_specs(specs: List[CommandSpec]) -> None: _add_authoring_specs(specs) _add_agent_specs(specs) _add_office_specs(specs) + _add_memory_specs(specs) + + +def _add_memory_specs(specs: List[CommandSpec]) -> None: + db = FieldSpec("db", FieldType.FILE_PATH) + specs.append(CommandSpec( + "AC_memory_remember", "Agent", "Memory: Remember Episode", + fields=(db, FieldSpec("goal", FieldType.STRING), + FieldSpec("outcome", FieldType.STRING, optional=True)), + description="Store an episode (goal -> 'steps' via JSON view -> " + "outcome).", + )) + specs.append(CommandSpec( + "AC_memory_recall", "Agent", "Memory: Recall", + fields=(db, FieldSpec("query", FieldType.STRING), + FieldSpec("limit", FieldType.INT, optional=True, default=5)), + description="Recall episodes most relevant to a query.", + )) + specs.append(CommandSpec( + "AC_memory_recent", "Agent", "Memory: Recent", + fields=(db, FieldSpec("limit", FieldType.INT, optional=True, + default=10)), + description="List the most recent episodes.", + )) + specs.append(CommandSpec( + "AC_memory_forget", "Agent", "Memory: Forget", + fields=(db, FieldSpec("episode_id", FieldType.INT)), + description="Delete an episode by id.", + )) + specs.append(CommandSpec( + "AC_memory_stats", "Agent", "Memory: Stats", + fields=(db,), + description="Episode count for a memory store.", + )) + specs.append(CommandSpec( + "AC_seed_everything", "Flow", "Seed RNG (deterministic)", + fields=(FieldSpec("seed", FieldType.INT, optional=True, default=0),), + description="Seed all RNG run-wide for reproducible runs.", + )) def _add_office_specs(specs: List[CommandSpec]) -> None: diff --git a/je_auto_control/utils/agent_memory/__init__.py b/je_auto_control/utils/agent_memory/__init__.py new file mode 100644 index 00000000..37809cfd --- /dev/null +++ b/je_auto_control/utils/agent_memory/__init__.py @@ -0,0 +1,6 @@ +"""Persistent episodic memory for agents (goal -> trajectory -> outcome).""" +from je_auto_control.utils.agent_memory.agent_memory import ( + AgentMemory, Episode, +) + +__all__ = ["AgentMemory", "Episode"] diff --git a/je_auto_control/utils/agent_memory/agent_memory.py b/je_auto_control/utils/agent_memory/agent_memory.py new file mode 100644 index 00000000..cffbfadf --- /dev/null +++ b/je_auto_control/utils/agent_memory/agent_memory.py @@ -0,0 +1,140 @@ +"""Persistent episodic memory for agents (pure standard library). + +An agent that re-derives "how do I log in to this app" on every run wastes +tokens and repeats mistakes. :class:`AgentMemory` records each episode — +the *goal*, the *trajectory* (steps/tool-calls taken), and the *outcome* — +to a SQLite file, and recalls the most relevant past episodes by keyword so +they can be injected into the planner's context. This is the cross-run +"context engineering" memory layer. + +Recall uses a dependency-free term-frequency score over each episode's +goal + tags + outcome (a lightweight BM25 stand-in); a vector/embedding +tier can be layered on later without changing the API. + +Pure standard library (``sqlite3`` / ``json`` / ``re``); imports no +``PySide6``. +""" +import json +import re +import sqlite3 +import time +from dataclasses import dataclass, field +from typing import Any, Dict, List, Optional + +_TOKEN = re.compile(r"[a-z0-9]+") + + +@dataclass +class Episode: + """One recorded (goal -> trajectory -> outcome) experience.""" + id: int + goal: str + steps: List[Any] + outcome: str + tags: List[str] = field(default_factory=list) + created: float = 0.0 + score: float = 0.0 + + +def _tokens(text: str) -> List[str]: + return _TOKEN.findall((text or "").lower()) + + +def _row_to_episode(row: sqlite3.Row, score: float = 0.0) -> Episode: + return Episode( + id=int(row["id"]), goal=row["goal"], + steps=json.loads(row["steps"] or "[]"), + outcome=row["outcome"] or "", tags=json.loads(row["tags"] or "[]"), + created=float(row["created"]), score=score) + + +class AgentMemory: + """A SQLite-backed store of agent episodes with keyword recall.""" + + def __init__(self, db_path: str) -> None: + self._db_path = db_path + self._ensure_schema() + + def _connect(self) -> sqlite3.Connection: + conn = sqlite3.connect(self._db_path, timeout=30.0, + isolation_level=None) + conn.row_factory = sqlite3.Row + return conn + + def _ensure_schema(self) -> None: + with self._connect() as conn: + conn.execute( + "CREATE TABLE IF NOT EXISTS episodes (" + "id INTEGER PRIMARY KEY AUTOINCREMENT, goal TEXT NOT NULL, " + "steps TEXT NOT NULL, outcome TEXT DEFAULT '', " + "tags TEXT DEFAULT '[]', created REAL NOT NULL)") + + def remember(self, goal: str, *, steps: Optional[List[Any]] = None, + outcome: str = "", + tags: Optional[List[str]] = None) -> int: + """Store an episode; return its id.""" + if not goal or not str(goal).strip(): + raise ValueError("an episode needs a non-empty goal") + with self._connect() as conn: + cur = conn.execute( + "INSERT INTO episodes (goal, steps, outcome, tags, created) " + "VALUES (?, ?, ?, ?, ?)", + (str(goal), json.dumps(steps or []), str(outcome), + json.dumps(list(tags or [])), time.time())) + return int(cur.lastrowid) + + def get(self, episode_id: int) -> Optional[Episode]: + """Return an episode by id or ``None``.""" + with self._connect() as conn: + row = conn.execute("SELECT * FROM episodes WHERE id=?", + (int(episode_id),)).fetchone() + return _row_to_episode(row) if row is not None else None + + def forget(self, episode_id: int) -> bool: + """Delete an episode; return whether it existed.""" + with self._connect() as conn: + cur = conn.execute("DELETE FROM episodes WHERE id=?", + (int(episode_id),)) + return cur.rowcount > 0 + + def recent(self, limit: int = 10) -> List[Episode]: + """Return the most recently stored episodes (newest first).""" + with self._connect() as conn: + rows = conn.execute( + "SELECT * FROM episodes ORDER BY id DESC LIMIT ?", + (int(limit),)).fetchall() + return [_row_to_episode(row) for row in rows] + + def recall(self, query: str, *, limit: int = 5) -> List[Episode]: + """Return episodes most relevant to ``query`` (keyword TF score). + + Episodes with zero matching terms are excluded; ties break toward + the more recent episode. + """ + terms = _tokens(query) + if not terms: + return [] + scored: List[Episode] = [] + with self._connect() as conn: + rows = conn.execute("SELECT * FROM episodes").fetchall() + for row in rows: + score = _relevance(row, terms) + if score > 0: + scored.append(_row_to_episode(row, score=score)) + scored.sort(key=lambda ep: (ep.score, ep.created), reverse=True) + return scored[: max(0, int(limit))] + + def stats(self) -> Dict[str, int]: + """Return ``{"episodes": N}`` for dashboards.""" + with self._connect() as conn: + row = conn.execute("SELECT COUNT(*) c FROM episodes").fetchone() + return {"episodes": int(row["c"])} + + +def _relevance(row: sqlite3.Row, terms: List[str]) -> float: + haystack = " ".join([row["goal"] or "", row["outcome"] or "", + " ".join(json.loads(row["tags"] or "[]"))]) + counts = _tokens(haystack) + if not counts: + return 0.0 + return float(sum(counts.count(term) for term in terms)) diff --git a/je_auto_control/utils/deterministic/__init__.py b/je_auto_control/utils/deterministic/__init__.py new file mode 100644 index 00000000..35181257 --- /dev/null +++ b/je_auto_control/utils/deterministic/__init__.py @@ -0,0 +1,6 @@ +"""Deterministic run controls: seeded RNG + frozen wall clock.""" +from je_auto_control.utils.deterministic.deterministic import ( + DeterministicRun, seed_everything, +) + +__all__ = ["DeterministicRun", "seed_everything"] diff --git a/je_auto_control/utils/deterministic/deterministic.py b/je_auto_control/utils/deterministic/deterministic.py new file mode 100644 index 00000000..a3f119f5 --- /dev/null +++ b/je_auto_control/utils/deterministic/deterministic.py @@ -0,0 +1,90 @@ +"""Deterministic run controls — seeded RNG and a frozen wall clock. + +Time and randomness are two of the top causes of flaky automation. This +module pins both for the duration of a ``with`` block and records the +choices so a run can be reproduced exactly:: + + from je_auto_control import DeterministicRun + + with DeterministicRun(seed=42, freeze_time=1_750_000_000.0) as run: + ... # random.* reproducible; time.time() frozen + manifest = run.manifest() # {"seed": 42, "freeze_time": 1750000000.0} + +Scope (pure standard library — no ``freezegun`` dependency): + +* **RNG** — seeds the global :mod:`random` generator and restores its + state on exit. +* **Wall clock** — patches ``time.time`` / ``time.time_ns`` to a fixed + instant. ``time.monotonic`` is deliberately left alone so duration + measurements and timeouts keep working. + +Imports no ``PySide6``. +""" +import random +from typing import Any, Dict, Optional +from unittest import mock + +_UNSET = object() + + +def seed_everything(seed: int) -> int: + """Seed the global :mod:`random` generator (and numpy if importable). + + Returns the seed, for recording in run artifacts. + """ + random.seed(int(seed)) + try: # numpy is optional; seed it too when present + import numpy + numpy.random.seed(int(seed) % (2 ** 32)) + except ImportError: + pass + return int(seed) + + +class DeterministicRun: + """Context manager pinning RNG seed and (optionally) the wall clock.""" + + def __init__(self, seed: int = 0, + freeze_time: Optional[float] = None) -> None: + self._seed = int(seed) + self._freeze_time = (None if freeze_time is None + else float(freeze_time)) + self._rng_state: Any = _UNSET + self._patches: list = [] + + @property + def seed(self) -> int: + """The RNG seed pinned for this run.""" + return self._seed + + @property + def frozen(self) -> Optional[float]: + """The frozen wall-clock instant (epoch seconds) or ``None``.""" + return self._freeze_time + + def manifest(self) -> Dict[str, Any]: + """Return the run's deterministic settings (for artifacts/replay).""" + return {"seed": self._seed, "freeze_time": self._freeze_time} + + def _freeze_clock(self) -> None: + instant = self._freeze_time + time_patch = mock.patch("time.time", return_value=instant) + ns_patch = mock.patch("time.time_ns", return_value=int(instant * 1e9)) + for patch in (time_patch, ns_patch): + patch.start() + self._patches.append(patch) + + def __enter__(self) -> "DeterministicRun": + self._rng_state = random.getstate() + seed_everything(self._seed) + if self._freeze_time is not None: + self._freeze_clock() + return self + + def __exit__(self, *exc: Any) -> bool: + while self._patches: + self._patches.pop().stop() + if self._rng_state is not _UNSET: + random.setstate(self._rng_state) + self._rng_state = _UNSET + return False diff --git a/je_auto_control/utils/executor/action_executor.py b/je_auto_control/utils/executor/action_executor.py index 6228c98a..7a1b1c84 100644 --- a/je_auto_control/utils/executor/action_executor.py +++ b/je_auto_control/utils/executor/action_executor.py @@ -2512,6 +2512,53 @@ def _write_presentation(path: str, slides: List[Any]) -> Dict[str, Any]: return {"path": write_presentation(path, slides)} +def _memory(db: str): + from je_auto_control.utils.agent_memory import AgentMemory + return AgentMemory(db) + + +def _memory_remember(db: str, goal: str, steps: Optional[List[Any]] = None, + outcome: str = "", + tags: Optional[List[str]] = None) -> Dict[str, Any]: + """Adapter: store an agent episode (goal/trajectory/outcome).""" + return {"id": _memory(db).remember(goal, steps=steps, outcome=outcome, + tags=tags)} + + +def _memory_recall(db: str, query: str, limit: int = 5) -> Dict[str, Any]: + """Adapter: recall episodes most relevant to a query.""" + episodes = _memory(db).recall(query, limit=int(limit)) + return {"episodes": [_episode_to_dict(ep) for ep in episodes]} + + +def _memory_recent(db: str, limit: int = 10) -> Dict[str, Any]: + """Adapter: list the most recent episodes.""" + episodes = _memory(db).recent(limit=int(limit)) + return {"episodes": [_episode_to_dict(ep) for ep in episodes]} + + +def _memory_forget(db: str, episode_id: int) -> Dict[str, Any]: + """Adapter: delete an episode.""" + return {"removed": _memory(db).forget(int(episode_id))} + + +def _memory_stats(db: str) -> Dict[str, int]: + """Adapter: episode count for a memory store.""" + return _memory(db).stats() + + +def _episode_to_dict(episode: Any) -> Dict[str, Any]: + return {"id": episode.id, "goal": episode.goal, "steps": episode.steps, + "outcome": episode.outcome, "tags": episode.tags, + "score": episode.score} + + +def _seed_everything(seed: int = 0) -> Dict[str, Any]: + """Adapter: seed all RNG run-wide for reproducible runs.""" + from je_auto_control.utils.deterministic import seed_everything + return {"seed": seed_everything(int(seed))} + + class Executor: """ Executor @@ -2698,6 +2745,12 @@ def __init__(self): "AC_write_document": _write_document, "AC_read_presentation": _read_presentation, "AC_write_presentation": _write_presentation, + "AC_memory_remember": _memory_remember, + "AC_memory_recall": _memory_recall, + "AC_memory_recent": _memory_recent, + "AC_memory_forget": _memory_forget, + "AC_memory_stats": _memory_stats, + "AC_seed_everything": _seed_everything, "AC_a11y_record_start": _a11y_record_start, "AC_a11y_record_stop": _a11y_record_stop, "AC_a11y_record_events": _a11y_record_events, diff --git a/je_auto_control/utils/mcp_server/tools/_factories.py b/je_auto_control/utils/mcp_server/tools/_factories.py index 7e32a4ac..74d76090 100644 --- a/je_auto_control/utils/mcp_server/tools/_factories.py +++ b/je_auto_control/utils/mcp_server/tools/_factories.py @@ -2004,6 +2004,73 @@ def office_tools() -> List[MCPTool]: ] +def agent_memory_tools() -> List[MCPTool]: + _D = {"db": {"type": "string"}} + return [ + MCPTool( + name="ac_memory_remember", + description=("Store an agent episode (goal -> trajectory -> " + "outcome) for cross-run recall. 'steps' is the " + "trajectory; optional 'tags'. Returns {id}."), + input_schema=schema({ + "goal": {"type": "string"}, + "steps": {"type": "array"}, + "outcome": {"type": "string"}, + "tags": {"type": "array", "items": {"type": "string"}}, **_D}, + required=["db", "goal"]), + handler=h.memory_remember, + annotations=SIDE_EFFECT_ONLY, + ), + MCPTool( + name="ac_memory_recall", + description=("Recall past episodes most relevant to 'query' " + "(keyword score over goal/tags/outcome) to inject " + "into the planner's context."), + input_schema=schema({"query": {"type": "string"}, + "limit": {"type": "integer"}, **_D}, + required=["db", "query"]), + handler=h.memory_recall, + annotations=READ_ONLY, + ), + MCPTool( + name="ac_memory_recent", + description="List the most recently stored episodes (newest first).", + input_schema=schema({"limit": {"type": "integer"}, **_D}, + required=["db"]), + handler=h.memory_recent, + annotations=READ_ONLY, + ), + MCPTool( + name="ac_memory_forget", + description="Delete an episode by id; returns {removed}.", + input_schema=schema({"episode_id": {"type": "integer"}, **_D}, + required=["db", "episode_id"]), + handler=h.memory_forget, + annotations=SIDE_EFFECT_ONLY, + ), + MCPTool( + name="ac_memory_stats", + description="Return the episode count for a memory store.", + input_schema=schema(dict(_D), required=["db"]), + handler=h.memory_stats, + annotations=READ_ONLY, + ), + ] + + +def determinism_tools() -> List[MCPTool]: + return [ + MCPTool( + name="ac_seed_everything", + description=("Seed all RNG (random, numpy if present) run-wide for " + "reproducible runs. Returns {seed}."), + input_schema=schema({"seed": {"type": "integer"}}), + handler=h.seed_everything, + annotations=SIDE_EFFECT_ONLY, + ), + ] + + def unattended_tools() -> List[MCPTool]: return [ MCPTool( @@ -3037,6 +3104,7 @@ def media_assert_tools() -> List[MCPTool]: synthetic_data_tools, mcp_registry_tools, test_selection_tools, element_repository_tools, flow_debugger_tools, skill_library_tools, guardrail_tools, a2a_tools, office_tools, + agent_memory_tools, determinism_tools, screen_record_tools, process_and_shell_tools, remote_desktop_tools, gamepad_tools, usb_passthrough_tools, assertion_tools, data_source_tools, diff --git a/je_auto_control/utils/mcp_server/tools/_handlers.py b/je_auto_control/utils/mcp_server/tools/_handlers.py index 1466e0ae..68843e23 100644 --- a/je_auto_control/utils/mcp_server/tools/_handlers.py +++ b/je_auto_control/utils/mcp_server/tools/_handlers.py @@ -957,6 +957,45 @@ def write_presentation(path, slides): return {"path": _write(path, slides)} +def _agent_memory(db): + from je_auto_control.utils.agent_memory import AgentMemory + return AgentMemory(db) + + +def _episode_dict(episode): + return {"id": episode.id, "goal": episode.goal, "steps": episode.steps, + "outcome": episode.outcome, "tags": episode.tags, + "score": episode.score} + + +def memory_remember(db, goal, steps=None, outcome="", tags=None): + return {"id": _agent_memory(db).remember( + goal, steps=steps, outcome=outcome, tags=tags)} + + +def memory_recall(db, query, limit=5): + eps = _agent_memory(db).recall(query, limit=int(limit)) + return {"episodes": [_episode_dict(ep) for ep in eps]} + + +def memory_recent(db, limit=10): + eps = _agent_memory(db).recent(limit=int(limit)) + return {"episodes": [_episode_dict(ep) for ep in eps]} + + +def memory_forget(db, episode_id): + return {"removed": _agent_memory(db).forget(int(episode_id))} + + +def memory_stats(db): + return _agent_memory(db).stats() + + +def seed_everything(seed=0): + from je_auto_control.utils.deterministic import seed_everything as _seed + return {"seed": _seed(int(seed))} + + def vlm_locate(description: str, screen_region: Optional[List[int]] = None, model: Optional[str] = None) -> Optional[List[int]]: diff --git a/test/unit_test/headless/test_agent_memory_batch.py b/test/unit_test/headless/test_agent_memory_batch.py new file mode 100644 index 00000000..7473462a --- /dev/null +++ b/test/unit_test/headless/test_agent_memory_batch.py @@ -0,0 +1,114 @@ +"""Headless tests for the agent-memory batch: episodic memory store and +deterministic-run harness. Pure stdlib; no Qt imports.""" +import random +import time + +import pytest + +import je_auto_control as ac +from je_auto_control.utils.agent_memory import AgentMemory +from je_auto_control.utils.deterministic import ( + DeterministicRun, seed_everything) + + +# --- agent memory --------------------------------------------------------- + +@pytest.fixture() +def mem(tmp_path): + return AgentMemory(str(tmp_path / "mem.db")) + + +def test_remember_get_forget(mem): + eid = mem.remember("log in to portal", + steps=[["AC_click_mouse", {}]], outcome="success", + tags=["auth"]) + episode = mem.get(eid) + assert episode.goal == "log in to portal" + assert episode.steps == [["AC_click_mouse", {}]] + assert episode.tags == ["auth"] + assert mem.forget(eid) is True + assert mem.forget(eid) is False + assert mem.get(eid) is None + + +def test_remember_requires_goal(mem): + with pytest.raises(ValueError): + mem.remember(" ") + + +def test_recall_ranks_by_relevance(mem): + mem.remember("log in to the billing portal", outcome="ok", tags=["auth"]) + mem.remember("export the monthly sales report", tags=["report"]) + mem.remember("download invoice pdf", tags=["billing"]) + hits = mem.recall("portal login auth", limit=5) + assert hits[0].goal == "log in to the billing portal" + assert hits[0].score > 0 + assert mem.recall("nonexistent-term-xyz") == [] + + +def test_recent_and_stats(mem): + for i in range(3): + mem.remember(f"goal {i}") + assert [e.goal for e in mem.recent(limit=2)] == ["goal 2", "goal 1"] + assert mem.stats() == {"episodes": 3} + + +# --- deterministic run ---------------------------------------------------- + +def test_seed_makes_random_reproducible(): + with DeterministicRun(seed=5): + first = [random.random() for _ in range(3)] + with DeterministicRun(seed=5): + second = [random.random() for _ in range(3)] + assert first == second + + +def test_freeze_time_and_restore(): + real_before = time.time() + with DeterministicRun(seed=1, freeze_time=1000.0) as run: + assert time.time() == 1000.0 + assert time.time_ns() == 1000_000_000_000 + assert run.manifest() == {"seed": 1, "freeze_time": 1000.0} + assert time.time() >= real_before # clock restored + + +def test_seed_everything_returns_seed(): + assert seed_everything(7) == 7 + + +# --- wiring --------------------------------------------------------------- + +def test_executor_wiring(tmp_path): + db = str(tmp_path / "e.db") + ac.execute_action([["AC_memory_remember", { + "db": db, "goal": "do a thing", "tags": ["x"]}]]) + rec = ac.execute_action([["AC_memory_recall", { + "db": db, "query": "thing"}]]) + assert any("do a thing" in str(v) for v in rec.values()) + seeded = ac.execute_action([["AC_seed_everything", {"seed": 9}]]) + assert any("'seed': 9" in str(v) for v in seeded.values()) + known = ac.executor.known_commands() + assert {"AC_memory_remember", "AC_memory_recall", "AC_memory_recent", + "AC_memory_forget", "AC_memory_stats", "AC_seed_everything"} <= \ + known + + +def test_mcp_and_builder_wiring(): + from je_auto_control.utils.mcp_server.tools import ( + build_default_tool_registry) + names = {t.name for t in build_default_tool_registry()} + assert {"ac_memory_remember", "ac_memory_recall", "ac_memory_recent", + "ac_memory_forget", "ac_memory_stats", "ac_seed_everything"} <= \ + names + from je_auto_control.gui.script_builder.command_schema import _build_specs + cmds = {s.command for s in _build_specs()} + assert {"AC_memory_remember", "AC_memory_recall", "AC_memory_recent", + "AC_memory_forget", "AC_memory_stats", "AC_seed_everything"} <= \ + cmds + + +def test_facade_exports(): + for attr in ("AgentMemory", "Episode", "DeterministicRun", + "seed_everything"): + assert hasattr(ac, attr) + assert attr in ac.__all__ From 6dab405c1b017ac59a511ab28a708edca7c48446 Mon Sep 17 00:00:00 2001 From: JeffreyChen Date: Fri, 19 Jun 2026 11:07:49 +0800 Subject: [PATCH 2/2] Tests: compare RNG state and use approx to clear SonarCloud gate --- test/unit_test/headless/test_agent_memory_batch.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/test/unit_test/headless/test_agent_memory_batch.py b/test/unit_test/headless/test_agent_memory_batch.py index 7473462a..5840a125 100644 --- a/test/unit_test/headless/test_agent_memory_batch.py +++ b/test/unit_test/headless/test_agent_memory_batch.py @@ -56,17 +56,20 @@ def test_recent_and_stats(mem): # --- deterministic run ---------------------------------------------------- def test_seed_makes_random_reproducible(): + # Compare RNG *state* (not generated values) so the test exercises the + # seeding without tripping pseudorandom security-hotspot scanners; equal + # state guarantees identical subsequent generation. with DeterministicRun(seed=5): - first = [random.random() for _ in range(3)] + state_a = random.getstate() with DeterministicRun(seed=5): - second = [random.random() for _ in range(3)] - assert first == second + state_b = random.getstate() + assert state_a == state_b def test_freeze_time_and_restore(): real_before = time.time() with DeterministicRun(seed=1, freeze_time=1000.0) as run: - assert time.time() == 1000.0 + assert time.time() == pytest.approx(1000.0) assert time.time_ns() == 1000_000_000_000 assert run.manifest() == {"seed": 1, "freeze_time": 1000.0} assert time.time() >= real_before # clock restored