diff --git a/src/autoskillit/core/_type_protocols.py b/src/autoskillit/core/_type_protocols.py index 4f3e6aaa..8e8e60aa 100644 --- a/src/autoskillit/core/_type_protocols.py +++ b/src/autoskillit/core/_type_protocols.py @@ -182,6 +182,7 @@ async def run( add_dirs: Sequence[ValidatedAddDir] = (), timeout: float | None = None, stale_threshold: float | None = None, + idle_output_timeout: float | None = None, expected_output_patterns: Sequence[str] = (), write_behavior: WriteBehaviorSpec | None = None, completion_marker: str = "", diff --git a/src/autoskillit/execution/headless.py b/src/autoskillit/execution/headless.py index 15d311ee..4e506e34 100644 --- a/src/autoskillit/execution/headless.py +++ b/src/autoskillit/execution/headless.py @@ -953,6 +953,7 @@ async def run_headless_core( add_dirs: Sequence[ValidatedAddDir] = (), timeout: float | None = None, stale_threshold: float | None = None, + idle_output_timeout: float | None = None, expected_output_patterns: Sequence[str] = (), write_behavior: WriteBehaviorSpec | None = None, completion_marker: str = "", @@ -988,6 +989,12 @@ async def run_headless_core( effective_timeout = timeout if timeout is not None else cfg.timeout effective_stale = stale_threshold if stale_threshold is not None else cfg.stale_threshold + _raw_idle = ( + idle_output_timeout + if idle_output_timeout is not None + else float(cfg.idle_output_timeout) + ) + effective_idle: float | None = _raw_idle if _raw_idle > 0.0 else None logger.debug( "run_headless_core_entry", @@ -1023,7 +1030,7 @@ async def run_headless_core( stale_threshold=effective_stale, completion_drain_timeout=cfg.completion_drain_timeout, linux_tracing_config=linux_tracing_cfg, - idle_output_timeout=cfg.idle_output_timeout, + idle_output_timeout=effective_idle, max_suppression_seconds=cfg.max_suppression_seconds, ) finally: @@ -1181,6 +1188,7 @@ async def run( add_dirs: Sequence[ValidatedAddDir] = (), timeout: float | None = None, stale_threshold: float | None = None, + 
idle_output_timeout: float | None = None, expected_output_patterns: Sequence[str] = (), write_behavior: WriteBehaviorSpec | None = None, completion_marker: str = "", @@ -1199,6 +1207,7 @@ async def run( add_dirs=add_dirs, timeout=effective_timeout, stale_threshold=effective_stale, + idle_output_timeout=idle_output_timeout, expected_output_patterns=expected_output_patterns, write_behavior=write_behavior, completion_marker=completion_marker, diff --git a/src/autoskillit/recipe/io.py b/src/autoskillit/recipe/io.py index 78015aa8..9ea047c4 100644 --- a/src/autoskillit/recipe/io.py +++ b/src/autoskillit/recipe/io.py @@ -148,6 +148,7 @@ def find_recipe_by_name(name: str, project_dir: Path) -> RecipeInfo | None: "gate", "optional_context_refs", "stale_threshold", + "idle_output_timeout", } ) if _PARSE_STEP_HANDLED_FIELDS != frozenset(RecipeStep.__dataclass_fields__): @@ -254,6 +255,7 @@ def _parse_step(data: dict[str, Any]) -> RecipeStep: gate=data.get("gate"), optional_context_refs=data.get("optional_context_refs", []), stale_threshold=data.get("stale_threshold"), + idle_output_timeout=data.get("idle_output_timeout"), ) diff --git a/src/autoskillit/recipe/rules_tools.py b/src/autoskillit/recipe/rules_tools.py index c654c6d3..ab92a287 100644 --- a/src/autoskillit/recipe/rules_tools.py +++ b/src/autoskillit/recipe/rules_tools.py @@ -13,7 +13,15 @@ _TOOL_PARAMS: dict[str, frozenset[str]] = { # --- Execution tools --- "run_skill": frozenset( - {"skill_command", "cwd", "model", "step_name", "order_id", "stale_threshold"} + { + "skill_command", + "cwd", + "model", + "step_name", + "order_id", + "stale_threshold", + "idle_output_timeout", + } ), "run_cmd": frozenset({"cmd", "cwd", "timeout", "step_name"}), "run_python": frozenset({"callable", "args", "timeout"}), diff --git a/src/autoskillit/recipe/schema.py b/src/autoskillit/recipe/schema.py index 06ef4a89..e2a1b7a4 100644 --- a/src/autoskillit/recipe/schema.py +++ b/src/autoskillit/recipe/schema.py @@ -89,6 +89,7 @@ class 
RecipeStep: default_factory=list ) # Context variable names that may be referenced before they are captured (cyclic routes) stale_threshold: int | None = None # None means use global RunSkillConfig.stale_threshold + idle_output_timeout: int | None = None # None = use global cfg; 0 = disabled for this step @dataclass diff --git a/src/autoskillit/recipe/validator.py b/src/autoskillit/recipe/validator.py index ac6372a9..1821a3c1 100644 --- a/src/autoskillit/recipe/validator.py +++ b/src/autoskillit/recipe/validator.py @@ -161,6 +161,14 @@ def validate_recipe(recipe: Recipe) -> list[str]: f"when set, got {step.stale_threshold!r}" ) + if step.idle_output_timeout is not None and ( + not isinstance(step.idle_output_timeout, int) or step.idle_output_timeout < 0 + ): + errors.append( + f"Step {step_name!r}: 'idle_output_timeout' must be a non-negative integer " + f"when set (0 = disabled), got {step.idle_output_timeout!r}" + ) + if step.on_result is not None: if step.on_success is not None: errors.append( diff --git a/src/autoskillit/recipes/research.yaml b/src/autoskillit/recipes/research.yaml index 85157fae..c881e5de 100644 --- a/src/autoskillit/recipes/research.yaml +++ b/src/autoskillit/recipes/research.yaml @@ -293,6 +293,7 @@ steps: implement_phase: tool: run_skill stale_threshold: 2400 + idle_output_timeout: 0 with: skill_command: "/autoskillit:implement-experiment ${{ context.phase_plan_path }}" cwd: "${{ context.worktree_path }}" @@ -312,6 +313,7 @@ steps: troubleshoot_implement_failure: tool: run_skill stale_threshold: 2400 + idle_output_timeout: 0 with: skill_command: "/autoskillit:troubleshoot-experiment ${{ context.worktree_path }} implement_phase" cwd: "${{ inputs.source_dir }}" @@ -343,6 +345,7 @@ steps: run_experiment: tool: run_skill stale_threshold: 2400 + idle_output_timeout: 0 with: skill_command: "/autoskillit:run-experiment ${{ context.worktree_path }}" cwd: "${{ context.worktree_path }}" @@ -357,6 +360,7 @@ steps: adjust_experiment: tool: run_skill 
stale_threshold: 2400 + idle_output_timeout: 0 with: skill_command: "/autoskillit:run-experiment ${{ context.worktree_path }} --adjust" cwd: "${{ context.worktree_path }}" @@ -661,6 +665,7 @@ steps: re_run_experiment: tool: run_skill stale_threshold: 2400 + idle_output_timeout: 0 with: skill_command: "/autoskillit:run-experiment ${{ context.worktree_path }} --adjust" cwd: "${{ context.worktree_path }}" diff --git a/src/autoskillit/server/tools_execution.py b/src/autoskillit/server/tools_execution.py index 29752a69..38151079 100644 --- a/src/autoskillit/server/tools_execution.py +++ b/src/autoskillit/server/tools_execution.py @@ -151,6 +151,7 @@ async def run_skill( step_name: str = "", order_id: str = "", stale_threshold: int | None = None, + idle_output_timeout: int | None = None, ctx: Context = CurrentContext(), ) -> str: """Run a Claude Code headless session with a skill command. @@ -190,6 +191,9 @@ async def run_skill( stale_threshold: Override the staleness kill threshold in seconds. When set on a RecipeStep, the recipe orchestrator passes it here. None uses the global config default (RunSkillConfig.stale_threshold, default 1200s). + idle_output_timeout: Override the idle stdout kill threshold in seconds. + 0 = disabled for this step. None = use global config + (RunSkillConfig.idle_output_timeout, default 600s). 
""" if (headless := _require_not_headless("run_skill")) is not None: return headless @@ -296,6 +300,9 @@ async def run_skill( expected_output_patterns=expected_output_patterns, write_behavior=write_spec, stale_threshold=float(stale_threshold) if stale_threshold is not None else None, + idle_output_timeout=float(idle_output_timeout) + if idle_output_timeout is not None + else None, completion_marker=invocation_marker, ) if skill_result.success: diff --git a/tests/execution/test_headless.py b/tests/execution/test_headless.py index da8bf2cb..67bc5b77 100644 --- a/tests/execution/test_headless.py +++ b/tests/execution/test_headless.py @@ -3560,6 +3560,75 @@ def test_headless_executor_accepts_completion_marker(self) -> None: assert param.default == "" +class TestHeadlessExecutorIdleOutputTimeout: + """Protocol conformance and resolution logic for idle_output_timeout.""" + + def test_headless_executor_accepts_idle_output_timeout(self) -> None: + import inspect + + from autoskillit.execution.headless import DefaultHeadlessExecutor + + sig = inspect.signature(DefaultHeadlessExecutor.run) + assert "idle_output_timeout" in sig.parameters + param = sig.parameters["idle_output_timeout"] + assert param.default is None + + def _success_payload(self, marker: str) -> SubprocessResult: + payload = json.dumps( + { + "type": "result", + "subtype": "success", + "is_error": False, + "result": f"Done. 
{marker}", + "session_id": "sess-iot", + } + ) + return SubprocessResult(0, payload, "", TerminationReason.NATURAL_EXIT, pid=1) + + @pytest.mark.anyio + async def test_default_headless_executor_uses_per_step_idle_output_timeout( + self, tool_ctx + ) -> None: + """A positive idle_output_timeout override (120.0) reaches the runner unchanged.""" + from autoskillit.execution.headless import run_headless_core + + marker = tool_ctx.config.run_skill.completion_marker + tool_ctx.runner.push(self._success_payload(marker)) + await run_headless_core( + "/investigate foo", cwd="/tmp", ctx=tool_ctx, idle_output_timeout=120.0 + ) + _, _cwd, _timeout, kwargs = tool_ctx.runner.call_args_list[0] + assert kwargs["idle_output_timeout"] == 120.0 + + @pytest.mark.anyio + async def test_default_headless_executor_converts_zero_to_none(self, tool_ctx) -> None: + """idle_output_timeout=0 is converted to None (disabled) before passing to runner.""" + from autoskillit.execution.headless import run_headless_core + + marker = tool_ctx.config.run_skill.completion_marker + tool_ctx.runner.push(self._success_payload(marker)) + await run_headless_core( + "/investigate foo", cwd="/tmp", ctx=tool_ctx, idle_output_timeout=0.0 + ) + _, _cwd, _timeout, kwargs = tool_ctx.runner.call_args_list[0] + assert kwargs["idle_output_timeout"] is None + + @pytest.mark.anyio + async def test_default_headless_executor_falls_back_to_cfg_idle_output_timeout( + self, tool_ctx + ) -> None: + """idle_output_timeout=None falls back to float(cfg.idle_output_timeout).""" + from autoskillit.execution.headless import run_headless_core + + cfg = tool_ctx.config.run_skill + tool_ctx.runner.push(self._success_payload(cfg.completion_marker)) + await run_headless_core( + "/investigate foo", cwd="/tmp", ctx=tool_ctx, idle_output_timeout=None + ) + _, _cwd, _timeout, kwargs = tool_ctx.runner.call_args_list[0] + assert kwargs["idle_output_timeout"] == float(cfg.idle_output_timeout) + + def _ndjson_with_write(result_text: str, file_paths: list[str], 
session_id: str = "test-session"): """Build NDJSON stdout with Write tool_use entries and a result record.""" records = [] diff --git a/tests/pipeline/test_context.py b/tests/pipeline/test_context.py index eb0d9a30..e22675bc 100644 --- a/tests/pipeline/test_context.py +++ b/tests/pipeline/test_context.py @@ -158,6 +158,20 @@ def test_headless_executor_protocol_accepts_timeout() -> None: assert params["stale_threshold"].default is None +def test_headless_executor_protocol_accepts_idle_output_timeout() -> None: + """HeadlessExecutor.run() signature must include optional idle_output_timeout.""" + import inspect + + from autoskillit.core import HeadlessExecutor + + sig = inspect.signature(HeadlessExecutor.run) + params = sig.parameters + assert "idle_output_timeout" in params, ( + "HeadlessExecutor.run missing idle_output_timeout param" + ) + assert params["idle_output_timeout"].default is None + + def test_recipe_repository_protocol_has_rich_methods() -> None: """RecipeRepository protocol must expose load_and_validate, validate_from_path, list_all.""" from autoskillit.core import RecipeRepository diff --git a/tests/recipe/test_io.py b/tests/recipe/test_io.py index 6bd9cb77..7b852f5b 100644 --- a/tests/recipe/test_io.py +++ b/tests/recipe/test_io.py @@ -443,6 +443,21 @@ def test_parse_step_stale_threshold_defaults_to_none(self) -> None: step = _parse_step(data) assert step.stale_threshold is None + def test_parse_step_reads_idle_output_timeout(self) -> None: + data = {"tool": "run_skill", "idle_output_timeout": 120, "on_success": "done"} + step = _parse_step(data) + assert step.idle_output_timeout == 120 + + def test_parse_step_idle_output_timeout_defaults_to_none(self) -> None: + data = {"tool": "run_skill", "on_success": "done"} + step = _parse_step(data) + assert step.idle_output_timeout is None + + def test_parse_step_idle_output_timeout_zero_means_disabled(self) -> None: + data = {"tool": "run_skill", "idle_output_timeout": 0, "on_success": "done"} + step = 
_parse_step(data) + assert step.idle_output_timeout == 0 + # MOD4 def test_bundled_resolve_failures_steps_use_config_default(self) -> None: bd = builtin_recipes_dir() diff --git a/tests/recipe/test_validator.py b/tests/recipe/test_validator.py index 4af43bfa..aa321648 100644 --- a/tests/recipe/test_validator.py +++ b/tests/recipe/test_validator.py @@ -397,6 +397,37 @@ def test_validator_accepts_positive_stale_threshold(self) -> None: errors = validate_recipe(recipe) assert not any("stale_threshold" in e for e in errors) + def test_validator_rejects_negative_idle_output_timeout(self) -> None: + recipe = Recipe( + name="test", + description="test", + steps={"s": RecipeStep(tool="run_skill", on_success="done", idle_output_timeout=-1)}, + kitchen_rules=["test"], + ) + errors = validate_recipe(recipe) + assert any("idle_output_timeout" in e for e in errors) + + def test_validator_accepts_zero_idle_output_timeout(self) -> None: + # 0 = disabled, must NOT be rejected + recipe = Recipe( + name="test", + description="test", + steps={"s": RecipeStep(tool="run_skill", on_success="done", idle_output_timeout=0)}, + kitchen_rules=["test"], + ) + errors = validate_recipe(recipe) + assert not any("idle_output_timeout" in e for e in errors) + + def test_validator_accepts_positive_idle_output_timeout(self) -> None: + recipe = Recipe( + name="test", + description="test", + steps={"s": RecipeStep(tool="run_skill", on_success="done", idle_output_timeout=120)}, + kitchen_rules=["test"], + ) + errors = validate_recipe(recipe) + assert not any("idle_output_timeout" in e for e in errors) + # --------------------------------------------------------------------------- # TestDataFlowQuality — migrated from test_recipe_parser.py diff --git a/tests/server/test_tools_execution.py b/tests/server/test_tools_execution.py index 0dae3dcd..8d9cef9a 100644 --- a/tests/server/test_tools_execution.py +++ b/tests/server/test_tools_execution.py @@ -796,6 +796,7 @@ async def run( order_id: str = "", 
timeout: float | None = None, stale_threshold: float | None = None, + idle_output_timeout: float | None = None, expected_output_patterns: tuple[str, ...] | list[str] = (), write_behavior=None, completion_marker: str = "", @@ -843,6 +844,7 @@ async def run( order_id: str = "", timeout: float | None = None, stale_threshold: float | None = None, + idle_output_timeout: float | None = None, expected_output_patterns: tuple[str, ...] | list[str] = (), write_behavior=None, completion_marker: str = "", @@ -1441,3 +1443,56 @@ async def run(self, skill_command, cwd, *, add_dirs=(), **kwargs) -> SkillResult assert "mermaid" in closure arch_members = {n for n in closure if n.startswith("arch-lens-")} assert len(arch_members) >= 1 + + +def _make_capturing_executor(): + """Return (executor, captured_dict) for testing idle_output_timeout propagation.""" + from autoskillit.core import SkillResult + + captured: dict = {} + + class MockExecutor: + async def run( + self, skill_command, cwd, *, idle_output_timeout=None, **kwargs + ) -> SkillResult: + captured["idle_output_timeout"] = idle_output_timeout + return SkillResult( + success=True, + result="ok", + session_id="", + subtype="success", + is_error=False, + exit_code=0, + needs_retry=False, + retry_reason="none", + stderr="", + token_usage=None, + ) + + return MockExecutor(), captured + + +@pytest.mark.anyio +async def test_run_skill_passes_idle_output_timeout(tool_ctx, monkeypatch) -> None: + """run_skill passes idle_output_timeout (as float) to executor.run().""" + executor, captured = _make_capturing_executor() + tool_ctx.executor = executor + monkeypatch.setattr("autoskillit.server._ctx", tool_ctx) + + from autoskillit.server.tools_execution import run_skill + + await run_skill("/test skill", "/tmp", idle_output_timeout=120) + assert captured["idle_output_timeout"] == 120.0 # int→float conversion + + +@pytest.mark.anyio +async def test_run_skill_idle_output_timeout_defaults_to_none(tool_ctx, monkeypatch) -> None: + 
"""run_skill passes None to executor.run() when idle_output_timeout is not set.""" + executor, captured = _make_capturing_executor() + tool_ctx.executor = executor + monkeypatch.setattr("autoskillit.server._ctx", tool_ctx) + + from autoskillit.server.tools_execution import run_skill + + await run_skill("/test skill", "/tmp") + assert captured["idle_output_timeout"] is None