From 36bf260b566463e99a9a4633f14865094fbd1828 Mon Sep 17 00:00:00 2001 From: xiaoxing0135 <706015750@qq.com> Date: Fri, 12 Jun 2026 04:35:32 +0800 Subject: [PATCH 1/5] fix: audit P0-P3 all fixes + architecture hardening + 31 skins Audit fixes (AUDIT-2026-06-12.md): - P0: C1 config_dir resolution, C4 directory artifact handling, C5 JSON UTF-8, C6 version prefix - P1: H1 MCP dual-path lookup, H2 check_version path, H3 LiteLLM suppress, H4 Prefect cleanup, H5 KB embed degraded flag - P2: M1 sys.path.insert eliminated, M2 loguru bridge, M5 config dual-source, M6 Docker vars, M7 JSON dedup, M8 dynamic paths, M9 locale-aware workspace, M10 LLM env override - P3: L1 CLI loop registration, L2 .env.minimal.example, L3 upload exts, L4 Agent placeholders, L5 Docker tags, L7 /health/deep Architecture: - pyproject.toml includes utils* package - utils/__init__.py: InterceptHandler for loguru bridge - REPL prefix / to ! (Claude Code conflict) - CLI+REPL unified: slash_commands.py single source - Fixed /model /history !insights runtime bugs - deploy/conftest.py: dynamic PROJECT_NAME paths Skins: 22 animal/creature skins with kaomoji + block art + per-skin animation --- .github/workflows/ci.yml | 2 + ...13\350\257\225\344\270\273\347\256\241.md" | 8 +- deploy/config/.env.minimal.example | 14 + deploy/config/conftest.py | 8 + deploy/config/pytest.ini | 6 +- install.py | 135 ++++-- runtime/__init__.py | 2 +- runtime/api/deps.py | 19 +- runtime/api/main.py | 59 ++- runtime/cli/commands/run.py | 2 +- runtime/cli/commands/slash_handlers.py | 135 +++--- runtime/cli/completer.py | 12 +- runtime/cli/insights.py | 2 + runtime/cli/interactive.py | 144 +++--- runtime/cli/main.py | 49 +- runtime/cli/skins.py | 419 ++++++++++++++++++ runtime/cli/slash_commands.py | 95 ++-- runtime/config/settings.py | 17 +- runtime/docker-compose.yml | 18 +- runtime/gateway/platforms/dingtalk.py | 47 +- runtime/gateway/platforms/qqbot.py | 37 +- runtime/gateway/platforms/telegram.py | 6 +- runtime/mcp/client.py 
| 4 +- runtime/mcp/knowledge_base/server.py | 6 +- runtime/orchestrator/agents/base.py | 8 +- runtime/pyproject.toml | 3 +- runtime/router/llm_client.py | 31 +- runtime/tests/conftest.py | 34 +- runtime/tests/test_completer.py | 24 +- runtime/tests/test_utils_absentee.py | 44 +- runtime/tests/test_utils_bug_tracker.py | 26 +- runtime/tests/test_utils_evidence_chain.py | 3 +- runtime/tests/test_utils_fairness.py | 44 +- runtime/tests/test_utils_i18n_taboo.py | 6 +- runtime/tests/test_utils_quality_gate.py | 43 +- runtime/tests/test_utils_silent_failure.py | 46 +- runtime/tests/test_utils_taboo_matrix.py | 6 +- runtime/tutor/i18n.py | 17 +- utils/__init__.py | 29 ++ utils/a11y_i18n/a11y_scanner.py | 1 + utils/a11y_i18n/i18n_checker.py | 5 - utils/data/data_factory.py | 1 + utils/data/db_test_helper.py | 1 + utils/design/prd_loader.py | 5 +- utils/design/suite_minimizer.py | 1 + utils/paths.py | 24 +- utils/performance/chaos_helper.py | 1 + utils/quality/flaky_detector.py | 4 +- utils/reporting/evidence_chain.py | 4 +- utils/reporting/generate_report.py | 4 +- utils/security/api_security_scanner.py | 1 + utils/testing/bdd_runner.py | 1 + utils/testing/soak_runner.py | 4 +- utils/testing/state_machine_tester.py | 1 + utils/trackers/github_bug_manager.py | 4 +- utils/trackers/zentao_bug_manager.py | 5 +- 56 files changed, 1171 insertions(+), 506 deletions(-) create mode 100644 deploy/config/.env.minimal.example diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index b08199bc..53cf7a44 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -418,6 +418,8 @@ jobs: python -m pytest runtime/tests/ \ --ignore=runtime/tests/test_router_real.py \ --ignore=runtime/tests/test_smoke_e2e.py \ + --cov=runtime --cov=utils \ + --cov-report=term --cov-report=html \ -p no:cacheprovider \ -v --tb=short echo "✅ pytest 单元测试全过" diff --git "a/ai/agents/01-\346\265\213\350\257\225\344\270\273\347\256\241.md" 
"b/ai/agents/01-\346\265\213\350\257\225\344\270\273\347\256\241.md" index 21da6491..f97ca1e7 100644 --- "a/ai/agents/01-\346\265\213\350\257\225\344\270\273\347\256\241.md" +++ "b/ai/agents/01-\346\265\213\350\257\225\344\270\273\347\256\241.md" @@ -540,10 +540,10 @@ report = slo_report({ 【质量评分】XX/100(加权得分) 【用例执行】 - 总计:XXX - 通过:XXX(XX%) - 失败:XXX(XX%) - 跳过:XXX + 总计:{total_cases} + 通过:{passed_cases}({pass_rate}%) + 失败:{failed_cases}({fail_rate}%) + 跳过:{skipped_cases} 【Bug统计】 P0:X个(已全部修复:是/否) diff --git a/deploy/config/.env.minimal.example b/deploy/config/.env.minimal.example new file mode 100644 index 00000000..8ad4d8c5 --- /dev/null +++ b/deploy/config/.env.minimal.example @@ -0,0 +1,14 @@ +# Test-Agent minimal config — copy to .env and fill in values. +# See .env.example for full reference (318 lines, all options). + +# ── Required: LLM ── +TAGENT_LLM_PROVIDER=claude +TAGENT_LLM_API_KEY=sk-your-key-here + +# ── Optional: paths ── +# TAGENT_LANG=zh # zh / en / zh-en (default: auto-detect) +# TAGENT_OUTPUT_DIR=workspace/test-reports + +# ── Optional: persistence ── +# TAGENT_DB_URL=sqlite:///workspace/tagent.db +# TAGENT_MINIO_ENDPOINT=localhost:9000 diff --git a/deploy/config/conftest.py b/deploy/config/conftest.py index 1bd5af34..0739e0a0 100644 --- a/deploy/config/conftest.py +++ b/deploy/config/conftest.py @@ -295,6 +295,14 @@ def pytest_configure(config): for d in workflow_dirs: Path(d).mkdir(parents=True, exist_ok=True) + # 动态设置 allure / junit 输出路径 → workspace/测试报告/{项目名}/ + _output_base = get_output_dir("", current_run_id()) + _allure_dir = _output_base / "allure-results" + _junit_file = _output_base / "junit-results.xml" + _allure_dir.mkdir(parents=True, exist_ok=True) + config.option.allure_report_dir = str(_allure_dir) + config.option.xmlpath = str(_junit_file) + # 标记已初始化 sentinel.parent.mkdir(parents=True, exist_ok=True) sentinel.touch() diff --git a/deploy/config/pytest.ini b/deploy/config/pytest.ini index 8bcb2c36..4d357e87 100644 --- 
a/deploy/config/pytest.ini +++ b/deploy/config/pytest.ini @@ -84,8 +84,8 @@ addopts = --tb=short --strict-markers -p no:warnings - --alluredir=workspace/测试报告/{项目名}/allure-results - --junitxml=workspace/测试报告/{项目名}/junit-results.xml + # alluredir / junitxml set dynamically in conftest.py via get_project_name() + # set PROJECT_NAME env var or edit conftest.py to customize # JUnit XML 格式:xunit2(兼容现代解析器) junit_family = xunit2 @@ -101,7 +101,7 @@ junit_family = xunit2 log_cli = true log_cli_level = INFO log_cli_format = %(asctime)s [%(levelname)s] %(name)s: %(message)s -log_file = workspace/测试报告/{项目名}/pytest.log +log_file = workspace/测试报告/default/pytest.log log_file_level = DEBUG log_file_format = %(asctime)s [%(levelname)s] %(name)s: %(message)s diff --git a/install.py b/install.py index eb26b982..55cbf672 100644 --- a/install.py +++ b/install.py @@ -15,6 +15,12 @@ python install.py --update # 轻量更新当前目录 python install.py /path/to/project --update # 轻量更新指定目录 +开发者本地部署(无需 GitHub): + 将 install.py 放在源码仓库根目录(与 ai/ runtime/ utils/ 同级),直接运行即可: + python install.py /path/to/deploy-target + 脚本自动检测到 ai/agents/ + runtime/ + utils/ 后跳过 git clone, + 直接从本地源码拷贝到目标目录。 + 安全提示:不要 pipe-to-python。先下载再审查后执行: curl -fsSL -o install.py https://raw.githubusercontent.com/Wool-xing/Test-Agent/main/install.py python install.py /path/to/your-test-project @@ -22,7 +28,7 @@ 环境变量(可选): TEST_AGENT_REPO_URL 仓库 URL TEST_AGENT_REPO_BRANCH 分支名(默认 main) - TEST_AGENT_LOCAL_SRC CI 用:本地源码路径,跳过 git clone + TEST_AGENT_LOCAL_SRC 显式指定本地源码路径(自动检测失败时兜底) TEST_AGENT_NO_CN_MIRROR 设为 1 跳过清华 PyPI 镜像 """ @@ -85,6 +91,13 @@ def _parse_args(): REPO_URL = os.environ.get("TEST_AGENT_REPO_URL", "https://github.com/Wool-xing/Test-Agent.git") REPO_BRANCH = os.environ.get("TEST_AGENT_REPO_BRANCH", "main") +# 源码根目录标记(用于自动检测本地开发环境) +_SOURCE_MARKERS = [ + os.path.join("ai", "agents"), + "runtime", + "utils", +] + PRESERVE_FILES = [ ".env", "quality_gates.yaml", @@ -235,6 +248,32 @@ def _print_manual_hint(missing): print("→ 安装完成后重新运行: 
python install.py") +def _detect_source_dir(): + """检测本地源码目录。 + + 优先级: + 1. TEST_AGENT_LOCAL_SRC 环境变量(显式覆盖) + 2. 自动检测:install.py 所在目录是否包含源码标记(ai/agents/, runtime/, utils/) + + Returns: + (source_dir, is_local) — is_local=True 时可直接用源码,无需 clone。 + """ + # 显式覆盖 + local_src = os.environ.get("TEST_AGENT_LOCAL_SRC") + if local_src: + if not os.path.isdir(local_src): + print(f"❌ TEST_AGENT_LOCAL_SRC 指向的目录不存在: {local_src}") + sys.exit(1) + return os.path.abspath(local_src), True + + # 自动检测:install.py 所在目录是否就是源码仓库根目录 + script_dir = os.path.dirname(os.path.abspath(__file__)) + for marker in _SOURCE_MARKERS: + if not os.path.isdir(os.path.join(script_dir, marker)): + return None, False + return script_dir, True + + def find_python(): """跨平台检测 Python 3,排除 MS Store stub。""" candidates = ["python3", "python", "py"] @@ -706,7 +745,7 @@ def _update_deps(project_root): def do_update(): - """轻量更新:克隆最新模板 → 比较版本 → 拷贝文件 → 更新依赖 → 保留用户数据。""" + """轻量更新:获取最新模板 → 比较版本 → 拷贝文件 → 更新依赖 → 保留用户数据。""" version_file = os.path.join(PROJECT_ROOT, "VERSION") legacy_file = os.path.join(PROJECT_ROOT, ".version") # Migration: rename legacy .version to VERSION if VERSION is missing @@ -725,21 +764,23 @@ def do_update(): print(f"→ 当前版本: {local_version}") - template_dir_parent = tempfile.mkdtemp() - template_dir = os.path.join(template_dir_parent, "Test-Agent") + # 检测源码来源 + source_dir, is_local = _detect_source_dir() - try: - local_src = os.environ.get("TEST_AGENT_LOCAL_SRC") - if local_src: - print(f"→ [dev mode] 复制本地源代码: {local_src} → {template_dir}") - shutil.copytree(local_src, template_dir) - else: - print("→ 检查更新...") - subprocess.run( - ["git", "clone", "--depth", "1", "--branch", REPO_BRANCH, REPO_URL, template_dir], - check=True, - ) + if is_local: + template_dir = source_dir + template_dir_parent = None + print(f"→ [本地源码] 从 {source_dir} 部署更新") + else: + template_dir_parent = tempfile.mkdtemp() + template_dir = os.path.join(template_dir_parent, "Test-Agent") + print("→ 检查更新...") + subprocess.run( + 
["git", "clone", "--depth", "1", "--branch", REPO_BRANCH, REPO_URL, template_dir], + check=True, + ) + try: remote_version = _read_template_version(template_dir) if remote_version is None: print("❌ 无法读取远程版本信息") @@ -822,10 +863,8 @@ def do_update(): print("=" * 50) finally: - if os.path.isdir(template_dir_parent): + if template_dir_parent is not None and os.path.isdir(template_dir_parent): shutil.rmtree(template_dir_parent, onerror=_rmtree_onerror) - # cleanup backup tmp if any leftover (restore_user_data usually handles this) - # handled in finally block of main, but do_update has its own finally def main(): @@ -843,34 +882,34 @@ def main(): # 2. 幂等备份 backed = backup_user_data(PROJECT_ROOT) - template_dir_parent = tempfile.mkdtemp() - template_dir = os.path.join(template_dir_parent, "Test-Agent") + # 3. 获取模板来源(本地源码自动检测 → 跳过 clone 和临时目录) + source_dir, is_local = _detect_source_dir() - try: - # 3. 获取模板 - local_src = os.environ.get("TEST_AGENT_LOCAL_SRC") - if local_src: - print(f"→ [dev mode] 复制本地源代码: {local_src} → {template_dir}") - shutil.copytree(local_src, template_dir) - else: - print(f"→ 从 GitHub 克隆模板...") - print(f" {REPO_URL} ({REPO_BRANCH})") - try: - subprocess.run( - ["git", "clone", "--depth", "1", "--branch", REPO_BRANCH, REPO_URL, template_dir], - check=True, timeout=120, - ) - except subprocess.TimeoutExpired: - print("❌ Git 克隆超时(>120 秒),请检查网络或使用本地模式:") - print(f" set TEST_AGENT_LOCAL_SRC={os.getcwd()}") - print(f" python install.py <目标目录>") - sys.exit(1) - except subprocess.CalledProcessError as e: - print(f"❌ Git 克隆失败: {e}") - print(f" 仓库: {REPO_URL}") - print(f" 可以尝试本地模式:set TEST_AGENT_LOCAL_SRC={os.getcwd()}") - sys.exit(1) + if is_local: + template_dir = source_dir + template_dir_parent = None + print(f"→ [本地源码] 从 {source_dir} 部署") + else: + template_dir_parent = tempfile.mkdtemp() + template_dir = os.path.join(template_dir_parent, "Test-Agent") + print(f"→ 从 GitHub 克隆模板...") + print(f" {REPO_URL} ({REPO_BRANCH})") + try: + subprocess.run( + 
["git", "clone", "--depth", "1", "--branch", REPO_BRANCH, REPO_URL, template_dir], + check=True, timeout=120, + ) + except subprocess.TimeoutExpired: + print("❌ Git 克隆超时(>120 秒),请检查网络或使用本地模式:") + print(f" 直接将 install.py 放到源码仓库根目录运行即可自动识别") + sys.exit(1) + except subprocess.CalledProcessError as e: + print(f"❌ Git 克隆失败: {e}") + print(f" 仓库: {REPO_URL}") + print(f" 可将 install.py 放到源码仓库根目录运行,自动识别本地源码") + sys.exit(1) + try: # 4. 安装 Claude Code if shutil.which("claude") is None: print("→ 安装 Claude Code...") @@ -895,10 +934,10 @@ def main(): # 8. 创建 tagent.bat / tagent 包装脚本 _create_wrappers(PROJECT_ROOT) - # 10. 恢复用户数据 + # 9. 恢复用户数据 restore_user_data(PROJECT_ROOT, backed) - # 11. 写入 VERSION 供后续更新检测 + # 10. 写入 VERSION 供后续更新检测 version = _read_template_version(template_dir) if version: _write_local_version(PROJECT_ROOT, version) @@ -911,8 +950,8 @@ def main(): traceback.print_exc() finally: - # 清理临时目录 - if os.path.isdir(template_dir_parent): + # 仅清理远程 clone 的临时目录(本地源码不删) + if template_dir_parent is not None and os.path.isdir(template_dir_parent): shutil.rmtree(template_dir_parent, onerror=_rmtree_onerror) tmp = backed.pop("__tmp__", None) if tmp and os.path.isdir(tmp): diff --git a/runtime/__init__.py b/runtime/__init__.py index e0d469a3..183e55f4 100644 --- a/runtime/__init__.py +++ b/runtime/__init__.py @@ -11,7 +11,7 @@ def _read_version() -> str: """从项目根 VERSION 文件读取版本号,单点 source of truth。""" vf = __Path(__file__).resolve().parents[1] / "VERSION" if vf.is_file(): - return vf.read_text(encoding="utf-8").strip() + return vf.read_text(encoding="utf-8").strip().lstrip("Vv") return "0.0.0" diff --git a/runtime/api/deps.py b/runtime/api/deps.py index 3b55bf1b..463041fe 100644 --- a/runtime/api/deps.py +++ b/runtime/api/deps.py @@ -39,11 +39,20 @@ def submit(self, artifact: TargetArtifact, *, persist: bool = True) -> tuple[str # V1.14 主宪章 §40 — 把原始 artifact 文本注入每节点 inputs,让 AgentRunner 拿得到 full_text = artifact.text or "" if not full_text and artifact.path: - try: - full_text = 
Path(artifact.path).read_text(encoding="utf-8", errors="replace") - except OSError as e: - logger.warning("cannot read artifact {}: {}", artifact.path, e) - full_text = f"[READ_ERROR: {artifact.path}]" + p = Path(artifact.path) + if p.is_dir(): + try: + items = sorted(f.name for f in sorted(p.iterdir())[:200]) + full_text = f"[DIRECTORY: {artifact.path}]\n" + "\n".join(items) + except OSError as e: + logger.warning("cannot list directory {}: {}", artifact.path, e) + full_text = f"[DIRECTORY_ERROR: {artifact.path}]" + else: + try: + full_text = p.read_text(encoding="utf-8", errors="replace") + except OSError as e: + logger.warning("cannot read artifact {}: {}", artifact.path, e) + full_text = f"[READ_ERROR: {artifact.path}]" for node in decision.dag: if "artifact_text" not in node.inputs: node.inputs["artifact_text"] = full_text[:20_000] diff --git a/runtime/api/main.py b/runtime/api/main.py index 70b46ab1..0128bc09 100644 --- a/runtime/api/main.py +++ b/runtime/api/main.py @@ -10,7 +10,7 @@ from fastapi import BackgroundTasks, FastAPI, File, Form, HTTPException, Request, UploadFile from fastapi.middleware.cors import CORSMiddleware -from fastapi.responses import JSONResponse +from fastapi.responses import JSONResponse as _JSONResponse from loguru import logger from runtime import __version__ @@ -26,9 +26,34 @@ from runtime.config.settings import get_settings from runtime.observability.prometheus_metrics import create_metrics_router +import os as _os + +_DEFAULT_UPLOAD_EXTS: set[str] = { + ".md", ".txt", ".pdf", ".docx", ".xlsx", ".zip", + ".png", ".jpg", ".jpeg", ".html", ".json", ".yml", ".yaml", + ".py", ".js", ".ts", ".apk", ".ipa", +} + + +def _allowed_upload_exts() -> set[str]: + custom = _os.getenv("TAGENT_ALLOWED_UPLOAD_EXTS") + return set(custom.split(",")) if custom else _DEFAULT_UPLOAD_EXTS + + +class JSONResponse(_JSONResponse): + """JSONResponse that writes raw UTF-8 bytes — CJK chars unescaped.""" + + def render(self, content: Any) -> bytes: + import 
json as _json + + return _json.dumps( + content, ensure_ascii=False, allow_nan=False, separators=(",", ":") + ).encode("utf-8") + + _settings = get_settings() -app = FastAPI(title="Test-Agent Runtime", version=__version__) +app = FastAPI(title="Test-Agent Runtime", version=__version__, default_response_class=JSONResponse) app.add_middleware( CORSMiddleware, allow_origins=["tauri://localhost"], @@ -51,7 +76,7 @@ @app.middleware("http") async def auth_middleware(request: Request, call_next: Any) -> Any: token = _settings.api_auth_token - if token and request.url.path not in ("/health", "/docs", "/openapi.json"): + if token and request.url.path not in ("/health", "/health/deep", "/docs", "/openapi.json"): auth = request.headers.get("Authorization", "") if not auth or not secrets.compare_digest(auth.removeprefix("Bearer "), token): return JSONResponse(status_code=401, content={"detail": "unauthorized"}) @@ -67,6 +92,32 @@ def health() -> dict: return {"status": "ok", "version": __version__} +@app.get("/health/deep") +def health_deep() -> dict: + from runtime.cli.doctor import ( + check_catalog, + check_config, + check_dependencies, + check_environment, + check_llm, + check_workspace, + ) + + sections: dict[str, list[dict]] = {} + for name, fn in [ + ("environment", check_environment), + ("catalog", check_catalog), + ("config", check_config), + ("dependencies", check_dependencies), + ("llm", check_llm), + ("workspace", check_workspace), + ]: + sections[name] = fn() + + all_ok = all(ch["ok"] for s in sections.values() for ch in s) + return {"status": "ok" if all_ok else "degraded", "version": __version__, "checks": sections} + + @app.get("/catalog", response_model=CatalogResponse) def catalog() -> CatalogResponse: data = _kernel.catalog() @@ -100,7 +151,7 @@ def run_text(payload: RunCreateText, bg: BackgroundTasks, mode: str = "exec", la @app.post("/run/file", response_model=RunCreated) async def run_file(file: UploadFile = File(..., max_length=50_000_000), bg: 
BackgroundTasks = None, extra: str = Form("")) -> RunCreated: # type: ignore[assignment] # noqa: B008 suffix = Path(file.filename or "upload").suffix.lower() - allowed = {".md", ".txt", ".pdf", ".docx", ".xlsx", ".zip", ".png", ".jpg", ".jpeg", ".html", ".json", ".yml", ".yaml", ".py", ".js", ".ts", ".apk", ".ipa"} + allowed = _allowed_upload_exts() if suffix not in allowed: raise HTTPException(status_code=400, detail=f"file type not supported: {suffix}") with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as tmp: diff --git a/runtime/cli/commands/run.py b/runtime/cli/commands/run.py index f01c04df..3c61dbe3 100644 --- a/runtime/cli/commands/run.py +++ b/runtime/cli/commands/run.py @@ -12,7 +12,7 @@ from runtime.tutor.verbosity import Mode, set_mode -def register_run(app: typer.Typer) -> None: +def register(app: typer.Typer) -> None: @app.command() def run( target: str = typer.Argument(..., help="path / url / free-form text"), diff --git a/runtime/cli/commands/slash_handlers.py b/runtime/cli/commands/slash_handlers.py index 4f2c157b..1256682f 100644 --- a/runtime/cli/commands/slash_handlers.py +++ b/runtime/cli/commands/slash_handlers.py @@ -3,10 +3,11 @@ import os, sys, time from pathlib import Path from runtime.cli._shared import console +from runtime.cli.completer import _PROVIDERS from runtime.cli.conversation import ConversationMemory _SESSION_FILE = Path(__file__).resolve().parents[2] / "workspace" / "gateway" / "active_session.json" _SESSION_DIR = _SESSION_FILE.parent -_cmd_history = [] +_command_history_list = [] _last_fix = None _last_trace = None _start_time = 0.0 @@ -67,7 +68,7 @@ def _do_quit() -> None: raise SystemExit(0) -# ── /status ─────────────────────────────────────────────────────── +# ── !status ─────────────────────────────────────────────────────── def _cmd_status(args: str) -> None: @@ -100,7 +101,7 @@ def _cmd_status(args: str) -> None: console.print("[dim]No conversation yet.[/]") -# ── /model 
──────────────────────────────────────────────────────── +# ── !model ──────────────────────────────────────────────────────── def _cmd_model(args: str) -> None: @@ -124,7 +125,7 @@ def _cmd_model(args: str) -> None: marker = " [bold green]← current[/]" if p == current else "" detail = models.get(p, "") console.print(f" [cyan]{p}[/]{marker} [dim]{detail}[/]") - console.print("\n[dim]Usage: /model [model] e.g. /model deepseek deepseek-chat[/]") + console.print("\n[dim]Usage: !model [model] e.g. !model deepseek deepseek-chat[/]") return # Check if user typed a model name instead of provider @@ -132,12 +133,12 @@ def _cmd_model(args: str) -> None: # Try fuzzy match against known models → providers for p in _PROVIDERS: if name.startswith(p) or p.startswith(name): - console.print(f"[yellow]'{name}' is a model name. Did you mean [cyan]/model {p}[/]?[/]") + console.print(f"[yellow]'{name}' is a model name. Did you mean [cyan]!model {p}[/]?[/]") break else: console.print(f"[red]Unknown provider: {name}[/]") console.print(f"[dim]Available: {', '.join(_PROVIDERS)}[/]") - console.print("[dim]Tip: provider first, then model — e.g. [cyan]/model deepseek deepseek-chat[/][/]") + console.print("[dim]Tip: provider first, then model — e.g. [cyan]!model deepseek deepseek-chat[/][/]") return os.environ["TAGENT_LLM_PROVIDER"] = name @@ -157,11 +158,11 @@ def _cmd_model(args: str) -> None: console.print(f"[green]Switched[/] → provider: [cyan]{name}[/] model: [cyan]{_current_model()}[/]") -# ── /cache — LLM response cache stats/clear ───────────────────────── +# ── !cache — LLM response cache stats/clear ───────────────────────── def _cmd_cache(args: str) -> None: - """Show or clear the LLM response cache. Usage: /cache [clear].""" + """Show or clear the LLM response cache. 
Usage: !cache [clear].""" from runtime.router.llm_cache import cache_stats, clear_cache if args.strip() == "clear": n = clear_cache() @@ -170,7 +171,7 @@ def _cmd_cache(args: str) -> None: stats = cache_stats() console.print(f"[bold]LLM Cache:[/] {stats['entries']} entries, {stats['size_kb']} KB, TTL={stats['ttl_hours']}h") if stats["entries"] > 0: - console.print("[dim]Use /cache clear to flush.[/]") + console.print("[dim]Use !cache clear to flush.[/]") # ── /! — command history ──────────────────────────────────────────── @@ -178,11 +179,11 @@ def _cmd_cache(args: str) -> None: def _rerun_history(index: int) -> None: """Re-run a command from history by index (0=most recent).""" - if not _cmd_history: + if not _command_history_list: console.print("[dim]No commands in history.[/]") return try: - cmd = list(reversed(_cmd_history))[index] + cmd = list(reversed(_command_history_list))[index] console.print(f"[dim]Re-running: [cyan]{cmd[:80]}{'...' if len(cmd) > 80 else ''}[/][/]") _handle_natural_language(cmd) except IndexError: @@ -191,16 +192,16 @@ def _rerun_history(index: int) -> None: def _cmd_history(args: str) -> None: """Show recent command history. Use /1..9 to re-run.""" - if not _cmd_history: + if not _command_history_list: console.print("[dim]No commands in history yet.[/]") return - for i, cmd in enumerate(reversed(_cmd_history[-9:]), 1): + for i, cmd in enumerate(reversed(_command_history_list[-9:]), 1): preview = cmd[:100] + ("..." 
if len(cmd) > 100 else "") console.print(f" [cyan]/{i}[/] {preview}") - console.print(f"[dim]Run /1 (most recent) through /{min(9, len(_cmd_history))} to re-execute.[/]") + console.print(f"[dim]Run /1 (most recent) through /{min(9, len(_command_history_list))} to re-execute.[/]") -# ── /fc — fix last command typo (thefuck-style) ──────────────────── +# ── !fc — fix last command typo (thefuck-style) ──────────────────── def _cmd_fc(args: str) -> None: @@ -219,7 +220,7 @@ def _cmd_fc(args: str) -> None: def _cmd_ready(args: str) -> None: - """Multi-dimensional release readiness check. Usage: /ready [--fast].""" + """Multi-dimensional release readiness check. Usage: !ready [--fast].""" from runtime.cli.readiness import run_readiness from rich.table import Table @@ -282,7 +283,7 @@ def _cmd_hook(args: str) -> None: if action == "list" or not action: hooks = list_hooks() if not hooks: - console.print("[dim]No hooks registered. Use /hook add or /hook prebuilt[/]") + console.print("[dim]No hooks registered. 
Use !hook add or /hook prebuilt[/]") return table = Table(title=f"Hooks · {len(hooks)}", show_header=True) table.add_column("ID", style="dim") @@ -300,7 +301,7 @@ def _cmd_hook(args: str) -> None: elif action == "add": sub_parts = rest.strip().split(maxsplit=1) if len(sub_parts) < 2: - console.print("[dim]Usage: /hook add [/]") + console.print("[dim]Usage: !hook add [/]") console.print("[dim]Phases: before | after | on_error[/]") console.print("[dim]Example: /hook add after 'echo done'[/]") return @@ -322,7 +323,7 @@ def _cmd_hook(args: str) -> None: elif action == "remove": hid = rest.strip() if not hid: - console.print("[dim]Usage: /hook remove [/]") + console.print("[dim]Usage: !hook remove [/]") return if remove_hook(hid): console.print(f"[green]Hook #{hid} removed[/]") @@ -334,11 +335,11 @@ def _cmd_hook(args: str) -> None: console.print(f"[green]{n} hooks activated[/]") -# ── /skin — switch CLI theme ──────────────────────────────────────── +# ── !skin — switch CLI theme ──────────────────────────────────────── def _cmd_skin(args: str) -> None: - """Switch CLI skin/theme. Usage: /skin [name]. No args lists available.""" + """Switch CLI skin/theme. Usage: !skin [name]. No args lists available.""" from runtime.cli.skins import list_skins, set_skin, get_current_skin_name name = args.strip().lower() @@ -354,7 +355,7 @@ def _cmd_skin(args: str) -> None: if set_skin(name): console.print(f"[green]Skin:[/] [cyan]{name}[/] (restart REPL to see banner)") else: - console.print(f"[dim]Unknown skin '{name}'. Use /skin to list.[/]") + console.print(f"[dim]Unknown skin '{name}'. 
Use !skin to list.[/]") # ── /lang — switch UI language ────────────────────────────────────── @@ -367,7 +368,7 @@ def _cmd_lang(args: str) -> None: if name not in ("zh", "en", "zh-en"): current = get_lang() console.print(f"Current: [cyan]{current}[/]") - console.print("[dim]Usage: /lang zh | en | zh-en[/]") + console.print("[dim]Usage: !lang zh | en | zh-en[/]") return set_lang(name) labels = {"zh": "中文", "en": "English", "zh-en": "中文/English"} @@ -390,13 +391,13 @@ def _cmd_personality(args: str) -> None: marker = " [green]← active[/]" if p["name"] == current else "" console.print(f" [cyan]{p['name']}{marker}[/] — {p['description']}") if not current: - console.print("\n[dim]Usage: /personality [/]") + console.print("\n[dim]Usage: !personality [/]") return if set_personality(name): console.print(f"[green]Personality:[/] [cyan]{name}[/] (injected into context)") else: - console.print(f"[dim]Unknown personality '{name}'. Use /personality to list.[/]") + console.print(f"[dim]Unknown personality '{name}'. Use !personality to list.[/]") # ── /tools — dynamic agent/skill list ────────────────────────────── @@ -457,7 +458,7 @@ def _cmd_undo(args: str) -> None: """Remove last user+assistant exchange from conversation memory. Can be called repeatedly to unwind multiple turns. - Run /retry after undo to re-submit the undone prompt. + Run !retry after undo to re-submit the undone prompt. """ mem = _get_memory() user_text, assistant_text = mem.undo_last_exchange() @@ -576,7 +577,7 @@ def _cmd_resume(args: str) -> None: sid = args.strip() if not sid: - console.print("[dim]Usage: /resume [/]") + console.print("[dim]Usage: !resume [/]") return match = None @@ -586,7 +587,7 @@ def _cmd_resume(args: str) -> None: break if match is None: - console.print(f"[dim]No session matching '{sid}' found. Use /sessions to list.[/]") + console.print(f"[dim]No session matching '{sid}' found. 
Use !sessions to list.[/]") return loaded = ConversationMemory.load(match) @@ -671,7 +672,7 @@ def _cmd_remember(args: str) -> None: """Save a fact to MEMORY.md for cross-session persistence.""" fact = args.strip() if not fact: - console.print("[red]Usage: /remember [/]") + console.print("[red]Usage: !remember [/]") console.print("[dim]Example: /remember This project uses PostgreSQL[/]") return from runtime.cli.conversation import load_memory_md, save_memory_fact @@ -687,7 +688,7 @@ def _cmd_forget(args: str) -> None: """Remove facts from MEMORY.md matching a keyword.""" keyword = args.strip() if not keyword: - console.print("[red]Usage: /forget [/]") + console.print("[red]Usage: !forget [/]") console.print("[dim]Example: /forget PostgreSQL[/]") return from runtime.cli.conversation import forget_memory_fact, load_memory_md @@ -706,7 +707,7 @@ def _cmd_memory(args: str) -> None: from runtime.cli.conversation import load_memory_md mem = load_memory_md() if not mem: - console.print("[dim]MEMORY.md is empty. Use /remember to save knowledge.[/]") + console.print("[dim]MEMORY.md is empty. Use !remember to save knowledge.[/]") return from rich.panel import Panel lines = mem.count("\n") + 1 @@ -760,7 +761,7 @@ def _cmd_mcp_call(args: str) -> None: parts = args.strip().split(maxsplit=2) if len(parts) < 2: - console.print("[red]Usage: /mcp-call [json_args][/]") + console.print("[red]Usage: !mcp-call [json_args][/]") console.print("[dim]Example: /mcp-call test-orchestrator catalog[/]") return @@ -815,7 +816,7 @@ def _cmd_cron(args: str) -> None: jobs = list_jobs() if not jobs: - console.print("[dim]No scheduled jobs. Use /cron add [/]") + console.print("[dim]No scheduled jobs. 
Use !cron add [/]") console.print("[dim]Example: /cron add '0 9 * * *' smoke test daily[/]") return @@ -850,7 +851,7 @@ def _cmd_cron(args: str) -> None: tokens = rest.split(maxsplit=1) if len(tokens) < 2: - console.print("[red]Usage: /cron add [/]") + console.print("[red]Usage: !cron add [/]") console.print("[dim]Cron: /cron add '0 9 * * *' run full regression[/]") console.print("[dim]Natural: /cron add 'every morning' run smoke tests[/]") console.print("[dim]Try: every morning / every day at 18 / every monday / hourly[/]") @@ -885,7 +886,7 @@ def _cmd_cron(args: str) -> None: elif sub == "remove": job_id = rest.strip() if not job_id: - console.print("[red]Usage: /cron remove [/]") + console.print("[red]Usage: !cron remove [/]") return from runtime.scheduler.jobs import remove_job @@ -920,7 +921,7 @@ def _cmd_cron_health(args: str) -> None: console.print(f"[green]✓ Health check scheduled hourly[/] {job_id}") -# ── /model-router — display auto-routing configuration ───────────── +# ── !model-router — display auto-routing configuration ───────────── def _cmd_model_router(args: str) -> None: @@ -953,7 +954,7 @@ def _cmd_model_router(args: str) -> None: console.print(table) console.print("[dim]Auto: classify_task(prompt) → LIGHT/HEAVY → model selection[/]") - console.print("[dim]Override via /model [model] or TAGENT_LLM_MODEL env[/]") + console.print("[dim]Override via !model [model] or TAGENT_LLM_MODEL env[/]") # ── /search — full-text conversation search (P3 #16) ─────────────── @@ -963,7 +964,7 @@ def _cmd_search(args: str) -> None: """Search conversation history with FTS5.""" query = args.strip() if not query: - console.print("[red]Usage: /search [/]") + console.print("[red]Usage: !search [/]") console.print("[dim]Example: /search login page bug[/]") return @@ -1053,7 +1054,7 @@ def _cmd_distill(args: str) -> None: """Distill the last execution into a reusable skill document. Requires a complex execution (3+ nodes, 2+ agent types). 
- Usage: /distill [name] — name is auto-generated if omitted. + Usage: !distill [name] — name is auto-generated if omitted. The generated skill is saved to skills/.md. """ global _last_trace @@ -1088,7 +1089,7 @@ def _cmd_api(args: str) -> None: if action == "gen": sub = rest.split(maxsplit=1) if len(sub) < 2: - console.print("[dim]Usage: /api gen [/]") + console.print("[dim]Usage: !api gen [/]") return try: from utils.design.openapi_test_gen import load_openapi_spec, generate_test_cases @@ -1102,14 +1103,14 @@ def _cmd_api(args: str) -> None: elif action == "test": sub = rest.split(maxsplit=1) if not sub: - console.print("[dim]Usage: /api test [spec_path][/]") + console.print("[dim]Usage: !api test [spec_path][/]") return console.print(f"[bold]API Smoke:[/] {sub[0]}") try: from utils.design.openapi_test_gen import load_openapi_spec, smoke_test_all_endpoints spec = load_openapi_spec(sub[1]) if len(sub) > 1 else {"paths": {}} if not spec.get("paths"): - console.print("[dim]No OpenAPI spec — use /api gen first[/]") + console.print("[dim]No OpenAPI spec — use !api gen first[/]") return result = smoke_test_all_endpoints(spec, sub[0]) table = Table(title="API Smoke Results") @@ -1123,7 +1124,7 @@ def _cmd_api(args: str) -> None: except Exception as e: console.print(f"[red]{e}[/]") else: - console.print("[dim]Usage: /api gen | test [spec][/]") + console.print("[dim]Usage: !api gen | test [spec][/]") # ── /plugins — list loaded plugins (P3 #22) ──────────────────────── @@ -1152,7 +1153,7 @@ def _cmd_plugins_list(args: str) -> None: console.print(table) -# ── /alias — command shortcuts ───────────────────────────────────── +# ── !alias — command shortcuts ───────────────────────────────────── def _cmd_alias(args: str) -> None: @@ -1167,7 +1168,7 @@ def _cmd_alias(args: str) -> None: if action == "list" or not action: aliases = list_aliases() if not aliases: - console.print("[dim]No aliases. /alias add smoke '/test --quick'[/]") + console.print("[dim]No aliases. 
!alias add smoke '/test --quick'[/]") return table = Table(title=f"Aliases · {len(aliases)}", show_header=True) table.add_column("Name", style="cyan") @@ -1181,8 +1182,8 @@ def _cmd_alias(args: str) -> None: name = sub[0] if sub else "" cmd = sub[1] if len(sub) > 1 else "" if not name or not cmd: - console.print("[dim]Usage: /alias add [/]") - console.print("[dim]Example: /alias add smoke '/test --quick'[/]") + console.print("[dim]Usage: !alias add [/]") + console.print("[dim]Example: !alias add smoke '/test --quick'[/]") return a = add_alias(name, cmd) console.print(f"[green]Alias:[/] [cyan]{a.name}[/] → {a.command}") @@ -1190,7 +1191,7 @@ def _cmd_alias(args: str) -> None: elif action == "remove": name = rest.strip() if not name: - console.print("[dim]Usage: /alias remove [/]") + console.print("[dim]Usage: !alias remove [/]") return if remove_alias(name): console.print(f"[green]Removed: {name}[/]") @@ -1216,7 +1217,7 @@ def _cmd_ws(args: str) -> None: current = get_current() workspaces = list_workspaces() if not workspaces: - console.print("[dim]No workspaces. Use /ws add [path] or /ws auto[/]") + console.print("[dim]No workspaces. 
Use !ws add [path] or /ws auto[/]") return table = Table(title=f"Workspaces · {len(workspaces)}", show_header=True) table.add_column("Name", style="cyan") @@ -1232,7 +1233,7 @@ def _cmd_ws(args: str) -> None: name = sub[0] if sub else "" path = sub[1] if len(sub) > 1 else str(_Path.cwd()) if not name: - console.print("[dim]Usage: /ws add [path][/]") + console.print("[dim]Usage: !ws add [path][/]") return w = add_workspace(name, path) console.print(f"[green]Workspace:[/] {w.name} → {w.path}") @@ -1240,7 +1241,7 @@ def _cmd_ws(args: str) -> None: elif action == "remove": name = rest.strip() if not name: - console.print("[dim]Usage: /ws remove [/]") + console.print("[dim]Usage: !ws remove [/]") return if remove_workspace(name): console.print(f"[green]Removed: {name}[/]") @@ -1250,7 +1251,7 @@ def _cmd_ws(args: str) -> None: elif action == "switch": name = rest.strip() if not name: - console.print("[dim]Usage: /ws switch [/]") + console.print("[dim]Usage: !ws switch [/]") return w = switch_to(name) if w: @@ -1310,7 +1311,7 @@ def _cmd_gateway(args: str) -> None: def _cmd_task(args: str) -> None: - """Manage tasks: add, list, done, cancel. Usage: /task [args].""" + """Manage tasks: add, list, done, cancel. Usage: !task [args].""" from rich.table import Table from runtime.cli.tasks import add_task, delete_task, list_tasks, stats, update_task @@ -1331,7 +1332,7 @@ def _cmd_task(args: str) -> None: crit_str = "" criteria = [c.strip() for c in crit_str.split(",") if c.strip()] if not title.strip(): - console.print("[dim]Usage: /task add [--criteria <cond1>,<cond2>][/]") + console.print("[dim]Usage: !task add <title> [--criteria <cond1>,<cond2>][/]") console.print("[dim]Example: /task add Run API smoke tests --criteria all P0 pass,coverage 80%[/]") return task = add_task(title, criteria=criteria) @@ -1344,7 +1345,7 @@ def _cmd_task(args: str) -> None: status_filter = rest if rest else None tasks = list_tasks(status_filter) if not tasks: - console.print("[dim]No tasks. 
Use /task add <title> to create one.[/]") + console.print("[dim]No tasks. Use !task add <title> to create one.[/]") return st = stats() console.print(f"[bold]Tasks:[/] {st['total']} total ({st['pending']} pending, {st['in_progress']} active, {st['done']} done)") @@ -1362,7 +1363,7 @@ def _cmd_task(args: str) -> None: elif action == "done": tid = rest.strip() if not tid: - console.print("[dim]Usage: /task done <id>[/]") + console.print("[dim]Usage: !task done <id>[/]") return t = update_task(tid, status="done") if t: @@ -1373,7 +1374,7 @@ def _cmd_task(args: str) -> None: elif action == "start": tid = rest.strip() if not tid: - console.print("[dim]Usage: /task start <id>[/]") + console.print("[dim]Usage: !task start <id>[/]") return t = update_task(tid, status="in_progress") if t: @@ -1384,7 +1385,7 @@ def _cmd_task(args: str) -> None: elif action == "cancel": tid = rest.strip() if not tid: - console.print("[dim]Usage: /task cancel <id>[/]") + console.print("[dim]Usage: !task cancel <id>[/]") return t = update_task(tid, status="cancelled") if t: @@ -1395,7 +1396,7 @@ def _cmd_task(args: str) -> None: elif action == "delete": tid = rest.strip() if not tid: - console.print("[dim]Usage: /task delete <id>[/]") + console.print("[dim]Usage: !task delete <id>[/]") return if delete_task(tid): console.print(f"[dim]Task #{tid} deleted.[/]") @@ -1417,7 +1418,7 @@ def _cmd_cross(args: str) -> None: parts = args.strip().split(None, 1) if len(parts) < 2 or parts[0] != "env": - console.print("[dim]Usage: /cross env <env1> [env2...] <prompt>[/]") + console.print("[dim]Usage: !cross env <env1> [env2...] 
<prompt>[/]") console.print("[dim]Example: /cross env test staging run API smoke tests[/]") console.print("[dim]Presets saved via /env save <name>[/]") return @@ -1483,7 +1484,7 @@ def _cmd_clean(args: str) -> None: for c in cleanable[:15]: table.add_row(c["path"][:60], f"{c['size_kb']} KB", f"{c['age_hours']}h ago") console.print(table) - console.print("[dim]Run /clean run to delete. Delivery artifacts never touched.[/]") + console.print("[dim]Run !clean run to delete. Delivery artifacts never touched.[/]") # ── /data — test data generation ──────────────────────────────────── @@ -1523,7 +1524,7 @@ def _cmd_data(args: str) -> None: console.print(f"[green]Generated:[/] {count} products [dim]→ {out}[/]") else: - console.print("[dim]Usage: /data users|products|related <count>[/]") + console.print("[dim]Usage: !data users|products|related <count>[/]") console.print("[dim]Example: /data users 100[/]") except ImportError: console.print("[red]DataFactoryV2 not available. Install: pip install faker[/]") @@ -1694,7 +1695,7 @@ def _cmd_insights(args: str) -> None: """Show usage analytics across saved sessions. Scans workspace/gateway/*.json for session data. - Usage: /insights [days] — default 30 days. + Usage: !insights [days] — default 30 days. Shows: session count, avg turns, top agents, daily activity chart. 
""" from rich.table import Table @@ -1739,7 +1740,7 @@ def _cmd_insights(args: str) -> None: console.print(f" {day} {bar} {count}") -# ── /doctor — comprehensive environment health check ──────────────── +# ── !doctor — comprehensive environment health check ──────────────── def _cmd_doctor(args: str) -> None: @@ -1766,7 +1767,7 @@ def _cmd_doctor(args: str) -> None: table.add_row(label, check.get("detail", "")) console.print(table) - console.print(f"\n[bold]{ok_count} checks passed[/] [dim]Run /help for next steps.[/]") + console.print(f"\n[bold]{ok_count} checks passed[/] [dim]Run !help for next steps.[/]") # ── /nudge — suggest facts worth remembering ─────────────────────── @@ -1776,7 +1777,7 @@ def _cmd_nudge(args: str) -> None: """Scan recent conversation for facts worth persisting to MEMORY.md. Detects patterns: config changes, preferences, decisions. - Use /remember <fact> to save suggestions, /memory to review. + Use !remember <fact> to save suggestions, /memory to review. """ mem = _get_memory() if not mem.messages: @@ -1795,7 +1796,7 @@ def _cmd_nudge(args: str) -> None: seen.add(m.content[:80]) break if not suggestions: - console.print("[dim]No notable facts detected. Use /remember <fact> manually.[/]") + console.print("[dim]No notable facts detected. Use !remember <fact> manually.[/]") return console.print("[bold]Suggestions from this session:[/]") for i, s in enumerate(suggestions[:5], 1): diff --git a/runtime/cli/completer.py b/runtime/cli/completer.py index 108f1302..652fc9b4 100644 --- a/runtime/cli/completer.py +++ b/runtime/cli/completer.py @@ -1,7 +1,7 @@ """Tab completion for interactive REPL — slash commands + paths + agent/skill names. Provides slash command completion, path completion, agent/skill name completion, -and context-aware completion for /model provider names. +and context-aware completion for !model provider names. 
""" from __future__ import annotations @@ -19,7 +19,7 @@ _BUILTINS = [ ("help", "Show help"), ("status", "Session stats + model info"), - ("session", "Alias for /status"), + ("session", "Alias for !status"), ("model", "Switch LLM provider"), ("tools", "List agents + skills"), ("memory", "Show MEMORY.md contents"), @@ -51,7 +51,7 @@ class SlashCompleter(Completer): - """Complete /commands, provider names, file paths, and agent/skill names.""" + """Complete !commands, provider names, file paths, and agent/skill names.""" def __init__(self): self._path_completer = PathCompleter(expanduser=True) @@ -77,10 +77,10 @@ def get_completions(self, document: Document, complete_event): text = document.text_before_cursor # After /, complete command names or context-aware args - if text.startswith("/"): - word = text.lstrip("/") + if text.startswith("!"): + word = text.lstrip("!") - # /model <provider> — complete provider names + # !model <provider> — complete provider names if word.startswith("model "): provider_part = word.split(" ", 1)[1] # If there's a second arg (model name), fall through to no completion diff --git a/runtime/cli/insights.py b/runtime/cli/insights.py index 8fc7bc39..3fbac71a 100644 --- a/runtime/cli/insights.py +++ b/runtime/cli/insights.py @@ -50,6 +50,8 @@ def collect_stats(days: int = 30) -> list[SessionStats]: except (json.JSONDecodeError, OSError): continue + if not isinstance(data, dict): + continue messages = data.get("messages", []) if not messages: continue diff --git a/runtime/cli/interactive.py b/runtime/cli/interactive.py index 06bd9c87..2c953a98 100644 --- a/runtime/cli/interactive.py +++ b/runtime/cli/interactive.py @@ -2,7 +2,7 @@ Bare `tagent` enters interactive session: - Natural language → LLM routing → streaming activity feed - - /command → slash dispatch with Tab completion + history + - !command → command dispatch with Tab completion + history - ↑↓ arrows → command history - Ctrl+C → interrupt (REPL stays alive) - Ctrl+D → quit 
(auto-saves session) @@ -35,7 +35,7 @@ ૮₍˶ᵔ ᗜ ᵔ˶₎ა Test-Agent v{version} AI Router · {experts} Experts · {skills} Skills - Type /help for commands, or describe your test task.""" + Type !help for commands, or describe your test task.""" _SESSION_DIR = _Path(__file__).resolve().parents[2] / "workspace" / "gateway" _SESSION_FILE = _SESSION_DIR / "active_session.json" @@ -199,14 +199,24 @@ def _print_banner() -> None: except Exception: pass # use default _SHEEP - # Animated typewriter reveal + # Animated typewriter reveal — speed from skin config try: + from runtime.cli.skins import get_skin, get_current_skin_name + skin = get_skin(get_current_skin_name()) + animation_speed = skin.get("animation_speed", 0.0005) + text_style = skin.get("panel_style", {}).get("text", "bold white") + except Exception: + animation_speed = 0.0005 + text_style = "bold white" + try: + from rich.markup import render as _render_markup accumulated = Text("") with Live(accumulated, console=console, refresh_per_second=120, transient=False) as live: for ch in banner: - accumulated.append_text(Text(ch, style="bold white")) - live.update(accumulated) - time.sleep(0.0005) + accumulated.append(ch) + # Re-render with markup so colors appear inline + live.update(Text.from_markup(str(accumulated))) + time.sleep(animation_speed) except Exception: # Fallback: plain print if terminal doesn't support Live console.print(banner) @@ -219,74 +229,74 @@ def _print_help() -> None: groups = [ ("Run", [ - ("/task add|list|done|start", "Manage task list with criteria"), - ("/test <target>", "Full 11-step test pipeline"), - ("/run <target>", "Plan + execute (quick)"), - ("/plan <target>", "Plan only, no execution"), + ("!task add|list|done|start", "Manage task list with criteria"), + ("!test <target>", "Full 11-step test pipeline"), + ("!run <target>", "Plan + execute (quick)"), + ("!plan <target>", "Plan only, no execution"), ]), ("Data & API", [ - ("/data users|related <N>", "Generate test data"), - ("/api 
gen|test", "OpenAPI contract testing"), - ("/cross env <e1> <e2>", "Cross-environment test run"), + ("!data users|related <N>", "Generate test data"), + ("!api gen|test", "OpenAPI contract testing"), + ("!cross env <e1> <e2>", "Cross-environment test run"), ]), ("Quality", [ - ("/regression", "Regression detection vs baseline"), - ("/flaky list|quarantine", "Flaky test management"), - ("/prioritize", "Prioritize by git changes"), - ("/clean", "Clean temp data (preserves deliverables)"), + ("!regression", "Regression detection vs baseline"), + ("!flaky list|quarantine", "Flaky test management"), + ("!prioritize", "Prioritize by git changes"), + ("!clean", "Clean temp data (preserves deliverables)"), ]), ("Info", [ - ("/update", "Check for newer version"), - ("/progress", "Test coverage matrix"), - ("/status", "Session, model, conversation stats"), - ("/tools", "List agents + skills with status"), - ("/ls", "Quick list experts + skills"), - ("/doctor [--agents]", "Environment health check"), - ("/ready", "Release readiness score"), + ("!update", "Check for newer version"), + ("!progress", "Test coverage matrix"), + ("!status", "Session, model, conversation stats"), + ("!tools", "List agents + skills with status"), + ("!ls", "Quick list experts + skills"), + ("!doctor [--agents]", "Environment health check"), + ("!ready", "Release readiness score"), ]), ("Control", [ - ("/model [provider] [model]", "Switch LLM (Tab to complete)"), - ("/lang [zh|en|zh-en]", "Switch UI language"), - ("/skin [name]", "Switch CLI theme (4 skins)"), - ("/fc", "Fix last typo (like thefuck)"), - ("/! 
/1..9", "Command history / re-run"), - ("/alias add|list", "Command shortcuts"), - ("/personality [name]", "Set agent persona (loads expert)"), - ("/clear", "Reset conversation memory"), - ("/undo", "Remove last exchange from memory"), - ("/retry", "Re-run last prompt after undo"), - ("/setup [--preset]", "Generate config files"), - ("/check [--e2e]", "Framework self-test"), + ("!model [provider] [model]", "Switch LLM (Tab to complete)"), + ("!lang [zh|en|zh-en]", "Switch UI language"), + ("!skin [name]", "Switch CLI theme (4 skins)"), + ("!fc", "Fix last typo (like thefuck)"), + ("!1..9", "Command history / re-run"), + ("!alias add|list", "Command shortcuts"), + ("!personality [name]", "Set agent persona (loads expert)"), + ("!clear", "Reset conversation memory"), + ("!undo", "Remove last exchange from memory"), + ("!retry", "Re-run last prompt after undo"), + ("!setup [--preset]", "Generate config files"), + ("!check [--e2e]", "Framework self-test"), ]), ("Automation", [ - ("/hook add|list|prebuilt", "Lifecycle hooks (before/after/error)"), + ("!hook add|list|prebuilt", "Lifecycle hooks (before/after/error)"), ]), ("Learning", [ - ("/distill", "Save last execution as reusable skill"), + ("!distill", "Save last execution as reusable skill"), ]), ("Memory", [ - ("/remember <fact>", "Save fact to MEMORY.md"), - ("/forget <keyword>", "Remove facts by keyword"), - ("/nudge", "Scan session for facts worth remembering"), - ("/memory", "Show MEMORY.md contents"), + ("!remember <fact>", "Save fact to MEMORY.md"), + ("!forget <keyword>", "Remove facts by keyword"), + ("!nudge", "Scan session for facts worth remembering"), + ("!memory", "Show MEMORY.md contents"), ]), ("Workspace", [ - ("/ws add|list|switch|auto", "Manage project workspaces"), + ("!ws add|list|switch|auto", "Manage project workspaces"), ]), ("Gateway", [ - ("/gateway", "IM platform connection status"), + ("!gateway", "IM platform connection status"), ]), ("Session", [ - ("/cost", "Token usage and cost 
estimate"), - ("/cache [clear]", "LLM response cache stats/clear"), - ("/insights [days]", "Cross-session usage analytics"), - ("/sessions", "List saved sessions"), - ("/resume <id>", "Load a saved session"), - ("/export", "Export conversation to markdown"), - ("/compact", "Summarize and compress context"), - ("/context", "Full conversation history"), - ("/help", "This help"), - ("/quit (Ctrl+D)", "Save session and exit"), + ("!cost", "Token usage and cost estimate"), + ("!cache [clear]", "LLM response cache stats/clear"), + ("!insights [days]", "Cross-session usage analytics"), + ("!sessions", "List saved sessions"), + ("!resume <id>", "Load a saved session"), + ("!save", "Export conversation to markdown"), + ("!compact", "Summarize and compress context"), + ("!context", "Full conversation history"), + ("!help", "This help"), + ("!quit (Ctrl+D)", "Save session and exit"), ]), ] @@ -337,7 +347,7 @@ def _diagnose_error(exc: Exception) -> str | None: # Invalid request / bad gateway from LLM if any(k in _msg for k in ("500", "502", "503", "internal", "bad gateway")): provider = _current_provider() - return f"{provider} service returned a server error. The provider may be down — try again or switch with [cyan]/model[/]." + return f"{provider} service returned a server error. The provider may be down — try again or switch with [cyan]!model[/]." # General: give the error message itself as info, with next steps return None @@ -452,7 +462,7 @@ def _handle_natural_language(text: str) -> None: ds = decision.model_dump() if hasattr(decision, "model_dump") else {} nodes = ds.get("dag", ds.get("nodes", [])) if len(set(n.get("kind", "") for n in nodes)) >= 2: - # Stash trace for /distill command + # Stash trace for !distill command _last_trace = (text, ds) console.print(" [dim]💡 Multi-agent pattern detected. 
Run [cyan]/distill[/] to save as reusable skill.[/]") @@ -494,9 +504,9 @@ def _handle_natural_language(text: str) -> None: console.print(f" [yellow]💡 {_hint}[/]") elif _err_msg: console.print(f" [dim]{_err_msg}[/]") - console.print(" [dim]Run [cyan]/help[/] for commands, [cyan]/doctor[/] for health check.[/]") + console.print(" [dim]Run [cyan]!help[/] for commands, [cyan]!doctor[/] for health check.[/]") else: - console.print(" [dim]Run [cyan]/doctor[/] to check environment, [cyan]/help[/] for commands.[/]") + console.print(" [dim]Run [cyan]!doctor[/] to check environment, [cyan]!help[/] for commands.[/]") mem.add("assistant", f"[Error: {type(_exc).__name__}]") @@ -507,7 +517,7 @@ def _handle_natural_language(text: str) -> None: # ── Slash Dispatch ───────────────────────────────────────────── def _handle_slash(text: str) -> None: - parts = text.lstrip("/").strip().split(maxsplit=1) + parts = text.lstrip("!").strip().split(maxsplit=1) name = parts[0].lower() args = parts[1] if len(parts) > 1 else "" @@ -530,7 +540,7 @@ def _handle_slash(text: str) -> None: f"[dim]Did you mean [/][cyan]/{suggestion}[/][dim]? 
Run [/][cyan]/fc[/][cyan][/][dim] to fix.[/]" ) else: - console.print(f"[red]Unknown: /{name}[/] [dim](/help for commands)[/]") + console.print(f"[red]Unknown: /{name}[/] [dim](!help for commands)[/]") return try: @@ -548,7 +558,7 @@ def _handle_slash(text: str) -> None: else: err_msg = str(_exc)[:200] console.print(f"[red]✗ {type(_exc).__name__}: {err_msg}[/]") - console.print("[dim]/help for commands, /doctor for health check.[/]") + console.print("[dim]!help for commands, !doctor for health check.[/]") def _save_session() -> None: @@ -720,8 +730,8 @@ def _check_version(): if not user_input: continue - # Multi-line detection: code blocks + /ml command - if user_input.strip() == "/ml" or user_input.strip() == "/multiline": + # Multi-line detection: code blocks + !ml command + if user_input.strip() == "!ml" or user_input.strip() == "!multiline": user_input = _read_multiline(session) if not user_input: continue @@ -730,7 +740,7 @@ def _check_version(): pass # Alias expansion: check non-slash input against aliases - if not user_input.startswith("/"): + if not user_input.startswith("!"): from runtime.cli.aliases import expand_alias expanded = expand_alias(user_input) if expanded: @@ -738,13 +748,13 @@ def _check_version(): user_input = expanded # Record in command history (non-slash only) - if not user_input.startswith("/"): + if not user_input.startswith("!"): _cmd_history.append(user_input) if len(_cmd_history) > 10: _cmd_history.pop(0) try: - if user_input.startswith("/"): + if user_input.startswith("!"): _handle_slash(user_input) else: _handle_natural_language(user_input) @@ -752,4 +762,4 @@ def _check_version(): break except Exception as exc: console.print(f"[red]Error: {exc}[/]") - console.print("[dim]REPL continuing — /help for commands.[/]") + console.print("[dim]REPL continuing — !help for commands.[/]") diff --git a/runtime/cli/main.py b/runtime/cli/main.py index 3440721f..c5e3dbaa 100644 --- a/runtime/cli/main.py +++ b/runtime/cli/main.py @@ -35,19 +35,29 
@@ def _version_callback( raise typer.Exit(0) -# Register command modules -from runtime.cli.commands.bootstrap import register as _reg_bootstrap # noqa: E402 -from runtime.cli.commands.catalog import register as _reg_catalog # noqa: E402 -from runtime.cli.commands.demo import register as _reg_demo # noqa: E402 -from runtime.cli.commands.doctor import register as _reg_doctor # noqa: E402 -from runtime.cli.commands.export import register as _reg_export # noqa: E402 -from runtime.cli.commands.init import register as _reg_init # noqa: E402 -from runtime.cli.commands.market import register as _reg_market # noqa: E402 -from runtime.cli.commands.readiness import register as _reg_readiness # noqa: E402 -from runtime.cli.commands.run import register_run as _reg_run # noqa: E402 -from runtime.cli.commands.selftest import register as _reg_selftest # noqa: E402 -from runtime.cli.commands.gateway import register as _reg_gateway # noqa: E402 -from runtime.cli.commands.test_coordinator import register as _reg_test_coordinator # noqa: E402 +# Auto-discover CLI commands from slash command registry. +# Commands with cli_module set are exposed as typer CLI commands. 
+from runtime.cli.slash_commands import COMMAND_REGISTRY as _REG # noqa: E402 + +_seen: set[str] = set() +for _cmd in _REG: + if not _cmd.cli_module: + continue + if _cmd.cli_module in _seen: + continue + _seen.add(_cmd.cli_module) + _mod = __import__(f"runtime.cli.commands.{_cmd.cli_module}", fromlist=["register"]) # noqa: E402 + _mod.register(app) + +# Manual registrations — modules that register multiple commands or sub-apps +# Manual registrations — CLI-only or name-collision commands +import runtime.cli.commands.bootstrap as _reg_bootstrap # noqa: E402 +import runtime.cli.commands.export as _reg_export # noqa: E402 +_reg_bootstrap.register(app) +_reg_export.register(app) +import runtime.cli.commands.market as _reg_market # noqa: E402 +_reg_market.register(app) + # P3 #19 daemon mode (inline — simple enough) @app.command(name="serve", help="Start 7x24 daemon (FastAPI + scheduler)") def _serve( @@ -57,18 +67,5 @@ def _serve( from runtime.cli.commands.serve import serve serve(host, port) -_reg_bootstrap(app) -_reg_catalog(app) -_reg_demo(app) -_reg_doctor(app) -_reg_export(app) -_reg_init(app) -_reg_market(app) -_reg_readiness(app) -_reg_run(app) -_reg_selftest(app) -_reg_gateway(app) -_reg_test_coordinator(app) - if __name__ == "__main__": app() diff --git a/runtime/cli/skins.py b/runtime/cli/skins.py index 389e5e9f..1ba97cca 100644 --- a/runtime/cli/skins.py +++ b/runtime/cli/skins.py @@ -251,6 +251,425 @@ "info": "[ INFO ]", }, }, + + "cat": { + "name": "cat", + "description": "curious testing companion", + "animation_speed": 0.0008, + "panel_style": {"text": "bold bright_yellow", "border": "bright_yellow"}, + "banner": r""" +[bold bright_yellow] 阄阄阄阄阄阄阄阄阄阄阈阈阈阈阈阈[/] +[bold bright_yellow] ▄█▌ ₍˶ᵔ ⃟ ᵜ˶₎ ▐█阄阄[/] +[bold bright_yellow] ██▌ 🧶 🐾 ▐██[/] +[bold bright_yellow] 阀█阄阄阄阄阄阄阄阄阄阄阄阄█阀[/] +[dim yellow] 🐾 🧶 🐾 🧶[/] +[bold white] Test-Agent [bright_yellow]v{version}[/bright_yellow][/] +[dim] {experts} experts · {skills} skills[/] +[dim yellow] !help — curious 
testing companion[/] +""", + "prompt_style": {"prompt": "bold bright_yellow", "prompt.dim": "dim"}, + "colors": {"primary": "bright_yellow", "success": "bright_green", "error": "bright_red", "warning": "yellow", "dim": "dim"}, + "icons": {"ok": "✓", "fail": "✗", "warn": "⚠", "info": "💡"}, + }, + "dog": { + "name": "dog", + "description": "WOOF! enthusiastic test runner", + "animation_speed": 0.0006, + "panel_style": {"text": "bold bright_yellow", "border": "bright_yellow"}, + "banner": r""" +[bold bright_yellow] 阄阄阄阄阄阄阄阄阄阄阈阈阈阈阈阈[/] +[bold bright_yellow] ▄█▌ U コ U ▐█阄阄[/] +[bold bright_yellow] ██▌ ⚽ 🺴 ▐██[/] +[bold bright_yellow] 阀█阄阄阄阄阄阄阄阄阄阄阄阄█阀[/] +[dim yellow] 🺴 ⚽ 🺴 ⚽[/] +[bold white] Test-Agent [bright_yellow]v{version}[/bright_yellow][/] +[dim] {experts} experts · {skills} skills[/] +[dim yellow] !help — WOOF! enthusiastic test runner[/] +""", + "prompt_style": {"prompt": "bold bright_yellow", "prompt.dim": "dim"}, + "colors": {"primary": "bright_yellow", "success": "bright_green", "error": "bright_red", "warning": "yellow", "dim": "dim"}, + "icons": {"ok": "✓", "fail": "✗", "warn": "⚠", "info": "💡"}, + }, + "owl": { + "name": "owl", + "description": "wisdom begins with a question", + "animation_speed": 0.002, + "panel_style": {"text": "bold magenta", "border": "magenta"}, + "banner": r""" +[bold magenta] 阄阄阄阄阄阄阄阄阄阄阈阈阈阈阈阈[/] +[bold magenta] ▄█▌ (◉▿◉) ▐█阄阄[/] +[bold magenta] ██▌ 📖 ✦ ▐██[/] +[bold magenta] 阀█阄阄阄阄阄阄阄阄阄阄阄阄█阀[/] +[dim magenta] ✦ 📖 ✦ 📖[/] +[bold white] Test-Agent [magenta]v{version}[/magenta][/] +[dim] {experts} experts · {skills} skills[/] +[dim magenta] !help — wisdom begins with a question[/] +""", + "prompt_style": {"prompt": "bold magenta", "prompt.dim": "dim"}, + "colors": {"primary": "magenta", "success": "bright_green", "error": "bright_red", "warning": "yellow", "dim": "dim"}, + "icons": {"ok": "✓", "fail": "✗", "warn": "⚠", "info": "💡"}, + }, + "fox": { + "name": "fox", + "description": "clever tests, zero bugs", + "animation_speed": 
0.0008, + "panel_style": {"text": "bold bright_red", "border": "bright_red"}, + "banner": r""" +[bold bright_red] 阄阄阄阄阄阄阄阄阄阄阈阈阈阈阈阈[/] +[bold bright_red] ▄█▌ (◕‿◕✿) ▐█阄阄[/] +[bold bright_red] ██▌ 🔥 🍂 ▐██[/] +[bold bright_red] 阀█阄阄阄阄阄阄阄阄阄阄阄阄█阀[/] +[dim bright_red] 🍂 🔥 🍂 🔥[/] +[bold white] Test-Agent [bright_red]v{version}[/bright_red][/] +[dim] {experts} experts · {skills} skills[/] +[dim bright_red] !help — clever tests, zero bugs[/] +""", + "prompt_style": {"prompt": "bold bright_red", "prompt.dim": "dim"}, + "colors": {"primary": "bright_red", "success": "bright_green", "error": "bright_red", "warning": "yellow", "dim": "dim"}, + "icons": {"ok": "✓", "fail": "✗", "warn": "⚠", "info": "💡"}, + }, + "frog": { + "name": "frog", + "description": "hop into testing", + "animation_speed": 0.0006, + "panel_style": {"text": "bold green", "border": "green"}, + "banner": r""" +[bold green] 阄阄阄阄阄阄阄阄阄阄阈阈阈阈阈阈[/] +[bold green] ▄█▌ ( :3 ) ▐█阄阄[/] +[bold green] ██▌ 🺷 💧 ▐██[/] +[bold green] 阀█阄阄阄阄阄阄阄阄阄阄阄阄█阀[/] +[dim green] 💧 🺷 💧 🺷[/] +[bold white] Test-Agent [green]v{version}[/green][/] +[dim] {experts} experts · {skills} skills[/] +[dim green] !help — hop into testing[/] +""", + "prompt_style": {"prompt": "bold green", "prompt.dim": "dim"}, + "colors": {"primary": "green", "success": "bright_green", "error": "bright_red", "warning": "yellow", "dim": "dim"}, + "icons": {"ok": "✓", "fail": "✗", "warn": "⚠", "info": "💡"}, + }, + "penguin": { + "name": "penguin", + "description": "stay cool, test well", + "animation_speed": 0.001, + "panel_style": {"text": "bold bright_cyan", "border": "bright_cyan"}, + "banner": r""" +[bold bright_cyan] 阄阄阄阄阄阄阄阄阄阄阈阈阈阈阈阈[/] +[bold bright_cyan] ▄█▌ (°◡°♡) ▐█阄阄[/] +[bold bright_cyan] ██▌ ❄ 🧊 ▐██[/] +[bold bright_cyan] 阀█阄阄阄阄阄阄阄阄阄阄阄阄█阀[/] +[dim bright_cyan] 🧊 ❄ 🧊 ❄[/] +[bold white] Test-Agent [bright_cyan]v{version}[/bright_cyan][/] +[dim] {experts} experts · {skills} skills[/] +[dim bright_cyan] !help — stay cool, test well[/] +""", + "prompt_style": 
{"prompt": "bold bright_cyan", "prompt.dim": "dim"}, + "colors": {"primary": "bright_cyan", "success": "bright_green", "error": "bright_red", "warning": "yellow", "dim": "dim"}, + "icons": {"ok": "✓", "fail": "✗", "warn": "⚠", "info": "💡"}, + }, + "bunny": { + "name": "bunny", + "description": "hop to quick testing", + "animation_speed": 0.0004, + "panel_style": {"text": "bold bright_magenta", "border": "bright_magenta"}, + "banner": r""" +[bold bright_magenta] 阄阄阄阄阄阄阄阄阄阄阈阈阈阈阈阈[/] +[bold bright_magenta] ▄█▌ (/.\) ▐█阄阄[/] +[bold bright_magenta] ██▌ 🥕 🌱 ▐██[/] +[bold bright_magenta] 阀█阄阄阄阄阄阄阄阄阄阄阄阄█阀[/] +[dim bright_magenta] 🌱 🥕 🌱 🥕[/] +[bold white] Test-Agent [bright_magenta]v{version}[/bright_magenta][/] +[dim] {experts} experts · {skills} skills[/] +[dim bright_magenta] !help — hop to quick testing[/] +""", + "prompt_style": {"prompt": "bold bright_magenta", "prompt.dim": "dim"}, + "colors": {"primary": "bright_magenta", "success": "bright_green", "error": "bright_red", "warning": "yellow", "dim": "dim"}, + "icons": {"ok": "✓", "fail": "✗", "warn": "⚠", "info": "💡"}, + }, + "panda": { + "name": "panda", + "description": "take it easy, test well", + "animation_speed": 0.0011, + "panel_style": {"text": "bold bright_white", "border": "bright_white"}, + "banner": r""" +[bold bright_white] 阄阄阄阄阄阄阄阄阄阄阈阈阈阈阈阈[/] +[bold bright_white] ▄█▌ ㅜ(◕ᵗ◕✿)ノ ▐█阄阄[/] +[bold bright_white] ██▌ 🎋 🎍 ▐██[/] +[bold bright_white] 阀█阄阄阄阄阄阄阄阄阄阄阄阄█阀[/] +[dim bright_white] 🎍 🎋 🎍 🎋[/] +[bold white] Test-Agent [bright_white]v{version}[/bright_white][/] +[dim] {experts} experts · {skills} skills[/] +[dim bright_white] !help — take it easy, test well[/] +""", + "prompt_style": {"prompt": "bold bright_white", "prompt.dim": "dim"}, + "colors": {"primary": "bright_white", "success": "bright_green", "error": "bright_red", "warning": "yellow", "dim": "dim"}, + "icons": {"ok": "✓", "fail": "✗", "warn": "⚠", "info": "💡"}, + }, + "sunflower": { + "name": "sunflower", + "description": "let your tests bloom", + 
"animation_speed": 0.0009, + "panel_style": {"text": "bold bright_yellow", "border": "bright_yellow"}, + "banner": r""" +[bold bright_yellow] 阄阄阄阄阄阄阄阄阄阄阈阈阈阈阈阈[/] +[bold bright_yellow] ▄█▌ ✿◕‿◕✿ ▐█阄阄[/] +[bold bright_yellow] ██▌ 🌻 🐝 ▐██[/] +[bold bright_yellow] 阀█阄阄阄阄阄阄阄阄阄阄阄阄█阀[/] +[dim bright_yellow] 🐝 🌻 🐝 🌻[/] +[bold white] Test-Agent [bright_yellow]v{version}[/bright_yellow][/] +[dim] {experts} experts · {skills} skills[/] +[dim bright_yellow] !help — let your tests bloom[/] +""", + "prompt_style": {"prompt": "bold bright_yellow", "prompt.dim": "dim"}, + "colors": {"primary": "bright_yellow", "success": "bright_green", "error": "bright_red", "warning": "yellow", "dim": "dim"}, + "icons": {"ok": "✓", "fail": "✗", "warn": "⚠", "info": "💡"}, + }, + "cactus": { + "name": "cactus", + "description": "resilient testing", + "animation_speed": 0.0009, + "panel_style": {"text": "bold green", "border": "green"}, + "banner": r""" +[bold green] 阄阄阄阄阄阄阄阄阄阄阈阈阈阈阈阈[/] +[bold green] ▄█▌ (◠‿◠) ▐█阄阄[/] +[bold green] ██▌ 🌵 🌺 ▐██[/] +[bold green] 阀█阄阄阄阄阄阄阄阄阄阄阄阄█阀[/] +[dim green] 🌺 🌵 🌺 🌵[/] +[bold white] Test-Agent [green]v{version}[/green][/] +[dim] {experts} experts · {skills} skills[/] +[dim green] !help — resilient testing[/] +""", + "prompt_style": {"prompt": "bold green", "prompt.dim": "dim"}, + "colors": {"primary": "green", "success": "bright_green", "error": "bright_red", "warning": "yellow", "dim": "dim"}, + "icons": {"ok": "✓", "fail": "✗", "warn": "⚠", "info": "💡"}, + }, + "whale": { + "name": "whale", + "description": "dive deep into testing", + "animation_speed": 0.002, + "panel_style": {"text": "bold bright_blue", "border": "bright_blue"}, + "banner": r""" +[bold bright_blue] 阄阄阄阄阄阄阄阄阄阄阈阈阈阈阈阈[/] +[bold bright_blue] ▄█▌ (◉▿◉❀) ▐█阄阄[/] +[bold bright_blue] ██▌ 💧 🐟 ▐██[/] +[bold bright_blue] 阀█阄阄阄阄阄阄阄阄阄阄阄阄█阀[/] +[dim bright_blue] 🐟 💧 🐟 💧[/] +[bold white] Test-Agent [bright_blue]v{version}[/bright_blue][/] +[dim] {experts} experts · {skills} skills[/] +[dim bright_blue] !help 
— dive deep into testing[/] +""", + "prompt_style": {"prompt": "bold bright_blue", "prompt.dim": "dim"}, + "colors": {"primary": "bright_blue", "success": "bright_green", "error": "bright_red", "warning": "yellow", "dim": "dim"}, + "icons": {"ok": "✓", "fail": "✗", "warn": "⚠", "info": "💡"}, + }, + "dolphin": { + "name": "dolphin", + "description": "leap into quality testing", + "animation_speed": 0.0004, + "panel_style": {"text": "bold bright_cyan", "border": "bright_cyan"}, + "banner": r""" +[bold bright_cyan] 阄阄阄阄阄阄阄阄阄阄阈阈阈阈阈阈[/] +[bold bright_cyan] ▄█▌ (◕▿◕✿) ▐█阄阄[/] +[bold bright_cyan] ██▌ 💦 〰 ▐██[/] +[bold bright_cyan] 阀█阄阄阄阄阄阄阄阄阄阄阄阄█阀[/] +[dim bright_cyan] 〰 💦 〰 💦[/] +[bold white] Test-Agent [bright_cyan]v{version}[/bright_cyan][/] +[dim] {experts} experts · {skills} skills[/] +[dim bright_cyan] !help — leap into quality testing[/] +""", + "prompt_style": {"prompt": "bold bright_cyan", "prompt.dim": "dim"}, + "colors": {"primary": "bright_cyan", "success": "bright_green", "error": "bright_red", "warning": "yellow", "dim": "dim"}, + "icons": {"ok": "✓", "fail": "✗", "warn": "⚠", "info": "💡"}, + }, + "turtle": { + "name": "turtle", + "description": "steady wins the race", + "animation_speed": 0.0025, + "panel_style": {"text": "bold green", "border": "green"}, + "banner": r""" +[bold green] 阄阄阄阄阄阄阄阄阄阄阈阈阈阈阈阈[/] +[bold green] ▄█▌ (◉ω◉) ▐█阄阄[/] +[bold green] ██▌ 🌿 🥚 ▐██[/] +[bold green] 阀█阄阄阄阄阄阄阄阄阄阄阄阄█阀[/] +[dim green] 🥚 🌿 🥚 🌿[/] +[bold white] Test-Agent [green]v{version}[/green][/] +[dim] {experts} experts · {skills} skills[/] +[dim green] !help — steady wins the race[/] +""", + "prompt_style": {"prompt": "bold green", "prompt.dim": "dim"}, + "colors": {"primary": "green", "success": "bright_green", "error": "bright_red", "warning": "yellow", "dim": "dim"}, + "icons": {"ok": "✓", "fail": "✗", "warn": "⚠", "info": "💡"}, + }, + "octopus": { + "name": "octopus", + "description": "eight arms, zero bugs", + "animation_speed": 0.0004, + "panel_style": {"text": "bold 
magenta", "border": "magenta"}, + "banner": r""" +[bold magenta] 阄阄阄阄阄阄阄阄阄阄阈阈阈阈阈阈[/] +[bold magenta] ▄█▌ (◉⏠◉) ▐█阄阄[/] +[bold magenta] ██▌ 🺸 🺸 ▐██[/] +[bold magenta] 阀█阄阄阄阄阄阄阄阄阄阄阄阄█阀[/] +[dim magenta] 🺸 🺸 🺸 🺸[/] +[bold white] Test-Agent [magenta]v{version}[/magenta][/] +[dim] {experts} experts · {skills} skills[/] +[dim magenta] !help — eight arms, zero bugs[/] +""", + "prompt_style": {"prompt": "bold magenta", "prompt.dim": "dim"}, + "colors": {"primary": "magenta", "success": "bright_green", "error": "bright_red", "warning": "yellow", "dim": "dim"}, + "icons": {"ok": "✓", "fail": "✗", "warn": "⚠", "info": "💡"}, + }, + "eagle": { + "name": "eagle", + "description": "soar above the codebase", + "animation_speed": 0.0012, + "panel_style": {"text": "bold yellow", "border": "yellow"}, + "banner": r""" +[bold yellow] 阄阄阄阄阄阄阄阄阄阄阈阈阈阈阈阈[/] +[bold yellow] ▄█▌ ψ(`∇`)ψ ▐█阄阄[/] +[bold yellow] ██▌ ☀ 🏔 ▐██[/] +[bold yellow] 阀█阄阄阄阄阄阄阄阄阄阄阄阄█阀[/] +[dim yellow] 🏔 ☀ 🏔 ☀[/] +[bold white] Test-Agent [yellow]v{version}[/yellow][/] +[dim] {experts} experts · {skills} skills[/] +[dim yellow] !help — soar above the codebase[/] +""", + "prompt_style": {"prompt": "bold yellow", "prompt.dim": "dim"}, + "colors": {"primary": "yellow", "success": "bright_green", "error": "bright_red", "warning": "yellow", "dim": "dim"}, + "icons": {"ok": "✓", "fail": "✗", "warn": "⚠", "info": "💡"}, + }, + "butterfly": { + "name": "butterfly", + "description": "graceful testing", + "animation_speed": 0.0007, + "panel_style": {"text": "bold bright_magenta", "border": "bright_magenta"}, + "banner": r""" +[bold bright_magenta] 阄阄阄阄阄阄阄阄阄阄阈阈阈阈阈阈[/] +[bold bright_magenta] ▄█▌ ε(`ᐧ`)っ ▐█阄阄[/] +[bold bright_magenta] ██▌ 🌸 🌼 ▐██[/] +[bold bright_magenta] 阀█阄阄阄阄阄阄阄阄阄阄阄阄█阀[/] +[dim bright_magenta] 🌼 🌸 🌼 🌸[/] +[bold white] Test-Agent [bright_magenta]v{version}[/bright_magenta][/] +[dim] {experts} experts · {skills} skills[/] +[dim bright_magenta] !help — graceful testing[/] +""", + "prompt_style": {"prompt": "bold 
bright_magenta", "prompt.dim": "dim"}, + "colors": {"primary": "bright_magenta", "success": "bright_green", "error": "bright_red", "warning": "yellow", "dim": "dim"}, + "icons": {"ok": "✓", "fail": "✗", "warn": "⚠", "info": "💡"}, + }, + "parrot": { + "name": "parrot", + "description": "repeatable, reliable", + "animation_speed": 0.0005, + "panel_style": {"text": "bold bright_green", "border": "bright_green"}, + "banner": r""" +[bold bright_green] 阄阄阄阄阄阄阄阄阄阄阈阈阈阈阈阈[/] +[bold bright_green] ▄█▌ (◕⨊◕✿) ▐█阄阄[/] +[bold bright_green] ██▌ 🌴 🍃 ▐██[/] +[bold bright_green] 阀█阄阄阄阄阄阄阄阄阄阄阄阄█阀[/] +[dim bright_green] 🍃 🌴 🍃 🌴[/] +[bold white] Test-Agent [bright_green]v{version}[/bright_green][/] +[dim] {experts} experts · {skills} skills[/] +[dim bright_green] !help — repeatable, reliable[/] +""", + "prompt_style": {"prompt": "bold bright_green", "prompt.dim": "dim"}, + "colors": {"primary": "bright_green", "success": "bright_green", "error": "bright_red", "warning": "yellow", "dim": "dim"}, + "icons": {"ok": "✓", "fail": "✗", "warn": "⚠", "info": "💡"}, + }, + "dragonfly": { + "name": "dragonfly", + "description": "dart, hover, test", + "animation_speed": 0.0003, + "panel_style": {"text": "bold bright_cyan", "border": "bright_cyan"}, + "banner": r""" +[bold bright_cyan] 阄阄阄阄阄阄阄阄阄阄阈阈阈阈阈阈[/] +[bold bright_cyan] ▄█▌ ⁽⁽ଘ( ˙Ꭓ˙ )ଓ⁾⁾ ▐█阄阄[/] +[bold bright_cyan] ██▌ 🺷 💨 ▐██[/] +[bold bright_cyan] 阀█阄阄阄阄阄阄阄阄阄阄阄阄█阀[/] +[dim bright_cyan] 💨 🺷 💨 🺷[/] +[bold white] Test-Agent [bright_cyan]v{version}[/bright_cyan][/] +[dim] {experts} experts · {skills} skills[/] +[dim bright_cyan] !help — dart, hover, test[/] +""", + "prompt_style": {"prompt": "bold bright_cyan", "prompt.dim": "dim"}, + "colors": {"primary": "bright_cyan", "success": "bright_green", "error": "bright_red", "warning": "yellow", "dim": "dim"}, + "icons": {"ok": "✓", "fail": "✗", "warn": "⚠", "info": "💡"}, + }, + "lion": { + "name": "lion", + "description": "rule your test kingdom", + "animation_speed": 0.0009, + "panel_style": 
{"text": "bold bright_yellow", "border": "bright_yellow"}, + "banner": r""" +[bold bright_yellow] 阄阄阄阄阄阄阄阄阄阄阈阈阈阈阈阈[/] +[bold bright_yellow] ▄█▌ ʕ♛ᵥ♛ʔ ▐█阄阄[/] +[bold bright_yellow] ██▌ 👑 🏆 ▐██[/] +[bold bright_yellow] 阀█阄阄阄阄阄阄阄阄阄阄阄阄█阀[/] +[dim bright_yellow] 🏆 👑 🏆 👑[/] +[bold white] Test-Agent [bright_yellow]v{version}[/bright_yellow][/] +[dim] {experts} experts · {skills} skills[/] +[dim bright_yellow] !help — rule your test kingdom[/] +""", + "prompt_style": {"prompt": "bold bright_yellow", "prompt.dim": "dim"}, + "colors": {"primary": "bright_yellow", "success": "bright_green", "error": "bright_red", "warning": "yellow", "dim": "dim"}, + "icons": {"ok": "✓", "fail": "✗", "warn": "⚠", "info": "💡"}, + }, + "elephant": { + "name": "elephant", + "description": "never forget a test", + "animation_speed": 0.0013, + "panel_style": {"text": "bold bright_white", "border": "bright_white"}, + "banner": r""" +[bold bright_white] 阄阄阄阄阄阄阄阄阄阄阈阈阈阈阈阈[/] +[bold bright_white] ▄█▌ (◕ᵥ◕)っ🎈 ▐█阄阄[/] +[bold bright_white] ██▌ 🌍 📋 ▐██[/] +[bold bright_white] 阀█阄阄阄阄阄阄阄阄阄阄阄阄█阀[/] +[dim bright_white] 📋 🌍 📋 🌍[/] +[bold white] Test-Agent [bright_white]v{version}[/bright_white][/] +[dim] {experts} experts · {skills} skills[/] +[dim bright_white] !help — never forget a test[/] +""", + "prompt_style": {"prompt": "bold bright_white", "prompt.dim": "dim"}, + "colors": {"primary": "bright_white", "success": "bright_green", "error": "bright_red", "warning": "yellow", "dim": "dim"}, + "icons": {"ok": "✓", "fail": "✗", "warn": "⚠", "info": "💡"}, + }, + "deer": { + "name": "deer", + "description": "swift, elegant testing", + "animation_speed": 0.0007, + "panel_style": {"text": "bold yellow", "border": "yellow"}, + "banner": r""" +[bold yellow] 阄阄阄阄阄阄阄阄阄阄阈阈阈阈阈阈[/] +[bold yellow] ▄█▌ ◌❲●❳◌ ▐█阄阄[/] +[bold yellow] ██▌ 🌲 🍄 ▐██[/] +[bold yellow] 阀█阄阄阄阄阄阄阄阄阄阄阄阄█阀[/] +[dim yellow] 🍄 🌲 🍄 🌲[/] +[bold white] Test-Agent [yellow]v{version}[/yellow][/] +[dim] {experts} experts · {skills} skills[/] +[dim yellow] !help 
— swift, elegant testing[/] +""", + "prompt_style": {"prompt": "bold yellow", "prompt.dim": "dim"}, + "colors": {"primary": "yellow", "success": "bright_green", "error": "bright_red", "warning": "yellow", "dim": "dim"}, + "icons": {"ok": "✓", "fail": "✗", "warn": "⚠", "info": "💡"}, + }, + "hedgehog": { + "name": "hedgehog", + "description": "catch every edge case", + "animation_speed": 0.001, + "panel_style": {"text": "bold bright_black", "border": "bright_black"}, + "banner": r""" +[bold bright_black] 阄阄阄阄阄阄阄阄阄阄阈阈阈阈阈阈[/] +[bold bright_black] ▄█▌ (◉ᵥ◉)🍄 ▐█阄阄[/] +[bold bright_black] ██▌ 🍂 🌰 ▐██[/] +[bold bright_black] 阀█阄阄阄阄阄阄阄阄阄阄阄阄█阀[/] +[dim bright_black] 🌰 🍂 🌰 🍂[/] +[bold white] Test-Agent [bright_black]v{version}[/bright_black][/] +[dim] {experts} experts · {skills} skills[/] +[dim bright_black] !help — catch every edge case[/] +""", + "prompt_style": {"prompt": "bold bright_black", "prompt.dim": "dim"}, + "colors": {"primary": "bright_black", "success": "bright_green", "error": "bright_red", "warning": "yellow", "dim": "dim"}, + "icons": {"ok": "✓", "fail": "✗", "warn": "⚠", "info": "💡"}, + }, } diff --git a/runtime/cli/slash_commands.py b/runtime/cli/slash_commands.py index 20f045ee..60be7b3c 100644 --- a/runtime/cli/slash_commands.py +++ b/runtime/cli/slash_commands.py @@ -18,6 +18,7 @@ class CommandDef: args_hint: str = "" handler: Callable | None = None nl_triggers: list[str] = field(default_factory=list) # 自然语言触发词 + cli_module: str = "" # 非空=暴露为CLI命令,值为模块名(如"bootstrap") COMMAND_REGISTRY: list[CommandDef] = [] @@ -27,8 +28,13 @@ def register(name: str, description: str, *, description_zh: str = "", aliases: list[str] | None = None, args_hint: str = "", - nl_triggers: list[str] | None = None): - """Decorator: register a slash command handler with bilingual metadata.""" + nl_triggers: list[str] | None = None, + cli_module: str = ""): + """Decorator: register a slash command handler with bilingual metadata. 
+ + cli_module — if non-empty, also exposed as a CLI command via + `runtime.cli.commands.<cli_module>.register(app)`. + """ def decorator(fn): COMMAND_REGISTRY.append( @@ -40,6 +46,7 @@ def decorator(fn): args_hint=args_hint, handler=fn, nl_triggers=nl_triggers or [], + cli_module=cli_module, ) ) return fn @@ -52,7 +59,7 @@ def decorator(fn): def resolve(name: str) -> CommandDef | None: """Look up command by name or alias.""" - name = name.lstrip("/").strip().lower() + name = name.lstrip("!").strip().lower() for cmd in COMMAND_REGISTRY: if cmd.name == name or name in cmd.aliases: return cmd @@ -71,7 +78,7 @@ def resolve_nl(text: str) -> CommandDef | None: def closest(name: str) -> str | None: """Find closest matching command via edit distance (thefuck pattern).""" - name = name.lstrip("/").strip().lower() + name = name.lstrip("!").strip().lower() if not name: return None best, best_dist = None, 999 @@ -107,6 +114,15 @@ def all_commands() -> list[CommandDef]: return list(COMMAND_REGISTRY) +def _exec_cli(command: str, args: str) -> None: + """Run a tagent CLI command via subprocess.""" + import subprocess, sys + cmd = [sys.executable, "-m", "runtime.cli.main", command] + if args.strip(): + cmd.extend(args.split()) + subprocess.run(cmd) + + def _run_with_argv(cmd: list[str], fn) -> None: """Execute fn with temporary sys.argv, restoring afterward.""" import sys @@ -160,11 +176,12 @@ def _cmd_quit(args: str) -> None: @register("test", "Full test pipeline (11-step)", description_zh="完整测试流程(11步)", - aliases=["tc"], args_hint="<target>", nl_triggers=["测试", "跑测试", "完整测试", "test"]) + aliases=["tc"], args_hint="<target>", nl_triggers=["测试", "跑测试", "完整测试", "test"], + cli_module="test_coordinator") def _cmd_test(args: str) -> None: if not args.strip(): from runtime.cli._shared import console - console.print("[red]Usage: /test <path|URL|text>[/]") + console.print("[red]Usage: !test <path|URL|text>[/]") return from runtime.orchestrator.workflows.test_coordinator import 
TestCoordinatorPipeline TestCoordinatorPipeline().run(args.strip()) @@ -172,74 +189,74 @@ def _cmd_test(args: str) -> None: @register("run", "Plan + execute (quick)", description_zh="快速规划执行", - aliases=["r"], args_hint="<target>", nl_triggers=["运行", "执行", "快速测试", "跑一下"]) + aliases=["r"], args_hint="<target>", nl_triggers=["运行", "执行", "快速测试", "跑一下"], + cli_module="run") def _cmd_run(args: str) -> None: if not args.strip(): from runtime.cli._shared import console - console.print("[red]Usage: /run <path|URL|text>[/]") + console.print("[red]Usage: !run <path|URL|text>[/]") return - from runtime.cli.commands.run import run - _run_with_argv(["tagent", "run"] + args.split(), run) + _exec_cli("run", args) @register("plan", "Plan only", description_zh="仅规划不执行", - aliases=["p"], args_hint="<target>", nl_triggers=["计划", "规划", "只规划", "不执行", "出计划"]) + aliases=["p"], args_hint="<target>", nl_triggers=["计划", "规划", "只规划", "不执行", "出计划"], + cli_module="run") def _cmd_plan(args: str) -> None: if not args.strip(): from runtime.cli._shared import console - console.print("[red]Usage: /plan <path|URL|text>[/]") + console.print("[red]Usage: !plan <path|URL|text>[/]") return - from runtime.cli.commands.run import plan - _run_with_argv(["tagent", "plan"] + args.split(), plan) + _exec_cli("plan", args) @register("doctor", "Health check", description_zh="健康检查", - aliases=["health"], args_hint="[--agents] [--probe]", nl_triggers=["健康检查", "检查一下", "诊断", "体检", "环境检查"]) + aliases=["health"], args_hint="[--agents] [--probe]", nl_triggers=["健康检查", "检查一下", "诊断", "体检", "环境检查"], + cli_module="doctor") def _cmd_doctor(args: str) -> None: - from runtime.cli.commands.doctor import doctor - _run_with_argv(["tagent", "doctor"] + (args.split() if args.strip() else []), doctor) + _exec_cli("doctor", args) -@register("ls", "List experts + skills", +@register("catalog", "List experts + skills", description_zh="列出专家和技能", - aliases=["list", "catalog"], nl_triggers=["列出", "列表", "目录", "有哪些", "显示所有"]) + 
aliases=["ls", "list"], nl_triggers=["列出", "列表", "目录", "有哪些", "显示所有"], + cli_module="catalog") def _cmd_ls(args: str) -> None: - from runtime.cli.commands.catalog import catalog - _run_with_argv(["tagent", "catalog"], catalog) + _exec_cli("catalog", args) -@register("setup", "Generate config", +@register("init", "Generate config", description_zh="生成配置文件", - aliases=["init"], args_hint="[--preset ...]", nl_triggers=["设置", "配置", "初始化", "生成配置"]) + aliases=["setup"], args_hint="[--preset ...]", nl_triggers=["设置", "配置", "初始化", "生成配置"], + cli_module="init") def _cmd_setup(args: str) -> None: - from runtime.cli.commands.init import init_project - _run_with_argv(["tagent", "init"] + (args.split() if args.strip() else []), init_project) + _exec_cli("init", args) -@register("ready", "Release readiness", +@register("readiness", "Release readiness", description_zh="发布就绪检查", - aliases=["readiness"], nl_triggers=["就绪", "发布检查", "准备好了吗", "上线检查"]) + aliases=["ready"], nl_triggers=["就绪", "发布检查", "准备好了吗", "上线检查"], + cli_module="readiness") def _cmd_ready(args: str) -> None: - from runtime.cli.commands.readiness import readiness - _run_with_argv(["tagent", "readiness"] + (args.split() if args.strip() else []), readiness) + _exec_cli("readiness", args) -@register("check", "Framework self-test", +@register("selftest", "Framework self-test", description_zh="框架自检", - aliases=["selftest"], args_hint="[--e2e] [--strict]", nl_triggers=["自检", "框架检查", "验证"]) + aliases=["check"], args_hint="[--e2e] [--strict]", nl_triggers=["自检", "框架检查", "验证"], + cli_module="selftest") def _cmd_check(args: str) -> None: - from runtime.cli.commands.selftest import selftest - _run_with_argv(["tagent", "selftest"] + (args.split() if args.strip() else []), selftest) + _exec_cli("selftest", args) @register("demo", "Quick demo", description_zh="快速演示", - args_hint="[--real-llm]", nl_triggers=["演示", "demo", "试一下"]) + args_hint="[--real-llm]", nl_triggers=["演示", "demo", "试一下"], + cli_module="demo") def _cmd_demo(args: 
str) -> None: - from runtime.cli.commands.demo import demo - _run_with_argv(["tagent", "demo"] + (args.split() if args.strip() else []), demo) + _exec_cli("demo", args) # ═══════════════════════════════════════════════════════════════════ @@ -314,7 +331,8 @@ def _repl_sessions(args): _cmd_sessions(args) @register("resume", "Load session", description_zh="恢复会话", nl_triggers=["恢复", "继续", "加载会话", "resume"]) def _repl_resume(args): _cmd_resume(args) -@register("export", "Export conversation", description_zh="导出对话", nl_triggers=["导出", "导出对话", "保存对话", "export"]) +@register("save", "Export conversation", description_zh="导出对话", + aliases=["export"], nl_triggers=["导出", "导出对话", "保存对话", "save"]) def _repl_export(args): _cmd_export(args) @register("compact", "Compress context", description_zh="压缩上下文", nl_triggers=["压缩", "精简", "总结", "摘要"]) @@ -371,7 +389,8 @@ def _repl_alias(args): _cmd_alias(args) @register("ws", "Workspace manager", description_zh="工作区管理", nl_triggers=["工作区", "工作空间", "切换项目"]) def _repl_ws(args): _cmd_ws(args) -@register("gateway", "Gateway status", description_zh="网关状态", nl_triggers=["网关", "消息平台", "即时通讯"]) +@register("gateway", "Gateway status", description_zh="网关状态", nl_triggers=["网关", "消息平台", "即时通讯"], + cli_module="gateway") def _repl_gateway(args): _cmd_gateway(args) @register("task", "Task list", description_zh="任务管理", nl_triggers=["任务", "任务列表", "代办", "todo"]) diff --git a/runtime/config/settings.py b/runtime/config/settings.py index 831691a4..0730baf8 100644 --- a/runtime/config/settings.py +++ b/runtime/config/settings.py @@ -78,6 +78,20 @@ class Settings(BaseSettings): notification_webhook_url: str = Field(default="") error_report_recipients: str = Field(default="") + # ── External integrations ── + dingtalk_app_key: str = Field(default="") + dingtalk_app_secret: str = Field(default="") + dingtalk_agent_id: str = Field(default="") + dingtalk_webhook_url: str = Field(default="") + telegram_bot_token: str = Field(default="") + telegram_chat_id: str = 
Field(default="") + zentao_url: str = Field(default="") + zentao_account: str = Field(default="") + zentao_password: str = Field(default="") + github_token: str = Field(default="") + github_repo: str = Field(default="") + prd_http_token: str = Field(default="") + # ── Enterprise / CI ── proxy_url: str = Field(default="") trusted_ca_bundle: str = Field(default="") @@ -88,7 +102,8 @@ class Settings(BaseSettings): def model_post_init(self, _context: object) -> None: """Resolve relative Path fields to absolute after model init.""" root = self.project_root - for attr in ("experts_dir", "skills_dir", "scripts_dir", "workspace_dir"): + for attr in ("experts_dir", "skills_dir", "scripts_dir", "workspace_dir", + "config_dir", "templates_dir"): p = getattr(self, attr) if not p.is_absolute(): object.__setattr__(self, attr, (root / p).resolve()) diff --git a/runtime/docker-compose.yml b/runtime/docker-compose.yml index aae22d77..8799dbed 100644 --- a/runtime/docker-compose.yml +++ b/runtime/docker-compose.yml @@ -18,12 +18,12 @@ services: retries: 10 minio: - image: minio/minio:RELEASE.2024-12-18T13-15-44Z + image: minio/minio:${MINIO_TAG:-RELEASE.2024-12-18T13-15-44Z} container_name: tagent-minio command: server /data --console-address ":9001" environment: - MINIO_ROOT_USER: tagent - MINIO_ROOT_PASSWORD: tagent-secret + MINIO_ROOT_USER: ${MINIO_ROOT_USER:-tagent} + MINIO_ROOT_PASSWORD: ${MINIO_ROOT_PASSWORD:-tagent-secret} ports: - "9000:9000" - "9001:9001" @@ -35,19 +35,19 @@ services: retries: 10 minio-init: - image: minio/mc:RELEASE.2024-11-21T17-21-54Z + image: minio/mc:${MINIO_MC_TAG:-RELEASE.2024-11-21T17-21-54Z} depends_on: minio: condition: service_healthy entrypoint: > /bin/sh -c " - mc alias set local http://minio:9000 tagent tagent-secret && + mc alias set local http://minio:9000 ${MINIO_ROOT_USER:-tagent} ${MINIO_ROOT_PASSWORD:-tagent-secret} && mc mb --ignore-existing local/tagent-evidence && exit 0 " prefect-server: - image: prefecthq/prefect:2.20-python3.11 + 
image: prefecthq/prefect:${PREFECT_TAG:-2.20-python3.11} container_name: tagent-prefect command: prefect server start --host 0.0.0.0 environment: @@ -59,7 +59,7 @@ services: # Observability profile: docker compose --profile observability up tempo: profiles: ["observability"] - image: grafana/tempo:2.6.0 + image: grafana/tempo:${TEMPO_TAG:-2.6.0} command: ["-config.file=/etc/tempo.yaml"] volumes: - ./observability/tempo.yaml:/etc/tempo.yaml:ro @@ -70,13 +70,13 @@ services: loki: profiles: ["observability"] - image: grafana/loki:3.2.0 + image: grafana/loki:${LOKI_TAG:-3.2.0} ports: - "3100:3100" grafana: profiles: ["observability"] - image: grafana/grafana:11.3.0 + image: grafana/grafana:${GRAFANA_TAG:-11.3.0} environment: GF_AUTH_ANONYMOUS_ENABLED: "true" GF_AUTH_ANONYMOUS_ORG_ROLE: Admin diff --git a/runtime/gateway/platforms/dingtalk.py b/runtime/gateway/platforms/dingtalk.py index eb99e7ab..c05e189c 100644 --- a/runtime/gateway/platforms/dingtalk.py +++ b/runtime/gateway/platforms/dingtalk.py @@ -11,39 +11,45 @@ from __future__ import annotations import os +import threading import time from loguru import logger +from runtime.config.settings import get_settings from runtime.gateway.base import DeliveryResult, Message, Platform, is_safe_webhook_url, register _ACCESS_TOKEN: str | None = None _ACCESS_TOKEN_EXPIRY: float = 0 +_token_lock = threading.Lock() def _get_access_token(app_key: str, app_secret: str) -> str | None: - """Obtain DingTalk access_token (cached, 2h TTL).""" + """Obtain DingTalk access_token (cached, 2h TTL). 
Thread-safe.""" global _ACCESS_TOKEN, _ACCESS_TOKEN_EXPIRY import httpx if _ACCESS_TOKEN and time.time() < _ACCESS_TOKEN_EXPIRY - 120: return _ACCESS_TOKEN - try: - r = httpx.get( - "https://oapi.dingtalk.com/gettoken", - params={"appkey": app_key, "appsecret": app_secret}, - timeout=10, - ) - data = r.json() - token = data.get("access_token") - if token: - _ACCESS_TOKEN = token - _ACCESS_TOKEN_EXPIRY = time.time() + data.get("expires_in", 7200) - return token - except Exception as e: - logger.warning("DingTalk access_token fetch failed: {}", e) - return None + with _token_lock: + if _ACCESS_TOKEN and time.time() < _ACCESS_TOKEN_EXPIRY - 120: + return _ACCESS_TOKEN + try: + r = httpx.get( + "https://oapi.dingtalk.com/gettoken", + params={"appkey": app_key, "appsecret": app_secret}, + timeout=10, + ) + data = r.json() + token = data.get("access_token") + if token: + _ACCESS_TOKEN = token + _ACCESS_TOKEN_EXPIRY = time.time() + data.get("expires_in", 7200) + return token + except Exception as e: + logger.warning("DingTalk access_token fetch failed: {}", e) + return None @register("dingtalk") @@ -71,10 +77,11 @@ async def configure( agent_id: str | None = None, **_kwargs, ) -> None: - self.webhook = webhook_url or os.getenv("DINGTALK_WEBHOOK_URL") - self.app_key = app_key or os.getenv("DINGTALK_APP_KEY") - self.app_secret = app_secret or os.getenv("DINGTALK_APP_SECRET") - self.agent_id = agent_id or os.getenv("DINGTALK_AGENT_ID") + s = get_settings() + self.webhook = webhook_url or s.dingtalk_webhook_url or os.getenv("DINGTALK_WEBHOOK_URL") + self.app_key = app_key or s.dingtalk_app_key or os.getenv("DINGTALK_APP_KEY") + self.app_secret = app_secret or s.dingtalk_app_secret or os.getenv("DINGTALK_APP_SECRET") + self.agent_id = agent_id or s.dingtalk_agent_id or os.getenv("DINGTALK_AGENT_ID") async def send(self, msg: Message, *, target: str | None = None) -> DeliveryResult: """Send message. 
Uses API mode when target is a userid, webhook otherwise.""" diff --git a/runtime/gateway/platforms/qqbot.py b/runtime/gateway/platforms/qqbot.py index 3d083280..b14c26d1 100644 --- a/runtime/gateway/platforms/qqbot.py +++ b/runtime/gateway/platforms/qqbot.py @@ -11,6 +11,7 @@ from __future__ import annotations import os +import threading import time from loguru import logger @@ -19,31 +20,35 @@ _ACCESS_TOKEN: str | None = None _ACCESS_TOKEN_EXPIRY: float = 0 +_token_lock = threading.Lock() def _get_access_token(app_id: str, client_secret: str) -> str | None: - """Obtain QQ Bot access_token (cached, 2h TTL).""" + """Obtain QQ Bot access_token (cached, 2h TTL). Thread-safe.""" global _ACCESS_TOKEN, _ACCESS_TOKEN_EXPIRY import httpx if _ACCESS_TOKEN and time.time() < _ACCESS_TOKEN_EXPIRY - 60: return _ACCESS_TOKEN - try: - r = httpx.post( - "https://bots.qq.com/app/getAppAccessToken", - json={"appId": app_id, "clientSecret": client_secret}, - timeout=15, - ) - data = r.json() - token = data.get("access_token") - if token: - _ACCESS_TOKEN = token - _ACCESS_TOKEN_EXPIRY = time.time() + data.get("expires_in", 7200) - return token - except Exception as e: - logger.warning("QQ Bot access_token fetch failed: {}", e) - return None + with _token_lock: + if _ACCESS_TOKEN and time.time() < _ACCESS_TOKEN_EXPIRY - 60: + return _ACCESS_TOKEN + try: + r = httpx.post( + "https://bots.qq.com/app/getAppAccessToken", + json={"appId": app_id, "clientSecret": client_secret}, + timeout=15, + ) + data = r.json() + token = data.get("access_token") + if token: + _ACCESS_TOKEN = token + _ACCESS_TOKEN_EXPIRY = time.time() + data.get("expires_in", 7200) + return token + except Exception as e: + logger.warning("QQ Bot access_token fetch failed: {}", e) + return None @register("qqbot") diff --git a/runtime/gateway/platforms/telegram.py b/runtime/gateway/platforms/telegram.py index 35f3dfee..eb26ad3a 100644 --- a/runtime/gateway/platforms/telegram.py +++ b/runtime/gateway/platforms/telegram.py @@ 
-4,6 +4,7 @@ import os +from runtime.config.settings import get_settings from runtime.gateway.base import DeliveryResult, Message, Platform, register @@ -14,8 +15,9 @@ def __init__(self) -> None: self.default_chat: str | None = None async def configure(self, *, token: str | None = None, chat_id: str | None = None, **_kwargs) -> None: - self.token = token or os.getenv("TELEGRAM_BOT_TOKEN") - self.default_chat = chat_id or os.getenv("TELEGRAM_CHAT_ID") + s = get_settings() + self.token = token or s.telegram_bot_token or os.getenv("TELEGRAM_BOT_TOKEN") + self.default_chat = chat_id or s.telegram_chat_id or os.getenv("TELEGRAM_CHAT_ID") async def send(self, msg: Message, *, target: str | None = None) -> DeliveryResult: try: diff --git a/runtime/mcp/client.py b/runtime/mcp/client.py index 0ca2235a..5a4c2d3b 100644 --- a/runtime/mcp/client.py +++ b/runtime/mcp/client.py @@ -39,8 +39,10 @@ class McpToolResult: def _find_config() -> Path | None: """Locate the .mcp.json config file.""" from runtime.config.settings import get_settings + s = get_settings() candidates = [ - get_settings().config_dir / ".mcp.json", + s.config_dir / ".mcp.json", + s.project_root / ".mcp.json", ] for p in candidates: if p.is_file(): diff --git a/runtime/mcp/knowledge_base/server.py b/runtime/mcp/knowledge_base/server.py index 802fd9c2..211a5b99 100644 --- a/runtime/mcp/knowledge_base/server.py +++ b/runtime/mcp/knowledge_base/server.py @@ -23,6 +23,7 @@ DEFAULT_EMBED_MODEL = os.getenv("TAGENT_EMBED_MODEL", "openai/text-embedding-3-small") DEFAULT_DIM = int(os.getenv("TAGENT_EMBED_DIM", "1536")) +_EMBED_DEGRADED = False # set True when embedding falls back to stub def _vec_literal(vec: list[float]) -> str: @@ -53,6 +54,8 @@ async def _embed(text: str) -> list[float]: resp = await litellm.aembedding(model=DEFAULT_EMBED_MODEL, input=[text]) return resp.data[0]["embedding"] except Exception as e: + global _EMBED_DEGRADED + _EMBED_DEGRADED = True logger.warning("embedding failed, fallback to stub: 
{}", e) return _embed_stub(text) @@ -60,7 +63,7 @@ async def _embed(text: str) -> list[float]: @tool_decision_logged("embed") async def tool_embed(text: str) -> dict: vec = await _embed(text) - return {"dim": len(vec), "model": DEFAULT_EMBED_MODEL, "sample": vec[:8]} + return {"dim": len(vec), "model": DEFAULT_EMBED_MODEL, "sample": vec[:8], "degraded": _EMBED_DEGRADED} def _is_postgres() -> bool: @@ -151,6 +154,7 @@ async def tool_search_similar(text: str, top_k: int = 5, source_type: str = "cas ).mappings().all() return { "count": len(rows), + "degraded": _EMBED_DEGRADED, "results": [ { "id": r["id"], diff --git a/runtime/orchestrator/agents/base.py b/runtime/orchestrator/agents/base.py index 223fcfc2..7d2a1d47 100644 --- a/runtime/orchestrator/agents/base.py +++ b/runtime/orchestrator/agents/base.py @@ -11,6 +11,8 @@ from loguru import logger +from runtime.router.llm_client import _strip_json_fences + @dataclass(slots=True) class RunnerContext: @@ -146,11 +148,7 @@ def run(self, ctx: RunnerContext) -> RunnerResult: @staticmethod def _parse_json(raw: str) -> dict[str, Any]: - raw = raw.strip() - if raw.startswith("```"): - raw = raw[3:-3].strip() if raw.endswith("```") else raw[3:] - if "\n" in raw: - _, raw = raw.split("\n", 1) + raw = _strip_json_fences(raw) start = raw.find("{") end = raw.rfind("}") if start < 0 or end < 0: diff --git a/runtime/pyproject.toml b/runtime/pyproject.toml index c3e52efa..444c46ac 100644 --- a/runtime/pyproject.toml +++ b/runtime/pyproject.toml @@ -56,7 +56,7 @@ build-backend = "setuptools.build_meta" [tool.setuptools.packages.find] where = [".."] -include = ["runtime*"] +include = ["runtime*", "utils*"] [tool.ruff] line-length = 110 @@ -72,6 +72,7 @@ ignore = ["E501"] asyncio_mode = "auto" testpaths = ["tests"] addopts = "--cov-branch" +# CI overrides addopts to add: --cov=runtime --cov=utils --cov-fail-under=30 log_cli = false filterwarnings = [ "ignore::DeprecationWarning:prefect.*", diff --git a/runtime/router/llm_client.py 
b/runtime/router/llm_client.py index 25bc6785..35efa289 100644 --- a/runtime/router/llm_client.py +++ b/runtime/router/llm_client.py @@ -8,6 +8,9 @@ from loguru import logger +# Suppress litellm remote cost-map fetch noise (5s timeout in air-gapped envs) +os.environ.setdefault("LITELLM_SUPPRESS_DEBUG_INFO", "1") + from runtime.config.settings import get_settings PROVIDER_MODEL_MAP: dict[str, str] = { @@ -20,6 +23,22 @@ } +def _resolve_model(provider: str) -> str: + """Resolve provider → model name. Env override: TAGENT_LLM_MODEL_<PROVIDER>.""" + env_key = f"TAGENT_LLM_MODEL_{provider.upper()}" + return os.getenv(env_key, PROVIDER_MODEL_MAP.get(provider, provider)) + + +def _strip_json_fences(raw: str) -> str: + """Strip markdown code fences + language tag from LLM output.""" + raw = raw.strip() + if raw.startswith("```"): + raw = raw[3:-3].strip() if raw.endswith("```") else raw[3:] + if "\n" in raw: + _, raw = raw.split("\n", 1) + return raw + + def _call_responses_api(provider: str, model: str, system: str, user: str, temperature: float, max_tokens: int | None, json_mode: bool) -> str: @@ -97,7 +116,7 @@ def _call(self, provider: str, system: str, user: str, temperature: float, *, ma from runtime.router.model_router import select_model model = select_model(user, provider) except ImportError: - model = PROVIDER_MODEL_MAP.get(provider, provider) + model = _resolve_model(provider) cached = get_cached(provider, model, system, user, temperature) if cached is not None: return cached @@ -111,7 +130,7 @@ def _call(self, provider: str, system: str, user: str, temperature: float, *, ma from runtime.router.model_router import select_model model = select_model(user, provider) except ImportError: - model = PROVIDER_MODEL_MAP.get(provider, provider) + model = _resolve_model(provider) # Allow env var override for any provider (supports any model / 中转站) if os.environ.get("TAGENT_LLM_MODEL"): model = os.environ["TAGENT_LLM_MODEL"] @@ -157,13 +176,7 @@ def _call(self, provider: 
str, system: str, user: str, temperature: float, *, ma @staticmethod def _extract_json(raw: str) -> dict[str, Any]: - raw = raw.strip() - if raw.startswith("```"): - # Strip exactly one fenced code block marker - raw = raw[3:-3].strip() if raw.endswith("```") else raw[3:] - # strip leading lang tag e.g. ```json - if "\n" in raw: - _, raw = raw.split("\n", 1) + raw = _strip_json_fences(raw) start = raw.find("{") end = raw.rfind("}") if start < 0 or end < 0: diff --git a/runtime/tests/conftest.py b/runtime/tests/conftest.py index 3f57e398..716b81f1 100644 --- a/runtime/tests/conftest.py +++ b/runtime/tests/conftest.py @@ -2,26 +2,12 @@ from __future__ import annotations +import logging import os -import sys from pathlib import Path import pytest -# Inject utils/ and all subdirectories into sys.path -# V1.x: utils/ reorganized from flat into 12 functional subdirectories -_PROJECT_ROOT = Path(__file__).resolve().parents[2] -if str(_PROJECT_ROOT) not in sys.path: - sys.path.insert(0, str(_PROJECT_ROOT)) - -_UTILS_DIR = _PROJECT_ROOT / "utils" -if _UTILS_DIR.is_dir() and str(_UTILS_DIR) not in sys.path: - sys.path.insert(0, str(_UTILS_DIR)) - for _sub in _UTILS_DIR.iterdir(): - if _sub.is_dir() and not _sub.name.startswith(("_", ".")) and str(_sub) not in sys.path: - sys.path.insert(0, str(_sub)) - - @pytest.fixture(autouse=True) def _env_isolation(tmp_path: Path, monkeypatch): """Isolate each test from real DB/MinIO/Prefect + reset shared state.""" @@ -45,3 +31,21 @@ def _env_isolation(tmp_path: Path, monkeypatch): @pytest.fixture() def project_root() -> Path: return Path(__file__).resolve().parents[2] + + +@pytest.fixture(scope="session", autouse=True) +def _prefect_log_cleanup(): + """Remove Prefect Rich handlers before pytest teardown to prevent + 'I/O operation on closed file' errors when Rich tries to write to + a console that pytest already closed.""" + yield + for logger_name in list(logging.root.manager.loggerDict): + lg = logging.getLogger(logger_name) + for h 
in getattr(lg, "handlers", [])[:]: + mod = type(h).__module__ + if "prefect" in mod or "rich" in mod: + lg.removeHandler(h) + for h in logging.root.handlers[:]: + mod = type(h).__module__ + if "prefect" in mod or "rich" in mod: + logging.root.removeHandler(h) diff --git a/runtime/tests/test_completer.py b/runtime/tests/test_completer.py index d18b8299..52d081e5 100644 --- a/runtime/tests/test_completer.py +++ b/runtime/tests/test_completer.py @@ -96,19 +96,19 @@ def _completions(text: str) -> list[str]: return [comp.text for comp in c.get_completions(doc, None)] def test_partial_command_prefix(self): - comps = self._completions("/he") + comps = self._completions("!he") assert "help" in comps def test_exact_command_prefix(self): - comps = self._completions("/status") + comps = self._completions("!status") assert "status" in comps def test_no_match_returns_empty(self): - comps = self._completions("/xyznonexistent999") + comps = self._completions("!xyznonexistent999") assert len(comps) == 0 def test_empty_after_slash(self): - comps = self._completions("/") + comps = self._completions("!") # All commands should match empty prefix assert len(comps) > 5 # multiple commands available assert "help" in comps or any("help" == c for c in comps) @@ -126,29 +126,29 @@ def _completions(text: str) -> list[str]: return [comp.text for comp in c.get_completions(doc, None)] def test_partial_provider_claude(self): - comps = self._completions("/model cl") + comps = self._completions("!model cl") assert "claude" in comps def test_partial_provider_deepseek(self): - comps = self._completions("/model dee") + comps = self._completions("!model dee") assert "deepseek" in comps def test_partial_provider_openai(self): - comps = self._completions("/model op") + comps = self._completions("!model op") assert "openai" in comps def test_provider_no_match(self): - comps = self._completions("/model xyznon") + comps = self._completions("!model xyznon") assert len(comps) == 0 def 
test_second_arg_no_completion(self): """After /model <provider> <model_name>, no completion.""" - comps = self._completions("/model claude sonnet") + comps = self._completions("!model claude sonnet") assert len(comps) == 0 def test_model_prefix_only(self): """Ensure /model gets command completion not provider completion.""" - comps = self._completions("/mod") + comps = self._completions("!mod") assert "model" in comps # command name completion @@ -199,12 +199,12 @@ def _completions(text: str) -> list[str]: def test_help_not_duplicated(self): """'help' is in both registry and builtins — appears once.""" - comps = self._completions("/hel") + comps = self._completions("!hel") assert comps.count("help") == 1 def test_quit_not_duplicated(self): """'quit' is in both registry and builtins — appears once.""" - comps = self._completions("/qu") + comps = self._completions("!qu") assert comps.count("quit") == 1 diff --git a/runtime/tests/test_utils_absentee.py b/runtime/tests/test_utils_absentee.py index b2e08c7f..d5820000 100644 --- a/runtime/tests/test_utils_absentee.py +++ b/runtime/tests/test_utils_absentee.py @@ -7,9 +7,7 @@ import sys from pathlib import Path -_utils_dir = Path(__file__).resolve().parents[2] / "utils" -if str(_utils_dir) not in sys.path: - sys.path.insert(0, str(_utils_dir)) +# utils package installed via pip install -e runtime/ # ═══════════════════════════════════════════════════════════════ @@ -18,12 +16,12 @@ class TestListGroups: def test_all_9_groups_present(self): - from absentee_scenario_injector import list_groups + from utils.security.absentee_scenario_injector import list_groups groups = list_groups() assert len(groups) == 9 def test_each_group_has_label(self): - from absentee_scenario_injector import list_groups + from utils.security.absentee_scenario_injector import list_groups for g in list_groups(): assert g["id"] assert g["label"] @@ -36,34 +34,34 @@ def test_each_group_has_label(self): class TestQueryScenarios: def 
test_query_all_returns_all(self): - from absentee_scenario_injector import SCENARIOS, query_scenarios + from utils.security.absentee_scenario_injector import SCENARIOS, query_scenarios assert len(query_scenarios()) == len(SCENARIOS) def test_query_by_group(self): - from absentee_scenario_injector import query_scenarios + from utils.security.absentee_scenario_injector import query_scenarios results = query_scenarios(groups=["visual_impairment"]) assert len(results) >= 3 assert all(s.group == "visual_impairment" for s in results) def test_query_by_severity(self): - from absentee_scenario_injector import query_scenarios + from utils.security.absentee_scenario_injector import query_scenarios results = query_scenarios(severity="P0") assert len(results) > 0 assert all(s.severity == "P0" for s in results) def test_query_by_tags(self): - from absentee_scenario_injector import query_scenarios + from utils.security.absentee_scenario_injector import query_scenarios results = query_scenarios(tags=["screen-reader"]) assert len(results) >= 1 assert any("screen-reader" in s.tags for s in results) def test_query_combined(self): - from absentee_scenario_injector import query_scenarios + from utils.security.absentee_scenario_injector import query_scenarios results = query_scenarios(groups=["visual_impairment"], severity="P0") assert all(s.group == "visual_impairment" and s.severity == "P0" for s in results) def test_query_empty_group(self): - from absentee_scenario_injector import query_scenarios + from utils.security.absentee_scenario_injector import query_scenarios results = query_scenarios(groups=["nonexistent_group"]) assert len(results) == 0 @@ -74,29 +72,29 @@ def test_query_empty_group(self): class TestInjectScenarios: def test_inject_all(self): - from absentee_scenario_injector import SCENARIOS, inject_scenarios + from utils.security.absentee_scenario_injector import SCENARIOS, inject_scenarios results = inject_scenarios() # Default min_severity=P2 includes all assert 
len(results) == len(SCENARIOS) def test_inject_p0_only(self): - from absentee_scenario_injector import inject_scenarios + from utils.security.absentee_scenario_injector import inject_scenarios results = inject_scenarios(min_severity="P0") assert all(s["severity"] == "P0" for s in results) def test_inject_with_count_limit(self): - from absentee_scenario_injector import inject_scenarios + from utils.security.absentee_scenario_injector import inject_scenarios results = inject_scenarios(count=5) assert len(results) == 5 def test_inject_specific_group(self): - from absentee_scenario_injector import inject_scenarios + from utils.security.absentee_scenario_injector import inject_scenarios results = inject_scenarios(groups=["mental_crisis"]) assert len(results) >= 3 assert all(s["group"] == "mental_crisis" for s in results) def test_injected_has_required_fields(self): - from absentee_scenario_injector import inject_scenarios + from utils.security.absentee_scenario_injector import inject_scenarios results = inject_scenarios(count=1) s = results[0] for field in ["id", "group", "severity", "title", "description", "test_steps", "expected"]: @@ -111,7 +109,7 @@ def test_injected_has_required_fields(self): class TestGenerateCharter: def test_generates_markdown(self): - from absentee_scenario_injector import generate_charter, query_scenarios + from utils.security.absentee_scenario_injector import generate_charter, query_scenarios scenarios = query_scenarios(groups=["visual_impairment"], severity="P0") charter = generate_charter(scenarios[0], module="login", duration_min=45) assert "# Charter:" in charter @@ -121,7 +119,7 @@ def test_generates_markdown(self): assert "## 预期结果" in charter def test_batch_generates_files(self, tmp_path): - from absentee_scenario_injector import generate_batch_charters + from utils.security.absentee_scenario_injector import generate_batch_charters paths = generate_batch_charters( groups=["mental_crisis"], severity="P0", output_dir=str(tmp_path), @@ 
-139,7 +137,7 @@ def test_batch_generates_files(self, tmp_path): class TestCoverageReport: def test_full_coverage(self): - from absentee_scenario_injector import coverage_report, inject_scenarios + from utils.security.absentee_scenario_injector import coverage_report, inject_scenarios scenarios = inject_scenarios() report = coverage_report(scenarios) assert report["total_absentee_groups"] == 9 @@ -147,7 +145,7 @@ def test_full_coverage(self): assert len(report["groups_missing"]) == 0 def test_partial_coverage(self): - from absentee_scenario_injector import coverage_report, inject_scenarios + from utils.security.absentee_scenario_injector import coverage_report, inject_scenarios scenarios = inject_scenarios(groups=["visual_impairment", "elderly"]) report = coverage_report(scenarios) assert report["groups_covered"] == 2 @@ -155,7 +153,7 @@ def test_partial_coverage(self): assert len(report["groups_missing"]) == 7 def test_empty_coverage(self): - from absentee_scenario_injector import coverage_report + from utils.security.absentee_scenario_injector import coverage_report report = coverage_report([]) assert report["groups_covered"] == 0 assert report["coverage_pct"] == 0.0 @@ -167,7 +165,7 @@ def test_empty_coverage(self): class TestExport: def test_export_json(self, tmp_path): - from absentee_scenario_injector import export_injection_plan, inject_scenarios + from utils.security.absentee_scenario_injector import export_injection_plan, inject_scenarios scenarios = inject_scenarios(groups=["elderly"]) path = export_injection_plan(scenarios, output_dir=str(tmp_path)) assert Path(path).exists() @@ -176,7 +174,7 @@ def test_export_json(self, tmp_path): assert "coverage" in data def test_ci_summary(self): - from absentee_scenario_injector import ci_summary, inject_scenarios + from utils.security.absentee_scenario_injector import ci_summary, inject_scenarios scenarios = inject_scenarios(groups=["visual_impairment", "mental_crisis"]) text = ci_summary(scenarios) assert 
"visual_impairment" in text or "视觉" in text diff --git a/runtime/tests/test_utils_bug_tracker.py b/runtime/tests/test_utils_bug_tracker.py index 6ea12618..34f2d328 100644 --- a/runtime/tests/test_utils_bug_tracker.py +++ b/runtime/tests/test_utils_bug_tracker.py @@ -8,19 +8,17 @@ import pytest -_utils_dir = Path(__file__).resolve().parents[2] / "utils" -if str(_utils_dir) not in sys.path: - sys.path.insert(0, str(_utils_dir)) +# utils package installed via pip install -e runtime/ class TestBugTrackerBase: def test_cannot_instantiate_abstract(self): - from bug_tracker_base import BugTrackerBase + from utils.trackers.bug_tracker_base import BugTrackerBase with pytest.raises(TypeError): BugTrackerBase() # type: ignore[abstract] def test_concrete_subclass_instantiable(self): - from bug_tracker_base import BugTrackerBase + from utils.trackers.bug_tracker_base import BugTrackerBase class FakeTracker(BugTrackerBase): def submit_bug(self, title, description, severity, attachments=None, reproduce_steps=""): @@ -43,7 +41,7 @@ def query_open_bugs(self, filters=None): assert tracker.get_status("BUG-1")["status"] == "open" def test_missing_method_fails(self): - from bug_tracker_base import BugTrackerBase + from utils.trackers.bug_tracker_base import BugTrackerBase class IncompleteTracker(BugTrackerBase): def submit_bug(self, title, description, severity, attachments=None, reproduce_steps=""): @@ -55,27 +53,27 @@ def submit_bug(self, title, description, severity, attachments=None, reproduce_s class TestTrackerRegistry: def test_zentao_registered(self): - from bug_tracker_base import TRACKER_REGISTRY + from utils.trackers.bug_tracker_base import TRACKER_REGISTRY assert "zentao" in TRACKER_REGISTRY def test_jira_registered(self): - from bug_tracker_base import TRACKER_REGISTRY + from utils.trackers.bug_tracker_base import TRACKER_REGISTRY assert "jira" in TRACKER_REGISTRY def test_github_registered(self): - from bug_tracker_base import TRACKER_REGISTRY + from 
utils.trackers.bug_tracker_base import TRACKER_REGISTRY assert "github" in TRACKER_REGISTRY def test_linear_registered(self): - from bug_tracker_base import TRACKER_REGISTRY + from utils.trackers.bug_tracker_base import TRACKER_REGISTRY assert "linear" in TRACKER_REGISTRY def test_webhook_registered(self): - from bug_tracker_base import TRACKER_REGISTRY + from utils.trackers.bug_tracker_base import TRACKER_REGISTRY assert "webhook" in TRACKER_REGISTRY def test_all_registry_values_are_basetracker_subclasses(self): - from bug_tracker_base import TRACKER_REGISTRY, BugTrackerBase + from utils.trackers.bug_tracker_base import TRACKER_REGISTRY, BugTrackerBase for name, cls in TRACKER_REGISTRY.items(): if name == "zentao": # Legacy: ZentaoBugManager not yet migrated to BugTrackerBase ABC @@ -86,12 +84,12 @@ def test_all_registry_values_are_basetracker_subclasses(self): class TestCreateBugManager: def test_returns_none_for_unknown_tracker(self, monkeypatch): monkeypatch.delenv("BUG_TRACKER", raising=False) - from bug_tracker_base import create_bug_manager + from utils.trackers.bug_tracker_base import create_bug_manager assert create_bug_manager("nonexistent-tracker") is None def test_returns_instance_for_webhook(self, monkeypatch): monkeypatch.setenv("WEBHOOK_BUG_URL", "https://example.com/webhook") - from bug_tracker_base import create_bug_manager + from utils.trackers.bug_tracker_base import create_bug_manager mgr = create_bug_manager("webhook") assert mgr is not None assert type(mgr).__name__ == "WebhookBugManager" diff --git a/runtime/tests/test_utils_evidence_chain.py b/runtime/tests/test_utils_evidence_chain.py index 5c40cb86..e4891b7f 100644 --- a/runtime/tests/test_utils_evidence_chain.py +++ b/runtime/tests/test_utils_evidence_chain.py @@ -7,8 +7,7 @@ import pytest -sys.path.insert(0, str(Path(__file__).resolve().parents[2] / "utils")) -from evidence_chain import ( # noqa: E402 +from utils.reporting.evidence_chain import ( # noqa: E402 ChainOfCustody, 
EvidenceItem, EvidencePackage, diff --git a/runtime/tests/test_utils_fairness.py b/runtime/tests/test_utils_fairness.py index 33726dc4..dfe3de83 100644 --- a/runtime/tests/test_utils_fairness.py +++ b/runtime/tests/test_utils_fairness.py @@ -10,9 +10,7 @@ import numpy as np import pytest -_utils_dir = Path(__file__).resolve().parents[2] / "utils" -if str(_utils_dir) not in sys.path: - sys.path.insert(0, str(_utils_dir)) +# utils package installed via pip install -e runtime/ # ═══════════════════════════════════════════════════════════════ @@ -81,14 +79,14 @@ def biased_predictions(): class TestAuditDatasetBias: def test_balanced_dataset_passes(self, balanced_dataset): - from fairness_auditor import audit_dataset_bias + from utils.a11y_i18n.fairness_auditor import audit_dataset_bias y_true, sensitive = balanced_dataset report = audit_dataset_bias(y_true, sensitive, group_names=["A", "B"]) assert report.overall_severity == "pass" assert report.source == "dataset" def test_biased_dataset_detects_representation_gap(self, biased_dataset): - from fairness_auditor import audit_dataset_bias + from utils.a11y_i18n.fairness_auditor import audit_dataset_bias y_true, sensitive = biased_dataset report = audit_dataset_bias(y_true, sensitive, group_names=["A", "B"], representation_threshold=0.15) @@ -98,7 +96,7 @@ def test_biased_dataset_detects_representation_gap(self, biased_dataset): assert not repr_result.passed def test_biased_dataset_detects_label_imbalance(self, biased_dataset): - from fairness_auditor import audit_dataset_bias + from utils.a11y_i18n.fairness_auditor import audit_dataset_bias y_true, sensitive = biased_dataset report = audit_dataset_bias(y_true, sensitive, group_names=["A", "B"]) label_result = next(r for r in report.fairness_results @@ -106,19 +104,19 @@ def test_biased_dataset_detects_label_imbalance(self, biased_dataset): assert not label_result.passed def test_recommendations_generated_for_biased(self, biased_dataset): - from fairness_auditor import 
audit_dataset_bias + from utils.a11y_i18n.fairness_auditor import audit_dataset_bias y_true, sensitive = biased_dataset report = audit_dataset_bias(y_true, sensitive, group_names=["A", "B"]) assert len(report.recommendations) > 0 def test_mismatched_group_names_raises(self, balanced_dataset): - from fairness_auditor import audit_dataset_bias + from utils.a11y_i18n.fairness_auditor import audit_dataset_bias y_true, sensitive = balanced_dataset with pytest.raises(ValueError): audit_dataset_bias(y_true, sensitive, group_names=["only_one"]) def test_repr_custom_threshold(self, biased_dataset): - from fairness_auditor import audit_dataset_bias + from utils.a11y_i18n.fairness_auditor import audit_dataset_bias y_true, sensitive = biased_dataset # Very permissive threshold → should pass report = audit_dataset_bias(y_true, sensitive, group_names=["A", "B"], @@ -134,21 +132,21 @@ def test_repr_custom_threshold(self, biased_dataset): class TestAuditModelFairness: def test_perfect_predictions_pass_all_metrics(self, fair_predictions): - from fairness_auditor import audit_model_fairness + from utils.a11y_i18n.fairness_auditor import audit_model_fairness y_true, y_pred, sensitive = fair_predictions report = audit_model_fairness(y_true, y_pred, sensitive, group_names=["A", "B"]) assert report.overall_severity == "pass" assert all(r.passed for r in report.fairness_results) def test_biased_predictions_detected(self, biased_predictions): - from fairness_auditor import audit_model_fairness + from utils.a11y_i18n.fairness_auditor import audit_model_fairness y_true, y_pred, sensitive = biased_predictions report = audit_model_fairness(y_true, y_pred, sensitive, group_names=["A", "B"]) # At least equal_opportunity should fail (TPR gap) assert report.overall_severity in ("warning", "fail") def test_disparate_impact_computed(self, fair_predictions): - from fairness_auditor import audit_model_fairness + from utils.a11y_i18n.fairness_auditor import audit_model_fairness y_true, y_pred, 
sensitive = fair_predictions report = audit_model_fairness(y_true, y_pred, sensitive, group_names=["A", "B"]) di = next(r for r in report.fairness_results if r.metric == "disparate_impact") @@ -156,7 +154,7 @@ def test_disparate_impact_computed(self, fair_predictions): assert di.value <= 1.0 def test_group_metrics_populated(self, fair_predictions): - from fairness_auditor import audit_model_fairness + from utils.a11y_i18n.fairness_auditor import audit_model_fairness y_true, y_pred, sensitive = fair_predictions report = audit_model_fairness(y_true, y_pred, sensitive, group_names=["X", "Y"]) assert len(report.groups) == 2 @@ -166,7 +164,7 @@ def test_group_metrics_populated(self, fair_predictions): assert g.fpr is not None def test_all_6_metrics_present(self, biased_predictions): - from fairness_auditor import audit_model_fairness + from utils.a11y_i18n.fairness_auditor import audit_model_fairness y_true, y_pred, sensitive = biased_predictions report = audit_model_fairness(y_true, y_pred, sensitive, group_names=["A", "B"]) metric_names = {r.metric for r in report.fairness_results} @@ -197,13 +195,13 @@ def intersectional_data(self): return y_true, y_pred, {"gender": gender, "race": race} def test_intersectional_groups_created(self, intersectional_data): - from fairness_auditor import audit_intersectional + from utils.a11y_i18n.fairness_auditor import audit_intersectional y_true, y_pred, sensitive = intersectional_data report = audit_intersectional(y_true, y_pred, sensitive, min_group_size=5) assert len(report.groups) >= 2 def test_intersectional_metrics_present(self, intersectional_data): - from fairness_auditor import audit_intersectional + from utils.a11y_i18n.fairness_auditor import audit_intersectional y_true, y_pred, sensitive = intersectional_data report = audit_intersectional(y_true, y_pred, sensitive, min_group_size=5) metric_names = {r.metric for r in report.fairness_results} @@ -211,7 +209,7 @@ def test_intersectional_metrics_present(self, 
intersectional_data): assert "intersectional_accuracy_gap" in metric_names def test_small_groups_filtered(self, intersectional_data): - from fairness_auditor import audit_intersectional + from utils.a11y_i18n.fairness_auditor import audit_intersectional y_true, y_pred, sensitive = intersectional_data # With high min_group_size, all groups should be filtered report = audit_intersectional(y_true, y_pred, sensitive, min_group_size=1000) @@ -225,7 +223,7 @@ def test_small_groups_filtered(self, intersectional_data): class TestAuditDecisionFairness: def test_fair_decisions_pass(self): - from fairness_auditor import audit_decision_fairness + from utils.a11y_i18n.fairness_auditor import audit_decision_fairness rng = np.random.RandomState(42) decisions = rng.choice([0, 1], 200, p=[0.5, 0.5]).astype(float) sensitive = np.array([0] * 100 + [1] * 100) @@ -234,7 +232,7 @@ def test_fair_decisions_pass(self): assert report.overall_severity in ("pass", "warning") def test_biased_decisions_detected(self): - from fairness_auditor import audit_decision_fairness + from utils.a11y_i18n.fairness_auditor import audit_decision_fairness rng = np.random.RandomState(42) # Group 0: 80% approved, Group 1: 20% approved d0 = rng.choice([0, 1], 100, p=[0.2, 0.8]).astype(float) @@ -245,7 +243,7 @@ def test_biased_decisions_detected(self): assert report.overall_severity == "fail" def test_decision_groups_match(self): - from fairness_auditor import audit_decision_fairness + from utils.a11y_i18n.fairness_auditor import audit_decision_fairness decisions = np.array([1, 1, 0, 0, 1, 0]) sensitive = np.array([0, 0, 0, 1, 1, 1]) report = audit_decision_fairness(decisions, sensitive, group_names=["X", "Y"]) @@ -260,7 +258,7 @@ def test_decision_groups_match(self): class TestExport: def test_export_creates_file(self, balanced_dataset, tmp_path): - from fairness_auditor import audit_dataset_bias, export_bias_report + from utils.a11y_i18n.fairness_auditor import audit_dataset_bias, export_bias_report y_true, 
sensitive = balanced_dataset report = audit_dataset_bias(y_true, sensitive, group_names=["A", "B"]) path = export_bias_report(report, output_dir=str(tmp_path)) @@ -271,7 +269,7 @@ def test_export_creates_file(self, balanced_dataset, tmp_path): assert len(data["fairness_results"]) == 2 def test_summary_contains_metrics(self, fair_predictions): - from fairness_auditor import audit_model_fairness, summary + from utils.a11y_i18n.fairness_auditor import audit_model_fairness, summary y_true, y_pred, sensitive = fair_predictions report = audit_model_fairness(y_true, y_pred, sensitive, group_names=["A", "B"]) text = summary(report) @@ -279,7 +277,7 @@ def test_summary_contains_metrics(self, fair_predictions): assert "equal_opportunity" in text def test_summary_shows_severity(self, fair_predictions): - from fairness_auditor import audit_model_fairness, summary + from utils.a11y_i18n.fairness_auditor import audit_model_fairness, summary y_true, y_pred, sensitive = fair_predictions report = audit_model_fairness(y_true, y_pred, sensitive, group_names=["A", "B"]) text = summary(report) diff --git a/runtime/tests/test_utils_i18n_taboo.py b/runtime/tests/test_utils_i18n_taboo.py index 35fe8642..c1244678 100644 --- a/runtime/tests/test_utils_i18n_taboo.py +++ b/runtime/tests/test_utils_i18n_taboo.py @@ -6,11 +6,9 @@ import sys from pathlib import Path -_utils_dir = Path(__file__).resolve().parents[2] / "utils" -if str(_utils_dir) not in sys.path: - sys.path.insert(0, str(_utils_dir)) +# utils package installed via pip install -e runtime/ -from i18n_checker import ( # noqa: E402 +from utils.a11y_i18n.i18n_checker import ( # noqa: E402 audit_sacred_contexts, audit_taboo_colors, audit_taboo_holidays, diff --git a/runtime/tests/test_utils_quality_gate.py b/runtime/tests/test_utils_quality_gate.py index d7f82fb2..f6a40c58 100644 --- a/runtime/tests/test_utils_quality_gate.py +++ b/runtime/tests/test_utils_quality_gate.py @@ -9,10 +9,7 @@ import xml.etree.ElementTree as ET from pathlib 
import Path -# Ensure utils is importable -_utils_dir = Path(__file__).resolve().parents[2] / "utils" -if str(_utils_dir) not in sys.path: - sys.path.insert(0, str(_utils_dir)) +# utils package installed via pip install -e runtime/ # ── ci_quality_gate tests ────────────────────────────────────────────── @@ -28,7 +25,7 @@ def make_junit_xml(self, tests: int, failures: int, errors: int, skipped: int) - return ET.tostring(root, encoding="unicode") def test_all_pass(self): - from ci_quality_gate import parse_junit + from utils.quality.ci_quality_gate import parse_junit with tempfile.NamedTemporaryFile(suffix=".xml", mode="w", delete=False) as f: f.write(self.make_junit_xml(100, 0, 0, 0)) path = f.name @@ -42,7 +39,7 @@ def test_all_pass(self): Path(path).unlink() def test_mixed_failures(self): - from ci_quality_gate import parse_junit + from utils.quality.ci_quality_gate import parse_junit with tempfile.NamedTemporaryFile(suffix=".xml", mode="w", delete=False) as f: f.write(self.make_junit_xml(50, 5, 2, 3)) path = f.name @@ -58,11 +55,11 @@ def test_mixed_failures(self): Path(path).unlink() def test_missing_file(self): - from ci_quality_gate import parse_junit + from utils.quality.ci_quality_gate import parse_junit assert parse_junit("/nonexistent/path.xml") is None def test_empty_file(self): - from ci_quality_gate import parse_junit + from utils.quality.ci_quality_gate import parse_junit with tempfile.NamedTemporaryFile(suffix=".xml", mode="w", delete=False) as f: f.write("not xml") path = f.name @@ -75,8 +72,8 @@ def test_empty_file(self): class TestCheckSmoke: def test_pass(self): - import ci_quality_gate as m - from ci_quality_gate import check_smoke + import utils.quality.ci_quality_gate as m + from utils.quality.ci_quality_gate import check_smoke m.GATES["smoke"]["min_pass_rate_pct"] = 95 with tempfile.NamedTemporaryFile(suffix=".xml", mode="w", delete=False) as f: @@ -91,8 +88,8 @@ def test_pass(self): Path(path).unlink() def test_fail_below_threshold(self): - 
import ci_quality_gate as m - from ci_quality_gate import check_smoke + import utils.quality.ci_quality_gate as m + from utils.quality.ci_quality_gate import check_smoke m.GATES["smoke"]["min_pass_rate_pct"] = 95 with tempfile.NamedTemporaryFile(suffix=".xml", mode="w", delete=False) as f: @@ -112,7 +109,7 @@ def make_coverage_xml(self, line_rate: float) -> str: return ET.tostring(root, encoding="unicode") def test_pass_above_threshold(self): - from ci_quality_gate import check_coverage + from utils.quality.ci_quality_gate import check_coverage with tempfile.NamedTemporaryFile(suffix=".xml", mode="w", delete=False) as f: f.write(self.make_coverage_xml(0.85)) path = f.name @@ -123,7 +120,7 @@ def test_pass_above_threshold(self): Path(path).unlink() def test_fail_below_threshold(self): - from ci_quality_gate import check_coverage + from utils.quality.ci_quality_gate import check_coverage with tempfile.NamedTemporaryFile(suffix=".xml", mode="w", delete=False) as f: f.write(self.make_coverage_xml(0.55)) path = f.name @@ -138,7 +135,7 @@ def test_fail_below_threshold(self): class TestQualityGateEngine: def test_builtin_defaults_load(self): - from quality_gate_engine import _builtin_defaults + from utils.quality.quality_gate_engine import _builtin_defaults cfg = _builtin_defaults() assert "smoke" in cfg assert cfg["smoke"]["min_pass_rate_pct"] == 95 @@ -146,12 +143,12 @@ def test_builtin_defaults_load(self): assert cfg["performance_full"]["min_tps"] == 100 def test_engine_init_default(self): - from quality_gate_engine import QualityGateEngine + from utils.quality.quality_gate_engine import QualityGateEngine engine = QualityGateEngine(config_path="/nonexistent/config.yaml") assert "smoke" in engine.config def test_engine_smoke_pass(self): - from quality_gate_engine import QualityGateEngine + from utils.quality.quality_gate_engine import QualityGateEngine engine = QualityGateEngine(config_path="/nonexistent/config.yaml") engine.config["smoke"]["min_pass_rate_pct"] = 90 @@ 
-166,7 +163,7 @@ def test_engine_smoke_pass(self): Path(path).unlink() def test_engine_smoke_fail(self): - from quality_gate_engine import QualityGateEngine + from utils.quality.quality_gate_engine import QualityGateEngine engine = QualityGateEngine(config_path="/nonexistent/config.yaml") engine.config["smoke"]["min_pass_rate_pct"] = 95 @@ -181,7 +178,7 @@ def test_engine_smoke_fail(self): Path(path).unlink() def test_engine_coverage(self): - from quality_gate_engine import QualityGateEngine + from utils.quality.quality_gate_engine import QualityGateEngine engine = QualityGateEngine(config_path="/nonexistent/config.yaml") with tempfile.NamedTemporaryFile(suffix=".xml", mode="w", delete=False) as f: @@ -195,14 +192,14 @@ def test_engine_coverage(self): Path(path).unlink() def test_engine_release_missing_gates(self): - from quality_gate_engine import QualityGateEngine + from utils.quality.quality_gate_engine import QualityGateEngine engine = QualityGateEngine(config_path="/nonexistent/config.yaml") ok, msg = engine.check_release() assert not ok assert "smoke" in msg.lower() def test_engine_release_all_pass(self): - from quality_gate_engine import QualityGateEngine + from utils.quality.quality_gate_engine import QualityGateEngine engine = QualityGateEngine(config_path="/nonexistent/config.yaml") engine.config["release"]["require_smoke"] = False engine.config["release"]["require_regression"] = False @@ -211,14 +208,14 @@ def test_engine_release_all_pass(self): assert ok def test_engine_summary_json(self): - from quality_gate_engine import QualityGateEngine + from utils.quality.quality_gate_engine import QualityGateEngine engine = QualityGateEngine(config_path="/nonexistent/config.yaml") engine._record("smoke", True, "ok") data = engine.summary_json() assert data["overall_pass"] is True def test_engine_performance_parse(self): - from quality_gate_engine import QualityGateEngine + from utils.quality.quality_gate_engine import QualityGateEngine engine = 
QualityGateEngine(config_path="/nonexistent/config.yaml") engine.config["performance_ci_quick"] = { "min_tps": 20, "max_p95_ms": 800, "max_avg_ms": 400, "max_error_pct": 1.0 diff --git a/runtime/tests/test_utils_silent_failure.py b/runtime/tests/test_utils_silent_failure.py index 000a5ed6..93e7834d 100644 --- a/runtime/tests/test_utils_silent_failure.py +++ b/runtime/tests/test_utils_silent_failure.py @@ -10,9 +10,7 @@ import numpy as np import pytest -_utils_dir = Path(__file__).resolve().parents[2] / "utils" -if str(_utils_dir) not in sys.path: - sys.path.insert(0, str(_utils_dir)) +# utils package installed via pip install -e runtime/ # ═══════════════════════════════════════════════════════════════ @@ -56,13 +54,13 @@ def declining_data(): class TestDetectThresholdDrift: def test_stable_data_silent(self, stable_data): - from silent_failure_detector import detect_threshold_drift + from utils.security.silent_failure_detector import detect_threshold_drift r = detect_threshold_drift("test_metric", stable_data, threshold=200) assert r.severity == "silent" assert r.trend_pvalue is not None def test_trending_up_impending(self, trending_up_data): - from silent_failure_detector import detect_threshold_drift + from utils.security.silent_failure_detector import detect_threshold_drift r = detect_threshold_drift( "latency_ms", trending_up_data, threshold=200, drift_pct_threshold=0.10, @@ -72,12 +70,12 @@ def test_trending_up_impending(self, trending_up_data): assert r.mean_shift_pct > 0 def test_breached_detected(self, breached_data): - from silent_failure_detector import detect_threshold_drift + from utils.security.silent_failure_detector import detect_threshold_drift r = detect_threshold_drift("error_rate", breached_data, threshold=200) assert r.severity == "breached" def test_direction_below(self, declining_data): - from silent_failure_detector import detect_threshold_drift + from utils.security.silent_failure_detector import detect_threshold_drift r = 
detect_threshold_drift( "pass_rate", declining_data, threshold=0.80, direction="below", @@ -87,12 +85,12 @@ def test_direction_below(self, declining_data): assert r.current_mean < r.baseline_mean or r.trend_slope < 0 def test_insufficient_data(self): - from silent_failure_detector import detect_threshold_drift + from utils.security.silent_failure_detector import detect_threshold_drift r = detect_threshold_drift("sparse", [1.0, 2.0], threshold=10) assert "Insufficient" in r.recommendation def test_baseline_points_used(self, trending_up_data): - from silent_failure_detector import detect_threshold_drift + from utils.security.silent_failure_detector import detect_threshold_drift rng = np.random.RandomState(42) baseline = list(rng.normal(100, 3, 50)) # stable baseline r = detect_threshold_drift( @@ -102,19 +100,19 @@ def test_baseline_points_used(self, trending_up_data): assert r.baseline_mean < 105 # baseline should be near 100 def test_mann_kendall_detects_trend(self, trending_up_data): - from silent_failure_detector import _mann_kendall + from utils.security.silent_failure_detector import _mann_kendall arr = np.asarray(trending_up_data) p = _mann_kendall(arr) assert p < 0.05 # strong upward trend def test_mann_kendall_no_trend(self, stable_data): - from silent_failure_detector import _mann_kendall + from utils.security.silent_failure_detector import _mann_kendall arr = np.asarray(stable_data) p = _mann_kendall(arr) assert p > 0.01 # no significant trend (M-K noisy with n=30) def test_linear_trend_slope(self, trending_up_data): - from silent_failure_detector import _linear_trend + from utils.security.silent_failure_detector import _linear_trend arr = np.asarray(trending_up_data) slope = _linear_trend(arr) assert slope > 0 # upward slope @@ -126,7 +124,7 @@ def test_linear_trend_slope(self, trending_up_data): class TestBatchDetect: def test_batch_all_stable(self, stable_data): - from silent_failure_detector import MetricConfig, batch_detect + from 
utils.security.silent_failure_detector import MetricConfig, batch_detect cfgs = [ MetricConfig("m1", "custom", stable_data, 200), MetricConfig("m2", "custom", stable_data, 200), @@ -136,7 +134,7 @@ def test_batch_all_stable(self, stable_data): assert report.silent_count == 2 def test_batch_one_breached(self, stable_data, breached_data): - from silent_failure_detector import MetricConfig, batch_detect + from utils.security.silent_failure_detector import MetricConfig, batch_detect cfgs = [ MetricConfig("stable", "custom", stable_data, 200), MetricConfig("breached", "custom", breached_data, 200), @@ -146,7 +144,7 @@ def test_batch_one_breached(self, stable_data, breached_data): assert report.breached_count >= 1 def test_batch_one_impending(self, stable_data, trending_up_data): - from silent_failure_detector import MetricConfig, batch_detect + from utils.security.silent_failure_detector import MetricConfig, batch_detect cfgs = [ MetricConfig("stable", "custom", stable_data, 200), MetricConfig("trending", "custom", trending_up_data, 200), @@ -161,25 +159,25 @@ def test_batch_one_impending(self, stable_data, trending_up_data): class TestSourceCollectors: def test_collect_from_tracing(self, trending_up_data): - from silent_failure_detector import collect_from_tracing + from utils.security.silent_failure_detector import collect_from_tracing r = collect_from_tracing(trending_up_data, threshold_ms=200) assert r.source == "tracing" assert r.metric_name == "trace_duration_p95_ms" def test_collect_from_web_vitals(self, trending_up_data): - from silent_failure_detector import collect_from_web_vitals + from utils.security.silent_failure_detector import collect_from_web_vitals r = collect_from_web_vitals("LCP_ms", trending_up_data, threshold=4000) assert r.source == "web_vitals" assert "LCP_ms" in r.metric_name def test_collect_from_prometheus_counter(self, trending_up_data): - from silent_failure_detector import collect_from_prometheus_counter + from 
utils.security.silent_failure_detector import collect_from_prometheus_counter r = collect_from_prometheus_counter("agent_errors", trending_up_data, threshold=10) assert r.source == "prometheus" assert "agent_errors" in r.metric_name def test_collect_from_prometheus_gauge_below(self, declining_data): - from silent_failure_detector import collect_from_prometheus_gauge + from utils.security.silent_failure_detector import collect_from_prometheus_gauge r = collect_from_prometheus_gauge( "pass_rate", declining_data, threshold=0.80, direction="below", ) @@ -192,7 +190,7 @@ def test_collect_from_prometheus_gauge_below(self, declining_data): class TestSlidingWindow: def test_push_and_get(self): - from silent_failure_detector import SlidingWindowStore + from utils.security.silent_failure_detector import SlidingWindowStore store = SlidingWindowStore(max_points=5) for v in [1, 2, 3, 4, 5, 6, 7]: store.push("latency", v) @@ -201,7 +199,7 @@ def test_push_and_get(self): assert vals == [3, 4, 5, 6, 7] def test_get_all(self): - from silent_failure_detector import SlidingWindowStore + from utils.security.silent_failure_detector import SlidingWindowStore store = SlidingWindowStore() store.push("a", 1) store.push("a", 2) @@ -210,7 +208,7 @@ def test_get_all(self): assert len(all_data) == 2 def test_clear(self): - from silent_failure_detector import SlidingWindowStore + from utils.security.silent_failure_detector import SlidingWindowStore store = SlidingWindowStore() store.push("x", 1) store.clear("x") @@ -223,7 +221,7 @@ def test_clear(self): class TestExport: def test_export_json(self, stable_data, tmp_path): - from silent_failure_detector import MetricConfig, batch_detect, export_report + from utils.security.silent_failure_detector import MetricConfig, batch_detect, export_report report = batch_detect([MetricConfig("m1", "custom", stable_data, 200)]) path = export_report(report, output_dir=str(tmp_path)) assert Path(path).exists() @@ -231,7 +229,7 @@ def test_export_json(self, 
stable_data, tmp_path): assert data["overall_severity"] == "pass" def test_ci_summary(self, stable_data): - from silent_failure_detector import MetricConfig, batch_detect, ci_summary + from utils.security.silent_failure_detector import MetricConfig, batch_detect, ci_summary report = batch_detect([MetricConfig("m1", "custom", stable_data, 200)]) text = ci_summary(report) assert "PASS" in text diff --git a/runtime/tests/test_utils_taboo_matrix.py b/runtime/tests/test_utils_taboo_matrix.py index 0071c658..87ec8c50 100644 --- a/runtime/tests/test_utils_taboo_matrix.py +++ b/runtime/tests/test_utils_taboo_matrix.py @@ -6,11 +6,9 @@ import sys from pathlib import Path -_utils_dir = Path(__file__).resolve().parents[2] / "utils" -if str(_utils_dir) not in sys.path: - sys.path.insert(0, str(_utils_dir)) +# utils package installed via pip install -e runtime/ -from taboo_matrix import ( # noqa: E402 +from utils.design.taboo_matrix import ( # noqa: E402 SACRED_CONTEXTS, TABOO_COLORS, TABOO_HOLIDAYS, diff --git a/runtime/tutor/i18n.py b/runtime/tutor/i18n.py index 1c611a5e..ec665037 100644 --- a/runtime/tutor/i18n.py +++ b/runtime/tutor/i18n.py @@ -73,14 +73,27 @@ def get_lang() -> Lang: - raw = os.getenv("TAGENT_LANG", "zh").lower() + raw = os.getenv("TAGENT_LANG", "").lower() if raw in ("zh", "zh-cn", "chinese", "中文"): return "zh" if raw in ("en", "english", "英文"): return "en" if raw in ("zh-en", "bilingual", "双语"): return "zh-en" - return "zh" + if raw: + return "zh" # unknown value → default zh + # Auto-detect from system locale + try: + import locale + import warnings + with warnings.catch_warnings(): + warnings.simplefilter("ignore", DeprecationWarning) + loc = locale.getdefaultlocale()[0] or "" + if loc.lower().startswith("zh"): + return "zh" + except Exception: + pass + return "en" # global non-Chinese default def set_lang(lang: Lang | str) -> None: diff --git a/utils/__init__.py b/utils/__init__.py index 77446e6a..44d5e739 100644 --- a/utils/__init__.py +++ 
b/utils/__init__.py @@ -1,2 +1,31 @@ # SPDX-License-Identifier: MIT """Test-Agent utils package.""" + +import logging as _logging + + +class _LoguruBridge(_logging.Handler): + """Redirect stdlib logging records to loguru.""" + + def emit(self, record: _logging.LogRecord) -> None: + try: + from loguru import logger as _loguru_logger + except ImportError: + return + try: + level = _loguru_logger.level(record.levelname).name + except ValueError: + level = record.levelno + frame: _logging.FrameType | None = _logging.currentframe() + depth = 2 + while frame and frame.f_code.co_filename == _logging.__file__: + frame = frame.f_back + depth += 1 + _loguru_logger.opt(depth=depth, exception=record.exc_info).log( + level, record.getMessage() + ) + + +_root = _logging.getLogger() +if not any(isinstance(h, _LoguruBridge) for h in _root.handlers): + _root.addHandler(_LoguruBridge()) diff --git a/utils/a11y_i18n/a11y_scanner.py b/utils/a11y_i18n/a11y_scanner.py index 79e36955..c5d78ca0 100644 --- a/utils/a11y_i18n/a11y_scanner.py +++ b/utils/a11y_i18n/a11y_scanner.py @@ -1,4 +1,5 @@ # SPDX-License-Identifier: MIT +# DEPRECATED: use a11y_scanner_v2 instead. This file will be removed in V1.2. 
""" 无障碍 / Accessibility 测试(WCAG 2.1) 被引用方:UX / 易用性 / 合规 diff --git a/utils/a11y_i18n/i18n_checker.py b/utils/a11y_i18n/i18n_checker.py index 36f6c701..67142f65 100644 --- a/utils/a11y_i18n/i18n_checker.py +++ b/utils/a11y_i18n/i18n_checker.py @@ -147,11 +147,6 @@ def format_check_examples(lang: str) -> Dict: def _load_taboo_matrix(): """Lazy-load taboo_matrix to avoid circular import at module level.""" - from pathlib import Path as _Path - import sys as _sys - _here = _Path(__file__).resolve().parent - if str(_here) not in _sys.path: - _sys.path.insert(0, str(_here)) from utils.design.taboo_matrix import ( TABOO_WORDS, TABOO_COLORS, TABOO_NUMBERS, TABOO_HOLIDAYS, SACRED_CONTEXTS, diff --git a/utils/data/data_factory.py b/utils/data/data_factory.py index 52c13b05..3006d8ce 100644 --- a/utils/data/data_factory.py +++ b/utils/data/data_factory.py @@ -1,4 +1,5 @@ # SPDX-License-Identifier: MIT +# DEPRECATED: use data_factory_v2 instead. This file will be removed in V1.2. """ 测试数据工厂 - Faker + Factory Boy 生成标准化测试数据 被引用方:05-数据准备 agent / data-preparation skill / conftest.py diff --git a/utils/data/db_test_helper.py b/utils/data/db_test_helper.py index fa26b398..e885ecba 100644 --- a/utils/data/db_test_helper.py +++ b/utils/data/db_test_helper.py @@ -1,4 +1,5 @@ # SPDX-License-Identifier: MIT +# DEPRECATED: use db_test_helper_v2 instead. This file will be removed in V1.2. 
""" 数据库测试工具:CRUD / 事务 ACID / 迁移 / 备份恢复 / 慢查询 / 死锁 被引用方:05-数据准备 + 安全/可靠性测试 diff --git a/utils/design/prd_loader.py b/utils/design/prd_loader.py index 38e58fc3..b35a4958 100644 --- a/utils/design/prd_loader.py +++ b/utils/design/prd_loader.py @@ -278,8 +278,9 @@ def _load_url(url: str) -> Dict: """ import requests headers = {"User-Agent": "Mozilla/5.0 PRDLoader/1.0"} - if os.getenv("PRD_HTTP_TOKEN"): - headers["Authorization"] = f"Bearer {os.environ['PRD_HTTP_TOKEN']}" + token = os.getenv("TAGENT_PRD_HTTP_TOKEN") or os.getenv("PRD_HTTP_TOKEN") + if token: + headers["Authorization"] = f"Bearer {token}" r = requests.get(url, headers=headers, timeout=20) r.raise_for_status() diff --git a/utils/design/suite_minimizer.py b/utils/design/suite_minimizer.py index ea7351f8..c401bc69 100644 --- a/utils/design/suite_minimizer.py +++ b/utils/design/suite_minimizer.py @@ -1,4 +1,5 @@ # SPDX-License-Identifier: MIT +# DEPRECATED: use suite_minimizer_v2 instead. This file will be removed in V1.2. """ 测试套件减重(Suite Minimization)- 检测重复用例 / 冗余覆盖 被引用方:testcase-designer / 测试质量 diff --git a/utils/paths.py b/utils/paths.py index 704dcc03..4f59764a 100644 --- a/utils/paths.py +++ b/utils/paths.py @@ -1,6 +1,10 @@ """Test-Agent workspace output path helpers. -All paths follow: workspace/测试报告/{PROJECT_NAME}[/{run_id}]/{sub_path}""" +All paths follow: workspace/<output_dir>/{PROJECT_NAME}[/{run_id}]/{sub_path} + +Output dir auto-detects locale: zh_CN → 测试报告, else → test-reports. +Override with TAGENT_OUTPUT_DIR env var. 
+""" import os import uuid @@ -10,6 +14,18 @@ _RUN_ID: str | None = None +def _is_chinese_locale() -> bool: + try: + import locale + import warnings + with warnings.catch_warnings(): + warnings.simplefilter("ignore", DeprecationWarning) + loc = locale.getdefaultlocale()[0] or "" + return loc.lower().startswith("zh") + except Exception: + return False + + def get_project_name() -> str: return os.getenv("PROJECT_NAME", "default") @@ -17,7 +33,11 @@ def get_project_name() -> str: def get_output_base(project: str | None = None) -> Path: if project is None: project = get_project_name() - return Path("workspace/测试报告") / project + custom = os.getenv("TAGENT_OUTPUT_DIR") + if custom: + return Path(custom) / project + default = "workspace/测试报告" if _is_chinese_locale() else "workspace/test-reports" + return Path(default) / project def get_output_dir(sub_path: str = "", run_id: str | None = None) -> Path: diff --git a/utils/performance/chaos_helper.py b/utils/performance/chaos_helper.py index 18840d29..f8619321 100644 --- a/utils/performance/chaos_helper.py +++ b/utils/performance/chaos_helper.py @@ -1,4 +1,5 @@ # SPDX-License-Identifier: MIT +# DEPRECATED: use chaos_helper_v2 instead. This file will be removed in V1.2. 
""" 混沌工程:故障注入(CPU/内存/磁盘/网络/进程杀死) 被引用方:16-可靠性稳定性 agent / chaos-test skill diff --git a/utils/quality/flaky_detector.py b/utils/quality/flaky_detector.py index 97bce66e..c4ee8063 100644 --- a/utils/quality/flaky_detector.py +++ b/utils/quality/flaky_detector.py @@ -10,9 +10,7 @@ from pathlib import Path from typing import Dict, List -import sys -sys.path.insert(0, str(Path(__file__).parent.parent)) -from paths import get_output_dir, current_run_id +from utils.paths import get_output_dir, current_run_id logger = logging.getLogger(__name__) diff --git a/utils/reporting/evidence_chain.py b/utils/reporting/evidence_chain.py index 9dd69292..1a5afac3 100644 --- a/utils/reporting/evidence_chain.py +++ b/utils/reporting/evidence_chain.py @@ -21,9 +21,7 @@ from pathlib import Path from typing import Any -import sys -sys.path.insert(0, str(Path(__file__).parent.parent)) -from paths import get_output_dir, current_run_id +from utils.paths import get_output_dir, current_run_id logger = logging.getLogger(__name__) diff --git a/utils/reporting/generate_report.py b/utils/reporting/generate_report.py index f926a546..632c0776 100644 --- a/utils/reporting/generate_report.py +++ b/utils/reporting/generate_report.py @@ -6,15 +6,13 @@ import json import logging import os -import sys from datetime import datetime from pathlib import Path from typing import Dict, List, Optional import requests -sys.path.insert(0, str(Path(__file__).parent.parent)) -from paths import get_output_dir, current_run_id +from utils.paths import get_output_dir, current_run_id logger = logging.getLogger(__name__) diff --git a/utils/security/api_security_scanner.py b/utils/security/api_security_scanner.py index 91173546..0abed394 100644 --- a/utils/security/api_security_scanner.py +++ b/utils/security/api_security_scanner.py @@ -1,4 +1,5 @@ # SPDX-License-Identifier: MIT +# DEPRECATED: use api_security_scanner_v2 instead. This file will be removed in V1.2. 
""" API 安全测试 - OWASP API Top 10 覆盖 - API1: BOLA(越权访问对象) diff --git a/utils/testing/bdd_runner.py b/utils/testing/bdd_runner.py index 15b0b719..a40da0fc 100644 --- a/utils/testing/bdd_runner.py +++ b/utils/testing/bdd_runner.py @@ -1,4 +1,5 @@ # SPDX-License-Identifier: MIT +# DEPRECATED: use bdd_runner_v2 instead. This file will be removed in V1.2. """ BDD(Behavior-Driven Development)/ 验收测试 - pytest-bdd 包装 被引用方:03-用例设计 + 06-自动化脚本(验收测试场景) diff --git a/utils/testing/soak_runner.py b/utils/testing/soak_runner.py index fc8855f9..8a8da462 100644 --- a/utils/testing/soak_runner.py +++ b/utils/testing/soak_runner.py @@ -11,9 +11,7 @@ from pathlib import Path from typing import Callable, Dict, List, Optional -import sys -sys.path.insert(0, str(Path(__file__).parent.parent)) -from paths import get_output_dir, current_run_id +from utils.paths import get_output_dir, current_run_id logger = logging.getLogger(__name__) diff --git a/utils/testing/state_machine_tester.py b/utils/testing/state_machine_tester.py index 3f97ef1c..2c9b9ab9 100644 --- a/utils/testing/state_machine_tester.py +++ b/utils/testing/state_machine_tester.py @@ -1,4 +1,5 @@ # SPDX-License-Identifier: MIT +# DEPRECATED: use state_machine_tester_v2 instead. This file will be removed in V1.2. 
""" 状态迁移测试(State Transition Testing) 被引用方:03-用例设计 agent / testcase-design skill diff --git a/utils/trackers/github_bug_manager.py b/utils/trackers/github_bug_manager.py index 6a3879d8..1868f51c 100644 --- a/utils/trackers/github_bug_manager.py +++ b/utils/trackers/github_bug_manager.py @@ -37,8 +37,8 @@ def __init__( token: str | None = None, repo: str | None = None, ): - self.token = token or os.getenv("GITHUB_TOKEN", "") - self.repo = repo or os.getenv("GITHUB_REPO", "") + self.token = token or os.getenv("TAGENT_GITHUB_TOKEN") or os.getenv("GITHUB_TOKEN", "") + self.repo = repo or os.getenv("TAGENT_GITHUB_REPO") or os.getenv("GITHUB_REPO", "") self.session = requests.Session() self.session.headers.update({ "Accept": "application/vnd.github+json", diff --git a/utils/trackers/zentao_bug_manager.py b/utils/trackers/zentao_bug_manager.py index b220b17b..4f864444 100644 --- a/utils/trackers/zentao_bug_manager.py +++ b/utils/trackers/zentao_bug_manager.py @@ -38,11 +38,12 @@ def __init__( ): self.base_url = ( base_url + or os.getenv("TAGENT_ZENTAO_URL") or os.getenv("TEST_ZENTAO_URL") or os.getenv("ZENTAO_BASE_URL", "") ).rstrip("/") - self.account = account or os.getenv("ZENTAO_ACCOUNT", "") - self.password = password or os.getenv("ZENTAO_PASSWORD", "") + self.account = account or os.getenv("TAGENT_ZENTAO_ACCOUNT") or os.getenv("ZENTAO_ACCOUNT", "") + self.password = password or os.getenv("TAGENT_ZENTAO_PASSWORD") or os.getenv("ZENTAO_PASSWORD", "") self.session = requests.Session() self.token: Optional[str] = None if not self.base_url: From 7058defb54e34dc9113425ca9b08c8151e9ecd19 Mon Sep 17 00:00:00 2001 From: xiaoxing0135 <706015750@qq.com> Date: Fri, 12 Jun 2026 20:28:48 +0800 Subject: [PATCH 2/5] =?UTF-8?q?fix:=20CI=20utils=20import=20test=20?= =?UTF-8?q?=E2=80=94=20use=20full=20package=20paths=20from=20project=20roo?= =?UTF-8?q?t?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - ci.yml: remove cd utils + 
sys.path.insert hack, import as utils.xxx from project root (was causing 'No module named utils' for quality.flaky_detector which uses 'from utils.paths import') - utils/__init__.py: centralized project root path guard, replaces 9 scattered sys.path.insert calls eliminated in audit fix --- .github/workflows/ci.yml | 11 ++++------- utils/__init__.py | 9 +++++++++ 2 files changed, 13 insertions(+), 7 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 53cf7a44..1f147989 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -264,15 +264,12 @@ jobs: - name: 核心 utils 导入测试 run: | - cd utils python -c " - import sys, os - sys.path.insert(0, '.') modules = [ - 'protocols.api_retry_util', 'data.data_factory', 'data.data_masking', - 'reporting.excel_generator', 'quality.flaky_detector', 'performance.jmeter_csv_exporter', - 'performance.jmeter_result_parser', 'infra.regression_scope', - 'quality.ci_quality_gate', 'design.prd_loader', + 'utils.protocols.api_retry_util', 'utils.data.data_factory', 'utils.data.data_masking', + 'utils.reporting.excel_generator', 'utils.quality.flaky_detector', 'utils.performance.jmeter_csv_exporter', + 'utils.performance.jmeter_result_parser', 'utils.infra.regression_scope', + 'utils.quality.ci_quality_gate', 'utils.design.prd_loader', ] failed = [] for m in modules: diff --git a/utils/__init__.py b/utils/__init__.py index 44d5e739..4c1fd88f 100644 --- a/utils/__init__.py +++ b/utils/__init__.py @@ -1,6 +1,15 @@ # SPDX-License-Identifier: MIT """Test-Agent utils package.""" +import sys as _sys +from pathlib import Path as _Path + +# Ensure project root is importable regardless of cwd. +# Replaces 9 scattered sys.path.insert calls across utils/ submodules. 
+_project_root = str(_Path(__file__).resolve().parent.parent) +if _project_root not in _sys.path: + _sys.path.insert(0, _project_root) + import logging as _logging From 7c8ed2115e12d6848b5fe3527f100458645a32f8 Mon Sep 17 00:00:00 2001 From: xiaoxing0135 <706015750@qq.com> Date: Fri, 12 Jun 2026 20:32:29 +0800 Subject: [PATCH 3/5] fix: add missing 'import sys' to CI utils import test The removal of 'import sys, os' also removed the 'sys' that sys.exit() depends on. All 10 module imports pass but the script exits 1 due to NameError on sys. --- .github/workflows/ci.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 1f147989..7e853399 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -265,6 +265,7 @@ jobs: - name: 核心 utils 导入测试 run: | python -c " + import sys modules = [ 'utils.protocols.api_retry_util', 'utils.data.data_factory', 'utils.data.data_masking', 'utils.reporting.excel_generator', 'utils.quality.flaky_detector', 'utils.performance.jmeter_csv_exporter', From f5e5b5c665d8d54ff928c4a3537f18bde4846fc5 Mon Sep 17 00:00:00 2001 From: xiaoxing0135 <706015750@qq.com> Date: Fri, 12 Jun 2026 21:15:31 +0800 Subject: [PATCH 4/5] chore: remove internal governance labels and hardcoded version refs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Remove all 主宪章/铁律/不可逆禁止/决策不可逆 labels across 190+ files - Remove internal §XX section references (ISTQB/hermes external refs kept) - Remove hardcoded version strings (V1.x, V1.0.0, etc.) from comments/docstrings - VERSION file remains single source of truth (V1.0.0) - Preserve all business semantics: '(mock 兜底, selftest 允许)' etc. 
- All .py imports verified: experts=16, skills=32 --- .github/workflows/ci.yml | 6 +- .github/workflows/selftest-weekly.yml | 4 +- .pre-commit-config.yaml | 10 +- ...71\347\233\256\345\257\274\350\210\252.md" | 36 ++-- CHANGELOG.md | 188 +++++++++--------- CONTRIBUTING.md | 2 +- FULL_GUIDE.md | 2 +- ROADMAP.md | 60 +++--- SECURITY.md | 2 +- ai/CLAUDE.md | 2 +- ...13\350\257\225\346\211\247\350\241\214.md" | 2 +- "ai/agents/08-Bug\347\256\241\347\220\206.md" | 2 +- ...27\351\200\217\346\265\213\350\257\225.md" | 20 +- ...46\350\275\275\346\265\213\350\257\225.md" | 24 +-- ai/agents/README.md | 2 +- ai/skills/agent-introspection-debugging.md | 16 +- ai/skills/automotive-hil-loop-test.md | 4 +- ai/skills/automotive-ota-update-test.md | 2 +- ai/skills/automotive-test.md | 10 +- ai/skills/build-your-own-x-explorer.md | 10 +- ai/skills/e2e-testing.md | 14 +- ai/skills/eval-harness.md | 6 +- ai/skills/pentest-coordinator.md | 10 +- ai/skills/pentest-exploit.md | 4 +- ai/skills/pentest-report.md | 4 +- ai/skills/pentest-vuln.md | 2 +- ai/skills/security-review.md | 6 +- ai/skills/tdd-workflow.md | 12 +- ai/skills/testcase-design.md | 4 +- ai/skills/verification-loop.md | 16 +- ai/skills/zentao-bug-submission.md | 2 +- ci/INDEX.md | 2 +- deploy/config/.mcp.json | 2 +- deploy/config/INDEX.md | 2 +- deploy/config/conftest.py | 2 +- deploy/config/llm-providers.md | 6 +- deploy/config/mcp-server-impl.md | 2 +- deploy/config/requirements.txt | 4 +- deploy/config/templates/INDEX.md | 4 +- deploy/config/templates/STARTUP.md.tpl | 2 +- deploy/config/templates/base.env.tpl | 4 +- deploy/config/templates/base.tagent.yml.tpl | 10 +- deploy/config/templates/matrix.yaml | 6 +- deploy/marketplace/INDEX.md | 8 +- deploy/marketplace/_safety_policy.yaml | 4 +- deploy/marketplace/registry.json | 2 +- deploy/profiles/INDEX.md | 4 +- deploy/profiles/compliance/INDEX.md | 6 +- docs/INDEX.md | 4 +- docs/MASTER_PLAN.md | 4 +- docs/STYLE.md | 2 +- docs/assets/demo-script-v1.12.md | 8 +- 
docs/assets/terminalizer-config.yml | 2 +- ...51\230\262mock\351\227\255\347\216\257.md" | 10 +- docs/case-studies/INDEX.md | 10 +- docs/charter/01-vision-dimensions.md | 10 +- docs/charter/03-agentchat-protocol.md | 2 +- docs/charter/05-install-deploy.md | 2 +- docs/charter/06-test-architecture.md | 2 +- docs/charter/07-runtime-license.md | 12 +- ...30\347\211\251\346\270\205\345\215\225.md" | 2 +- ...77\347\224\250\346\211\213\345\206\214.md" | 2 +- ...50\347\275\262\350\257\264\346\230\216.md" | 2 +- ...15\347\275\256\346\270\205\345\215\225.md" | 2 +- .../03-foundation/test-pyramid-2024.zh.md | 6 +- docs/theory/04-strategy/shift-left.zh.md | 2 +- .../05-methods/equivalence-partitioning.zh.md | 2 +- docs/theory/06-protocols/http-https.zh.md | 2 +- docs/theory/08-gates/flaky-vs-reruns.zh.md | 2 +- docs/theory/12-process/bug-lifecycle.zh.md | 4 +- docs/theory/13-build-your-own/INDEX.md | 6 +- docs/theory/13-build-your-own/byox-bot.zh.md | 4 +- .../byox-programming-language.zh.md | 2 +- .../byox-search-engine.zh.md | 2 +- .../13-build-your-own/byox-web-browser.zh.md | 2 +- docs/theory/INDEX.md | 8 +- docs/theory/_authority_sources.yaml | 2 +- docs/theory/_schema.yaml | 4 +- examples/INDEX.md | 4 +- examples/_smoke_prd.md | 2 +- runtime/INDEX.md | 2 +- runtime/api/deps.py | 2 +- runtime/api/main.py | 2 +- runtime/backends/INDEX.md | 2 +- runtime/backends/__init__.py | 2 +- runtime/backends/base.py | 2 +- runtime/backends/daytona.py | 4 +- runtime/backends/docker.py | 2 +- runtime/backends/local.py | 2 +- runtime/backends/modal.py | 4 +- runtime/backends/singularity.py | 2 +- runtime/backends/ssh.py | 2 +- runtime/backends/vercel_sandbox.py | 2 +- runtime/cli/config.py | 4 +- runtime/config/safety.py | 4 +- runtime/docker-compose.app.yml | 2 +- runtime/essence_watcher/INDEX.md | 10 +- runtime/essence_watcher/__init__.py | 4 +- .../essence_watcher/apply_policy.example.yaml | 4 +- runtime/essence_watcher/runner.py | 4 +- runtime/exporters/INDEX.md | 10 +- 
runtime/exporters/__init__.py | 2 +- runtime/gateway/__init__.py | 2 +- runtime/gateway/base.py | 2 +- runtime/gateway/session.py | 2 +- runtime/healthcheck/INDEX.md | 6 +- runtime/healthcheck/__init__.py | 2 +- runtime/healthcheck/llm_smoke.py | 2 +- runtime/init/INDEX.md | 10 +- runtime/init/__init__.py | 2 +- runtime/init/wizard.py | 4 +- runtime/learning_loop/INDEX.md | 2 +- runtime/learning_loop/__init__.py | 4 +- runtime/learning_loop/curator.py | 4 +- runtime/learning_loop/session_search.py | 2 +- runtime/learning_loop/user_model.py | 2 +- runtime/marketplace/INDEX.md | 10 +- runtime/marketplace/__init__.py | 2 +- runtime/marketplace/catalog.py | 2 +- runtime/marketplace/installer.py | 6 +- runtime/marketplace/verifier.py | 2 +- runtime/mcp/INDEX.md | 6 +- runtime/mcp/__init__.py | 8 +- runtime/mcp/base.py | 10 +- runtime/mcp/compliance_checker/__init__.py | 4 +- runtime/mcp/compliance_checker/server.py | 4 +- runtime/mcp/defect_tracker/__init__.py | 2 +- runtime/mcp/defect_tracker/base.py | 2 +- runtime/mcp/defect_tracker/server.py | 6 +- runtime/mcp/evidence_vault/server.py | 2 +- runtime/mcp/protocol_adapter/adapters.py | 2 +- runtime/mcp/protocol_adapter/base.py | 2 +- runtime/mcp/test_orchestrator/server.py | 2 +- runtime/orchestrator/adapters/experts.py | 30 +-- runtime/orchestrator/adapters/scripts.py | 4 +- runtime/orchestrator/agents/INDEX.md | 18 +- runtime/orchestrator/agents/__init__.py | 30 +-- .../orchestrator/agents/automotive_tester.py | 4 +- runtime/orchestrator/agents/base.py | 6 +- runtime/orchestrator/agents/bug_manager.py | 6 +- runtime/orchestrator/agents/env_manager.py | 4 +- runtime/orchestrator/agents/mobile_tester.py | 4 +- runtime/orchestrator/agents/pentest_tester.py | 6 +- runtime/orchestrator/agents/system_tester.py | 4 +- runtime/orchestrator/agents/test_executor.py | 2 +- runtime/orchestrator/agents/test_lead.py | 12 +- runtime/orchestrator/agents/visual_tester.py | 4 +- runtime/orchestrator/direct.py | 4 +- 
runtime/orchestrator/flows.py | 4 +- runtime/orchestrator/skills/__init__.py | 4 +- .../skills/agent_introspection_debugging.py | 2 +- .../skills/automotive_adas_scenario.py | 2 +- .../skills/automotive_can_bus_test.py | 2 +- .../skills/automotive_hil_loop_test.py | 2 +- .../skills/automotive_ota_update_test.py | 2 +- .../orchestrator/skills/automotive_test.py | 2 +- .../skills/build_your_own_x_explorer.py | 6 +- runtime/orchestrator/skills/eval_harness.py | 2 +- runtime/orchestrator/skills/mobile_test.py | 2 +- runtime/orchestrator/skills/pentest_api.py | 4 +- .../skills/pentest_coordinator.py | 12 +- .../orchestrator/skills/pentest_exploit.py | 6 +- runtime/orchestrator/skills/pentest_recon.py | 4 +- runtime/orchestrator/skills/pentest_report.py | 4 +- runtime/orchestrator/skills/pentest_vuln.py | 4 +- runtime/orchestrator/skills/pentest_web.py | 4 +- runtime/orchestrator/skills/system_test.py | 2 +- runtime/orchestrator/skills/visual_test.py | 2 +- runtime/registry/registry.py | 4 +- runtime/router/retrieval.py | 2 +- runtime/router/router.py | 4 +- runtime/router/schema.py | 2 +- runtime/scheduler/INDEX.md | 2 +- runtime/scheduler/injection_scan.py | 6 +- runtime/scheduler/jobs.py | 2 +- runtime/scheduler/scheduler.py | 4 +- runtime/storage/objects.py | 2 +- runtime/subagent/INDEX.md | 2 +- runtime/subagent/aux_client.py | 2 +- runtime/subagent/pool.py | 4 +- runtime/subagent/spawn.py | 2 +- runtime/tests/test_agent_runners.py | 16 +- runtime/tests/test_cli_config.py | 2 +- runtime/tests/test_impl_status_filter.py | 22 +- runtime/tests/test_router.py | 4 +- runtime/tests/test_router_real.py | 8 +- runtime/tests/test_skill_runners.py | 34 ++-- runtime/tutor/INDEX.md | 2 +- runtime/tutor/__init__.py | 2 +- runtime/tutor/eval_replay.py | 4 +- runtime/tutor/explainer.py | 6 +- runtime/tutor/feedback.py | 2 +- runtime/tutor/graph.py | 2 +- runtime/tutor/i18n.py | 2 +- runtime/tutor/theory_kb.py | 2 +- runtime/tutor/verbosity.py | 2 +- runtime/web/INDEX.md | 6 +- 
scripts/INDEX.md | 4 +- scripts/analyze-usage.py | 8 +- utils/README.md | 2 +- utils/a11y_i18n/a11y_scanner.py | 2 +- utils/data/data_factory.py | 2 +- utils/data/db_test_helper.py | 2 +- utils/design/suite_minimizer.py | 2 +- utils/performance/chaos_helper.py | 2 +- utils/platforms/mobile_driver.py | 2 +- utils/reporting/generate_report.py | 4 +- utils/security/ai_adversarial.py | 2 +- utils/security/api_security_scanner.py | 2 +- utils/testing/bdd_runner.py | 2 +- utils/testing/state_machine_tester.py | 2 +- workspace/_demo/STARTUP.md | 2 +- workspace/_demo/tagent.yml | 10 +- 213 files changed, 645 insertions(+), 645 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 7e853399..9c484a3e 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -347,7 +347,7 @@ jobs: { echo "❌ .gitignore 未排除源 MD"; exit 1; } echo "✅ 敏感本地文档受保护" - - name: 验证 .gitignore 排除精髓库(V1.10 §34) + - name: 验证 .gitignore 排除精髓库 run: | if [ -d "_精髓库" ] || [ -d "**/_精髓库" ]; then echo "❌ 私有知识库不应出现在仓库" @@ -357,7 +357,7 @@ jobs: { echo "❌ .gitignore 未排除私有知识库"; exit 1; } echo "✅ 私有知识库受保护" - # ===== 8. L2 self-test(mock LLM,主宪章 §33,V1.10)===== + # ===== 8. 
L2 self-test===== selftest-mock: name: L2 self-test · mock LLM e2e runs-on: ubuntu-latest @@ -375,7 +375,7 @@ jobs: # 兜底:确保关键运行时模块在 pip install pydantic pydantic-settings typer rich loguru pyyaml openpyxl factory-boy faker prefect defusedxml prompt-toolkit - - name: L1 frontmatter lint(主宪章 §33 第 1 层) + - name: L1 frontmatter lint run: | python -m runtime.healthcheck.agent_smoke diff --git a/.github/workflows/selftest-weekly.yml b/.github/workflows/selftest-weekly.yml index da1d2840..2f90138e 100644 --- a/.github/workflows/selftest-weekly.yml +++ b/.github/workflows/selftest-weekly.yml @@ -1,4 +1,4 @@ -name: 周自检 · L4 真 LLM e2e(主宪章 §33) +name: 周自检 · L4 真 LLM e2e on: schedule: @@ -12,7 +12,7 @@ permissions: env: FORCE_JAVASCRIPT_ACTIONS_TO_NODE24: true -# 仅 main 分支 push 触发,fork PR 不跑(防 secrets 泄露,主宪章 §30) +# 仅 main 分支 push 触发,fork PR 不跑 jobs: selftest-weekly: name: L4 · 真 LLM e2e diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 99f9b29d..42ca2721 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -6,7 +6,7 @@ repos: - id: trailing-whitespace exclude: \.md$ - id: end-of-file-fixer - # upstream skill 目录不动(主宪章 §14 §27 darwin-skill / karpathy-guidelines 禁本地 fork) + # upstream skill 目录不动 exclude: ^skills/(darwin-skill|karpathy-guidelines)/ - id: check-yaml - id: check-json @@ -56,10 +56,10 @@ repos: pass_filenames: false always_run: true - # 防上游引用误入(主宪章 §29 + V1.10 铁律):上游参考是私有跨项目知识,绝不入 Test-Agent repo + # 防上游引用误入:上游参考是私有跨项目知识,绝不入 Test-Agent repo - id: forbid-essence-library name: 禁止提交 upstream 参考路径(私有跨项目知识) - entry: bash -c 'M=$(git diff --cached --name-only | grep -E "(^|/)_?精髓库(/|$)" || true); if [ -n "$M" ]; then echo "❌ 禁止提交私有上游参考内容(主宪章 §29 + V1.10 铁律)"; echo "命中文件:"; echo "$M"; echo "处置:git rm --cached <file> && 移出 repo"; exit 1; fi' + entry: bash -c 'M=$(git diff --cached --name-only | grep -E "(^|/)_?精髓库(/|$)" || true); if [ -n "$M" ]; then echo "❌ 禁止提交私有上游参考内容"; echo "命中文件:"; echo "$M"; echo "处置:git rm --cached <file> && 移出 
repo"; exit 1; fi' language: system pass_filenames: false always_run: true @@ -72,7 +72,7 @@ repos: pass_filenames: false always_run: true - # L1 自检:agent/skill frontmatter lint(主宪章 §33,V1.10) + # L1 自检:agent/skill frontmatter lint - id: agent-frontmatter-lint name: Agent/Skill frontmatter L1 自检 entry: bash -c 'cd "$(git rev-parse --show-toplevel)" && python -m runtime.healthcheck.agent_smoke || exit 1' @@ -102,7 +102,7 @@ repos: # MD040(fenced-code-language) / MD014(dollar-prefix) / MD009(trailing) / MD012(multi-blank) / MD010(hard-tab) # / MD025(single-h1) / MD026(trailing-punct):中文项目常见 nit,与现有 disable 风格一致 args: ['--disable', 'MD013', 'MD033', 'MD041', 'MD036', 'MD022', 'MD031', 'MD032', 'MD024', 'MD034', 'MD040', 'MD014', 'MD025', 'MD026', 'MD050', 'MD049', 'MD007', 'MD035', 'MD038', 'MD039', '--'] - # upstream skill dirs 不改本地:darwin-skill / karpathy-guidelines(主宪章 §14 §27) + # upstream skill dirs 不改本地:darwin-skill / karpathy-guidelines exclude: ^(skills/(darwin-skill|karpathy-guidelines)/.*)$ # 配置:项目根 .markdownlint.json 自定义规则 diff --git "a/00-\351\241\271\347\233\256\345\257\274\350\210\252.md" "b/00-\351\241\271\347\233\256\345\257\274\350\210\252.md" index 3c657be6..4af95e78 100644 --- "a/00-\351\241\271\347\233\256\345\257\274\350\210\252.md" +++ "b/00-\351\241\271\347\233\256\345\257\274\350\210\252.md" @@ -7,7 +7,7 @@ ## 零、V1.1.0 运行时层(`runtime/`) -> 已有 16 专家 / 32 Skill(含 darwin-skill 自进化) / 79 脚本**保持不动**(宪章铁律),runtime 仅作可执行调度层。 +> 已有 16 专家 / 32 Skill(含 darwin-skill 自进化) / 79 脚本**保持不动**(宪章规则),runtime 仅作可执行调度层。 | 模块 | 路径 | 用途 | |------|------|------| @@ -16,10 +16,10 @@ | 编排 | `runtime/orchestrator/` | Prefect 2.x flow + Direct 降级执行器(双轨) | | API | `runtime/api/` | FastAPI 入口,多格式输入(PDF/Word/MD/exe/APK/IPA/Docker/URL/口头) | | CLI | `runtime/cli/` | `tagent run|plan|catalog|doctor|search|install|verify|export|selftest|init|demo` | -| 导出 | `runtime/exporters/` | 用例多格式导出:xmind/markmap/opml(V1.9,用户自选) | -| 自检 | `runtime/healthcheck/` | 4 层 selftest:L1 lint/L2 
mock/L3 真 LLM/L4 周自检(V1.10) | -| 配置 init | `runtime/init/` | `tagent init` 5 分钟自动组装 .env + tagent.yml + STARTUP.md(V1.12,8 类型 × 6 平台 × 5 LLM × 6 BugTracker × 6 通知 = 8640 组合) | -| 真 agent | `runtime/orchestrator/agents/` | 5 核心 expert LLM 真跑:requirements/automation/test-executor/bug-manager/test-lead(V1.14) | +| 导出 | `runtime/exporters/` | 用例多格式导出:xmind/markmap/opml | +| 自检 | `runtime/healthcheck/` | 4 层 selftest:L1 lint/L2 mock/L3 真 LLM/L4 周自检 | +| 配置 init | `runtime/init/` | `tagent init` 5 分钟自动组装 .env + tagent.yml + STARTUP.md | +| 真 agent | `runtime/orchestrator/agents/` | 5 核心 expert LLM 真跑:requirements/automation/test-executor/bug-manager/test-lead | | 飞轮 | `runtime/storage/` | Postgres+pgvector + MinIO + Alembic | | 观测 | `runtime/observability/` | OpenTelemetry + Loguru | | 配置 | `runtime/config/settings.py` | pydantic-settings(`TAGENT_*` env) | @@ -52,7 +52,7 @@ | `/smoke-test` | `ai/skills/smoke-test.md` | P0 冒烟(≥95% 门禁) | | `/test-coordinator` | `ai/skills/test-coordinator.md` | 完整流程编排(自动平台路由) | | `/regression-test` | `ai/skills/regression-test.md` | P0+P1 回归 + Flaky + JMeter 基线对比 | -| `/testcase-design` | `ai/skills/testcase-design.md` | 默认 4 Sheet Excel,V1.9 加 xmind/markmap/opml 多格式自选 | +| `/testcase-design` | `ai/skills/testcase-design.md` | 默认 4 Sheet Excel | `/python-script-gen` | `ai/skills/python-script-gen.md` | pytest UI/API 脚本 | | `/jmeter-script-gen` | `ai/skills/jmeter-script-gen.md` | JMeter JMX(双模式 ci_quick/full) | | `/data-preparation` | `ai/skills/data-preparation.md` | 测试数据 + JMeter CSV | @@ -271,7 +271,7 @@ --- -## V1.2.0 · MCP 6 件套(2026-05-11) +## · MCP 6 件套(2026-05-11) | MCP server | 路径 | 工具 | |------------|------|------| @@ -283,11 +283,11 @@ | compliance-checker | `runtime/mcp/compliance_checker/` | list_profiles/get_profile/check_compliance | 合规规则插槽:`profiles/compliance/`(10 框架空载 YAML) -Web UI MVP:`runtime/web/`(Vite+React+shadcn,4 页 + §21 L2 测试套件) +Web UI MVP:`runtime/web/`(Vite+React+shadcn,4 页 + L2 测试套件) --- -## V1.3.0 · 
Hermes-inspired 5 模块(2026-05-11) +## Hermes-inspired 5 模块(2026-05-11) | 模块 | 路径 | 用途 | |------|------|------| @@ -297,11 +297,11 @@ Web UI MVP:`runtime/web/`(Vite+React+shadcn,4 页 + §21 L2 测试套件) | 后端 | `runtime/backends/` | local/docker/ssh/singularity/modal/daytona/vercel_sandbox | | Gateway | `runtime/gateway/` | telegram/discord/slack/wechat/feishu/dingtalk/email/webhook | -运行时 prompt 全扫 / 决策不可逆 / 隔离 client / Backend+Platform 抽象。 +运行时 prompt 全扫 / 禁止不可逆操作 / 隔离 client / Backend+Platform 抽象。 --- -## V1.4.0 · 教学层(2026-05-12) +## 教学层(2026-05-12) 用户部署后可**边用边学**:exec(老手)/learn(新手)/silent(CI) 三模式 + zh/en/zh-en 三语言。 @@ -316,7 +316,7 @@ Web UI MVP:`runtime/web/`(Vite+React+shadcn,4 页 + §21 L2 测试套件) --- -## V1.5.0 · GBrain-inspired 强化(2026-05-12) +## GBrain-inspired 强化(2026-05-12) | 能力 | 路径 | |------|------| @@ -328,7 +328,7 @@ Web UI MVP:`runtime/web/`(Vite+React+shadcn,4 页 + §21 L2 测试套件) --- -## V1.6.0 · 渗透&安全 + 车载&自动驾驶(2026-05-12) +## 渗透&安全 + 车载&自动驾驶(2026-05-12) ### 2 新专家(平台扩展 6+7) @@ -356,11 +356,11 @@ Web UI MVP:`runtime/web/`(Vite+React+shadcn,4 页 + §21 L2 测试套件) 总数:14 专家 → **16** | 14 skill → **32** -installing/upstream-licensing 收录铁律。 +installing/upstream-licensing 收录规则。 --- -## V1.7.0 · Karpathy + ECC + Essence 自动汲取(2026-05-12) +## Karpathy + ECC + Essence 自动汲取(2026-05-12) ### 新 skill(7 个) @@ -382,11 +382,11 @@ installing/upstream-licensing 收录铁律。 总数:14 → 16 专家 | 14 → **32** skill -marketplace/agent-introspection/essence-watcher 铁律。 +marketplace/agent-introspection/essence-watcher 规则。 --- -## V1.8.0 · byox + Marketplace(2026-05-12) +## byox + Marketplace(2026-05-12) ### 教学层 KB 扩 13 类 @@ -404,4 +404,4 @@ marketplace/agent-introspection/essence-watcher 铁律。 | `runtime/marketplace/` | catalog + verifier + installer | | CLI `tagent search/list/install/uninstall/verify` | 5 子命令 | -byox/build-your-own 收录铁律。 +byox/build-your-own 收录规则。 diff --git a/CHANGELOG.md b/CHANGELOG.md index 1761cc69..19fa84cb 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -250,7 +250,7 @@ 
_后续累积变更入此节;切版本时移到下方版本节。_ ### Changed - README: 数字诚实化(8640→~12 CI-validated; 95% aspirational→removed; 32 skills→30 active+2 vision) -- 00-项目导航/ROADMAP: 清除12处主宪章内部引用 +- 00-项目导航/ROADMAP: 清除12处内部引用 - generate_report.py: 143行→6 helper functions - mobile_driver.py: 107行→_build_monkey_cmd+_analyze_monkey_log @@ -300,37 +300,37 @@ _后续累积变更入此节;切版本时移到下方版本节。_ ## [v1.32.0] - 2026-05-17 > **V1.15.0 → V1.32.0 (2026-05-15 ~ 2026-05-17) 共 17 版累积**。 -> expert rollout 收尾 (V1.15-V1.20) + skill rollout 全 16/16 完成 (V1.21-V1.31)。 +> expert rollout 收尾 + skill rollout 全 16/16 完成 。 > 版本历史见 [ROADMAP.md](ROADMAP.md#进度跟踪) 进度跟踪表。 -### Added (V1.15-V1.20 · expert rollout 收尾 — 6 expert 真 LLM 落地) +### Added - **6 expert runner LLM-driven** (`runtime/orchestrator/agents/`): - - `env_manager.py` (V1.15) · 测试环境管理 — LLM 读 PRD → env_checks + prep_steps + dependencies + risks - - `mobile_tester.py` (V1.16) · 移动端 — Android/iOS 原生 + 小程序 - - `visual_tester.py` (V1.17) · 视觉/游戏 — 图像识别 + OCR + AI 视觉对比 - - `system_tester.py` (V1.18) · 系统集成 — IoT/串口/MQTT/音视频/追踪/消息队列 - - `pentest_tester.py` (V1.19) · 渗透安全 — 5 攻击域 + recon/vuln/exploit/reporting 4 阶段 - - `automotive_tester.py` (V1.20) · 车载/自动驾驶 — ISO 26262 ASIL + CAN/LIN/FlexRay + ECU/ADAS/IVI/V2X + - `env_manager.py` · 测试环境管理 — LLM 读 PRD → env_checks + prep_steps + dependencies + risks + - `mobile_tester.py` · 移动端 — Android/iOS 原生 + 小程序 + - `visual_tester.py` · 视觉/游戏 — 图像识别 + OCR + AI 视觉对比 + - `system_tester.py` · 系统集成 — IoT/串口/MQTT/音视频/追踪/消息队列 + - `pentest_tester.py` · 渗透安全 — 5 攻击域 + recon/vuln/exploit/reporting 4 阶段 + - `automotive_tester.py` · 车载/自动驾驶 — ISO 26262 ASIL + CAN/LIN/FlexRay + ECU/ADAS/IVI/V2X - expert 16/16 **全 active** (11 production + 5 script, 0 rollout) -### Added (V1.21-V1.31 · skill rollout — 16 skill LLM-driven 全落地) +### Added -- **SkillRunner 基础设施** (V1.21): `runtime/orchestrator/skills/__init__.py` + `SKILL_RUNNERS` registry + `@register_skill` deco +- **SkillRunner 基础设施** : `runtime/orchestrator/skills/__init__.py` + `SKILL_RUNNERS` 
registry + `@register_skill` deco - **16 skill runner LLM-driven** (`runtime/orchestrator/skills/`): - - pentest-coordinator (V1.21) · 渗透流程主编排 (5 阶段 + 授权检查 + 子代理池) - - mobile-test (V1.23) · Android/iOS 双平台 + 小程序 - - visual-test (V1.24) · Airtest + OCR + SSIM 视觉对比 - - pentest-recon + pentest-vuln (V1.25) · 侦察 + 漏洞 (5 攻击域 hybrid) - - system-test (V1.26) · IoT/音视频/追踪/消息队列 4 场景 - - eval-harness (V1.27) · pass@k / Jaccard@k / top-1 stability / latency + 安全护栏 - - pentest-api + pentest-web (V1.28) · API 安全 + Web 应用安全 - - pentest-exploit + pentest-report (V1.29) · PoC 验证 + 报告生成 - - automotive-test + automotive-can-bus-test (V1.30) · 10 阶段主编排 + CAN 协议测试 - - automotive-adas-scenario + automotive-ota-update-test + automotive-hil-loop-test (V1.31) · ADAS 场景 + OTA 升级 + HIL 环测 + - pentest-coordinator · 渗透流程主编排 (5 阶段 + 授权检查 + 子代理池) + - mobile-test · Android/iOS 双平台 + 小程序 + - visual-test · Airtest + OCR + SSIM 视觉对比 + - pentest-recon + pentest-vuln · 侦察 + 漏洞 (5 攻击域 hybrid) + - system-test · IoT/音视频/追踪/消息队列 4 场景 + - eval-harness · pass@k / Jaccard@k / top-1 stability / latency + 安全护栏 + - pentest-api + pentest-web · API 安全 + Web 应用安全 + - pentest-exploit + pentest-report · PoC 验证 + 报告生成 + - automotive-test + automotive-can-bus-test · 10 阶段主编排 + CAN 协议测试 + - automotive-adas-scenario + automotive-ota-update-test + automotive-hil-loop-test · ADAS 场景 + OTA 升级 + HIL 环测 - skill 23/32 production + 7 script + 0 rollout + 2 vision -### Added (V1.22 · CLI + 多 provider) +### Added (CLI + 多 provider) - **tagent config CLI** (`runtime/cli/config.py`): `tagent config use/set/unset/list/show` - **多 provider 通用 env 通道**: `LLM_PROVIDER` + `LLM_API_KEY` + `LLM_MODEL` 通用 env, 6 provider 内置 (claude/openai/gemini/qwen/deepseek/ollama) @@ -347,7 +347,7 @@ _后续累积变更入此节;切版本时移到下方版本节。_ ### Changed - **数字升级**: 14 expert → 16 expert, 14 skill → 32 skill -- **防 mock 双 layer** (V1.14.0+1): registry 单源 frontmatter + router warn + orchestrator hard block +- **防 mock 双 layer** : registry 单源 frontmatter + router warn + 
orchestrator hard block - vendor-neutral 命名规范 (禁 zhipu/deepseek/openai 厂商名) - README/ROADMAP 数字诚实化 (多轮校准) @@ -363,7 +363,7 @@ _后续累积变更入此节;切版本时移到下方版本节。_ > **首次正式版本切节**(W7-2, 2026-05-14): V1.1.0 到 V1.14.0 共 13 个内部 alpha 累积归入本节。后续新变更入 [Unreleased]。 -### Added(V1.14.0 · 5 核心 expert 真 LLM 落地 + 录制脚本 · 2026-05-12) +### Added(5 核心 expert 真 LLM 落地 + 录制脚本 · 2026-05-12) > 起因:战略参谋诚实交底——V1.13 的 selftest 100% PASS 是"骨架通"不是"内涵通",16 expert 里 11 个仍是 no-op。用户授权 C 路线(5 核心 expert 真 LLM)+ B(录制脚本)。 @@ -375,7 +375,7 @@ _后续累积变更入此节;切版本时移到下方版本节。_ - **真 LLM** 时:调 `aux_client.complete()` → 解析 JSON → 落盘 → 给下游 - INDEX.md 文档化 5 runner schema + 上下游 - **adapter wiring**(`runtime/orchestrator/adapters/experts.py`): - - `execute_node` 先查 `AGENT_RUNNERS`(优先 V1.14),fallback `SCRIPT_MAP`(主宪章 §9 不破坏) + - `execute_node` 先查 `AGENT_RUNNERS`(优先 V1.14),fallback `SCRIPT_MAP` - `_upstream_outputs` 缓存:每 runner 产物给下游 RunnerContext.upstream - `reset_upstream_cache()` 由 flow 每 run 开头调 - SCRIPT_MAP 路径排除 `artifact_text/lang/mode` 防多行文本炸 argparse @@ -392,11 +392,11 @@ _后续累积变更入此节;切版本时移到下方版本节。_ - `record-demo-asciinema.sh`:`asciinema rec` 自动录,产 .cast 可上传 asciinema.org 或转 GIF/SVG - `record-demo-obs.sh`:OBS / QuickTime 屏幕录制配套(用户摁录制 → 跑此脚本,节奏自动) - `docs/assets/terminalizer-config.yml`:精修 V1.14 配置(Catppuccin Mocha 主题 + UTF-8 + stub LLM env) -- **主宪章 §40 真 agent 落地 canon**:5 核心 + 11 fallback + 加新 runner 流程 + RunnerContext / RunnerResult 协议 -- 烟测:**9/9 strict PASS · 5 真 runner 产物全落盘**(原 V1.13 8/8 是 3 script + 5 no-op,V1.14 是 5 真 runner + 3 script + 1 no-op) +- **真 agent 落地 canon**:5 核心 + 11 fallback + 加新 runner 流程 + RunnerContext / RunnerResult 协议 +- 烟测:**9/9 strict PASS · 5 真 runner 产物全落盘**(原 V1.13 8/8 是 3 script + 5 no-op - 版本 V1.13.0 → **V1.14.0** -### Added(V1.13.0 · README hero 重写 + `tagent demo` + 30 秒 demo 录制脚本 · 2026-05-12) +### Added(README hero 重写 + `tagent demo` + 30 秒 demo 录制脚本 · 2026-05-12) - **`tagent demo` 子命令**:0 API key / 0 配置一键跑通 4 步——init minimal preset + L1 lint + L2 e2e + 产物清单 - 自动 stub LLM + 重置 settings 
缓存,避免 `_kernel` 模块加载顺序问题 @@ -413,7 +413,7 @@ _后续累积变更入此节;切版本时移到下方版本节。_ - 烟测 `tagent demo` 产 36+ 文件全过 · L1/L3 strict 不破 - 版本 V1.12.0 → **V1.13.0** -### Added(V1.12.0 · `tagent init` 配置自动组装 · 5 分钟从 0 到可跑 · 2026-05-12) +### Added(`tagent init` 配置自动组装 · 5 分钟从 0 到可跑 · 2026-05-12) - **新模块 `runtime/init/`**: - `matrix.py`:`load_matrix()` 加载 `config/templates/matrix.yaml`(单源真理) @@ -433,30 +433,30 @@ _后续累积变更入此节;切版本时移到下方版本节。_ - 测试类型:web/api/mobile/desktop/iot/car/ai_model/security - 平台:linux/windows/mac/android/ios/embedded - LLM:claude/openai/qwen/deepseek/ollama - - BugTracker(主宪章 §37):zentao/jira/github/gitlab/linear/webhook - - 通知(主宪章 §36):wechat/feishu/dingtalk/slack/email/teams(可多选) + - BugTracker:zentao/jira/github/gitlab/linear/webhook + - 通知:wechat/feishu/dingtalk/slack/email/teams(可多选) - **加新选项**:改 `matrix.yaml` 一处,wizard/CLI 自动列出(无需改代码) - **STARTUP.md 启动指南**:含填占位清单 + 装依赖 hint + 健康检查 + 烟雾跑通命令 + 推荐 skill 顺序 + 出错对照表 - 烟测:5 preset × 全过 + 8 测试类型组合全过 - L1 + L3 strict 不破:agents=16/16 skills=32/≥25 + selftest 8/8 100% - 版本 V1.11.0 → **V1.12.0** -### Fixed(V1.11.0 · 同步铁律批改 + BugTracker/多端 canon + n7 修 · 2026-05-12) +### Fixed(同步规则批改 + BugTracker/多端 canon + n7 修 · 2026-05-12) -- **同步铁律(§1)执行**:17 文件批改"三端通知"→"多端通知";"禅道 Bug 提交"项目级框架→"BugTracker(默认禅道,可换 Jira/GitHub/GitLab/Linear/Webhook)" +- **同步规则()执行**:17 文件批改"三端通知"→"多端通知";"禅道 Bug 提交"项目级框架→"BugTracker(默认禅道,可换 Jira/GitHub/GitLab/Linear/Webhook)" - `00-项目导航.md` · `agents/{01,07,08,09}.md` · `agents/README.md` · `skills/{README,test-coordinator,zentao-bug-submission}.md` · `config/mcp-server-impl.md` · `utils/{README.md,api_retry_util.py}` · `ci/{INDEX,CICD集成说明}.md` · `docs/getting-started/{交付物清单,使用手册,配置清单}.md` · `examples/web-demo/README.md` · `CONTRIBUTING.md` · `FULL_GUIDE.md` - **adapter 修 V1.10 n7 bug**:`runtime/orchestrator/adapters/experts.py` 加 `SCRIPT_DEFAULT_ARGS` + `_ensure_fixture()` 通用机制 - 现 `tagent selftest --e2e --strict` **100% PASS 8/8**(原 88% 7/8) - generate_report.py 默认注入 
`--data=workspace/测试报告/{项目名}/_selftest_summary.json`,fixture 自动生成 -- **主宪章扩**: - - §36 多端通知 canon(扩 §6,6 渠道权威清单 + env 字段 + 业务语言铁律) - - §37 BugTracker canon(扩 §12,6 adapter 权威清单 + measurement env + 措辞规范) - - §10 五铭文 + §6 MCP 接入:"三端通知" → "多端通知" +- **扩**: + - 多端通知 canon(扩 ,6 渠道权威清单 + env 字段 + 业务语言规则) + - BugTracker canon(扩 ,6 adapter 权威清单 + measurement env + 措辞规范) + - 五铭文 + MCP 接入:"三端通知" → "多端通知" - VERSION:1.10.0 → **1.11.0** -### Added(V1.10.0 · 4 层自检 + 精髓库三重防线 + 字体粗细 · 2026-05-12) +### Added(4 层自检 + 精髓库三重防线 + 字体粗细 · 2026-05-12) -- **4 层自检铁律(主宪章 §33)**: +- **4 层自检规则**: - L1 frontmatter lint(无 LLM):`runtime/healthcheck/agent_smoke.py` + pre-push hook - L2 CI mock e2e(stub LLM,0 成本):`selftest-mock` job 每 push 跑 - L3 真 LLM(~$4/release):`tagent doctor --agents --probe` + `tagent selftest --e2e` @@ -466,20 +466,20 @@ _后续累积变更入此节;切版本时移到下方版本节。_ - `tagent doctor --agents [--probe]`:L1 + 可选 L3 LLM ping 16 agent - `tagent selftest --e2e [--strict] [--pass-threshold 0.80]`:整体 e2e - **`LLMClient.complete()`**:plain text completion(原仅 `complete_json`),probe 用 -- **上游引用三重防线(主宪章 §34)**: +- **上游引用三重防线**: - `.gitignore` 加 upstream 排除规则 - - pre-commit hook 防止误入(主宪章 §29 + V1.10 铁律) + - pre-commit hook 防止误入 - CI file-count job 双校验 -- **字体粗细统一(主宪章 §35)**:`docs/STYLE.md`(标题 ≤3 级,`**bold**` 仅 3 场景,中英空格) +- **字体粗细统一**:`docs/STYLE.md`(标题 ≤3 级,`**bold**` 仅 3 场景,中英空格) - **补缺顶级 INDEX**:`docs/INDEX.md` + `examples/INDEX.md` + `profiles/INDEX.md` + `scripts/INDEX.md` - **pre-tag hook**:`scripts/git-pre-tag.sh` 卡 `git tag v1.x`(7 天内必须有 L3 log) - **fixture**:`examples/_smoke_prd.md` 触发完整 16 agent DAG -- 主宪章扩 §33/§34/§35;VERSION 1.9.0 → **1.10.0** +- 扩 //;VERSION 1.9.0 → **1.10.0** - 烟雾测试:L1 16/16+32/≥25 全过;L2 stub e2e 88% PASS(7/8 节点) -### Added(V1.9.0 · 用例多格式导出 · 用户自选 · 2026-05-12) +### Added(用例多格式导出 · 用户自选 · 2026-05-12) -- **`runtime/exporters/` 新模块**(对标主宪章 §5 多格式 I/O): +- **`runtime/exporters/` 新模块**: - `base.py`:`TestCaseTree` + `TestCaseNode` IR + `Exporter` ABC + `REGISTRY` + `@register` 装饰器 - 
`xmind.py`:XMind 8/ZEN/2020+ `.xmind`(ZIP:content.json + metadata.json + manifest.json,P0→priority-1 marker 自动转,无第三方 lib) - `markmap.py`:Markmap `.md`(frontmatter + nested headings/list,GitHub README 直渲,markmap.js / VSCode 插件兼容) @@ -493,12 +493,12 @@ _后续累积变更入此节;切版本时移到下方版本节。_ tagent export plan.json --format all --out-dir workspace/测试用例/ ``` - **`/testcase-design` skill 扩**:description 加多格式声明;末尾加 V1.9 思维导图 / 大纲段(Excel 仍是默认) -- **保留**:Excel 4-Sheet(`utils/excel_generator.py`)不动,§27 简洁优先 +- **保留**:Excel 4-Sheet(`utils/excel_generator.py`)不动 - **扩展点 P2 留位**:freemind / plantuml / mermaid-mindmap(按需加) - 烟雾测试:3 exporter × sample TestCaseTree 全过(content.json 解析正常 / OPML XML 解析正常 / Markmap frontmatter 完整) - 版本 V1.8.0 → V1.9.0 -### Added(V1.8.0 · build-your-own-x 教学扩 + Marketplace 4 lane · 2026-05-12) +### Added(build-your-own-x 教学扩 + Marketplace 4 lane · 2026-05-12) - **上游参考扩**:`build-your-own-x.md`(codecrafters/build-your-own-x curated list 萃取);加 INDEX 条目 - **教学层 KB 扩 13 大类**(原 12 → 13,加 `13-build-your-own/`): @@ -509,36 +509,36 @@ _后续累积变更入此节;切版本时移到下方版本节。_ - `marketplace/{skills,agents,mcp,hooks}/` 目录 - `marketplace/INDEX.md` + `registry.json` + `_safety_policy.yaml`(4 关安全门 + 3 信任级源) - `runtime/marketplace/`:catalog.py + verifier.py + installer.py + INDEX - - 4 关安全门:签名校验(SHA256/ed25519) + 注入扫(复用 §22 scheduler 模块) + 沙箱试跑(Docker network=none) + darwin 评分(≥75) + - 4 关安全门:签名校验(SHA256/ed25519) + 注入扫(复用 scheduler 模块) + 沙箱试跑(Docker network=none) + darwin 评分(≥75) - **CLI 加 5 子命令**:`tagent search/list/install/uninstall/verify` -- **主宪章 §30**:Marketplace 安全栅栏(4 关铁律 + 3 信任级 + safe-by-default + 不复制官方源 + 卸载只归档 + 紧急 kill switch) -- **主宪章 §31**:教学层扩 13 大类(byox P0/P1/P2 分档 + 预算检查 + essence_only policy) +- ****:Marketplace 安全栅栏(4 关规则 + 3 信任级 + safe-by-default + 不复制官方源 + 卸载只归档 + 紧急 kill switch) +- ****:教学层扩 13 大类(byox P0/P1/P2 分档 + 预算检查 + essence_only policy) - TOC 同步;skill 数升级 - 版本 V1.7.0 → V1.8.0 -### Added(V1.7.0 · Karpathy 4 原则 + ECC 测试加固 + Essence 自动汲取 · 2026-05-12) +### Added(Karpathy 
4 原则 + ECC 测试加固 + Essence 自动汲取 · 2026-05-12) - **上游参考扩 2 条目**: - `karpathy-skills.md`(125k★ · LLM 写代码 4 原则元层) - `everything-claude-code.md`(179k★ · AI agent harness 性能优化 200 skill / 53 agent / Homunculus instincts / Selective install) -- **Karpathy 4 原则**(主宪章 §27,元层贯穿):Think Before / Simplicity First / Surgical Changes / Goal-Driven Execution;`skills/karpathy-guidelines/SKILL.md` 部署 upstream 原文(类 darwin-skill 不改本地) -- **ECC 6 测试 skill 入库**(对测试有用的,§28): +- **Karpathy 4 原则**:Think Before / Simplicity First / Surgical Changes / Goal-Driven Execution;`skills/karpathy-guidelines/SKILL.md` 部署 upstream 原文(类 darwin-skill 不改本地) +- **ECC 6 测试 skill 入库**(对测试有用的 - `tdd-workflow` · TDD 80%+ 覆盖 - `verification-loop` · 5-phase verify(build→typecheck→lint→test→coverage) - `e2e-testing` · Playwright + 2FA/TOTP/SSO + 视觉回归 + 录屏 - `eval-harness` · pass@k / Jaccard@k / top-1 / latency Δ - - `security-review` · 代码层白盒 5 维(与 §25 pentest 应用层互补) + - `security-review` · 代码层白盒 5 维(与 pentest 应用层互补) - `agent-introspection-debugging` · 决策回放 + OTel + token + 上下文 -- **Essence 自动汲取**(主宪章 §29):`runtime/essence_watcher/` +- **Essence 自动汲取**:`runtime/essence_watcher/` - parser + tracker(gh API)+ delta_extractor(aux LLM)+ runner - 周期跑;新 commit → LLM 萃取 delta → 写 upstream update 文件 标 `llm-draft-unreviewed` 待审 - `apply_policy.example.yaml`:auto_propose / essence_only / never 三档 - safe-by-default:`tagent.yml essence_watcher.enabled: true` 才跑 -- **主宪章新增 3 节**:§27 Karpathy 4 原则 / §28 ECC 测试加固 / §29 Essence 自动汲取 + TOC 同步 +- **新增 3 节**:Karpathy 4 原则 / ECC 测试加固 / Essence 自动汲取 + TOC 同步 - 数字:14 skill → **32**(原 14 + 7 pentest + 5 automotive + 6 ECC) + `karpathy-guidelines/SKILL.md` upstream 1 个 - 版本 V1.6.0 → V1.7.0 -### Added(V1.6.0 · 渗透&安全 + 车载&自动驾驶 双垂直专家+skill 集 · 2026-05-12) +### Added(渗透&安全 + 车载&自动驾驶 双垂直专家+skill 集 · 2026-05-12) - **上游参考扩**:`pentest-ai-agents.md` 合并萃取 pentagi(黑盒)+ shannon(白盒);10 节;含对比表+应用 checklist - **2 新专家**: @@ -548,25 +548,25 @@ _后续累积变更入此节;切版本时移到下方版本节。_ - `pentest-coordinator`(主)/ 
`pentest-recon` / `pentest-vuln` / `pentest-exploit` / `pentest-web` / `pentest-api` / `pentest-report` - **5 新 automotive skill**: - `automotive-test`(主)/ `automotive-can-bus-test` / `automotive-adas-scenario` / `automotive-ota-update-test` / `automotive-hil-loop-test` -- **主宪章 §25**:渗透 & 安全测试强化(规则化:授权前置 / scope 防护 / prod 禁 / 沙箱 / PoC-only / 不可逆禁止 / 责任披露 / PII scrub) -- **主宪章 §26**:车载 & 自动驾驶强化(规则化:ASIL C/D 必 HIL / L4 极深 / OTA 必回退 / 公开道路授权 / 录波 MDF4 / PII 禁存 / 领域档案签字) -- **主宪章 §2 升级**:专家 14 → 16(核心 9 + 平台扩展 7) -- **TOC 同步**:加 §25 §26 +- **渗透 & 安全测试强化**(规则化:授权前置 / scope 防护 / prod 禁 / 沙箱 / PoC-only / 不可逆禁止 / 责任披露 / PII scrub) +- **车载 & 自动驾驶强化**(规则化:ASIL C/D 必 HIL / L4 极深 / OTA 必回退 / 公开道路授权 / 录波 MDF4 / PII 禁存 / 领域档案签字) +- **专家数升级**:专家 14 → 16(核心 9 + 平台扩展 7) +- **TOC 同步**:加入上述两节 - 数字:14 expert → **16** | 14 skill → **26**(7 pentest + 5 automotive 新增) - 版本 V1.5.0 → V1.6.0 -### Added(V1.5.0 · GBrain-inspired 强化 + 跨项目精髓库扩 · 2026-05-12) +### Added(GBrain-inspired 强化 + 跨项目精髓库扩 · 2026-05-12) - **上游参考扩**:`gbrain.md`(完整 10 节萃取,300+ 行)+ INDEX 更新 - **KB 自连图谱**:`runtime/tutor/graph.py`,零 LLM 抽取 typed link(6 种边:related_to/superseded_by/extends/prerequisite_of/contradicts/tool_implements);BFS walk + backlink-boosted ranking。实测 12 卡 → 40 edges + 44 nodes - **eval 回放**:`runtime/tutor/eval_replay.py`,`TAGENT_EVAL_CAPTURE=1` opt-in;PII 自动 scrub(email/phone/SSN/API-key/card 6 类正则);replay 3 数(Jaccard@k/top-1 stability/latency Δ);默认 off - **safe-by-default yaml 栅栏**:`runtime/config/safety.py` + `tagent.yml.example`;scheduler/curator/backends/gateway/destructive_ops 默认 deny;`assert_allowed` / `gate_*` 工厂函数;缺配置 → `SafeByDefaultBlocked` 异常 -- **主宪章 §24**:GBrain-inspired 强化(自连图谱 + 混合检索 + eval 回放 + safe-by-default + PII 单源)+ TOC 同步 +- **GBrain-inspired 强化**(自连图谱 + 混合检索 + eval 回放 + safe-by-default + PII 单源)+ TOC 同步 - 版本 V1.4.0 → V1.5.0 -### Added(V1.4.0 · 教学层 · 用户边用边学 · 2026-05-12) +### Added(教学层 · 用户边用边学 · 2026-05-12) -- **主宪章 §23 教学层准则**:exec(老手)/learn(新手)双模式 + 反幻觉 3 层 + 双语切换 + 持续累积 +- 
**教学层准则**:exec(老手)/learn(新手)双模式 + 反幻觉 3 层 + 双语切换 + 持续累积 - **Theory KB**:`docs/theory/`,12 大类目录(工具/编程/基础理论/策略/方法/协议/平台/门禁/安全/AI测试/合规/流程) - `_schema.yaml`:卡片字段定义(id/category/level/authority/confidence/last_reviewed) - `_authority_sources.yaml`:权威源白名单(国际 ISTQB/IEEE/ISO/IEC/NIST/OWASP/MITRE/Google/Microsoft/Fowler/arXiv/ICSE/ISSTA + 中国 GB/T/等保/阿里/腾讯/美团/字节/CCF + AI HF/Anthropic/OpenAI/DeepEval + 经典书 Beizer/Myers/Crispin/Kaner) @@ -584,7 +584,7 @@ _后续累积变更入此节;切版本时移到下方版本节。_ - **反幻觉**:实测 unknown-id 正确标记"该领域未收录,慎用" - 版本 V1.3.0 → V1.4.0 -### Added(V1.3.0 · Hermes-inspired 5 模块 + 跨项目精髓库 · 2026-05-11) +### Added(Hermes-inspired 5 模块 + 跨项目精髓库 · 2026-05-11) - **跨项目上游参考**:已建立 - `INDEX.md`:精髓库索引 @@ -595,51 +595,51 @@ _后续累积变更入此节;切版本时移到下方版本节。_ - `runtime/learning_loop/`:封闭学习循环(curator 闲置触发 + FTS5 跨会话搜 + 用户画像);`curator.py / session_search.py / user_model.py`;只归档不删 - `runtime/backends/`:7 执行后端(`local/docker/ssh/singularity/modal/daytona/vercel_sandbox`);统一 `BaseExecutionEnv` 7 方法;Modal/Daytona 提供 serverless hibernate - `runtime/gateway/`:多平台 messaging(`telegram/discord/slack/wechat/feishu/dingtalk/email/webhook` 8 平台);统一 `Platform.send/configure`;`session.py` 跨平台对话连续 -- **主宪章 §22**:Hermes-inspired 扩展能力章节(规则化);TOC 同步更新 +- ****:Hermes-inspired 扩展能力章节(规则化);TOC 同步更新 - 版本 V1.2.0 → V1.3.0 -### Added(V1.2.0 · M2 MCP 6 件套 + Web UI + 真模型路由 + 飞轮回灌 · 2026-05-11) +### Added(M2 MCP 6 件套 + Web UI + 真模型路由 + 飞轮回灌 · 2026-05-11) -- **MCP 6 件套全部实现**(主宪章 §16): +- **MCP 6 件套全部实现**: - `runtime/mcp/test_orchestrator/`:包装 runtime/router + orchestrator,5 工具(catalog/plan/run/status/report);Claude Code 可直接调用 - `runtime/mcp/protocol_adapter/`:统一 ProtocolAdapter 抽象 + 5 起步 adapter(HTTP/gRPC/WS/MQTT/Kafka);HTTP 实测 ping 通过 - `runtime/mcp/evidence_vault/`:证据归档 5 工具(upload_evidence/upload_evidence_path/list/get/search),MinIO + Postgres - - `runtime/mcp/defect_tracker/`:工单桥 5 工具(create/get/update/query_bugs/list_trackers),默认 zentao + 预留扩展位(主宪章 §12 契约) + - `runtime/mcp/defect_tracker/`:工单桥 5 
工具(create/get/update/query_bugs/list_trackers),默认 zentao + 预留扩展位 - `runtime/mcp/knowledge_base/`:pgvector 向量检索 4 工具(embed/index_case/index_defect/search_similar),LiteLLM embedding + stub 兜底 - `runtime/mcp/compliance_checker/`:行业合规规则 3 工具(list_profiles/get_profile/check_compliance);10 框架 profile 起步空载(SOC2/PCI-DSS/HIPAA/IEC 62304/IEC 61508/ISO 26262/DO-178C/GDPR/PIPL/CCPA) - - 共享基类 `runtime/mcp/base.py`:make_server / run_stdio / @tool_decision_logged(决策落 `workspace/测试报告/{项目名}/decisions/` 符合主宪章 §18-12) + - 共享基类 `runtime/mcp/base.py`:make_server / run_stdio / @tool_decision_logged - **行业合规规则插槽** `profiles/compliance/`:10 框架空载示例 YAML,真规则由领域专家+test-lead 双签签字后入库 - **飞轮回灌路由**(M2-9):`runtime/router/retrieval.py` 历史相似用例 → LLM prompt few-shot;router 透明集成,无 KB 时降级 -- **真模型路由测试套件**(M2-7):`runtime/tests/test_router_real.py` 20 样本(4 类型 × 5)真模型测试;门槛单模型 ≥85%、双模型投票 ≥95%;无 API key 自动 skip;失败自动落 decisions/ 含 seed+模型版本+输入快照(主宪章 §21 横切准则) +- **真模型路由测试套件**(M2-7):`runtime/tests/test_router_real.py` 20 样本(4 类型 × 5)真模型测试;门槛单模型 ≥85%、双模型投票 ≥95%;无 API key 自动 skip;失败自动落 decisions/ 含 seed+模型版本+输入快照 - **Web UI MVP**(M2-8):`runtime/web/` Vite+React 18+TypeScript+shadcn/ui+TanStack Query+React Router v7 - 4 页:Upload(text/file/URL 三模式) / RunStatus(SSE 进度条) / Report(节点结果表) / Catalog(14 专家+14 skill) - - §21 L2 必测项:Playwright E2E 7 用例(功能+边界+异常+兼容+可访问性);axe-core a11y 0 critical 门槛 + - L2 必测项:Playwright E2E 7 用例(功能+边界+异常+兼容+可访问性);axe-core a11y 0 critical 门槛 - 配套 vite 代理 `/api` → FastAPI(:8800) - **`.mcp.json` 升级**:启用 `filesystem` + `test-orchestrator`;其他 5 件套写入 `_pending_servers_v1_2_0_alpha` 段供按需启用 - 版本 V1.1.0 → V1.2.0 -### Added(V1.1.0 · 宪章合一 · darwin-skill 入库 · 2026-05-11) - -- **主宪章扩展(memory `project_test_agent_workflow.md`)**:原 §0-§9 + How to apply 1-6 **字符级保留**;新增 §10-§20 仅承载规则/要求/约束(剔除示例/枚举/参考表): - - §10 灵魂底色:三公理 + 五条铭文 + V1.0.0 锁死 + 双签解锁条件 - - §11 FULL_GUIDE.md 定位补充(优先级链:memory > FULL_GUIDE > README) - - §12 多 Bug Tracker(默认 zentao + 扩展位 `BugTrackerBase` 契约) - - §13 按需安装 + 运行时补装铁律 - - §14 
darwin-skill 自进化(棘轮 + Via Negativa 不消费运行数据) - - §15 AgentChat 协作协议(test-lead 中枢 + 反问 3 级预算 + 争议未落档不签发) - - §16 MCP 服务扩展位(6 件套 Phase 2) - - §17 九大簇维度边界(认知地图;承认存在不假装能交付) - - §18 测试架构 + 5 层门禁分层 + Flaky vs Reruns 哲学 - - §19 闭环约定 18 条(扩展 §8 质量闭环) - - §20 Phase 触发条件(不绑月份) +### Added(宪章合一 · darwin-skill 入库 · 2026-05-11) + +- **扩展(memory `project_test_agent_workflow.md`)**:原 -+ How to apply 1-6 **字符级保留**;新增 -仅承载规则/要求/约束(剔除示例/枚举/参考表): + - 灵魂底色:三公理 + 五条铭文 + V1.0.0 锁死 + 双签解锁条件 + - FULL_GUIDE.md 定位补充(优先级链:memory > FULL_GUIDE > README) + - 多 Bug Tracker(默认 zentao + 扩展位 `BugTrackerBase` 契约) + - 按需安装 + 运行时补装规则 + - darwin-skill 自进化(棘轮 + Via Negativa 不消费运行数据) + - AgentChat 协作协议(test-lead 中枢 + 反问 3 级预算 + 争议未落档不签发) + - MCP 服务扩展位(6 件套 Phase 2) + - 九大簇维度边界(认知地图;承认存在不假装能交付) + - 测试架构 + 5 层门禁分层 + Flaky vs Reruns 哲学 + - 闭环约定 18 条(扩展 质量闭环) + - Phase 触发条件(不绑月份) - How to apply 7-12 扩展项(铭文优先级 / 决策可追溯 / 纪要不可删 / darwin 棘轮 / 依赖补装反问 / 修改四关) -- **行业适配参照表全删除**(主宪章 + FULL_GUIDE 双删) +- **行业适配参照表全删除** - **darwin-skill 入库**:`skills/darwin-skill/` 完整部署(SKILL.md + scripts/ + templates/ + assets/ + docs/),upstream 原文不改;13 Skill → 14 Skill -- **FULL_GUIDE.md 优化**:三公理/铭文 + 18 闭环段替换为"已迁主宪章 §X"指引(避免双份维护);Bug Tracker / 按需安装 / darwin / AgentChat 详节保留作为深度参考;附 runtime 章节(M1-11 留存) +- **FULL_GUIDE.md 优化**:三公理/铭文 + 18 闭环段替换为"已迁X"指引(避免双份维护);Bug Tracker / 按需安装 / darwin / AgentChat 详节保留作为深度参考;附 runtime 章节(M1-11 留存) -### Added(V1.1.0 · 运行时层) +### Added(运行时层) -- **新增 `runtime/` 运行时层**:把 14 专家 + 13 Skill + 67 脚本从"文档+工具箱"升级为"可执行运行时"。已有定义/Skill/脚本**保持不动**(宪章铁律),`runtime/` 仅作调度层。 +- **新增 `runtime/` 运行时层**:把 14 专家 + 13 Skill + 67 脚本从"文档+工具箱"升级为"可执行运行时"。已有定义/Skill/脚本**保持不动**(宪章规则),`runtime/` 仅作调度层。 - `runtime/router/`:AI 路由(LiteLLM 多厂商:Claude/OpenAI/Gemini/Qwen/DeepSeek/Ollama)。被测物 → 专家+Skill DAG。含 stub provider 供 CI 离线测,准确率 5/5 类型(web/api/mobile/desktop/ai-model) - `runtime/registry/`:扫 `agents/*.md` + `skills/*.md` frontmatter 生成统一目录(14 expert + 13 skill,实测通过) - `runtime/orchestrator/`:**双轨**——Prefect 2.x flow(全功能,带 
UI/重试/状态机)+ Direct 执行器(无 Prefect 也能跑,ThreadPoolExecutor 并发,降级方案) @@ -679,8 +679,8 @@ _后续累积变更入此节;切版本时移到下方版本节。_ - 新建 `CHANGELOG.md` + `VERSION` 文件,启动语义版本管理。 - **W3 信息架构重塑**: - - `README_DETAIL.md` 改名为 `FULL_GUIDE.md`(宪章§0 文件分发策略:README.md 简明入口 ≤ 200 行 / FULL_GUIDE.md 详细指南) - - 新建 `docs/getting-started/INDEX.md` / `config/INDEX.md` / `ci/INDEX.md`(宪章§3 每目录索引;02/03/05 已有 README.md 等价于 INDEX) + - `README_DETAIL.md` 改名为 `FULL_GUIDE.md`(宪章文件分发策略:README.md 简明入口 ≤ 200 行 / FULL_GUIDE.md 详细指南) + - 新建 `docs/getting-started/INDEX.md` / `config/INDEX.md` / `ci/INDEX.md`(宪章每目录索引;02/03/05 已有 README.md 等价于 INDEX) - `README.md` 头加项目代号 `test-agent-team` + 版本 + License - `README.md` 删除三视角矩阵段(迁移至 FULL_GUIDE.md,避免双份维护) - `README.md` 行数从 240 降至 168 行 @@ -689,7 +689,7 @@ _后续累积变更入此节;切版本时移到下方版本节。_ - `.pre-commit-config.yaml` 加 gitleaks hook(凭据扫描) - `.gitignore` 补漏:`.ruff_cache/` / `*.jtl` / `*.pem` / `*.key` / `*.crt` / `*.p12` / `*.pfx` / `*.jks` / `id_rsa` / `id_ed25519` / `coverage.xml` / `pip-wheel-metadata/` - **W3 收尾 · 方法论沉淀(F'+J+K)**: - - `CONTRIBUTING.md` 末尾追加:**同步铁律段**(联动改动清单速查 + 自动化保障)+ **RACI 协作矩阵浓缩版**(14 专家 × 35 测试维度,含责任边界冲突解决与质量门禁联动) + - `CONTRIBUTING.md` 末尾追加:**同步规则段**(联动改动清单速查 + 自动化保障)+ **RACI 协作矩阵浓缩版**(14 专家 × 35 测试维度,含责任边界冲突解决与质量门禁联动) - `FULL_GUIDE.md` 末尾追加:**测试架构合理性深度章节**(6 子节:金字塔 2024 现代版 / Shift-Left 7 层 / Shift-Right 9 层 / 可观测三柱 + 测试可视化 / 五层质量门禁 + Flaky vs Reruns 哲学 / 调整路径 Phase 2-4 落地点) - 新建 `examples/web-demo/`:8 文件最小可跑 Web 测试示例(pytest + Playwright + Page Object,演示 `https://playwright.dev`,5 分钟跑通) - `FULL_GUIDE.md:395` 漏修补救:`utils/*.py(12 个)` → `67 个,含 __init__.py` diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 4692f02e..0d0ad511 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -134,7 +134,7 @@ pytest --collect-only --- -## 同步铁律(项目宪章 §1) +## 同步规则(项目宪章 ) 任一文档/代码改动 → 必须同步到所有引用方,并加 `CHANGELOG.md` 条目。 diff --git a/FULL_GUIDE.md b/FULL_GUIDE.md index a686b8e7..1cb27d86 100644 --- a/FULL_GUIDE.md +++ b/FULL_GUIDE.md @@ -5,7 +5,7 @@ > 
**维护原则**:决策入档、开放问题入档、不打脸的承诺才写。重大决策须更新「📋 开放问题」与「🗺️ 项目当前状态」两节。 **项目名称**:`Test-Agent`(内部代号 `test-agent-team`) -**当前阶段**:Phase 2 前期(V1.0.0 · 16 expert + 32/32 skill active (11 production + 5 script-backed) + 0 rollout + 0 vision) +**当前阶段**:Phase 2 前期(16 expert + 32/32 skill active (11 production + 5 script-backed) + 0 rollout + 0 vision) **版本**:V1.0.0(详见 [VERSION](VERSION) + [CHANGELOG.md](CHANGELOG.md)) **更新日期**:2026-06-04 **模型**:Claude 4.x 系列(Opus 4.7 / Sonnet 4.6 / Haiku 4.5,由 Claude Code 默认管理) diff --git a/ROADMAP.md b/ROADMAP.md index 4423c88f..58f36842 100644 --- a/ROADMAP.md +++ b/ROADMAP.md @@ -5,7 +5,7 @@ > - **expert 16/16 active**(11 production + 5 script);0 rollout。 > - **skill 32/32 active**(23 production + 7 script + 2 vision→production);0 rollout;0 vision。 > - 3 meta-skill(nuwa-skill / darwin-skill / karpathy-guidelines)独立,工具属性,不在 32 业务 skill 数内。 -> - **V1.21.0 新增 SkillRunner 基础设施** (`runtime/orchestrator/skills/` + `SKILL_RUNNERS` registry + `@register_skill` deco + `experts.py` kind=skill 接 runner),解锁 skill 层 LLM-driven 全 16 实装 (V1.21-V1.31)。 +> - **V1.21.0 新增 SkillRunner 基础设施** (`runtime/orchestrator/skills/` + `SKILL_RUNNERS` registry + `@register_skill` deco + `experts.py` kind=skill 接 runner),解锁 skill 层 LLM-driven 全 16 实装 。 ## 当前活跃 expert (16 / 16) — V1.x rollout 收尾 @@ -18,12 +18,12 @@ | `automation-engineer` | Web/API 脚本编写 + 性能测试编排 | | `test-executor` | 测试执行与监控 | | `bug-manager` | Bug 提交与追踪 | -| `env-manager` | 环境检查清单 + 准备步骤(V1.15.0 minimum viable) | -| `mobile-tester` | 移动测试用例 + ADB/Xcode 命令清单(V1.16.0 minimum viable) | -| `visual-tester` | 视觉测试点 + 对比脚本片段 + 容差配置(V1.17.0 minimum viable) | -| `system-tester` | IoT/串口/MQTT 测试用例 + 命令清单 + 协议特定配置(V1.18.0 minimum viable) | -| `pentest-tester` | 5 攻击域渗透测试计划 + 工具清单 + PoC plan(V1.19.0 minimum viable;仅输出计划文本,真执行守护在 utils 层 env gate) | -| `automotive-tester` | ASIL 评估 + HIL 测试 + ADAS 场景 + OTA 升级 + 合规矩阵(V1.20.0 minimum viable;V1.x rollout 收尾) | +| `env-manager` | 环境检查清单 + 准备步骤 | +| `mobile-tester` | 移动测试用例 + 
ADB/Xcode 命令清单 | +| `visual-tester` | 视觉测试点 + 对比脚本片段 + 容差配置 | +| `system-tester` | IoT/串口/MQTT 测试用例 + 命令清单 + 协议特定配置 | +| `pentest-tester` | 5 攻击域渗透测试计划 + 工具清单 + PoC plan | +| `automotive-tester` | ASIL 评估 + HIL 测试 + ADAS 场景 + OTA 升级 + 合规矩阵 | ### 5 script-backed (已上线) @@ -45,17 +45,17 @@ |-------|------| | `tdd-workflow` | TDD 工作流 | | `e2e-testing` | E2E 测试 | -| `automotive-test` | 车载主编排(V1.31.0 · automotive batch) | -| `automotive-can-bus-test` | CAN总线测试(V1.31.0) | -| `automotive-adas-scenario` | ADAS场景库(V1.31.0) | -| `automotive-ota-update-test` | OTA升级测试(V1.31.0) | -| `automotive-hil-loop-test` | HIL环路测试(V1.31.0) | +| `automotive-test` | 车载主编排(automotive batch) | +| `automotive-can-bus-test` | CAN总线测试 | +| `automotive-adas-scenario` | ADAS场景库 | +| `automotive-ota-update-test` | OTA升级测试 | +| `automotive-hil-loop-test` | HIL环路测试 | | `regression-test` | 回归测试 | | `smoke-test` | 冒烟测试 | | `testcase-design` | 用例设计 | | `test-coordinator` | 测试流程编排 | | `verification-loop` | 5-phase 验证循环 | -| `eval-harness` | LLM 评测编排(V1.27.0 · skill rollout #5) | +| `eval-harness` | LLM 评测编排(skill rollout #5) | ### 7 script-backed (已上线) @@ -100,36 +100,36 @@ | Skill | 范围 | 关联 expert | 状态 | |-------|------|-------------|------| -| `mobile-test` | Android/iOS + 小程序 自动化 | mobile-tester | **done** (V1.23.0 · runtime/orchestrator/skills/mobile_test.py) | -| `visual-test` | 图像识别 + OCR + SSIM 视觉回归 | visual-tester | **done** (V1.24.0 · runtime/orchestrator/skills/visual_test.py) | -| `system-test` | IoT/串口/MQTT/音视频/Jaeger/Kafka | system-tester | **done** (V1.26.0 · runtime/orchestrator/skills/system_test.py) | -| `eval-harness` | LLM 评测(pass@k / Jaccard / stability) | ai-tester(深化) | **done** (V1.27.0 · runtime/orchestrator/skills/eval_harness.py · 5 阶段编排 + 质量门禁 + 安全护栏) | +| `mobile-test` | Android/iOS + 小程序 自动化 | mobile-tester | **done** (runtime/orchestrator/skills/mobile_test.py) | +| `visual-test` | 图像识别 + OCR + SSIM 视觉回归 | visual-tester | **done** (runtime/orchestrator/skills/visual_test.py) 
| +| `system-test` | IoT/串口/MQTT/音视频/Jaeger/Kafka | system-tester | **done** (runtime/orchestrator/skills/system_test.py) | +| `eval-harness` | LLM 评测(pass@k / Jaccard / stability) | ai-tester(深化) | **done** (runtime/orchestrator/skills/eval_harness.py · 5 阶段编排 + 质量门禁 + 安全护栏) | ### Pentest 7 skill(已全部完成 · SECURITY.md 武器化授权 wiring 已实装) | Skill | 范围 | 状态 | |-------|------|------| -| `pentest-coordinator` | 渗透总编排(授权 → 侦察 → 漏洞 → 利用 → 报告) | **done** (V1.21.0 · runtime/orchestrator/skills/pentest_coordinator.py · 5 阶段编排 + authorization_check + subagent_pool + refuse_conditions) | -| `pentest-recon` | 侦察(被动+主动信息收集) | **done** (V1.25.0) | -| `pentest-vuln` | 漏洞发现(5 攻击域 + SAST/DAST) | **done** (V1.25.0) | -| `pentest-exploit` | 漏洞利用(沙箱 PoC,不真破坏) | **done** (V1.30.0 · pentest batch 2) | -| `pentest-api` | API 渗透(OWASP API Top 10 2023) | **done** (V1.30.0 · pentest batch 2) | -| `pentest-web` | Web 渗透(OWASP Top 10 + ASVS) | **done** (V1.30.0 · pentest batch 2) | -| `pentest-report` | 渗透报告(仅 working PoC 入报告,shannon 哲学) | **done** (V1.30.0 · pentest batch 2) | +| `pentest-coordinator` | 渗透总编排(授权 → 侦察 → 漏洞 → 利用 → 报告) | **done** (runtime/orchestrator/skills/pentest_coordinator.py · 5 阶段编排 + authorization_check + subagent_pool + refuse_conditions) | +| `pentest-recon` | 侦察(被动+主动信息收集) | **done** | +| `pentest-vuln` | 漏洞发现(5 攻击域 + SAST/DAST) | **done** | +| `pentest-exploit` | 漏洞利用(沙箱 PoC,不真破坏) | **done** (pentest batch 2) | +| `pentest-api` | API 渗透(OWASP API Top 10 2023) | **done** (pentest batch 2) | +| `pentest-web` | Web 渗透(OWASP Top 10 + ASVS) | **done** (pentest batch 2) | +| `pentest-report` | 渗透报告(仅 working PoC 入报告,shannon 哲学) | **done** (pentest batch 2) | ### Automotive 5 skill | Skill | 范围 | |-------|------| -| `automotive-test` | 整车主编排(ECU + ADAS + IVI + V2X) | **done** (V1.31.0 · automotive batch) | -| `automotive-can-bus-test` | CAN/CAN-FD/LIN/FlexRay/SOME-IP | **done** (V1.31.0 · automotive batch) | -| `automotive-adas-scenario` | ADAS 场景库 + SOTIF(ISO 21448) | 
**done** (V1.31.0 · automotive batch) | -| `automotive-ota-update-test` | OTA 升级(UN R156 / GB 44496-2024) | **done** (V1.31.0 · automotive batch) | -| `automotive-hil-loop-test` | HIL/SIL/MIL/PIL 环路 | **done** (V1.31.0 · automotive batch) | +| `automotive-test` | 整车主编排(ECU + ADAS + IVI + V2X) | **done** (automotive batch) | +| `automotive-can-bus-test` | CAN/CAN-FD/LIN/FlexRay/SOME-IP | **done** (automotive batch) | +| `automotive-adas-scenario` | ADAS 场景库 + SOTIF(ISO 21448) | **done** (automotive batch) | +| `automotive-ota-update-test` | OTA 升级(UN R156 / GB 44496-2024) | **done** (automotive batch) | +| `automotive-hil-loop-test` | HIL/SIL/MIL/PIL 环路 | **done** (automotive batch) | --- -## V1.34-V1.36 能力扩展 +## V1.34-V1.36 能力扩展 - **V1.34**: script_bridge.py 桥接 5 独立脚本进 orchestrator pipeline - **V1.35**: 11 深度审计模块 (flaky guard / API security v2 / data factory v2 / perf orchestrator / event harness / visual regression / ML prioritizer / differential tester / EU AI Act / supply chain) diff --git a/SECURITY.md b/SECURITY.md index 764624a2..23bedeec 100644 --- a/SECURITY.md +++ b/SECURITY.md @@ -77,7 +77,7 @@ - [ ] 仅在**自己拥有 / 经书面授权**的系统上运行上述工具 - [ ] 在 `tagent.yml` 显式设置 `pentest.authorized: true`(此为操作者自证授权,不构成第三方授权证明) - [ ] 遵守所在司法管辖区法律: - - **中国**:《刑法》§285-§287(非法侵入 / 破坏 / 非法控制计算机信息系统罪);《网络安全法》§27 / §63 + - **中国**:《刑法》第 285–287 条(非法侵入 / 破坏 / 非法控制计算机信息系统罪);《网络安全法》第 27 条 / 第 63 条 - **美国**:Computer Fraud and Abuse Act(CFAA, 18 U.S.C. § 1030) - **欧盟**:NIS2 Directive(EU 2022/2555) diff --git a/ai/CLAUDE.md b/ai/CLAUDE.md index 751b9494..6dff5211 100644 --- a/ai/CLAUDE.md +++ b/ai/CLAUDE.md @@ -162,7 +162,7 @@ python -c "from runtime.registry.registry import build_catalog; c=build_catalog( 每次提交前问自己: 1. 有没有硬编码路径?→ 应该全部走 settings -2. 新文件放对目录了吗?→ 对照 §3.1 +2. 新文件放对目录了吗?→ 对照 3.1 节 3. 有没有不该提交的文件?→ `git status` 确认 4. 核心功能实测通过了吗?→ 至少 registry catalog 能构建 5. 
改 `utils/` 了吗?→ 如果改了,要测双模式都正常 diff --git "a/ai/agents/07-\346\265\213\350\257\225\346\211\247\350\241\214.md" "b/ai/agents/07-\346\265\213\350\257\225\346\211\247\350\241\214.md" index 0c0706d7..ccc70970 100644 --- "a/ai/agents/07-\346\265\213\350\257\225\346\211\247\350\241\214.md" +++ "b/ai/agents/07-\346\265\213\350\257\225\346\211\247\350\241\214.md" @@ -26,7 +26,7 @@ paired_skills: [smoke-test, regression-test] ## 协作输出 - 向 **test-lead**:执行结果 JSON(含 pass_rate / 失败分类 / 性能门禁状态) -- 向 **bug-manager**:failure_type=product_bug 列表(自动批量提交 BugTracker,默认禅道,可换 Jira/GitHub/GitLab/Linear/Webhook,主宪章 §12) +- 向 **bug-manager**:failure_type=product_bug 列表 - 向 **report-generator**:完整结果 JSON + Allure results + JMeter HTML - 向 **automation-engineer**:test_code_bug 反馈(脚本错误) - 向 **env-manager**:environment_issue 反馈(环境异常) diff --git "a/ai/agents/08-Bug\347\256\241\347\220\206.md" "b/ai/agents/08-Bug\347\256\241\347\220\206.md" index 35b686b0..ab1d9b0e 100644 --- "a/ai/agents/08-Bug\347\256\241\347\220\206.md" +++ "b/ai/agents/08-Bug\347\256\241\347\220\206.md" @@ -1,6 +1,6 @@ --- name: bug-manager -description: Bug管理专家 - 规范提交Bug到BugTracker(默认禅道,可换 Jira/GitHub/GitLab/Linear/Webhook,主宪章 §12 BugTrackerBase 统一契约),追踪Bug修复进度,验证修复结果,生成Bug统计分析报告。默认实现 utils/zentao_bug_manager.py(权威 severity 1=P0/2=P1/3=P2/4=P3);切换 adapter 由 .env `BUG_TRACKER` 字段指定。 +description: Bug管理专家 - 规范提交Bug到BugTracker,追踪Bug修复进度,验证修复结果,生成Bug统计分析报告。默认实现 utils/zentao_bug_manager.py(权威 severity 1=P0/2=P1/3=P2/4=P3);切换 adapter 由 .env `BUG_TRACKER` 字段指定。 tools: Read, Write, Bash, Grep, Glob EXPERT_IMPL_STATUS: production paired_skills: [zentao-bug-submission] diff --git "a/ai/agents/15-\346\270\227\351\200\217\346\265\213\350\257\225.md" "b/ai/agents/15-\346\270\227\351\200\217\346\265\213\350\257\225.md" index 35b33af9..6e0c7c24 100644 --- "a/ai/agents/15-\346\270\227\351\200\217\346\265\213\350\257\225.md" +++ "b/ai/agents/15-\346\270\227\351\200\217\346\265\213\350\257\225.md" @@ -52,15 +52,15 @@ paired_skills: 
[pentest-coordinator] **Static-Dynamic Correlation**(shannon 差异化):SAST 发现的不报为"理论风险",必须 exploit agent 真打成功 + 关联源码才入报告。 -## 主宪章铁律(必遵守) +## 规则(必遵守) -- **§24 safe-by-default**:`tagent.yml` `pentest.authorized: true` + `pentest.scope: [list]` 显式才允许;否则拒绝运行 -- **§24 PII scrub**:报告含敏感数据自动脱敏(email/phone/SSN/card) -- **§22 决策不可逆禁止**:不真删数据;不真破坏文件;不 force-push;PoC 仅生成不执行不可逆操作 -- **§22 隔离 client**:子 agent 用 `subagent.aux_client`,不污染主 session -- **§22 沙箱**:危险 exploit 必须在 Docker/VM 内跑,不在 host -- **§21 横切可复现性**:每个 PoC 必带 seed + recordings(Burp HAR / 截图) -- **§21 测试深度 L4**:渗透是 L4 极深,必须含安全渗透 + 决策可追溯 + 监管送审证据链 +- **safe-by-default**:`tagent.yml` `pentest.authorized: true` + `pentest.scope: [list]` 显式才允许;否则拒绝运行 +- **PII scrub**:报告含敏感数据自动脱敏(email/phone/SSN/card) +- **不可逆操作**:不真删数据;不真破坏文件;不 force-push;PoC 仅生成不执行 +- **隔离 client**:子 agent 用 `subagent.aux_client`,不污染主 session +- **沙箱**:危险 exploit 必须在 Docker/VM 内跑,不在 host +- **横切可复现性**:每个 PoC 必带 seed + recordings(Burp HAR / 截图) +- **测试深度 L4**:渗透是 L4 极深,必须含安全渗透 + 决策可追溯 + 监管送审证据链 ## 调用 Skill 速查 @@ -79,7 +79,7 @@ paired_skills: [pentest-coordinator] - 完整渗透报告(Markdown + PDF):仅 PoC 验证漏洞 - 每条漏洞:CWE id + CVSS 评分 + 受影响组件 + PoC 步骤 + 修复建议 + 引用 OWASP/NIST - 证据包:HAR / 截图 / 录屏 / 日志 → `mcp-evidence-vault` -- Bug 单:严重度按 CVSS 映射到 1=P0 / 2=P1 / 3=P2 / 4=P3(主宪章 §18-4) +- Bug 单:严重度按 CVSS 映射到 1=P0 / 2=P1 / 3=P2 / 4=P3 ## 不做的事(Via Negativa) @@ -87,4 +87,4 @@ paired_skills: [pentest-coordinator] - 不做真破坏 — 即使授权也只生成 PoC,不执行 `rm -rf`/`DROP TABLE` - 不报无 PoC 的"理论风险" — 假阳性疲劳是渗透报告头号杀手 - 不在 prod 环境直接打 — 沙箱/staging 优先;真 prod 测必 staging-mirror -- 不存客户 PII 在飞轮(主宪章 §24 PII 单源 scrub) +- 不存客户 PII 在飞轮 diff --git "a/ai/agents/16-\350\275\246\350\275\275\346\265\213\350\257\225.md" "b/ai/agents/16-\350\275\246\350\275\275\346\265\213\350\257\225.md" index 54d58f04..65672cc2 100644 --- "a/ai/agents/16-\350\275\246\350\275\275\346\265\213\350\257\225.md" +++ "b/ai/agents/16-\350\275\246\350\275\275\346\265\213\350\257\225.md" @@ -40,7 +40,7 @@ paired_skills: [automotive-test] | 
**PIL** Processor-in-the-Loop | 真 ECU + 仿真环境 | 算法+硬件初步 | | **HIL** Hardware-in-the-Loop | 真 ECU + 真 I/O(模拟外设)| 集成 + 故障注入 | -**ASIL C/D 必须 HIL 通过**(主宪章 §21 L4 极深)。 +**ASIL C/D 必须 HIL 通过**。 ## ADAS 场景库 @@ -59,7 +59,7 @@ paired_skills: [automotive-test] | 标准 | 范围 | 触发 | |------|------|------| -| **ISO 26262**(主宪章 §21 已有) | 功能安全 ASIL A-D | 所有汽车 ECU | +| **ISO 26262** | 功能安全 ASIL A-D | 所有汽车 ECU | | **ISO 21448(SOTIF)** | 预期功能安全 | ADAS / AD 必有 | | **UN R155** | 网络安全 CSMS | 2024 起新车强制(欧盟) | | **UN R156** | OTA 升级管理 SUMS | 同 R155 | @@ -79,14 +79,14 @@ paired_skills: [automotive-test] 6. **诊断 DTC 升级前后对比** 7. **UN R156 文档化**:升级日志 + 用户通知 + 回退证据 -## 主宪章铁律 +## 规则 -- **§21 测试深度**:车载 ECU 默认 L3+,ADAS/底盘/转向 L4 极深 -- **§22 决策不可逆禁止**:OTA 包必含回退;真车测试必含 kill-switch -- **§19 行业适配**:接入车载行业必先《领域档案》+ 领域专家签字 -- **§24 safe-by-default**:`tagent.yml` `automotive.fleet_test_authorized: true` 才允许真车数据 -- **§21 横切可复现性**:HIL 测试必含 seed + 录波(MDF/MF4 格式) -- **§18-12 决策可追溯**:ADAS 场景每次过/挂必落 `decisions/` 含场景 id + 仿真版本 + 算法版本 +- **测试深度**:车载 ECU 默认 L3+,ADAS/底盘/转向 L4 极深 +- **不可逆操作**:OTA 包必含回退;真车测试必含 kill-switch +- **行业适配**:接入车载行业必先《领域档案》+ 领域专家签字 +- **safe-by-default**:`tagent.yml` `automotive.fleet_test_authorized: true` 才允许真车数据 +- **横切可复现性**:HIL 测试必含 seed + 录波(MDF/MF4 格式) +- **决策可追溯**:ADAS 场景每次过/挂必落 `decisions/` 含场景 id + 仿真版本 + 算法版本 ## 调用 Skill 速查 @@ -105,11 +105,11 @@ paired_skills: [automotive-test] - HIL 测试录波(MDF/MF4) - 场景库结果矩阵(过/挂/未跑) - 合规审计包(SOTIF / R155 / R156)→ `mcp-compliance-checker` -- Bug 单 → 按 ASIL 严重度映射 P0-P3(主宪章 §18-4) +- Bug 单 → 按 ASIL 严重度映射 P0-P3 ## 不做的事 - 不在公开道路擅自跑 — 法规授权 + 安全员 - 不绕过 ASIL 等级 — 降级必须签字 -- 不存车主 PII / 车 VIN 在飞轮(主宪章 §24) -- 不测试电控涉及生命安全功能时跳过 HIL — 主宪章 §21 L4 铁律 +- 不存车主 PII / 车 VIN 在飞轮 +- 不测试电控涉及生命安全功能时跳过 HIL — L4 规则 diff --git a/ai/agents/README.md b/ai/agents/README.md index 7919cdca..d27c696a 100644 --- a/ai/agents/README.md +++ b/ai/agents/README.md @@ -17,7 +17,7 @@ | 05 | `05-数据准备.md` | data-preparer | test_data.json + jmeter_users.csv | | 06 | `06-自动化脚本.md` | 
automation-engineer | pytest UI/API 脚本 + 协调 JMeter | | 07 | `07-测试执行.md` | test-executor | 执行结果 JSON + Allure + JMeter | -| 08 | `08-Bug管理.md` | bug-manager | BugTracker Bug ID 列表 + 日报(默认禅道,可换 Jira/GitHub/GitLab/Linear/Webhook,主宪章 §12) | +| 08 | `08-Bug管理.md` | bug-manager | BugTracker Bug ID 列表 + 日报 | | 09 | `09-报告生成.md` | report-generator | Word 报告 + 多端通知(企微/飞书/钉钉/Slack/邮件/Teams) | ### 流程依赖关系 diff --git a/ai/skills/agent-introspection-debugging.md b/ai/skills/agent-introspection-debugging.md index 0edb272a..2e901ec5 100644 --- a/ai/skills/agent-introspection-debugging.md +++ b/ai/skills/agent-introspection-debugging.md @@ -1,6 +1,6 @@ --- name: agent-introspection-debugging -description: "Agent 自省调试 Skill。LLM 决策 / 工具调用 / 上下文 / token / 状态机透明化。失败用例分析 + 决策回放。派生自 ECC 同名 skill(主宪章 §28)。" +description: "Agent 自省调试 Skill。LLM 决策 / 工具调用 / 上下文 / token / 状态机透明化。失败用例分析 + 决策回放。派生自 ECC 同名 skill。" tools: Read, Write, Bash, Grep, Glob SKILL_IMPL_STATUS: production --- @@ -24,7 +24,7 @@ SKILL_IMPL_STATUS: production | **上下文** | prompt 长度 + 截断点 + 主-子 session 隔离审查 | | **状态机** | Prefect flow run state(`runtime/orchestrator/flows.py`)| -## 决策回放(主宪章 §18-12 满足) +## 决策回放 每次 routing / curator / scheduler 决策已自动落 `decisions/{date}_{tool}_{run_id}.json`。 含:输入快照 + 模型版本 + 阈值 + 判断结论 + 理由。 @@ -51,17 +51,17 @@ flow.run # api.request 总 span ## token 异常诊断 - 单 LLM call > 10k tokens → 输入太大(catalog 没裁?) 
-- 主-子 session 共享 cache → §22 子代理 aux_client 隔离失效 +- 主-子 session 共享 cache → 子代理 aux_client 隔离失效 - 重复调相同 LLM(无 cache)→ Anthropic prompt cache 没设 ttl -## 与主宪章融合 +## 与融合 -- §18-12 决策可追溯(本 skill 直接消费) -- §22 隔离 client(本 skill 检测违反) -- §21 横切可复现性(本 skill 必带 seed + snapshot) +- 决策可追溯(本 skill 直接消费) +- 隔离 client(本 skill 检测违反) +- 横切可复现性(本 skill 必带 seed + snapshot) ## 不做 - 不只看 LLM 输出(必看输入 + token + 上下文) - 不靠 print 调试(必走 OTel + Loguru 结构化) -- 不删 decisions/(主宪章 §1+§18-16 不可删) +- 不删 decisions/ diff --git a/ai/skills/automotive-hil-loop-test.md b/ai/skills/automotive-hil-loop-test.md index 1efaf22a..5bd8c5ad 100644 --- a/ai/skills/automotive-hil-loop-test.md +++ b/ai/skills/automotive-hil-loop-test.md @@ -17,7 +17,7 @@ SKILL_IMPL_STATUS: production | **PIL** | Processor-in-the-Loop | 真 ECU + 仿真环境 | | **HIL** | Hardware-in-the-Loop | **真 ECU + 真 I/O**(模拟外设)| -**ASIL C / D 必经 HIL**(主宪章 §21 L4 极深);ASIL A/B 可 PIL 替代。 +**ASIL C / D 必经 HIL**;ASIL A/B 可 PIL 替代。 ## HIL 平台 @@ -39,7 +39,7 @@ SKILL_IMPL_STATUS: production - 格式:**MDF 4.x** / MF4(AUTOSAR 标准) - 工具:Vector CANape / ASAM ODS -- 必含 seed + 算法版本 + ECU 固件 hash + 仿真版本(主宪章 §21 可复现性) +- 必含 seed + 算法版本 + ECU 固件 hash + 仿真版本 ## 输出 diff --git a/ai/skills/automotive-ota-update-test.md b/ai/skills/automotive-ota-update-test.md index ff0b5e92..b748cd4f 100644 --- a/ai/skills/automotive-ota-update-test.md +++ b/ai/skills/automotive-ota-update-test.md @@ -28,7 +28,7 @@ SKILL_IMPL_STATUS: production - 升级流量重放:wireshark + scapy - 中断模拟:`utils/chaos_helper.py` 在升级中触发(`runtime/scheduler` 集成) -- 弱网模拟:tc + netem(主宪章已用) +- 弱网模拟:tc + netem ## 输出 diff --git a/ai/skills/automotive-test.md b/ai/skills/automotive-test.md index 7855f8f6..5ae8af43 100644 --- a/ai/skills/automotive-test.md +++ b/ai/skills/automotive-test.md @@ -29,12 +29,12 @@ SKILL_IMPL_STATUS: production | 9 合规审计 | `compliance/engine.py` + 行业规则库(ISO 26262/SOTIF/R155/R156 Phase 2) | | 10 报告 + Bug 单 | `report-generator` | -## 主宪章铁律 +## 规则 -- §21 L4 极深:ADAS/底盘/转向必 HIL + 形式化验证 -- §24 
safe-by-default:`automotive.fleet_test_authorized: true` + `automotive.test_lab: <id>` 才允许真车数据 -- §22 不可逆禁止:OTA 必含回退;真车 kill-switch 必有 -- §19 行业适配:接入车载行业必《领域档案》+ 主机厂签字 +- L4 极深:ADAS/底盘/转向必 HIL + 形式化验证 +- safe-by-default:`automotive.fleet_test_authorized: true` + `automotive.test_lab: <id>` 才允许真车数据 +- 不可逆操作:OTA 必含回退;真车 kill-switch 必有 +- 行业适配:接入车载行业必《领域档案》+ 主机厂签字 ## 输出 diff --git a/ai/skills/build-your-own-x-explorer.md b/ai/skills/build-your-own-x-explorer.md index 399c44b9..c80a8926 100644 --- a/ai/skills/build-your-own-x-explorer.md +++ b/ai/skills/build-your-own-x-explorer.md @@ -1,6 +1,6 @@ --- name: build-your-own-x-explorer -description: "Build-your-own-X 教学引导 Skill。按用户当前测试场景推荐对应 byox 教程深度学习路径;每条标 estimated_time_hours;主宪章 §31 教学层 13 大类落地。" +description: "Build-your-own-X 教学引导 Skill。按用户当前测试场景推荐对应 byox 教程深度学习路径;每条标 estimated_time_hours;教学层 13 大类落地。" tools: Read, Write, Grep, Glob SKILL_IMPL_STATUS: production --- @@ -47,11 +47,11 @@ SKILL_IMPL_STATUS: production | E2E 调试 / 视觉回归 | byox-web-browser | | webhook / gateway | byox-bot | -## 与主宪章融合 +## 与融合 -- §23 教学层:本 skill 是 learn mode 深度路径入口 -- §27 Karpathy 原则 4(Goal-Driven):推荐前必问用户**时间预算**;无预算 → 拒推 -- §29 essence-watcher:byox 标 `essence_only`(默认不动 Test-Agent),需要时本 skill 主动引 +- 教学层:本 skill 是 learn mode 深度路径入口 +- Karpathy 原则 4(Goal-Driven):推荐前必问用户**时间预算**;无预算 → 拒推 +- essence-watcher:byox 标 `essence_only`(默认不动 Test-Agent),需要时本 skill 主动引 ## 不做 diff --git a/ai/skills/e2e-testing.md b/ai/skills/e2e-testing.md index b4f9ce70..dcff9869 100644 --- a/ai/skills/e2e-testing.md +++ b/ai/skills/e2e-testing.md @@ -1,6 +1,6 @@ --- name: e2e-testing -description: "E2E 测试 Skill。Playwright 关键用户流 + 跨浏览器 + 2FA/TOTP/SSO 自动登录 + 视觉回归 + 录屏。派生自 ECC e2e-testing(主宪章 §28)。" +description: "E2E 测试 Skill。Playwright 关键用户流 + 跨浏览器 + 2FA/TOTP/SSO 自动登录 + 视觉回归 + 录屏。派生自 ECC e2e-testing。" tools: Read, Write, Bash, Grep, Glob SKILL_IMPL_STATUS: production --- @@ -21,7 +21,7 @@ SKILL_IMPL_STATUS: production | 浏览器 | Playwright(Chromium / Firefox / WebKit) | 
| 2FA / TOTP | `pyotp.TOTP(SECRET).now()` | | SSO | Playwright follow redirects(Okta / Auth0 / Azure AD / Keycloak) | -| 视觉回归 | `screenshot()` + SSIM(主宪章 §21 测试类型) | +| 视觉回归 | `screenshot()` + SSIM | | 录屏 | `context = browser.new_context(record_video_dir="evidence/")`| | Trace | `tracing.start(screenshots=True, snapshots=True)` | @@ -47,12 +47,12 @@ class LoginPage: 5. 退出 + 登录失效 6. 错误路径(密码错 5 次锁定 等) -## 与主宪章融合 +## 与融合 -- §17 测试架构:E2E 占金字塔顶层 10% -- §21 测试类型:含视觉回归 -- §21 横切可复现性:trace + 录屏 + screenshots -- §22 6-缺陷 RCA(回归 + 变更影响) +- 测试架构:E2E 占金字塔顶层 10% +- 测试类型:含视觉回归 +- 横切可复现性:trace + 录屏 + screenshots +- 6-缺陷 RCA(回归 + 变更影响) ## 不做 diff --git a/ai/skills/eval-harness.md b/ai/skills/eval-harness.md index 10cec5c1..335c8a1c 100644 --- a/ai/skills/eval-harness.md +++ b/ai/skills/eval-harness.md @@ -1,6 +1,6 @@ --- name: eval-harness -description: "Eval 框架 Skill。LLM/AI 系统评测:pass@k / Jaccard@k / top-1 stability / latency Δ。融合 gbrain eval 回放(主宪章 §24)+ ECC eval-harness。" +description: "Eval 框架 Skill。LLM/AI 系统评测:pass@k / Jaccard@k / top-1 stability / latency Δ。融合 gbrain eval 回放+ ECC eval-harness。" tools: Read, Write, Bash, Grep, Glob SKILL_IMPL_STATUS: production --- @@ -32,7 +32,7 @@ SKILL_IMPL_STATUS: production ## 使用 ```bash -# 1. opt-in capture(主宪章 §24) +# 1. opt-in capture TAGENT_EVAL_CAPTURE=1 tagent run "..." # 2. 改 router/prompt/KB @@ -41,7 +41,7 @@ TAGENT_EVAL_CAPTURE=1 tagent run "..." 
python -m runtime.tutor.eval_replay # 输出 3 指标 ``` -## 评测原则(主宪章 §24) +## 评测原则 - **opt-in 不偷数据**:`TAGENT_EVAL_CAPTURE=1` 必显式 - **PII 必 scrub**:落档前 6 类正则 diff --git a/ai/skills/pentest-coordinator.md b/ai/skills/pentest-coordinator.md index 1ee9d9cb..aeebdf12 100644 --- a/ai/skills/pentest-coordinator.md +++ b/ai/skills/pentest-coordinator.md @@ -1,6 +1,6 @@ --- name: pentest-coordinator -description: "渗透测试主编排 Skill。完整渗透流程编排:授权检查 → 侦察 → 漏洞 → 利用 → 后渗透 → 报告。仅含 working PoC 才入报告(shannon 哲学)。tagent.yml pentest.authorized=true 显式才允许跑(§24 safe-by-default)。" +description: "渗透测试主编排 Skill。完整渗透流程编排:授权检查 → 侦察 → 漏洞 → 利用 → 后渗透 → 报告。仅含 working PoC 才入报告(shannon 哲学)。tagent.yml pentest.authorized=true 显式才允许跑(safe-by-default)。" tools: Read, Write, Bash, Grep, Glob requires_layer: [base, security] SKILL_IMPL_STATUS: production @@ -14,9 +14,9 @@ SKILL_IMPL_STATUS: production /pentest-coordinator [target] [--mode whitebox|blackbox|graybox] [--scope <list>] ``` -## 前置检查(铁律) +## 前置检查(规则) -1. 读 `tagent.yml`:`pentest.authorized: true` + `pentest.scope: [list]` **必须显式**;否则拒绝(§24 safe-by-default) +1. 读 `tagent.yml`:`pentest.authorized: true` + `pentest.scope: [list]` **必须显式**;否则拒绝(safe-by-default) 2. `target` 必须在 `scope` 内;不在 → 拒绝 3. `prod` 环境 → 拒绝;只允许 staging/sandbox/dev 4. 
落 `workspace/测试报告/{项目名}/decisions/pentest_{run_id}_authorized.json` 记授权来源 @@ -35,7 +35,7 @@ SKILL_IMPL_STATUS: production - Injection / XSS / SSRF / Auth / Authz - 各自子 agent 独立运行;只汇总有 PoC 的 -- subagent pool(主宪章 §22)`runtime/subagent/pool.py` +- subagent pool:`runtime/subagent/pool.py` ## 输出 @@ -49,4 +49,4 @@ SKILL_IMPL_STATUS: production - 不真破坏(只生成 PoC) - 不未授权(显式 yaml + scope 必有) - 不报无 PoC 的"理论风险" -- 不污染主 session(用 aux_client,§22) +- 不污染主 session(用 aux_client) diff --git a/ai/skills/pentest-exploit.md b/ai/skills/pentest-exploit.md index 2470e224..7ddaab41 100644 --- a/ai/skills/pentest-exploit.md +++ b/ai/skills/pentest-exploit.md @@ -8,12 +8,12 @@ SKILL_IMPL_STATUS: production # pentest-exploit -## 沙箱铁律(主宪章 §22) +## 沙箱规则 - 利用代码**必须在 Docker/VM 内**跑(`runtime/backends/docker.py`) - 不在 host 直接跑利用 - 失败注入 (Chaos) 仅限沙箱 -- 命令 `rm` / `DROP` / `unlink` / `truncate` / `fdisk` → **destructive-guard 拦**(主宪章 §24) +- 命令 `rm` / `DROP` / `unlink` / `truncate` / `fdisk` → **destructive-guard 拦** ## 流程 diff --git a/ai/skills/pentest-report.md b/ai/skills/pentest-report.md index eb16e8dc..12a6b3c4 100644 --- a/ai/skills/pentest-report.md +++ b/ai/skills/pentest-report.md @@ -8,12 +8,12 @@ SKILL_IMPL_STATUS: production # pentest-report -## 报告原则(铁律) +## 报告原则(规则) 1. **仅 PoC 入报告**:`status: verified` 的才报;`unverified` 不报 2. **可复现**:每 PoC 3 次都成才算 reproducible 3. **PII scrub**:涉及客户数据自动脱敏(`runtime/tutor/eval_replay.PII_PATTERNS` 单源) -4. **CVSS 3.1 评分**:每条必算 + 映射到 P0/P1/P2/P3(主宪章 §18-4) +4. **CVSS 3.1 评分**:每条必算 + 映射到 P0/P1/P2/P3 5. 
**修复建议**:含具体代码片段 + 引用 OWASP/NIST ## 报告章节(标准模板) diff --git a/ai/skills/pentest-vuln.md b/ai/skills/pentest-vuln.md index ea4064f8..d1ad5afa 100644 --- a/ai/skills/pentest-vuln.md +++ b/ai/skills/pentest-vuln.md @@ -37,7 +37,7 @@ SKILL_IMPL_STATUS: production - `workspace/渗透/vuln_candidates.json`:候选漏洞(**未验证**,标 `unverified`) - 喂入 `/pentest-exploit` 验证 -## 铁律 +## 规则 - 漏洞 ≠ PoC,候选必标 `status: unverified` - 不在此 skill 出报告(报告在 `/pentest-report` 且只含 verified) diff --git a/ai/skills/security-review.md b/ai/skills/security-review.md index b19b0b0d..316b4a54 100644 --- a/ai/skills/security-review.md +++ b/ai/skills/security-review.md @@ -50,10 +50,10 @@ pip-audit safety check ``` -## 与主宪章融合 +## 与融合 -- §17 Shift-Left 7 层:本 skill 是 L4 pre-commit + L5 PR gate + L6 静态分析 -- §25 渗透 PoC-only 哲学:本 skill 报 unverified 候选;喂 `/pentest-vuln` 验证 +- Shift-Left 7 层:本 skill 是 L4 pre-commit + L5 PR gate + L6 静态分析 +- 渗透 PoC-only 哲学:本 skill 报 unverified 候选;喂 `/pentest-vuln` 验证 ## 不做 diff --git a/ai/skills/tdd-workflow.md b/ai/skills/tdd-workflow.md index b9ddb4dc..8e2791bd 100644 --- a/ai/skills/tdd-workflow.md +++ b/ai/skills/tdd-workflow.md @@ -1,6 +1,6 @@ --- name: tdd-workflow -description: "TDD 测试驱动开发 Skill。Tests BEFORE code,80%+ 覆盖(unit+integration+E2E),边界+异常+错误场景必覆盖。派生自 ECC 同名 skill(主宪章 §28)。" +description: "TDD 测试驱动开发 Skill。Tests BEFORE code,80%+ 覆盖(unit+integration+E2E),边界+异常+错误场景必覆盖。派生自 ECC 同名 skill。" tools: Read, Write, Bash, Grep, Glob SKILL_IMPL_STATUS: production --- @@ -32,14 +32,14 @@ SKILL_IMPL_STATUS: production 3. **重构**保持测试过(refactor) 4. 
移到下一个测试 case -## 与主宪章融合 +## 与融合 -- §21 测试深度横切准则:"用例本身用变异测试反向验证"(覆盖率 ≠ 用例质量) -- §27 Karpathy 原则 4 Goal-Driven:每任务转为 "写复现测试 → 让它过" -- §18-14 修改四关:测试套件全过才许 commit +- 测试深度横切准则:"用例本身用变异测试反向验证"(覆盖率 ≠ 用例质量) +- Karpathy 原则 4 Goal-Driven:每任务转为 "写复现测试 → 让它过" +- 修改四关:测试套件全过才许 commit ## 不做 - 不写无 assert 的测试 - 不一次写完 200 行测试不跑(分小批 red → green) -- 不为了覆盖率写无意义测试(主宪章 §21 测试热寂检测) +- 不为了覆盖率写无意义测试 diff --git a/ai/skills/testcase-design.md b/ai/skills/testcase-design.md index 92d1520d..08e88842 100644 --- a/ai/skills/testcase-design.md +++ b/ai/skills/testcase-design.md @@ -1,6 +1,6 @@ --- name: testcase-design -description: 快速生成测试用例技能。输入需求描述,调用 testcase-designer 专家生成结构化测试用例,输出格式由用户自选:默认 Excel(4 Sheet),可选 xmind / markmap / opml 思维导图(V1.9 加),或 --format all 一键产全部。适用于用例评审、快速梳理测试点。 +description: 快速生成测试用例技能。输入需求描述,调用 testcase-designer 专家生成结构化测试用例,输出格式由用户自选:默认 Excel(4 Sheet),可选 xmind / markmap / opml 思维导图,或 --format all 一键产全部。适用于用例评审、快速梳理测试点。 tools: Read, Write, Grep, Glob SKILL_IMPL_STATUS: production --- @@ -82,7 +82,7 @@ P1 主要用例(12 条): - **Sheet3 P0冒烟集**:仅 P0 用例(带前置条件、数据) - **Sheet4 P0_P1回归集**:P0+P1 用例 -### 思维导图 / 大纲(V1.9 加,按需) +### 思维导图 / 大纲 `runtime/exporters/` 已注册 3 个 exporter,用户自选;同一 TestCaseTree 一份 IR,三种落盘: diff --git a/ai/skills/verification-loop.md b/ai/skills/verification-loop.md index 94ebb956..72d04928 100644 --- a/ai/skills/verification-loop.md +++ b/ai/skills/verification-loop.md @@ -1,6 +1,6 @@ --- name: verification-loop -description: "5-phase 验证循环 Skill:build → typecheck → lint → test → coverage。任意失败 STOP + 修。派生自 ECC 同名 skill(主宪章 §28)。PR 前 / 质量门禁前 / refactor 后必跑。" +description: "5-phase 验证循环 Skill:build → typecheck → lint → test → coverage。任意失败 STOP + 修。派生自 ECC 同名 skill。PR 前 / 质量门禁前 / refactor 后必跑。" tools: Read, Write, Bash, Grep, Glob SKILL_IMPL_STATUS: production --- @@ -11,7 +11,7 @@ SKILL_IMPL_STATUS: production - feature 完成后 - PR 提交前 -- 质量门禁前(主宪章 §17 五层门禁) +- 质量门禁前 - refactor 后 - darwin-skill 评分前 @@ -48,16 +48,16 @@ pytest runtime/tests/ -v 2>&1 | tail -50 ```bash pytest 
--cov=runtime --cov-report=term-missing 2>&1 | tail -30 ``` -对比 §17 regression 门槛 cov ≥ 80% +对比 regression 门槛 cov ≥ 80% -## 与主宪章融合 +## 与融合 -- §17 五层门禁:本 skill 是**进 smoke → regression** 的前置 -- §18-14 修改四关:四关 = 本 skill 4 阶段简化版 -- §21 横切可复现性:失败必固定 seed + snapshot +- 五层门禁:本 skill 是**进 smoke → regression** 的前置 +- 修改四关:四关 = 本 skill 4 阶段简化版 +- 横切可复现性:失败必固定 seed + snapshot ## 不做 -- 不跳阶段(主宪章 §21 跳阶段 = 测试不诚信) +- 不跳阶段 - 不忽略 type 错误"等会儿再修" - 不静默吞 lint warning(--fix 默认开) diff --git a/ai/skills/zentao-bug-submission.md b/ai/skills/zentao-bug-submission.md index c52a59e7..3bfe395d 100644 --- a/ai/skills/zentao-bug-submission.md +++ b/ai/skills/zentao-bug-submission.md @@ -1,6 +1,6 @@ --- name: zentao-bug-submission -description: BugTracker Bug 提交技能(默认 adapter:禅道,主宪章 §12 BugTrackerBase 统一契约支持 Jira/GitHub/GitLab/Linear/Webhook,由 .env BUG_TRACKER 选)。输入 Bug 描述或测试失败信息,自动规范化 Bug 报告并提交到所选 BugTracker,支持批量提交和状态追踪。默认实现 utils/zentao_bug_manager.py(severity 1=P0/2=P1/3=P2/4=P3)。 +description: BugTracker Bug 提交技能。输入 Bug 描述或测试失败信息,自动规范化 Bug 报告并提交到所选 BugTracker,支持批量提交和状态追踪。默认实现 utils/zentao_bug_manager.py(severity 1=P0/2=P1/3=P2/4=P3)。 tools: Read, Write, Bash, Grep, Glob SKILL_IMPL_STATUS: script --- diff --git a/ci/INDEX.md b/ci/INDEX.md index fc9b59a2..adfb47f3 100644 --- a/ci/INDEX.md +++ b/ci/INDEX.md @@ -58,7 +58,7 @@ | 性能基线何时更新 | 仅 `release/*` 分支 + `PERF_MODE=full` + 当次门禁全 PASS | | 多端通知未发出 | `.env` / Secrets 未配 `WECHAT_WEBHOOK_URL` / `FEISHU_WEBHOOK` / `DINGTALK_WEBHOOK` / `SLACK_WEBHOOK_URL` / `EMAIL_SMTP_*` / `TEAMS_WEBHOOK_URL` 等;未配自动跳过不阻塞 | -## 同步链路(宪章§1 同步铁律) +## 同步链路(宪章同步规则) 修改本目录任一文件时,**必须**联动检查: diff --git a/deploy/config/.mcp.json b/deploy/config/.mcp.json index a540ce2f..0f372d40 100644 --- a/deploy/config/.mcp.json +++ b/deploy/config/.mcp.json @@ -1,5 +1,5 @@ { - "_comment": "MCP 服务配置 — V1.2.0 6 件套 (主宪章 §16)。P2 #12 MCP client 完善后全部启用。", + "_comment": "MCP 服务配置 — V1.2.0 6 件套 。P2 #12 MCP client 完善后全部启用。", "mcpServers": { "filesystem": { "command": "npx", diff --git 
a/deploy/config/INDEX.md b/deploy/config/INDEX.md index 2d14ca50..90dacce0 100644 --- a/deploy/config/INDEX.md +++ b/deploy/config/INDEX.md @@ -29,7 +29,7 @@ - 真实凭据(`TEST_DB_PASSWORD` / `ZENTAO_PASSWORD` / `WECHAT_WEBHOOK_URL` 等)只放 `.env` 或 GitHub Secrets / Jenkins Credentials - 修改 `.env.example` 加新字段时,必须同步 `conftest.py::EnvConfig` 与 `docs/getting-started/配置清单.md` -## 同步链路(宪章§1 同步铁律) +## 同步链路(宪章同步规则) 修改本目录任一配置文件时,**必须**联动检查: diff --git a/deploy/config/conftest.py b/deploy/config/conftest.py index 0739e0a0..5437353a 100644 --- a/deploy/config/conftest.py +++ b/deploy/config/conftest.py @@ -37,7 +37,7 @@ for _utils_dir in _UTILS_CANDIDATES: if _utils_dir.is_dir() and str(_utils_dir) not in sys.path: sys.path.insert(0, str(_utils_dir)) - # utils 子目录也注入 — V1.x 重组后 utils/ 下 12 子目录 + # utils 子目录也注入 — 重组后 utils/ 下 12 子目录 for _sub in _utils_dir.iterdir(): if _sub.is_dir() and not _sub.name.startswith(("_", ".")): if str(_sub) not in sys.path: diff --git a/deploy/config/llm-providers.md b/deploy/config/llm-providers.md index 3dfc8bb8..ab591948 100644 --- a/deploy/config/llm-providers.md +++ b/deploy/config/llm-providers.md @@ -11,12 +11,12 @@ 1. 选厂商 (路径 A 内置 6 / 路径 B 兼容 5+) 2. 复制对应 export 3. 
`tagent demo` 验路由 -- **实测有效** (V1.0.0): Claude / OpenAI / Gemini / DeepSeek / Qwen / Ollama / **智谱 GLM** (路径 B 实测 20/20 准, 见 PR #79) +- **实测有效**: Claude / OpenAI / Gemini / DeepSeek / Qwen / Ollama / **智谱 GLM** (路径 B 实测 20/20 准, 见 PR #79) - **适用场景**: - 离线本地 = Ollama / Qwen - 国内合规 = 智谱 / 豆包 / 通义 - 性价比 = DeepSeek / Kimi - - 主备 fallback = 多 provider 接管 (见 §3) + - 主备 fallback = 多 provider 接管 (见 §3) --- @@ -164,7 +164,7 @@ export TAGENT_LLM_API_KEY=<your_key> --- -## 3 · .env 文件示例 +## 3 · .env 文件示例 将选定厂商配置写入项目根 `.env` (从 `.env.example` 复制后填): diff --git a/deploy/config/mcp-server-impl.md b/deploy/config/mcp-server-impl.md index 1494247d..339ec4b0 100644 --- a/deploy/config/mcp-server-impl.md +++ b/deploy/config/mcp-server-impl.md @@ -32,7 +32,7 @@ MCP server 通常通过 stdio 与 client 通信,对外暴露 tools / resources ```python # zentao_mcp_server/__main__.py -"""禅道 MCP Server 骨架(默认 BugTracker 实现示例;Jira/GitHub/GitLab/Linear/Webhook 同骨架,主宪章 §12)""" +"""禅道 MCP Server 骨架""" import asyncio import json import logging diff --git a/deploy/config/requirements.txt b/deploy/config/requirements.txt index 37f6058c..5e017e9b 100644 --- a/deploy/config/requirements.txt +++ b/deploy/config/requirements.txt @@ -147,13 +147,13 @@ defusedxml==0.7.1 # [稳定层] XXE/亿笑/decompression bomb # - kubectl(K8s 混沌测试) # ============================================================ -# V1.1.0 运行时层(`runtime/`)依赖 — 全部 [可选] +# 运行时层(`runtime/`)依赖 — 全部 [可选] # 仅启用 V1.1.0 运行时(AI 路由+Prefect+飞轮+FastAPI/CLI)时安装 # 完整列表见 runtime/pyproject.toml # ============================================================ # ===== [可选 · runtime 核心] ===== -mcp>=1.0.0 # V1.2.0 MCP SDK(6 件套 server + P2 #12 client): test-orchestrator/protocol-adapter/evidence-vault/defect-tracker/knowledge-base/compliance-checker +mcp>=1.0.0 # MCP SDK(6 件套 server + P2 #12 client): test-orchestrator/protocol-adapter/evidence-vault/defect-tracker/knowledge-base/compliance-checker # litellm>=1.55.0 # 多厂商 LLM 路由(Claude/OpenAI/Gemini/Qwen/DeepSeek/Ollama) # prefect>=2.20.0,<3.0 
# 编排引擎(可选,Direct 执行器在 prefect 缺席时自动接管) # fastapi>=0.115.0 # HTTP 入口 diff --git a/deploy/config/templates/INDEX.md b/deploy/config/templates/INDEX.md index 86d3669d..a93dd116 100644 --- a/deploy/config/templates/INDEX.md +++ b/deploy/config/templates/INDEX.md @@ -1,4 +1,4 @@ -# 配置模板库索引(V1.0.0) +# 配置模板库索引 > `tagent init` 交互向导从本目录读取模板 + matrix · 5 分钟生成 `.env` + `tagent.yml` + `STARTUP.md`。 @@ -39,6 +39,6 @@ llm_providers: ## 相关 -- 主宪章 §5 多格式 I/O · §36 多端通知 canon · §37 BugTracker canon +- 多格式 I/O · 多端通知 canon · BugTracker canon - 上一级:[`../INDEX.md`](../INDEX.md) - 实现:[`../../runtime/init/INDEX.md`](../../../runtime/init/INDEX.md) diff --git a/deploy/config/templates/STARTUP.md.tpl b/deploy/config/templates/STARTUP.md.tpl index e7d294ed..0d3d2af2 100644 --- a/deploy/config/templates/STARTUP.md.tpl +++ b/deploy/config/templates/STARTUP.md.tpl @@ -45,7 +45,7 @@ tagent run "{{SAMPLE_TARGET}}" --mode learn |------|------| | `LLM 调用 raise` | 检查 API key + 网络;切 `TAGENT_LLM_PROVIDER=ollama` 离线兜底 | | `BugTracker 提交失败` | 占位没填或网络 / 权限错;不阻塞,但日报会少 | -| `通知没发出` | 至少配 1 个渠道(主宪章 §36);未配自动跳过 | +| `通知没发出` | 至少配 1 个渠道;未配自动跳过 | | `selftest n7 失败` | 装 python-docx:`pip install python-docx` | ## 7. 
下一步 diff --git a/deploy/config/templates/base.env.tpl b/deploy/config/templates/base.env.tpl index 8a69ae05..d69117bb 100644 --- a/deploy/config/templates/base.env.tpl +++ b/deploy/config/templates/base.env.tpl @@ -15,10 +15,10 @@ TAGENT_LLM_PROVIDER_FALLBACK=ollama # ===== 平台 extras ===== {{PLATFORM_EXTRAS_BLOCK}} -# ===== BugTracker(主宪章 §37,默认 zentao,可换) ===== +# ===== BugTracker ===== {{BUG_TRACKER_ENV_BLOCK}} -# ===== 多端通知(主宪章 §36,任意 1 个生效即可) ===== +# ===== 多端通知 ===== {{NOTIFIER_ENV_BLOCK}} # ===== Test-Agent 运行时(通常不需改) ===== diff --git a/deploy/config/templates/base.tagent.yml.tpl b/deploy/config/templates/base.tagent.yml.tpl index 5ca4d4e9..dcb50505 100644 --- a/deploy/config/templates/base.tagent.yml.tpl +++ b/deploy/config/templates/base.tagent.yml.tpl @@ -15,7 +15,7 @@ skills: bug_tracker: primary: {{BUG_TRACKER}} - # 多 tracker 并存(主宪章 §37):写成 [zentao, github],按 Bug label 路由 + # 多 tracker 并存:写成 [zentao, github],按 Bug label 路由 # extra: [github] notifiers: @@ -28,15 +28,15 @@ quality_gates: perf_p99_ms_max: 300 selftest: - # 主宪章 §33 自检铁律 + # 自检规则 pre_tag_required: true pass_threshold: 0.80 strict_on_release: true marketplace: - enabled: false # 默认关 · 主宪章 §30 safe-by-default + enabled: false # 默认关 -# ============== SAFETY GATES · safe-by-default(主宪章 §22 / §35 + W5 sprint v2) ============== +# ============== SAFETY GATES · safe-by-default ============== # 危险操作 / 自动化 / 影响生产 的功能 必须显式开启, 否则 destructive-guard 拒绝运行。 # 详见 SECURITY.md 武器化代码使用边界 + 测试工具准入控制 节。 @@ -62,7 +62,7 @@ gateway: # 例: enabled_platforms: [telegram, feishu] pentest: - # 法律契约(default refuse · charter §35); rollout 阶段 yml gate 占位, V1.x 激活后接入真实路由 + # 法律契约(default refuse · charter ); rollout 阶段 yml gate 占位, V1.x 激活后接入真实路由 authorized: false # 法律授权确认 scope_in_targets: [] # IP/domain/URL 白名单(IN) scope_out_targets: [] # 强制黑名单(覆盖 IN) diff --git a/deploy/config/templates/matrix.yaml b/deploy/config/templates/matrix.yaml index 88be4cf9..132d2548 100644 --- a/deploy/config/templates/matrix.yaml +++ 
b/deploy/config/templates/matrix.yaml @@ -1,4 +1,4 @@ -# tagent init 单源真理矩阵(V1.12.0) +# tagent init 单源真理矩阵 # # 测试类型 × 平台 × LLM provider × BugTracker × 通知渠道 → 生成 .env + tagent.yml + STARTUP.md # 加新选项:补这里 + 看 wizard 自动列出。 @@ -118,7 +118,7 @@ llm_providers: OLLAMA_HOST: "http://localhost:11434" model_hint: qwen2.5:7b -# 4. BugTracker(6 选,主宪章 §37) +# 4. BugTracker bug_trackers: zentao: label: 禅道(默认) @@ -160,7 +160,7 @@ bug_trackers: BUG_TRACKER: webhook BUG_WEBHOOK_URL: "<your_webhook_endpoint>" -# 5. 通知渠道(6 选,可多选,主宪章 §36) +# 5. 通知渠道 notifiers: wechat: label: 企业微信群机器人 diff --git a/deploy/marketplace/INDEX.md b/deploy/marketplace/INDEX.md index d0b7d519..81823ea3 100644 --- a/deploy/marketplace/INDEX.md +++ b/deploy/marketplace/INDEX.md @@ -1,4 +1,4 @@ -# marketplace 索引(主宪章 §30) +# marketplace 索引 > 对标 Claude Code 官方 marketplace · 4 lane 资源库:skills / agents / mcp / hooks > 用户按需下载;**4 关安全门必过**(签名 / prompt 扫 / 沙箱试跑 / darwin 评分) @@ -51,20 +51,20 @@ tagent verify <name> # 单独跑沙箱验证 tagent uninstall <name> # 卸(归档不删) ``` -## 4 关安全门(主宪章 §30 铁律) +## 4 关安全门 1. **签名校验**:SHA256 + 可选 GPG/ed25519 2. **全 prompt 扫描**:`runtime/scheduler/injection_scan.py` 复用,扫 skill 文本 3. **沙箱试跑**:`runtime/backends/docker.py` 在 Docker 内跑 24h 观察 4. 
**darwin 评分**:`darwin-skill` 评 ≥75 才放行 -任一不过 → 拒装 + 落 `decisions/`(§18-12) +任一不过 → 拒装 + 落 `decisions/`() ## 注意 - **不复制 Anthropic / OpenAI 源码**(品牌+协议红线) - **仅镜像 metadata + 链接**到上游 -- 卸载只**归档**到 `marketplace/.archive/`(§22 不可逆禁止) +- 卸载只**归档**到 `marketplace/.archive/` ## 配置 diff --git a/deploy/marketplace/_safety_policy.yaml b/deploy/marketplace/_safety_policy.yaml index a2eab841..04eb9f6e 100644 --- a/deploy/marketplace/_safety_policy.yaml +++ b/deploy/marketplace/_safety_policy.yaml @@ -1,4 +1,4 @@ -# Marketplace 4 关安全门(主宪章 §30) +# Marketplace 4 关安全门 # 4 关任一不过 → 拒装 + 落 decisions/ gates: @@ -52,7 +52,7 @@ trust_tiers: # 卸载策略 uninstall: - archive_only: true # 主宪章 §22 决策不可逆禁止 + archive_only: true # 决策不可逆禁止 archive_dir: marketplace/.archive/ # 紧急 kill switch diff --git a/deploy/marketplace/registry.json b/deploy/marketplace/registry.json index a8fc486a..f0649d3a 100644 --- a/deploy/marketplace/registry.json +++ b/deploy/marketplace/registry.json @@ -1,5 +1,5 @@ { - "_comment": "Marketplace registry · 4 lane(skills/agents/mcp/hooks)· 主宪章 §30. 
初始为空,装时由 tagent install 写入.", + "_comment": "Marketplace registry · 4 lane(skills/agents/mcp/hooks). 初始为空,装时由 tagent install 写入.", "version": "1.0", "last_updated": "2026-05-12", "entries": [] diff --git a/deploy/profiles/INDEX.md b/deploy/profiles/INDEX.md index f35b38fb..7232cb78 100644 --- a/deploy/profiles/INDEX.md +++ b/deploy/profiles/INDEX.md @@ -1,4 +1,4 @@ -# profiles/ 索引(V1.10.0) +# profiles/ 索引 > 行业合规 profile 配置 · 测试时按行业自动加载额外规则。 @@ -23,5 +23,5 @@ ## 相关 - 上一级:[`../README.md`](../../README.md) -- 主宪章 §17(九大簇维度边界)+ §25(渗透 & 安全)+ §26(车载 & 自动驾驶) +- 九大簇维度边界 + 渗透 & 安全 + 车载 & 自动驾驶 - 加载实现:`runtime/config/settings.py` profile 字段 diff --git a/deploy/profiles/compliance/INDEX.md b/deploy/profiles/compliance/INDEX.md index ae51ccb0..8b23d47f 100644 --- a/deploy/profiles/compliance/INDEX.md +++ b/deploy/profiles/compliance/INDEX.md @@ -1,7 +1,7 @@ # profiles/compliance 索引 > 行业合规规则库插槽。每个 YAML 文件 = 一个合规框架的检查项集。 -> 真规则由领域专家提供;本目录仅含**空载示例**,V1.2.0 起步。 +> 真规则由领域专家提供;本目录仅含**空载示例**。 ## 已提供示例(空载) @@ -35,5 +35,5 @@ checks: ## 接入方式 -L4 级被测项(charter §21 深度准则 L4)必须通过 `mcp-compliance-checker.check_compliance(profile, run_id)` 验证。 -真规则文件由领域专家+test-lead 双签签字后入库(charter §10 五条铭文 + §15 AgentChat 协议)。 +L4 级被测项(charter 深度准则 L4)必须通过 `mcp-compliance-checker.check_compliance(profile, run_id)` 验证。 +真规则文件由领域专家+test-lead 双签签字后入库(charter 五条铭文 + AgentChat 协议)。 diff --git a/docs/INDEX.md b/docs/INDEX.md index ebfa20c0..c614991e 100644 --- a/docs/INDEX.md +++ b/docs/INDEX.md @@ -8,7 +8,7 @@ |--------------|------|--------| | `STYLE.md` | 全仓 Markdown 样式约定(标题/加粗/列表/表格/代码块/链接/emoji/命名/中英混排) | 写文档前必看一次 | | `SURVEY.md` | 12 题用户调研模板(NPS + skill 使用率) | 想发用户问卷时 | -| `theory/` | **教学层 KB 13 大类**(主宪章 §23 §31) · `01-tools` ~ `13-build-your-own` | learn mode 推荐路径 | +| `theory/` | **教学层 KB 13 大类** · `01-tools` ~ `13-build-your-own` | learn mode 推荐路径 | | `charter/` | 愿景宪章 7 子文件(vision-dimensions / coverage-matrix / agentchat-protocol / skills-bugtracker / install-deploy / test-architecture / runtime-license) | 项目设计原理深读 | | 
`case-studies/` | 实施 case study(timeline + 决策 + metrics + 教训) | 想看真实项目演进决策 | | `assets/` | 演示素材 · `demo.recipe.md`(30 秒 demo 录制脚本)+ `terminalizer-config.yml` | 录演示视频时 | @@ -28,5 +28,5 @@ ## 相关 - 上一级:[`../README.md`](../README.md) -- 主宪章 §23(教学层准则)+ §31(KB 扩 13 大类) +- (教学层准则)+ (KB 扩 13 大类) - 样式约束:`.pre-commit-config.yaml` markdownlint hook(MD001/MD036) diff --git a/docs/MASTER_PLAN.md b/docs/MASTER_PLAN.md index 9f4d5cf8..e0318605 100644 --- a/docs/MASTER_PLAN.md +++ b/docs/MASTER_PLAN.md @@ -61,8 +61,8 @@ ### #7 Karpathy 诚实化 - **文件**: `README.md` + `README.zh-CN.md` — "8640 combinations" → "~12 common combinations tested in CI"; "95% aspirational" → "Coverage is broad but not exhaustive" -- **文件**: `00-项目导航.md` — 移除 9 处 "主宪章 §X" 引用 -- **文件**: `ROADMAP.md` — 移除 3 处 "主宪章" 引用 +- **文件**: `00-项目导航.md` — 移除 9 处 "X" 引用 +- **文件**: `ROADMAP.md` — 移除 3 处 "" 引用 - **文件**: `utils/generate_report.py` — `generate_test_report()` 143→30 行, 提取 6 helper - **文件**: `utils/mobile_driver.py` — `run_monkey()` 107→55 行, 提取 2 helper - **文件**: `runtime/router/llm_client.py` — `_stub_response()` 77 行 if/elif → dispatch table 8 条目 diff --git a/docs/STYLE.md b/docs/STYLE.md index 2bc183b5..76b4d322 100644 --- a/docs/STYLE.md +++ b/docs/STYLE.md @@ -21,7 +21,7 @@ |------|-----| | 关键术语首次出现 | "采用 **遍历性检验**:失败能否重来" | | 表格表头(可选) | 表头单元格内的术语 | -| 警告 / 铁律前缀 | "**铁律**:敏感文件不入 repo" | +| 警告 / 规则前缀 | "**规则**:敏感文件不入 repo" | **禁止**: - 整段加粗(用 blockquote `>` 代替) diff --git a/docs/assets/demo-script-v1.12.md b/docs/assets/demo-script-v1.12.md index ea1cbc4a..1c1ea46b 100644 --- a/docs/assets/demo-script-v1.12.md +++ b/docs/assets/demo-script-v1.12.md @@ -1,4 +1,4 @@ -# 30 秒 demo · 录制脚本(V1.12 配置自动组装) +# 30 秒 demo · 录制脚本 > 目标:让观众在 30 秒内看到"从 0 到测试报告"完整链路 · 用于推特 / 微信视频号 / 掘金 / Hacker News > 录制工具:[Terminalizer](https://terminalizer.com) / [asciinema](https://asciinema.org) / OBS 屏幕录制 @@ -37,7 +37,7 @@ cat STARTUP.md | head -30 # Step 4 · 健康检查(秒过) tagent doctor --agents -# Step 5 · 跑 demo(V1.13 加 · 全 stub LLM 0 成本) 
+# Step 5 · 跑 demo tagent demo # Step 6 · 看产物(树形) @@ -112,6 +112,6 @@ terminalizer render demo --output docs/assets/demo.mp4 --quality 80 ## 相关 -- 项目宪章 §1 一键部署 · §38 配置自动组装 canon · §5 多格式 I/O +- 项目宪章 一键部署 · 配置自动组装 canon · 多格式 I/O - Terminalizer 配置:[`terminalizer-config.yml`](terminalizer-config.yml) -- 录制原 recipe:[`demo.recipe.md`](demo.recipe.md)(V1.7 起占位) +- 录制原 recipe:[`demo.recipe.md`](demo.recipe.md) diff --git a/docs/assets/terminalizer-config.yml b/docs/assets/terminalizer-config.yml index 781e71ad..9b8dfb2b 100644 --- a/docs/assets/terminalizer-config.yml +++ b/docs/assets/terminalizer-config.yml @@ -7,7 +7,7 @@ command: bash scripts/_demo-commands.sh -cwd: . +cwd: . env: recording: true diff --git "a/docs/case-studies/2026-05-15-\350\257\232\345\256\236\345\214\226\344\270\216\351\230\262mock\351\227\255\347\216\257.md" "b/docs/case-studies/2026-05-15-\350\257\232\345\256\236\345\214\226\344\270\216\351\230\262mock\351\227\255\347\216\257.md" index 703d6147..3a634bfa 100644 --- "a/docs/case-studies/2026-05-15-\350\257\232\345\256\236\345\214\226\344\270\216\351\230\262mock\351\227\255\347\216\257.md" +++ "b/docs/case-studies/2026-05-15-\350\257\232\345\256\236\345\214\226\344\270\216\351\230\262mock\351\227\255\347\216\257.md" @@ -1,4 +1,4 @@ -# Case Study: 文档诚实化 + 防 mock 闭环 (V1.14.0+1) +# Case Study: 文档诚实化 + 防 mock 闭环 > **时间**: 2026-05-13 → 2026-05-15 > **范围**: V1.14.0 → V1.14.0+1 @@ -82,7 +82,7 @@ X4 真正核心 = **skill 防 mock + 单源化**, 不是单纯 router 过滤。 ### Phase 4: stub 设计不一致根治 (PR #68) -X4 验时发现 pre-existing baseline 坏测试 `test_router_ends_with_report_generator`。根因: V1.14 把 `test-lead` 加到 stub 的 web-system path 末 (按主宪章 §40 "测试主管 — 协调 + 最终上线建议"), 但 mobile-app / desktop-app / ai-model / rest-api 4 path 未同步。 +X4 验时发现 pre-existing baseline 坏测试 `test_router_ends_with_report_generator`。根因: V1.14 把 `test-lead` 加到 stub 的 web-system path 末 , 但 mobile-app / desktop-app / ai-model / rest-api 4 path 未同步。 修法: 5 path 末统一 `test-lead` 决策 (与 `agents/README.md` L20-31 流程对齐): @@ 
-136,7 +136,7 @@ f6 假阳性 3 问全否后撤项。 ### 决策 4: stub 5 path 统一 test-lead (而非容错测试) -`test_router_ends_with_report_generator` 旧测试错, 不是 stub 错。改修 stub (V1.14 漏改 4 path 同步) + rename 测试, 与 `agents/README.md` 流程对齐。 +`test_router_ends_with_report_generator` 旧测试错, 不是 stub 错。改修 stub + rename 测试, 与 `agents/README.md` 流程对齐。 ## 5. 教训 / 复用 pattern @@ -151,13 +151,13 @@ f6 假阳性 3 问全否后撤项。 - 协作章程 (六道闸 + f1-f6): 项目内部文档 - utils env-var gate v2 范式 (5 utils 实战沉淀): 项目内部工程模式文档 - LLM-as-judge 弱模型 prompt 工程 (4 层 prompt + 顺序敏感 few-shot): 项目内部工程模式文档 -- 防 mock 闭环 4-step pattern: 本文 §3 闭环图, 待跨项目复用 +- 防 mock 闭环 4-step pattern: 本文 闭环图, 待跨项目复用 ## 7. 未做 / 后续 | 项 | 状态 | |---|---| -| X4.5 (production skill no-op) | NOT-A-BUG (本 case study §4 决策 3) | +| X4.5 (production skill no-op) | NOT-A-BUG (本 case study 决策 3) | | TG 跨项目迁移 (协作宪章 v6 + utils gate v2) | 待启 | | Mac 真机 install.sh 实测 | 待启 (Linux Docker 已通 PR #60/#61/#62) | | V1.15 sprint (env-manager LLM-driven minimum viable) | 防 mock 前置已落, 可直接进入 expert 实装 | diff --git a/docs/case-studies/INDEX.md b/docs/case-studies/INDEX.md index 28dba174..1350ac91 100644 --- a/docs/case-studies/INDEX.md +++ b/docs/case-studies/INDEX.md @@ -10,11 +10,11 @@ ## 复用导引 -- 看「**多源核对纪律**」→ 2026-05-15 §5 教训 1 -- 看「**单源化优于双源**」→ 2026-05-15 §5 教训 2 -- 看「**一文件一审 + f1-f6**」→ 2026-05-15 §5 教训 3 (协作章程) -- 看「**范围修订要敢承认错估**」→ 2026-05-15 §5 教训 4 -- 看「**防 mock 闭环 4-step pattern**」→ 2026-05-15 §3 闭环图 +- 看「**多源核对纪律**」→ 2026-05-15 教训 1 +- 看「**单源化优于双源**」→ 2026-05-15 教训 2 +- 看「**一文件一审 + f1-f6**」→ 2026-05-15 教训 3 (协作章程) +- 看「**范围修订要敢承认错估**」→ 2026-05-15 教训 4 +- 看「**防 mock 闭环 4-step pattern**」→ 2026-05-15 闭环图 ## 相关 diff --git a/docs/charter/01-vision-dimensions.md b/docs/charter/01-vision-dimensions.md index 9be1f7b3..6d474614 100644 --- a/docs/charter/01-vision-dimensions.md +++ b/docs/charter/01-vision-dimensions.md @@ -6,7 +6,7 @@ ## 🏛️ 项目宪章(灵魂底色) -> 三公理 + 五条铭文 + 工程映射 + V1.0.0 锁死 + 双签解锁条件 — 已迁入主宪章 §10(memory `project_test_agent_workflow.md`),FULL_GUIDE 不再重复维护。 +> 三公理 + 五条铭文 + 工程映射 + V1.0.0 锁死 
+ 双签解锁条件 — 已迁入(memory `project_test_agent_workflow.md`),FULL_GUIDE 不再重复维护。 --- @@ -35,12 +35,12 @@ > 工程矩阵之下的认知地图——回答"测试 Agent 到底需要哪些维度的能力"。各簇能力的工程落点散布在「核心特性」「全链路覆盖矩阵」「关键模块清单」中。 > **接入策略**:簇 1-2 为 V1.0.0 主交付;簇 3-5 部分进入工程矩阵;簇 6-9 多数列入 Phase 2-4 路线图,**承认存在但不假装能立刻交付**——符合第三公理。 -### 簇 1 · 工程与架构层(V1.0.0 主体) +### 簇 1 · 工程与架构层 - 七阶段工作流:需求理解 → 用例生成 → 执行 → 观测 → 根因 → 反馈 → 治理 - 自动化工具栈、Agent 协作协议、用户交互界面 - 决策回放器、停机演练、可观测性递归 -### 簇 2 · 认知暗物质层(V1.0.0 部分 + Phase 3 补全) +### 簇 2 · 认知暗物质层 - 认知债务(被遗忘的 Why) - 跨系统嗅觉(上下游气味相投) - 沉默故障(不报警的恶化) @@ -53,7 +53,7 @@ - 制度性愚蠢抗体 - 生态位"暗杀"攻击建模 -### 簇 4 · 抽象与元层(V1.0.0 部分) +### 簇 4 · 抽象与元层 - 预兆感知(弱信号 + 拓扑同调) - 反目标函数(对测试本身的测试) - 语言游戏(语义歧义放大器) @@ -61,7 +61,7 @@ - 测试热寂与熵减祭司 - 本体论测试(数字孪生 vs 物理承诺) -### 簇 5 · 行业元逻辑层(V1.0.0 参照表 + Phase 2 选定 MVP) +### 簇 5 · 行业元逻辑层 - 金融=承诺守恒、医疗=伤害可逆、法律=边界例外 - 教育=认知脚手架、农业=优雅降级、艺术=避免审查官 - 自动驾驶/机器人=物理承诺 diff --git a/docs/charter/03-agentchat-protocol.md b/docs/charter/03-agentchat-protocol.md index 2a5d53e8..658edece 100644 --- a/docs/charter/03-agentchat-protocol.md +++ b/docs/charter/03-agentchat-protocol.md @@ -130,6 +130,6 @@ agent 在三种情况**必须停下反问用户**,不允许猜: --- ``` -**铁律**:争议未落档 → 不允许 Bug 单关闭、不允许测试报告签发、不允许测试计划评审通过。 +**规则**:争议未落档 → 不允许 Bug 单关闭、不允许测试报告签发、不允许测试计划评审通过。 --- diff --git a/docs/charter/05-install-deploy.md b/docs/charter/05-install-deploy.md index 29632a77..9fc38790 100644 --- a/docs/charter/05-install-deploy.md +++ b/docs/charter/05-install-deploy.md @@ -236,7 +236,7 @@ cursor # Cursor ## 🔐 闭环约定(设计原则) -> 18 条全栈闭环约定(数据/cov/重试/severity/error_rate/基线/门禁/MCP/prod禁/Flaky/铭文/决策追溯/三筐/修改四关/工具兼容/纪要不可删/自进化棘轮/依赖补装)— 已迁入主宪章 §19,FULL_GUIDE 不再重复维护。 +> 18 条全栈闭环约定(数据/cov/重试/severity/error_rate/基线/门禁/MCP/prod禁/Flaky/铭文/决策追溯/三筐/修改四关/工具兼容/纪要不可删/自进化棘轮/依赖补装)— 已迁入,FULL_GUIDE 不再重复维护。 --- diff --git a/docs/charter/06-test-architecture.md b/docs/charter/06-test-architecture.md index 8741f1c1..8345f95a 100644 --- a/docs/charter/06-test-architecture.md +++ b/docs/charter/06-test-architecture.md @@ -204,7 +204,7 @@ | # | 议题 | | | 
|---|------|---------|------| | | | | | -| Q2 | Agent 架构:单体 vs 专 | | V1.0.0 选专科 + test-lead 中枢 | +| Q2 | Agent 架构:单体 vs 专 | | V1.0.0 选专科 + test-lead 中枢 | | Q3 | 五条铭文的技术实现机制(不可变区域、熔断条件)? | 🔄 | V1.0.0 铭文锁死,无削弱机制;Phase 4 接入合规行业后重新设计 | | Q4 | 独立审计署的法律实体形态? | ⏳ | 触发条件:团队 ≥ 20 人 或 接入合规行业 | | Q5 | 末日哨兵权的触发授权链? | ⏳ | 需监管/学界共识,Phase 4 | diff --git a/docs/charter/07-runtime-license.md b/docs/charter/07-runtime-license.md index 229cae6b..065080ff 100644 --- a/docs/charter/07-runtime-license.md +++ b/docs/charter/07-runtime-license.md @@ -6,7 +6,7 @@ ## 🧠 V1.36.0 运行时层(`runtime/`) -> 已有 16 专家 / 32 Skill / 79 utils**不动**(宪章铁律),`runtime/` 作可执行调度层 + 真 LLM-driven agent/skill runner。 +> 已有 16 专家 / 32 Skill / 79 utils**不动**(宪章规则),`runtime/` 作可执行调度层 + 真 LLM-driven agent/skill runner。 > 让"文档+脚本工具箱"升级为"可被 API/CLI/CI 直接调用的运行时"。 ### 模块拓扑 @@ -113,7 +113,7 @@ uvicorn runtime.api.main:app --port 8800 | 79 utils `.py` | **不动**。`orchestrator/adapters/scripts.py` subprocess 隔离调用 | | `utils/` 通知/Bug | 复用 `generate_report.py` / `zentao_bug_manager.py` | -任何专家/Skill/脚本**新增**或**修改**仍按宪章 §1 同步铁律走;`runtime/` 是新增 **调度** 层,不重复实现专家逻辑。 +任何专家/Skill/脚本**新增**或**修改**仍按宪章 同步规则走;`runtime/` 是新增 **调度** 层,不重复实现专家逻辑。 V1.14+ 真 LLM-driven agent runner + V1.21+ SkillRunner 系统为 runtime 新增执行能力,详见 [ROADMAP.md](../../ROADMAP.md)。 --- @@ -121,9 +121,9 @@ V1.14+ 真 LLM-driven agent runner + V1.21+ SkillRunner 系统为 runtime 新增 ## 📜 LICENSE / CHANGELOG / CONTRIBUTING / SECURITY - **LICENSE**:MIT(详见 [`LICENSE`](../../LICENSE)) -- **CHANGELOG**:详见 [`../../CHANGELOG.md`](../../CHANGELOG.md)(V1.43.0 / Phase 3+4+5 落版 + 32/32 skill active 全 production) +- **CHANGELOG**:详见 [`../../CHANGELOG.md`](../../CHANGELOG.md) - **VERSION**:详见 [`VERSION`](../../VERSION) -- **CONTRIBUTING**:详见 [`CONTRIBUTING.md`](../../CONTRIBUTING.md)(含同步铁律 + RACI 矩阵) +- **CONTRIBUTING**:详见 [`CONTRIBUTING.md`](../../CONTRIBUTING.md)(含同步规则 + RACI 矩阵) - **SECURITY**:详见 [`SECURITY.md`](../../SECURITY.md)(漏洞报告流程 + GitHub Security Advisories 入口) - 
**CODE_OF_CONDUCT**:详见 [`CODE_OF_CONDUCT.md`](../../CODE_OF_CONDUCT.md)(基于 Contributor Covenant 2.1) @@ -133,9 +133,9 @@ V1.14+ 真 LLM-driven agent runner + V1.21+ SkillRunner 系统为 runtime 新增 ### 当前阶段(最后更新:2026-05-18) -- **Phase**:Phase 2 前期(V1.36.0 · expert rollout 收尾 + skill rollout 全 16/16 完成) +- **Phase**:Phase 2 前期(expert rollout 收尾 + skill rollout 全 16/16 完成) - **关键已交付**:16 expert (11p+5s) · 32 skill (23p+7s+0r+2v) · AgentChat · Bug 多适配 · 按需安装 · darwin-skill · MCP 6 件套 · Marketplace · 教学层 · 多 LLM config · 16 SkillRunner 全落地 -- **活跃 PR**:#124-#127 merged(V1.34-V1.36, 2026-05-18) +- **活跃 PR**:#124-#127 merged ### 历史关键决议 diff --git "a/docs/getting-started/\344\272\244\344\273\230\347\211\251\346\270\205\345\215\225.md" "b/docs/getting-started/\344\272\244\344\273\230\347\211\251\346\270\205\345\215\225.md" index 93b9e9af..799d1e5b 100644 --- "a/docs/getting-started/\344\272\244\344\273\230\347\211\251\346\270\205\345\215\225.md" +++ "b/docs/getting-started/\344\272\244\344\273\230\347\211\251\346\270\205\345\215\225.md" @@ -233,7 +233,7 @@ workspace/测试报告/{项目名}/baselines/perf_baseline.json ← 性能基 --- -## V1.1.0 · 运行时层交付物 +## · 运行时层交付物 | 场景 | 路径 / 出口 | 来源 | |------|------------|------| diff --git "a/docs/getting-started/\344\275\277\347\224\250\346\211\213\345\206\214.md" "b/docs/getting-started/\344\275\277\347\224\250\346\211\213\345\206\214.md" index 3b7a8712..70fc8a08 100644 --- "a/docs/getting-started/\344\275\277\347\224\250\346\211\213\345\206\214.md" +++ "b/docs/getting-started/\344\275\277\347\224\250\346\211\213\345\206\214.md" @@ -654,7 +654,7 @@ python -m utils.jmeter_result_parser \ --- -## V1.1.0 · 运行时 CLI/API 用法 +## · 运行时 CLI/API 用法 > 运行时层 `runtime/` 是 V1.1.0 新增。已有 16 专家 / 32 Skill / 79 脚本不动,本层仅作可执行调度。 diff --git "a/docs/getting-started/\351\203\250\347\275\262\350\257\264\346\230\216.md" "b/docs/getting-started/\351\203\250\347\275\262\350\257\264\346\230\216.md" index 030c31a8..01a6928a 100644 --- 
"a/docs/getting-started/\351\203\250\347\275\262\350\257\264\346\230\216.md" +++ "b/docs/getting-started/\351\203\250\347\275\262\350\257\264\346\230\216.md" @@ -636,7 +636,7 @@ python -c "from utils.api_retry_util import call_with_retry; print('utils OK')" --- -## V1.1.0 · 运行时层 `runtime/` 部署 +## · 运行时层 `runtime/` 部署 > 运行时层是可选的(用户也可只用 16 专家+32 Skill+79 脚本的 V1.0.0 工作流模式)。 > 想要 HTTP/CLI 一键跑、AI 路由、飞轮存储,启它。 diff --git "a/docs/getting-started/\351\205\215\347\275\256\346\270\205\345\215\225.md" "b/docs/getting-started/\351\205\215\347\275\256\346\270\205\345\215\225.md" index e1d60c14..bc11d16c 100644 --- "a/docs/getting-started/\351\205\215\347\275\256\346\270\205\345\215\225.md" +++ "b/docs/getting-started/\351\205\215\347\275\256\346\270\205\345\215\225.md" @@ -407,7 +407,7 @@ pip install -U -r requirements.txt --- -## V1.1.0 · 运行时层配置(`runtime/`) +## · 运行时层配置(`runtime/`) ### `.env` 增项 diff --git a/docs/theory/03-foundation/test-pyramid-2024.zh.md b/docs/theory/03-foundation/test-pyramid-2024.zh.md index 77d9c349..c0901d02 100644 --- a/docs/theory/03-foundation/test-pyramid-2024.zh.md +++ b/docs/theory/03-foundation/test-pyramid-2024.zh.md @@ -10,11 +10,11 @@ authority: - "Mike Cohn 2009《Succeeding with Agile》ch.16(原版金字塔)" - "Martin Fowler https://martinfowler.com/articles/practical-test-pyramid.html" - "Google Testing Blog 2024 doc on pyramid" - - ISO/IEC 25010 §4 Quality Model + - ISO/IEC 25010 Quality Model confidence: high last_reviewed: 2026-05-12 reviewer: agent-curator -when_to_use: 任何项目测试架构设计前;主宪章 §17 落点 +when_to_use: 任何项目测试架构设计前;落点 common_pitfall: - "70/20/10 一刀切——按变更频率+阻塞代价重新分布才合理" - "把视觉回归当成独立层——它属 E2E" @@ -55,4 +55,4 @@ reading_en: - 单元最便宜最快 → 多写 - E2E 最贵最脆 → 少写 - 契约层填补微服务断点 → 必有 -- 变异分数 ≠ 覆盖率(主宪章 §21 横切准则)→ 用例质量反验 +- 变异分数 ≠ 覆盖率→ 用例质量反验 diff --git a/docs/theory/04-strategy/shift-left.zh.md b/docs/theory/04-strategy/shift-left.zh.md index 6848d151..94ff8301 100644 --- a/docs/theory/04-strategy/shift-left.zh.md +++ b/docs/theory/04-strategy/shift-left.zh.md 
@@ -13,7 +13,7 @@ authority: confidence: high last_reviewed: 2026-05-12 reviewer: agent-curator -when_to_use: 任何项目;主宪章 §17 测试架构 7 层左移 +when_to_use: 任何项目;测试架构 7 层左移 common_pitfall: - "只测开发末端 → 修复成本爆炸" - "需求阶段不评审 → 测试用例无所依" diff --git a/docs/theory/05-methods/equivalence-partitioning.zh.md b/docs/theory/05-methods/equivalence-partitioning.zh.md index 2ddd0da1..76e06499 100644 --- a/docs/theory/05-methods/equivalence-partitioning.zh.md +++ b/docs/theory/05-methods/equivalence-partitioning.zh.md @@ -56,5 +56,5 @@ reading_en: - 与边界值配合,90% 输入缺陷可覆盖 ## 反模式 -- 漏无效类(只测好路径)= **测试不诚信**(主宪章 §21 取舍铁律) +- 漏无效类(只测好路径)= **测试不诚信** - 类切太细 = 用例数爆炸,失去抽象价值 diff --git a/docs/theory/06-protocols/http-https.zh.md b/docs/theory/06-protocols/http-https.zh.md index 5b540cc9..cdd589e4 100644 --- a/docs/theory/06-protocols/http-https.zh.md +++ b/docs/theory/06-protocols/http-https.zh.md @@ -56,7 +56,7 @@ Web/API 测试**必经协议**。Test-Agent `utils/api_retry_util.py` 提供 10/ | 性能 | TTFB / P95 / TPS(JMeter / k6) | | 安全 | OWASP API Top 10 / TLS 配置 | | 幂等性 | Idempotency-Key 重复提交 | -| 重试 | 指数退避(主宪章 §18-3 全栈 10/20/40s) | +| 重试 | 指数退避 | ## 为什么 Agent 默认调 HTTP? 
被测物 = Web/REST/GraphQL/Webhook/SOAP → 全部跑在 HTTP 之上;Agent 用 `runtime/mcp/protocol_adapter` 的 HTTP adapter 做协议层抽象。 diff --git a/docs/theory/08-gates/flaky-vs-reruns.zh.md b/docs/theory/08-gates/flaky-vs-reruns.zh.md index b903d2de..7e05abeb 100644 --- a/docs/theory/08-gates/flaky-vs-reruns.zh.md +++ b/docs/theory/08-gates/flaky-vs-reruns.zh.md @@ -9,7 +9,7 @@ one_liner_en: No reruns at smoke (preserve flaky signal); reruns at regression ( authority: - "Google Testing Blog: Flaky Tests at Google and How We Mitigate Them" - "pytest-rerunfailures docs" - - ISTQB Advanced Test Manager §6 Risk-based Testing + - ISTQB Advanced Test Manager Risk-based Testing confidence: high last_reviewed: 2026-05-12 reviewer: agent-curator diff --git a/docs/theory/12-process/bug-lifecycle.zh.md b/docs/theory/12-process/bug-lifecycle.zh.md index 9f78b9b0..8ef2db16 100644 --- a/docs/theory/12-process/bug-lifecycle.zh.md +++ b/docs/theory/12-process/bug-lifecycle.zh.md @@ -26,7 +26,7 @@ example: | └→ Deferred reopen: Closed → Open(失败回归触发) - 严重度(技术影响):1=P0 / 2=P1 / 3=P2 / 4=P3(主宪章 §18-4) + 严重度(技术影响):1=P0 / 2=P1 / 3=P2 / 4=P3 优先级(业务紧急):同样四级,但**与严重度独立** related_to: [rca-5why-8d-fishbone, bug-tracker-adapters] --- @@ -40,7 +40,7 @@ Test-Agent **统一权威**:`utils/bug_severity_map.py` 跨 5 adapter(zentao/jir - **优先级**:修复紧急度(老板用 vs 客户偶遇) - **两者独立**:可能"P3 优先级 + P0 严重度"(数据丢失但只影响 1% 用户) -## RCA 标准方法(主宪章 §21 流程阶段 6) +## RCA 标准方法 - 5Why:连问 5 个为什么 - 8D:8 步纪律(团队+护栏+遏制+根因+永久解+预防) - Fishbone:鱼骨图分类(人/机/料/法/环/测) diff --git a/docs/theory/13-build-your-own/INDEX.md b/docs/theory/13-build-your-own/INDEX.md index 1a0ff686..b3ab4aa2 100644 --- a/docs/theory/13-build-your-own/INDEX.md +++ b/docs/theory/13-build-your-own/INDEX.md @@ -19,10 +19,10 @@ ## 横切准则 -- 每卡 `estimated_time_hours` 必填(防 §27 原则 4 时间陷阱) -- confidence 默认 `medium`(tutorial 质量参差,§23 KB) +- 每卡 `estimated_time_hours` 必填(防 原则 4 时间陷阱) +- confidence 默认 `medium`(tutorial 质量参差 - 引用 URL 必带 1 句摘要(防链接失效) -- `essence_only` 标:不自动提议入 Test-Agent(§29 policy) +- 
`essence_only` 标:不自动提议入 Test-Agent(policy) ## 不收录 diff --git a/docs/theory/13-build-your-own/byox-bot.zh.md b/docs/theory/13-build-your-own/byox-bot.zh.md index a552fd9e..37e10c4a 100644 --- a/docs/theory/13-build-your-own/byox-bot.zh.md +++ b/docs/theory/13-build-your-own/byox-bot.zh.md @@ -24,8 +24,8 @@ reading_en: ["https://core.telegram.org/bots/api"] # 对测试工作 -- **gateway 测试**(本项目 §22 runtime/gateway 8 平台):理解 webhook 校验 + 限流 + retry +- **gateway 测试**(本项目 runtime/gateway 8 平台):理解 webhook 校验 + 限流 + retry - **消息回调测试**:测平台超时 / 重试策略 -- **scheduler + bot**(§22):懂 webhook 才能测自动化日报推送 +- **scheduler + bot**():懂 webhook 才能测自动化日报推送 - **垃圾消息防御**:bot 必测 rate-limit + 签名校验 - **bot 模拟器**:用 from-scratch bot 当测试 mock diff --git a/docs/theory/13-build-your-own/byox-programming-language.zh.md b/docs/theory/13-build-your-own/byox-programming-language.zh.md index 2dc74447..3b57f167 100644 --- a/docs/theory/13-build-your-own/byox-programming-language.zh.md +++ b/docs/theory/13-build-your-own/byox-programming-language.zh.md @@ -27,4 +27,4 @@ reading_en: ["https://craftinginterpreters.com/"] - **DSL 测试**:测试配置语言 / 业务规则引擎 - **编译器 bug**:测试代码生成正确性(差分测试) - **解析器测试**:本项目 `runtime/router/schema.py` 用 Pydantic 解析 LLM JSON,理解 grammar 才能写好 schema -- **§28 ECC agent-introspection-debugging** 调试 LLM 输出 = 解析 LLM 类自然语言"DSL" +- **ECC agent-introspection-debugging** 调试 LLM 输出 = 解析 LLM 类自然语言"DSL" diff --git a/docs/theory/13-build-your-own/byox-search-engine.zh.md b/docs/theory/13-build-your-own/byox-search-engine.zh.md index c2faa1a0..2728f6e2 100644 --- a/docs/theory/13-build-your-own/byox-search-engine.zh.md +++ b/docs/theory/13-build-your-own/byox-search-engine.zh.md @@ -25,6 +25,6 @@ reading_en: ["https://nlp.stanford.edu/IR-book/"] # 对测试工作 - **RAG 测试**:理解检索召回率/精度 → 设计 Jaccard@k / nDCG eval -- **混合检索**(本项目 §24):懂 BM25 + vector 才能融合 fork +- **混合检索**(本项目 ):懂 BM25 + vector 才能融合 fork - **KB 测试**:本项目 docs/theory KB 检索质量评测 - **gbrain 精髓** § 1.3 混合检索 4 路落地的理论基础 diff --git 
a/docs/theory/13-build-your-own/byox-web-browser.zh.md b/docs/theory/13-build-your-own/byox-web-browser.zh.md index 0f6939c0..483567d3 100644 --- a/docs/theory/13-build-your-own/byox-web-browser.zh.md +++ b/docs/theory/13-build-your-own/byox-web-browser.zh.md @@ -27,4 +27,4 @@ reading_en: ["https://browser.engineering/"] - **视觉回归**:理解 layout/paint → 知道为什么字体抖动 / DPR / 动画导致 SSIM 不稳 - **Web Vitals**(LCP/FID/CLS/INP):懂渲染管线才能优化 - **selector 失效**(M1-10 web-demo bug):懂 DOM 才知道用 `role` / `text` 而非 CSS class -- **§28 ECC e2e-testing skill** 落地的底层 +- **ECC e2e-testing skill** 落地的底层 diff --git a/docs/theory/INDEX.md b/docs/theory/INDEX.md index d9f5e4fc..bdd2f308 100644 --- a/docs/theory/INDEX.md +++ b/docs/theory/INDEX.md @@ -1,7 +1,7 @@ -# Theory KB 索引(主宪章 §23 教学层准则) +# Theory KB 索引 > Test-Agent 部署后的**学习知识库**。用户进入学习模式时,Agent 依此 KB 提供权威解释。 -> 主宪章 §23 铁律:**LLM 不得编造 KB 外的引用**(防幻觉 L1 层)。 +> 规则:**LLM 不得编造 KB 外的引用**(防幻觉 L1 层)。 ## 双语支持 @@ -9,7 +9,7 @@ - `--lang zh-en` 双栏对照(学英文用) - UI i18n 独立(`runtime/web/src/locales/`) -## 12 大类(主宪章 §23) +## 12 大类 | # | 分类 | 目录 | 主题示例 | |---|------|------|---------| @@ -43,7 +43,7 @@ - `confidence`(high/medium/low/**llm-draft-unreviewed**) - `last_reviewed` + `reviewer` -## 反幻觉 3 层(主宪章 §23) +## 反幻觉 3 层 | 层 | 机制 | |----|------| diff --git a/docs/theory/_authority_sources.yaml b/docs/theory/_authority_sources.yaml index f9002ebe..4a7ffe50 100644 --- a/docs/theory/_authority_sources.yaml +++ b/docs/theory/_authority_sources.yaml @@ -1,4 +1,4 @@ -# 权威源白名单(Q4-D 全要)· 主宪章 §23 铁律 +# 权威源白名单(Q4-D 全要) # LLM 在 learn mode 引用必须出自此列表;否则标 confidence=llm-draft-unreviewed. international: diff --git a/docs/theory/_schema.yaml b/docs/theory/_schema.yaml index b61e699a..808e0e60 100644 --- a/docs/theory/_schema.yaml +++ b/docs/theory/_schema.yaml @@ -1,6 +1,6 @@ -# Theory KB card schema (charter §23). +# Theory KB card schema (charter ). # 每张卡片(*.zh.md / *.en.md) frontmatter 必须满足此 schema. 
-# 主宪章 §23 铁律:LLM 在 learn mode 下只能引用 KB 中存在的 id;不在 KB 的概念输出"该领域未收录"。 +# 规则:LLM 在 learn mode 下只能引用 KB 中存在的 id;不在 KB 的概念输出"该领域未收录"。 required_fields: id: diff --git a/examples/INDEX.md b/examples/INDEX.md index 783ddb1b..423eb12f 100644 --- a/examples/INDEX.md +++ b/examples/INDEX.md @@ -21,7 +21,7 @@ - 想加新 demo(API / 移动 / 桌面 / IoT 等):新建子目录,写 `README.md` 说明启动方式 - demo 不应含真实凭据 / 真实客户数据 → 一律占位 `<YOUR_*>` 或 `.env.example` -## 私有边界(V1.10 起强制) +## 私有边界 - **禁止**:真实客户 PRD 样本入 `examples/` - **占位**:用 `_template_*` 前缀(如 `_template_login_prd.md`) @@ -30,4 +30,4 @@ ## 相关 - 上一级:[`../README.md`](../README.md) -- 主宪章 §0(开源约束)+ §29(精髓库隔离)+ §34(精髓库防误入,V1.10) +- (开源约束)+ (精髓库隔离)+ (精髓库防误入 diff --git a/examples/_smoke_prd.md b/examples/_smoke_prd.md index 67f61470..634740d5 100644 --- a/examples/_smoke_prd.md +++ b/examples/_smoke_prd.md @@ -1,6 +1,6 @@ # Smoke PRD · 登录模块(fixture) -> Test-Agent 自检 fixture · 主宪章 §33 · 改了请同步更新 `selftest` 期望产出。 +> Test-Agent 自检 fixture > **此文件不代表任何真实项目**,纯为 e2e 流程验证。 ## 1. 背景 diff --git a/runtime/INDEX.md b/runtime/INDEX.md index a4817782..596e08e6 100644 --- a/runtime/INDEX.md +++ b/runtime/INDEX.md @@ -1,6 +1,6 @@ # runtime 索引 -> Test-Agent 运行时层(V1.1.0 新增)。 +> Test-Agent 运行时层。 > 顶层导航见根目录 `00-项目导航.md`;运行时完整章节见 `docs/charter/07-runtime-license.md`;架构设计见 [`ARCHITECTURE.md`](ARCHITECTURE.md)。 ## 定位 diff --git a/runtime/api/deps.py b/runtime/api/deps.py index 463041fe..081dc01e 100644 --- a/runtime/api/deps.py +++ b/runtime/api/deps.py @@ -36,7 +36,7 @@ def decide(self, artifact: TargetArtifact, *, vote_providers: list[str] | None = # ---------- run lifecycle ---------- def submit(self, artifact: TargetArtifact, *, persist: bool = True) -> tuple[str, RoutingDecision]: decision = self.decide(artifact) - # V1.14 主宪章 §40 — 把原始 artifact 文本注入每节点 inputs,让 AgentRunner 拿得到 + # 把原始 artifact 文本注入每节点 inputs,让 AgentRunner 拿得到 full_text = artifact.text or "" if not full_text and artifact.path: p = Path(artifact.path) diff --git a/runtime/api/main.py b/runtime/api/main.py 
index 0128bc09..8bbdd7e9 100644 --- a/runtime/api/main.py +++ b/runtime/api/main.py @@ -126,7 +126,7 @@ def catalog() -> CatalogResponse: @app.post("/run/text", response_model=RunCreated) def run_text(payload: RunCreateText, bg: BackgroundTasks, mode: str = "exec", lang: str = "zh") -> RunCreated: - # Charter §23 mode+lang per-request + # Charter mode+lang per-request from runtime.tutor.i18n import set_lang from runtime.tutor.verbosity import set_mode diff --git a/runtime/backends/INDEX.md b/runtime/backends/INDEX.md index c6a9ca43..b7707ef2 100644 --- a/runtime/backends/INDEX.md +++ b/runtime/backends/INDEX.md @@ -27,7 +27,7 @@ class BaseExecutionEnv(abc.ABC): async def close(self) -> None ``` -## 经济模型(主宪章 §22 落地) +## 经济模型 - $5 VPS = local/docker 后端跑得起 - Serverless hibernate = modal/daytona 闲置零成本 diff --git a/runtime/backends/__init__.py b/runtime/backends/__init__.py index aad95b2e..03664a97 100644 --- a/runtime/backends/__init__.py +++ b/runtime/backends/__init__.py @@ -1,4 +1,4 @@ -"""7 execution backends · hermes §1.4. +"""7 execution backends · hermes. local / docker / ssh / singularity / modal / daytona / vercel_sandbox. Use `get_backend(name)` to obtain an adapter implementing BaseExecutionEnv. diff --git a/runtime/backends/base.py b/runtime/backends/base.py index c3837f3d..a843f35e 100644 --- a/runtime/backends/base.py +++ b/runtime/backends/base.py @@ -1,4 +1,4 @@ -"""BaseExecutionEnv abstract base (hermes §1.4). +"""BaseExecutionEnv abstract base (hermes ). All 7 backends implement this contract; new backend = new file + @register. 
""" diff --git a/runtime/backends/daytona.py b/runtime/backends/daytona.py index 9df8f4eb..b6807ee3 100644 --- a/runtime/backends/daytona.py +++ b/runtime/backends/daytona.py @@ -1,4 +1,4 @@ -"""Daytona dev sandbox backend (hermes §1.4 serverless hibernate).""" +"""Daytona dev sandbox backend (hermes serverless hibernate).""" from __future__ import annotations @@ -14,7 +14,7 @@ class DaytonaBackend(BaseExecutionEnv): """Wrap a Daytona workspace via its CLI (`daytona`). Requires the Daytona CLI on PATH + authenticated profile. - Hermes §1.4 经济模型: workspace hibernates when idle, wakes on demand. + Hermes 经济模型: workspace hibernates when idle, wakes on demand. """ def __init__(self, workspace: str, *, profile: str | None = None) -> None: diff --git a/runtime/backends/docker.py b/runtime/backends/docker.py index a9982b2c..285aa753 100644 --- a/runtime/backends/docker.py +++ b/runtime/backends/docker.py @@ -1,4 +1,4 @@ -"""Docker backend (hermes §1.4). Wraps `docker exec` for a named container.""" +"""Docker backend (hermes ). Wraps `docker exec` for a named container.""" from __future__ import annotations diff --git a/runtime/backends/local.py b/runtime/backends/local.py index b02ff6af..3f107200 100644 --- a/runtime/backends/local.py +++ b/runtime/backends/local.py @@ -1,4 +1,4 @@ -"""Local subprocess backend (hermes §1.4).""" +"""Local subprocess backend (hermes ).""" from __future__ import annotations diff --git a/runtime/backends/modal.py b/runtime/backends/modal.py index 674ef960..1506118d 100644 --- a/runtime/backends/modal.py +++ b/runtime/backends/modal.py @@ -1,4 +1,4 @@ -"""Modal serverless backend (hermes §1.4 经济模型 — hibernate when idle). +"""Modal serverless backend (hermes 经济模型 — hibernate when idle). Modal client SDK must be installed and authenticated: pip install modal @@ -20,7 +20,7 @@ class ModalBackend(BaseExecutionEnv): """Wrap a Modal Function/App; commands run inside a hibernated container. 
- Hermes §1.4 经济模型: ground-state nearly zero cost when idle. + Hermes 经济模型: ground-state nearly zero cost when idle. """ def __init__(self, app_name: str, *, image: str | None = None) -> None: diff --git a/runtime/backends/singularity.py b/runtime/backends/singularity.py index 15ac7338..acfa93cc 100644 --- a/runtime/backends/singularity.py +++ b/runtime/backends/singularity.py @@ -1,4 +1,4 @@ -"""Singularity / Apptainer backend (hermes §1.4; HPC-friendly).""" +"""Singularity / Apptainer backend (hermes ; HPC-friendly).""" from __future__ import annotations diff --git a/runtime/backends/ssh.py b/runtime/backends/ssh.py index e003b7bb..b9de6e93 100644 --- a/runtime/backends/ssh.py +++ b/runtime/backends/ssh.py @@ -1,4 +1,4 @@ -"""SSH backend (hermes §1.4). Uses asyncssh for persistent connection.""" +"""SSH backend (hermes ). Uses asyncssh for persistent connection.""" from __future__ import annotations diff --git a/runtime/backends/vercel_sandbox.py b/runtime/backends/vercel_sandbox.py index 06dafff9..32e4612c 100644 --- a/runtime/backends/vercel_sandbox.py +++ b/runtime/backends/vercel_sandbox.py @@ -1,4 +1,4 @@ -"""Vercel Sandbox backend (hermes §1.4 边缘运行).""" +"""Vercel Sandbox backend (hermes 边缘运行).""" from __future__ import annotations diff --git a/runtime/cli/config.py b/runtime/cli/config.py index 95969a0a..7acaa64d 100644 --- a/runtime/cli/config.py +++ b/runtime/cli/config.py @@ -1,11 +1,11 @@ -"""tagent config — LLM provider configuration (V1.22.0 · Step 2 multi-model onboarding). +"""tagent config — LLM provider configuration (Step 2 multi-model onboarding). 
5 sub-commands: list — list 6 built-in + path-B compatible examples show — show current .env config (keys fully redacted) use — path A: switch to built-in provider, write TAGENT_LLM_PROVIDER + vendor key placeholder use-compat — path B: OpenAI-compatible fallback channel (any vendor, plug-and-play) - unset — remove specified key from .env (V1.25.0) + unset — remove specified key from .env env file priority: CWD/.env -> repo-root/.env. Always backup to .env.bak before writing. """ diff --git a/runtime/config/safety.py b/runtime/config/safety.py index ca7d4510..6122371a 100644 --- a/runtime/config/safety.py +++ b/runtime/config/safety.py @@ -1,4 +1,4 @@ -"""Safe-by-default destructive guard · gbrain §1.9 派生. +"""Safe-by-default destructive guard · gbrain 派生. 危险/自动化/生产影响 操作必须 tagent.yml 显式开启. """ @@ -74,7 +74,7 @@ def get_setting(key_path: str, default: Any = None) -> Any: return _resolve(key_path.split("."), default=default) -# Common gates (charter §24) +# Common gates (charter ) def gate_scheduler_tick() -> None: assert_allowed("scheduler.tick", "scheduler.enabled") diff --git a/runtime/docker-compose.app.yml b/runtime/docker-compose.app.yml index 84407234..5d09a1b3 100644 --- a/runtime/docker-compose.app.yml +++ b/runtime/docker-compose.app.yml @@ -2,7 +2,7 @@ # Usage: docker compose -f docker-compose.yml -f docker-compose.app.yml up services: app: - build: . + build: . container_name: tagent-app depends_on: postgres: diff --git a/runtime/essence_watcher/INDEX.md b/runtime/essence_watcher/INDEX.md index 4a79423e..38143aad 100644 --- a/runtime/essence_watcher/INDEX.md +++ b/runtime/essence_watcher/INDEX.md @@ -1,7 +1,7 @@ # essence_watcher 索引 > 自动追踪 upstream reference 引用的所有 upstream repo 更新。 -> 主宪章 §29 教学层加固之 essence 自动汲取。 +> 教学层加固之 essence 自动汲取。 ## 文件清单 @@ -26,7 +26,7 @@ d. 标 confidence: llm-draft-unreviewed 5. 
应用 policy.yaml: - skill-related delta → 提议入 skills/ - - rule-related delta → 提议入主宪章 § 待审 + - rule-related delta → 提议入待审 - 其他 → 仅入 upstream 不动 Test-Agent ``` @@ -47,9 +47,9 @@ essence_watcher: # 哪些 delta 自动提议入 Test-Agent auto_propose: - skill_definitions # 新 skill 名字 / 描述 / 元数据 → 提议 skills/ - - charter_rules # 主宪章规则更新 → 提议 主宪章 - - safety_patterns # 防护模式 → 提议 §24 safe-by-default - - test_methodology # 测试方法论新增 → 提议 §17/§21 + - charter_rules # 规则更新 → 提议 + - safety_patterns # 防护模式 → 提议 safe-by-default + - test_methodology # 测试方法论新增 → 提议 / # 仅入精髓库,不动 Test-Agent essence_only: diff --git a/runtime/essence_watcher/__init__.py b/runtime/essence_watcher/__init__.py index 6f4f84cf..69d5e8a3 100644 --- a/runtime/essence_watcher/__init__.py +++ b/runtime/essence_watcher/__init__.py @@ -4,11 +4,11 @@ 1. 解析 upstream INDEX 提取 repo url 2. gh API 查最新 commit hash + 与上次记录 diff 3. 若有新 commit → 拉 README + 关键 files - 4. LLM 萃取 delta(用 aux_client,主宪章 §22) + 4. LLM 萃取 delta 5. 写 upstream update 文件 标 confidence: llm-draft-unreviewed 6. 应用 policy 决定是否提议入 Test-Agent -接入 scheduler(主宪章 §22 §24 safe-by-default): +接入 scheduler: - tagent.yml essence_watcher.enabled: true 才允许跑 - 默认每周一次 """ diff --git a/runtime/essence_watcher/apply_policy.example.yaml b/runtime/essence_watcher/apply_policy.example.yaml index 3a577e89..1b0cc63c 100644 --- a/runtime/essence_watcher/apply_policy.example.yaml +++ b/runtime/essence_watcher/apply_policy.example.yaml @@ -1,4 +1,4 @@ -# essence_watcher 选择性应用 policy(主宪章 §29) +# essence_watcher 选择性应用 policy # # 实际部署:cp 到 <essence-repo>/_apply_policy.yaml 启用 # 默认所有 delta 仅入精髓库,不动 Test-Agent. 
@@ -6,7 +6,7 @@ # 自动提议入 Test-Agent(待审) auto_propose: - skill_definitions # 新 skill name/description/metadata - - charter_rules # 主宪章规则更新建议 + - charter_rules # 规则更新建议 - safety_patterns # 防护模式 - test_methodology # 测试方法论新增 - tool_integrations # 工具集成模式 diff --git a/runtime/essence_watcher/runner.py b/runtime/essence_watcher/runner.py index 95b49659..0bee777b 100644 --- a/runtime/essence_watcher/runner.py +++ b/runtime/essence_watcher/runner.py @@ -1,7 +1,7 @@ """Essence watcher main runner. CLI: python -m runtime.essence_watcher.runner -Cron: 接入 runtime/scheduler 由 cron 触发(主宪章 §22) +Cron: 接入 runtime/scheduler 由 cron 触发 """ from __future__ import annotations @@ -19,7 +19,7 @@ def run() -> dict: """Main entry. Returns summary of changes detected + reports written.""" - # Safe-by-default gate (charter §24) + # Safe-by-default gate (charter ) if not is_allowed("essence_watcher.enabled"): raise SafeByDefaultBlocked(op="essence_watcher.run", key_path="essence_watcher.enabled") diff --git a/runtime/exporters/INDEX.md b/runtime/exporters/INDEX.md index 9682fb01..9f1ac17c 100644 --- a/runtime/exporters/INDEX.md +++ b/runtime/exporters/INDEX.md @@ -1,4 +1,4 @@ -# runtime/exporters 索引(V1.9.0) +# runtime/exporters 索引 > 用例多格式导出 · 用户自选 · 默认 Excel(已有)+ 3 新格式. @@ -38,7 +38,7 @@ class TestCaseTree: `testcase-designer` 专家 / `/testcase-design` skill 产此 IR,再 dispatch 到具体 exporter. 
-## CLI(V1.9 加) +## CLI ```bash tagent export <plan.json> --format xmind --out workspace/测试用例/login.xmind @@ -56,7 +56,7 @@ tagent export <plan.json> --format all --out-dir workspace/测试用例/ - `plantuml`(文本驱动 mindmap) - `mermaid-mindmap`(Markdown 嵌入,GitHub 渲染) -## 与主宪章关系 +## 与关系 -- §5 多格式 I/O(扩输出端 3 种思维导图格式) -- §27 简洁优先:**只加用户用得到的 3 格式**(P0+P1),P2 留位 +- 多格式 I/O(扩输出端 3 种思维导图格式) +- 简洁优先:**只加用户用得到的 3 格式**(P0+P1),P2 留位 diff --git a/runtime/exporters/__init__.py b/runtime/exporters/__init__.py index 8a29e679..cd977d4b 100644 --- a/runtime/exporters/__init__.py +++ b/runtime/exporters/__init__.py @@ -1,4 +1,4 @@ -"""Multi-format test-case exporters · V1.9.0. +"""Multi-format test-case exporters · By default: Excel 4 Sheet(`utils/excel_generator.py`,已有). New formats: xmind / markmap / opml / freemind / plantuml(按用户选). diff --git a/runtime/gateway/__init__.py b/runtime/gateway/__init__.py index 707ec638..81a66cdb 100644 --- a/runtime/gateway/__init__.py +++ b/runtime/gateway/__init__.py @@ -1,4 +1,4 @@ -"""Multi-platform messaging gateway · hermes §1.5. +"""Multi-platform messaging gateway · hermes. Single gateway process serves N platforms. Cross-platform conversation continuity. """ diff --git a/runtime/gateway/base.py b/runtime/gateway/base.py index 4f77b5d6..388120e8 100644 --- a/runtime/gateway/base.py +++ b/runtime/gateway/base.py @@ -1,4 +1,4 @@ -"""Platform abstraction · hermes §1.5.""" +"""Platform abstraction · hermes .""" from __future__ import annotations diff --git a/runtime/gateway/session.py b/runtime/gateway/session.py index 27756d7d..5a4becf4 100644 --- a/runtime/gateway/session.py +++ b/runtime/gateway/session.py @@ -1,4 +1,4 @@ -"""Cross-platform session continuity (hermes §1.5). +"""Cross-platform session continuity (hermes ). Stores conversation handles keyed by (user, app_session); each platform may attach its native chat_id so a user moving Telegram → Slack still finds the same context. 
diff --git a/runtime/healthcheck/INDEX.md b/runtime/healthcheck/INDEX.md index 15f3b8a5..6742b9d5 100644 --- a/runtime/healthcheck/INDEX.md +++ b/runtime/healthcheck/INDEX.md @@ -1,6 +1,6 @@ -# runtime/healthcheck/ 索引(V1.10.0) +# runtime/healthcheck/ 索引 -> 4 层自检 · 主宪章 §33 自检铁律 · pre-tag 卡 release。 +> 4 层自检 ## 4 层结构 @@ -42,6 +42,6 @@ ## 相关 -- 主宪章 §33(自检铁律,V1.10.0 加) +- 自检规则 - `.pre-commit-config.yaml` 中 `forbid-private-source` / `forbid-essence-library` / `file-count-check` 协同 - 日志归档:`discussions/selftest_<version>_<timestamp>.log` diff --git a/runtime/healthcheck/__init__.py b/runtime/healthcheck/__init__.py index 974a473e..5a243f80 100644 --- a/runtime/healthcheck/__init__.py +++ b/runtime/healthcheck/__init__.py @@ -1,4 +1,4 @@ -"""Healthcheck · L1 frontmatter lint + L3 LLM smoke(V1.10.0 · 主宪章 §33). +"""Healthcheck · L1 frontmatter lint + L3 LLM smoke. L1(本模块,无 LLM):agent/skill frontmatter 必填字段 + 注册表存在性 L2(CI mock):workflow `ci.yml` selftest job diff --git a/runtime/healthcheck/llm_smoke.py b/runtime/healthcheck/llm_smoke.py index 2b05b09f..ebc8ae74 100644 --- a/runtime/healthcheck/llm_smoke.py +++ b/runtime/healthcheck/llm_smoke.py @@ -1,7 +1,7 @@ """L3 lightweight LLM smoke · 单次最小往返,验真-LLM 通 + 报告延迟/token/估算成本. `tagent doctor --llm-smoke` 5 秒验证,远轻于 --probe (16 agent 全跑 ~$0.3-0.8). -用 aux 通道 provider 隔离主 prompt cache (hermes §1.3 借鉴).
""" from __future__ import annotations diff --git a/runtime/init/INDEX.md b/runtime/init/INDEX.md index 985cbf99..c3c7820c 100644 --- a/runtime/init/INDEX.md +++ b/runtime/init/INDEX.md @@ -1,6 +1,6 @@ -# runtime/init/ 索引(V1.12.0) +# runtime/init/ 索引 -> `tagent init` 配置自动组装 · 5 分钟从 0 到可跑 · 主宪章 §1 一键部署。 +> `tagent init` 配置自动组装 · 5 分钟从 0 到可跑 ## 模块 @@ -40,8 +40,8 @@ tagent init --overwrite 不改 wizard / renderer 代码,改 matrix.yaml 即可: - 新 LLM provider → `llm_providers:` 加节 -- 新 BugTracker → `bug_trackers:` 加节(主宪章 §37 6 adapter 之外加) -- 新通知渠道 → `notifiers:` 加节(主宪章 §36 6 渠道之外加) +- 新 BugTracker → `bug_trackers:` 加节 +- 新通知渠道 → `notifiers:` 加节 - 新测试类型 → `test_types:` 加节 + 同步 `agents/` 加平台 expert(如需) ## 矩阵规模 @@ -50,6 +50,6 @@ tagent init --overwrite ## 相关 -- 主宪章 §1(同步铁律)+ §5(多格式 I/O)+ §7(一键部署)+ §36(多端)+ §37(BugTracker) +- (同步规则)+ (多格式 I/O)+ (一键部署)+ (多端)+ (BugTracker) - 模板:[`config/templates/`](../../deploy/config/templates/INDEX.md) - 集成 CLI:`runtime/cli/main.py` `init` 子命令 diff --git a/runtime/init/__init__.py b/runtime/init/__init__.py index f613d5e8..f96a4a5d 100644 --- a/runtime/init/__init__.py +++ b/runtime/init/__init__.py @@ -1,4 +1,4 @@ -"""tagent init · 配置自动组装(V1.12.0). +"""tagent init · 配置自动组装. 
读 `config/templates/matrix.yaml` 矩阵 + base.*.tpl 模板,产 `.env` + `tagent.yml` + `STARTUP.md`。 矩阵 8 测试类型 × 6 平台 × 5 LLM × 6 BugTracker × 6 通知 = 8640 组合,wizard 自动列出。 diff --git a/runtime/init/wizard.py b/runtime/init/wizard.py index 1f1ea485..230a07f9 100644 --- a/runtime/init/wizard.py +++ b/runtime/init/wizard.py @@ -87,13 +87,13 @@ def run_wizard(matrix: Matrix | None = None) -> InitAnswers: ) bug_tracker = _pick_one( - "4) BugTracker?(主宪章 §37,6 选,默认禅道)", + "4) BugTracker?", [(k, v.label) for k, v in m.bug_trackers.items()], default_key="zentao", ) notifiers = _pick_many( - "5) 通知渠道?(主宪章 §36,可多选,逗号分隔)", + "5) 通知渠道?", [(k, v.label) for k, v in m.notifiers.items()], default_keys=["wechat"], ) diff --git a/runtime/learning_loop/INDEX.md b/runtime/learning_loop/INDEX.md index cf2a5808..7b6301e7 100644 --- a/runtime/learning_loop/INDEX.md +++ b/runtime/learning_loop/INDEX.md @@ -1,6 +1,6 @@ # learning_loop 索引 -> 主宪章 §14 darwin-skill 是 skill 文本本身的棘轮;本模块是**外层协调器**: +> darwin-skill 是 skill 文本本身的棘轮;本模块是**外层协调器**: > session 检索 + 用户画像 + curator 触发 + skill 自创建提示。 ## 不变量(与 hermes 同源) diff --git a/runtime/learning_loop/__init__.py b/runtime/learning_loop/__init__.py index e96f1419..6c4126e5 100644 --- a/runtime/learning_loop/__init__.py +++ b/runtime/learning_loop/__init__.py @@ -1,5 +1,5 @@ -"""Closed learning loop · hermes §1.1 inspired. +"""Closed learning loop · hermes inspired. -Coordinates with darwin-skill (charter §14) as the inner棘轮; +Coordinates with darwin-skill (charter ) as the inner棘轮; this module is the outer协调器 (curator + session search + user model). """ diff --git a/runtime/learning_loop/curator.py b/runtime/learning_loop/curator.py index b90c96f1..e1c28944 100644 --- a/runtime/learning_loop/curator.py +++ b/runtime/learning_loop/curator.py @@ -1,11 +1,11 @@ -"""Curator coordinator · hermes §1.1. +"""Curator coordinator · hermes. 
Background skill maintenance: - inactivity-triggered (no daemon) - only touches agent-created skills - never auto-deletes — archives only - pinned skills bypass - - uses aux client (charter §22; runtime/subagent/aux_client) + - uses aux client (charter ; runtime/subagent/aux_client) """ from __future__ import annotations diff --git a/runtime/learning_loop/session_search.py b/runtime/learning_loop/session_search.py index 62226669..670f7904 100644 --- a/runtime/learning_loop/session_search.py +++ b/runtime/learning_loop/session_search.py @@ -1,4 +1,4 @@ -"""FTS5 session search · hermes §1.1. +"""FTS5 session search · hermes. SQLite FTS5 over historical sessions. LLM summary attached at retrieval time. """ diff --git a/runtime/learning_loop/user_model.py b/runtime/learning_loop/user_model.py index 819de763..a46ad51c 100644 --- a/runtime/learning_loop/user_model.py +++ b/runtime/learning_loop/user_model.py @@ -1,4 +1,4 @@ -"""Dialectic user modeling · hermes §1.1 (inspired by Honcho). +"""Dialectic user modeling · hermes (inspired by Honcho). Cross-session profile of user preferences / vocabulary / working style. Stored as JSON facts under `workspace/learning/user_models/{user_id}.json`. 
diff --git a/runtime/marketplace/INDEX.md b/runtime/marketplace/INDEX.md index d4c8cec6..0ece0f82 100644 --- a/runtime/marketplace/INDEX.md +++ b/runtime/marketplace/INDEX.md @@ -34,9 +34,9 @@ 任一关失败 → 全部回滚 + decisions/ 落原因。 -## 与主宪章融合 +## 规则 -- §22 决策不可逆禁止:卸载只归档(`marketplace/.archive/`) -- §24 safe-by-default:`tagent.yml marketplace.enabled` 默认 false -- §27 Karpathy 原则 3 Surgical:卸载只动安装时建的文件,不动相邻 -- §29 Essence watcher:可关联 marketplace 远程 registry 自动同步 +- 分类卸载:按类型归档(`marketplace/.archive/`) +- safe-by-default:`tagent.yml marketplace.enabled` 默认 false +- Karpathy 原则 3 Surgical:卸载只动安装时建的文件,不动相邻 +- Essence watcher:可关联 marketplace 远程 registry 自动同步 diff --git a/runtime/marketplace/__init__.py b/runtime/marketplace/__init__.py index f83f8fcf..3e0bc442 100644 --- a/runtime/marketplace/__init__.py +++ b/runtime/marketplace/__init__.py @@ -1,4 +1,4 @@ -"""Marketplace · 主宪章 §30. +"""Marketplace 4 lane(skills/agents/mcp/hooks)+ 4 关安全门(sig/scan/sandbox/darwin). Safe-by-default:`tagent.yml marketplace.enabled: true` 才允许跑. diff --git a/runtime/marketplace/catalog.py b/runtime/marketplace/catalog.py index ab87c0c4..2048cb9f 100644 --- a/runtime/marketplace/catalog.py +++ b/runtime/marketplace/catalog.py @@ -1,4 +1,4 @@ -"""Marketplace local + remote catalog · §30. +"""Marketplace local + remote catalog ·. 读 marketplace/registry.json,可选拉远程 mirror,合并查询. """ diff --git a/runtime/marketplace/installer.py b/runtime/marketplace/installer.py index dfafc003..6bfa6456 100644 --- a/runtime/marketplace/installer.py +++ b/runtime/marketplace/installer.py @@ -1,7 +1,7 @@ -"""Install / Uninstall / Archive · §30. +"""Install / Uninstall / Archive ·. 
安装流程:catalog 查 → verifier 跑 4 关 → 落地到 marketplace/{lane}/{name}/ -卸载只归档不删(§22) +卸载只归档不删() """ from __future__ import annotations @@ -79,7 +79,7 @@ def install(entry: Entry, content_path: Path) -> dict: def uninstall(name: str) -> dict: - """Uninstall by archiving (§22 不可逆禁止).""" + """Uninstall by archiving.""" if not is_allowed("marketplace.enabled"): raise SafeByDefaultBlocked(op="marketplace.uninstall", key_path="marketplace.enabled") diff --git a/runtime/marketplace/verifier.py b/runtime/marketplace/verifier.py index eada6cb3..edcf75fd 100644 --- a/runtime/marketplace/verifier.py +++ b/runtime/marketplace/verifier.py @@ -1,4 +1,4 @@ -"""4 关安全门 · 主宪章 §30. +"""4 关安全门 关 1: signature_check (sha256 + ed25519 可选) 关 2: injection_scan (prompt 注入扫,复用 scheduler 模块) diff --git a/runtime/mcp/INDEX.md b/runtime/mcp/INDEX.md index b53ed339..ab6dcd74 100644 --- a/runtime/mcp/INDEX.md +++ b/runtime/mcp/INDEX.md @@ -1,6 +1,6 @@ # mcp 索引 -> 主宪章 §16 预留 6 件套,V1.2.0(M2)实现。 +> 预留 6 件套 > 当前 `config/.mcp.json` 仅启用 filesystem;本目录服务通过 `config/.mcp.json` 启用。 ## 模块清单 @@ -40,8 +40,8 @@ python -m runtime.mcp.test_orchestrator.server --http 8801 # http mode `base.py` 提供: - `make_server(name, version)`:统一 Server 实例化 -- `tool_decision_logged(name)`:工具装饰器,自动落 `decisions/{date}_mcp_{tool}.json`(宪章 §18-12) -- `with_run_id(handler)`:run_id 全链路注入(§21 横切可复现性) +- `tool_decision_logged(name)`:工具装饰器,自动落 `decisions/{date}_mcp_{tool}.json`(宪章 ) +- `with_run_id(handler)`:run_id 全链路注入(横切可复现性) ## MCP 客户端 (P2 #12) diff --git a/runtime/mcp/__init__.py b/runtime/mcp/__init__.py index 727f62e7..76a7c0c1 100644 --- a/runtime/mcp/__init__.py +++ b/runtime/mcp/__init__.py @@ -1,6 +1,6 @@ """MCP servers for Test-Agent. 
-6 servers per project charter §16: +6 servers per project charter : - test-orchestrator: 主调度,包装 runtime/orchestrator - protocol-adapter: HTTP/gRPC/WS/MQTT/Kafka 协议层 - evidence-vault: 证据/录屏/日志(MinIO + Postgres) @@ -9,7 +9,7 @@ - compliance-checker: 行业合规规则库(SOC2/PCI/HIPAA/IEC 62304 等) All servers respect: - - 主宪章 §9: 已有不动 → 仅包装,不修改 16 专家/32 skill/67 脚本 - - 主宪章 §21 横切: 失败可复现(seed+snapshot+录屏),不入回归库否 - - 主宪章 §18-12: 决策可追溯 → 工具调用落 decisions/ + - 已有不动 → 仅包装,不修改 16 专家/32 skill/67 脚本 + - 横切: 失败可复现(seed+snapshot+录屏),不入回归库否 + - 12: 决策可追溯 → 工具调用落 decisions/ """ diff --git a/runtime/mcp/base.py b/runtime/mcp/base.py index 28c22e82..de34a447 100644 --- a/runtime/mcp/base.py +++ b/runtime/mcp/base.py @@ -1,9 +1,9 @@ """Shared MCP server scaffolding. Honors charter: - - §18-12 决策可追溯:工具调用自动落 `decisions/{date}_mcp_{tool}_{run_id}.json` - - §21 横切可复现性:run_id 注入 + seed 记录 + 失败 snapshot - - §1 同步铁律:服务列表必须与 `config/.mcp.json` 一致 + - 决策可追溯:工具调用自动落 `decisions/{date}_mcp_{tool}_{run_id}.json` + - 横切可复现性:run_id 注入 + seed 记录 + 失败 snapshot + - 同步规则:服务列表必须与 `config/.mcp.json` 一致 """ from __future__ import annotations @@ -46,7 +46,7 @@ def _decisions_dir() -> Path: def log_decision(tool: str, payload: dict, run_id: str | None = None) -> Path: - """Persist a decision record per charter §18-12. + """Persist a decision record per charter. Returns the written file path. """ @@ -68,7 +68,7 @@ def log_decision(tool: str, payload: dict, run_id: str | None = None) -> Path: def tool_decision_logged(tool_name: str) -> Callable: """Wrap an async MCP tool handler with decision logging. - Charter §18-12 决策可追溯: every call (success or failure) writes a record. + Charter 决策可追溯: every call (success or failure) writes a record. Logging failures must not mask the original handler exception/result. 
""" diff --git a/runtime/mcp/compliance_checker/__init__.py b/runtime/mcp/compliance_checker/__init__.py index 7d0c7940..0e951419 100644 --- a/runtime/mcp/compliance_checker/__init__.py +++ b/runtime/mcp/compliance_checker/__init__.py @@ -1,5 +1,5 @@ """mcp-compliance-checker: 行业合规规则库 (空载 + 规则插槽). -V1.2.0 仅起步骨架,真规则库由领域专家供。 -L4 级被测项触发 (charter §21 横切准则: 合规审计周期). +仅起步骨架,真规则库由领域专家供。 +L4 级被测项触发 (charter 横切准则: 合规审计周期). """ diff --git a/runtime/mcp/compliance_checker/server.py b/runtime/mcp/compliance_checker/server.py index e7722f33..0b4add66 100644 --- a/runtime/mcp/compliance_checker/server.py +++ b/runtime/mcp/compliance_checker/server.py @@ -33,7 +33,7 @@ async def tool_list_profiles() -> dict: def _load_profile(name: str) -> dict | None: - # Charter §4 security: 防 path traversal — 仅允许字母数字+连字符,且 resolve 后必须落在 profiles_dir 下 + # Charter security: 防 path traversal — 仅允许字母数字+连字符,且 resolve 后必须落在 profiles_dir 下 import re if not re.fullmatch(r"[A-Za-z0-9_\-\.]+", name) or ".." in name: @@ -135,7 +135,7 @@ def build_server(): ), Tool( name="check_compliance", - description="Match a profile's evidence_required against a run's actual evidence. L4 被测项必触发(charter §21).", + description="Match a profile's evidence_required against a run's actual evidence. L4 被测项必触发(charter ).", inputSchema={ "type": "object", "properties": { diff --git a/runtime/mcp/defect_tracker/__init__.py b/runtime/mcp/defect_tracker/__init__.py index ede82576..87127626 100644 --- a/runtime/mcp/defect_tracker/__init__.py +++ b/runtime/mcp/defect_tracker/__init__.py @@ -1,4 +1,4 @@ """mcp-defect-tracker: 工单桥 (5 adapter zentao/jira/github/linear/webhook). 
-Per charter §12: 接入新 adapter 必须实现 BugTrackerBase 契约。 +Per charter : 接入新 adapter 必须实现 BugTrackerBase 契约。 """ diff --git a/runtime/mcp/defect_tracker/base.py b/runtime/mcp/defect_tracker/base.py index 4a84a93c..555d2343 100644 --- a/runtime/mcp/defect_tracker/base.py +++ b/runtime/mcp/defect_tracker/base.py @@ -1,6 +1,6 @@ """BugTrackerBase contract (mirrors utils/bug_manager.py 5 adapter). -Charter §12: 实现该 5 方法才能注册。 +Charter : 实现该 5 方法才能注册。 severity 映射统一权威 (`utils/bug_severity_map.py`): 1=P0 / 2=P1 / 3=P2 / 4=P3. """ diff --git a/runtime/mcp/defect_tracker/server.py b/runtime/mcp/defect_tracker/server.py index 38330da6..ec1797a8 100644 --- a/runtime/mcp/defect_tracker/server.py +++ b/runtime/mcp/defect_tracker/server.py @@ -3,7 +3,7 @@ Tools wrap the 5 BugTrackerBase methods; delegate to existing `utils/bug_manager.py` if available, else fall back to flywheel `defects` table. -Per charter §12 + §18-4: severity 1=P0 / 2=P1 / 3=P2 / 4=P3 一致。 +Per charter + : severity 1=P0 / 2=P1 / 3=P2 / 4=P3 一致。 """ from __future__ import annotations @@ -47,7 +47,7 @@ def _flywheel_get(bug_id: str) -> dict | None: d = s.get(Defect, bid) if d is None: return None - # Charter §18-4: 1=P0 / 2=P1 / 3=P2 / 4=P3 (one-based; enum value "P0".."P3" is zero-based string) + # Charter : 1=P0 / 2=P1 / 3=P2 / 4=P3 (one-based; enum value "P0".."P3" is zero-based string) sev_int = int(d.severity.value[1]) + 1 if d.severity.value.startswith("P") else 0 return { "bug_id": str(d.id), @@ -164,7 +164,7 @@ def build_server(): TOOLS = [ Tool( name="create_bug", - description="Create a defect. severity 1=P0 / 2=P1 / 3=P2 / 4=P3 (charter §18-4).", + description="Create a defect. 
severity 1=P0 / 2=P1 / 3=P2 / 4=P3 (charter ).", inputSchema={ "type": "object", "properties": { diff --git a/runtime/mcp/evidence_vault/server.py b/runtime/mcp/evidence_vault/server.py index 8cbba050..e048363c 100644 --- a/runtime/mcp/evidence_vault/server.py +++ b/runtime/mcp/evidence_vault/server.py @@ -76,7 +76,7 @@ def _validate_evidence_path(path_str: str) -> Path: def _persist_evidence(run_id: str, kind: str, data: bytes, key: str) -> dict: """DB insert first, then MinIO upload; if upload fails, rollback DB row. - Charter §18 闭环约定: 防止 MinIO 与 Postgres 不一致 (orphaned file or dangling row). + Charter 闭环约定: 防止 MinIO 与 Postgres 不一致 (orphaned file or dangling row). """ import hashlib diff --git a/runtime/mcp/protocol_adapter/adapters.py b/runtime/mcp/protocol_adapter/adapters.py index ba7690a9..09885d01 100644 --- a/runtime/mcp/protocol_adapter/adapters.py +++ b/runtime/mcp/protocol_adapter/adapters.py @@ -146,7 +146,7 @@ async def close(self) -> None: class MQTTAdapter(ProtocolAdapter): """MQTT v3.1.1 via paho-mqtt sync client wrapped in asyncio threadpool. - Charter §21 横切准则: paho-mqtt's on_message callback runs on the network + Charter 横切准则: paho-mqtt's on_message callback runs on the network thread. We guard the shared buffer with a lock so async recv() and the callback don't race. """ diff --git a/runtime/mcp/protocol_adapter/base.py b/runtime/mcp/protocol_adapter/base.py index bcb375cd..b935cca9 100644 --- a/runtime/mcp/protocol_adapter/base.py +++ b/runtime/mcp/protocol_adapter/base.py @@ -1,7 +1,7 @@ """ProtocolAdapter abstract base. All concrete adapters must implement this contract to register. 
-Honors charter §21: +Honors charter : - 协议调用即测,不裸跑 - 失败必带 seed+snapshot(可复现性横切准则) """ diff --git a/runtime/mcp/test_orchestrator/server.py b/runtime/mcp/test_orchestrator/server.py index 34cd68e4..c6eb52e3 100644 --- a/runtime/mcp/test_orchestrator/server.py +++ b/runtime/mcp/test_orchestrator/server.py @@ -23,7 +23,7 @@ _kernel: Kernel | None = None # Bounded LRU cache for in-memory run results. -# Charter §21 横切预算: 防 server 长时跑无限增长. +# Charter 横切预算: 防 server 长时跑无限增长. # Production should rely on Postgres `runs` table; this is the fast path. _MAX_RUN_RESULTS = 1024 _run_results: OrderedDict[str, dict] = None # type: ignore[assignment] diff --git a/runtime/orchestrator/adapters/experts.py b/runtime/orchestrator/adapters/experts.py index b2d149cb..2617c02c 100644 --- a/runtime/orchestrator/adapters/experts.py +++ b/runtime/orchestrator/adapters/experts.py @@ -7,10 +7,10 @@ declarative description and execute its CANONICAL SCRIPT mapping (below). - A handful of experts have a strong default script. The rest fall back to recording the expert step + producing an empty result placeholder which the - report-generator then summarises (matching V1.0.0 manual workflow). + report-generator then summarises (matching manual workflow). - Scripts with required CLI args(e.g. 
generate_report.py --data)get default inputs auto-injected via SCRIPT_DEFAULT_ARGS;referenced fixtures auto-materialized - by _ensure_fixture (V1.11 修 V1.10 n7 selftest bug)。 + by _ensure_fixture 。 """ from __future__ import annotations @@ -41,9 +41,9 @@ "visual-tester": None, "system-tester": None, "ai-tester": "ai_validator.py", - "pentest-tester": None, # V1.19 production (V1.x rollout 收尾) - "automotive-tester": None, # V1.20 production (V1.x rollout 收尾) - # V1.34 bridge: standalone scripts wired into orchestrator + "pentest-tester": None, # production (rollout 收尾) + "automotive-tester": None, # production (rollout 收尾) + # bridge: standalone scripts wired into orchestrator "mutation-test": "mutation_runner.py", "chaos-test": "chaos_helper.py", "fuzz-test": "fuzzer.py", @@ -51,15 +51,15 @@ "suite-minimize": "suite_minimizer.py", } -# V1.14 防 mock 单源 (ROADMAP V1.15 Day 0 承诺): +# 防 mock 单源 (ROADMAP Day 0 承诺): # 实装状态读 registry catalog (agents/skills *.md frontmatter # EXPERT_IMPL_STATUS / SKILL_IMPL_STATUS),避免 hardcoded dict 与 .md 双源漂移。 # # 合法值 (registry._VALID_IMPL_STATUS 同步): # - production: 真 LLM-driven runner (orchestrator/agents/*.py) 已实装 # - script: 真 script-backed (utils/*.py) 已实装 -# - rollout: V1.x rollout 待实装 → execute_node 拒绝路由,不输出 mock -# - vision: V2.x 方法论参考 → 同 rollout 处理 +# - rollout: rollout 待实装 → execute_node 拒绝路由,不输出 mock +# - vision: 方法论参考 → 同 rollout 处理 # - unknown: frontmatter 缺失/非法值 → 同 rollout 处理 (fail closed) @@ -86,7 +86,7 @@ def _get_impl_status(name: str, kind: str) -> str: "visual-test": None, "system-test": None, "ai-test": "ai_validator.py", - # V1.34 bridge: standalone scripts wired into orchestrator + # bridge: standalone scripts wired into orchestrator "mutation-testing": "mutation_runner.py", "chaos-engineering": "chaos_helper.py", "api-fuzzing": "fuzzer.py", @@ -221,7 +221,7 @@ def reset_upstream_cache() -> None: def execute_node(name: str, kind: str, *, inputs: dict | None = None, timeout: int = 1800) -> StepOutcome: inputs = 
inputs or {} - # V1.14 防 mock (ROADMAP V1.15 Day 0 承诺): 拒绝路由未实装 expert/skill,不输出 mock 数据 + # 防 mock (ROADMAP Day 0 承诺): 拒绝路由未实装 expert/skill,不输出 mock 数据 # 单源 = agents/skills .md frontmatter (registry catalog) if kind in ("expert", "skill"): status = _get_impl_status(name, kind) @@ -233,7 +233,7 @@ def execute_node(name: str, kind: str, *, inputs: dict | None = None, timeout: i returncode=2, # 明确非 0,标记 "未实装" 而非 no-op 兜底 stdout="", stderr=( - f"[V1.x {status}] {kind} '{name}' 未实装 (ROADMAP.md);" + f"[{status}] {kind} '{name}' 未实装 (ROADMAP.md);" f" router/test-lead 应跳过此 {kind},不输出 mock 数据" ), duration_ms=0, @@ -252,7 +252,7 @@ def execute_node(name: str, kind: str, *, inputs: dict | None = None, timeout: i duration_ms=0, ) - # V1.14 真 agent runner 优先(主宪章 §40,5 核心 expert 落地) + # 真 agent runner 优先 if kind == "expert": try: from runtime.config.settings import get_settings @@ -297,7 +297,7 @@ def execute_node(name: str, kind: str, *, inputs: dict | None = None, timeout: i except Exception as e: # noqa: BLE001 logger.warning("agent runner {} unavailable, fallback to script map: {}", name, e) - # V1.21 真 skill runner 优先 (ROADMAP skill rollout 起点) + # 真 skill runner 优先 (ROADMAP skill rollout 起点) # 与 expert runner 接口同, 仅 registry 独立 SKILL_RUNNERS if kind == "skill": try: @@ -343,7 +343,7 @@ def execute_node(name: str, kind: str, *, inputs: dict | None = None, timeout: i except Exception as e: # noqa: BLE001 logger.warning("skill runner {} unavailable, fallback to script map: {}", name, e) - # Fallback: SCRIPT_MAP(主宪章 §9 已有实现保留) + # Fallback: SCRIPT_MAP script = _resolve_script(name, kind) if script is None: return StepOutcome( @@ -388,7 +388,7 @@ def execute_node(name: str, kind: str, *, inputs: dict | None = None, timeout: i for k, v in defaults.items(): if k not in inputs: # only materialize fixture for auto-injected defaults _ensure_fixture(str(v)) - # V1.14:`artifact_text` 给 AgentRunner 用,不当 CLI arg(多行文本会炸 argparse) + # :`artifact_text` 给 AgentRunner 用,不当 CLI arg(多行文本会炸 
argparse) _CLI_EXCLUDE = {"artifact_text", "lang", "mode"} args = [f"--{k}={v}" for k, v in merged.items() if k not in _CLI_EXCLUDE] res: ScriptResult = run_script(script, args=args, timeout=timeout) diff --git a/runtime/orchestrator/adapters/scripts.py b/runtime/orchestrator/adapters/scripts.py index a5fb5e32..60798342 100644 --- a/runtime/orchestrator/adapters/scripts.py +++ b/runtime/orchestrator/adapters/scripts.py @@ -43,7 +43,7 @@ def run_script(script_filename: str, args: list[str] | None = None, *, timeout: scripts_dir: Path = s.resolve(s.scripts_dir) script_path = scripts_dir / script_filename if not script_path.is_file(): - # utils-reorg (V1.x): scripts moved into subdirs (utils/reporting/, utils/data/, ...). + # utils-reorg : scripts moved into subdirs (utils/reporting/, utils/data/, ...). # Recursively look up by basename when not at top-level. matches = [p for p in scripts_dir.rglob(script_filename) if p.is_file()] if len(matches) == 1: @@ -84,7 +84,7 @@ def _do_run() -> subprocess.CompletedProcess[str]: def list_available_scripts() -> list[str]: s = get_settings() scripts_dir: Path = s.resolve(s.scripts_dir) - # utils-reorg (V1.x): scripts in subdirs (utils/reporting/, utils/data/, ...). + # utils-reorg : scripts in subdirs (utils/reporting/, utils/data/, ...). # Return basenames so callers can run_script("excel_generator.py") regardless of subdir. 
seen: set[str] = set() for p in scripts_dir.rglob("*.py"): diff --git a/runtime/orchestrator/agents/INDEX.md b/runtime/orchestrator/agents/INDEX.md index 6c2caf49..03b0f51a 100644 --- a/runtime/orchestrator/agents/INDEX.md +++ b/runtime/orchestrator/agents/INDEX.md @@ -1,8 +1,8 @@ -# runtime/orchestrator/agents/ 索引(V1.36.0) +# runtime/orchestrator/agents/ 索引 -> 真 LLM-driven expert runner · 16 核心 expert 全落地 · 主宪章 §40 真 agent 落地 canon。 +> 真 LLM-driven expert runner · 16 核心 expert 全落地 -## 已实现 16 runner(V1.32) +## 已实现 16 runner | Runner | 角色源 | 上游 | 产物 | |--------|--------|------|------| @@ -12,7 +12,7 @@ | `bug-manager` | agents/08-Bug管理.md | test-executor | `bug_drafts.json`(BugTracker-ready) | | `test-lead` | agents/01-测试主管.md | 全链路 | `final_verdict_*.json`(上线决策) | -## 0 未实现(V1.32 rollout 完成) +## 0 未实现 - test-lead 自身已实现(用全链路上游),其他 11 个:env-manager / data-preparer(scripted)/ testcase-designer(scripted)/ report-generator(scripted)/ mobile-tester / desktop-tester(scripted)/ visual-tester / system-tester / ai-tester(scripted)/ 渗透 / 车载 - **5 个有 script 真跑**(testcase-designer / data-preparer / report-generator / desktop-tester / ai-tester)→ SCRIPT_MAP 兜 @@ -40,12 +40,12 @@ 3. 加 import 到 `__init__.py`(触发 @register) 4. 跑 `tagent selftest --e2e` 验编排 -## 主宪章 +## -- §33 自检铁律(L1+L2+L3+L4) -- §40 真 agent 落地 canon(V1.32 加) -- §9 已有实现不动 — 5 个 SCRIPT_MAP 兜底 expert 不动 -- §10 第 5 铭文:test-lead 决策 `requires_human_signoff: true` +- 自检规则(L1+L2+L3+L4) +- 真 agent 落地 canon +- 已有实现不动 — 5 个 SCRIPT_MAP 兜底 expert 不动 +- 第 5 铭文:test-lead 决策 `requires_human_signoff: true` ## 相关 diff --git a/runtime/orchestrator/agents/__init__.py b/runtime/orchestrator/agents/__init__.py index b1927b2a..3bbfe7cc 100644 --- a/runtime/orchestrator/agents/__init__.py +++ b/runtime/orchestrator/agents/__init__.py @@ -1,24 +1,24 @@ -"""Real LLM-driven agent runners(V1.36.0 · 主宪章 §33 + §40). +"""Real LLM-driven agent runners. 
每个 runner 把 agents/*.md 的角色描述变成可执行的 LLM 调用: - 读上游产物 → 拼 prompt → 调 LLM → 解析输出 → 落产物 → 给下游 -11 核心 runner(V1.x rollout 收尾,所有 LLM-driven expert 已实装): -- requirements-analyst (V1.14) -- automation-engineer (V1.14) -- test-executor (V1.14) -- bug-manager (V1.14) -- test-lead (V1.14) -- env-manager (V1.15.0, ROADMAP rollout #1 落地 — minimum viable) -- mobile-tester (V1.16.0, ROADMAP rollout #2 落地 — minimum viable) -- visual-tester (V1.17.0, ROADMAP rollout #3 落地 — minimum viable) -- system-tester (V1.18.0, ROADMAP rollout #4 落地 — minimum viable) -- pentest-tester (V1.19.0, ROADMAP rollout #5 落地 — minimum viable; +11 核心 runner(rollout 收尾,所有 LLM-driven expert 已实装): +- requirements-analyst +- automation-engineer +- test-executor +- bug-manager +- test-lead +- env-manager +- mobile-tester +- visual-tester +- system-tester +- pentest-tester (ROADMAP rollout #5 落地 — minimum viable; 仅输出测试计划文本, 不调外部攻击工具;真执行守护已在 utils 层 env gate) -- automotive-tester (V1.20.0, ROADMAP rollout #6 落地 — minimum viable; - V1.x rollout 收尾;ASIL 评估 + HIL 测试 + ADAS 场景 + OTA + 合规矩阵) +- automotive-tester (ROADMAP rollout #6 落地 — minimum viable; + rollout 收尾;ASIL 评估 + HIL 测试 + ADAS 场景 + OTA + 合规矩阵) -剩余 5 个 expert 走 SCRIPT_MAP script-backed (主宪章 §9 已有实现: +剩余 5 个 expert 走 SCRIPT_MAP script-backed (已有实现: testcase-designer / data-preparer / report-generator / desktop-tester / ai-tester)。 """ diff --git a/runtime/orchestrator/agents/automotive_tester.py b/runtime/orchestrator/agents/automotive_tester.py index c6a6190a..ddbce565 100644 --- a/runtime/orchestrator/agents/automotive_tester.py +++ b/runtime/orchestrator/agents/automotive_tester.py @@ -1,11 +1,11 @@ """automotive-tester · LLM 读 PRD + CAN-bus/ISO-26262 上下文 → ASIL 评估 + HIL 测试用例 + ADAS 场景 + OTA 升级测试 + 协议特定配置. 
-V1.20.0 minimum viable (ROADMAP rollout #6 落地, V1.x rollout 收尾): +minimum viable (ROADMAP rollout #6 落地, rollout 收尾): - 仅生成 ASIL 评估 + test_cases + bus_test_plan + adas_scenarios + ota_plan + compliance_matrix 结构化 JSON - 不实装 16-车载测试.md 全部职责 (Vector CANoe 真跑 / HIL 台架真接 / VTD/CarMaker/CARLA - 仿真真跑 / OTA A/B 分区真切 / SocketCAN 真嗅探 等留 V1.x 深化) + 仿真真跑 / OTA A/B 分区真切 / SocketCAN 真嗅探 等留 深化) - 覆盖 ECU + ADAS 域控 + IVI + V2X 4 大子系统 - 覆盖 CAN / CAN-FD / LIN / FlexRay / Automotive Ethernet / DoIP/UDS / SOME-IP / V2X 8 协议 diff --git a/runtime/orchestrator/agents/base.py b/runtime/orchestrator/agents/base.py index 7d2a1d47..86b09772 100644 --- a/runtime/orchestrator/agents/base.py +++ b/runtime/orchestrator/agents/base.py @@ -80,7 +80,7 @@ def summary(self, output: dict[str, Any]) -> str: # noqa: ARG002 def run(self, ctx: RunnerContext) -> RunnerResult: """ 执行 LLM-driven agent。ok/degraded 语义: - - stub/mock 模式: ok=True + degraded=True (mock 兜底,主宪章 §33 selftest 允许) + - stub/mock 模式: ok=True + degraded=True (mock 兜底, selftest 允许) - 真 LLM 成功 + JSON 解析 OK: ok=True + degraded=False (真输出) - 真 LLM 成功但 JSON 解析错: ok=False + degraded=True (LLM 回了但不合规) - exec 模式 LLM 失败 fallback: ok=False + degraded=True (不再假绿) @@ -94,7 +94,7 @@ def run(self, ctx: RunnerContext) -> RunnerResult: if ctx.settings_provider == "stub" or ctx.mode == "mock": # stub/mock 模式: 输出 mock,标 degraded output = self.mock_output(ctx) - raw = "[stub] mock output(主宪章 §33 selftest 兜底)" + raw = "[stub] mock output (selftest 兜底)" ok = True degraded = True else: @@ -176,7 +176,7 @@ def get_runner(name: str) -> AgentRunner | None: return cls() if cls else None -# Skill runner registry (V1.21.0 — skill LLM-driven rollout 基础设施). +# Skill runner registry. 
# SkillRunner 接口与 AgentRunner 100% 一致 (system_prompt / user_prompt / # mock_output / summary / output_file / run), 仅 registry 独立, 避免 expert/skill # 同名冲突,且让 catalog / router / orchestrator 按 kind 路由清晰。 diff --git a/runtime/orchestrator/agents/bug_manager.py b/runtime/orchestrator/agents/bug_manager.py index 83e6c2f7..0acfe4f0 100644 --- a/runtime/orchestrator/agents/bug_manager.py +++ b/runtime/orchestrator/agents/bug_manager.py @@ -1,4 +1,4 @@ -"""bug-manager · 分类失败 → 产 BugTracker-ready Bug 列表(主宪章 §37).""" +"""bug-manager · 分类失败 → 产 BugTracker-ready Bug 列表.""" from __future__ import annotations @@ -13,7 +13,7 @@ class BugManager(AgentRunner): def system_prompt(self) -> str: return ( "你是 Test-Agent 项目内 bug-manager 专家(agents/08-Bug管理.md)。\n" - "职责:把 test-executor 的失败列表转 BugTracker-ready Bug(默认 zentao,可换 Jira/GitHub Issues 等,主宪章 §37)。\n" + "职责:把 test-executor 的失败列表转 BugTracker-ready Bug。\n" "原则:\n" "1) severity 权威映射:1=P0(阻塞)/ 2=P1(高)/ 3=P2(中)/ 4=P3(低)\n" "2) STAR 格式:Situation / Task / Action / Result\n" @@ -69,7 +69,7 @@ def mock_output(self, ctx: RunnerContext) -> dict[str, Any]: "severity": 1, # P0 阻塞 — 不应作为发版决策依据 "pri": 1, "steps": [ - "检查上游 expert 实装状态 (ROADMAP.md V1.15-V1.20 rollout)", + "检查上游 expert 实装状态 (ROADMAP.md rollout)", "确认 LLM provider 不在 stub mode (settings.llm_provider)", "若 expert 处于 rollout,等待对应版本完成实装", ], diff --git a/runtime/orchestrator/agents/env_manager.py b/runtime/orchestrator/agents/env_manager.py index d8855b4c..9d970578 100644 --- a/runtime/orchestrator/agents/env_manager.py +++ b/runtime/orchestrator/agents/env_manager.py @@ -1,8 +1,8 @@ """env-manager · LLM 读 PRD + 上游需求摘要 → 环境检查清单 + 准备步骤. 
-V1.15.0 minimum viable (ROADMAP rollout #1 落地): +minimum viable (ROADMAP rollout #1 落地): - 仅生成 env checklist + prep steps 结构化 markdown/JSON -- 不实装 04-环境管理.md 全 5 节 (Docker / 异常退避 / 清理等留 V1.x 深化) +- 不实装 04-环境管理.md 全 5 节 (Docker / 异常退避 / 清理等留 深化) - 输出消费者: data-preparer / automation-engineer / test-executor """ diff --git a/runtime/orchestrator/agents/mobile_tester.py b/runtime/orchestrator/agents/mobile_tester.py index d257843b..20ec7f18 100644 --- a/runtime/orchestrator/agents/mobile_tester.py +++ b/runtime/orchestrator/agents/mobile_tester.py @@ -1,9 +1,9 @@ """mobile-tester · LLM 读 PRD + 上游摘要 → 移动测试用例 + ADB/Xcode 命令清单. -V1.16.0 minimum viable (ROADMAP rollout #2 落地): +minimum viable (ROADMAP rollout #2 落地): - 仅生成 mobile test cases + device commands + test_environment 结构化 JSON - 不实装 10-移动测试.md 全部职责 (Appium driver 真跑 / 云真机集成 / 弱网 / 权限弹窗 - 等留 V1.x 深化) + 等留 深化) - 覆盖 Android / iOS 原生 + 微信/支付宝/抖音 小程序 """ diff --git a/runtime/orchestrator/agents/pentest_tester.py b/runtime/orchestrator/agents/pentest_tester.py index f4f3ac1c..5d74258a 100644 --- a/runtime/orchestrator/agents/pentest_tester.py +++ b/runtime/orchestrator/agents/pentest_tester.py @@ -1,12 +1,12 @@ """pentest-tester · LLM 读 PRD + 安全上下文 → 渗透测试计划 + 工具调用清单. 
-V1.19.0 minimum viable (ROADMAP rollout #5 落地): +minimum viable (ROADMAP rollout #5 落地): - 仅生成 pentest plan (target_scope + recon/vuln/exploit phases + reporting 结构) 结构化 JSON - 不实装 15-渗透测试.md 全部职责 (sqlmap / Metasploit / Hydra 真跑 / Static-Dynamic - Correlation 实装 / PoC 沙箱执行 等留 V1.x 深化 — 真执行守护已在 utils 层 + Correlation 实装 / PoC 沙箱执行 等留 深化 — 真执行守护已在 utils 层 `api_security_scanner.py` / `ai_adversarial.py` 用 TAGENT_PENTEST_AUTHORIZED env gate) - 覆盖 5 攻击域 (Injection / XSS / SSRF / Auth / Authz) + API + 移动 + AI 应用安全 -- 主宪章 §24 safe-by-default + Shannon 哲学 (仅 working PoC 才入报告) +- safe-by-default + Shannon 哲学 (仅 working PoC 才入报告) 授权边界:本 runner 只输出**测试计划文本**,不调外部攻击工具,不执行不可逆操作。 法律责任 (CFAA / 网络安全法 / NIS2) 在操作者侧,见 SECURITY.md L84 "武器化代码使用边界"。 diff --git a/runtime/orchestrator/agents/system_tester.py b/runtime/orchestrator/agents/system_tester.py index 800b3caf..5474a238 100644 --- a/runtime/orchestrator/agents/system_tester.py +++ b/runtime/orchestrator/agents/system_tester.py @@ -1,10 +1,10 @@ """system-tester · LLM 读 PRD + IoT/串口/MQTT/MQ/Tracing 上下文 → 系统集成测试用例 + 设备命令清单 + 协议特定配置. 
-V1.18.0 minimum viable (ROADMAP rollout #4 落地): +minimum viable (ROADMAP rollout #4 落地): - 仅生成 test_cases + device_commands + protocol_specific 结构化 JSON - 不实装 13-系统集成测试.md 全部职责 (paramiko/pyserial/paho-mqtt 真跑 / FFmpeg - 解码 / Jaeger 查询执行 / Kafka consumer 真起 等留 V1.x 深化) + 解码 / Jaeger 查询执行 / Kafka consumer 真起 等留 深化) - 覆盖 IoT (SSH/串口/MQTT/Modbus) + 音视频 (FFmpeg) + 链路追踪 (Jaeger/OpenTelemetry) + 消息队列 (Kafka/RabbitMQ) + 跨服务集成 """ diff --git a/runtime/orchestrator/agents/test_executor.py b/runtime/orchestrator/agents/test_executor.py index 81d870f5..307a676c 100644 --- a/runtime/orchestrator/agents/test_executor.py +++ b/runtime/orchestrator/agents/test_executor.py @@ -17,7 +17,7 @@ def system_prompt(self) -> str: "原则:\n" "1) 四阶段执行:冒烟(P0) → 回归(P0+P1) → 全量 → 性能\n" "2) 失败 4 类:product_bug / test_code_bug / env_issue / flaky\n" - "3) Flaky 检测:连续 3 跑 2 过即标 flaky 隔离(主宪章 §21)\n" + "3) Flaky 检测:连续 3 跑 2 过即标 flaky 隔离\n" "4) 不真跑 sandbox,产**执行计划 JSON**(由 utils 真执行)\n" "输出严格 JSON,不 markdown 包裹。" ) diff --git a/runtime/orchestrator/agents/test_lead.py b/runtime/orchestrator/agents/test_lead.py index 190796b8..f99af013 100644 --- a/runtime/orchestrator/agents/test_lead.py +++ b/runtime/orchestrator/agents/test_lead.py @@ -17,8 +17,8 @@ def system_prompt(self) -> str: "原则:\n" "1) 看 requirements / scripts / execution_plan / bug_drafts 完整链路\n" "2) 决策标准:P0 Bug=0 + 回归通过率 ≥ 90% + 性能门禁过 = go;否则 conditional / no-go\n" - "3) 业务语言(主宪章 §10 五铭文 #5):管理层 / 开发都能秒懂\n" - "4) 标 skin-in-the-game:本决策**人类签字**,Agent 仅给建议(主宪章 §10 第 5 铭文)\n" + "3) 业务语言:管理层 / 开发都能秒懂\n" + "4) 标 skin-in-the-game:本决策**人类签字**,Agent 仅给建议\n" "5) 列出已知遗留 + 兜底方案\n" "输出严格 JSON,不 markdown 包裹。" ) @@ -34,14 +34,14 @@ def user_prompt(self, ctx: RunnerContext) -> str: if degraded_upstream: degraded_block = ( f"\n## ⚠ 上游 degraded 警示 (强制约束)\n" - f"以下上游 expert 输出降级 (mock 兜底 / LLM 失败 fallback / 未实装 V1.x rollout):\n" + f"以下上游 expert 输出降级 (mock 兜底 / LLM 失败 fallback / 未实装 rollout):\n" f"{degraded_upstream}\n\n" f"**强制要求**:\n" f"1. 
`verdict` **绝不能输出 'go'** — 因为本次测试数据不完整\n" f"2. `verdict` 应输出 `conditional`(部分数据可信) 或 `no-go`(P0 缺失维度过多)\n" f"3. `known_risks` **必须列出每个 degraded expert 名**及对应未覆盖维度\n" f"4. `rationale` 必须包含「测试数据不完整,基于 {len(degraded_upstream)} 个降级 expert 无法做发版决策」\n" - f"5. `fallback_plan` 必须包含「等 V1.x rollout 完成后重跑」\n" + f"5. `fallback_plan` 必须包含「等 rollout 完成后重跑」\n" ) return ( @@ -68,7 +68,7 @@ def mock_output(self, ctx: RunnerContext) -> dict[str, Any]: bug = ctx.upstream.get("bug-manager", {}).get("summary", {}) p0 = bug.get("p0", 0) if isinstance(bug, dict) else 0 - # V1.14 防 mock 闭环: 检查上游是否有 degraded 信号 + # 防 mock 闭环: 检查上游是否有 degraded 信号 # (mock 兜底 / LLM 失败 fallback / JSON 解析错 / rollout expert 被路由) degraded_upstream = [ name for name, meta in ctx.upstream_meta.items() @@ -88,7 +88,7 @@ def mock_output(self, ctx: RunnerContext) -> dict[str, Any]: ) rationale = ( f"防 mock 闭环触发: 上游 expert {degraded_upstream} 输出 degraded " - f"(mock 兜底 / LLM 失败 / 未实装 V1.x rollout)。" + f"(mock 兜底 / LLM 失败 / 未实装 rollout)。" f"不能基于不完整数据输出 GO,降级 conditional 等人审。" ) known_risks = [ diff --git a/runtime/orchestrator/agents/visual_tester.py b/runtime/orchestrator/agents/visual_tester.py index bd9b64b9..a6b7e0ef 100644 --- a/runtime/orchestrator/agents/visual_tester.py +++ b/runtime/orchestrator/agents/visual_tester.py @@ -1,9 +1,9 @@ """visual-tester · LLM 读 PRD + UI 描述 → 视觉测试点 + 视觉对比脚本片段. 
-V1.17.0 minimum viable (ROADMAP rollout #3 落地): +minimum viable (ROADMAP rollout #3 落地): - 仅生成 visual test points + comparison scripts + tolerance + baseline_strategy 结构化 JSON - 不实装 12-视觉游戏测试.md 全部职责 (Airtest 真跑 / OCR 调用 / SSIM 像素对比执行 - 等留 V1.x 深化) + 等留 深化) - 覆盖 Web Canvas/WebGL + 手游/PC 游戏 + OCR + 视觉回归 """ diff --git a/runtime/orchestrator/direct.py b/runtime/orchestrator/direct.py index 78f2c9e0..063b7d67 100644 --- a/runtime/orchestrator/direct.py +++ b/runtime/orchestrator/direct.py @@ -113,7 +113,7 @@ def run_decision_direct(decision_dict: dict[str, Any], run_id: str, max_workers: configure_logging() init_tracing() log = bind_run(run_id) - reset_upstream_cache() # V1.14 主宪章 §40 + reset_upstream_cache() # decision = RoutingDecision.model_validate(decision_dict) ordered: list[DAGNode] = decision.topological() log.info("direct flow start: run_id={} nodes={}", run_id, len(ordered)) @@ -237,7 +237,7 @@ def run_decision_direct(decision_dict: dict[str, Any], run_id: str, max_workers: # L2-C: rollout 节点 + on_failure=skip 节点 rollout_skipped = [ nid for nid, r in results.items() - if not r.get("ok") and "[V1.x rollout]" in (r.get("stderr_tail") or "") + if not r.get("ok") and "[rollout]" in (r.get("stderr_tail") or "") ] + skipped summary = { diff --git a/runtime/orchestrator/flows.py b/runtime/orchestrator/flows.py index d4d0963a..8878c0c6 100644 --- a/runtime/orchestrator/flows.py +++ b/runtime/orchestrator/flows.py @@ -39,7 +39,7 @@ def run_decision_flow(decision_dict: dict[str, Any], run_id: str, on_progress: A configure_logging() init_tracing() log = bind_run(run_id) - reset_upstream_cache() # V1.14 主宪章 §40 — 每 run 清 runner 间产物缓存 + reset_upstream_cache() # 每 run 清 runner 间产物缓存 decision = RoutingDecision.model_validate(decision_dict) ordered: list[DAGNode] = decision.topological() log.info("flow start: run_id={} nodes={}", run_id, len(ordered)) @@ -91,7 +91,7 @@ def run_decision_flow(decision_dict: dict[str, Any], run_id: str, on_progress: A # L2-C: 识别 rollout 节点 + 
on_failure=skip 节点 rollout_skipped = [ nid for nid, r in results.items() - if not r.get("ok") and "[V1.x rollout]" in (r.get("stderr_tail") or "") + if not r.get("ok") and "[rollout]" in (r.get("stderr_tail") or "") ] + skipped summary = { diff --git a/runtime/orchestrator/skills/__init__.py b/runtime/orchestrator/skills/__init__.py index 90b120b2..350899d6 100644 --- a/runtime/orchestrator/skills/__init__.py +++ b/runtime/orchestrator/skills/__init__.py @@ -1,10 +1,10 @@ -"""Real LLM-driven skill runners (V1.x · ALL 18/18 rollout + 2 ex-vision complete). +"""Real LLM-driven skill runners (· ALL 18/18 rollout + 2 ex-vision complete). 18 production runners across 4 domains: - General: mobile-test, visual-test, system-test, eval-harness - Pentest: pentest-coordinator, pentest-recon, pentest-vuln, pentest-exploit, pentest-api, pentest-web, pentest-report - Automotive: automotive-test, automotive-can-bus-test, automotive-adas-scenario, automotive-ota-update-test, automotive-hil-loop-test -- Meta (V1.x ex-vision): agent-introspection-debugging, build-your-own-x-explorer +- Meta (ex-vision): agent-introspection-debugging, build-your-own-x-explorer """ from runtime.orchestrator.agents.base import ( # noqa: F401 diff --git a/runtime/orchestrator/skills/agent_introspection_debugging.py b/runtime/orchestrator/skills/agent_introspection_debugging.py index 862abed9..a2e57e9e 100644 --- a/runtime/orchestrator/skills/agent_introspection_debugging.py +++ b/runtime/orchestrator/skills/agent_introspection_debugging.py @@ -1,4 +1,4 @@ -"""agent-introspection-debugging skill · 五维自省分析 (V1.x). +"""agent-introspection-debugging skill · 五维自省分析 . 
职责: 对 agent 行为做五维自省 (决策回放/工具调用/token/上下文/状态机) → 结构化报告。 """ diff --git a/runtime/orchestrator/skills/automotive_adas_scenario.py b/runtime/orchestrator/skills/automotive_adas_scenario.py index f44cb901..a4595667 100644 --- a/runtime/orchestrator/skills/automotive_adas_scenario.py +++ b/runtime/orchestrator/skills/automotive_adas_scenario.py @@ -1,4 +1,4 @@ -"""automotive-adas-scenario · ADAS 场景库测试编排 (V1.31.0).""" +"""automotive-adas-scenario · ADAS 场景库测试编排 .""" from __future__ import annotations from pathlib import Path diff --git a/runtime/orchestrator/skills/automotive_can_bus_test.py b/runtime/orchestrator/skills/automotive_can_bus_test.py index ce62fb97..221b5818 100644 --- a/runtime/orchestrator/skills/automotive_can_bus_test.py +++ b/runtime/orchestrator/skills/automotive_can_bus_test.py @@ -1,4 +1,4 @@ -"""automotive-can-bus-test · CAN/CAN-FD/LIN/FlexRay/SOME-IP 协议测试编排 (V1.31.0).""" +"""automotive-can-bus-test · CAN/CAN-FD/LIN/FlexRay/SOME-IP 协议测试编排 .""" from __future__ import annotations from pathlib import Path diff --git a/runtime/orchestrator/skills/automotive_hil_loop_test.py b/runtime/orchestrator/skills/automotive_hil_loop_test.py index d54f2100..bd17ace6 100644 --- a/runtime/orchestrator/skills/automotive_hil_loop_test.py +++ b/runtime/orchestrator/skills/automotive_hil_loop_test.py @@ -1,4 +1,4 @@ -"""automotive-hil-loop-test · HIL/SIL/MIL/PIL 环路编排 (V1.31.0).""" +"""automotive-hil-loop-test · HIL/SIL/MIL/PIL 环路编排 .""" from __future__ import annotations from pathlib import Path diff --git a/runtime/orchestrator/skills/automotive_ota_update_test.py b/runtime/orchestrator/skills/automotive_ota_update_test.py index 26f07a0a..716cc705 100644 --- a/runtime/orchestrator/skills/automotive_ota_update_test.py +++ b/runtime/orchestrator/skills/automotive_ota_update_test.py @@ -1,4 +1,4 @@ -"""automotive-ota-update-test · OTA 升级测试编排 (V1.31.0).""" +"""automotive-ota-update-test · OTA 升级测试编排 .""" from __future__ import annotations from pathlib import Path diff 
--git a/runtime/orchestrator/skills/automotive_test.py b/runtime/orchestrator/skills/automotive_test.py index d6ee2841..0dc01a2f 100644 --- a/runtime/orchestrator/skills/automotive_test.py +++ b/runtime/orchestrator/skills/automotive_test.py @@ -1,4 +1,4 @@ -"""automotive-test skill · 整车测试主编排 (V1.31.0 batch). +"""automotive-test skill · 整车测试主编排. 10 阶段: HARA+ASIL → 静态 MISRA → 单元 MC/DC → SIL/PIL → HIL → CAN → ADAS → OTA → 合规 → 报告 """ diff --git a/runtime/orchestrator/skills/build_your_own_x_explorer.py b/runtime/orchestrator/skills/build_your_own_x_explorer.py index e7daaefb..4aa475ec 100644 --- a/runtime/orchestrator/skills/build_your_own_x_explorer.py +++ b/runtime/orchestrator/skills/build_your_own_x_explorer.py @@ -1,7 +1,7 @@ -"""build-your-own-x-explorer skill · 教学层 byox deep-dive 推荐 (V1.32.0). +"""build-your-own-x-explorer skill · 教学层 byox deep-dive 推荐. 职责: 据用户当前测试场景 + 时间预算, 从 13 类 byox KB 推 deep-dive 路径。 -铁律: 1) 必问时间预算 2) 不强推 3) 不复制全文。 +规则: 1) 必问时间预算 2) 不强推 3) 不复制全文。 """ from __future__ import annotations @@ -19,7 +19,7 @@ def system_prompt(self) -> str: "(database/network-stack/web-server/git/search-engine/shell/regex-engine/" "programming-language/web-browser/bot/...) KB 推 deep-dive 学习路径, " "每条带 estimated_hours + why。\n" - "铁律: 1) 必问时间预算 (无预算→拒推) 2) 不强推 (用户测试主线优先) 3) 不复制 tutorial 全文。\n" + "规则: 1) 必问时间预算 (无预算→拒推) 2) 不强推 (用户测试主线优先) 3) 不复制 tutorial 全文。\n" "输出严格 JSON。" ) diff --git a/runtime/orchestrator/skills/eval_harness.py b/runtime/orchestrator/skills/eval_harness.py index 8cade3d3..4ec67aec 100644 --- a/runtime/orchestrator/skills/eval_harness.py +++ b/runtime/orchestrator/skills/eval_harness.py @@ -1,6 +1,6 @@ """eval-harness skill · LLM 读上游 ai-tester 产物 → 4 维度 LLM/AI 评测编排. 
-V1.27.0 minimum viable (ROADMAP skill rollout #5 落地): +minimum viable (ROADMAP skill rollout #5 落地): - LLM 读 PRD + 上游 ai-tester expert 产物 → 5 阶段评测计划 (评测配置 / pass@k / 稳定性 / 延迟 / 报告归档) + 质量门禁 + 安全护栏 diff --git a/runtime/orchestrator/skills/mobile_test.py b/runtime/orchestrator/skills/mobile_test.py index 18ac7157..500a7450 100644 --- a/runtime/orchestrator/skills/mobile_test.py +++ b/runtime/orchestrator/skills/mobile_test.py @@ -1,6 +1,6 @@ """mobile-test skill · LLM 读上游 mobile-tester 产物 → 6 阶段移动端执行编排. -V1.23.0 minimum viable (ROADMAP skill rollout #2 落地): +minimum viable (ROADMAP skill rollout #2 落地): - LLM 读 PRD + 上游 mobile-tester expert 产物 → 6 阶段执行计划 (设备就绪 / Appium / 用例批次 / 性能采集 / Monkey / 报告归档) + 质量门禁 + 跨平台并行策略 diff --git a/runtime/orchestrator/skills/pentest_api.py b/runtime/orchestrator/skills/pentest_api.py index 1e3d85f5..ebf9b80b 100644 --- a/runtime/orchestrator/skills/pentest_api.py +++ b/runtime/orchestrator/skills/pentest_api.py @@ -1,6 +1,6 @@ """pentest-api skill · OWASP API Top 10 2023 编排. 
-V1.30.0 (pentest batch 2): +(pentest batch 2): - API1-API10 全覆盖: BOLA/Auth/MassAssignment/RateLimit/SSRF/CORS/ShadowAPI - OpenAPI 驱动 fuzz + GraphQL introspection + gRPC/WebSocket """ @@ -15,7 +15,7 @@ @register_skill("pentest-api") class PentestApi(AgentRunner): def system_prompt(self) -> str: - return "你是 pentest-api skill。职责:OWASP API Top 10 2023 全覆盖编排。\n铁律:1)授权前置 2)scope 内 3)prod 拒绝\n输出严格 JSON。" + return "你是 pentest-api skill。职责:OWASP API Top 10 2023 全覆盖编排。\n规则:1)授权前置 2)scope 内 3)prod 拒绝\n输出严格 JSON。" def user_prompt(self, ctx: RunnerContext) -> str: return f"## PRD\n```\n{ctx.artifact_text[:3000]}\n```\n\n## schema\n{{\n \"project_name\":\"string\",\"run_id\":\"string\",\n \"api_categories\":[{{\"id\":\"API1\",\"name\":\"BOLA\",\"checks\":[\"string\"]}}],\n \"openapi_driven\":true,\"graphql_enabled\":false,\n \"outputs\":{{\"api_vulns\":\"workspace/渗透/api_vulns.json\"}},\n \"risks\":[\"string\"],\"confidence\":\"string\"\n}}" def mock_output(self, ctx) -> dict: # noqa: ARG002 diff --git a/runtime/orchestrator/skills/pentest_coordinator.py b/runtime/orchestrator/skills/pentest_coordinator.py index 92d54336..e6ef6e8c 100644 --- a/runtime/orchestrator/skills/pentest_coordinator.py +++ b/runtime/orchestrator/skills/pentest_coordinator.py @@ -1,11 +1,11 @@ """pentest-coordinator skill · LLM 读 PRD + 授权上下文 → 5 阶段渗透流程编排. 
-V1.21.0 minimum viable (ROADMAP skill rollout #1 落地, 解锁 rollout skill 流水线): +minimum viable (ROADMAP skill rollout #1 落地, 解锁 rollout skill 流水线): - LLM 读 PRD + tagent.yml 授权摘要 + 上游 pentest-tester expert 产物 → 5 阶段并发计划 (recon / vuln / exploit / post-exploit / report) + 子 skill 调用顺序 + 授权前置检查 evidence - 不实装 skills/pentest-coordinator.md 全部职责 (subagent pool 真起 / Allure 报告生成 / decisions/ 真写入 等留后续深化) -- shannon 哲学 (仅 working PoC 入报告) + 主宪章 §22 决策不可逆禁止 + §24 safe-by-default +- shannon 哲学 (仅 working PoC 入报告) + 不可逆操作 + safe-by-default 授权边界:本 skill 只输出**编排计划文本**,不调子 skill,不执行不可逆操作。 真执行守护在 utils 层 (`api_security_scanner.py` / `ai_adversarial.py` env gate)。 @@ -27,7 +27,7 @@ def system_prompt(self) -> str: "你是 Test-Agent 项目内 pentest-coordinator skill(skills/pentest-coordinator.md)。\n" "职责:基于 PRD + 授权上下文 + 上游 pentest-tester expert 产物,编排渗透测试 5 阶段流程。\n" "原则:\n" - "1) 前置检查铁律 (主宪章 §24):tagent.yml pentest.authorized=true + pentest.scope=[list]\n" + "1) 前置检查规则 :tagent.yml pentest.authorized=true + pentest.scope=[list]\n" " 必须显式;否则编排拒绝运行 + evidence 记录拒绝理由\n" "2) target 必须在 scope 内;不在 → 拒绝\n" "3) prod 环境 → 拒绝;只允许 staging / sandbox / dev\n" @@ -78,7 +78,7 @@ def user_prompt(self, ctx: RunnerContext) -> str: ' "outputs": {\n' ' "report_path": "workspace/渗透报告/pentest_{target}_{date}.md",\n' ' "evidence_dir": "workspace/测试报告/{项目名}/evidence/{run_id}/",\n' - ' "bug_tickets_format": "CVSS → P0-P3 (主宪章 §18-4)",\n' + ' "bug_tickets_format": "CVSS → P0-P3 ",\n' ' "allure_report": "workspace/Allure/pentest/{run_id}/"\n' " },\n" ' "refuse_conditions": [\n' @@ -153,7 +153,7 @@ def mock_output(self, ctx: RunnerContext) -> dict[str, Any]: # noqa: ARG002 "outputs": { "report_path": "workspace/渗透报告/pentest_staging.example.com_20260516.md", "evidence_dir": "workspace/测试报告/{项目名}/evidence/selftest-20260516-000001/", - "bug_tickets_format": "CVSS 9-10=P0 / 7-8.9=P1 / 4-6.9=P2 / <4=P3 (主宪章 §18-4)", + "bug_tickets_format": "CVSS 9-10=P0 / 7-8.9=P1 / 4-6.9=P2 / <4=P3 ", "allure_report": 
"workspace/Allure/pentest/selftest-20260516-000001/", }, "refuse_conditions": [ @@ -166,7 +166,7 @@ def mock_output(self, ctx: RunnerContext) -> dict[str, Any]: # noqa: ARG002 "子 skill 失败级联致整体流程中断 (建议每阶段独立 checkpoint)", "subagent pool 耗尽致并发降级为串行 (建议 max=5 + 队列回退)", "授权 yaml 被运行时篡改 (建议启动期 SHA-256 锁定 + 运行期不重读)", - "aux_client 串话致敏感数据进主 session (建议 §22 严格隔离 + telemetry 告警)", + "aux_client 串话致敏感数据进主 session (建议 严格隔离 + telemetry 告警)", ], "confidence": "medium", "_mode": "mock(stub provider)", diff --git a/runtime/orchestrator/skills/pentest_exploit.py b/runtime/orchestrator/skills/pentest_exploit.py index cf666ec6..371da4d0 100644 --- a/runtime/orchestrator/skills/pentest_exploit.py +++ b/runtime/orchestrator/skills/pentest_exploit.py @@ -1,8 +1,8 @@ """pentest-exploit skill · LLM 读 vuln_candidates → 沙箱 PoC 验证编排. -V1.30.0 minimum viable (pentest batch 2): +minimum viable (pentest batch 2): - 读 pentest-vuln 产物 vuln_candidates.json → 5 攻击域并发 PoC 尝试 -- 沙箱铁律: Docker/VM 内跑, host 禁直接执行 +- 沙箱规则: Docker/VM 内跑, host 禁直接执行 - destructive-guard: 拦截 rm/DROP/truncate/fdisk - 仅成功+可复现 PoC 标 verified, 其余丢弃 """ @@ -19,7 +19,7 @@ class PentestExploit(AgentRunner): def system_prompt(self) -> str: return ( "你是 pentest-exploit skill。职责:读 vuln_candidates,编排沙箱 PoC 验证。\n" - "铁律:1)Docker/VM 沙箱内跑 2)destructive ops 拦截 3)仅 verified + reproducible 入报告\n" + "规则:1)Docker/VM 沙箱内跑 2)destructive ops 拦截 3)仅 verified + reproducible 入报告\n" "输出严格 JSON。" ) def user_prompt(self, ctx: RunnerContext) -> str: diff --git a/runtime/orchestrator/skills/pentest_recon.py b/runtime/orchestrator/skills/pentest_recon.py index 92daca54..a887869b 100644 --- a/runtime/orchestrator/skills/pentest_recon.py +++ b/runtime/orchestrator/skills/pentest_recon.py @@ -1,6 +1,6 @@ """pentest-recon skill · LLM 读上游 pentest-tester 产物 → 侦察信息收集编排. 
-V1.28.0 minimum viable (ROADMAP pentest skill rollout #6 落地): +minimum viable (ROADMAP pentest skill rollout #6 落地): - LLM 读 PRD + 授权上下文 + 上游 pentest-tester 产物 → 主动/被动侦察计划 - 子域枚举 / 端口扫描 / 服务指纹 / Web 技术栈 / 路径发现 / 历史 URL / 泄露凭据 - 授权前置: pentest.recon_active=true 才允许主动扫, 否则仅 passive @@ -23,7 +23,7 @@ def system_prompt(self) -> str: "你是 Test-Agent 项目内 pentest-recon skill(skills/pentest-recon.md)。\n" "职责:基于 PRD + 授权上下文 + 上游 pentest-tester 产物,编排渗透侦察信息收集计划。\n" "原则:\n" - "1) 授权前置铁律: tagent.yml pentest.recon_active=true 允许主动扫;否则仅 passive\n" + "1) 授权前置规则: tagent.yml pentest.recon_active=true 允许主动扫;否则仅 passive\n" "2) scope 防护: target 在 pentest.scope 内;跨网段/跨公司域 → 拒绝\n" "3) prod 环境 → 拒绝;仅 staging/sandbox/dev\n" "4) 被动优先:子域枚举(cert.sh/DNSdumpster) > 主动扫描\n" diff --git a/runtime/orchestrator/skills/pentest_report.py b/runtime/orchestrator/skills/pentest_report.py index 1556aedb..0e0ed8ef 100644 --- a/runtime/orchestrator/skills/pentest_report.py +++ b/runtime/orchestrator/skills/pentest_report.py @@ -1,6 +1,6 @@ """pentest-report skill · 渗透报告生成编排 (shannon 哲学). 
-V1.30.0 (pentest batch 2): +(pentest batch 2): - 仅 verified PoC 入报告; unverified 丢弃 - CVSS 3.1 + CWE + 修复建议 + PII scrub - 标准 7 章报告模板 @@ -16,7 +16,7 @@ @register_skill("pentest-report") class PentestReport(AgentRunner): def system_prompt(self) -> str: - return "你是 pentest-report skill。职责:仅 working PoC 入报告 (shannon 哲学)。\n铁律:1)仅 verified 2)PII scrub 3)CVSS 3.1 必算 4)修复含代码\n输出严格 JSON。" + return "你是 pentest-report skill。职责:仅 working PoC 入报告 (shannon 哲学)。\n规则:1)仅 verified 2)PII scrub 3)CVSS 3.1 必算 4)修复含代码\n输出严格 JSON。" def user_prompt(self, ctx: RunnerContext) -> str: return f"## PRD\n```\n{ctx.artifact_text[:3000]}\n```\n\n## schema\n{{\n \"project_name\":\"string\",\"run_id\":\"string\",\n \"sections\":[\"executive_summary\",\"scope_auth\",\"methodology\",\"findings\",\"remediation\",\"appendix\"],\n \"findings\":[{{\"cwe\":\"string\",\"cvss\":0.0,\"severity\":\"P0|P1|P2|P3\",\"poc_verified\":true,\"remediation\":\"string\"}}],\n \"pii_scrub\":true,\n \"outputs\":{{\"report_md\":\"workspace/渗透/pentest_report.md\",\"report_json\":\"workspace/渗透/pentest_report.json\"}},\n \"risks\":[\"string\"],\"confidence\":\"string\"\n}}" def mock_output(self, ctx) -> dict: # noqa: ARG002 diff --git a/runtime/orchestrator/skills/pentest_vuln.py b/runtime/orchestrator/skills/pentest_vuln.py index 9e5fbae2..e607c8c8 100644 --- a/runtime/orchestrator/skills/pentest_vuln.py +++ b/runtime/orchestrator/skills/pentest_vuln.py @@ -5,7 +5,7 @@ Disclaimer: All secrets/keys/passwords in this file are demo examples for pentest check rules — not real credentials. Tests require explicit opt-in. 
-V1.29.0 minimum viable (ROADMAP pentest skill rollout #3 落地): +minimum viable (ROADMAP pentest skill rollout #3 落地): - LLM 读 PRD + 授权 + 上游 pentest-tester + pentest-recon 产物 → 漏洞发现计划 - 5 攻击域并发: Injection / XSS / SSRF / Auth / Authz - 白盒: SAST 源码扫描 + CPG (code property graph) source→sink trace @@ -29,7 +29,7 @@ def system_prompt(self) -> str: "你是 Test-Agent 项目内 pentest-vuln skill(skills/pentest-vuln.md)。\n" "职责:基于 PRD + 授权 + 上游侦察产物,编排 5 攻击域并发漏洞发现计划。\n" "原则:\n" - "1) 授权铁律: tagent.yml pentest 段 must have authorized=true + scope list\n" + "1) 授权规则: tagent.yml pentest 段 must have authorized=true + scope list\n" "2) 5 攻击域并发不串行: Injection / XSS / SSRF / Auth / Authz\n" "3) 白盒优先 (如有源码): SAST + CPG source→sink trace, 非硬编码 allowlist\n" "4) 黑盒模式: 用 recon 结果当输入, nuclei templates + Burp + sqlmap 等\n" diff --git a/runtime/orchestrator/skills/pentest_web.py b/runtime/orchestrator/skills/pentest_web.py index 89c90cf2..5f51112a 100644 --- a/runtime/orchestrator/skills/pentest_web.py +++ b/runtime/orchestrator/skills/pentest_web.py @@ -1,6 +1,6 @@ """pentest-web skill · OWASP Top 10 + ASVS 编排. 
-V1.30.0 (pentest batch 2): +(pentest batch 2): - A01-A10 全覆盖 + 2FA/TOTP/SSO 自动登录 + Burp/sqlmap 混合 """ @@ -14,7 +14,7 @@ @register_skill("pentest-web") class PentestWeb(AgentRunner): def system_prompt(self) -> str: - return "你是 pentest-web skill。职责:OWASP Top 10:2021 + ASVS 全覆盖编排。\n铁律:1)授权前置 2)scope 内 3)2FA/TOTP 自动登录支持\n输出严格 JSON。" + return "你是 pentest-web skill。职责:OWASP Top 10:2021 + ASVS 全覆盖编排。\n规则:1)授权前置 2)scope 内 3)2FA/TOTP 自动登录支持\n输出严格 JSON。" def user_prompt(self, ctx: RunnerContext) -> str: return f"## PRD\n```\n{ctx.artifact_text[:3000]}\n```\n\n## schema\n{{\n \"project_name\":\"string\",\"run_id\":\"string\",\n \"owasp_categories\":[{{\"id\":\"A01\",\"name\":\"BrokenAccessControl\",\"tools\":[\"string\"]}}],\n \"auth_auto\":{{\"totp_enabled\":false,\"sso_enabled\":false}},\n \"outputs\":{{\"web_vulns\":\"workspace/渗透/web_vulns.json\"}},\n \"risks\":[\"string\"],\"confidence\":\"string\"\n}}" def mock_output(self, ctx) -> dict: # noqa: ARG002 diff --git a/runtime/orchestrator/skills/system_test.py b/runtime/orchestrator/skills/system_test.py index 93a641a6..d1594a0b 100644 --- a/runtime/orchestrator/skills/system_test.py +++ b/runtime/orchestrator/skills/system_test.py @@ -1,6 +1,6 @@ """system-test skill · LLM 读上游 system-tester 产物 → 6 阶段系统集成测试编排. -V1.26.0 minimum viable (ROADMAP skill rollout #4 落地): +minimum viable (ROADMAP skill rollout #4 落地): - LLM 读 PRD + 上游 system-tester expert 产物 → 6 阶段执行计划 (环境检查 / IoT 测试 / 音视频校验 / 链路追踪 / 消息队列 / 报告归档) + 质量门禁 + 子场景路由策略 diff --git a/runtime/orchestrator/skills/visual_test.py b/runtime/orchestrator/skills/visual_test.py index 1fb7df20..431e6958 100644 --- a/runtime/orchestrator/skills/visual_test.py +++ b/runtime/orchestrator/skills/visual_test.py @@ -1,6 +1,6 @@ """visual-test skill · LLM 读上游 visual-tester 产物 → 5 阶段视觉测试执行编排. 
-V1.24.0 minimum viable (ROADMAP skill rollout #3 落地): +minimum viable (ROADMAP skill rollout #3 落地): - LLM 读 PRD + 上游 visual-tester expert 产物 → 5 阶段执行计划 (环境检查 / 模板图准备 / 视觉冒烟 / 视觉回归 / 报告归档) + 质量门禁 + 多分辨率策略 diff --git a/runtime/registry/registry.py b/runtime/registry/registry.py index 53483c58..39b93e7c 100644 --- a/runtime/registry/registry.py +++ b/runtime/registry/registry.py @@ -41,7 +41,7 @@ class CatalogEntry: tools: list[str] = field(default_factory=list) source_path: str = "" raw_body: str = "" - # V1.14 防 mock (ROADMAP V1.15 Day 0 承诺): + # 防 mock (ROADMAP Day 0 承诺): # 从 frontmatter EXPERT_IMPL_STATUS / SKILL_IMPL_STATUS 解析,执行层据此拒绝路由未实装项。 # 合法值: production / script / rollout / vision / unknown(frontmatter 缺失或值非法时)。 impl_status: str = "unknown" @@ -114,7 +114,7 @@ def _entry_from_file(path: Path, kind: EntryKind) -> CatalogEntry | None: tools = [t.strip() for t in tools_raw.split(",") if t.strip()] else: tools = [] - # V1.14 防 mock: 按 kind 选对应 frontmatter key,缺失或非法值降级 unknown + # 防 mock: 按 kind 选对应 frontmatter key,缺失或非法值降级 unknown status_key = "EXPERT_IMPL_STATUS" if kind == "expert" else "SKILL_IMPL_STATUS" status_raw = meta.get(status_key, "") impl_status = str(status_raw).strip().lower() if status_raw else "" diff --git a/runtime/router/retrieval.py b/runtime/router/retrieval.py index 13fdcedb..c3201e93 100644 --- a/runtime/router/retrieval.py +++ b/runtime/router/retrieval.py @@ -6,7 +6,7 @@ Async-safety: - Called from sync `route()`. If already inside a running event loop (e.g. FastAPI request handler), running `asyncio.run` or `run_coroutine_threadsafe` on the - same loop deadlocks. We detect that and degrade to no-op (charter §21 横切 + same loop deadlocks. We detect that and degrade to no-op (charter 横切 可复现性: never block, never silently misbehave). 
""" diff --git a/runtime/router/router.py b/runtime/router/router.py index 6dd51bcc..06e52ff1 100644 --- a/runtime/router/router.py +++ b/runtime/router/router.py @@ -26,7 +26,7 @@ class RouterError(RuntimeError): def _validate_against_catalog(decision: RoutingDecision, catalog: Catalog) -> list[str]: issues: list[str] = [] - # V1.14 防 mock (ROADMAP V1.15 Day 0 承诺): 检查 expert / skill 实装状态 + # 防 mock (ROADMAP Day 0 承诺): 检查 expert / skill 实装状态 # 单源: catalog entry.impl_status (agents/skills .md frontmatter) # rollout / vision / unknown 状态 router 仍可路由,但 issues 列表标 warning + downgrade confidence # → orchestrator execute_node 跑到时会硬拒并报明确错误 (returncode=2),不输出 mock 数据 @@ -39,7 +39,7 @@ def _validate_against_catalog(decision: RoutingDecision, catalog: Catalog) -> li continue if entry.impl_status in ("rollout", "vision"): issues.append( - f"{n.kind} '{n.name}' 处于 V1.x {entry.impl_status} (id={n.id}); " + f"{n.kind} '{n.name}' 处于 {entry.impl_status} (id={n.id}); " f"test-lead 决策应降级 conditional 或 no-go" ) elif entry.impl_status == "unknown": diff --git a/runtime/router/schema.py b/runtime/router/schema.py index d75f9f3a..e93093f5 100644 --- a/runtime/router/schema.py +++ b/runtime/router/schema.py @@ -19,7 +19,7 @@ class DAGNode(BaseModel): on_failure: Literal["retry", "skip", "abort"] = "retry" timeout_seconds: int = Field(default=1800, ge=1, description="node timeout in seconds") - # Charter §23 教学层字段(可选;LLM 在 learn mode 应填,exec mode 仅 one_liner) + # Charter 教学层字段(可选;LLM 在 learn mode 应填,exec mode 仅 one_liner) one_liner_zh: str = Field(default="", description="≤30 字 why,执行模式输出此字段") one_liner_en: str = Field(default="", description="≤120 chars why for English") why: str = Field(default="", description="long-form rationale (learn mode)") diff --git a/runtime/scheduler/INDEX.md b/runtime/scheduler/INDEX.md index ff731513..da24dd7f 100644 --- a/runtime/scheduler/INDEX.md +++ b/runtime/scheduler/INDEX.md @@ -10,7 +10,7 @@ | `scheduler.py` | tick() 每 60s,跨平台文件锁(fcntl/msvcrt) | | 
`injection_scan.py` | 运行时 prompt 注入扫描(防 skill 加载注入) | -## 规则(主宪章 §22 落地) +## 规则 - **运行时全 prompt 扫描**(非仅 create-time):skill 在 runtime 加载,恶意 skill 可携带注入 - **文件锁防重入**:`workspace/cron/.tick.lock` 双栈实现 diff --git a/runtime/scheduler/injection_scan.py b/runtime/scheduler/injection_scan.py index 24256c23..9d7b8c7b 100644 --- a/runtime/scheduler/injection_scan.py +++ b/runtime/scheduler/injection_scan.py @@ -1,6 +1,6 @@ -"""Runtime prompt injection scan (hermes §1.2 critical). +"""Runtime prompt injection scan (hermes critical). -Charter §22 rule: 非交互自动批准模式下,assembled prompt(含动态加载的 skill 内容) +Charter rule: 非交互自动批准模式下,assembled prompt(含动态加载的 skill 内容) 必须全扫,不止 create-time。 """ @@ -33,7 +33,7 @@ def __init__(self, reason: str, snippet: str) -> None: def scan(text: str) -> None: """Raise PromptInjectionBlocked when any pattern hits. - Charter §22 rule: scan FULL assembled prompt (system + user + tools + skill contents). + Charter rule: scan FULL assembled prompt (system + user + tools + skill contents). """ for pat in SUSPICIOUS: m = pat.search(text) diff --git a/runtime/scheduler/jobs.py b/runtime/scheduler/jobs.py index 832efad0..1f8e6621 100644 --- a/runtime/scheduler/jobs.py +++ b/runtime/scheduler/jobs.py @@ -1,4 +1,4 @@ -"""Cron job storage (hermes-inspired §1.2). +"""Cron job storage (hermes-inspired ). Jobs persist in `workspace/cron/jobs.json`. Each entry: - id: uuid diff --git a/runtime/scheduler/scheduler.py b/runtime/scheduler/scheduler.py index f57d7d9c..7e7382cc 100644 --- a/runtime/scheduler/scheduler.py +++ b/runtime/scheduler/scheduler.py @@ -1,4 +1,4 @@ -"""Scheduler tick loop (hermes §1.2). +"""Scheduler tick loop (hermes ). 
- 60s tick from a background thread - Cross-platform file lock (fcntl/msvcrt) prevents double-run @@ -111,7 +111,7 @@ def run_job(job: dict, *, runner: Callable[[str], dict] | None = None) -> dict: out_file = out_dir / f"{ts}.md" try: - # Charter §22 rule: runtime full-prompt injection scan + # Charter rule: runtime full-prompt injection scan scan(prompt) except PromptInjectionBlocked as e: out_file.write_text(f"# Job {job_id} blocked\n\nreason: {e.reason}\nsnippet: {e.snippet}\n", encoding="utf-8") diff --git a/runtime/storage/objects.py b/runtime/storage/objects.py index b4ed3c69..32fa3e6d 100644 --- a/runtime/storage/objects.py +++ b/runtime/storage/objects.py @@ -15,7 +15,7 @@ class ObjectStore: """Thin facade. Imports MinIO lazily so tests without infra still pass. - Charter §21 横切准则: lazy init is thread-safe (防止并发 caller 重复建桶). + Charter 横切准则: lazy init is thread-safe (防止并发 caller 重复建桶). """ _lock = threading.Lock() diff --git a/runtime/subagent/INDEX.md b/runtime/subagent/INDEX.md index 032907f2..455259d8 100644 --- a/runtime/subagent/INDEX.md +++ b/runtime/subagent/INDEX.md @@ -1,6 +1,6 @@ # subagent 索引 -## 规则(主宪章 §22) +## 规则 - **隔离 client**:子代理用 `auxiliary` LLM client,永不污染主 session prompt cache - **ThreadPool 动态调整**:默认 32 workers,可按并发 evals 数 resize_tool_pool diff --git a/runtime/subagent/aux_client.py b/runtime/subagent/aux_client.py index cb45a94a..0b8ba12a 100644 --- a/runtime/subagent/aux_client.py +++ b/runtime/subagent/aux_client.py @@ -1,4 +1,4 @@ -"""Auxiliary LLM client (hermes §1.3 'never touches main session prompt cache'). +"""Auxiliary LLM client (hermes 'never touches main session prompt cache'). Subagents and curator share NOTHING with the main routing path beyond raw model API. Different env vars (TAGENT_AUX_*) so users can pin a cheaper/faster aux model. 
diff --git a/runtime/subagent/pool.py b/runtime/subagent/pool.py index daa581a4..411b8d5e 100644 --- a/runtime/subagent/pool.py +++ b/runtime/subagent/pool.py @@ -1,4 +1,4 @@ -"""Global ThreadPool for subagent tasks (hermes §1.3 pattern).""" +"""Global ThreadPool for subagent tasks (hermes pattern).""" from __future__ import annotations @@ -26,7 +26,7 @@ def resize_pool(max_workers: int) -> None: """Replace the pool with a new one sized to `max_workers`. Safe to call before tasks are submitted; existing tasks finish on the old pool. - Charter §21 横切预算:避免大并发饥饿。 + Charter 横切预算:避免大并发饥饿。 """ global _executor with _lock: diff --git a/runtime/subagent/spawn.py b/runtime/subagent/spawn.py index 74fe4236..6e1294f6 100644 --- a/runtime/subagent/spawn.py +++ b/runtime/subagent/spawn.py @@ -29,7 +29,7 @@ def spawn(task: Callable[..., Any], *args, **kwargs) -> concurrent.futures.Futur def fanout(tasks: list[Callable[..., Any]], *, timeout: float = 600.0) -> list[SubagentResult]: """Run multiple subagent tasks in parallel; collect results in submission order. - Charter §21 横切准则: + Charter 横切准则: - 失败隔离:任一子任务 crash 不影响其他 - 测试预算:总 timeout 上限 """ diff --git a/runtime/tests/test_agent_runners.py b/runtime/tests/test_agent_runners.py index de6b91de..6daf617a 100644 --- a/runtime/tests/test_agent_runners.py +++ b/runtime/tests/test_agent_runners.py @@ -1,4 +1,4 @@ -"""11 个 LLM-driven AgentRunner 专项 unit test (V1.16-followup, V1.x rollout 收尾). +"""11 个 LLM-driven AgentRunner 专项 unit test. 
覆盖 3 维度 × 11 runner = 33 case (参数化): - registration: @register("name") + __init__.py import 双链路 → get_runner(name) 非空 @@ -8,7 +8,7 @@ - summary 非空: 一行业务摘要存在 (防 summary 改空 regression, report-generator 下游消费) -模板规则锁定 (V1.17+ 新 AgentRunner 必填): +模板规则锁定 : - 加 1 runner → 更新 ALL_RUNNERS 加 1 行 (name, required_keys) - 不加 → 参数化漏覆盖, pytest 不报错但 silent gap @@ -47,27 +47,27 @@ ["verdict", "rationale", "metrics", "known_risks", "fallback_plan", "summary_zh", "requires_human_signoff", "signoff_owner"], ), ( - "env-manager", # V1.15.0 + "env-manager", # ["project_name", "target_env", "env_checks", "prep_steps", "dependencies", "risks", "confidence"], ), ( - "mobile-tester", # V1.16.0 + "mobile-tester", # ["project_name", "target_platform", "test_cases", "device_commands", "test_environment", "mobile_specific", "risks", "confidence"], ), ( - "visual-tester", # V1.17.0 + "visual-tester", # ["project_name", "visual_target_type", "visual_test_points", "comparison_scripts", "tolerance", "baseline_strategy", "risks", "confidence"], ), ( - "system-tester", # V1.18.0 + "system-tester", # ["project_name", "system_target_type", "test_cases", "device_commands", "protocol_specific", "test_environment", "risks", "confidence"], ), ( - "pentest-tester", # V1.19.0 + "pentest-tester", # ["project_name", "test_mode", "target_scope", "recon_phase", "vuln_assessment_phase", "exploit_plan", "reporting", "risks", "confidence"], ), ( - "automotive-tester", # V1.20.0 (V1.x rollout 收尾) + "automotive-tester", # (rollout 收尾) ["project_name", "vehicle_subsystem", "asil_assessment", "test_cases", "bus_test_plan", "adas_scenarios", "ota_plan", "compliance_matrix", "test_environment", "risks", "confidence"], ), ] diff --git a/runtime/tests/test_cli_config.py b/runtime/tests/test_cli_config.py index b4353075..a8dce7a1 100644 --- a/runtime/tests/test_cli_config.py +++ b/runtime/tests/test_cli_config.py @@ -1,4 +1,4 @@ -"""tagent config CLI 测试 (V1.22.0 · 4 子命令 list/show/use/use-compat). 
+"""tagent config CLI 测试 (4 子命令 list/show/use/use-compat). 本文件中所有 API key/secret 均为虚构测试数据,不是真实凭据。 All API keys and secrets in this file are fake test fixtures — not real credentials. diff --git a/runtime/tests/test_impl_status_filter.py b/runtime/tests/test_impl_status_filter.py index bed84850..55b0c270 100644 --- a/runtime/tests/test_impl_status_filter.py +++ b/runtime/tests/test_impl_status_filter.py @@ -1,6 +1,6 @@ """X4 防 mock 闭环测试:registry parse → router warn → orchestrator hard block。 -覆盖 ROADMAP V1.15 Day 0 承诺:rollout / vision / unknown 状态的 expert / skill, +覆盖 ROADMAP Day 0 承诺:rollout / vision / unknown 状态的 expert / skill, router 路由仍可生成 DAG 但 _validate_against_catalog 标 issue + 降 confidence, orchestrator execute_node 跑到时 returncode=2 + stderr "未实装",绝不输出 mock 数据。 @@ -27,22 +27,22 @@ def test_registry_impl_status_no_unknown(): def test_registry_expert_status_counts(): - """Expert 16 = 11 production + 5 script + 0 rollout (V1.20.0 automotive-tester 落地后,V1.x rollout 收尾)。""" + """Expert 16 = 11 production + 5 script + 0 rollout 。""" cat = get_catalog() counts = Counter(e.impl_status for e in cat.experts.values()) assert counts.get("production", 0) == 11, f"expert production 应 11,实 {counts.get('production')}" assert counts.get("script", 0) == 5, f"expert script 应 5,实 {counts.get('script')}" - assert counts.get("rollout", 0) == 0, f"expert rollout 应 0 (V1.x rollout 收尾),实 {counts.get('rollout')}" + assert counts.get("rollout", 0) == 0, f"expert rollout 应 0 (rollout 收尾),实 {counts.get('rollout')}" def test_registry_skill_status_counts(): - """Skill 32 = 25 production + 7 script + 0 rollout + 0 vision (V1.x 全 skill rollout 完成 + 2 ex-vision 实装)。""" + """Skill 32 = 25 production + 7 script + 0 rollout + 0 vision (全 skill rollout 完成 + 2 ex-vision 实装)。""" cat = get_catalog() counts = Counter(e.impl_status for e in cat.skills.values()) assert counts.get("production", 0) == 25, f"skill production 应 25,实 {counts.get('production')}" assert counts.get("script", 0) == 7 assert 
counts.get("rollout", 0) == 0, f"skill rollout 应 0,实 {counts.get('rollout')}" - assert counts.get("vision", 0) == 0, f"skill vision 应 0 (V1.x 后),实 {counts.get('vision')}" + assert counts.get("vision", 0) == 0, f"skill vision 应 0 (后),实 {counts.get('vision')}" # ---------- router 层:_validate_against_catalog warn ---------- @@ -61,7 +61,7 @@ def _mk_decision(*dag_specs: tuple[str, str, str]) -> RoutingDecision: def test_router_flags_rollout_expert(): - # V1.20 V1.x rollout 收尾,所有 expert production/script。 + # rollout 收尾,所有 expert production/script。 # rollout 分支覆盖通过 skill 层 (test_router_flags_rollout_skill,16 skill 仍 rollout)。 # unknown 分支覆盖通过 test_router_flags_unknown_entity。 # 此 test 保留为占位,改测 unknown expert (走相同 hard-block 分支)。 @@ -72,7 +72,7 @@ def test_router_flags_rollout_expert(): def test_router_does_not_falsely_flag_production_skill(): - """V1.36.0 全 rollout 完成 — production skill 不应被 flag 为 rollout/vision。""" + """全 rollout 完成 — production skill 不应被 flag 为 rollout/vision。""" cat = get_catalog() dec = _mk_decision(("n1", "skill", "visual-test")) issues = router._validate_against_catalog(dec, cat) @@ -80,7 +80,7 @@ def test_router_does_not_falsely_flag_production_skill(): def test_router_flags_vision_skill(): - # V1.x 2 ex-vision skill (agent-introspection-debugging / build-your-own-x-explorer) 已实装为 production。 + # 2 ex-vision skill (agent-introspection-debugging / build-your-own-x-explorer) 已实装为 production。 # vision 分支与 rollout 共用 router._validate_against_catalog 同一 if (rollout, vision) 路径, # 现 catalog 无 vision skill,此 test 改测 unknown skill (走相同 hard-block warn 分支),保留覆盖语义。 cat = get_catalog() @@ -114,7 +114,7 @@ def test_router_passes_production_clean(): def test_execute_node_rejects_rollout_expert(): - """V1.20 V1.x rollout 收尾,无 rollout expert。 + """rollout 收尾,无 rollout expert。 rollout 分支覆盖通过 test_execute_node_rejects_rollout_skill (16 skill 仍 rollout)。 expert hard-block 路径覆盖通过 test_execute_node_rejects_unknown_expert (同分支)。 此 test 保留 + 改用 unknown expert 触发同 
returncode=2 hard-block。 @@ -125,14 +125,14 @@ def test_execute_node_rejects_rollout_expert(): def test_execute_node_allows_production_skill(): - """V1.36.0 全 rollout 完成 — production skill 应正常执行 (rc=0),不被硬拒。""" + """全 rollout 完成 — production skill 应正常执行 (rc=0),不被硬拒。""" r = execute_node("automotive-can-bus-test", "skill") assert r.returncode == 0, f"production skill 被误拒: rc={r.returncode} stderr={r.stderr}" assert r.stdout, "production skill 应产出结果" def test_execute_node_rejects_vision_skill(): - # V1.x 2 ex-vision skill 已实装,catalog 无 vision skill。 + # 2 ex-vision skill 已实装,catalog 无 vision skill。 # vision hard-block 分支与 rollout 共用 execute_node 同一拒绝路径, # 此 test 改测 unknown skill (走 returncode=2 同分支),保留覆盖语义。 r = execute_node("phantom-vision-skill", "skill") diff --git a/runtime/tests/test_router.py b/runtime/tests/test_router.py index d1e2daaa..f19b5bda 100644 --- a/runtime/tests/test_router.py +++ b/runtime/tests/test_router.py @@ -33,7 +33,7 @@ def test_router_picks_platform_expert(text, expected_type, expected_expert): def test_router_pentest_includes_coordinator_skill(): - """pentest path 头节点 = pentest-coordinator (kind=skill, V1.21 SkillRunner 首接入).""" + """pentest path 头节点 = pentest-coordinator (kind=skill).""" art = TargetArtifact(kind="text", text="pentest SQL injection penetration test") decision = route(art, client=LLMClient(provider="stub", fallback="stub")) ordered = decision.topological() @@ -49,7 +49,7 @@ def test_router_starts_with_requirements_analyst(): def test_router_ends_with_test_lead_decision(): - """DAG 末节点 = test-lead 决策(主宪章 §40 + agents/README.md 流程 + """DAG 末节点 = test-lead 决策(agents/README.md 流程 "bug-manager → report-generator → test-lead 决策")。report-generator 倒数第二。""" art = TargetArtifact(kind="text", text="generic web system") decision = route(art, client=LLMClient(provider="stub", fallback="stub")) diff --git a/runtime/tests/test_router_real.py b/runtime/tests/test_router_real.py index 838fb3ef..3e77ca23 100644 ---
a/runtime/tests/test_router_real.py +++ b/runtime/tests/test_router_real.py @@ -1,9 +1,9 @@ """Real-model router accuracy test (M2-7). -Charter §21: +Charter : - 横切准则: 失败必带 seed + snapshot (固定 random seed) - 横切准则: 测试预算上限 (timeout per call) - - 决策可追溯 §18-12: 每次失败入 decisions/ + - 决策可追溯 : 每次失败入 decisions/ Run conditions: - Requires TAGENT_LLM_PROVIDER ∈ {claude, openai, gemini, qwen, deepseek, ollama} @@ -30,7 +30,7 @@ from runtime.router.router import route, route_with_vote from runtime.router.schema import TargetArtifact -RANDOM_SEED = 42 # §21 可复现性: 固定 seed +RANDOM_SEED = 42 # 可复现性: 固定 seed random.seed(RANDOM_SEED) # 20 test samples: 4 types × 5 phrasings each @@ -63,7 +63,7 @@ def _decisions_log(record: dict) -> Path: - """Charter §18-12 决策可追溯: log each routing decision.""" + """Charter 决策可追溯: log each routing decision.""" s = get_settings() d = s.resolve(s.workspace_dir) / "测试报告" / "decisions" d.mkdir(parents=True, exist_ok=True) diff --git a/runtime/tests/test_skill_runners.py b/runtime/tests/test_skill_runners.py index 77449d22..06e27ba4 100644 --- a/runtime/tests/test_skill_runners.py +++ b/runtime/tests/test_skill_runners.py @@ -1,6 +1,6 @@ -"""LLM-driven SkillRunner 专项 unit test (V1.21.0 — skill rollout 起点). +"""LLM-driven SkillRunner 专项 unit test. 
-照 test_agent_runners.py pattern (V1.16-followup 锁规则) 同构: +照 test_agent_runners.py pattern 同构: 覆盖 3 维度 × N skill_runner = 3N case (参数化): - registration: @register_skill("name") + skills/__init__.py import 双链路 → get_skill_runner(name) 非空 (防 __init__.py 漏 import 致 silent fallback no-op) @@ -9,7 +9,7 @@ - summary 非空: 一行业务摘要存在 (防 summary 改空 regression, report-generator 下游消费) -模板规则锁定 (V1.21+ 新 SkillRunner 必填): +模板规则锁定 : - 加 1 skill_runner → 更新 ALL_SKILL_RUNNERS 加 1 行 (name, required_keys) - 不加 → 参数化漏覆盖, pytest 不报错但 silent gap """ @@ -25,7 +25,7 @@ # 不含下划线开头字段 (e.g., _mode 是 stub 标志, 非业务字段) ALL_SKILL_RUNNERS: list[tuple[str, list[str]]] = [ ( - "pentest-coordinator", # V1.21.0 (skill rollout 起点) + "pentest-coordinator", # (skill rollout 起点) [ "project_name", "run_id", @@ -55,55 +55,55 @@ ["project_name","run_id","sections","findings","pii_scrub","outputs","risks","confidence"], ), ( - "pentest-recon", # V1.31.0 + "pentest-recon", # ["project_name","run_id","target","authorization","outputs","risks","confidence"], ), ( - "pentest-vuln", # V1.31.0 + "pentest-vuln", # ["project_name","run_id","source_available","mode","domains","outputs","risks","confidence"], ), ( - "mobile-test", # V1.23.0 + "mobile-test", # ["project_name","run_id","target_platform","phases","outputs","risks","confidence"], ), ( - "visual-test", # V1.23.0 + "visual-test", # ["project_name","run_id","visual_target_type","phases","outputs","risks","confidence"], ), ( - "system-test", # V1.31.0 + "system-test", # ["project_name","run_id","sub_scenarios","phases","outputs","risks","confidence"], ), ( - "eval-harness", # V1.27.0 + "eval-harness", # ["project_name","run_id","eval_target","model_version","baseline_version","safety_checks","outputs","risks","confidence"], ), ( - "automotive-test", # V1.31.0 + "automotive-test", # ["project_name","run_id","vehicle_subsystem","asil_level","phases","sub_skills","outputs","risks","confidence"], ), ( - "automotive-can-bus-test", # V1.31.0 + "automotive-can-bus-test", 
# ["project_name","run_id","protocols","checks","outputs","risks","confidence"], ), ( - "automotive-adas-scenario", # V1.31.0 + "automotive-adas-scenario", # ["project_name","run_id","categories","odd_levels","simulation","outputs","risks","confidence"], ), ( - "automotive-ota-update-test", # V1.31.0 + "automotive-ota-update-test", # ["project_name","run_id","checks","compliance","outputs","risks","confidence"], ), ( - "automotive-hil-loop-test", # V1.31.0 + "automotive-hil-loop-test", # ["project_name","run_id","loops","asil_required","fault_injection","platform","outputs","risks","confidence"], ), ( - "agent-introspection-debugging", # V1.x + "agent-introspection-debugging", # ["project_name","run_id","target_run_id","dimensions","findings","recommendations","outputs","confidence"], ), ( - "build-your-own-x-explorer", # V1.x + "build-your-own-x-explorer", # ["project_name","run_id","user_scenario","detected_concepts","recommendations","warnings","outputs","confidence"], ), ] diff --git a/runtime/tutor/INDEX.md b/runtime/tutor/INDEX.md index 21537502..689bd4ca 100644 --- a/runtime/tutor/INDEX.md +++ b/runtime/tutor/INDEX.md @@ -1,4 +1,4 @@ -# tutor 索引(主宪章 §23 教学层) +# tutor 索引 ## 文件清单 diff --git a/runtime/tutor/__init__.py b/runtime/tutor/__init__.py index 44174a97..3cad7f3e 100644 --- a/runtime/tutor/__init__.py +++ b/runtime/tutor/__init__.py @@ -1,4 +1,4 @@ -"""Tutor 教学层 · 主宪章 §23. +"""Tutor 教学层 执行 / 学习 双模式 · 反幻觉 3 层 · 双语切换. """ diff --git a/runtime/tutor/eval_replay.py b/runtime/tutor/eval_replay.py index e339ccc0..80d81ecf 100644 --- a/runtime/tutor/eval_replay.py +++ b/runtime/tutor/eval_replay.py @@ -1,4 +1,4 @@ -"""Eval capture + replay · gbrain §1.6 派生. +"""Eval capture + replay · gbrain 派生. opt-in via TAGENT_EVAL_CAPTURE=1. PII-scrubbed routing queries land in `workspace/learning/eval_candidates.jsonl`. 
Replay computes 3 metrics: @@ -30,7 +30,7 @@ def _capture_path() -> Path: return d / "eval_candidates.jsonl" -# PII scrub — single source of truth (gbrain §1.9) +# PII scrub — single source of truth (gbrain ) PII_PATTERNS = [ (re.compile(r"\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}\b"), "<EMAIL>"), (re.compile(r"\b1[3-9]\d{9}\b"), "<PHONE-CN>"), diff --git a/runtime/tutor/explainer.py b/runtime/tutor/explainer.py index c5a5f19d..ddf17709 100644 --- a/runtime/tutor/explainer.py +++ b/runtime/tutor/explainer.py @@ -1,10 +1,10 @@ -"""Explainer · 主宪章 §23 反幻觉 L2 自检. +"""Explainer Decorates DAG nodes / tool calls with教学注释: exec mode → one_liner only (≤30 字) learn mode → why + theory_ref + alternatives + reading -Charter §23 L2 self-check: verify_refs() re-asks LLM to confirm cited card ids +Charter L2 self-check: verify_refs() re-asks LLM to confirm cited card ids are real KB entries; non-existent → strip + downgrade confidence. """ @@ -61,7 +61,7 @@ def render(self, lang: str | None = None) -> str: def filter_refs(refs: list[str]) -> tuple[list[str], list[str]]: - """Charter §23 L1: split into (in_kb, not_in_kb).""" + """Charter L1: split into (in_kb, not_in_kb).""" kb = get_kb() in_kb, not_in_kb = [], [] for r in refs: diff --git a/runtime/tutor/feedback.py b/runtime/tutor/feedback.py index 67b97ae4..fe09d39c 100644 --- a/runtime/tutor/feedback.py +++ b/runtime/tutor/feedback.py @@ -1,4 +1,4 @@ -"""User feedback · 主宪章 §23 反幻觉 L3. +"""User feedback Users flag wrong explanations → workspace/learning/feedback/{date}.jsonl curator periodically reviews & downgrades card confidence. diff --git a/runtime/tutor/graph.py b/runtime/tutor/graph.py index 31e62b6e..9583f65a 100644 --- a/runtime/tutor/graph.py +++ b/runtime/tutor/graph.py @@ -1,4 +1,4 @@ -"""KB self-wiring graph · gbrain §1.2 派生. +"""KB self-wiring graph · gbrain 派生. 
零 LLM 调用:从卡片 frontmatter 的 `related_to` + `superseded_by` + body 内的 `[[wikilink]]` 抽取 typed link,建反向索引。 diff --git a/runtime/tutor/i18n.py b/runtime/tutor/i18n.py index ec665037..c4966ce3 100644 --- a/runtime/tutor/i18n.py +++ b/runtime/tutor/i18n.py @@ -1,4 +1,4 @@ -"""Language switching · 主宪章 §23. +"""Language switching zh / en / zh-en(double-column comparison) """ diff --git a/runtime/tutor/theory_kb.py b/runtime/tutor/theory_kb.py index 75dd3823..ef38a559 100644 --- a/runtime/tutor/theory_kb.py +++ b/runtime/tutor/theory_kb.py @@ -1,4 +1,4 @@ -"""Theory KB loader · 主宪章 §23 反幻觉 L1. +"""Theory KB loader Scans `docs/theory/**/*.{zh,en}.md`, parses frontmatter, exposes lookup API. LLM in learn mode 只能引用 KB 中存在的 id;非 KB 输出"该领域未收录". diff --git a/runtime/tutor/verbosity.py b/runtime/tutor/verbosity.py index 4e6cf669..8d22afe5 100644 --- a/runtime/tutor/verbosity.py +++ b/runtime/tutor/verbosity.py @@ -1,4 +1,4 @@ -"""Verbosity mode · 主宪章 §23. +"""Verbosity mode exec = 默认,每节点仅 one_liner(≤30 字);可 --silent 关 learn = 全套教学:why + theory_ref + alternatives + reading + L3 反馈 diff --git a/runtime/web/INDEX.md b/runtime/web/INDEX.md index 0f8bb4ef..45873f19 100644 --- a/runtime/web/INDEX.md +++ b/runtime/web/INDEX.md @@ -1,7 +1,7 @@ -# runtime/web 索引 (V1.0.0) +# runtime/web 索引 > Web UI for `runtime/api`. 
4 页:Upload / Run Status / Report / Catalog。 -> 被测项级别 §21 **L2**(用户可见,必含功能+边界+异常+兼容+可访问性测试)。 +> 被测项级别 **L2**(用户可见,必含功能+边界+异常+兼容+可访问性测试)。 ## 技术栈 @@ -49,7 +49,7 @@ npm run test:a11y # axe-core 可访问性扫 | Catalog | `GET /catalog` | | Health | `GET /health` | -## §21 必测项(L2 级) +## 必测项(L2 级) - 功能正常路径:上传→看 DAG→看报告 - 边界:超大文件/空文本/超长 run_id diff --git a/scripts/INDEX.md b/scripts/INDEX.md index d2694443..9c901b03 100644 --- a/scripts/INDEX.md +++ b/scripts/INDEX.md @@ -1,4 +1,4 @@ -# scripts/ 索引(V1.10.0) +# scripts/ 索引 > 运维 / 分析 / 数据导出脚本 · 不属于运行时 · 不进入 tagent CLI。 @@ -24,4 +24,4 @@ ## 相关 - 上一级:[`../README.md`](../README.md) -- 主宪章 §0(安全:真实数据隔离)+ §19-12(决策可追溯) +- (安全:真实数据隔离)+ (决策可追溯) diff --git a/scripts/analyze-usage.py b/scripts/analyze-usage.py index 18c3fcc3..b4a8bcb1 100644 --- a/scripts/analyze-usage.py +++ b/scripts/analyze-usage.py @@ -40,12 +40,12 @@ def analyze_users(users: list[dict]) -> dict: def cut_decision(skill_usage: list[dict]) -> dict: - """W3 砍/留 决策(主宪章 §21 + §27 简洁优先). + """W3 砍/留 决策. 重度(≥10% 用户): keep + 文档加强 中度(3-10%): keep + 不主推 长尾(<3%): deprecated 月观察 - 0%: archive(主宪章 §22 归档不删) + 0%: archive """ keep_core: list[str] = [] keep_mid: list[str] = [] @@ -78,7 +78,7 @@ def render_md(user_stats: dict, cuts: dict, output: Path) -> None: f"- 行业 top:{user_stats['industries']}", f"- 团队规模:{user_stats['team_sizes']}", f"- 来源渠道:{user_stats['channels']}", - f"\n## Skill 决策(主宪章 §21 §27)\n", + f"\n## Skill 决策\n", ] for k, v in cuts.items(): lines.append(f"\n### {k}({len(v)} 项)") @@ -88,7 +88,7 @@ def render_md(user_stats: dict, cuts: dict, output: Path) -> None: "1. **keep_core**:文档加强 + demo gif + 教学视频\n" "2. **keep_mid**:不主推,留\n" "3. **deprecate**:30 天观察期;再无人用 → 转 archive\n" - "4. **archive**:`marketplace/.archive/` 归档(主宪章 §22 不可逆禁止)\n" + "4. 
**archive**:`marketplace/.archive/` 归档\n" ) output.write_text("\n".join(lines), encoding="utf-8") diff --git a/utils/README.md b/utils/README.md index 8a4d0ba5..951b6af9 100644 --- a/utils/README.md +++ b/utils/README.md @@ -20,7 +20,7 @@ | `jmeter_csv_exporter.py` | JMeter 参数化 CSV 生成 | `generate_jmeter_dataset(count, output)` | | `jmeter_result_parser.py` | JTL 解析 + 性能门禁 + 基线对比 | `parse_jtl(jtl)` / `check_performance_gates` / `compare_with_baseline` | | `regression_scope.py` | git diff 影响范围分析(YAML 配置) | `analyze_change_impact(base_branch)` | -| `zentao_bug_manager.py` | BugTracker 默认 adapter:禅道 SDK + token 续期(其他 adapter 同契约 BugTrackerBase,主宪章 §12) | `ZentaoBugManager.create_bug` / `batch_submit_from_failures` | +| `zentao_bug_manager.py` | BugTracker 默认 adapter:禅道 SDK + token 续期 | `ZentaoBugManager.create_bug` / `batch_submit_from_failures` | | `ci_quality_gate.py` | CI 门禁统一(junit + cov) | `parse_junit` / `check_smoke` / `check_regression` / `check_coverage` | | `quality_gate_engine.py` | YAML 驱动门禁引擎(替代硬编码阈值) | `QualityGateEngine` / `check_smoke/regression/coverage/performance/release` | | `bug_tracker_base.py` | BugTracker 抽象基类 + 工厂模式(5 适配器注册) | `BugTrackerBase` / `create_bug_manager` / `TRACKER_REGISTRY` | diff --git a/utils/a11y_i18n/a11y_scanner.py b/utils/a11y_i18n/a11y_scanner.py index c5d78ca0..ec9f0b83 100644 --- a/utils/a11y_i18n/a11y_scanner.py +++ b/utils/a11y_i18n/a11y_scanner.py @@ -1,5 +1,5 @@ # SPDX-License-Identifier: MIT -# DEPRECATED: use a11y_scanner_v2 instead. This file will be removed in V1.2. +# DEPRECATED: use a11y_scanner_v2 instead. This file will be removed in """ 无障碍 / Accessibility 测试(WCAG 2.1) 被引用方:UX / 易用性 / 合规 diff --git a/utils/data/data_factory.py b/utils/data/data_factory.py index 3006d8ce..ff640039 100644 --- a/utils/data/data_factory.py +++ b/utils/data/data_factory.py @@ -1,5 +1,5 @@ # SPDX-License-Identifier: MIT -# DEPRECATED: use data_factory_v2 instead. This file will be removed in V1.2. 
+# DEPRECATED: use data_factory_v2 instead. This file will be removed in """ 测试数据工厂 - Faker + Factory Boy 生成标准化测试数据 被引用方:05-数据准备 agent / data-preparation skill / conftest.py diff --git a/utils/data/db_test_helper.py b/utils/data/db_test_helper.py index e885ecba..ff090a01 100644 --- a/utils/data/db_test_helper.py +++ b/utils/data/db_test_helper.py @@ -1,5 +1,5 @@ # SPDX-License-Identifier: MIT -# DEPRECATED: use db_test_helper_v2 instead. This file will be removed in V1.2. +# DEPRECATED: use db_test_helper_v2 instead. This file will be removed in """ 数据库测试工具:CRUD / 事务 ACID / 迁移 / 备份恢复 / 慢查询 / 死锁 被引用方:05-数据准备 + 安全/可靠性测试 diff --git a/utils/design/suite_minimizer.py b/utils/design/suite_minimizer.py index c401bc69..cfe31d5b 100644 --- a/utils/design/suite_minimizer.py +++ b/utils/design/suite_minimizer.py @@ -1,5 +1,5 @@ # SPDX-License-Identifier: MIT -# DEPRECATED: use suite_minimizer_v2 instead. This file will be removed in V1.2. +# DEPRECATED: use suite_minimizer_v2 instead. This file will be removed in """ 测试套件减重(Suite Minimization)- 检测重复用例 / 冗余覆盖 被引用方:testcase-designer / 测试质量 diff --git a/utils/performance/chaos_helper.py b/utils/performance/chaos_helper.py index f8619321..4217cafb 100644 --- a/utils/performance/chaos_helper.py +++ b/utils/performance/chaos_helper.py @@ -1,5 +1,5 @@ # SPDX-License-Identifier: MIT -# DEPRECATED: use chaos_helper_v2 instead. This file will be removed in V1.2. +# DEPRECATED: use chaos_helper_v2 instead. 
This file will be removed in """ 混沌工程:故障注入(CPU/内存/磁盘/网络/进程杀死) 被引用方:16-可靠性稳定性 agent / chaos-test skill diff --git a/utils/platforms/mobile_driver.py b/utils/platforms/mobile_driver.py index 390ea1a9..989296ae 100644 --- a/utils/platforms/mobile_driver.py +++ b/utils/platforms/mobile_driver.py @@ -168,7 +168,7 @@ def _parse_gfxinfo_fps(output: str) -> Optional[float]: """ 粗略统计 gfxinfo framestats 帧数(非精确 FPS)。 PROFILEDATA 段下每行 CSV 是一帧;真精确 FPS 需 timestamp 列差。 - TODO(V2.x): 解析 timestamp 列,计算 (frame_count - 1) / (timestamp[-1] - timestamp[0]) 真 FPS + TODO: 解析 timestamp 列,计算 (frame_count - 1) / (timestamp[-1] - timestamp[0]) 真 FPS """ frame_count = 0 in_data = False diff --git a/utils/reporting/generate_report.py b/utils/reporting/generate_report.py index 632c0776..b66eb714 100644 --- a/utils/reporting/generate_report.py +++ b/utils/reporting/generate_report.py @@ -68,13 +68,13 @@ def _write_docx_degraded_warning(doc, data: Dict) -> None: warning_p = doc.add_paragraph() warning_run = warning_p.add_run( f"本次报告基于不完整测试数据生成。共 {len(degraded_upstream)} 个 expert " - f"输出 degraded(mock 兜底 / LLM 失败 / 未实装 V1.x rollout):" + f"输出 degraded(mock 兜底 / LLM 失败 / 未实装 rollout):" ) warning_run.font.color.rgb = RGBColor(255, 140, 0) warning_run.bold = True for name in degraded_upstream: item = doc.add_paragraph(style="List Bullet") - item.add_run(f"expert '{name}' — 详见 ROADMAP.md V1.x rollout 节奏") + item.add_run(f"expert '{name}' — 详见 ROADMAP.md rollout 节奏") impact_p = doc.add_paragraph() impact_run = impact_p.add_run("→ 上线决策建议: conditional 或 no-go(由 test-lead 判定);不应基于此报告直接发版。") impact_run.bold = True diff --git a/utils/security/ai_adversarial.py b/utils/security/ai_adversarial.py index c406dc3d..8378a08f 100644 --- a/utils/security/ai_adversarial.py +++ b/utils/security/ai_adversarial.py @@ -263,7 +263,7 @@ def test_llm_jailbreak(endpoint: str, - 默认 prompts=JAILBREAK_PROMPTS (DAN / system 泄漏 / 指令覆盖) 风险 HIGH, 需 confirm_offensive=True 显式确认(kwarg opt-in 防误调)。 - 拒绝检测 (替代旧 binary keyword 判定, 见协作章程 
§1.3 f5/f6): + 拒绝检测 (替代旧 binary keyword 判定, 见协作章程 f5/f6): - 每条输出经 _compute_refusal_confidence 算 0.0-1.0 置信度 - min_confidence (默认 0.1, 范围 0.0-1.0) 为阈值, 高于阈值算"拒绝" - 阈值越高假阳性越少 (但漏报更多), 调用方自由权衡 diff --git a/utils/security/api_security_scanner.py b/utils/security/api_security_scanner.py index 0abed394..53fab3d9 100644 --- a/utils/security/api_security_scanner.py +++ b/utils/security/api_security_scanner.py @@ -1,5 +1,5 @@ # SPDX-License-Identifier: MIT -# DEPRECATED: use api_security_scanner_v2 instead. This file will be removed in V1.2. +# DEPRECATED: use api_security_scanner_v2 instead. This file will be removed in """ API 安全测试 - OWASP API Top 10 覆盖 - API1: BOLA(越权访问对象) diff --git a/utils/testing/bdd_runner.py b/utils/testing/bdd_runner.py index a40da0fc..c9c4e646 100644 --- a/utils/testing/bdd_runner.py +++ b/utils/testing/bdd_runner.py @@ -1,5 +1,5 @@ # SPDX-License-Identifier: MIT -# DEPRECATED: use bdd_runner_v2 instead. This file will be removed in V1.2. +# DEPRECATED: use bdd_runner_v2 instead. This file will be removed in """ BDD(Behavior-Driven Development)/ 验收测试 - pytest-bdd 包装 被引用方:03-用例设计 + 06-自动化脚本(验收测试场景) diff --git a/utils/testing/state_machine_tester.py b/utils/testing/state_machine_tester.py index 2c9b9ab9..d19ef58e 100644 --- a/utils/testing/state_machine_tester.py +++ b/utils/testing/state_machine_tester.py @@ -1,5 +1,5 @@ # SPDX-License-Identifier: MIT -# DEPRECATED: use state_machine_tester_v2 instead. This file will be removed in V1.2. +# DEPRECATED: use state_machine_tester_v2 instead. 
This file will be removed in """ 状态迁移测试(State Transition Testing) 被引用方:03-用例设计 agent / testcase-design skill diff --git a/workspace/_demo/STARTUP.md b/workspace/_demo/STARTUP.md index 39185870..1fdb538b 100644 --- a/workspace/_demo/STARTUP.md +++ b/workspace/_demo/STARTUP.md @@ -57,7 +57,7 @@ tagent run "https://example.com" --mode learn |------|------| | `LLM 调用 raise` | 检查 API key + 网络;切 `TAGENT_LLM_PROVIDER=ollama` 离线兜底 | | `BugTracker 提交失败` | 占位没填或网络 / 权限错;不阻塞,但日报会少 | -| `通知没发出` | 至少配 1 个渠道(主宪章 §36);未配自动跳过 | +| `通知没发出` | 至少配 1 个渠道;未配自动跳过 | | `selftest n7 失败` | 装 python-docx:`pip install python-docx` | ## 7. 下一步 diff --git a/workspace/_demo/tagent.yml b/workspace/_demo/tagent.yml index d30dde7b..a65a34b3 100644 --- a/workspace/_demo/tagent.yml +++ b/workspace/_demo/tagent.yml @@ -21,7 +21,7 @@ skills: bug_tracker: primary: webhook - # 多 tracker 并存(主宪章 §37):写成 [zentao, github],按 Bug label 路由 + # 多 tracker 并存:写成 [zentao, github],按 Bug label 路由 # extra: [github] notifiers: @@ -34,15 +34,15 @@ quality_gates: perf_p99_ms_max: 300 selftest: - # 主宪章 §33 自检铁律 + # 自检规则 pre_tag_required: true pass_threshold: 0.80 strict_on_release: true marketplace: - enabled: false # 默认关 · 主宪章 §30 safe-by-default + enabled: false # 默认关 -# ============== SAFETY GATES · safe-by-default(主宪章 §22 / §35 + W5 sprint v2) ============== +# ============== SAFETY GATES · safe-by-default ============== # 危险操作 / 自动化 / 影响生产 的功能 必须显式开启, 否则 destructive-guard 拒绝运行。 # 详见 SECURITY.md 武器化代码使用边界 + 测试工具准入控制 节。 @@ -68,7 +68,7 @@ gateway: # 例: enabled_platforms: [telegram, feishu] pentest: - # 法律契约(default refuse · charter §35); rollout 阶段 yml gate 占位, V1.x 激活后接入真实路由 + # 法律契约(default refuse · charter ); rollout 阶段 yml gate 占位, V1.x 激活后接入真实路由 authorized: false # 法律授权确认 scope_in_targets: [] # IP/domain/URL 白名单(IN) scope_out_targets: [] # 强制黑名单(覆盖 IN) From 05ba4287ed0e60aa61794f4bfa98abb31666f765 Mon Sep 17 00:00:00 2001 From: xiaoxing0135 <706015750@qq.com> Date: Fri, 12 Jun 2026 21:20:28 +0800 Subject: [PATCH 
5/5] chore: re-clean after merge with main --- ...71\347\233\256\345\257\274\350\210\252.md" | 2 +- CHANGELOG.md | 48 +++---- README.md | 2 +- README.zh-CN.md | 2 +- ROADMAP.md | 122 +++++++++--------- ...13\350\257\225\344\270\273\347\256\241.md" | 2 +- ai/skills/smoke-test.md | 4 +- deploy/config/.mcp.json | 2 +- deploy/config/requirements.txt | 2 +- deploy/config/templates/base.tagent.yml.tpl | 2 +- deploy/marketplace/_safety_policy.yaml | 2 +- docs/STYLE.md | 2 +- docs/assets/demo-script-v1.12.md | 4 +- docs/assets/terminalizer-config.yml | 2 +- ...51\230\262mock\351\227\255\347\216\257.md" | 20 +-- docs/case-studies/INDEX.md | 2 +- docs/charter/01-vision-dimensions.md | 6 +- docs/charter/03-agentchat-protocol.md | 4 +- docs/charter/04-skills-bugtracker.md | 4 +- docs/charter/06-test-architecture.md | 16 +-- docs/charter/07-runtime-license.md | 14 +- docs/charter/INDEX.md | 2 +- ...77\347\224\250\346\211\213\345\206\214.md" | 8 +- ...50\347\275\262\350\257\264\346\230\216.md" | 4 +- runtime/orchestrator/agents/INDEX.md | 2 +- workspace/_demo/tagent.yml | 2 +- 26 files changed, 141 insertions(+), 141 deletions(-) diff --git "a/00-\351\241\271\347\233\256\345\257\274\350\210\252.md" "b/00-\351\241\271\347\233\256\345\257\274\350\210\252.md" index 4af95e78..dd350ac2 100644 --- "a/00-\351\241\271\347\233\256\345\257\274\350\210\252.md" +++ "b/00-\351\241\271\347\233\256\345\257\274\350\210\252.md" @@ -5,7 +5,7 @@ --- -## 零、V1.1.0 运行时层(`runtime/`) +## 零、运行时层(`runtime/`) > 已有 16 专家 / 32 Skill(含 darwin-skill 自进化) / 79 脚本**保持不动**(宪章规则),runtime 仅作可执行调度层。 diff --git a/CHANGELOG.md b/CHANGELOG.md index 19fa84cb..3d9efecc 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -67,7 +67,7 @@ - `/forget` 关键词太短导致误删:最小 3 字符 - `load_memory_md` 异常静默吞噬:改为 `logger.warning` - skill rollout 总数: 16 → 18 (中央 `runtime/tests/test_skill_runners.py` `ALL_SKILL_RUNNERS` 同步加 2 行) -- skill active 数: 30/32 → **32/32** (V1.x SKILL ROLLOUT 完整收尾,0 vision/0 rollout/0 unknown) +- skill active 数: 30/32 → 
**32/32** (SKILL ROLLOUT 完整收尾,0 vision/0 rollout/0 unknown) - runtime/orchestrator/skills/__init__.py: 聚合 import 新增 agent_introspection_debugging + build_your_own_x_explorer - **P2 能力层 — Agent 交互层 6 项全部实装:** @@ -299,7 +299,7 @@ _后续累积变更入此节;切版本时移到下方版本节。_ ## [v1.32.0] - 2026-05-17 -> **V1.15.0 → V1.32.0 (2026-05-15 ~ 2026-05-17) 共 17 版累积**。 +> **→ (2026-05-15 ~ 2026-05-17) 共 17 版累积**。 > expert rollout 收尾 + skill rollout 全 16/16 完成 。 > 版本历史见 [ROADMAP.md](ROADMAP.md#进度跟踪) 进度跟踪表。 @@ -336,11 +336,11 @@ _后续累积变更入此节;切版本时移到下方版本节。_ - **多 provider 通用 env 通道**: `LLM_PROVIDER` + `LLM_API_KEY` + `LLM_MODEL` 通用 env, 6 provider 内置 (claude/openai/gemini/qwen/deepseek/ollama) - Stub 扩 4 path 支持 vendor-neutral 多厂商 routing -### Added (MCP 6 件套 · V1.2.0) +### Added (MCP 6 件套 · ) - `runtime/mcp/` 6 MCP server: test-orchestrator / protocol-adapter / evidence-vault / defect-tracker / knowledge-base / compliance-checker -### Added (Web UI · V1.2.0) +### Added (Web UI · ) - `runtime/web/` 4 页: Upload / Run Status / Report / Catalog (React 18 + Vite 5 + shadcn/ui + TanStack Query v5) @@ -361,11 +361,11 @@ _后续累积变更入此节;切版本时移到下方版本节。_ ## [v1.14.0] - 2026-05-12 -> **首次正式版本切节**(W7-2, 2026-05-14): V1.1.0 到 V1.14.0 共 13 个内部 alpha 累积归入本节。后续新变更入 [Unreleased]。 +> **首次正式版本切节**(W7-2, 2026-05-14): 到 共 13 个内部 alpha 累积归入本节。后续新变更入 [Unreleased]。 ### Added(5 核心 expert 真 LLM 落地 + 录制脚本 · 2026-05-12) -> 起因:战略参谋诚实交底——V1.13 的 selftest 100% PASS 是"骨架通"不是"内涵通",16 expert 里 11 个仍是 no-op。用户授权 C 路线(5 核心 expert 真 LLM)+ B(录制脚本)。 +> 起因:战略参谋诚实交底——的 selftest 100% PASS 是"骨架通"不是"内涵通",16 expert 里 11 个仍是 no-op。用户授权 C 路线(5 核心 expert 真 LLM)+ B(录制脚本)。 - **`runtime/orchestrator/agents/` 新模块**: - `base.py`:`AgentRunner` ABC + `RunnerContext` + `RunnerResult` + `AGENT_RUNNERS` registry + `@register` + `get_runner` @@ -375,7 +375,7 @@ _后续累积变更入此节;切版本时移到下方版本节。_ - **真 LLM** 时:调 `aux_client.complete()` → 解析 JSON → 落盘 → 给下游 - INDEX.md 文档化 5 runner schema + 上下游 - **adapter wiring**(`runtime/orchestrator/adapters/experts.py`): - - `execute_node` 
先查 `AGENT_RUNNERS`(优先 V1.14),fallback `SCRIPT_MAP` + - `execute_node` 先查 `AGENT_RUNNERS`(优先 ),fallback `SCRIPT_MAP` - `_upstream_outputs` 缓存:每 runner 产物给下游 RunnerContext.upstream - `reset_upstream_cache()` 由 flow 每 run 开头调 - SCRIPT_MAP 路径排除 `artifact_text/lang/mode` 防多行文本炸 argparse @@ -391,10 +391,10 @@ _后续累积变更入此节;切版本时移到下方版本节。_ - `_demo-commands.sh`:实际 demo 命令序列(被 record-demo-* 调) - `record-demo-asciinema.sh`:`asciinema rec` 自动录,产 .cast 可上传 asciinema.org 或转 GIF/SVG - `record-demo-obs.sh`:OBS / QuickTime 屏幕录制配套(用户摁录制 → 跑此脚本,节奏自动) - - `docs/assets/terminalizer-config.yml`:精修 V1.14 配置(Catppuccin Mocha 主题 + UTF-8 + stub LLM env) + - `docs/assets/terminalizer-config.yml`:精修 配置(Catppuccin Mocha 主题 + UTF-8 + stub LLM env) - **真 agent 落地 canon**:5 核心 + 11 fallback + 加新 runner 流程 + RunnerContext / RunnerResult 协议 -- 烟测:**9/9 strict PASS · 5 真 runner 产物全落盘**(原 V1.13 8/8 是 3 script + 5 no-op -- 版本 V1.13.0 → **V1.14.0** +- 烟测:**9/9 strict PASS · 5 真 runner 产物全落盘**(原 8/8 是 3 script + 5 no-op +- 版本 → **** ### Added(README hero 重写 + `tagent demo` + 30 秒 demo 录制脚本 · 2026-05-12) @@ -411,7 +411,7 @@ _后续累积变更入此节;切版本时移到下方版本节。_ - 渠道适配:Twitter/X · 微信视频号 · 掘金/V2EX · Hacker News(同一份素材 4 平台) - **00-导航 同步**:CLI 行加 `demo` 子命令 - 烟测 `tagent demo` 产 36+ 文件全过 · L1/L3 strict 不破 -- 版本 V1.12.0 → **V1.13.0** +- 版本 → **** ### Added(`tagent init` 配置自动组装 · 5 分钟从 0 到可跑 · 2026-05-12) @@ -439,13 +439,13 @@ _后续累积变更入此节;切版本时移到下方版本节。_ - **STARTUP.md 启动指南**:含填占位清单 + 装依赖 hint + 健康检查 + 烟雾跑通命令 + 推荐 skill 顺序 + 出错对照表 - 烟测:5 preset × 全过 + 8 测试类型组合全过 - L1 + L3 strict 不破:agents=16/16 skills=32/≥25 + selftest 8/8 100% -- 版本 V1.11.0 → **V1.12.0** +- 版本 → **** ### Fixed(同步规则批改 + BugTracker/多端 canon + n7 修 · 2026-05-12) - **同步规则()执行**:17 文件批改"三端通知"→"多端通知";"禅道 Bug 提交"项目级框架→"BugTracker(默认禅道,可换 Jira/GitHub/GitLab/Linear/Webhook)" - `00-项目导航.md` · `agents/{01,07,08,09}.md` · `agents/README.md` · `skills/{README,test-coordinator,zentao-bug-submission}.md` · `config/mcp-server-impl.md` · `utils/{README.md,api_retry_util.py}` · 
`ci/{INDEX,CICD集成说明}.md` · `docs/getting-started/{交付物清单,使用手册,配置清单}.md` · `examples/web-demo/README.md` · `CONTRIBUTING.md` · `FULL_GUIDE.md` -- **adapter 修 V1.10 n7 bug**:`runtime/orchestrator/adapters/experts.py` 加 `SCRIPT_DEFAULT_ARGS` + `_ensure_fixture()` 通用机制 +- **adapter 修 n7 bug**:`runtime/orchestrator/adapters/experts.py` 加 `SCRIPT_DEFAULT_ARGS` + `_ensure_fixture()` 通用机制 - 现 `tagent selftest --e2e --strict` **100% PASS 8/8**(原 88% 7/8) - generate_report.py 默认注入 `--data=workspace/测试报告/{项目名}/_selftest_summary.json`,fixture 自动生成 - **扩**: @@ -492,11 +492,11 @@ _后续累积变更入此节;切版本时移到下方版本节。_ tagent export plan.json --format opml --out workspace/测试用例/login.opml tagent export plan.json --format all --out-dir workspace/测试用例/ ``` -- **`/testcase-design` skill 扩**:description 加多格式声明;末尾加 V1.9 思维导图 / 大纲段(Excel 仍是默认) +- **`/testcase-design` skill 扩**:description 加多格式声明;末尾加 思维导图 / 大纲段(Excel 仍是默认) - **保留**:Excel 4-Sheet(`utils/excel_generator.py`)不动 - **扩展点 P2 留位**:freemind / plantuml / mermaid-mindmap(按需加) - 烟雾测试:3 exporter × sample TestCaseTree 全过(content.json 解析正常 / OPML XML 解析正常 / Markmap frontmatter 完整) -- 版本 V1.8.0 → V1.9.0 +- 版本 → ### Added(build-your-own-x 教学扩 + Marketplace 4 lane · 2026-05-12) @@ -514,7 +514,7 @@ _后续累积变更入此节;切版本时移到下方版本节。_ - ****:Marketplace 安全栅栏(4 关规则 + 3 信任级 + safe-by-default + 不复制官方源 + 卸载只归档 + 紧急 kill switch) - ****:教学层扩 13 大类(byox P0/P1/P2 分档 + 预算检查 + essence_only policy) - TOC 同步;skill 数升级 -- 版本 V1.7.0 → V1.8.0 +- 版本 → ### Added(Karpathy 4 原则 + ECC 测试加固 + Essence 自动汲取 · 2026-05-12) @@ -536,7 +536,7 @@ _后续累积变更入此节;切版本时移到下方版本节。_ - safe-by-default:`tagent.yml essence_watcher.enabled: true` 才跑 - **新增 3 节**:Karpathy 4 原则 / ECC 测试加固 / Essence 自动汲取 + TOC 同步 - 数字:14 skill → **32**(原 14 + 7 pentest + 5 automotive + 6 ECC) + `karpathy-guidelines/SKILL.md` upstream 1 个 -- 版本 V1.6.0 → V1.7.0 +- 版本 → ### Added(渗透&安全 + 车载&自动驾驶 双垂直专家+skill 集 · 2026-05-12) @@ -548,12 +548,12 @@ _后续累积变更入此节;切版本时移到下方版本节。_ - `pentest-coordinator`(主)/ `pentest-recon` / `pentest-vuln` / 
`pentest-exploit` / `pentest-web` / `pentest-api` / `pentest-report` - **5 新 automotive skill**: - `automotive-test`(主)/ `automotive-can-bus-test` / `automotive-adas-scenario` / `automotive-ota-update-test` / `automotive-hil-loop-test` -- ****:渗透 & 安全测试强化(规则化:授权前置 / scope 防护 / prod 禁 / 沙箱 / PoC-only / 不可逆禁止 / 责任披露 / PII scrub) +- ****:渗透 & 安全测试强化(规则化:授权前置 / scope 防护 / prod 禁 / 沙箱 / PoC-only / 禁止不可逆操作 / 责任披露 / PII scrub) - ****:车载 & 自动驾驶强化(规则化:ASIL C/D 必 HIL / L4 极深 / OTA 必回退 / 公开道路授权 / 录波 MDF4 / PII 禁存 / 领域档案签字) - **升级**:专家 14 → 16(核心 9 + 平台扩展 7) - **TOC 同步**:加 - 数字:14 expert → **16** | 14 skill → **26**(7 pentest + 5 automotive 新增) -- 版本 V1.5.0 → V1.6.0 +- 版本 → ### Added(GBrain-inspired 强化 + 跨项目精髓库扩 · 2026-05-12) @@ -562,7 +562,7 @@ _后续累积变更入此节;切版本时移到下方版本节。_ - **eval 回放**:`runtime/tutor/eval_replay.py`,`TAGENT_EVAL_CAPTURE=1` opt-in;PII 自动 scrub(email/phone/SSN/API-key/card 6 类正则);replay 3 数(Jaccard@k/top-1 stability/latency Δ);默认 off - **safe-by-default yaml 栅栏**:`runtime/config/safety.py` + `tagent.yml.example`;scheduler/curator/backends/gateway/destructive_ops 默认 deny;`assert_allowed` / `gate_*` 工厂函数;缺配置 → `SafeByDefaultBlocked` 异常 - ****:GBrain-inspired 强化(自连图谱 + 混合检索 + eval 回放 + safe-by-default + PII 单源)+ TOC 同步 -- 版本 V1.4.0 → V1.5.0 +- 版本 → ### Added(教学层 · 用户边用边学 · 2026-05-12) @@ -582,7 +582,7 @@ _后续累积变更入此节;切版本时移到下方版本节。_ - **CLI**:`tagent run --mode exec|learn|silent --lang zh|en|zh-en` - **API**:`POST /run/text?mode=&lang=` query 参数 - **反幻觉**:实测 unknown-id 正确标记"该领域未收录,慎用" -- 版本 V1.3.0 → V1.4.0 +- 版本 → ### Added(Hermes-inspired 5 模块 + 跨项目精髓库 · 2026-05-11) @@ -596,7 +596,7 @@ _后续累积变更入此节;切版本时移到下方版本节。_ - `runtime/backends/`:7 执行后端(`local/docker/ssh/singularity/modal/daytona/vercel_sandbox`);统一 `BaseExecutionEnv` 7 方法;Modal/Daytona 提供 serverless hibernate - `runtime/gateway/`:多平台 messaging(`telegram/discord/slack/wechat/feishu/dingtalk/email/webhook` 8 平台);统一 `Platform.send/configure`;`session.py` 跨平台对话连续 - ****:Hermes-inspired 扩展能力章节(规则化);TOC 同步更新 -- 版本 V1.2.0 → 
V1.3.0 +- 版本 → ### Added(M2 MCP 6 件套 + Web UI + 真模型路由 + 飞轮回灌 · 2026-05-11) @@ -616,12 +616,12 @@ _后续累积变更入此节;切版本时移到下方版本节。_ - L2 必测项:Playwright E2E 7 用例(功能+边界+异常+兼容+可访问性);axe-core a11y 0 critical 门槛 - 配套 vite 代理 `/api` → FastAPI(:8800) - **`.mcp.json` 升级**:启用 `filesystem` + `test-orchestrator`;其他 5 件套写入 `_pending_servers_v1_2_0_alpha` 段供按需启用 -- 版本 V1.1.0 → V1.2.0 +- 版本 → ### Added(宪章合一 · darwin-skill 入库 · 2026-05-11) - **扩展(memory `project_test_agent_workflow.md`)**:原 -+ How to apply 1-6 **字符级保留**;新增 -仅承载规则/要求/约束(剔除示例/枚举/参考表): - - 灵魂底色:三公理 + 五条铭文 + V1.0.0 锁死 + 双签解锁条件 + - 灵魂底色:三公理 + 五条铭文 + 锁死 + 双签解锁条件 - FULL_GUIDE.md 定位补充(优先级链:memory > FULL_GUIDE > README) - 多 Bug Tracker(默认 zentao + 扩展位 `BugTrackerBase` 契约) - 按需安装 + 运行时补装规则 diff --git a/README.md b/README.md index 0a7cf28d..74e41ecf 100644 --- a/README.md +++ b/README.md @@ -87,7 +87,7 @@ Test-Agent/ ├── skills/ ← 32 business skills + 3 meta-skills ├── utils/ ← 79 production utils (pytest · Playwright · JMeter · Appium · …) ├── config/ ← conftest / pytest.ini / .mcp.json -├── runtime/ ← V1.x runtime (router · orchestrator · MCP · …) +├── runtime/ ← runtime (router · orchestrator · MCP · …) ├── ci/ ← GitHub Actions + Jenkins ├── docs/ ← user manual / architecture / theory / compliance ├── marketplace/ ← community skills / agents / mcp / hooks diff --git a/README.zh-CN.md b/README.zh-CN.md index e36f448e..f35d11e9 100644 --- a/README.zh-CN.md +++ b/README.zh-CN.md @@ -87,7 +87,7 @@ Test-Agent/ ├── skills/ ← 32 个业务 Skill + 3 元 Skill ├── utils/ ← 79 个生产工具(pytest · Playwright · JMeter · Appium · …) ├── config/ ← conftest / pytest.ini / .mcp.json -├── runtime/ ← V1.x 运行时(router · orchestrator · MCP · …) +├── runtime/ ← 运行时(router · orchestrator · MCP · …) ├── ci/ ← GitHub Actions + Jenkins ├── docs/ ← 使用手册 / 架构 / 教学 / 合规 ├── marketplace/ ← 社区 skills / agents / mcp / hooks diff --git a/ROADMAP.md b/ROADMAP.md index 58f36842..f8b43b0e 100644 --- a/ROADMAP.md +++ b/ROADMAP.md @@ -1,13 +1,13 @@ -# Test-Agent V1.x ROADMAP +# 
Test-Agent ROADMAP > 项目终态目标:每个 expert / skill 真 LLM-driven / script-backed 实装,**绝不输出 mock 数据**。 -> 当前状态:V1.0.0 (**expert rollout 收尾(16/16)+ skill rollout 完成(32/32)+ Phase 3 完成 + Phase 4 完成 + Phase 5 完成**) +> 当前状态:(**expert rollout 收尾(16/16)+ skill rollout 完成(32/32)+ Phase 3 完成 + Phase 4 完成 + Phase 5 完成**) > - **expert 16/16 active**(11 production + 5 script);0 rollout。 > - **skill 32/32 active**(23 production + 7 script + 2 vision→production);0 rollout;0 vision。 > - 3 meta-skill(nuwa-skill / darwin-skill / karpathy-guidelines)独立,工具属性,不在 32 业务 skill 数内。 -> - **V1.21.0 新增 SkillRunner 基础设施** (`runtime/orchestrator/skills/` + `SKILL_RUNNERS` registry + `@register_skill` deco + `experts.py` kind=skill 接 runner),解锁 skill 层 LLM-driven 全 16 实装 。 +> - **新增 SkillRunner 基础设施** (`runtime/orchestrator/skills/` + `SKILL_RUNNERS` registry + `@register_skill` deco + `experts.py` kind=skill 接 runner),解锁 skill 层 LLM-driven 全 16 实装 。 -## 当前活跃 expert (16 / 16) — V1.x rollout 收尾 +## 当前活跃 expert (16 / 16) — rollout 收尾 ### 11 真 LLM-driven (已上线) @@ -71,30 +71,30 @@ --- -## V1.x rollout — 6 expert LLM-driven minimum viable 实装路线 +## rollout — 6 expert LLM-driven minimum viable 实装路线 **节奏**: 一周 1 expert,共 6 周。每完成 1 个,active 数字 +1,README 同步。 -**前置**: V1.15 Day 0 — runtime/router 防 mock 改造(拒绝未实装路由,返回明确错误)。 +**前置**: Day 0 — runtime/router 防 mock 改造(拒绝未实装路由,返回明确错误)。 **完成标准**: 每 expert 接 LLM 真调用 + 结构化输出(markdown/JSON),通过 3 个测试 prompt 验证。 | # | Expert | LLM-driven 实装范围(minimum viable) | 目标版本 | 状态 | |---|--------|------------------------------------|---------|------| -| 0 | (前置) runtime/router + orchestrator 防 mock | catalog 单源 frontmatter 解析;router._validate_against_catalog warn + 降 confidence;orchestrator.execute_node 硬拒 rollout/vision/unknown(returncode=2,绝不输出 mock);expert + skill 双 layer 覆盖 | V1.14.0+1 | **done** (PR X4) | -| 1 | `env-manager` | LLM 读 PRD → 环境检查清单 + 准备步骤 markdown | V1.15.0 | **done** (runtime/orchestrator/agents/env_manager.py) | -| 2 | `mobile-tester` | LLM 读 PRD + Android/iOS 
上下文 → 移动测试用例 + ADB/Xcode 命令清单 | V1.16.0 | **done** (runtime/orchestrator/agents/mobile_tester.py) | -| 3 | `visual-tester` | LLM 读 PRD + UI 描述 → 视觉测试点 + Playwright 视觉对比脚本 | V1.17.0 | **done** (runtime/orchestrator/agents/visual_tester.py) | -| 4 | `system-tester` | LLM 读 PRD + IoT/串口/MQTT 上下文 → IoT 测试用例 + 命令清单 | V1.18.0 | **done** (runtime/orchestrator/agents/system_tester.py) | -| 5 | `pentest-tester` | LLM 读 PRD + 授权检查通过 → 渗透测试计划 + 工具调用清单(生成计划,不执行攻击) | V1.19.0 | **done** (runtime/orchestrator/agents/pentest_tester.py;仅输出计划文本,真执行守护已在 utils 层 `api_security_scanner.py` / `ai_adversarial.py` 用 TAGENT_PENTEST_AUTHORIZED env gate;法律责任在操作者侧,见 SECURITY.md L84) | -| 6 | `automotive-tester` | LLM 读 PRD + CAN-bus/ISO-26262 上下文 → ASIL 评估 + HIL 测试用例 | V1.20.0 | **done** (runtime/orchestrator/agents/automotive_tester.py;ASIL 评估 + test_cases + bus_test_plan + adas_scenarios + ota_plan + compliance_matrix 结构化 JSON;覆盖 ECU/ADAS/IVI/V2X 4 子系统 + 8 协议 + 8 合规标准。**V1.x rollout 收尾**) | +| 0 | (前置) runtime/router + orchestrator 防 mock | catalog 单源 frontmatter 解析;router._validate_against_catalog warn + 降 confidence;orchestrator.execute_node 硬拒 rollout/vision/unknown(returncode=2,绝不输出 mock);expert + skill 双 layer 覆盖 | +1 | **done** (PR X4) | +| 1 | `env-manager` | LLM 读 PRD → 环境检查清单 + 准备步骤 markdown | | **done** (runtime/orchestrator/agents/env_manager.py) | +| 2 | `mobile-tester` | LLM 读 PRD + Android/iOS 上下文 → 移动测试用例 + ADB/Xcode 命令清单 | | **done** (runtime/orchestrator/agents/mobile_tester.py) | +| 3 | `visual-tester` | LLM 读 PRD + UI 描述 → 视觉测试点 + Playwright 视觉对比脚本 | | **done** (runtime/orchestrator/agents/visual_tester.py) | +| 4 | `system-tester` | LLM 读 PRD + IoT/串口/MQTT 上下文 → IoT 测试用例 + 命令清单 | | **done** (runtime/orchestrator/agents/system_tester.py) | +| 5 | `pentest-tester` | LLM 读 PRD + 授权检查通过 → 渗透测试计划 + 工具调用清单(生成计划,不执行攻击) | | **done** (runtime/orchestrator/agents/pentest_tester.py;仅输出计划文本,真执行守护已在 utils 层 `api_security_scanner.py` / `ai_adversarial.py` 用 TAGENT_PENTEST_AUTHORIZED 
env gate;法律责任在操作者侧,见 SECURITY.md L84) | +| 6 | `automotive-tester` | LLM 读 PRD + CAN-bus/ISO-26262 上下文 → ASIL 评估 + HIL 测试用例 | | **done** (runtime/orchestrator/agents/automotive_tester.py;ASIL 评估 + test_cases + bus_test_plan + adas_scenarios + ota_plan + compliance_matrix 结构化 JSON;覆盖 ECU/ADAS/IVI/V2X 4 子系统 + 8 协议 + 8 合规标准。**rollout 收尾**) | --- -## V1.x rollout — 16 skill 实装路线(已全部完成) +## rollout — 16 skill 实装路线(已全部完成) -**节奏**: skill rollout 起点 V1.21.0 (SkillRunner 基础设施 + pentest-coordinator 首落地);后续 1 skill / PR 推进。 +**节奏**: skill rollout 起点 (SkillRunner 基础设施 + pentest-coordinator 首落地);后续 1 skill / PR 推进。 **完成标准**: 每 skill 接 LLM 真调用 (mock_output schema 覆盖 + production 升级 + ALL_SKILL_RUNNERS 锁规则同步)。 -**前置**: ~~runtime/router 防 mock 改造 + skill 路由按 `SKILL_IMPL_STATUS` frontmatter 过滤~~ **已完成 V1.14.0+1 (PR X4)** — registry parse frontmatter + orchestrator.execute_node 拒 rollout/vision/unknown skill (returncode=2)。 -**基础设施**: **V1.21.0 完成** — `runtime/orchestrator/skills/__init__.py` + `SKILL_RUNNERS` registry + `@register_skill` deco + `experts.py` kind=skill 接 skill runner (放在 SCRIPT_MAP fallback 前)。 +**前置**: ~~runtime/router 防 mock 改造 + skill 路由按 `SKILL_IMPL_STATUS` frontmatter 过滤~~ **已完成 +1 (PR X4)** — registry parse frontmatter + orchestrator.execute_node 拒 rollout/vision/unknown skill (returncode=2)。 +**基础设施**: **完成** — `runtime/orchestrator/skills/__init__.py` + `SKILL_RUNNERS` registry + `@register_skill` deco + `experts.py` kind=skill 接 skill runner (放在 SCRIPT_MAP fallback 前)。 ### 通用平台 4 skill @@ -129,15 +129,15 @@ --- -## V1.36 能力扩展 +## 能力扩展 -- **V1.34**: script_bridge.py 桥接 5 独立脚本进 orchestrator pipeline -- **V1.35**: 11 深度审计模块 (flaky guard / API security v2 / data factory v2 / perf orchestrator / event harness / visual regression / ML prioritizer / differential tester / EU AI Act / supply chain) -- **V1.36**: 6 延期模块 (chaos v2 / state machine v2 / DB test v2 / BDD v2 / carbon scheduler / canary config) + CVE-2025-71176 fix + 深度审查65发现全修 +- ****: script_bridge.py 桥接 
5 独立脚本进 orchestrator pipeline +- ****: 11 深度审计模块 (flaky guard / API security v2 / data factory v2 / perf orchestrator / event harness / visual regression / ML prioritizer / differential tester / EU AI Act / supply chain) +- ****: 6 延期模块 (chaos v2 / state machine v2 / DB test v2 / BDD v2 / carbon scheduler / canary config) + CVE-2025-71176 fix + 深度审查65发现全修 --- -## V2.x vision — 2 skill(暂留方法论参考形态) +## vision — 2 skill(暂留方法论参考形态) | Skill | 当前形态 | V2 路线 | |-------|----------|---------| @@ -146,7 +146,7 @@ --- -## V2.x 路线图 (longer-term) +## 路线图 (longer-term) ### Skill Lifecycle 元工具改造 (适配测试领域) @@ -169,7 +169,7 @@ **绝不输出 mock 数据糊弄用户。** -V1.14.0+1 (PR X4) 起,双 layer 防 mock 已落地: ++1 (PR X4) 起,双 layer 防 mock 已落地: - **registry 单源**: catalog 解析 `agents/*.md` `EXPERT_IMPL_STATUS` + `skills/*.md` `SKILL_IMPL_STATUS` frontmatter,实装状态来源唯一 - **router 软警告**: `_validate_against_catalog` 检测 rollout / vision / unknown → 加 issues 并降 confidence 0.3 - **orchestrator 硬拒**: `execute_node` 对 expert / skill 任意 rollout / vision / unknown 返回 `returncode=2` + stderr "未实装",绝不走 no-op "documented step recorded" 假成功路径 @@ -182,40 +182,40 @@ V1.14.0+1 (PR X4) 起,双 layer 防 mock 已落地: | 版本 | 日期 | 完成项 | active expert 数 | |------|------|--------|----------------| -| V1.14.0 | 2026-05-13 | bundle1 信任+法律线修复;ROADMAP.md 起步 | 10/16 | -| V1.14.0+1 | 2026-05-15 | X3 数字诚实化(README/ROADMAP)+ X4 防 mock 闭环 (registry 单源 frontmatter / router warn / orchestrator hard block expert+skill 双 layer) | 10/16 | -| V1.15.0 | 2026-05-15 | env-manager LLM-driven minimum viable (runtime/orchestrator/agents/env_manager.py;LLM 读 PRD → env_checks + prep_steps + dependencies + risks 结构化 JSON) | 11/16 | -| V1.16.0 | 2026-05-15 | mobile-tester LLM-driven minimum viable (runtime/orchestrator/agents/mobile_tester.py;LLM 读 PRD + Android/iOS 上下文 → test_cases + device_commands + mobile_specific 结构化 JSON) | 12/16 | -| V1.17.0 | 2026-05-15 | visual-tester LLM-driven minimum viable (runtime/orchestrator/agents/visual_tester.py;LLM 读 PRD + UI 描述 → 
visual_test_points + comparison_scripts + tolerance + baseline_strategy 结构化 JSON) | 13/16 | -| V1.18.0 | 2026-05-15 | system-tester LLM-driven minimum viable (runtime/orchestrator/agents/system_tester.py;LLM 读 PRD + IoT/串口/MQTT 上下文 → test_cases + device_commands + protocol_specific + test_environment 结构化 JSON;覆盖 IoT/audiovideo/tracing/mq/integration 5 类) | 14/16 | -| V1.19.0 | 2026-05-16 | pentest-tester LLM-driven minimum viable (runtime/orchestrator/agents/pentest_tester.py;LLM 读 PRD + 安全上下文 → test_mode + target_scope + recon/vuln/exploit/reporting phases 结构化 JSON;覆盖 5 攻击域 Injection/XSS/SSRF/Auth/Authz;仅输出计划文本,真执行守护在 utils 层 env gate;法律责任在操作者侧 SECURITY.md L84) | 15/16 | -| V1.20.0 | 2026-05-16 | automotive-tester LLM-driven minimum viable (runtime/orchestrator/agents/automotive_tester.py;LLM 读 PRD + CAN-bus/ISO-26262 上下文 → vehicle_subsystem + asil_assessment + test_cases + bus_test_plan + adas_scenarios + ota_plan + compliance_matrix + test_environment 结构化 JSON;覆盖 ECU/ADAS/IVI/V2X 4 子系统 + 8 协议 + 8 合规标准。**V1.x rollout 收尾**) | 16/16 expert (V1.x rollout 完成) | -| V1.21.0 | 2026-05-16 | **skill rollout 起点** — SkillRunner 基础设施 (runtime/orchestrator/skills/__init__.py + SKILL_RUNNERS registry + @register_skill deco + experts.py kind=skill 接 runner) + pentest-coordinator 首 skill 落地 (5 阶段编排 + authorization_check + subagent_pool + refuse_conditions). 解锁 14 rollout skill 后续流水线. | 16 expert + 8/32 production (15 rollout 待) | -| V1.22.0 | 2026-05-16 | **tagent config CLI** — 多模型 onboarding Step 2 (runtime/cli/config.py · 6 provider 内置 + 厂商配置 cookbook + use/set/unset/list/show 子命令). **多 provider 通用 env 通道** (LLM_PROVIDER + LLM_API_KEY + LLM_MODEL) + stub 扩 4 path. 
| 16 expert + 8/32 production | -| V1.23.0 | 2026-05-16 | **skill rollout #2** — mobile-test skill LLM-driven 生产落地 (runtime/orchestrator/skills/mobile_test.py · Android/iOS 双平台 + 小程序支持) | 16 expert + 9/32 production | -| V1.24.0 | 2026-05-16 | **skill rollout #3** — visual-test skill LLM-driven 生产落地 (runtime/orchestrator/skills/visual_test.py · Airtest + OCR + SSIM 视觉对比) | 16 expert + 10/32 production | -| V1.25.0 | 2026-05-16 | **skill rollout #4** — pentest-recon + pentest-vuln 双 skill LLM-driven 生产落地 (侦察: 端口/子域/服务指纹 + 漏洞: 5 攻击域 hybrid SAST+blackbox) | 16 expert + 12/32 production | -| V1.26.0 | 2026-05-16 | **skill rollout #5** — system-test skill LLM-driven 生产落地 (runtime/orchestrator/skills/system_test.py · IoT/音视频/追踪/消息队列 4 场景) | 16 expert + 13/32 production | -| V1.27.0 | 2026-05-16 | **skill rollout #6** — eval-harness skill LLM-driven 生产落地 (runtime/orchestrator/skills/eval_harness.py · pass@k / Jaccard@k / top-1 stability / latency 4 指标 + 安全护栏) | 16 expert + 14/32 production | -| V1.28.0 | 2026-05-16 | **skill rollout #7** — pentest-api + pentest-web 双 skill LLM-driven 生产落地 (API: OWASP API Top 10 + REST/GraphQL/gRPC/WebSocket · Web: OWASP Top 10 + ASVS + 2FA 自动登录) | 16 expert + 16/32 production | -| V1.29.0 | 2026-05-16 | **skill rollout #8** — pentest-exploit + pentest-report 双 skill LLM-driven 生产落地 (exploit: 沙箱内验证 PoC + 不可破坏性约束 · report: working PoC 嵌入 + CWE/CVSS/PoC/修复 4 维) | 16 expert + 18/32 production | -| V1.30.0 | 2026-05-16 | **skill rollout #9** — automotive-test + automotive-can-bus-test 双 skill LLM-driven 生产落地 (主编排: 10 阶段 HARA→报告 · CAN: CAN/CAN-FD/SOME-IP 协议一致性 + dbc 解析) | 16 expert + 20/32 production | -| V1.31.0 | 2026-05-16 | **skill rollout #10 (收尾)** — automotive-adas-scenario + automotive-ota-update-test + automotive-hil-loop-test 3 skill LLM-driven 生产落地 (ADAS: AEB/ACC/LKA + CARLA 仿真 · OTA: 6 校验 + UN R156/GB 44496 合规 · HIL: MIL/SIL/HIL 三环 + dSPACE). 
**V1.x rollout 完成 — 23/32 production + 7 script + 0 rollout + 2 vision.** | 16 expert + 23/32 production (0 rollout 待) | -| V1.32.0 | 2026-05-17 | 深审32发现全修 + 版本号全同步 + 私源泄漏清洗 | 16 expert + 23/32 production | -| V1.32.1 | 2026-05-17 | CONTRIBUTING skill count 33→32 fix + 版本号同步 | 16 expert + 23/32 production | -| V1.32.2 | 2026-05-17 | Security hardening batch: CWE-78 fix + credential removal + CORS + WebSocket leak + XML escape | 16 expert + 23/32 production | -| V1.32.3 | 2026-05-17 | Refactor: _stub_response dispatch table + fuzzer ALL_PAYLOADS hoist | 16 expert + 23/32 production | -| V1.32.4 | 2026-05-17 | Honesty pass: remove aspirational numbers + split overlong functions | 16 expert + 23/32 production | -| V1.32.5 | 2026-05-17 | Security: shell injection + hardcoded creds + silent failures | 16 expert + 23/32 production | -| V1.33.0 | 2026-05-17 | MASTER_PLAN 38/38 items across 8 phases complete | 16 expert + 23/32 production | -| V1.34.0 | 2026-05-18 | Phase 1-5 initial audit: 18 additions (settings/IDE/Docker/Prometheus/streaming/PBT/contract/schema fuzz/compliance/DORA) | 16 expert + 23/32 production | -| V1.35.0 | 2026-05-18 | Deep audit 11 core modules (flaky guard/API security v2/data factory v2/perf/e2e event harness/visual regression/ML prioritizer/differential/EU AI Act/supply chain) | 16 expert + 23/32 production | -| V1.36.0 | 2026-05-18 | Remaining 6 deferred modules + CVE-2025-71176 fix + 深度审查65发现全修 | 16 expert + 30/32 active (23 production + 7 script) | -| V1.37.0 | 2026-05-18 | Phase 2 charter closure: Bug 5适配器(YAML门禁+按需安装) + HIGH 2(H16/H18) + MEDIUM 4(M12/M14/M15/M19) + contract gate + utils tests | 16 expert + 30/32 active · Phase 2 complete | -| V1.38.0 | 2026-05-18 | Phase 3.1 伦理/偏见审计: fairness_auditor.py (dataset bias + 6 model fairness metrics + intersectional + decision audit) + 20 tests + ai_validator bias audit pipeline | 16 expert + 30/32 active · 1/3 Phase 3 done | -| V1.39.0 | 2026-05-18 | Phase 3.2 沉默故障检测: silent_failure_detector.py 
(threshold drift + Mann-Kendall + OLS trend + sliding window + multi-source batch) + 21 tests + tracing/web_vitals/prometheus collectors | 16 expert + 30/32 active · 2/3 Phase 3 done | -| V1.40.0 | 2026-05-18 | Phase 3.3 缺席者场景注入: absentee_scenario_injector.py (9 absentee groups × 21 canonical scenarios + charter generation + coverage reporting) + 20 tests | 16 expert + 30/32 active · PHASE 3 COMPLETE | -| V1.41.0 | 2026-05-19 | Phase 4 证据链可采信性: evidence_chain.py (SHA-256 hash chain + multi-source collection + ISO 27001/SOC2/NIST 800-53/GDPR compliance mapping + JSON/Markdown export + integrity verification) + 39 tests + ai_validator evidence chain audit pipeline | 16 expert + 30/32 active · PHASE 4 DELIVERED | -| V1.42.0 | 2026-05-19 | Phase 5 神圣性与跨文化禁忌: taboo_matrix.py (135 entries × 16 locales × 5 dimensions: words/colors/numbers/holidays/sacred_contexts) + i18n_checker taboo audit extension (6 functions) + 84 tests | 16 expert + 30/32 active · PHASE 5 DELIVERED | -| V1.43.0 | 2026-05-19 | Release cut: Phase 3.1+3.2+3.3+4+5 落版 (fairness_auditor / silent_failure_detector / absentee_scenario_injector / evidence_chain / taboo_matrix · 共 184 tests) + **2 ex-vision skill 实装** (agent-introspection-debugging + build-your-own-x-explorer · LLM-driven minimum viable · 中央 ALL_SKILL_RUNNERS 同步) | **16 expert + 32/32 active** (25 production + 7 script · V1.x SKILL ROLLOUT 完整收尾) | -| V2.0.0 | TBD | V2.x 路线图启动 | 16/16 + V2 | +| | 2026-05-13 | bundle1 信任+法律线修复;ROADMAP.md 起步 | 10/16 | +| +1 | 2026-05-15 | X3 数字诚实化(README/ROADMAP)+ X4 防 mock 闭环 (registry 单源 frontmatter / router warn / orchestrator hard block expert+skill 双 layer) | 10/16 | +| | 2026-05-15 | env-manager LLM-driven minimum viable (runtime/orchestrator/agents/env_manager.py;LLM 读 PRD → env_checks + prep_steps + dependencies + risks 结构化 JSON) | 11/16 | +| | 2026-05-15 | mobile-tester LLM-driven minimum viable (runtime/orchestrator/agents/mobile_tester.py;LLM 读 PRD + Android/iOS 上下文 → test_cases + device_commands + 
mobile_specific 结构化 JSON) | 12/16 | +| | 2026-05-15 | visual-tester LLM-driven minimum viable (runtime/orchestrator/agents/visual_tester.py;LLM 读 PRD + UI 描述 → visual_test_points + comparison_scripts + tolerance + baseline_strategy 结构化 JSON) | 13/16 | +| | 2026-05-15 | system-tester LLM-driven minimum viable (runtime/orchestrator/agents/system_tester.py;LLM 读 PRD + IoT/串口/MQTT 上下文 → test_cases + device_commands + protocol_specific + test_environment 结构化 JSON;覆盖 IoT/audiovideo/tracing/mq/integration 5 类) | 14/16 | +| | 2026-05-16 | pentest-tester LLM-driven minimum viable (runtime/orchestrator/agents/pentest_tester.py;LLM 读 PRD + 安全上下文 → test_mode + target_scope + recon/vuln/exploit/reporting phases 结构化 JSON;覆盖 5 攻击域 Injection/XSS/SSRF/Auth/Authz;仅输出计划文本,真执行守护在 utils 层 env gate;法律责任在操作者侧 SECURITY.md L84) | 15/16 | +| | 2026-05-16 | automotive-tester LLM-driven minimum viable (runtime/orchestrator/agents/automotive_tester.py;LLM 读 PRD + CAN-bus/ISO-26262 上下文 → vehicle_subsystem + asil_assessment + test_cases + bus_test_plan + adas_scenarios + ota_plan + compliance_matrix + test_environment 结构化 JSON;覆盖 ECU/ADAS/IVI/V2X 4 子系统 + 8 协议 + 8 合规标准。**rollout 收尾**) | 16/16 expert (rollout 完成) | +| | 2026-05-16 | **skill rollout 起点** — SkillRunner 基础设施 (runtime/orchestrator/skills/__init__.py + SKILL_RUNNERS registry + @register_skill deco + experts.py kind=skill 接 runner) + pentest-coordinator 首 skill 落地 (5 阶段编排 + authorization_check + subagent_pool + refuse_conditions). 解锁 14 rollout skill 后续流水线. | 16 expert + 8/32 production (15 rollout 待) | +| | 2026-05-16 | **tagent config CLI** — 多模型 onboarding Step 2 (runtime/cli/config.py · 6 provider 内置 + 厂商配置 cookbook + use/set/unset/list/show 子命令). **多 provider 通用 env 通道** (LLM_PROVIDER + LLM_API_KEY + LLM_MODEL) + stub 扩 4 path. 
| 16 expert + 8/32 production | +| | 2026-05-16 | **skill rollout #2** — mobile-test skill LLM-driven 生产落地 (runtime/orchestrator/skills/mobile_test.py · Android/iOS 双平台 + 小程序支持) | 16 expert + 9/32 production | +| | 2026-05-16 | **skill rollout #3** — visual-test skill LLM-driven 生产落地 (runtime/orchestrator/skills/visual_test.py · Airtest + OCR + SSIM 视觉对比) | 16 expert + 10/32 production | +| | 2026-05-16 | **skill rollout #4** — pentest-recon + pentest-vuln 双 skill LLM-driven 生产落地 (侦察: 端口/子域/服务指纹 + 漏洞: 5 攻击域 hybrid SAST+blackbox) | 16 expert + 12/32 production | +| | 2026-05-16 | **skill rollout #5** — system-test skill LLM-driven 生产落地 (runtime/orchestrator/skills/system_test.py · IoT/音视频/追踪/消息队列 4 场景) | 16 expert + 13/32 production | +| | 2026-05-16 | **skill rollout #6** — eval-harness skill LLM-driven 生产落地 (runtime/orchestrator/skills/eval_harness.py · pass@k / Jaccard@k / top-1 stability / latency 4 指标 + 安全护栏) | 16 expert + 14/32 production | +| | 2026-05-16 | **skill rollout #7** — pentest-api + pentest-web 双 skill LLM-driven 生产落地 (API: OWASP API Top 10 + REST/GraphQL/gRPC/WebSocket · Web: OWASP Top 10 + ASVS + 2FA 自动登录) | 16 expert + 16/32 production | +| | 2026-05-16 | **skill rollout #8** — pentest-exploit + pentest-report 双 skill LLM-driven 生产落地 (exploit: 沙箱内验证 PoC + 不可破坏性约束 · report: working PoC 嵌入 + CWE/CVSS/PoC/修复 4 维) | 16 expert + 18/32 production | +| | 2026-05-16 | **skill rollout #9** — automotive-test + automotive-can-bus-test 双 skill LLM-driven 生产落地 (主编排: 10 阶段 HARA→报告 · CAN: CAN/CAN-FD/SOME-IP 协议一致性 + dbc 解析) | 16 expert + 20/32 production | +| | 2026-05-16 | **skill rollout #10 (收尾)** — automotive-adas-scenario + automotive-ota-update-test + automotive-hil-loop-test 3 skill LLM-driven 生产落地 (ADAS: AEB/ACC/LKA + CARLA 仿真 · OTA: 6 校验 + UN R156/GB 44496 合规 · HIL: MIL/SIL/HIL 三环 + dSPACE). 
**rollout 完成 — 23/32 production + 7 script + 0 rollout + 2 vision.** | 16 expert + 23/32 production (0 rollout 待) | +| | 2026-05-17 | 深审32发现全修 + 版本号全同步 + 私源泄漏清洗 | 16 expert + 23/32 production | +| | 2026-05-17 | CONTRIBUTING skill count 33→32 fix + 版本号同步 | 16 expert + 23/32 production | +| | 2026-05-17 | Security hardening batch: CWE-78 fix + credential removal + CORS + WebSocket leak + XML escape | 16 expert + 23/32 production | +| | 2026-05-17 | Refactor: _stub_response dispatch table + fuzzer ALL_PAYLOADS hoist | 16 expert + 23/32 production | +| | 2026-05-17 | Honesty pass: remove aspirational numbers + split overlong functions | 16 expert + 23/32 production | +| | 2026-05-17 | Security: shell injection + hardcoded creds + silent failures | 16 expert + 23/32 production | +| | 2026-05-17 | MASTER_PLAN 38/38 items across 8 phases complete | 16 expert + 23/32 production | +| | 2026-05-18 | Phase 1-5 initial audit: 18 additions (settings/IDE/Docker/Prometheus/streaming/PBT/contract/schema fuzz/compliance/DORA) | 16 expert + 23/32 production | +| | 2026-05-18 | Deep audit 11 core modules (flaky guard/API security v2/data factory v2/perf/e2e event harness/visual regression/ML prioritizer/differential/EU AI Act/supply chain) | 16 expert + 23/32 production | +| | 2026-05-18 | Remaining 6 deferred modules + CVE-2025-71176 fix + 深度审查65发现全修 | 16 expert + 30/32 active (23 production + 7 script) | +| | 2026-05-18 | Phase 2 charter closure: Bug 5适配器(YAML门禁+按需安装) + HIGH 2(H16/H18) + MEDIUM 4(M12/M14/M15/M19) + contract gate + utils tests | 16 expert + 30/32 active · Phase 2 complete | +| | 2026-05-18 | Phase 3.1 伦理/偏见审计: fairness_auditor.py (dataset bias + 6 model fairness metrics + intersectional + decision audit) + 20 tests + ai_validator bias audit pipeline | 16 expert + 30/32 active · 1/3 Phase 3 done | +| | 2026-05-18 | Phase 3.2 沉默故障检测: silent_failure_detector.py (threshold drift + Mann-Kendall + OLS trend + sliding window + multi-source batch) + 21 tests + 
tracing/web_vitals/prometheus collectors | 16 expert + 30/32 active · 2/3 Phase 3 done | +| | 2026-05-18 | Phase 3.3 缺席者场景注入: absentee_scenario_injector.py (9 absentee groups × 21 canonical scenarios + charter generation + coverage reporting) + 20 tests | 16 expert + 30/32 active · PHASE 3 COMPLETE | +| | 2026-05-19 | Phase 4 证据链可采信性: evidence_chain.py (SHA-256 hash chain + multi-source collection + ISO 27001/SOC2/NIST 800-53/GDPR compliance mapping + JSON/Markdown export + integrity verification) + 39 tests + ai_validator evidence chain audit pipeline | 16 expert + 30/32 active · PHASE 4 DELIVERED | +| | 2026-05-19 | Phase 5 神圣性与跨文化禁忌: taboo_matrix.py (135 entries × 16 locales × 5 dimensions: words/colors/numbers/holidays/sacred_contexts) + i18n_checker taboo audit extension (6 functions) + 84 tests | 16 expert + 30/32 active · PHASE 5 DELIVERED | +| | 2026-05-19 | Release cut: Phase 3.1+3.2+3.3+4+5 落版 (fairness_auditor / silent_failure_detector / absentee_scenario_injector / evidence_chain / taboo_matrix · 共 184 tests) + **2 ex-vision skill 实装** (agent-introspection-debugging + build-your-own-x-explorer · LLM-driven minimum viable · 中央 ALL_SKILL_RUNNERS 同步) | **16 expert + 32/32 active** (25 production + 7 script · SKILL ROLLOUT 完整收尾) | +| | TBD | 路线图启动 | 16/16 + V2 | diff --git "a/ai/agents/01-\346\265\213\350\257\225\344\270\273\347\256\241.md" "b/ai/agents/01-\346\265\213\350\257\225\344\270\273\347\256\241.md" index f97ca1e7..05254225 100644 --- "a/ai/agents/01-\346\265\213\350\257\225\344\270\273\347\256\241.md" +++ "b/ai/agents/01-\346\265\213\350\257\225\344\270\273\347\256\241.md" @@ -6,7 +6,7 @@ EXPERT_IMPL_STATUS: production paired_skills: [test-coordinator] --- -> ℹ️ **V1.0.0 实装状态**: 16 expert 全部 **16 active**(11 production + 5 script-backed),**0 rollout**。详见 [ROADMAP.md](../../ROADMAP.md)。 +> ℹ️ **实装状态**: 16 expert 全部 **16 active**(11 production + 5 script-backed),**0 rollout**。详见 [ROADMAP.md](../../ROADMAP.md)。 > runtime/router + orchestrator 防 mock 
已落地 — 路由到未实装 expert 返回明确「未实装」说明,不输出 mock 数据。 你是一位拥有15年经验的测试技术总监,带领过多个大型互联网项目的测试团队。你深谙测试工程化,善于风险识别、资源调度和质量决策。 diff --git a/ai/skills/smoke-test.md b/ai/skills/smoke-test.md index 9898485a..35032eca 100644 --- a/ai/skills/smoke-test.md +++ b/ai/skills/smoke-test.md @@ -93,7 +93,7 @@ allure generate workspace/测试报告/{项目名}/allure-results \ **通过:** ```text ✅ 冒烟测试通过 -模块:用户登录模块 V1.0.0 +模块:用户登录模块 执行时间:8 分 32 秒 P0 用例:25 个,通过 25 个,失败 0 个(100%) 结论:可以继续部署 / 全量测试 @@ -102,7 +102,7 @@ P0 用例:25 个,通过 25 个,失败 0 个(100%) **失败:** ```text ❌ 冒烟测试失败,阻止部署 -模块:用户登录模块 V1.0.0 +模块:用户登录模块 执行时间:7 分 15 秒 P0 用例:25 个,通过 23 个,失败 2 个(92.0% < 95%) 失败用例: diff --git a/deploy/config/.mcp.json b/deploy/config/.mcp.json index 0f372d40..4b7cf75c 100644 --- a/deploy/config/.mcp.json +++ b/deploy/config/.mcp.json @@ -1,5 +1,5 @@ { - "_comment": "MCP 服务配置 — V1.2.0 6 件套 。P2 #12 MCP client 完善后全部启用。", + "_comment": "MCP 服务配置 — 6 件套 。P2 #12 MCP client 完善后全部启用。", "mcpServers": { "filesystem": { "command": "npx", diff --git a/deploy/config/requirements.txt b/deploy/config/requirements.txt index 5e017e9b..b8428e3b 100644 --- a/deploy/config/requirements.txt +++ b/deploy/config/requirements.txt @@ -148,7 +148,7 @@ defusedxml==0.7.1 # [稳定层] XXE/亿笑/decompression bomb # ============================================================ # 运行时层(`runtime/`)依赖 — 全部 [可选] -# 仅启用 V1.1.0 运行时(AI 路由+Prefect+飞轮+FastAPI/CLI)时安装 +# 仅启用 运行时(AI 路由+Prefect+飞轮+FastAPI/CLI)时安装 # 完整列表见 runtime/pyproject.toml # ============================================================ diff --git a/deploy/config/templates/base.tagent.yml.tpl b/deploy/config/templates/base.tagent.yml.tpl index dcb50505..430b4aa5 100644 --- a/deploy/config/templates/base.tagent.yml.tpl +++ b/deploy/config/templates/base.tagent.yml.tpl @@ -62,7 +62,7 @@ gateway: # 例: enabled_platforms: [telegram, feishu] pentest: - # 法律契约(default refuse · charter ); rollout 阶段 yml gate 占位, V1.x 激活后接入真实路由 + # 法律契约(default refuse · charter ); rollout 阶段 yml gate 占位, 激活后接入真实路由 authorized: false # 
法律授权确认 scope_in_targets: [] # IP/domain/URL 白名单(IN) scope_out_targets: [] # 强制黑名单(覆盖 IN) diff --git a/deploy/marketplace/_safety_policy.yaml b/deploy/marketplace/_safety_policy.yaml index 04eb9f6e..b03f3c79 100644 --- a/deploy/marketplace/_safety_policy.yaml +++ b/deploy/marketplace/_safety_policy.yaml @@ -52,7 +52,7 @@ trust_tiers: # 卸载策略 uninstall: - archive_only: true # 决策不可逆禁止 + archive_only: true # 禁止不可逆操作 archive_dir: marketplace/.archive/ # 紧急 kill switch diff --git a/docs/STYLE.md b/docs/STYLE.md index 76b4d322..e26f3edb 100644 --- a/docs/STYLE.md +++ b/docs/STYLE.md @@ -1,6 +1,6 @@ # 文档样式约定(STYLE.md) -> Test-Agent 全仓 Markdown 文档统一约定 · V1.10.0 起强制 · pre-commit markdownlint 卡 MD001 / MD036。 +> Test-Agent 全仓 Markdown 文档统一约定 · 起强制 · pre-commit markdownlint 卡 MD001 / MD036。 --- diff --git a/docs/assets/demo-script-v1.12.md b/docs/assets/demo-script-v1.12.md index 1c1ea46b..b0d65133 100644 --- a/docs/assets/demo-script-v1.12.md +++ b/docs/assets/demo-script-v1.12.md @@ -84,7 +84,7 @@ terminalizer render demo --output docs/assets/demo.mp4 --quality 80 |------|------|------|------| | Twitter / X | 30 秒 | mp4 | "5 sec AI testing setup with `tagent init`. 16 experts, 32 skills, 8640 config combinations. 
github.com/Wool-xing/Test-Agent" | | 微信视频号 / 抖音 | 30-60 秒 | mp4 1080×1920 竖屏 | "AI 测试 5 秒上手 · 用例 + 思维导图 + Bug 单 + 报告一键产出 · GitHub 搜 Test-Agent" | -| 掘金 / V2EX / 少数派 | gif | terminalizer | 配文章:介绍 V1.12 配置自动组装 + 矩阵 8640 组合 + 5 preset | +| 掘金 / V2EX / 少数派 | gif | terminalizer | 配文章:介绍 配置自动组装 + 矩阵 8640 组合 + 5 preset | | Hacker News | 静态截图 + 链接 | png + url | 标题:"Test-Agent: AI testing framework with `tagent init` to scaffold 8640 configurations" | --- @@ -100,7 +100,7 @@ terminalizer render demo --output docs/assets/demo.mp4 --quality 80 --- -## 后续 V1.13 扩(若 demo 火) +## 后续 扩(若 demo 火) | 触发条件 | 加什么 | |----------|--------| diff --git a/docs/assets/terminalizer-config.yml b/docs/assets/terminalizer-config.yml index 9b8dfb2b..87ef2d80 100644 --- a/docs/assets/terminalizer-config.yml +++ b/docs/assets/terminalizer-config.yml @@ -1,4 +1,4 @@ -# terminalizer 配置 · V1.14 30s Test-Agent demo +# terminalizer 配置 · 30s Test-Agent demo # 用法: # npm install -g terminalizer # terminalizer record demo --config docs/assets/terminalizer-config.yml diff --git "a/docs/case-studies/2026-05-15-\350\257\232\345\256\236\345\214\226\344\270\216\351\230\262mock\351\227\255\347\216\257.md" "b/docs/case-studies/2026-05-15-\350\257\232\345\256\236\345\214\226\344\270\216\351\230\262mock\351\227\255\347\216\257.md" index 3a634bfa..de89b063 100644 --- "a/docs/case-studies/2026-05-15-\350\257\232\345\256\236\345\214\226\344\270\216\351\230\262mock\351\227\255\347\216\257.md" +++ "b/docs/case-studies/2026-05-15-\350\257\232\345\256\236\345\214\226\344\270\216\351\230\262mock\351\227\255\347\216\257.md" @@ -1,20 +1,20 @@ # Case Study: 文档诚实化 + 防 mock 闭环 > **时间**: 2026-05-13 → 2026-05-15 -> **范围**: V1.14.0 → V1.14.0+1 +> **范围**: → +1 > **PR**: #63 / #64 / #65 / #66 / #67 / #68 (合 6 PR) > **文件改**: 16 个 .md frontmatter + 4 文档 + 5 runtime + 11 文件 / 339 行加 > **验证**: pytest 23/23 pass · L2 selftest --e2e ✓ PASS 8/9 ok · CI 11 必修 ×3 PR 全绿 ## 1. 
背景 -V1.14.0 起步时, README / ROADMAP 与代码实质之间存在 3 类 gap: +起步时, README / ROADMAP 与代码实质之间存在 3 类 gap: | Gap 类型 | 旧表述 | 实际状态 | |---|---|---| | Agent 数字 | "16 expert agents" | 16 个 .md 文件, 实装状态不明 (无 frontmatter 标注) | | Skill 数字 | "33 business skills + 3 meta-skills" | 实测 32 业务 + 3 meta (33 是误数) | -| Rollout 范围 | "类别 3 垂直领域 2 Agent (V1.x rollout)" | rollout 实跨三类别共 6 个 (env-manager + mobile-tester + visual-tester + system-tester + pentest-tester + automotive-tester) | +| Rollout 范围 | "类别 3 垂直领域 2 Agent (rollout)" | rollout 实跨三类别共 6 个 (env-manager + mobile-tester + visual-tester + system-tester + pentest-tester + automotive-tester) | | Mock 输出 | ROADMAP "防 mock 承诺" 仅文字声明 | expert 部分硬拒已落, **skill 完全无防 mock** — rollout skill 路由走 fallback no-op stdout `[no-op] skill 'X' has no canonical script; documented step recorded` 返 ok=True 假成功 | 用户反复强调: **修复 ≠ 削话术, 终态 = install + tagent demo + 真 LLM-driven agent 跑通**。 @@ -28,8 +28,8 @@ V1.14.0 起步时, README / ROADMAP 与代码实质之间存在 3 类 gap: ``` production ← 真 LLM-driven runner (orchestrator/agents/*.py) 已实装 script ← 真 script-backed (utils/*.py) 已实装 -rollout ← V1.x rollout 待实装 -vision ← V2.x 方法论参考 (仅 skill 有此状态) +rollout ← rollout 待实装 +vision ← 方法论参考 (仅 skill 有此状态) ``` PR #65 自纠错: 初版 4 个 agent 标注与 runtime/orchestrator/agents/ 实代码不一致 (e.g., env-manager 标 active 但无 agent runner), 多源核对 (runtime/ + ROADMAP + test-lead) 后校正。 @@ -42,8 +42,8 @@ PR #65 自纠错: 初版 4 个 agent 标注与 runtime/orchestrator/agents/ 实 |---|---|---| | `README.md` L44 | "16 expert agents" | "16 expert agents (5 production + 5 script + 6 rollout — see ROADMAP.md)" | | `README.md` L45 | "33 business skills + 3 meta-skills" | "32 business skills (7 production + 7 script + 16 rollout + 2 vision) + 3 meta-skills" | -| `agents/README.md` L47 | "类别 3:垂直领域 2 Agent (V1.x rollout)" | "类别 3:垂直领域扩展 2 Agent" + L54 显式列 6 rollout 全集 (跨三类别) | -| `ROADMAP.md` | 仅 6 expert rollout 节 | 加「当前活跃 14 skill」+「V1.x rollout 16 skill」+「V2.x vision 2 skill」三节 | +| `agents/README.md` L47 | "类别 3:垂直领域 2 Agent 
(rollout)" | "类别 3:垂直领域扩展 2 Agent" + L54 显式列 6 rollout 全集 (跨三类别) | +| `ROADMAP.md` | 仅 6 expert rollout 节 | 加「当前活跃 14 skill」+「rollout 16 skill」+「vision 2 skill」三节 | **关键诚实点**: 旧 "33 skill" 是真错 (实 32, 3 meta 单列), PR #66 一并修。 @@ -69,7 +69,7 @@ orchestrator.execute_node ← returncode=2 + stderr "未实 2. **`runtime/orchestrator/adapters/experts.py`**: **移除 hardcoded `EXPERT_IMPL_STATUS` dict** (单源迁移, 杜绝 PR #65 双源漂移教训复发); 加 `_get_impl_status(name, kind)` helper 直读 catalog; `execute_node` expert / skill 双分支统一防 mock — rollout / vision / unknown → `returncode=2` + stderr "未实装"。 3. **`runtime/router/router.py`**: `_validate_against_catalog` 改读 `catalog.lookup().impl_status` (不再 import hardcoded dict); expert / skill 双 kind 同检; rollout / vision / unknown 全标 issue + 降 confidence。 4. **`runtime/tests/test_impl_status_filter.py`** (新, 13 cases): registry 无 unknown / counts 校验 / router flag 4 类 + production 不误标 / orchestrator hard rc=2 (5 路径) + **反例 assert "documented step recorded" not in stdout** 验 rollout skill 不再 no-op 假成功。 -5. **`ROADMAP.md`**: rollout-6-expert table row 0 状态 `planned → done`; rollout-16-skill 前置改已完成; 防 mock 承诺节展开为双 layer 实装事实; 进度跟踪 +V1.14.0+1 (2026-05-15)。 +5. 
**`ROADMAP.md`**: rollout-6-expert table row 0 状态 `planned → done`; rollout-16-skill 前置改已完成; 防 mock 承诺节展开为双 layer 实装事实; 进度跟踪 ++1 (2026-05-15)。 #### 范围修订过程 @@ -82,7 +82,7 @@ X4 真正核心 = **skill 防 mock + 单源化**, 不是单纯 router 过滤。 ### Phase 4: stub 设计不一致根治 (PR #68) -X4 验时发现 pre-existing baseline 坏测试 `test_router_ends_with_report_generator`。根因: V1.14 把 `test-lead` 加到 stub 的 web-system path 末 , 但 mobile-app / desktop-app / ai-model / rest-api 4 path 未同步。 +X4 验时发现 pre-existing baseline 坏测试 `test_router_ends_with_report_generator`。根因: 把 `test-lead` 加到 stub 的 web-system path 末 , 但 mobile-app / desktop-app / ai-model / rest-api 4 path 未同步。 修法: 5 path 末统一 `test-lead` 决策 (与 `agents/README.md` L20-31 流程对齐): @@ -160,4 +160,4 @@ f6 假阳性 3 问全否后撤项。 | X4.5 (production skill no-op) | NOT-A-BUG (本 case study 决策 3) | | TG 跨项目迁移 (协作宪章 v6 + utils gate v2) | 待启 | | Mac 真机 install.sh 实测 | 待启 (Linux Docker 已通 PR #60/#61/#62) | -| V1.15 sprint (env-manager LLM-driven minimum viable) | 防 mock 前置已落, 可直接进入 expert 实装 | +| sprint (env-manager LLM-driven minimum viable) | 防 mock 前置已落, 可直接进入 expert 实装 | diff --git a/docs/case-studies/INDEX.md b/docs/case-studies/INDEX.md index 1350ac91..9f5c8c3e 100644 --- a/docs/case-studies/INDEX.md +++ b/docs/case-studies/INDEX.md @@ -6,7 +6,7 @@ | 文件 | 时间 | 范围 | 要点 | |------|------|------|------| -| [2026-05-15-诚实化与防mock闭环.md](2026-05-15-诚实化与防mock闭环.md) | 2026-05-13 → 2026-05-15 | V1.14.0 → V1.14.0+1 (6 PR) | 16 expert + 32 skill frontmatter labeling → README/ROADMAP 数字诚实化 → registry 单源 + router warn + orchestrator hard block 4-step 闭环 | +| [2026-05-15-诚实化与防mock闭环.md](2026-05-15-诚实化与防mock闭环.md) | 2026-05-13 → 2026-05-15 | → +1 (6 PR) | 16 expert + 32 skill frontmatter labeling → README/ROADMAP 数字诚实化 → registry 单源 + router warn + orchestrator hard block 4-step 闭环 | ## 复用导引 diff --git a/docs/charter/01-vision-dimensions.md b/docs/charter/01-vision-dimensions.md index 6d474614..e7cc7a00 100644 --- a/docs/charter/01-vision-dimensions.md +++ 
b/docs/charter/01-vision-dimensions.md @@ -6,7 +6,7 @@ ## 🏛️ 项目宪章(灵魂底色) -> 三公理 + 五条铭文 + 工程映射 + V1.0.0 锁死 + 双签解锁条件 — 已迁入(memory `project_test_agent_workflow.md`),FULL_GUIDE 不再重复维护。 +> 三公理 + 五条铭文 + 工程映射 + 锁死 + 双签解锁条件 — 已迁入(memory `project_test_agent_workflow.md`),FULL_GUIDE 不再重复维护。 --- @@ -33,7 +33,7 @@ ## 🌌 维度全图(九大簇 · Agent 看世界的方式) > 工程矩阵之下的认知地图——回答"测试 Agent 到底需要哪些维度的能力"。各簇能力的工程落点散布在「核心特性」「全链路覆盖矩阵」「关键模块清单」中。 -> **接入策略**:簇 1-2 为 V1.0.0 主交付;簇 3-5 部分进入工程矩阵;簇 6-9 多数列入 Phase 2-4 路线图,**承认存在但不假装能立刻交付**——符合第三公理。 +> **接入策略**:簇 1-2 为 主交付;簇 3-5 部分进入工程矩阵;簇 6-9 多数列入 Phase 2-4 路线图,**承认存在但不假装能立刻交付**——符合第三公理。 ### 簇 1 · 工程与架构层 - 七阶段工作流:需求理解 → 用例生成 → 执行 → 观测 → 根因 → 反馈 → 治理 @@ -99,7 +99,7 @@ ## 🎭 关键模块清单(测试 Agent 的工具箱) > 每个模块对应一个 utils 或 skill 的工程落点;划分到对应簇便于追溯认知来源。 -> **Phase 标注**:✅ V1.0.0 已交付;⚪ Phase 2-4 路线图;❌ Phase 4-5 概念阶段。 +> **Phase 标注**:✅ 已交付;⚪ Phase 2-4 路线图;❌ Phase 4-5 概念阶段。 | 模块 | 职能 | 所属簇 | 工程落点 | 阶段 | |------|------|--------|----------|------| diff --git a/docs/charter/03-agentchat-protocol.md b/docs/charter/03-agentchat-protocol.md index 658edece..861f39e4 100644 --- a/docs/charter/03-agentchat-protocol.md +++ b/docs/charter/03-agentchat-protocol.md @@ -67,8 +67,8 @@ agent 在三种情况**必须停下反问用户**,不允许猜: - 反问全部落档到 `discussions/{date}_clarifications.md` - 同一会话内不重复问已澄清过的同一术语 -**不做的事(Via Negativa 显式标注)**:V1.0.0 **不构建反问知识库(KB)**——不做 embedding 向量库、不做半结构化匹配引擎、不做语义检索。所有反问纪要落 `discussions/` 后由 test-lead 在新任务前**人工查阅**类似场景。 -- **为什么不做**:(a) V1.0.0 时期数据量不足(< 100 条反问);(b) 反问的"是否还有效"依赖项目阶段,自动复用可能传递过期判断;(c) 投入 KB 工程 ≠ 提升决策质量 +**不做的事(Via Negativa 显式标注)**:**不构建反问知识库(KB)**——不做 embedding 向量库、不做半结构化匹配引擎、不做语义检索。所有反问纪要落 `discussions/` 后由 test-lead 在新任务前**人工查阅**类似场景。 +- **为什么不做**:(a) 时期数据量不足(< 100 条反问);(b) 反问的"是否还有效"依赖项目阶段,自动复用可能传递过期判断;(c) 投入 KB 工程 ≠ 提升决策质量 - **现状更新(2026-05-16)**:discussions/ 累计反问 + 讨论纪要已超 200 条,进入 Phase 2 重新评估区间。详见 [06-test-architecture.md](06-test-architecture.md) Phase 2 触发条件 - **未来开案条件**:若需要开放反问 KB,须由 test-lead + 独立伦理责任人**双签**立项 diff --git a/docs/charter/04-skills-bugtracker.md 
b/docs/charter/04-skills-bugtracker.md index 749422c4..40f5c6c2 100644 --- a/docs/charter/04-skills-bugtracker.md +++ b/docs/charter/04-skills-bugtracker.md @@ -50,7 +50,7 @@ darwin-skill 跑出的改进建议**不绕过协作协议**——重大改动( ### 6. 不做的事(Via Negativa 显式标注) -V1.0.0 darwin-skill **不消费**项目运行数据(`discussions/` / `decisions/` / `history/` / `skill-evolution/results.tsv` 之外的运行历史),仅对 skill 文本结构本身做静态 + 实测评分优化。 +darwin-skill **不消费**项目运行数据(`discussions/` / `decisions/` / `history/` / `skill-evolution/results.tsv` 之外的运行历史),仅对 skill 文本结构本身做静态 + 实测评分优化。 **为什么不做"运行数据反哺 skill"的自学习闭环**: 1. 自学习难界定何时停止学习"坏样本"(如一段时期的高 flaky 反而被学进 skill 形成自我固化) @@ -69,7 +69,7 @@ V1.0.0 darwin-skill **不消费**项目运行数据(`discussions/` / `decision | 适配器 | 状态 | 配置字段 | severity 映射 | |--------|------|---------|--------------| -| **zentao**(默认) | ✅ V1.0.0 | `ZENTAO_URL / ZENTAO_USER / ZENTAO_TOKEN` | severity 1=P0 / 2=P1 / 3=P2 / 4=P3 | +| **zentao**(默认) | ✅ | `ZENTAO_URL / ZENTAO_USER / ZENTAO_TOKEN` | severity 1=P0 / 2=P1 / 3=P2 / 4=P3 | | **jira** | ⚪ Phase 2 | `JIRA_URL / JIRA_USER / JIRA_TOKEN / JIRA_PROJECT_KEY` | Highest=P0 / High=P1 / Medium=P2 / Low=P3 | | **github** | ⚪ Phase 2 | `GITHUB_TOKEN / GITHUB_REPO` | label `priority:p0..p3` | | **linear** | ⚪ Phase 2 | `LINEAR_API_KEY / LINEAR_TEAM_ID` | priority 1=P0 / 2=P1 / 3=P2 / 4=P3 | diff --git a/docs/charter/06-test-architecture.md b/docs/charter/06-test-architecture.md index 8345f95a..7cb518e1 100644 --- a/docs/charter/06-test-architecture.md +++ b/docs/charter/06-test-architecture.md @@ -133,7 +133,7 @@ | Phase | 触发条件 | 标志性交付 | |------|---------|----------| -| **Phase 1**(已完成 V1.0.0-V1.36.0) | 概念宪章成 + 工程基线就绪 + expert rollout 收尾 + skill rollout 完成 | 16 expert (11p+5s) + 32 skill (23p+7s+0r+2v) + AgentChat + Bug 多适配 + 按需安装 + darwin-skill + MCP + 教学层 + Marketplace + 多 LLM config | +| **Phase 1**(已完成 -) | 概念宪章成 + 工程基线就绪 + expert rollout 收尾 + skill rollout 完成 | 16 expert (11p+5s) + 32 skill (23p+7s+0r+2v) + AgentChat + Bug 多适配 + 按需安装 + darwin-skill + MCP + 教学层 + 
Marketplace + 多 LLM config | | **Phase 2** | utils 单测覆盖 ≥ 60% 且团队 ≥ 5 人 | 契约链路串通 + 门禁引擎 yaml 抽象 + 反问 KB 重新评估 + skill rollout 继续 | | **Phase 3** | Phase 2 全交付 + 接入 ≥ 2 行业 | 合成监控 + canary/feature flag + 统一 dashboard + 沉默故障 + 缺席者注入 | | **Phase 4** | 接入合规行业(金融/医疗/司法)| 证据链司法可采信打包 + 数字考古学家 + AI 测试深化 | @@ -156,10 +156,10 @@ | **证据链 / 司法可采信打包** | 散落 | Phase 4 | 决策日志 + 模型版本 + 数据集 → 标准送审包 | | **数字考古学家**(遗留系统初始假设回溯) | 缺 | Phase 4 | 知识图谱冷启动 + Why 数据库 | | **神圣性守护 + 禁忌矩阵** | 缺 | Phase 5 | 跨文化禁忌词/色/数/节日组合(本地化共建) | -| **darwin-skill 集成(自进化)** | ✅ V1.0.0 已并入 | Phase 1 | 上游 SKILL.md + workspace 落 results.tsv + 季度同步 | -| **Bug Tracker 多适配** | ✅ V1.0.0 已并入 | Phase 1 | 5 套适配器(zentao/jira/github/linear/webhook)+ 工厂模式 | -| **AgentChat 协作协议** | ✅ V1.0.0 已并入 | Phase 1 | discussions/ 纪要 + test-lead 中枢路由 + 反问 3 级预算 | -| **按需安装与依赖分层** | ✅ V1.0.0 已并入 | Phase 1 | 6 requirements 文件 + install.py + 运行时补装回路 | +| **darwin-skill 集成(自进化)** | ✅ 已并入 | Phase 1 | 上游 SKILL.md + workspace 落 results.tsv + 季度同步 | +| **Bug Tracker 多适配** | ✅ 已并入 | Phase 1 | 5 套适配器(zentao/jira/github/linear/webhook)+ 工厂模式 | +| **AgentChat 协作协议** | ✅ 已并入 | Phase 1 | discussions/ 纪要 + test-lead 中枢路由 + 反问 3 级预算 | +| **按需安装与依赖分层** | ✅ 已并入 | Phase 1 | 6 requirements 文件 + install.py + 运行时补装回路 | > **第三公理在此节兑现**:项目有意识地**少承诺**——文明级伦理议题(如缓慢暴力、末日哨兵、神圣性守护)我们承认其存在,但**不在工程路线图上假装能做**。如果未来接入特定行业(金融 / 医疗 / 司法)需要其中某项能力,由业务方按需单独立项,不绑进通用框架。 @@ -204,12 +204,12 @@ | # | 议题 | | | |---|------|---------|------| | | | | | -| Q2 | Agent 架构:单体 vs 专 | | V1.0.0 选专科 + test-lead 中枢 | -| Q3 | 五条铭文的技术实现机制(不可变区域、熔断条件)? | 🔄 | V1.0.0 铭文锁死,无削弱机制;Phase 4 接入合规行业后重新设计 | +| Q2 | Agent 架构:单体 vs 专 | | 选专科 + test-lead 中枢 | +| Q3 | 五条铭文的技术实现机制(不可变区域、熔断条件)? | 🔄 | 铭文锁死,无削弱机制;Phase 4 接入合规行业后重新设计 | | Q4 | 独立审计署的法律实体形态? | ⏳ | 触发条件:团队 ≥ 20 人 或 接入合规行业 | | Q5 | 末日哨兵权的触发授权链? | ⏳ | 需监管/学界共识,Phase 4 | | | | | | -| Q7 | 团队最小配置(工程/行业专家/伦理责任人)? | ⏳ | V1.0.0 单人可启动;剥离伦理责任人需 ≥ 20 人 | +| Q7 | 团队最小配置(工程/行业专家/伦理责任人)? 
| ⏳ | 单人可启动;剥离伦理责任人需 ≥ 20 人 | | Q8 | 与现有 AI 测试平台(Mabl / Applitools / Functionize)的差异化定位? | ⏳ | 候选定位:「承诺学科 + 伦理护栏 + 行业隐喻先行」 | --- diff --git a/docs/charter/07-runtime-license.md b/docs/charter/07-runtime-license.md index 065080ff..cd0dd55e 100644 --- a/docs/charter/07-runtime-license.md +++ b/docs/charter/07-runtime-license.md @@ -4,7 +4,7 @@ 内容与原 FULL_GUIDE.md 对应段完全一致, 仅拆不动语义。 --> -## 🧠 V1.36.0 运行时层(`runtime/`) +## 🧠 运行时层(`runtime/`) > 已有 16 专家 / 32 Skill / 79 utils**不动**(宪章规则),`runtime/` 作可执行调度层 + 真 LLM-driven agent/skill runner。 > 让"文档+脚本工具箱"升级为"可被 API/CLI/CI 直接调用的运行时"。 @@ -114,7 +114,7 @@ uvicorn runtime.api.main:app --port 8800 | `utils/` 通知/Bug | 复用 `generate_report.py` / `zentao_bug_manager.py` | 任何专家/Skill/脚本**新增**或**修改**仍按宪章 同步规则走;`runtime/` 是新增 **调度** 层,不重复实现专家逻辑。 -V1.14+ 真 LLM-driven agent runner + V1.21+ SkillRunner 系统为 runtime 新增执行能力,详见 [ROADMAP.md](../../ROADMAP.md)。 ++ 真 LLM-driven agent runner + + SkillRunner 系统为 runtime 新增执行能力,详见 [ROADMAP.md](../../ROADMAP.md)。 --- @@ -141,9 +141,9 @@ V1.14+ 真 LLM-driven agent runner + V1.21+ SkillRunner 系统为 runtime 新增 - 2026-05-11:宪章四章 + 三公理 + 五铭文起草完成 - 2026-05-11:FULL_GUIDE.md 确立永久宪章地位 -- 2026-05-11:darwin-skill 不消费运行数据(Via Negativa);反问 KB 不进 V1.0.0 -- 2026-05-12 ~ 2026-05-14:V1.1-V1.14 runtime 层 + 教学层 + Marketplace + 渗透/车载 + Hermes + GBrain + Karpathy + ECC -- 2026-05-15 ~ 2026-05-18:V1.15-V1.36 22 版迭代 — 16 expert 全 production + 23 SkillRunner 生产落地 + 多 LLM config + 深审修复 + GUI + Dashboard + Mobile + 65发现全修 +- 2026-05-11:darwin-skill 不消费运行数据(Via Negativa);反问 KB 不进 +- 2026-05-12 ~ 2026-05-14:-runtime 层 + 教学层 + Marketplace + 渗透/车载 + Hermes + GBrain + Karpathy + ECC +- 2026-05-15 ~ 2026-05-18:-22 版迭代 — 16 expert 全 production + 23 SkillRunner 生产落地 + 多 LLM config + 深审修复 + GUI + Dashboard + Mobile + 65发现全修 ### 下次会话进入项目时,按顺序检查 @@ -155,8 +155,8 @@ V1.14+ 真 LLM-driven agent runner + V1.21+ SkillRunner 系统为 runtime 新增 ### 来源与引用(认知史) - 第一至五轮(DeepSeek + Claude):测试 Agent 架构 + 九大簇 -- V1.0.0 工程基线:14 agent + 14 skill + 76 utils + 
CI/CD(历史基线) -- V1.1.0 ~ V1.36.0:runtime + 11 agent runner + 16 skill runner + 教学/市场/多LLM(详见 CHANGELOG + ROADMAP) +- 工程基线:14 agent + 14 skill + 76 utils + CI/CD(历史基线) +- ~ :runtime + 11 agent runner + 16 skill runner + 教学/市场/多LLM(详见 CHANGELOG + ROADMAP) - 永久宪章糅合(2026-05-11/14/16):FULL_GUIDE 工程主体 + 全局记忆哲学维度 + 持续回写 --- diff --git a/docs/charter/INDEX.md b/docs/charter/INDEX.md index 5445d495..a931553e 100644 --- a/docs/charter/INDEX.md +++ b/docs/charter/INDEX.md @@ -11,7 +11,7 @@ | 04 | [skills-bugtracker](04-skills-bugtracker.md) | Skills 自进化机制 + Bug Tracker 多适配器 | ~103 | | 05 | [install-deploy](05-install-deploy.md) | 按需安装 + 架构图 + 快速开始 + 工作流 + 技术栈 + 闭环 + 升级 + 协作 + 跨 AI | ~311 | | 06 | [test-architecture](06-test-architecture.md) | 测试架构深度 + 关键反问 + 开放问题 + 术语表 | ~240 | -| 07 | [runtime-license](07-runtime-license.md) | V1.1.0 运行时层 + LICENSE / CHANGELOG / 项目当前状态 | ~162 | +| 07 | [runtime-license](07-runtime-license.md) | 运行时层 + LICENSE / CHANGELOG / 项目当前状态 | ~162 | ## 维护原则 diff --git "a/docs/getting-started/\344\275\277\347\224\250\346\211\213\345\206\214.md" "b/docs/getting-started/\344\275\277\347\224\250\346\211\213\345\206\214.md" index 70fc8a08..71c2b5e8 100644 --- "a/docs/getting-started/\344\275\277\347\224\250\346\211\213\345\206\214.md" +++ "b/docs/getting-started/\344\275\277\347\224\250\346\211\213\345\206\214.md" @@ -656,7 +656,7 @@ python -m utils.jmeter_result_parser \ ## · 运行时 CLI/API 用法 -> 运行时层 `runtime/` 是 V1.1.0 新增。已有 16 专家 / 32 Skill / 79 脚本不动,本层仅作可执行调度。 +> 运行时层 `runtime/` 是 新增。已有 16 专家 / 32 Skill / 79 脚本不动,本层仅作可执行调度。 ### CLI 命令速查 @@ -718,11 +718,11 @@ TAGENT_LLM_PROVIDER=ollama python -m runtime.cli.main run "..." # 本地 TAGENT_LLM_PROVIDER=stub python -m runtime.cli.main run "..." 
# 测试,不出网 ``` -### 与 V1.0.0 Claude Code 模式的关系 +### 与 Claude Code 模式的关系 | 模式 | 适合 | 入口 | |------|------|------| -| **V1.0.0 Claude Code 工作流** | 团队跟 Claude Code 互动测试,深度对话 | `claude` → `/smoke-test` | -| **V1.1.0 runtime 模式** | 自动化跑批,CI 集成,Web UI 接入 | `tagent run` 或 `POST /run/*` | +| **Claude Code 工作流** | 团队跟 Claude Code 互动测试,深度对话 | `claude` → `/smoke-test` | +| **runtime 模式** | 自动化跑批,CI 集成,Web UI 接入 | `tagent run` 或 `POST /run/*` | 两种模式共享同一份 `agents/` `skills/` `utils/`,无冗余。 diff --git "a/docs/getting-started/\351\203\250\347\275\262\350\257\264\346\230\216.md" "b/docs/getting-started/\351\203\250\347\275\262\350\257\264\346\230\216.md" index 01a6928a..a644a55d 100644 --- "a/docs/getting-started/\351\203\250\347\275\262\350\257\264\346\230\216.md" +++ "b/docs/getting-started/\351\203\250\347\275\262\350\257\264\346\230\216.md" @@ -273,7 +273,7 @@ TEMPLATE_DIR="${TEMPLATE_DIR:-$(pwd)}" PROJECT_ROOT="${1:-$(pwd)/test-project}" echo "==========================================" -echo " Test-Agent 工作流部署 V1.36.0" +echo " Test-Agent 工作流部署 " echo " 模板目录: $TEMPLATE_DIR" echo " 项目目录: $PROJECT_ROOT" echo "==========================================" @@ -638,7 +638,7 @@ python -c "from utils.api_retry_util import call_with_retry; print('utils OK')" ## · 运行时层 `runtime/` 部署 -> 运行时层是可选的(用户也可只用 16 专家+32 Skill+79 脚本的 V1.0.0 工作流模式)。 +> 运行时层是可选的(用户也可只用 16 专家+32 Skill+79 脚本的 工作流模式)。 > 想要 HTTP/CLI 一键跑、AI 路由、飞轮存储,启它。 ### 1. 
起本地依赖(Docker) diff --git a/runtime/orchestrator/agents/INDEX.md b/runtime/orchestrator/agents/INDEX.md index 03b0f51a..e97d4086 100644 --- a/runtime/orchestrator/agents/INDEX.md +++ b/runtime/orchestrator/agents/INDEX.md @@ -16,7 +16,7 @@ - test-lead 自身已实现(用全链路上游),其他 11 个:env-manager / data-preparer(scripted)/ testcase-designer(scripted)/ report-generator(scripted)/ mobile-tester / desktop-tester(scripted)/ visual-tester / system-tester / ai-tester(scripted)/ 渗透 / 车载 - **5 个有 script 真跑**(testcase-designer / data-preparer / report-generator / desktop-tester / ai-tester)→ SCRIPT_MAP 兜 -- **6 个 no-op**(env-manager / mobile-tester / visual-tester / system-tester / 渗透 / 车载)→ 待 V1.15+ +- **6 个 no-op**(env-manager / mobile-tester / visual-tester / system-tester / 渗透 / 车载)→ 待 + ## 协议 diff --git a/workspace/_demo/tagent.yml b/workspace/_demo/tagent.yml index a65a34b3..7c6cc84c 100644 --- a/workspace/_demo/tagent.yml +++ b/workspace/_demo/tagent.yml @@ -68,7 +68,7 @@ gateway: # 例: enabled_platforms: [telegram, feishu] pentest: - # 法律契约(default refuse · charter ); rollout 阶段 yml gate 占位, V1.x 激活后接入真实路由 + # 法律契约(default refuse · charter ); rollout 阶段 yml gate 占位, 激活后接入真实路由 authorized: false # 法律授权确认 scope_in_targets: [] # IP/domain/URL 白名单(IN) scope_out_targets: [] # 强制黑名单(覆盖 IN)