diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 7e853399..9c484a3e 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -347,7 +347,7 @@ jobs: { echo "❌ .gitignore 未排除源 MD"; exit 1; } echo "✅ 敏感本地文档受保护" - - name: 验证 .gitignore 排除精髓库(V1.10 §34) + - name: 验证 .gitignore 排除精髓库 run: | if [ -d "_精髓库" ] || [ -d "**/_精髓库" ]; then echo "❌ 私有知识库不应出现在仓库" @@ -357,7 +357,7 @@ jobs: { echo "❌ .gitignore 未排除私有知识库"; exit 1; } echo "✅ 私有知识库受保护" - # ===== 8. L2 self-test(mock LLM,主宪章 §33,V1.10)===== + # ===== 8. L2 self-test===== selftest-mock: name: L2 self-test · mock LLM e2e runs-on: ubuntu-latest @@ -375,7 +375,7 @@ jobs: # 兜底:确保关键运行时模块在 pip install pydantic pydantic-settings typer rich loguru pyyaml openpyxl factory-boy faker prefect defusedxml prompt-toolkit - - name: L1 frontmatter lint(主宪章 §33 第 1 层) + - name: L1 frontmatter lint run: | python -m runtime.healthcheck.agent_smoke diff --git a/.github/workflows/selftest-weekly.yml b/.github/workflows/selftest-weekly.yml index da1d2840..2f90138e 100644 --- a/.github/workflows/selftest-weekly.yml +++ b/.github/workflows/selftest-weekly.yml @@ -1,4 +1,4 @@ -name: 周自检 · L4 真 LLM e2e(主宪章 §33) +name: 周自检 · L4 真 LLM e2e on: schedule: @@ -12,7 +12,7 @@ permissions: env: FORCE_JAVASCRIPT_ACTIONS_TO_NODE24: true -# 仅 main 分支 push 触发,fork PR 不跑(防 secrets 泄露,主宪章 §30) +# 仅 main 分支 push 触发,fork PR 不跑 jobs: selftest-weekly: name: L4 · 真 LLM e2e diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 99f9b29d..42ca2721 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -6,7 +6,7 @@ repos: - id: trailing-whitespace exclude: \.md$ - id: end-of-file-fixer - # upstream skill 目录不动(主宪章 §14 §27 darwin-skill / karpathy-guidelines 禁本地 fork) + # upstream skill 目录不动 exclude: ^skills/(darwin-skill|karpathy-guidelines)/ - id: check-yaml - id: check-json @@ -56,10 +56,10 @@ repos: pass_filenames: false always_run: true - # 防上游引用误入(主宪章 §29 + V1.10 铁律):上游参考是私有跨项目知识,绝不入 Test-Agent repo + 
# 防上游引用误入:上游参考是私有跨项目知识,绝不入 Test-Agent repo - id: forbid-essence-library name: 禁止提交 upstream 参考路径(私有跨项目知识) - entry: bash -c 'M=$(git diff --cached --name-only | grep -E "(^|/)_?精髓库(/|$)" || true); if [ -n "$M" ]; then echo "❌ 禁止提交私有上游参考内容(主宪章 §29 + V1.10 铁律)"; echo "命中文件:"; echo "$M"; echo "处置:git rm --cached && 移出 repo"; exit 1; fi' + entry: bash -c 'M=$(git diff --cached --name-only | grep -E "(^|/)_?精髓库(/|$)" || true); if [ -n "$M" ]; then echo "❌ 禁止提交私有上游参考内容"; echo "命中文件:"; echo "$M"; echo "处置:git rm --cached && 移出 repo"; exit 1; fi' language: system pass_filenames: false always_run: true @@ -72,7 +72,7 @@ repos: pass_filenames: false always_run: true - # L1 自检:agent/skill frontmatter lint(主宪章 §33,V1.10) + # L1 自检:agent/skill frontmatter lint - id: agent-frontmatter-lint name: Agent/Skill frontmatter L1 自检 entry: bash -c 'cd "$(git rev-parse --show-toplevel)" && python -m runtime.healthcheck.agent_smoke || exit 1' @@ -102,7 +102,7 @@ repos: # MD040(fenced-code-language) / MD014(dollar-prefix) / MD009(trailing) / MD012(multi-blank) / MD010(hard-tab) # / MD025(single-h1) / MD026(trailing-punct):中文项目常见 nit,与现有 disable 风格一致 args: ['--disable', 'MD013', 'MD033', 'MD041', 'MD036', 'MD022', 'MD031', 'MD032', 'MD024', 'MD034', 'MD040', 'MD014', 'MD025', 'MD026', 'MD050', 'MD049', 'MD007', 'MD035', 'MD038', 'MD039', '--'] - # upstream skill dirs 不改本地:darwin-skill / karpathy-guidelines(主宪章 §14 §27) + # upstream skill dirs 不改本地:darwin-skill / karpathy-guidelines exclude: ^(skills/(darwin-skill|karpathy-guidelines)/.*)$ # 配置:项目根 .markdownlint.json 自定义规则 diff --git "a/00-\351\241\271\347\233\256\345\257\274\350\210\252.md" "b/00-\351\241\271\347\233\256\345\257\274\350\210\252.md" index 3c657be6..dd350ac2 100644 --- "a/00-\351\241\271\347\233\256\345\257\274\350\210\252.md" +++ "b/00-\351\241\271\347\233\256\345\257\274\350\210\252.md" @@ -5,9 +5,9 @@ --- -## 零、V1.1.0 运行时层(`runtime/`) +## 零、运行时层(`runtime/`) -> 已有 16 专家 / 32 Skill(含 darwin-skill 自进化) / 79 
脚本**保持不动**(宪章铁律),runtime 仅作可执行调度层。 +> 已有 16 专家 / 32 Skill(含 darwin-skill 自进化) / 79 脚本**保持不动**(宪章规则),runtime 仅作可执行调度层。 | 模块 | 路径 | 用途 | |------|------|------| @@ -16,10 +16,10 @@ | 编排 | `runtime/orchestrator/` | Prefect 2.x flow + Direct 降级执行器(双轨) | | API | `runtime/api/` | FastAPI 入口,多格式输入(PDF/Word/MD/exe/APK/IPA/Docker/URL/口头) | | CLI | `runtime/cli/` | `tagent run|plan|catalog|doctor|search|install|verify|export|selftest|init|demo` | -| 导出 | `runtime/exporters/` | 用例多格式导出:xmind/markmap/opml(V1.9,用户自选) | -| 自检 | `runtime/healthcheck/` | 4 层 selftest:L1 lint/L2 mock/L3 真 LLM/L4 周自检(V1.10) | -| 配置 init | `runtime/init/` | `tagent init` 5 分钟自动组装 .env + tagent.yml + STARTUP.md(V1.12,8 类型 × 6 平台 × 5 LLM × 6 BugTracker × 6 通知 = 8640 组合) | -| 真 agent | `runtime/orchestrator/agents/` | 5 核心 expert LLM 真跑:requirements/automation/test-executor/bug-manager/test-lead(V1.14) | +| 导出 | `runtime/exporters/` | 用例多格式导出:xmind/markmap/opml | +| 自检 | `runtime/healthcheck/` | 4 层 selftest:L1 lint/L2 mock/L3 真 LLM/L4 周自检 | +| 配置 init | `runtime/init/` | `tagent init` 5 分钟自动组装 .env + tagent.yml + STARTUP.md | +| 真 agent | `runtime/orchestrator/agents/` | 5 核心 expert LLM 真跑:requirements/automation/test-executor/bug-manager/test-lead | | 飞轮 | `runtime/storage/` | Postgres+pgvector + MinIO + Alembic | | 观测 | `runtime/observability/` | OpenTelemetry + Loguru | | 配置 | `runtime/config/settings.py` | pydantic-settings(`TAGENT_*` env) | @@ -52,7 +52,7 @@ | `/smoke-test` | `ai/skills/smoke-test.md` | P0 冒烟(≥95% 门禁) | | `/test-coordinator` | `ai/skills/test-coordinator.md` | 完整流程编排(自动平台路由) | | `/regression-test` | `ai/skills/regression-test.md` | P0+P1 回归 + Flaky + JMeter 基线对比 | -| `/testcase-design` | `ai/skills/testcase-design.md` | 默认 4 Sheet Excel,V1.9 加 xmind/markmap/opml 多格式自选 | +| `/testcase-design` | `ai/skills/testcase-design.md` | 默认 4 Sheet Excel | | `/python-script-gen` | `ai/skills/python-script-gen.md` | pytest UI/API 脚本 | | `/jmeter-script-gen` | `ai/skills/jmeter-script-gen.md` | 
JMeter JMX(双模式 ci_quick/full) | | `/data-preparation` | `ai/skills/data-preparation.md` | 测试数据 + JMeter CSV | @@ -271,7 +271,7 @@ --- -## V1.2.0 · MCP 6 件套(2026-05-11) +## MCP 6 件套(2026-05-11) | MCP server | 路径 | 工具 | |------------|------|------| @@ -283,11 +283,11 @@ | compliance-checker | `runtime/mcp/compliance_checker/` | list_profiles/get_profile/check_compliance | 合规规则插槽:`profiles/compliance/`(10 框架空载 YAML) -Web UI MVP:`runtime/web/`(Vite+React+shadcn,4 页 + §21 L2 测试套件) +Web UI MVP:`runtime/web/`(Vite+React+shadcn,4 页 + L2 测试套件) --- -## V1.3.0 · Hermes-inspired 5 模块(2026-05-11) +## Hermes-inspired 5 模块(2026-05-11) | 模块 | 路径 | 用途 | |------|------|------| @@ -297,11 +297,11 @@ Web UI MVP:`runtime/web/`(Vite+React+shadcn,4 页 + §21 L2 测试套件) | 后端 | `runtime/backends/` | local/docker/ssh/singularity/modal/daytona/vercel_sandbox | | Gateway | `runtime/gateway/` | telegram/discord/slack/wechat/feishu/dingtalk/email/webhook | -运行时 prompt 全扫 / 决策不可逆 / 隔离 client / Backend+Platform 抽象。 +运行时 prompt 全扫 / 不可逆操作 / 隔离 client / Backend+Platform 抽象。 --- -## V1.4.0 · 教学层(2026-05-12) +## 教学层(2026-05-12) 用户部署后可**边用边学**:exec(老手)/learn(新手)/silent(CI) 三模式 + zh/en/zh-en 三语言。 @@ -316,7 +316,7 @@ Web UI MVP:`runtime/web/`(Vite+React+shadcn,4 页 + §21 L2 测试套件) --- -## V1.5.0 · GBrain-inspired 强化(2026-05-12) +## GBrain-inspired 强化(2026-05-12) | 能力 | 路径 | |------|------| @@ -328,7 +328,7 @@ Web UI MVP:`runtime/web/`(Vite+React+shadcn,4 页 + §21 L2 测试套件) --- -## V1.6.0 · 渗透&安全 + 车载&自动驾驶(2026-05-12) +## 渗透&安全 + 车载&自动驾驶(2026-05-12) ### 2 新专家(平台扩展 6+7) @@ -356,11 +356,11 @@ Web UI MVP:`runtime/web/`(Vite+React+shadcn,4 页 + §21 L2 测试套件) 总数:14 专家 → **16** | 14 skill → **32** -installing/upstream-licensing 收录铁律。 +installing/upstream-licensing 收录规则。 --- -## V1.7.0 · Karpathy + ECC + Essence 自动汲取(2026-05-12) +## Karpathy + ECC + Essence 自动汲取(2026-05-12) ### 新 skill(7 个) @@ -382,11 +382,11 @@ installing/upstream-licensing 收录铁律。 总数:14 → 16 专家 | 14 → **32** skill 
-marketplace/agent-introspection/essence-watcher 铁律。 +marketplace/agent-introspection/essence-watcher 规则。 --- -## V1.8.0 · byox + Marketplace(2026-05-12) +## byox + Marketplace(2026-05-12) ### 教学层 KB 扩 13 类 @@ -404,4 +404,4 @@ marketplace/agent-introspection/essence-watcher 铁律。 | `runtime/marketplace/` | catalog + verifier + installer | | CLI `tagent search/list/install/uninstall/verify` | 5 子命令 | -byox/build-your-own 收录铁律。 +byox/build-your-own 收录规则。 diff --git a/CHANGELOG.md b/CHANGELOG.md index 1761cc69..3d9efecc 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -67,7 +67,7 @@ - `/forget` 关键词太短导致误删:最小 3 字符 - `load_memory_md` 异常静默吞噬:改为 `logger.warning` - skill rollout 总数: 16 → 18 (中央 `runtime/tests/test_skill_runners.py` `ALL_SKILL_RUNNERS` 同步加 2 行) -- skill active 数: 30/32 → **32/32** (V1.x SKILL ROLLOUT 完整收尾,0 vision/0 rollout/0 unknown) +- skill active 数: 30/32 → **32/32** (SKILL ROLLOUT 完整收尾,0 vision/0 rollout/0 unknown) - runtime/orchestrator/skills/__init__.py: 聚合 import 新增 agent_introspection_debugging + build_your_own_x_explorer - **P2 能力层 — Agent 交互层 6 项全部实装:** @@ -250,7 +250,7 @@ _后续累积变更入此节;切版本时移到下方版本节。_ ### Changed - README: 数字诚实化(8640→~12 CI-validated; 95% aspirational→removed; 32 skills→30 active+2 vision) -- 00-项目导航/ROADMAP: 清除12处主宪章内部引用 +- 00-项目导航/ROADMAP: 清除12处内部引用 - generate_report.py: 143行→6 helper functions - mobile_driver.py: 107行→_build_monkey_cmd+_analyze_monkey_log @@ -299,55 +299,55 @@ _后续累积变更入此节;切版本时移到下方版本节。_ ## [v1.32.0] - 2026-05-17 -> **V1.15.0 → V1.32.0 (2026-05-15 ~ 2026-05-17) 共 17 版累积**。 -> expert rollout 收尾 (V1.15-V1.20) + skill rollout 全 16/16 完成 (V1.21-V1.31)。 +> **2026-05-15 ~ 2026-05-17 共 17 版累积**。 +> expert rollout 收尾 + skill rollout 全 16/16 完成。 > 版本历史见 [ROADMAP.md](ROADMAP.md#进度跟踪) 进度跟踪表。 -### Added (V1.15-V1.20 · expert rollout 收尾 — 6 expert 真 LLM 落地) +### Added (expert rollout 收尾 — 6 expert 真 LLM 落地) - **6 expert runner LLM-driven** (`runtime/orchestrator/agents/`): - - `env_manager.py` (V1.15) · 测试环境管理 — LLM 读 PRD → env_checks + prep_steps + dependencies + 
risks - - `mobile_tester.py` (V1.16) · 移动端 — Android/iOS 原生 + 小程序 - - `visual_tester.py` (V1.17) · 视觉/游戏 — 图像识别 + OCR + AI 视觉对比 - - `system_tester.py` (V1.18) · 系统集成 — IoT/串口/MQTT/音视频/追踪/消息队列 - - `pentest_tester.py` (V1.19) · 渗透安全 — 5 攻击域 + recon/vuln/exploit/reporting 4 阶段 - - `automotive_tester.py` (V1.20) · 车载/自动驾驶 — ISO 26262 ASIL + CAN/LIN/FlexRay + ECU/ADAS/IVI/V2X + - `env_manager.py` · 测试环境管理 — LLM 读 PRD → env_checks + prep_steps + dependencies + risks + - `mobile_tester.py` · 移动端 — Android/iOS 原生 + 小程序 + - `visual_tester.py` · 视觉/游戏 — 图像识别 + OCR + AI 视觉对比 + - `system_tester.py` · 系统集成 — IoT/串口/MQTT/音视频/追踪/消息队列 + - `pentest_tester.py` · 渗透安全 — 5 攻击域 + recon/vuln/exploit/reporting 4 阶段 + - `automotive_tester.py` · 车载/自动驾驶 — ISO 26262 ASIL + CAN/LIN/FlexRay + ECU/ADAS/IVI/V2X - expert 16/16 **全 active** (11 production + 5 script, 0 rollout) -### Added (V1.21-V1.31 · skill rollout — 16 skill LLM-driven 全落地) +### Added -- **SkillRunner 基础设施** (V1.21): `runtime/orchestrator/skills/__init__.py` + `SKILL_RUNNERS` registry + `@register_skill` deco +- **SkillRunner 基础设施** : `runtime/orchestrator/skills/__init__.py` + `SKILL_RUNNERS` registry + `@register_skill` deco - **16 skill runner LLM-driven** (`runtime/orchestrator/skills/`): - - pentest-coordinator (V1.21) · 渗透流程主编排 (5 阶段 + 授权检查 + 子代理池) - - mobile-test (V1.23) · Android/iOS 双平台 + 小程序 - - visual-test (V1.24) · Airtest + OCR + SSIM 视觉对比 - - pentest-recon + pentest-vuln (V1.25) · 侦察 + 漏洞 (5 攻击域 hybrid) - - system-test (V1.26) · IoT/音视频/追踪/消息队列 4 场景 - - eval-harness (V1.27) · pass@k / Jaccard@k / top-1 stability / latency + 安全护栏 - - pentest-api + pentest-web (V1.28) · API 安全 + Web 应用安全 - - pentest-exploit + pentest-report (V1.29) · PoC 验证 + 报告生成 - - automotive-test + automotive-can-bus-test (V1.30) · 10 阶段主编排 + CAN 协议测试 - - automotive-adas-scenario + automotive-ota-update-test + automotive-hil-loop-test (V1.31) · ADAS 场景 + OTA 升级 + HIL 环测 + - pentest-coordinator · 渗透流程主编排 (5 阶段 + 授权检查 + 子代理池) + - mobile-test · 
Android/iOS 双平台 + 小程序 + - visual-test · Airtest + OCR + SSIM 视觉对比 + - pentest-recon + pentest-vuln · 侦察 + 漏洞 (5 攻击域 hybrid) + - system-test · IoT/音视频/追踪/消息队列 4 场景 + - eval-harness · pass@k / Jaccard@k / top-1 stability / latency + 安全护栏 + - pentest-api + pentest-web · API 安全 + Web 应用安全 + - pentest-exploit + pentest-report · PoC 验证 + 报告生成 + - automotive-test + automotive-can-bus-test · 10 阶段主编排 + CAN 协议测试 + - automotive-adas-scenario + automotive-ota-update-test + automotive-hil-loop-test · ADAS 场景 + OTA 升级 + HIL 环测 - skill 23/32 production + 7 script + 0 rollout + 2 vision -### Added (V1.22 · CLI + 多 provider) +### Added (CLI + 多 provider) - **tagent config CLI** (`runtime/cli/config.py`): `tagent config use/set/unset/list/show` - **多 provider 通用 env 通道**: `LLM_PROVIDER` + `LLM_API_KEY` + `LLM_MODEL` 通用 env, 6 provider 内置 (claude/openai/gemini/qwen/deepseek/ollama) - Stub 扩 4 path 支持 vendor-neutral 多厂商 routing -### Added (MCP 6 件套 · V1.2.0) +### Added (MCP 6 件套 · ) - `runtime/mcp/` 6 MCP server: test-orchestrator / protocol-adapter / evidence-vault / defect-tracker / knowledge-base / compliance-checker -### Added (Web UI · V1.2.0) +### Added (Web UI · ) - `runtime/web/` 4 页: Upload / Run Status / Report / Catalog (React 18 + Vite 5 + shadcn/ui + TanStack Query v5) ### Changed - **数字升级**: 14 expert → 16 expert, 14 skill → 32 skill -- **防 mock 双 layer** (V1.14.0+1): registry 单源 frontmatter + router warn + orchestrator hard block +- **防 mock 双 layer** : registry 单源 frontmatter + router warn + orchestrator hard block - vendor-neutral 命名规范 (禁 zhipu/deepseek/openai 厂商名) - README/ROADMAP 数字诚实化 (多轮校准) @@ -361,11 +361,11 @@ _后续累积变更入此节;切版本时移到下方版本节。_ ## [v1.14.0] - 2026-05-12 -> **首次正式版本切节**(W7-2, 2026-05-14): V1.1.0 到 V1.14.0 共 13 个内部 alpha 累积归入本节。后续新变更入 [Unreleased]。 +> **首次正式版本切节**(W7-2, 2026-05-14): 到 共 13 个内部 alpha 累积归入本节。后续新变更入 [Unreleased]。 -### Added(V1.14.0 · 5 核心 expert 真 LLM 落地 + 录制脚本 · 2026-05-12) +### Added(5 核心 expert 真 LLM 落地 + 录制脚本 · 2026-05-12) -> 
起因:战略参谋诚实交底——V1.13 的 selftest 100% PASS 是"骨架通"不是"内涵通",16 expert 里 11 个仍是 no-op。用户授权 C 路线(5 核心 expert 真 LLM)+ B(录制脚本)。 +> 起因:战略参谋诚实交底——的 selftest 100% PASS 是"骨架通"不是"内涵通",16 expert 里 11 个仍是 no-op。用户授权 C 路线(5 核心 expert 真 LLM)+ B(录制脚本)。 - **`runtime/orchestrator/agents/` 新模块**: - `base.py`:`AgentRunner` ABC + `RunnerContext` + `RunnerResult` + `AGENT_RUNNERS` registry + `@register` + `get_runner` @@ -375,7 +375,7 @@ _后续累积变更入此节;切版本时移到下方版本节。_ - **真 LLM** 时:调 `aux_client.complete()` → 解析 JSON → 落盘 → 给下游 - INDEX.md 文档化 5 runner schema + 上下游 - **adapter wiring**(`runtime/orchestrator/adapters/experts.py`): - - `execute_node` 先查 `AGENT_RUNNERS`(优先 V1.14),fallback `SCRIPT_MAP`(主宪章 §9 不破坏) + - `execute_node` 先查 `AGENT_RUNNERS`(优先 ),fallback `SCRIPT_MAP` - `_upstream_outputs` 缓存:每 runner 产物给下游 RunnerContext.upstream - `reset_upstream_cache()` 由 flow 每 run 开头调 - SCRIPT_MAP 路径排除 `artifact_text/lang/mode` 防多行文本炸 argparse @@ -391,12 +391,12 @@ _后续累积变更入此节;切版本时移到下方版本节。_ - `_demo-commands.sh`:实际 demo 命令序列(被 record-demo-* 调) - `record-demo-asciinema.sh`:`asciinema rec` 自动录,产 .cast 可上传 asciinema.org 或转 GIF/SVG - `record-demo-obs.sh`:OBS / QuickTime 屏幕录制配套(用户摁录制 → 跑此脚本,节奏自动) - - `docs/assets/terminalizer-config.yml`:精修 V1.14 配置(Catppuccin Mocha 主题 + UTF-8 + stub LLM env) -- **主宪章 §40 真 agent 落地 canon**:5 核心 + 11 fallback + 加新 runner 流程 + RunnerContext / RunnerResult 协议 -- 烟测:**9/9 strict PASS · 5 真 runner 产物全落盘**(原 V1.13 8/8 是 3 script + 5 no-op,V1.14 是 5 真 runner + 3 script + 1 no-op) -- 版本 V1.13.0 → **V1.14.0** + - `docs/assets/terminalizer-config.yml`:精修 配置(Catppuccin Mocha 主题 + UTF-8 + stub LLM env) +- **真 agent 落地 canon**:5 核心 + 11 fallback + 加新 runner 流程 + RunnerContext / RunnerResult 协议 +- 烟测:**9/9 strict PASS · 5 真 runner 产物全落盘**(原 8/8 是 3 script + 5 no-op +- 版本 → **** -### Added(V1.13.0 · README hero 重写 + `tagent demo` + 30 秒 demo 录制脚本 · 2026-05-12) +### Added(README hero 重写 + `tagent demo` + 30 秒 demo 录制脚本 · 2026-05-12) - **`tagent demo` 子命令**:0 API key / 0 配置一键跑通 4 步——init 
minimal preset + L1 lint + L2 e2e + 产物清单 - 自动 stub LLM + 重置 settings 缓存,避免 `_kernel` 模块加载顺序问题 @@ -411,9 +411,9 @@ _后续累积变更入此节;切版本时移到下方版本节。_ - 渠道适配:Twitter/X · 微信视频号 · 掘金/V2EX · Hacker News(同一份素材 4 平台) - **00-导航 同步**:CLI 行加 `demo` 子命令 - 烟测 `tagent demo` 产 36+ 文件全过 · L1/L3 strict 不破 -- 版本 V1.12.0 → **V1.13.0** +- 版本 → **** -### Added(V1.12.0 · `tagent init` 配置自动组装 · 5 分钟从 0 到可跑 · 2026-05-12) +### Added(`tagent init` 配置自动组装 · 5 分钟从 0 到可跑 · 2026-05-12) - **新模块 `runtime/init/`**: - `matrix.py`:`load_matrix()` 加载 `config/templates/matrix.yaml`(单源真理) @@ -433,30 +433,30 @@ _后续累积变更入此节;切版本时移到下方版本节。_ - 测试类型:web/api/mobile/desktop/iot/car/ai_model/security - 平台:linux/windows/mac/android/ios/embedded - LLM:claude/openai/qwen/deepseek/ollama - - BugTracker(主宪章 §37):zentao/jira/github/gitlab/linear/webhook - - 通知(主宪章 §36):wechat/feishu/dingtalk/slack/email/teams(可多选) + - BugTracker:zentao/jira/github/gitlab/linear/webhook + - 通知:wechat/feishu/dingtalk/slack/email/teams(可多选) - **加新选项**:改 `matrix.yaml` 一处,wizard/CLI 自动列出(无需改代码) - **STARTUP.md 启动指南**:含填占位清单 + 装依赖 hint + 健康检查 + 烟雾跑通命令 + 推荐 skill 顺序 + 出错对照表 - 烟测:5 preset × 全过 + 8 测试类型组合全过 - L1 + L3 strict 不破:agents=16/16 skills=32/≥25 + selftest 8/8 100% -- 版本 V1.11.0 → **V1.12.0** +- 版本 → **** -### Fixed(V1.11.0 · 同步铁律批改 + BugTracker/多端 canon + n7 修 · 2026-05-12) +### Fixed(同步规则批改 + BugTracker/多端 canon + n7 修 · 2026-05-12) -- **同步铁律(§1)执行**:17 文件批改"三端通知"→"多端通知";"禅道 Bug 提交"项目级框架→"BugTracker(默认禅道,可换 Jira/GitHub/GitLab/Linear/Webhook)" +- **同步规则()执行**:17 文件批改"三端通知"→"多端通知";"禅道 Bug 提交"项目级框架→"BugTracker(默认禅道,可换 Jira/GitHub/GitLab/Linear/Webhook)" - `00-项目导航.md` · `agents/{01,07,08,09}.md` · `agents/README.md` · `skills/{README,test-coordinator,zentao-bug-submission}.md` · `config/mcp-server-impl.md` · `utils/{README.md,api_retry_util.py}` · `ci/{INDEX,CICD集成说明}.md` · `docs/getting-started/{交付物清单,使用手册,配置清单}.md` · `examples/web-demo/README.md` · `CONTRIBUTING.md` · `FULL_GUIDE.md` -- **adapter 修 V1.10 n7 
bug**:`runtime/orchestrator/adapters/experts.py` 加 `SCRIPT_DEFAULT_ARGS` + `_ensure_fixture()` 通用机制 +- **adapter 修 n7 bug**:`runtime/orchestrator/adapters/experts.py` 加 `SCRIPT_DEFAULT_ARGS` + `_ensure_fixture()` 通用机制 - 现 `tagent selftest --e2e --strict` **100% PASS 8/8**(原 88% 7/8) - generate_report.py 默认注入 `--data=workspace/测试报告/{项目名}/_selftest_summary.json`,fixture 自动生成 -- **主宪章扩**: - - §36 多端通知 canon(扩 §6,6 渠道权威清单 + env 字段 + 业务语言铁律) - - §37 BugTracker canon(扩 §12,6 adapter 权威清单 + measurement env + 措辞规范) - - §10 五铭文 + §6 MCP 接入:"三端通知" → "多端通知" +- **扩**: + - 多端通知 canon(扩 ,6 渠道权威清单 + env 字段 + 业务语言规则) + - BugTracker canon(扩 ,6 adapter 权威清单 + measurement env + 措辞规范) + - 五铭文 + MCP 接入:"三端通知" → "多端通知" - VERSION:1.10.0 → **1.11.0** -### Added(V1.10.0 · 4 层自检 + 精髓库三重防线 + 字体粗细 · 2026-05-12) +### Added(4 层自检 + 精髓库三重防线 + 字体粗细 · 2026-05-12) -- **4 层自检铁律(主宪章 §33)**: +- **4 层自检规则**: - L1 frontmatter lint(无 LLM):`runtime/healthcheck/agent_smoke.py` + pre-push hook - L2 CI mock e2e(stub LLM,0 成本):`selftest-mock` job 每 push 跑 - L3 真 LLM(~$4/release):`tagent doctor --agents --probe` + `tagent selftest --e2e` @@ -466,20 +466,20 @@ _后续累积变更入此节;切版本时移到下方版本节。_ - `tagent doctor --agents [--probe]`:L1 + 可选 L3 LLM ping 16 agent - `tagent selftest --e2e [--strict] [--pass-threshold 0.80]`:整体 e2e - **`LLMClient.complete()`**:plain text completion(原仅 `complete_json`),probe 用 -- **上游引用三重防线(主宪章 §34)**: +- **上游引用三重防线**: - `.gitignore` 加 upstream 排除规则 - - pre-commit hook 防止误入(主宪章 §29 + V1.10 铁律) + - pre-commit hook 防止误入 - CI file-count job 双校验 -- **字体粗细统一(主宪章 §35)**:`docs/STYLE.md`(标题 ≤3 级,`**bold**` 仅 3 场景,中英空格) +- **字体粗细统一**:`docs/STYLE.md`(标题 ≤3 级,`**bold**` 仅 3 场景,中英空格) - **补缺顶级 INDEX**:`docs/INDEX.md` + `examples/INDEX.md` + `profiles/INDEX.md` + `scripts/INDEX.md` - **pre-tag hook**:`scripts/git-pre-tag.sh` 卡 `git tag v1.x`(7 天内必须有 L3 log) - **fixture**:`examples/_smoke_prd.md` 触发完整 16 agent DAG -- 主宪章扩 §33/§34/§35;VERSION 1.9.0 → **1.10.0** +- 扩 //;VERSION 1.9.0 → **1.10.0** - 烟雾测试:L1 
16/16+32/≥25 全过;L2 stub e2e 88% PASS(7/8 节点) -### Added(V1.9.0 · 用例多格式导出 · 用户自选 · 2026-05-12) +### Added(用例多格式导出 · 用户自选 · 2026-05-12) -- **`runtime/exporters/` 新模块**(对标主宪章 §5 多格式 I/O): +- **`runtime/exporters/` 新模块**: - `base.py`:`TestCaseTree` + `TestCaseNode` IR + `Exporter` ABC + `REGISTRY` + `@register` 装饰器 - `xmind.py`:XMind 8/ZEN/2020+ `.xmind`(ZIP:content.json + metadata.json + manifest.json,P0→priority-1 marker 自动转,无第三方 lib) - `markmap.py`:Markmap `.md`(frontmatter + nested headings/list,GitHub README 直渲,markmap.js / VSCode 插件兼容) @@ -492,13 +492,13 @@ _后续累积变更入此节;切版本时移到下方版本节。_ tagent export plan.json --format opml --out workspace/测试用例/login.opml tagent export plan.json --format all --out-dir workspace/测试用例/ ``` -- **`/testcase-design` skill 扩**:description 加多格式声明;末尾加 V1.9 思维导图 / 大纲段(Excel 仍是默认) -- **保留**:Excel 4-Sheet(`utils/excel_generator.py`)不动,§27 简洁优先 +- **`/testcase-design` skill 扩**:description 加多格式声明;末尾加 思维导图 / 大纲段(Excel 仍是默认) +- **保留**:Excel 4-Sheet(`utils/excel_generator.py`)不动 - **扩展点 P2 留位**:freemind / plantuml / mermaid-mindmap(按需加) - 烟雾测试:3 exporter × sample TestCaseTree 全过(content.json 解析正常 / OPML XML 解析正常 / Markmap frontmatter 完整) -- 版本 V1.8.0 → V1.9.0 +- 版本 → -### Added(V1.8.0 · build-your-own-x 教学扩 + Marketplace 4 lane · 2026-05-12) +### Added(build-your-own-x 教学扩 + Marketplace 4 lane · 2026-05-12) - **上游参考扩**:`build-your-own-x.md`(codecrafters/build-your-own-x curated list 萃取);加 INDEX 条目 - **教学层 KB 扩 13 大类**(原 12 → 13,加 `13-build-your-own/`): @@ -509,36 +509,36 @@ _后续累积变更入此节;切版本时移到下方版本节。_ - `marketplace/{skills,agents,mcp,hooks}/` 目录 - `marketplace/INDEX.md` + `registry.json` + `_safety_policy.yaml`(4 关安全门 + 3 信任级源) - `runtime/marketplace/`:catalog.py + verifier.py + installer.py + INDEX - - 4 关安全门:签名校验(SHA256/ed25519) + 注入扫(复用 §22 scheduler 模块) + 沙箱试跑(Docker network=none) + darwin 评分(≥75) + - 4 关安全门:签名校验(SHA256/ed25519) + 注入扫(复用 scheduler 模块) + 沙箱试跑(Docker network=none) + darwin 评分(≥75) - **CLI 加 5 子命令**:`tagent 
search/list/install/uninstall/verify` -- **主宪章 §30**:Marketplace 安全栅栏(4 关铁律 + 3 信任级 + safe-by-default + 不复制官方源 + 卸载只归档 + 紧急 kill switch) -- **主宪章 §31**:教学层扩 13 大类(byox P0/P1/P2 分档 + 预算检查 + essence_only policy) +- ****:Marketplace 安全栅栏(4 关规则 + 3 信任级 + safe-by-default + 不复制官方源 + 卸载只归档 + 紧急 kill switch) +- ****:教学层扩 13 大类(byox P0/P1/P2 分档 + 预算检查 + essence_only policy) - TOC 同步;skill 数升级 -- 版本 V1.7.0 → V1.8.0 +- 版本 → -### Added(V1.7.0 · Karpathy 4 原则 + ECC 测试加固 + Essence 自动汲取 · 2026-05-12) +### Added(Karpathy 4 原则 + ECC 测试加固 + Essence 自动汲取 · 2026-05-12) - **上游参考扩 2 条目**: - `karpathy-skills.md`(125k★ · LLM 写代码 4 原则元层) - `everything-claude-code.md`(179k★ · AI agent harness 性能优化 200 skill / 53 agent / Homunculus instincts / Selective install) -- **Karpathy 4 原则**(主宪章 §27,元层贯穿):Think Before / Simplicity First / Surgical Changes / Goal-Driven Execution;`skills/karpathy-guidelines/SKILL.md` 部署 upstream 原文(类 darwin-skill 不改本地) -- **ECC 6 测试 skill 入库**(对测试有用的,§28): +- **Karpathy 4 原则**:Think Before / Simplicity First / Surgical Changes / Goal-Driven Execution;`skills/karpathy-guidelines/SKILL.md` 部署 upstream 原文(类 darwin-skill 不改本地) +- **ECC 6 测试 skill 入库**(对测试有用的 - `tdd-workflow` · TDD 80%+ 覆盖 - `verification-loop` · 5-phase verify(build→typecheck→lint→test→coverage) - `e2e-testing` · Playwright + 2FA/TOTP/SSO + 视觉回归 + 录屏 - `eval-harness` · pass@k / Jaccard@k / top-1 / latency Δ - - `security-review` · 代码层白盒 5 维(与 §25 pentest 应用层互补) + - `security-review` · 代码层白盒 5 维(与 pentest 应用层互补) - `agent-introspection-debugging` · 决策回放 + OTel + token + 上下文 -- **Essence 自动汲取**(主宪章 §29):`runtime/essence_watcher/` +- **Essence 自动汲取**:`runtime/essence_watcher/` - parser + tracker(gh API)+ delta_extractor(aux LLM)+ runner - 周期跑;新 commit → LLM 萃取 delta → 写 upstream update 文件 标 `llm-draft-unreviewed` 待审 - `apply_policy.example.yaml`:auto_propose / essence_only / never 三档 - safe-by-default:`tagent.yml essence_watcher.enabled: true` 才跑 -- **主宪章新增 3 节**:§27 Karpathy 4 原则 / §28 ECC 测试加固 / §29 Essence 
自动汲取 + TOC 同步 +- **新增 3 节**:Karpathy 4 原则 / ECC 测试加固 / Essence 自动汲取 + TOC 同步 - 数字:14 skill → **32**(原 14 + 7 pentest + 5 automotive + 6 ECC) + `karpathy-guidelines/SKILL.md` upstream 1 个 -- 版本 V1.6.0 → V1.7.0 +- 版本 → -### Added(V1.6.0 · 渗透&安全 + 车载&自动驾驶 双垂直专家+skill 集 · 2026-05-12) +### Added(渗透&安全 + 车载&自动驾驶 双垂直专家+skill 集 · 2026-05-12) - **上游参考扩**:`pentest-ai-agents.md` 合并萃取 pentagi(黑盒)+ shannon(白盒);10 节;含对比表+应用 checklist - **2 新专家**: @@ -548,25 +548,25 @@ _后续累积变更入此节;切版本时移到下方版本节。_ - `pentest-coordinator`(主)/ `pentest-recon` / `pentest-vuln` / `pentest-exploit` / `pentest-web` / `pentest-api` / `pentest-report` - **5 新 automotive skill**: - `automotive-test`(主)/ `automotive-can-bus-test` / `automotive-adas-scenario` / `automotive-ota-update-test` / `automotive-hil-loop-test` -- **主宪章 §25**:渗透 & 安全测试强化(规则化:授权前置 / scope 防护 / prod 禁 / 沙箱 / PoC-only / 不可逆禁止 / 责任披露 / PII scrub) -- **主宪章 §26**:车载 & 自动驾驶强化(规则化:ASIL C/D 必 HIL / L4 极深 / OTA 必回退 / 公开道路授权 / 录波 MDF4 / PII 禁存 / 领域档案签字) -- **主宪章 §2 升级**:专家 14 → 16(核心 9 + 平台扩展 7) -- **TOC 同步**:加 §25 §26 +- ****:渗透 & 安全测试强化(规则化:授权前置 / scope 防护 / prod 禁 / 沙箱 / PoC-only / 不可逆操作 / 责任披露 / PII scrub) +- ****:车载 & 自动驾驶强化(规则化:ASIL C/D 必 HIL / L4 极深 / OTA 必回退 / 公开道路授权 / 录波 MDF4 / PII 禁存 / 领域档案签字) +- **升级**:专家 14 → 16(核心 9 + 平台扩展 7) +- **TOC 同步**:加 - 数字:14 expert → **16** | 14 skill → **26**(7 pentest + 5 automotive 新增) -- 版本 V1.5.0 → V1.6.0 +- 版本 → -### Added(V1.5.0 · GBrain-inspired 强化 + 跨项目精髓库扩 · 2026-05-12) +### Added(GBrain-inspired 强化 + 跨项目精髓库扩 · 2026-05-12) - **上游参考扩**:`gbrain.md`(完整 10 节萃取,300+ 行)+ INDEX 更新 - **KB 自连图谱**:`runtime/tutor/graph.py`,零 LLM 抽取 typed link(6 种边:related_to/superseded_by/extends/prerequisite_of/contradicts/tool_implements);BFS walk + backlink-boosted ranking。实测 12 卡 → 40 edges + 44 nodes - **eval 回放**:`runtime/tutor/eval_replay.py`,`TAGENT_EVAL_CAPTURE=1` opt-in;PII 自动 scrub(email/phone/SSN/API-key/card 6 类正则);replay 3 数(Jaccard@k/top-1 stability/latency Δ);默认 off - **safe-by-default yaml 
栅栏**:`runtime/config/safety.py` + `tagent.yml.example`;scheduler/curator/backends/gateway/destructive_ops 默认 deny;`assert_allowed` / `gate_*` 工厂函数;缺配置 → `SafeByDefaultBlocked` 异常 -- **主宪章 §24**:GBrain-inspired 强化(自连图谱 + 混合检索 + eval 回放 + safe-by-default + PII 单源)+ TOC 同步 -- 版本 V1.4.0 → V1.5.0 +- ****:GBrain-inspired 强化(自连图谱 + 混合检索 + eval 回放 + safe-by-default + PII 单源)+ TOC 同步 +- 版本 → -### Added(V1.4.0 · 教学层 · 用户边用边学 · 2026-05-12) +### Added(教学层 · 用户边用边学 · 2026-05-12) -- **主宪章 §23 教学层准则**:exec(老手)/learn(新手)双模式 + 反幻觉 3 层 + 双语切换 + 持续累积 +- **教学层准则**:exec(老手)/learn(新手)双模式 + 反幻觉 3 层 + 双语切换 + 持续累积 - **Theory KB**:`docs/theory/`,12 大类目录(工具/编程/基础理论/策略/方法/协议/平台/门禁/安全/AI测试/合规/流程) - `_schema.yaml`:卡片字段定义(id/category/level/authority/confidence/last_reviewed) - `_authority_sources.yaml`:权威源白名单(国际 ISTQB/IEEE/ISO/IEC/NIST/OWASP/MITRE/Google/Microsoft/Fowler/arXiv/ICSE/ISSTA + 中国 GB/T/等保/阿里/腾讯/美团/字节/CCF + AI HF/Anthropic/OpenAI/DeepEval + 经典书 Beizer/Myers/Crispin/Kaner) @@ -582,9 +582,9 @@ _后续累积变更入此节;切版本时移到下方版本节。_ - **CLI**:`tagent run --mode exec|learn|silent --lang zh|en|zh-en` - **API**:`POST /run/text?mode=&lang=` query 参数 - **反幻觉**:实测 unknown-id 正确标记"该领域未收录,慎用" -- 版本 V1.3.0 → V1.4.0 +- 版本 → -### Added(V1.3.0 · Hermes-inspired 5 模块 + 跨项目精髓库 · 2026-05-11) +### Added(Hermes-inspired 5 模块 + 跨项目精髓库 · 2026-05-11) - **跨项目上游参考**:已建立 - `INDEX.md`:精髓库索引 @@ -595,51 +595,51 @@ _后续累积变更入此节;切版本时移到下方版本节。_ - `runtime/learning_loop/`:封闭学习循环(curator 闲置触发 + FTS5 跨会话搜 + 用户画像);`curator.py / session_search.py / user_model.py`;只归档不删 - `runtime/backends/`:7 执行后端(`local/docker/ssh/singularity/modal/daytona/vercel_sandbox`);统一 `BaseExecutionEnv` 7 方法;Modal/Daytona 提供 serverless hibernate - `runtime/gateway/`:多平台 messaging(`telegram/discord/slack/wechat/feishu/dingtalk/email/webhook` 8 平台);统一 `Platform.send/configure`;`session.py` 跨平台对话连续 -- **主宪章 §22**:Hermes-inspired 扩展能力章节(规则化);TOC 同步更新 -- 版本 V1.2.0 → V1.3.0 +- ****:Hermes-inspired 扩展能力章节(规则化);TOC 同步更新 +- 版本 → -### Added(V1.2.0 · M2 MCP 6 件套 + Web UI + 
真模型路由 + 飞轮回灌 · 2026-05-11) +### Added(M2 MCP 6 件套 + Web UI + 真模型路由 + 飞轮回灌 · 2026-05-11) -- **MCP 6 件套全部实现**(主宪章 §16): +- **MCP 6 件套全部实现**: - `runtime/mcp/test_orchestrator/`:包装 runtime/router + orchestrator,5 工具(catalog/plan/run/status/report);Claude Code 可直接调用 - `runtime/mcp/protocol_adapter/`:统一 ProtocolAdapter 抽象 + 5 起步 adapter(HTTP/gRPC/WS/MQTT/Kafka);HTTP 实测 ping 通过 - `runtime/mcp/evidence_vault/`:证据归档 5 工具(upload_evidence/upload_evidence_path/list/get/search),MinIO + Postgres - - `runtime/mcp/defect_tracker/`:工单桥 5 工具(create/get/update/query_bugs/list_trackers),默认 zentao + 预留扩展位(主宪章 §12 契约) + - `runtime/mcp/defect_tracker/`:工单桥 5 工具(create/get/update/query_bugs/list_trackers),默认 zentao + 预留扩展位 - `runtime/mcp/knowledge_base/`:pgvector 向量检索 4 工具(embed/index_case/index_defect/search_similar),LiteLLM embedding + stub 兜底 - `runtime/mcp/compliance_checker/`:行业合规规则 3 工具(list_profiles/get_profile/check_compliance);10 框架 profile 起步空载(SOC2/PCI-DSS/HIPAA/IEC 62304/IEC 61508/ISO 26262/DO-178C/GDPR/PIPL/CCPA) - - 共享基类 `runtime/mcp/base.py`:make_server / run_stdio / @tool_decision_logged(决策落 `workspace/测试报告/{项目名}/decisions/` 符合主宪章 §18-12) + - 共享基类 `runtime/mcp/base.py`:make_server / run_stdio / @tool_decision_logged - **行业合规规则插槽** `profiles/compliance/`:10 框架空载示例 YAML,真规则由领域专家+test-lead 双签签字后入库 - **飞轮回灌路由**(M2-9):`runtime/router/retrieval.py` 历史相似用例 → LLM prompt few-shot;router 透明集成,无 KB 时降级 -- **真模型路由测试套件**(M2-7):`runtime/tests/test_router_real.py` 20 样本(4 类型 × 5)真模型测试;门槛单模型 ≥85%、双模型投票 ≥95%;无 API key 自动 skip;失败自动落 decisions/ 含 seed+模型版本+输入快照(主宪章 §21 横切准则) +- **真模型路由测试套件**(M2-7):`runtime/tests/test_router_real.py` 20 样本(4 类型 × 5)真模型测试;门槛单模型 ≥85%、双模型投票 ≥95%;无 API key 自动 skip;失败自动落 decisions/ 含 seed+模型版本+输入快照 - **Web UI MVP**(M2-8):`runtime/web/` Vite+React 18+TypeScript+shadcn/ui+TanStack Query+React Router v7 - 4 页:Upload(text/file/URL 三模式) / RunStatus(SSE 进度条) / Report(节点结果表) / Catalog(14 专家+14 skill) - - §21 L2 必测项:Playwright E2E 7 用例(功能+边界+异常+兼容+可访问性);axe-core a11y 0 
critical 门槛 + - L2 必测项:Playwright E2E 7 用例(功能+边界+异常+兼容+可访问性);axe-core a11y 0 critical 门槛 - 配套 vite 代理 `/api` → FastAPI(:8800) - **`.mcp.json` 升级**:启用 `filesystem` + `test-orchestrator`;其他 5 件套写入 `_pending_servers_v1_2_0_alpha` 段供按需启用 -- 版本 V1.1.0 → V1.2.0 - -### Added(V1.1.0 · 宪章合一 · darwin-skill 入库 · 2026-05-11) - -- **主宪章扩展(memory `project_test_agent_workflow.md`)**:原 §0-§9 + How to apply 1-6 **字符级保留**;新增 §10-§20 仅承载规则/要求/约束(剔除示例/枚举/参考表): - - §10 灵魂底色:三公理 + 五条铭文 + V1.0.0 锁死 + 双签解锁条件 - - §11 FULL_GUIDE.md 定位补充(优先级链:memory > FULL_GUIDE > README) - - §12 多 Bug Tracker(默认 zentao + 扩展位 `BugTrackerBase` 契约) - - §13 按需安装 + 运行时补装铁律 - - §14 darwin-skill 自进化(棘轮 + Via Negativa 不消费运行数据) - - §15 AgentChat 协作协议(test-lead 中枢 + 反问 3 级预算 + 争议未落档不签发) - - §16 MCP 服务扩展位(6 件套 Phase 2) - - §17 九大簇维度边界(认知地图;承认存在不假装能交付) - - §18 测试架构 + 5 层门禁分层 + Flaky vs Reruns 哲学 - - §19 闭环约定 18 条(扩展 §8 质量闭环) - - §20 Phase 触发条件(不绑月份) +- 版本 → + +### Added(宪章合一 · darwin-skill 入库 · 2026-05-11) + +- **扩展(memory `project_test_agent_workflow.md`)**:原 -+ How to apply 1-6 **字符级保留**;新增 -仅承载规则/要求/约束(剔除示例/枚举/参考表): + - 灵魂底色:三公理 + 五条铭文 + 锁死 + 双签解锁条件 + - FULL_GUIDE.md 定位补充(优先级链:memory > FULL_GUIDE > README) + - 多 Bug Tracker(默认 zentao + 扩展位 `BugTrackerBase` 契约) + - 按需安装 + 运行时补装规则 + - darwin-skill 自进化(棘轮 + Via Negativa 不消费运行数据) + - AgentChat 协作协议(test-lead 中枢 + 反问 3 级预算 + 争议未落档不签发) + - MCP 服务扩展位(6 件套 Phase 2) + - 九大簇维度边界(认知地图;承认存在不假装能交付) + - 测试架构 + 5 层门禁分层 + Flaky vs Reruns 哲学 + - 闭环约定 18 条(扩展 质量闭环) + - Phase 触发条件(不绑月份) - How to apply 7-12 扩展项(铭文优先级 / 决策可追溯 / 纪要不可删 / darwin 棘轮 / 依赖补装反问 / 修改四关) -- **行业适配参照表全删除**(主宪章 + FULL_GUIDE 双删) +- **行业适配参照表全删除** - **darwin-skill 入库**:`skills/darwin-skill/` 完整部署(SKILL.md + scripts/ + templates/ + assets/ + docs/),upstream 原文不改;13 Skill → 14 Skill -- **FULL_GUIDE.md 优化**:三公理/铭文 + 18 闭环段替换为"已迁主宪章 §X"指引(避免双份维护);Bug Tracker / 按需安装 / darwin / AgentChat 详节保留作为深度参考;附 runtime 章节(M1-11 留存) +- **FULL_GUIDE.md 优化**:三公理/铭文 + 18 闭环段替换为"已迁X"指引(避免双份维护);Bug Tracker / 按需安装 / darwin / AgentChat 
详节保留作为深度参考;附 runtime 章节(M1-11 留存) -### Added(V1.1.0 · 运行时层) +### Added(运行时层) -- **新增 `runtime/` 运行时层**:把 14 专家 + 13 Skill + 67 脚本从"文档+工具箱"升级为"可执行运行时"。已有定义/Skill/脚本**保持不动**(宪章铁律),`runtime/` 仅作调度层。 +- **新增 `runtime/` 运行时层**:把 14 专家 + 13 Skill + 67 脚本从"文档+工具箱"升级为"可执行运行时"。已有定义/Skill/脚本**保持不动**(宪章规则),`runtime/` 仅作调度层。 - `runtime/router/`:AI 路由(LiteLLM 多厂商:Claude/OpenAI/Gemini/Qwen/DeepSeek/Ollama)。被测物 → 专家+Skill DAG。含 stub provider 供 CI 离线测,准确率 5/5 类型(web/api/mobile/desktop/ai-model) - `runtime/registry/`:扫 `agents/*.md` + `skills/*.md` frontmatter 生成统一目录(14 expert + 13 skill,实测通过) - `runtime/orchestrator/`:**双轨**——Prefect 2.x flow(全功能,带 UI/重试/状态机)+ Direct 执行器(无 Prefect 也能跑,ThreadPoolExecutor 并发,降级方案) @@ -679,8 +679,8 @@ _后续累积变更入此节;切版本时移到下方版本节。_ - 新建 `CHANGELOG.md` + `VERSION` 文件,启动语义版本管理。 - **W3 信息架构重塑**: - - `README_DETAIL.md` 改名为 `FULL_GUIDE.md`(宪章§0 文件分发策略:README.md 简明入口 ≤ 200 行 / FULL_GUIDE.md 详细指南) - - 新建 `docs/getting-started/INDEX.md` / `config/INDEX.md` / `ci/INDEX.md`(宪章§3 每目录索引;02/03/05 已有 README.md 等价于 INDEX) + - `README_DETAIL.md` 改名为 `FULL_GUIDE.md`(宪章文件分发策略:README.md 简明入口 ≤ 200 行 / FULL_GUIDE.md 详细指南) + - 新建 `docs/getting-started/INDEX.md` / `config/INDEX.md` / `ci/INDEX.md`(宪章每目录索引;02/03/05 已有 README.md 等价于 INDEX) - `README.md` 头加项目代号 `test-agent-team` + 版本 + License - `README.md` 删除三视角矩阵段(迁移至 FULL_GUIDE.md,避免双份维护) - `README.md` 行数从 240 降至 168 行 @@ -689,7 +689,7 @@ _后续累积变更入此节;切版本时移到下方版本节。_ - `.pre-commit-config.yaml` 加 gitleaks hook(凭据扫描) - `.gitignore` 补漏:`.ruff_cache/` / `*.jtl` / `*.pem` / `*.key` / `*.crt` / `*.p12` / `*.pfx` / `*.jks` / `id_rsa` / `id_ed25519` / `coverage.xml` / `pip-wheel-metadata/` - **W3 收尾 · 方法论沉淀(F'+J+K)**: - - `CONTRIBUTING.md` 末尾追加:**同步铁律段**(联动改动清单速查 + 自动化保障)+ **RACI 协作矩阵浓缩版**(14 专家 × 35 测试维度,含责任边界冲突解决与质量门禁联动) + - `CONTRIBUTING.md` 末尾追加:**同步规则段**(联动改动清单速查 + 自动化保障)+ **RACI 协作矩阵浓缩版**(14 专家 × 35 测试维度,含责任边界冲突解决与质量门禁联动) - `FULL_GUIDE.md` 末尾追加:**测试架构合理性深度章节**(6 子节:金字塔 2024 现代版 / Shift-Left 7 层 / Shift-Right 9 层 / 可观测三柱 + 测试可视化 / 
五层质量门禁 + Flaky vs Reruns 哲学 / 调整路径 Phase 2-4 落地点) - 新建 `examples/web-demo/`:8 文件最小可跑 Web 测试示例(pytest + Playwright + Page Object,演示 `https://playwright.dev`,5 分钟跑通) - `FULL_GUIDE.md:395` 漏修补救:`utils/*.py(12 个)` → `67 个,含 __init__.py` diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 4692f02e..0d0ad511 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -134,7 +134,7 @@ pytest --collect-only --- -## 同步铁律(项目宪章 §1) +## 同步规则(项目宪章 ) 任一文档/代码改动 → 必须同步到所有引用方,并加 `CHANGELOG.md` 条目。 diff --git a/FULL_GUIDE.md b/FULL_GUIDE.md index a686b8e7..1cb27d86 100644 --- a/FULL_GUIDE.md +++ b/FULL_GUIDE.md @@ -5,7 +5,7 @@ > **维护原则**:决策入档、开放问题入档、不打脸的承诺才写。重大决策须更新「📋 开放问题」与「🗺️ 项目当前状态」两节。 **项目名称**:`Test-Agent`(内部代号 `test-agent-team`) -**当前阶段**:Phase 2 前期(V1.0.0 · 16 expert + 32/32 skill active (11 production + 5 script-backed) + 0 rollout + 0 vision) +**当前阶段**:Phase 2 前期(16 expert + 32/32 skill active (11 production + 5 script-backed) + 0 rollout + 0 vision) **版本**:V1.0.0(详见 [VERSION](VERSION) + [CHANGELOG.md](CHANGELOG.md)) **更新日期**:2026-06-04 **模型**:Claude 4.x 系列(Opus 4.7 / Sonnet 4.6 / Haiku 4.5,由 Claude Code 默认管理) diff --git a/README.md b/README.md index 0a7cf28d..74e41ecf 100644 --- a/README.md +++ b/README.md @@ -87,7 +87,7 @@ Test-Agent/ ├── skills/ ← 32 business skills + 3 meta-skills ├── utils/ ← 79 production utils (pytest · Playwright · JMeter · Appium · …) ├── config/ ← conftest / pytest.ini / .mcp.json -├── runtime/ ← V1.x runtime (router · orchestrator · MCP · …) +├── runtime/ ← runtime (router · orchestrator · MCP · …) ├── ci/ ← GitHub Actions + Jenkins ├── docs/ ← user manual / architecture / theory / compliance ├── marketplace/ ← community skills / agents / mcp / hooks diff --git a/README.zh-CN.md b/README.zh-CN.md index e36f448e..f35d11e9 100644 --- a/README.zh-CN.md +++ b/README.zh-CN.md @@ -87,7 +87,7 @@ Test-Agent/ ├── skills/ ← 32 个业务 Skill + 3 元 Skill ├── utils/ ← 79 个生产工具(pytest · Playwright · JMeter · Appium · …) ├── config/ ← conftest / pytest.ini / .mcp.json 
-├── runtime/ ← V1.x 运行时(router · orchestrator · MCP · …) +├── runtime/ ← 运行时(router · orchestrator · MCP · …) ├── ci/ ← GitHub Actions + Jenkins ├── docs/ ← 使用手册 / 架构 / 教学 / 合规 ├── marketplace/ ← 社区 skills / agents / mcp / hooks diff --git a/ROADMAP.md b/ROADMAP.md index 4423c88f..f8b43b0e 100644 --- a/ROADMAP.md +++ b/ROADMAP.md @@ -1,13 +1,13 @@ -# Test-Agent V1.x ROADMAP +# Test-Agent ROADMAP > 项目终态目标:每个 expert / skill 真 LLM-driven / script-backed 实装,**绝不输出 mock 数据**。 -> 当前状态:V1.0.0 (**expert rollout 收尾(16/16)+ skill rollout 完成(32/32)+ Phase 3 完成 + Phase 4 完成 + Phase 5 完成**) +> 当前状态:(**expert rollout 收尾(16/16)+ skill rollout 完成(32/32)+ Phase 3 完成 + Phase 4 完成 + Phase 5 完成**) > - **expert 16/16 active**(11 production + 5 script);0 rollout。 > - **skill 32/32 active**(23 production + 7 script + 2 vision→production);0 rollout;0 vision。 > - 3 meta-skill(nuwa-skill / darwin-skill / karpathy-guidelines)独立,工具属性,不在 32 业务 skill 数内。 -> - **V1.21.0 新增 SkillRunner 基础设施** (`runtime/orchestrator/skills/` + `SKILL_RUNNERS` registry + `@register_skill` deco + `experts.py` kind=skill 接 runner),解锁 skill 层 LLM-driven 全 16 实装 (V1.21-V1.31)。 +> - **新增 SkillRunner 基础设施** (`runtime/orchestrator/skills/` + `SKILL_RUNNERS` registry + `@register_skill` deco + `experts.py` kind=skill 接 runner),解锁 skill 层 LLM-driven 全 16 实装 。 -## 当前活跃 expert (16 / 16) — V1.x rollout 收尾 +## 当前活跃 expert (16 / 16) — rollout 收尾 ### 11 真 LLM-driven (已上线) @@ -18,12 +18,12 @@ | `automation-engineer` | Web/API 脚本编写 + 性能测试编排 | | `test-executor` | 测试执行与监控 | | `bug-manager` | Bug 提交与追踪 | -| `env-manager` | 环境检查清单 + 准备步骤(V1.15.0 minimum viable) | -| `mobile-tester` | 移动测试用例 + ADB/Xcode 命令清单(V1.16.0 minimum viable) | -| `visual-tester` | 视觉测试点 + 对比脚本片段 + 容差配置(V1.17.0 minimum viable) | -| `system-tester` | IoT/串口/MQTT 测试用例 + 命令清单 + 协议特定配置(V1.18.0 minimum viable) | -| `pentest-tester` | 5 攻击域渗透测试计划 + 工具清单 + PoC plan(V1.19.0 minimum viable;仅输出计划文本,真执行守护在 utils 层 env gate) | -| `automotive-tester` | ASIL 评估 + HIL 测试 + ADAS 
场景 + OTA 升级 + 合规矩阵(V1.20.0 minimum viable;V1.x rollout 收尾) | +| `env-manager` | 环境检查清单 + 准备步骤 | +| `mobile-tester` | 移动测试用例 + ADB/Xcode 命令清单 | +| `visual-tester` | 视觉测试点 + 对比脚本片段 + 容差配置 | +| `system-tester` | IoT/串口/MQTT 测试用例 + 命令清单 + 协议特定配置 | +| `pentest-tester` | 5 攻击域渗透测试计划 + 工具清单 + PoC plan | +| `automotive-tester` | ASIL 评估 + HIL 测试 + ADAS 场景 + OTA 升级 + 合规矩阵 | ### 5 script-backed (已上线) @@ -45,17 +45,17 @@ |-------|------| | `tdd-workflow` | TDD 工作流 | | `e2e-testing` | E2E 测试 | -| `automotive-test` | 车载主编排(V1.31.0 · automotive batch) | -| `automotive-can-bus-test` | CAN总线测试(V1.31.0) | -| `automotive-adas-scenario` | ADAS场景库(V1.31.0) | -| `automotive-ota-update-test` | OTA升级测试(V1.31.0) | -| `automotive-hil-loop-test` | HIL环路测试(V1.31.0) | +| `automotive-test` | 车载主编排(automotive batch) | +| `automotive-can-bus-test` | CAN总线测试 | +| `automotive-adas-scenario` | ADAS场景库 | +| `automotive-ota-update-test` | OTA升级测试 | +| `automotive-hil-loop-test` | HIL环路测试 | | `regression-test` | 回归测试 | | `smoke-test` | 冒烟测试 | | `testcase-design` | 用例设计 | | `test-coordinator` | 测试流程编排 | | `verification-loop` | 5-phase 验证循环 | -| `eval-harness` | LLM 评测编排(V1.27.0 · skill rollout #5) | +| `eval-harness` | LLM 评测编排(skill rollout #5) | ### 7 script-backed (已上线) @@ -71,73 +71,73 @@ --- -## V1.x rollout — 6 expert LLM-driven minimum viable 实装路线 +## rollout — 6 expert LLM-driven minimum viable 实装路线 **节奏**: 一周 1 expert,共 6 周。每完成 1 个,active 数字 +1,README 同步。 -**前置**: V1.15 Day 0 — runtime/router 防 mock 改造(拒绝未实装路由,返回明确错误)。 +**前置**: Day 0 — runtime/router 防 mock 改造(拒绝未实装路由,返回明确错误)。 **完成标准**: 每 expert 接 LLM 真调用 + 结构化输出(markdown/JSON),通过 3 个测试 prompt 验证。 | # | Expert | LLM-driven 实装范围(minimum viable) | 目标版本 | 状态 | |---|--------|------------------------------------|---------|------| -| 0 | (前置) runtime/router + orchestrator 防 mock | catalog 单源 frontmatter 解析;router._validate_against_catalog warn + 降 confidence;orchestrator.execute_node 硬拒 rollout/vision/unknown(returncode=2,绝不输出 mock);expert + skill 双 
layer 覆盖 | V1.14.0+1 | **done** (PR X4) | -| 1 | `env-manager` | LLM 读 PRD → 环境检查清单 + 准备步骤 markdown | V1.15.0 | **done** (runtime/orchestrator/agents/env_manager.py) | -| 2 | `mobile-tester` | LLM 读 PRD + Android/iOS 上下文 → 移动测试用例 + ADB/Xcode 命令清单 | V1.16.0 | **done** (runtime/orchestrator/agents/mobile_tester.py) | -| 3 | `visual-tester` | LLM 读 PRD + UI 描述 → 视觉测试点 + Playwright 视觉对比脚本 | V1.17.0 | **done** (runtime/orchestrator/agents/visual_tester.py) | -| 4 | `system-tester` | LLM 读 PRD + IoT/串口/MQTT 上下文 → IoT 测试用例 + 命令清单 | V1.18.0 | **done** (runtime/orchestrator/agents/system_tester.py) | -| 5 | `pentest-tester` | LLM 读 PRD + 授权检查通过 → 渗透测试计划 + 工具调用清单(生成计划,不执行攻击) | V1.19.0 | **done** (runtime/orchestrator/agents/pentest_tester.py;仅输出计划文本,真执行守护已在 utils 层 `api_security_scanner.py` / `ai_adversarial.py` 用 TAGENT_PENTEST_AUTHORIZED env gate;法律责任在操作者侧,见 SECURITY.md L84) | -| 6 | `automotive-tester` | LLM 读 PRD + CAN-bus/ISO-26262 上下文 → ASIL 评估 + HIL 测试用例 | V1.20.0 | **done** (runtime/orchestrator/agents/automotive_tester.py;ASIL 评估 + test_cases + bus_test_plan + adas_scenarios + ota_plan + compliance_matrix 结构化 JSON;覆盖 ECU/ADAS/IVI/V2X 4 子系统 + 8 协议 + 8 合规标准。**V1.x rollout 收尾**) | +| 0 | (前置) runtime/router + orchestrator 防 mock | catalog 单源 frontmatter 解析;router._validate_against_catalog warn + 降 confidence;orchestrator.execute_node 硬拒 rollout/vision/unknown(returncode=2,绝不输出 mock);expert + skill 双 layer 覆盖 | +1 | **done** (PR X4) | +| 1 | `env-manager` | LLM 读 PRD → 环境检查清单 + 准备步骤 markdown | | **done** (runtime/orchestrator/agents/env_manager.py) | +| 2 | `mobile-tester` | LLM 读 PRD + Android/iOS 上下文 → 移动测试用例 + ADB/Xcode 命令清单 | | **done** (runtime/orchestrator/agents/mobile_tester.py) | +| 3 | `visual-tester` | LLM 读 PRD + UI 描述 → 视觉测试点 + Playwright 视觉对比脚本 | | **done** (runtime/orchestrator/agents/visual_tester.py) | +| 4 | `system-tester` | LLM 读 PRD + IoT/串口/MQTT 上下文 → IoT 测试用例 + 命令清单 | | **done** (runtime/orchestrator/agents/system_tester.py) | +| 5 | 
`pentest-tester` | LLM 读 PRD + 授权检查通过 → 渗透测试计划 + 工具调用清单(生成计划,不执行攻击) | | **done** (runtime/orchestrator/agents/pentest_tester.py;仅输出计划文本,真执行守护已在 utils 层 `api_security_scanner.py` / `ai_adversarial.py` 用 TAGENT_PENTEST_AUTHORIZED env gate;法律责任在操作者侧,见 SECURITY.md L84) | +| 6 | `automotive-tester` | LLM 读 PRD + CAN-bus/ISO-26262 上下文 → ASIL 评估 + HIL 测试用例 | | **done** (runtime/orchestrator/agents/automotive_tester.py;ASIL 评估 + test_cases + bus_test_plan + adas_scenarios + ota_plan + compliance_matrix 结构化 JSON;覆盖 ECU/ADAS/IVI/V2X 4 子系统 + 8 协议 + 8 合规标准。**rollout 收尾**) | --- -## V1.x rollout — 16 skill 实装路线(已全部完成) +## rollout — 16 skill 实装路线(已全部完成) -**节奏**: skill rollout 起点 V1.21.0 (SkillRunner 基础设施 + pentest-coordinator 首落地);后续 1 skill / PR 推进。 +**节奏**: skill rollout 起点 (SkillRunner 基础设施 + pentest-coordinator 首落地);后续 1 skill / PR 推进。 **完成标准**: 每 skill 接 LLM 真调用 (mock_output schema 覆盖 + production 升级 + ALL_SKILL_RUNNERS 锁规则同步)。 -**前置**: ~~runtime/router 防 mock 改造 + skill 路由按 `SKILL_IMPL_STATUS` frontmatter 过滤~~ **已完成 V1.14.0+1 (PR X4)** — registry parse frontmatter + orchestrator.execute_node 拒 rollout/vision/unknown skill (returncode=2)。 -**基础设施**: **V1.21.0 完成** — `runtime/orchestrator/skills/__init__.py` + `SKILL_RUNNERS` registry + `@register_skill` deco + `experts.py` kind=skill 接 skill runner (放在 SCRIPT_MAP fallback 前)。 +**前置**: ~~runtime/router 防 mock 改造 + skill 路由按 `SKILL_IMPL_STATUS` frontmatter 过滤~~ **已完成 +1 (PR X4)** — registry parse frontmatter + orchestrator.execute_node 拒 rollout/vision/unknown skill (returncode=2)。 +**基础设施**: **完成** — `runtime/orchestrator/skills/__init__.py` + `SKILL_RUNNERS` registry + `@register_skill` deco + `experts.py` kind=skill 接 skill runner (放在 SCRIPT_MAP fallback 前)。 ### 通用平台 4 skill | Skill | 范围 | 关联 expert | 状态 | |-------|------|-------------|------| -| `mobile-test` | Android/iOS + 小程序 自动化 | mobile-tester | **done** (V1.23.0 · runtime/orchestrator/skills/mobile_test.py) | -| `visual-test` | 图像识别 + OCR + SSIM 视觉回归 | visual-tester 
| **done** (V1.24.0 · runtime/orchestrator/skills/visual_test.py) | -| `system-test` | IoT/串口/MQTT/音视频/Jaeger/Kafka | system-tester | **done** (V1.26.0 · runtime/orchestrator/skills/system_test.py) | -| `eval-harness` | LLM 评测(pass@k / Jaccard / stability) | ai-tester(深化) | **done** (V1.27.0 · runtime/orchestrator/skills/eval_harness.py · 5 阶段编排 + 质量门禁 + 安全护栏) | +| `mobile-test` | Android/iOS + 小程序 自动化 | mobile-tester | **done** (runtime/orchestrator/skills/mobile_test.py) | +| `visual-test` | 图像识别 + OCR + SSIM 视觉回归 | visual-tester | **done** (runtime/orchestrator/skills/visual_test.py) | +| `system-test` | IoT/串口/MQTT/音视频/Jaeger/Kafka | system-tester | **done** (runtime/orchestrator/skills/system_test.py) | +| `eval-harness` | LLM 评测(pass@k / Jaccard / stability) | ai-tester(深化) | **done** (runtime/orchestrator/skills/eval_harness.py · 5 阶段编排 + 质量门禁 + 安全护栏) | ### Pentest 7 skill(已全部完成 · SECURITY.md 武器化授权 wiring 已实装) | Skill | 范围 | 状态 | |-------|------|------| -| `pentest-coordinator` | 渗透总编排(授权 → 侦察 → 漏洞 → 利用 → 报告) | **done** (V1.21.0 · runtime/orchestrator/skills/pentest_coordinator.py · 5 阶段编排 + authorization_check + subagent_pool + refuse_conditions) | -| `pentest-recon` | 侦察(被动+主动信息收集) | **done** (V1.25.0) | -| `pentest-vuln` | 漏洞发现(5 攻击域 + SAST/DAST) | **done** (V1.25.0) | -| `pentest-exploit` | 漏洞利用(沙箱 PoC,不真破坏) | **done** (V1.30.0 · pentest batch 2) | -| `pentest-api` | API 渗透(OWASP API Top 10 2023) | **done** (V1.30.0 · pentest batch 2) | -| `pentest-web` | Web 渗透(OWASP Top 10 + ASVS) | **done** (V1.30.0 · pentest batch 2) | -| `pentest-report` | 渗透报告(仅 working PoC 入报告,shannon 哲学) | **done** (V1.30.0 · pentest batch 2) | +| `pentest-coordinator` | 渗透总编排(授权 → 侦察 → 漏洞 → 利用 → 报告) | **done** (runtime/orchestrator/skills/pentest_coordinator.py · 5 阶段编排 + authorization_check + subagent_pool + refuse_conditions) | +| `pentest-recon` | 侦察(被动+主动信息收集) | **done** | +| `pentest-vuln` | 漏洞发现(5 攻击域 + SAST/DAST) | **done** | +| `pentest-exploit` | 漏洞利用(沙箱 PoC,不真破坏) | 
**done** (pentest batch 2) | +| `pentest-api` | API 渗透(OWASP API Top 10 2023) | **done** (pentest batch 2) | +| `pentest-web` | Web 渗透(OWASP Top 10 + ASVS) | **done** (pentest batch 2) | +| `pentest-report` | 渗透报告(仅 working PoC 入报告,shannon 哲学) | **done** (pentest batch 2) | ### Automotive 5 skill | Skill | 范围 | |-------|------| -| `automotive-test` | 整车主编排(ECU + ADAS + IVI + V2X) | **done** (V1.31.0 · automotive batch) | -| `automotive-can-bus-test` | CAN/CAN-FD/LIN/FlexRay/SOME-IP | **done** (V1.31.0 · automotive batch) | -| `automotive-adas-scenario` | ADAS 场景库 + SOTIF(ISO 21448) | **done** (V1.31.0 · automotive batch) | -| `automotive-ota-update-test` | OTA 升级(UN R156 / GB 44496-2024) | **done** (V1.31.0 · automotive batch) | -| `automotive-hil-loop-test` | HIL/SIL/MIL/PIL 环路 | **done** (V1.31.0 · automotive batch) | +| `automotive-test` | 整车主编排(ECU + ADAS + IVI + V2X) | **done** (automotive batch) | +| `automotive-can-bus-test` | CAN/CAN-FD/LIN/FlexRay/SOME-IP | **done** (automotive batch) | +| `automotive-adas-scenario` | ADAS 场景库 + SOTIF(ISO 21448) | **done** (automotive batch) | +| `automotive-ota-update-test` | OTA 升级(UN R156 / GB 44496-2024) | **done** (automotive batch) | +| `automotive-hil-loop-test` | HIL/SIL/MIL/PIL 环路 | **done** (automotive batch) | --- -## V1.34-V1.36 能力扩展 +## 能力扩展 -- **V1.34**: script_bridge.py 桥接 5 独立脚本进 orchestrator pipeline -- **V1.35**: 11 深度审计模块 (flaky guard / API security v2 / data factory v2 / perf orchestrator / event harness / visual regression / ML prioritizer / differential tester / EU AI Act / supply chain) -- **V1.36**: 6 延期模块 (chaos v2 / state machine v2 / DB test v2 / BDD v2 / carbon scheduler / canary config) + CVE-2025-71176 fix + 深度审查65发现全修 +- ****: script_bridge.py 桥接 5 独立脚本进 orchestrator pipeline +- ****: 11 深度审计模块 (flaky guard / API security v2 / data factory v2 / perf orchestrator / event harness / visual regression / ML prioritizer / differential tester / EU AI Act / supply chain) +- ****: 6 延期模块 (chaos v2 / 
state machine v2 / DB test v2 / BDD v2 / carbon scheduler / canary config) + CVE-2025-71176 fix + 深度审查65发现全修 --- -## V2.x vision — 2 skill(暂留方法论参考形态) +## vision — 2 skill(暂留方法论参考形态) | Skill | 当前形态 | V2 路线 | |-------|----------|---------| @@ -146,7 +146,7 @@ --- -## V2.x 路线图 (longer-term) +## 路线图 (longer-term) ### Skill Lifecycle 元工具改造 (适配测试领域) @@ -169,7 +169,7 @@ **绝不输出 mock 数据糊弄用户。** -V1.14.0+1 (PR X4) 起,双 layer 防 mock 已落地: ++1 (PR X4) 起,双 layer 防 mock 已落地: - **registry 单源**: catalog 解析 `agents/*.md` `EXPERT_IMPL_STATUS` + `skills/*.md` `SKILL_IMPL_STATUS` frontmatter,实装状态来源唯一 - **router 软警告**: `_validate_against_catalog` 检测 rollout / vision / unknown → 加 issues 并降 confidence 0.3 - **orchestrator 硬拒**: `execute_node` 对 expert / skill 任意 rollout / vision / unknown 返回 `returncode=2` + stderr "未实装",绝不走 no-op "documented step recorded" 假成功路径 @@ -182,40 +182,40 @@ V1.14.0+1 (PR X4) 起,双 layer 防 mock 已落地: | 版本 | 日期 | 完成项 | active expert 数 | |------|------|--------|----------------| -| V1.14.0 | 2026-05-13 | bundle1 信任+法律线修复;ROADMAP.md 起步 | 10/16 | -| V1.14.0+1 | 2026-05-15 | X3 数字诚实化(README/ROADMAP)+ X4 防 mock 闭环 (registry 单源 frontmatter / router warn / orchestrator hard block expert+skill 双 layer) | 10/16 | -| V1.15.0 | 2026-05-15 | env-manager LLM-driven minimum viable (runtime/orchestrator/agents/env_manager.py;LLM 读 PRD → env_checks + prep_steps + dependencies + risks 结构化 JSON) | 11/16 | -| V1.16.0 | 2026-05-15 | mobile-tester LLM-driven minimum viable (runtime/orchestrator/agents/mobile_tester.py;LLM 读 PRD + Android/iOS 上下文 → test_cases + device_commands + mobile_specific 结构化 JSON) | 12/16 | -| V1.17.0 | 2026-05-15 | visual-tester LLM-driven minimum viable (runtime/orchestrator/agents/visual_tester.py;LLM 读 PRD + UI 描述 → visual_test_points + comparison_scripts + tolerance + baseline_strategy 结构化 JSON) | 13/16 | -| V1.18.0 | 2026-05-15 | system-tester LLM-driven minimum viable (runtime/orchestrator/agents/system_tester.py;LLM 读 PRD + IoT/串口/MQTT 上下文 → test_cases + 
device_commands + protocol_specific + test_environment 结构化 JSON;覆盖 IoT/audiovideo/tracing/mq/integration 5 类) | 14/16 | -| V1.19.0 | 2026-05-16 | pentest-tester LLM-driven minimum viable (runtime/orchestrator/agents/pentest_tester.py;LLM 读 PRD + 安全上下文 → test_mode + target_scope + recon/vuln/exploit/reporting phases 结构化 JSON;覆盖 5 攻击域 Injection/XSS/SSRF/Auth/Authz;仅输出计划文本,真执行守护在 utils 层 env gate;法律责任在操作者侧 SECURITY.md L84) | 15/16 | -| V1.20.0 | 2026-05-16 | automotive-tester LLM-driven minimum viable (runtime/orchestrator/agents/automotive_tester.py;LLM 读 PRD + CAN-bus/ISO-26262 上下文 → vehicle_subsystem + asil_assessment + test_cases + bus_test_plan + adas_scenarios + ota_plan + compliance_matrix + test_environment 结构化 JSON;覆盖 ECU/ADAS/IVI/V2X 4 子系统 + 8 协议 + 8 合规标准。**V1.x rollout 收尾**) | 16/16 expert (V1.x rollout 完成) | -| V1.21.0 | 2026-05-16 | **skill rollout 起点** — SkillRunner 基础设施 (runtime/orchestrator/skills/__init__.py + SKILL_RUNNERS registry + @register_skill deco + experts.py kind=skill 接 runner) + pentest-coordinator 首 skill 落地 (5 阶段编排 + authorization_check + subagent_pool + refuse_conditions). 解锁 14 rollout skill 后续流水线. | 16 expert + 8/32 production (15 rollout 待) | -| V1.22.0 | 2026-05-16 | **tagent config CLI** — 多模型 onboarding Step 2 (runtime/cli/config.py · 6 provider 内置 + 厂商配置 cookbook + use/set/unset/list/show 子命令). **多 provider 通用 env 通道** (LLM_PROVIDER + LLM_API_KEY + LLM_MODEL) + stub 扩 4 path. 
| 16 expert + 8/32 production | -| V1.23.0 | 2026-05-16 | **skill rollout #2** — mobile-test skill LLM-driven 生产落地 (runtime/orchestrator/skills/mobile_test.py · Android/iOS 双平台 + 小程序支持) | 16 expert + 9/32 production | -| V1.24.0 | 2026-05-16 | **skill rollout #3** — visual-test skill LLM-driven 生产落地 (runtime/orchestrator/skills/visual_test.py · Airtest + OCR + SSIM 视觉对比) | 16 expert + 10/32 production | -| V1.25.0 | 2026-05-16 | **skill rollout #4** — pentest-recon + pentest-vuln 双 skill LLM-driven 生产落地 (侦察: 端口/子域/服务指纹 + 漏洞: 5 攻击域 hybrid SAST+blackbox) | 16 expert + 12/32 production | -| V1.26.0 | 2026-05-16 | **skill rollout #5** — system-test skill LLM-driven 生产落地 (runtime/orchestrator/skills/system_test.py · IoT/音视频/追踪/消息队列 4 场景) | 16 expert + 13/32 production | -| V1.27.0 | 2026-05-16 | **skill rollout #6** — eval-harness skill LLM-driven 生产落地 (runtime/orchestrator/skills/eval_harness.py · pass@k / Jaccard@k / top-1 stability / latency 4 指标 + 安全护栏) | 16 expert + 14/32 production | -| V1.28.0 | 2026-05-16 | **skill rollout #7** — pentest-api + pentest-web 双 skill LLM-driven 生产落地 (API: OWASP API Top 10 + REST/GraphQL/gRPC/WebSocket · Web: OWASP Top 10 + ASVS + 2FA 自动登录) | 16 expert + 16/32 production | -| V1.29.0 | 2026-05-16 | **skill rollout #8** — pentest-exploit + pentest-report 双 skill LLM-driven 生产落地 (exploit: 沙箱内验证 PoC + 不可破坏性约束 · report: working PoC 嵌入 + CWE/CVSS/PoC/修复 4 维) | 16 expert + 18/32 production | -| V1.30.0 | 2026-05-16 | **skill rollout #9** — automotive-test + automotive-can-bus-test 双 skill LLM-driven 生产落地 (主编排: 10 阶段 HARA→报告 · CAN: CAN/CAN-FD/SOME-IP 协议一致性 + dbc 解析) | 16 expert + 20/32 production | -| V1.31.0 | 2026-05-16 | **skill rollout #10 (收尾)** — automotive-adas-scenario + automotive-ota-update-test + automotive-hil-loop-test 3 skill LLM-driven 生产落地 (ADAS: AEB/ACC/LKA + CARLA 仿真 · OTA: 6 校验 + UN R156/GB 44496 合规 · HIL: MIL/SIL/HIL 三环 + dSPACE). 
**V1.x rollout 完成 — 23/32 production + 7 script + 0 rollout + 2 vision.** | 16 expert + 23/32 production (0 rollout 待) | -| V1.32.0 | 2026-05-17 | 深审32发现全修 + 版本号全同步 + 私源泄漏清洗 | 16 expert + 23/32 production | -| V1.32.1 | 2026-05-17 | CONTRIBUTING skill count 33→32 fix + 版本号同步 | 16 expert + 23/32 production | -| V1.32.2 | 2026-05-17 | Security hardening batch: CWE-78 fix + credential removal + CORS + WebSocket leak + XML escape | 16 expert + 23/32 production | -| V1.32.3 | 2026-05-17 | Refactor: _stub_response dispatch table + fuzzer ALL_PAYLOADS hoist | 16 expert + 23/32 production | -| V1.32.4 | 2026-05-17 | Honesty pass: remove aspirational numbers + split overlong functions | 16 expert + 23/32 production | -| V1.32.5 | 2026-05-17 | Security: shell injection + hardcoded creds + silent failures | 16 expert + 23/32 production | -| V1.33.0 | 2026-05-17 | MASTER_PLAN 38/38 items across 8 phases complete | 16 expert + 23/32 production | -| V1.34.0 | 2026-05-18 | Phase 1-5 initial audit: 18 additions (settings/IDE/Docker/Prometheus/streaming/PBT/contract/schema fuzz/compliance/DORA) | 16 expert + 23/32 production | -| V1.35.0 | 2026-05-18 | Deep audit 11 core modules (flaky guard/API security v2/data factory v2/perf/e2e event harness/visual regression/ML prioritizer/differential/EU AI Act/supply chain) | 16 expert + 23/32 production | -| V1.36.0 | 2026-05-18 | Remaining 6 deferred modules + CVE-2025-71176 fix + 深度审查65发现全修 | 16 expert + 30/32 active (23 production + 7 script) | -| V1.37.0 | 2026-05-18 | Phase 2 charter closure: Bug 5适配器(YAML门禁+按需安装) + HIGH 2(H16/H18) + MEDIUM 4(M12/M14/M15/M19) + contract gate + utils tests | 16 expert + 30/32 active · Phase 2 complete | -| V1.38.0 | 2026-05-18 | Phase 3.1 伦理/偏见审计: fairness_auditor.py (dataset bias + 6 model fairness metrics + intersectional + decision audit) + 20 tests + ai_validator bias audit pipeline | 16 expert + 30/32 active · 1/3 Phase 3 done | -| V1.39.0 | 2026-05-18 | Phase 3.2 沉默故障检测: silent_failure_detector.py 
(threshold drift + Mann-Kendall + OLS trend + sliding window + multi-source batch) + 21 tests + tracing/web_vitals/prometheus collectors | 16 expert + 30/32 active · 2/3 Phase 3 done | -| V1.40.0 | 2026-05-18 | Phase 3.3 缺席者场景注入: absentee_scenario_injector.py (9 absentee groups × 21 canonical scenarios + charter generation + coverage reporting) + 20 tests | 16 expert + 30/32 active · PHASE 3 COMPLETE | -| V1.41.0 | 2026-05-19 | Phase 4 证据链可采信性: evidence_chain.py (SHA-256 hash chain + multi-source collection + ISO 27001/SOC2/NIST 800-53/GDPR compliance mapping + JSON/Markdown export + integrity verification) + 39 tests + ai_validator evidence chain audit pipeline | 16 expert + 30/32 active · PHASE 4 DELIVERED | -| V1.42.0 | 2026-05-19 | Phase 5 神圣性与跨文化禁忌: taboo_matrix.py (135 entries × 16 locales × 5 dimensions: words/colors/numbers/holidays/sacred_contexts) + i18n_checker taboo audit extension (6 functions) + 84 tests | 16 expert + 30/32 active · PHASE 5 DELIVERED | -| V1.43.0 | 2026-05-19 | Release cut: Phase 3.1+3.2+3.3+4+5 落版 (fairness_auditor / silent_failure_detector / absentee_scenario_injector / evidence_chain / taboo_matrix · 共 184 tests) + **2 ex-vision skill 实装** (agent-introspection-debugging + build-your-own-x-explorer · LLM-driven minimum viable · 中央 ALL_SKILL_RUNNERS 同步) | **16 expert + 32/32 active** (25 production + 7 script · V1.x SKILL ROLLOUT 完整收尾) | -| V2.0.0 | TBD | V2.x 路线图启动 | 16/16 + V2 | +| | 2026-05-13 | bundle1 信任+法律线修复;ROADMAP.md 起步 | 10/16 | +| +1 | 2026-05-15 | X3 数字诚实化(README/ROADMAP)+ X4 防 mock 闭环 (registry 单源 frontmatter / router warn / orchestrator hard block expert+skill 双 layer) | 10/16 | +| | 2026-05-15 | env-manager LLM-driven minimum viable (runtime/orchestrator/agents/env_manager.py;LLM 读 PRD → env_checks + prep_steps + dependencies + risks 结构化 JSON) | 11/16 | +| | 2026-05-15 | mobile-tester LLM-driven minimum viable (runtime/orchestrator/agents/mobile_tester.py;LLM 读 PRD + Android/iOS 上下文 → test_cases + device_commands + 
mobile_specific 结构化 JSON) | 12/16 | +| | 2026-05-15 | visual-tester LLM-driven minimum viable (runtime/orchestrator/agents/visual_tester.py;LLM 读 PRD + UI 描述 → visual_test_points + comparison_scripts + tolerance + baseline_strategy 结构化 JSON) | 13/16 | +| | 2026-05-15 | system-tester LLM-driven minimum viable (runtime/orchestrator/agents/system_tester.py;LLM 读 PRD + IoT/串口/MQTT 上下文 → test_cases + device_commands + protocol_specific + test_environment 结构化 JSON;覆盖 IoT/audiovideo/tracing/mq/integration 5 类) | 14/16 | +| | 2026-05-16 | pentest-tester LLM-driven minimum viable (runtime/orchestrator/agents/pentest_tester.py;LLM 读 PRD + 安全上下文 → test_mode + target_scope + recon/vuln/exploit/reporting phases 结构化 JSON;覆盖 5 攻击域 Injection/XSS/SSRF/Auth/Authz;仅输出计划文本,真执行守护在 utils 层 env gate;法律责任在操作者侧 SECURITY.md L84) | 15/16 | +| | 2026-05-16 | automotive-tester LLM-driven minimum viable (runtime/orchestrator/agents/automotive_tester.py;LLM 读 PRD + CAN-bus/ISO-26262 上下文 → vehicle_subsystem + asil_assessment + test_cases + bus_test_plan + adas_scenarios + ota_plan + compliance_matrix + test_environment 结构化 JSON;覆盖 ECU/ADAS/IVI/V2X 4 子系统 + 8 协议 + 8 合规标准。**rollout 收尾**) | 16/16 expert (rollout 完成) | +| | 2026-05-16 | **skill rollout 起点** — SkillRunner 基础设施 (runtime/orchestrator/skills/__init__.py + SKILL_RUNNERS registry + @register_skill deco + experts.py kind=skill 接 runner) + pentest-coordinator 首 skill 落地 (5 阶段编排 + authorization_check + subagent_pool + refuse_conditions). 解锁 14 rollout skill 后续流水线. | 16 expert + 8/32 production (15 rollout 待) | +| | 2026-05-16 | **tagent config CLI** — 多模型 onboarding Step 2 (runtime/cli/config.py · 6 provider 内置 + 厂商配置 cookbook + use/set/unset/list/show 子命令). **多 provider 通用 env 通道** (LLM_PROVIDER + LLM_API_KEY + LLM_MODEL) + stub 扩 4 path. 
| 16 expert + 8/32 production | +| | 2026-05-16 | **skill rollout #2** — mobile-test skill LLM-driven 生产落地 (runtime/orchestrator/skills/mobile_test.py · Android/iOS 双平台 + 小程序支持) | 16 expert + 9/32 production | +| | 2026-05-16 | **skill rollout #3** — visual-test skill LLM-driven 生产落地 (runtime/orchestrator/skills/visual_test.py · Airtest + OCR + SSIM 视觉对比) | 16 expert + 10/32 production | +| | 2026-05-16 | **skill rollout #4** — pentest-recon + pentest-vuln 双 skill LLM-driven 生产落地 (侦察: 端口/子域/服务指纹 + 漏洞: 5 攻击域 hybrid SAST+blackbox) | 16 expert + 12/32 production | +| | 2026-05-16 | **skill rollout #5** — system-test skill LLM-driven 生产落地 (runtime/orchestrator/skills/system_test.py · IoT/音视频/追踪/消息队列 4 场景) | 16 expert + 13/32 production | +| | 2026-05-16 | **skill rollout #6** — eval-harness skill LLM-driven 生产落地 (runtime/orchestrator/skills/eval_harness.py · pass@k / Jaccard@k / top-1 stability / latency 4 指标 + 安全护栏) | 16 expert + 14/32 production | +| | 2026-05-16 | **skill rollout #7** — pentest-api + pentest-web 双 skill LLM-driven 生产落地 (API: OWASP API Top 10 + REST/GraphQL/gRPC/WebSocket · Web: OWASP Top 10 + ASVS + 2FA 自动登录) | 16 expert + 16/32 production | +| | 2026-05-16 | **skill rollout #8** — pentest-exploit + pentest-report 双 skill LLM-driven 生产落地 (exploit: 沙箱内验证 PoC + 不可破坏性约束 · report: working PoC 嵌入 + CWE/CVSS/PoC/修复 4 维) | 16 expert + 18/32 production | +| | 2026-05-16 | **skill rollout #9** — automotive-test + automotive-can-bus-test 双 skill LLM-driven 生产落地 (主编排: 10 阶段 HARA→报告 · CAN: CAN/CAN-FD/SOME-IP 协议一致性 + dbc 解析) | 16 expert + 20/32 production | +| | 2026-05-16 | **skill rollout #10 (收尾)** — automotive-adas-scenario + automotive-ota-update-test + automotive-hil-loop-test 3 skill LLM-driven 生产落地 (ADAS: AEB/ACC/LKA + CARLA 仿真 · OTA: 6 校验 + UN R156/GB 44496 合规 · HIL: MIL/SIL/HIL 三环 + dSPACE). 
**rollout 完成 — 23/32 production + 7 script + 0 rollout + 2 vision.** | 16 expert + 23/32 production (0 rollout 待) | +| | 2026-05-17 | 深审32发现全修 + 版本号全同步 + 私源泄漏清洗 | 16 expert + 23/32 production | +| | 2026-05-17 | CONTRIBUTING skill count 33→32 fix + 版本号同步 | 16 expert + 23/32 production | +| | 2026-05-17 | Security hardening batch: CWE-78 fix + credential removal + CORS + WebSocket leak + XML escape | 16 expert + 23/32 production | +| | 2026-05-17 | Refactor: _stub_response dispatch table + fuzzer ALL_PAYLOADS hoist | 16 expert + 23/32 production | +| | 2026-05-17 | Honesty pass: remove aspirational numbers + split overlong functions | 16 expert + 23/32 production | +| | 2026-05-17 | Security: shell injection + hardcoded creds + silent failures | 16 expert + 23/32 production | +| | 2026-05-17 | MASTER_PLAN 38/38 items across 8 phases complete | 16 expert + 23/32 production | +| | 2026-05-18 | Phase 1-5 initial audit: 18 additions (settings/IDE/Docker/Prometheus/streaming/PBT/contract/schema fuzz/compliance/DORA) | 16 expert + 23/32 production | +| | 2026-05-18 | Deep audit 11 core modules (flaky guard/API security v2/data factory v2/perf/e2e event harness/visual regression/ML prioritizer/differential/EU AI Act/supply chain) | 16 expert + 23/32 production | +| | 2026-05-18 | Remaining 6 deferred modules + CVE-2025-71176 fix + 深度审查65发现全修 | 16 expert + 30/32 active (23 production + 7 script) | +| | 2026-05-18 | Phase 2 charter closure: Bug 5适配器(YAML门禁+按需安装) + HIGH 2(H16/H18) + MEDIUM 4(M12/M14/M15/M19) + contract gate + utils tests | 16 expert + 30/32 active · Phase 2 complete | +| | 2026-05-18 | Phase 3.1 伦理/偏见审计: fairness_auditor.py (dataset bias + 6 model fairness metrics + intersectional + decision audit) + 20 tests + ai_validator bias audit pipeline | 16 expert + 30/32 active · 1/3 Phase 3 done | +| | 2026-05-18 | Phase 3.2 沉默故障检测: silent_failure_detector.py (threshold drift + Mann-Kendall + OLS trend + sliding window + multi-source batch) + 21 tests + 
tracing/web_vitals/prometheus collectors | 16 expert + 30/32 active · 2/3 Phase 3 done | +| | 2026-05-18 | Phase 3.3 缺席者场景注入: absentee_scenario_injector.py (9 absentee groups × 21 canonical scenarios + charter generation + coverage reporting) + 20 tests | 16 expert + 30/32 active · PHASE 3 COMPLETE | +| | 2026-05-19 | Phase 4 证据链可采信性: evidence_chain.py (SHA-256 hash chain + multi-source collection + ISO 27001/SOC2/NIST 800-53/GDPR compliance mapping + JSON/Markdown export + integrity verification) + 39 tests + ai_validator evidence chain audit pipeline | 16 expert + 30/32 active · PHASE 4 DELIVERED | +| | 2026-05-19 | Phase 5 神圣性与跨文化禁忌: taboo_matrix.py (135 entries × 16 locales × 5 dimensions: words/colors/numbers/holidays/sacred_contexts) + i18n_checker taboo audit extension (6 functions) + 84 tests | 16 expert + 30/32 active · PHASE 5 DELIVERED | +| | 2026-05-19 | Release cut: Phase 3.1+3.2+3.3+4+5 落版 (fairness_auditor / silent_failure_detector / absentee_scenario_injector / evidence_chain / taboo_matrix · 共 184 tests) + **2 ex-vision skill 实装** (agent-introspection-debugging + build-your-own-x-explorer · LLM-driven minimum viable · 中央 ALL_SKILL_RUNNERS 同步) | **16 expert + 32/32 active** (25 production + 7 script · SKILL ROLLOUT 完整收尾) | +| | TBD | 路线图启动 | 16/16 + V2 | diff --git a/SECURITY.md b/SECURITY.md index 764624a2..23bedeec 100644 --- a/SECURITY.md +++ b/SECURITY.md @@ -77,7 +77,7 @@ - [ ] 仅在**自己拥有 / 经书面授权**的系统上运行上述工具 - [ ] 在 `tagent.yml` 显式设置 `pentest.authorized: true`(此为操作者自证授权,不构成第三方授权证明) - [ ] 遵守所在司法管辖区法律: - - **中国**:《刑法》§285-§287(非法侵入 / 破坏 / 非法控制计算机信息系统罪);《网络安全法》§27 / §63 + - **中国**:《刑法》-(非法侵入 / 破坏 / 非法控制计算机信息系统罪);《网络安全法》/ - **美国**:Computer Fraud and Abuse Act(CFAA, 18 U.S.C. § 1030) - **欧盟**:NIS2 Directive(EU 2022/2555) diff --git a/ai/CLAUDE.md b/ai/CLAUDE.md index 751b9494..6dff5211 100644 --- a/ai/CLAUDE.md +++ b/ai/CLAUDE.md @@ -162,7 +162,7 @@ python -c "from runtime.registry.registry import build_catalog; c=build_catalog( 每次提交前问自己: 1. 
有没有硬编码路径?→ 应该全部走 settings -2. 新文件放对目录了吗?→ 对照 §3.1 +2. 新文件放对目录了吗?→ 对照 3. 有没有不该提交的文件?→ `git status` 确认 4. 核心功能实测通过了吗?→ 至少 registry catalog 能构建 5. 改 `utils/` 了吗?→ 如果改了,要测双模式都正常 diff --git "a/ai/agents/01-\346\265\213\350\257\225\344\270\273\347\256\241.md" "b/ai/agents/01-\346\265\213\350\257\225\344\270\273\347\256\241.md" index f97ca1e7..05254225 100644 --- "a/ai/agents/01-\346\265\213\350\257\225\344\270\273\347\256\241.md" +++ "b/ai/agents/01-\346\265\213\350\257\225\344\270\273\347\256\241.md" @@ -6,7 +6,7 @@ EXPERT_IMPL_STATUS: production paired_skills: [test-coordinator] --- -> ℹ️ **V1.0.0 实装状态**: 16 expert 全部 **16 active**(11 production + 5 script-backed),**0 rollout**。详见 [ROADMAP.md](../../ROADMAP.md)。 +> ℹ️ **实装状态**: 16 expert 全部 **16 active**(11 production + 5 script-backed),**0 rollout**。详见 [ROADMAP.md](../../ROADMAP.md)。 > runtime/router + orchestrator 防 mock 已落地 — 路由到未实装 expert 返回明确「未实装」说明,不输出 mock 数据。 你是一位拥有15年经验的测试技术总监,带领过多个大型互联网项目的测试团队。你深谙测试工程化,善于风险识别、资源调度和质量决策。 diff --git "a/ai/agents/07-\346\265\213\350\257\225\346\211\247\350\241\214.md" "b/ai/agents/07-\346\265\213\350\257\225\346\211\247\350\241\214.md" index 0c0706d7..ccc70970 100644 --- "a/ai/agents/07-\346\265\213\350\257\225\346\211\247\350\241\214.md" +++ "b/ai/agents/07-\346\265\213\350\257\225\346\211\247\350\241\214.md" @@ -26,7 +26,7 @@ paired_skills: [smoke-test, regression-test] ## 协作输出 - 向 **test-lead**:执行结果 JSON(含 pass_rate / 失败分类 / 性能门禁状态) -- 向 **bug-manager**:failure_type=product_bug 列表(自动批量提交 BugTracker,默认禅道,可换 Jira/GitHub/GitLab/Linear/Webhook,主宪章 §12) +- 向 **bug-manager**:failure_type=product_bug 列表 - 向 **report-generator**:完整结果 JSON + Allure results + JMeter HTML - 向 **automation-engineer**:test_code_bug 反馈(脚本错误) - 向 **env-manager**:environment_issue 反馈(环境异常) diff --git "a/ai/agents/08-Bug\347\256\241\347\220\206.md" "b/ai/agents/08-Bug\347\256\241\347\220\206.md" index 35b686b0..ab1d9b0e 100644 --- "a/ai/agents/08-Bug\347\256\241\347\220\206.md" +++ 
"b/ai/agents/08-Bug\347\256\241\347\220\206.md" @@ -1,6 +1,6 @@ --- name: bug-manager -description: Bug管理专家 - 规范提交Bug到BugTracker(默认禅道,可换 Jira/GitHub/GitLab/Linear/Webhook,主宪章 §12 BugTrackerBase 统一契约),追踪Bug修复进度,验证修复结果,生成Bug统计分析报告。默认实现 utils/zentao_bug_manager.py(权威 severity 1=P0/2=P1/3=P2/4=P3);切换 adapter 由 .env `BUG_TRACKER` 字段指定。 +description: Bug管理专家 - 规范提交Bug到BugTracker,追踪Bug修复进度,验证修复结果,生成Bug统计分析报告。默认实现 utils/zentao_bug_manager.py(权威 severity 1=P0/2=P1/3=P2/4=P3);切换 adapter 由 .env `BUG_TRACKER` 字段指定。 tools: Read, Write, Bash, Grep, Glob EXPERT_IMPL_STATUS: production paired_skills: [zentao-bug-submission] diff --git "a/ai/agents/15-\346\270\227\351\200\217\346\265\213\350\257\225.md" "b/ai/agents/15-\346\270\227\351\200\217\346\265\213\350\257\225.md" index 35b33af9..6e0c7c24 100644 --- "a/ai/agents/15-\346\270\227\351\200\217\346\265\213\350\257\225.md" +++ "b/ai/agents/15-\346\270\227\351\200\217\346\265\213\350\257\225.md" @@ -52,15 +52,15 @@ paired_skills: [pentest-coordinator] **Static-Dynamic Correlation**(shannon 差异化):SAST 发现的不报为"理论风险",必须 exploit agent 真打成功 + 关联源码才入报告。 -## 主宪章铁律(必遵守) +## 规则(必遵守) -- **§24 safe-by-default**:`tagent.yml` `pentest.authorized: true` + `pentest.scope: [list]` 显式才允许;否则拒绝运行 -- **§24 PII scrub**:报告含敏感数据自动脱敏(email/phone/SSN/card) -- **§22 决策不可逆禁止**:不真删数据;不真破坏文件;不 force-push;PoC 仅生成不执行不可逆操作 -- **§22 隔离 client**:子 agent 用 `subagent.aux_client`,不污染主 session -- **§22 沙箱**:危险 exploit 必须在 Docker/VM 内跑,不在 host -- **§21 横切可复现性**:每个 PoC 必带 seed + recordings(Burp HAR / 截图) -- **§21 测试深度 L4**:渗透是 L4 极深,必须含安全渗透 + 决策可追溯 + 监管送审证据链 +- **safe-by-default**:`tagent.yml` `pentest.authorized: true` + `pentest.scope: [list]` 显式才允许;否则拒绝运行 +- **PII scrub**:报告含敏感数据自动脱敏(email/phone/SSN/card) +- **不可逆操作**:不真删数据;不真破坏文件;不 force-push;PoC 仅生成不执行 +- **隔离 client**:子 agent 用 `subagent.aux_client`,不污染主 session +- **沙箱**:危险 exploit 必须在 Docker/VM 内跑,不在 host +- **横切可复现性**:每个 PoC 必带 seed + recordings(Burp HAR / 截图) +- **测试深度 L4**:渗透是 L4 极深,必须含安全渗透 + 决策可追溯 + 监管送审证据链 ## 
调用 Skill 速查 @@ -79,7 +79,7 @@ paired_skills: [pentest-coordinator] - 完整渗透报告(Markdown + PDF):仅 PoC 验证漏洞 - 每条漏洞:CWE id + CVSS 评分 + 受影响组件 + PoC 步骤 + 修复建议 + 引用 OWASP/NIST - 证据包:HAR / 截图 / 录屏 / 日志 → `mcp-evidence-vault` -- Bug 单:严重度按 CVSS 映射到 1=P0 / 2=P1 / 3=P2 / 4=P3(主宪章 §18-4) +- Bug 单:严重度按 CVSS 映射到 1=P0 / 2=P1 / 3=P2 / 4=P3 ## 不做的事(Via Negativa) @@ -87,4 +87,4 @@ paired_skills: [pentest-coordinator] - 不做真破坏 — 即使授权也只生成 PoC,不执行 `rm -rf`/`DROP TABLE` - 不报无 PoC 的"理论风险" — 假阳性疲劳是渗透报告头号杀手 - 不在 prod 环境直接打 — 沙箱/staging 优先;真 prod 测必 staging-mirror -- 不存客户 PII 在飞轮(主宪章 §24 PII 单源 scrub) +- 不存客户 PII 在飞轮 diff --git "a/ai/agents/16-\350\275\246\350\275\275\346\265\213\350\257\225.md" "b/ai/agents/16-\350\275\246\350\275\275\346\265\213\350\257\225.md" index 54d58f04..65672cc2 100644 --- "a/ai/agents/16-\350\275\246\350\275\275\346\265\213\350\257\225.md" +++ "b/ai/agents/16-\350\275\246\350\275\275\346\265\213\350\257\225.md" @@ -40,7 +40,7 @@ paired_skills: [automotive-test] | **PIL** Processor-in-the-Loop | 真 ECU + 仿真环境 | 算法+硬件初步 | | **HIL** Hardware-in-the-Loop | 真 ECU + 真 I/O(模拟外设)| 集成 + 故障注入 | -**ASIL C/D 必须 HIL 通过**(主宪章 §21 L4 极深)。 +**ASIL C/D 必须 HIL 通过**。 ## ADAS 场景库 @@ -59,7 +59,7 @@ paired_skills: [automotive-test] | 标准 | 范围 | 触发 | |------|------|------| -| **ISO 26262**(主宪章 §21 已有) | 功能安全 ASIL A-D | 所有汽车 ECU | +| **ISO 26262** | 功能安全 ASIL A-D | 所有汽车 ECU | | **ISO 21448(SOTIF)** | 预期功能安全 | ADAS / AD 必有 | | **UN R155** | 网络安全 CSMS | 2024 起新车强制(欧盟) | | **UN R156** | OTA 升级管理 SUMS | 同 R155 | @@ -79,14 +79,14 @@ paired_skills: [automotive-test] 6. **诊断 DTC 升级前后对比** 7. 
**UN R156 文档化**:升级日志 + 用户通知 + 回退证据 -## 主宪章铁律 +## 规则 -- **§21 测试深度**:车载 ECU 默认 L3+,ADAS/底盘/转向 L4 极深 -- **§22 决策不可逆禁止**:OTA 包必含回退;真车测试必含 kill-switch -- **§19 行业适配**:接入车载行业必先《领域档案》+ 领域专家签字 -- **§24 safe-by-default**:`tagent.yml` `automotive.fleet_test_authorized: true` 才允许真车数据 -- **§21 横切可复现性**:HIL 测试必含 seed + 录波(MDF/MF4 格式) -- **§18-12 决策可追溯**:ADAS 场景每次过/挂必落 `decisions/` 含场景 id + 仿真版本 + 算法版本 +- **测试深度**:车载 ECU 默认 L3+,ADAS/底盘/转向 L4 极深 +- **不可逆操作**:OTA 包必含回退;真车测试必含 kill-switch +- **行业适配**:接入车载行业必先《领域档案》+ 领域专家签字 +- **safe-by-default**:`tagent.yml` `automotive.fleet_test_authorized: true` 才允许真车数据 +- **横切可复现性**:HIL 测试必含 seed + 录波(MDF/MF4 格式) +- **决策可追溯**:ADAS 场景每次过/挂必落 `decisions/` 含场景 id + 仿真版本 + 算法版本 ## 调用 Skill 速查 @@ -105,11 +105,11 @@ paired_skills: [automotive-test] - HIL 测试录波(MDF/MF4) - 场景库结果矩阵(过/挂/未跑) - 合规审计包(SOTIF / R155 / R156)→ `mcp-compliance-checker` -- Bug 单 → 按 ASIL 严重度映射 P0-P3(主宪章 §18-4) +- Bug 单 → 按 ASIL 严重度映射 P0-P3 ## 不做的事 - 不在公开道路擅自跑 — 法规授权 + 安全员 - 不绕过 ASIL 等级 — 降级必须签字 -- 不存车主 PII / 车 VIN 在飞轮(主宪章 §24) -- 不测试电控涉及生命安全功能时跳过 HIL — 主宪章 §21 L4 铁律 +- 不存车主 PII / 车 VIN 在飞轮 +- 不测试电控涉及生命安全功能时跳过 HIL — L4 规则 diff --git a/ai/agents/README.md b/ai/agents/README.md index 7919cdca..d27c696a 100644 --- a/ai/agents/README.md +++ b/ai/agents/README.md @@ -17,7 +17,7 @@ | 05 | `05-数据准备.md` | data-preparer | test_data.json + jmeter_users.csv | | 06 | `06-自动化脚本.md` | automation-engineer | pytest UI/API 脚本 + 协调 JMeter | | 07 | `07-测试执行.md` | test-executor | 执行结果 JSON + Allure + JMeter | -| 08 | `08-Bug管理.md` | bug-manager | BugTracker Bug ID 列表 + 日报(默认禅道,可换 Jira/GitHub/GitLab/Linear/Webhook,主宪章 §12) | +| 08 | `08-Bug管理.md` | bug-manager | BugTracker Bug ID 列表 + 日报 | | 09 | `09-报告生成.md` | report-generator | Word 报告 + 多端通知(企微/飞书/钉钉/Slack/邮件/Teams) | ### 流程依赖关系 diff --git a/ai/skills/agent-introspection-debugging.md b/ai/skills/agent-introspection-debugging.md index 0edb272a..2e901ec5 100644 --- a/ai/skills/agent-introspection-debugging.md +++ 
b/ai/skills/agent-introspection-debugging.md @@ -1,6 +1,6 @@ --- name: agent-introspection-debugging -description: "Agent 自省调试 Skill。LLM 决策 / 工具调用 / 上下文 / token / 状态机透明化。失败用例分析 + 决策回放。派生自 ECC 同名 skill(主宪章 §28)。" +description: "Agent 自省调试 Skill。LLM 决策 / 工具调用 / 上下文 / token / 状态机透明化。失败用例分析 + 决策回放。派生自 ECC 同名 skill。" tools: Read, Write, Bash, Grep, Glob SKILL_IMPL_STATUS: production --- @@ -24,7 +24,7 @@ SKILL_IMPL_STATUS: production | **上下文** | prompt 长度 + 截断点 + 主-子 session 隔离审查 | | **状态机** | Prefect flow run state(`runtime/orchestrator/flows.py`)| -## 决策回放(主宪章 §18-12 满足) +## 决策回放 每次 routing / curator / scheduler 决策已自动落 `decisions/{date}_{tool}_{run_id}.json`。 含:输入快照 + 模型版本 + 阈值 + 判断结论 + 理由。 @@ -51,17 +51,17 @@ flow.run # api.request 总 span ## token 异常诊断 - 单 LLM call > 10k tokens → 输入太大(catalog 没裁?) -- 主-子 session 共享 cache → §22 子代理 aux_client 隔离失效 +- 主-子 session 共享 cache → 子代理 aux_client 隔离失效 - 重复调相同 LLM(无 cache)→ Anthropic prompt cache 没设 ttl -## 与主宪章融合 +## 与融合 -- §18-12 决策可追溯(本 skill 直接消费) -- §22 隔离 client(本 skill 检测违反) -- §21 横切可复现性(本 skill 必带 seed + snapshot) +- 决策可追溯(本 skill 直接消费) +- 隔离 client(本 skill 检测违反) +- 横切可复现性(本 skill 必带 seed + snapshot) ## 不做 - 不只看 LLM 输出(必看输入 + token + 上下文) - 不靠 print 调试(必走 OTel + Loguru 结构化) -- 不删 decisions/(主宪章 §1+§18-16 不可删) +- 不删 decisions/ diff --git a/ai/skills/automotive-hil-loop-test.md b/ai/skills/automotive-hil-loop-test.md index 1efaf22a..5bd8c5ad 100644 --- a/ai/skills/automotive-hil-loop-test.md +++ b/ai/skills/automotive-hil-loop-test.md @@ -17,7 +17,7 @@ SKILL_IMPL_STATUS: production | **PIL** | Processor-in-the-Loop | 真 ECU + 仿真环境 | | **HIL** | Hardware-in-the-Loop | **真 ECU + 真 I/O**(模拟外设)| -**ASIL C / D 必经 HIL**(主宪章 §21 L4 极深);ASIL A/B 可 PIL 替代。 +**ASIL C / D 必经 HIL**;ASIL A/B 可 PIL 替代。 ## HIL 平台 @@ -39,7 +39,7 @@ SKILL_IMPL_STATUS: production - 格式:**MDF 4.x** / MF4(AUTOSAR 标准) - 工具:Vector CANape / ASAM ODS -- 必含 seed + 算法版本 + ECU 固件 hash + 仿真版本(主宪章 §21 可复现性) +- 必含 seed + 算法版本 + ECU 固件 hash + 仿真版本 ## 输出 diff --git 
a/ai/skills/automotive-ota-update-test.md b/ai/skills/automotive-ota-update-test.md index ff0b5e92..b748cd4f 100644 --- a/ai/skills/automotive-ota-update-test.md +++ b/ai/skills/automotive-ota-update-test.md @@ -28,7 +28,7 @@ SKILL_IMPL_STATUS: production - 升级流量重放:wireshark + scapy - 中断模拟:`utils/chaos_helper.py` 在升级中触发(`runtime/scheduler` 集成) -- 弱网模拟:tc + netem(主宪章已用) +- 弱网模拟:tc + netem ## 输出 diff --git a/ai/skills/automotive-test.md b/ai/skills/automotive-test.md index 7855f8f6..5ae8af43 100644 --- a/ai/skills/automotive-test.md +++ b/ai/skills/automotive-test.md @@ -29,12 +29,12 @@ SKILL_IMPL_STATUS: production | 9 合规审计 | `compliance/engine.py` + 行业规则库(ISO 26262/SOTIF/R155/R156 Phase 2) | | 10 报告 + Bug 单 | `report-generator` | -## 主宪章铁律 +## 规则 -- §21 L4 极深:ADAS/底盘/转向必 HIL + 形式化验证 -- §24 safe-by-default:`automotive.fleet_test_authorized: true` + `automotive.test_lab: ` 才允许真车数据 -- §22 不可逆禁止:OTA 必含回退;真车 kill-switch 必有 -- §19 行业适配:接入车载行业必《领域档案》+ 主机厂签字 +- L4 极深:ADAS/底盘/转向必 HIL + 形式化验证 +- safe-by-default:`automotive.fleet_test_authorized: true` + `automotive.test_lab: ` 才允许真车数据 +- 不可逆操作:OTA 必含回退;真车 kill-switch 必有 +- 行业适配:接入车载行业必《领域档案》+ 主机厂签字 ## 输出 diff --git a/ai/skills/build-your-own-x-explorer.md b/ai/skills/build-your-own-x-explorer.md index 399c44b9..c80a8926 100644 --- a/ai/skills/build-your-own-x-explorer.md +++ b/ai/skills/build-your-own-x-explorer.md @@ -1,6 +1,6 @@ --- name: build-your-own-x-explorer -description: "Build-your-own-X 教学引导 Skill。按用户当前测试场景推荐对应 byox 教程深度学习路径;每条标 estimated_time_hours;主宪章 §31 教学层 13 大类落地。" +description: "Build-your-own-X 教学引导 Skill。按用户当前测试场景推荐对应 byox 教程深度学习路径;每条标 estimated_time_hours;教学层 13 大类落地。" tools: Read, Write, Grep, Glob SKILL_IMPL_STATUS: production --- @@ -47,11 +47,11 @@ SKILL_IMPL_STATUS: production | E2E 调试 / 视觉回归 | byox-web-browser | | webhook / gateway | byox-bot | -## 与主宪章融合 +## 与融合 -- §23 教学层:本 skill 是 learn mode 深度路径入口 -- §27 Karpathy 原则 4(Goal-Driven):推荐前必问用户**时间预算**;无预算 → 拒推 -- §29 essence-watcher:byox 标 
`essence_only`(默认不动 Test-Agent),需要时本 skill 主动引 +- 教学层:本 skill 是 learn mode 深度路径入口 +- Karpathy 原则 4(Goal-Driven):推荐前必问用户**时间预算**;无预算 → 拒推 +- essence-watcher:byox 标 `essence_only`(默认不动 Test-Agent),需要时本 skill 主动引 ## 不做 diff --git a/ai/skills/e2e-testing.md b/ai/skills/e2e-testing.md index b4f9ce70..dcff9869 100644 --- a/ai/skills/e2e-testing.md +++ b/ai/skills/e2e-testing.md @@ -1,6 +1,6 @@ --- name: e2e-testing -description: "E2E 测试 Skill。Playwright 关键用户流 + 跨浏览器 + 2FA/TOTP/SSO 自动登录 + 视觉回归 + 录屏。派生自 ECC e2e-testing(主宪章 §28)。" +description: "E2E 测试 Skill。Playwright 关键用户流 + 跨浏览器 + 2FA/TOTP/SSO 自动登录 + 视觉回归 + 录屏。派生自 ECC e2e-testing。" tools: Read, Write, Bash, Grep, Glob SKILL_IMPL_STATUS: production --- @@ -21,7 +21,7 @@ SKILL_IMPL_STATUS: production | 浏览器 | Playwright(Chromium / Firefox / WebKit) | | 2FA / TOTP | `pyotp.TOTP(SECRET).now()` | | SSO | Playwright follow redirects(Okta / Auth0 / Azure AD / Keycloak) | -| 视觉回归 | `screenshot()` + SSIM(主宪章 §21 测试类型) | +| 视觉回归 | `screenshot()` + SSIM | | 录屏 | `context = browser.new_context(record_video_dir="evidence/")`| | Trace | `tracing.start(screenshots=True, snapshots=True)` | @@ -47,12 +47,12 @@ class LoginPage: 5. 退出 + 登录失效 6. 
错误路径(密码错 5 次锁定 等) -## 与主宪章融合 +## 与融合 -- §17 测试架构:E2E 占金字塔顶层 10% -- §21 测试类型:含视觉回归 -- §21 横切可复现性:trace + 录屏 + screenshots -- §22 6-缺陷 RCA(回归 + 变更影响) +- 测试架构:E2E 占金字塔顶层 10% +- 测试类型:含视觉回归 +- 横切可复现性:trace + 录屏 + screenshots +- 6-缺陷 RCA(回归 + 变更影响) ## 不做 diff --git a/ai/skills/eval-harness.md b/ai/skills/eval-harness.md index 10cec5c1..335c8a1c 100644 --- a/ai/skills/eval-harness.md +++ b/ai/skills/eval-harness.md @@ -1,6 +1,6 @@ --- name: eval-harness -description: "Eval 框架 Skill。LLM/AI 系统评测:pass@k / Jaccard@k / top-1 stability / latency Δ。融合 gbrain eval 回放(主宪章 §24)+ ECC eval-harness。" +description: "Eval 框架 Skill。LLM/AI 系统评测:pass@k / Jaccard@k / top-1 stability / latency Δ。融合 gbrain eval 回放+ ECC eval-harness。" tools: Read, Write, Bash, Grep, Glob SKILL_IMPL_STATUS: production --- @@ -32,7 +32,7 @@ SKILL_IMPL_STATUS: production ## 使用 ```bash -# 1. opt-in capture(主宪章 §24) +# 1. opt-in capture TAGENT_EVAL_CAPTURE=1 tagent run "..." # 2. 改 router/prompt/KB @@ -41,7 +41,7 @@ TAGENT_EVAL_CAPTURE=1 tagent run "..." python -m runtime.tutor.eval_replay # 输出 3 指标 ``` -## 评测原则(主宪章 §24) +## 评测原则 - **opt-in 不偷数据**:`TAGENT_EVAL_CAPTURE=1` 必显式 - **PII 必 scrub**:落档前 6 类正则 diff --git a/ai/skills/pentest-coordinator.md b/ai/skills/pentest-coordinator.md index 1ee9d9cb..aeebdf12 100644 --- a/ai/skills/pentest-coordinator.md +++ b/ai/skills/pentest-coordinator.md @@ -1,6 +1,6 @@ --- name: pentest-coordinator -description: "渗透测试主编排 Skill。完整渗透流程编排:授权检查 → 侦察 → 漏洞 → 利用 → 后渗透 → 报告。仅含 working PoC 才入报告(shannon 哲学)。tagent.yml pentest.authorized=true 显式才允许跑(§24 safe-by-default)。" +description: "渗透测试主编排 Skill。完整渗透流程编排:授权检查 → 侦察 → 漏洞 → 利用 → 后渗透 → 报告。仅含 working PoC 才入报告(shannon 哲学)。tagent.yml pentest.authorized=true 显式才允许跑(safe-by-default)。" tools: Read, Write, Bash, Grep, Glob requires_layer: [base, security] SKILL_IMPL_STATUS: production @@ -14,9 +14,9 @@ SKILL_IMPL_STATUS: production /pentest-coordinator [target] [--mode whitebox|blackbox|graybox] [--scope ] ``` -## 前置检查(铁律) +## 前置检查(规则) -1. 
读 `tagent.yml`:`pentest.authorized: true` + `pentest.scope: [list]` **必须显式**;否则拒绝(§24 safe-by-default) +1. 读 `tagent.yml`:`pentest.authorized: true` + `pentest.scope: [list]` **必须显式**;否则拒绝(safe-by-default) 2. `target` 必须在 `scope` 内;不在 → 拒绝 3. `prod` 环境 → 拒绝;只允许 staging/sandbox/dev 4. 落 `workspace/测试报告/{项目名}/decisions/pentest_{run_id}_authorized.json` 记授权来源 @@ -35,7 +35,7 @@ SKILL_IMPL_STATUS: production - Injection / XSS / SSRF / Auth / Authz - 各自子 agent 独立运行;只汇总有 PoC 的 -- subagent pool(主宪章 §22)`runtime/subagent/pool.py` +- subagent pool `runtime/subagent/pool.py` ## 输出 @@ -49,4 +49,4 @@ SKILL_IMPL_STATUS: production - 不真破坏(只生成 PoC) - 不未授权(显式 yaml + scope 必有) - 不报无 PoC 的"理论风险" -- 不污染主 session(用 aux_client,§22) +- 不污染主 session(用 aux_client) diff --git a/ai/skills/pentest-exploit.md b/ai/skills/pentest-exploit.md index 2470e224..7ddaab41 100644 --- a/ai/skills/pentest-exploit.md +++ b/ai/skills/pentest-exploit.md @@ -8,12 +8,12 @@ SKILL_IMPL_STATUS: production # pentest-exploit -## 沙箱铁律(主宪章 §22) +## 沙箱规则 - 利用代码**必须在 Docker/VM 内**跑(`runtime/backends/docker.py`) - 不在 host 直接跑利用 - 失败注入 (Chaos) 仅限沙箱 -- 命令 `rm` / `DROP` / `unlink` / `truncate` / `fdisk` → **destructive-guard 拦**(主宪章 §24) +- 命令 `rm` / `DROP` / `unlink` / `truncate` / `fdisk` → **destructive-guard 拦** ## 流程 diff --git a/ai/skills/pentest-report.md b/ai/skills/pentest-report.md index eb16e8dc..12a6b3c4 100644 --- a/ai/skills/pentest-report.md +++ b/ai/skills/pentest-report.md @@ -8,12 +8,12 @@ SKILL_IMPL_STATUS: production # pentest-report -## 报告原则(铁律) +## 报告原则(规则) 1. **仅 PoC 入报告**:`status: verified` 的才报;`unverified` 不报 2. **可复现**:每 PoC 3 次都成才算 reproducible 3. **PII scrub**:涉及客户数据自动脱敏(`runtime/tutor/eval_replay.PII_PATTERNS` 单源) -4. **CVSS 3.1 评分**:每条必算 + 映射到 P0/P1/P2/P3(主宪章 §18-4) +4. **CVSS 3.1 评分**:每条必算 + 映射到 P0/P1/P2/P3 5.
**修复建议**:含具体代码片段 + 引用 OWASP/NIST ## 报告章节(标准模板) diff --git a/ai/skills/pentest-vuln.md b/ai/skills/pentest-vuln.md index ea4064f8..d1ad5afa 100644 --- a/ai/skills/pentest-vuln.md +++ b/ai/skills/pentest-vuln.md @@ -37,7 +37,7 @@ SKILL_IMPL_STATUS: production - `workspace/渗透/vuln_candidates.json`:候选漏洞(**未验证**,标 `unverified`) - 喂入 `/pentest-exploit` 验证 -## 铁律 +## 规则 - 漏洞 ≠ PoC,候选必标 `status: unverified` - 不在此 skill 出报告(报告在 `/pentest-report` 且只含 verified) diff --git a/ai/skills/security-review.md b/ai/skills/security-review.md index b19b0b0d..316b4a54 100644 --- a/ai/skills/security-review.md +++ b/ai/skills/security-review.md @@ -50,10 +50,10 @@ pip-audit safety check ``` -## 与主宪章融合 +## 与融合 -- §17 Shift-Left 7 层:本 skill 是 L4 pre-commit + L5 PR gate + L6 静态分析 -- §25 渗透 PoC-only 哲学:本 skill 报 unverified 候选;喂 `/pentest-vuln` 验证 +- Shift-Left 7 层:本 skill 是 L4 pre-commit + L5 PR gate + L6 静态分析 +- 渗透 PoC-only 哲学:本 skill 报 unverified 候选;喂 `/pentest-vuln` 验证 ## 不做 diff --git a/ai/skills/smoke-test.md b/ai/skills/smoke-test.md index 9898485a..35032eca 100644 --- a/ai/skills/smoke-test.md +++ b/ai/skills/smoke-test.md @@ -93,7 +93,7 @@ allure generate workspace/测试报告/{项目名}/allure-results \ **通过:** ```text ✅ 冒烟测试通过 -模块:用户登录模块 V1.0.0 +模块:用户登录模块 执行时间:8 分 32 秒 P0 用例:25 个,通过 25 个,失败 0 个(100%) 结论:可以继续部署 / 全量测试 @@ -102,7 +102,7 @@ P0 用例:25 个,通过 25 个,失败 0 个(100%) **失败:** ```text ❌ 冒烟测试失败,阻止部署 -模块:用户登录模块 V1.0.0 +模块:用户登录模块 执行时间:7 分 15 秒 P0 用例:25 个,通过 23 个,失败 2 个(92.0% < 95%) 失败用例: diff --git a/ai/skills/tdd-workflow.md b/ai/skills/tdd-workflow.md index b9ddb4dc..8e2791bd 100644 --- a/ai/skills/tdd-workflow.md +++ b/ai/skills/tdd-workflow.md @@ -1,6 +1,6 @@ --- name: tdd-workflow -description: "TDD 测试驱动开发 Skill。Tests BEFORE code,80%+ 覆盖(unit+integration+E2E),边界+异常+错误场景必覆盖。派生自 ECC 同名 skill(主宪章 §28)。" +description: "TDD 测试驱动开发 Skill。Tests BEFORE code,80%+ 覆盖(unit+integration+E2E),边界+异常+错误场景必覆盖。派生自 ECC 同名 skill。" tools: Read, Write, Bash, Grep, Glob SKILL_IMPL_STATUS: production --- @@ -32,14 
+32,14 @@ SKILL_IMPL_STATUS: production 3. **重构**保持测试过(refactor) 4. 移到下一个测试 case -## 与主宪章融合 +## 与融合 -- §21 测试深度横切准则:"用例本身用变异测试反向验证"(覆盖率 ≠ 用例质量) -- §27 Karpathy 原则 4 Goal-Driven:每任务转为 "写复现测试 → 让它过" -- §18-14 修改四关:测试套件全过才许 commit +- 测试深度横切准则:"用例本身用变异测试反向验证"(覆盖率 ≠ 用例质量) +- Karpathy 原则 4 Goal-Driven:每任务转为 "写复现测试 → 让它过" +- 修改四关:测试套件全过才许 commit ## 不做 - 不写无 assert 的测试 - 不一次写完 200 行测试不跑(分小批 red → green) -- 不为了覆盖率写无意义测试(主宪章 §21 测试热寂检测) +- 不为了覆盖率写无意义测试 diff --git a/ai/skills/testcase-design.md b/ai/skills/testcase-design.md index 92d1520d..08e88842 100644 --- a/ai/skills/testcase-design.md +++ b/ai/skills/testcase-design.md @@ -1,6 +1,6 @@ --- name: testcase-design -description: 快速生成测试用例技能。输入需求描述,调用 testcase-designer 专家生成结构化测试用例,输出格式由用户自选:默认 Excel(4 Sheet),可选 xmind / markmap / opml 思维导图(V1.9 加),或 --format all 一键产全部。适用于用例评审、快速梳理测试点。 +description: 快速生成测试用例技能。输入需求描述,调用 testcase-designer 专家生成结构化测试用例,输出格式由用户自选:默认 Excel(4 Sheet),可选 xmind / markmap / opml 思维导图,或 --format all 一键产全部。适用于用例评审、快速梳理测试点。 tools: Read, Write, Grep, Glob SKILL_IMPL_STATUS: production --- @@ -82,7 +82,7 @@ P1 主要用例(12 条): - **Sheet3 P0冒烟集**:仅 P0 用例(带前置条件、数据) - **Sheet4 P0_P1回归集**:P0+P1 用例 -### 思维导图 / 大纲(V1.9 加,按需) +### 思维导图 / 大纲 `runtime/exporters/` 已注册 3 个 exporter,用户自选;同一 TestCaseTree 一份 IR,三种落盘: diff --git a/ai/skills/verification-loop.md b/ai/skills/verification-loop.md index 94ebb956..72d04928 100644 --- a/ai/skills/verification-loop.md +++ b/ai/skills/verification-loop.md @@ -1,6 +1,6 @@ --- name: verification-loop -description: "5-phase 验证循环 Skill:build → typecheck → lint → test → coverage。任意失败 STOP + 修。派生自 ECC 同名 skill(主宪章 §28)。PR 前 / 质量门禁前 / refactor 后必跑。" +description: "5-phase 验证循环 Skill:build → typecheck → lint → test → coverage。任意失败 STOP + 修。派生自 ECC 同名 skill。PR 前 / 质量门禁前 / refactor 后必跑。" tools: Read, Write, Bash, Grep, Glob SKILL_IMPL_STATUS: production --- @@ -11,7 +11,7 @@ SKILL_IMPL_STATUS: production - feature 完成后 - PR 提交前 -- 质量门禁前(主宪章 §17 五层门禁) +- 质量门禁前 - refactor 后 - darwin-skill 评分前 @@ 
-48,16 +48,16 @@ pytest runtime/tests/ -v 2>&1 | tail -50 ```bash pytest --cov=runtime --cov-report=term-missing 2>&1 | tail -30 ``` -对比 §17 regression 门槛 cov ≥ 80% +对比 regression 门槛 cov ≥ 80% -## 与主宪章融合 +## 与融合 -- §17 五层门禁:本 skill 是**进 smoke → regression** 的前置 -- §18-14 修改四关:四关 = 本 skill 4 阶段简化版 -- §21 横切可复现性:失败必固定 seed + snapshot +- 五层门禁:本 skill 是**进 smoke → regression** 的前置 +- 修改四关:四关 = 本 skill 4 阶段简化版 +- 横切可复现性:失败必固定 seed + snapshot ## 不做 -- 不跳阶段(主宪章 §21 跳阶段 = 测试不诚信) +- 不跳阶段 - 不忽略 type 错误"等会儿再修" - 不静默吞 lint warning(--fix 默认开) diff --git a/ai/skills/zentao-bug-submission.md b/ai/skills/zentao-bug-submission.md index c52a59e7..3bfe395d 100644 --- a/ai/skills/zentao-bug-submission.md +++ b/ai/skills/zentao-bug-submission.md @@ -1,6 +1,6 @@ --- name: zentao-bug-submission -description: BugTracker Bug 提交技能(默认 adapter:禅道,主宪章 §12 BugTrackerBase 统一契约支持 Jira/GitHub/GitLab/Linear/Webhook,由 .env BUG_TRACKER 选)。输入 Bug 描述或测试失败信息,自动规范化 Bug 报告并提交到所选 BugTracker,支持批量提交和状态追踪。默认实现 utils/zentao_bug_manager.py(severity 1=P0/2=P1/3=P2/4=P3)。 +description: BugTracker Bug 提交技能。输入 Bug 描述或测试失败信息,自动规范化 Bug 报告并提交到所选 BugTracker,支持批量提交和状态追踪。默认实现 utils/zentao_bug_manager.py(severity 1=P0/2=P1/3=P2/4=P3)。 tools: Read, Write, Bash, Grep, Glob SKILL_IMPL_STATUS: script --- diff --git a/ci/INDEX.md b/ci/INDEX.md index fc9b59a2..adfb47f3 100644 --- a/ci/INDEX.md +++ b/ci/INDEX.md @@ -58,7 +58,7 @@ | 性能基线何时更新 | 仅 `release/*` 分支 + `PERF_MODE=full` + 当次门禁全 PASS | | 多端通知未发出 | `.env` / Secrets 未配 `WECHAT_WEBHOOK_URL` / `FEISHU_WEBHOOK` / `DINGTALK_WEBHOOK` / `SLACK_WEBHOOK_URL` / `EMAIL_SMTP_*` / `TEAMS_WEBHOOK_URL` 等;未配自动跳过不阻塞 | -## 同步链路(宪章§1 同步铁律) +## 同步链路(宪章同步规则) 修改本目录任一文件时,**必须**联动检查: diff --git a/deploy/config/.mcp.json b/deploy/config/.mcp.json index a540ce2f..4b7cf75c 100644 --- a/deploy/config/.mcp.json +++ b/deploy/config/.mcp.json @@ -1,5 +1,5 @@ { - "_comment": "MCP 服务配置 — V1.2.0 6 件套 (主宪章 §16)。P2 #12 MCP client 完善后全部启用。", + "_comment": "MCP 服务配置 — 6 件套 。P2 #12 MCP client 完善后全部启用。", 
"mcpServers": { "filesystem": { "command": "npx", diff --git a/deploy/config/INDEX.md b/deploy/config/INDEX.md index 2d14ca50..90dacce0 100644 --- a/deploy/config/INDEX.md +++ b/deploy/config/INDEX.md @@ -29,7 +29,7 @@ - 真实凭据(`TEST_DB_PASSWORD` / `ZENTAO_PASSWORD` / `WECHAT_WEBHOOK_URL` 等)只放 `.env` 或 GitHub Secrets / Jenkins Credentials - 修改 `.env.example` 加新字段时,必须同步 `conftest.py::EnvConfig` 与 `docs/getting-started/配置清单.md` -## 同步链路(宪章§1 同步铁律) +## 同步链路(宪章同步规则) 修改本目录任一配置文件时,**必须**联动检查: diff --git a/deploy/config/conftest.py b/deploy/config/conftest.py index 0739e0a0..5437353a 100644 --- a/deploy/config/conftest.py +++ b/deploy/config/conftest.py @@ -37,7 +37,7 @@ for _utils_dir in _UTILS_CANDIDATES: if _utils_dir.is_dir() and str(_utils_dir) not in sys.path: sys.path.insert(0, str(_utils_dir)) - # utils 子目录也注入 — V1.x 重组后 utils/ 下 12 子目录 + # utils 子目录也注入 — 重组后 utils/ 下 12 子目录 for _sub in _utils_dir.iterdir(): if _sub.is_dir() and not _sub.name.startswith(("_", ".")): if str(_sub) not in sys.path: diff --git a/deploy/config/llm-providers.md b/deploy/config/llm-providers.md index 3dfc8bb8..ab591948 100644 --- a/deploy/config/llm-providers.md +++ b/deploy/config/llm-providers.md @@ -11,12 +11,12 @@ 1. 选厂商 (路径 A 内置 6 / 路径 B 兼容 5+) 2. 复制对应 export 3. 
`tagent demo` 验路由 -- **实测有效** (V1.0.0): Claude / OpenAI / Gemini / DeepSeek / Qwen / Ollama / **智谱 GLM** (路径 B 实测 20/20 准, 见 PR #79) +- **实测有效** : Claude / OpenAI / Gemini / DeepSeek / Qwen / Ollama / **智谱 GLM** (路径 B 实测 20/20 准, 见 PR #79) - **适用场景**: - 离线本地 = Ollama / Qwen - 国内合规 = 智谱 / 豆包 / 通义 - 性价比 = DeepSeek / Kimi - - 主备 fallback = 多 provider 接管 (见 §3) + - 主备 fallback = 多 provider 接管 (见 ) --- @@ -164,7 +164,7 @@ export TAGENT_LLM_API_KEY= --- -## 3 · .env 文件示例 +## 3 .env 文件示例 将选定厂商配置写入项目根 `.env` (从 `.env.example` 复制后填): diff --git a/deploy/config/mcp-server-impl.md b/deploy/config/mcp-server-impl.md index 1494247d..339ec4b0 100644 --- a/deploy/config/mcp-server-impl.md +++ b/deploy/config/mcp-server-impl.md @@ -32,7 +32,7 @@ MCP server 通常通过 stdio 与 client 通信,对外暴露 tools / resources ```python # zentao_mcp_server/__main__.py -"""禅道 MCP Server 骨架(默认 BugTracker 实现示例;Jira/GitHub/GitLab/Linear/Webhook 同骨架,主宪章 §12)""" +"""禅道 MCP Server 骨架""" import asyncio import json import logging diff --git a/deploy/config/requirements.txt b/deploy/config/requirements.txt index 37f6058c..b8428e3b 100644 --- a/deploy/config/requirements.txt +++ b/deploy/config/requirements.txt @@ -147,13 +147,13 @@ defusedxml==0.7.1 # [稳定层] XXE/亿笑/decompression bomb # - kubectl(K8s 混沌测试) # ============================================================ -# V1.1.0 运行时层(`runtime/`)依赖 — 全部 [可选] -# 仅启用 V1.1.0 运行时(AI 路由+Prefect+飞轮+FastAPI/CLI)时安装 +# 运行时层(`runtime/`)依赖 — 全部 [可选] +# 仅启用 运行时(AI 路由+Prefect+飞轮+FastAPI/CLI)时安装 # 完整列表见 runtime/pyproject.toml # ============================================================ # ===== [可选 · runtime 核心] ===== -mcp>=1.0.0 # V1.2.0 MCP SDK(6 件套 server + P2 #12 client): test-orchestrator/protocol-adapter/evidence-vault/defect-tracker/knowledge-base/compliance-checker +mcp>=1.0.0 # MCP SDK(6 件套 server + P2 #12 client): test-orchestrator/protocol-adapter/evidence-vault/defect-tracker/knowledge-base/compliance-checker # litellm>=1.55.0 # 多厂商 LLM 
路由(Claude/OpenAI/Gemini/Qwen/DeepSeek/Ollama) # prefect>=2.20.0,<3.0 # 编排引擎(可选,Direct 执行器在 prefect 缺席时自动接管) # fastapi>=0.115.0 # HTTP 入口 diff --git a/deploy/config/templates/INDEX.md b/deploy/config/templates/INDEX.md index 86d3669d..a93dd116 100644 --- a/deploy/config/templates/INDEX.md +++ b/deploy/config/templates/INDEX.md @@ -1,4 +1,4 @@ -# 配置模板库索引(V1.0.0) +# 配置模板库索引 > `tagent init` 交互向导从本目录读取模板 + matrix · 5 分钟生成 `.env` + `tagent.yml` + `STARTUP.md`。 @@ -39,6 +39,6 @@ llm_providers: ## 相关 -- 主宪章 §5 多格式 I/O · §36 多端通知 canon · §37 BugTracker canon +- 多格式 I/O · 多端通知 canon · BugTracker canon - 上一级:[`../INDEX.md`](../INDEX.md) - 实现:[`../../runtime/init/INDEX.md`](../../../runtime/init/INDEX.md) diff --git a/deploy/config/templates/STARTUP.md.tpl b/deploy/config/templates/STARTUP.md.tpl index e7d294ed..0d3d2af2 100644 --- a/deploy/config/templates/STARTUP.md.tpl +++ b/deploy/config/templates/STARTUP.md.tpl @@ -45,7 +45,7 @@ tagent run "{{SAMPLE_TARGET}}" --mode learn |------|------| | `LLM 调用 raise` | 检查 API key + 网络;切 `TAGENT_LLM_PROVIDER=ollama` 离线兜底 | | `BugTracker 提交失败` | 占位没填或网络 / 权限错;不阻塞,但日报会少 | -| `通知没发出` | 至少配 1 个渠道(主宪章 §36);未配自动跳过 | +| `通知没发出` | 至少配 1 个渠道;未配自动跳过 | | `selftest n7 失败` | 装 python-docx:`pip install python-docx` | ## 7. 
下一步 diff --git a/deploy/config/templates/base.env.tpl b/deploy/config/templates/base.env.tpl index 8a69ae05..d69117bb 100644 --- a/deploy/config/templates/base.env.tpl +++ b/deploy/config/templates/base.env.tpl @@ -15,10 +15,10 @@ TAGENT_LLM_PROVIDER_FALLBACK=ollama # ===== 平台 extras ===== {{PLATFORM_EXTRAS_BLOCK}} -# ===== BugTracker(主宪章 §37,默认 zentao,可换) ===== +# ===== BugTracker ===== {{BUG_TRACKER_ENV_BLOCK}} -# ===== 多端通知(主宪章 §36,任意 1 个生效即可) ===== +# ===== 多端通知 ===== {{NOTIFIER_ENV_BLOCK}} # ===== Test-Agent 运行时(通常不需改) ===== diff --git a/deploy/config/templates/base.tagent.yml.tpl b/deploy/config/templates/base.tagent.yml.tpl index 5ca4d4e9..430b4aa5 100644 --- a/deploy/config/templates/base.tagent.yml.tpl +++ b/deploy/config/templates/base.tagent.yml.tpl @@ -15,7 +15,7 @@ skills: bug_tracker: primary: {{BUG_TRACKER}} - # 多 tracker 并存(主宪章 §37):写成 [zentao, github],按 Bug label 路由 + # 多 tracker 并存:写成 [zentao, github],按 Bug label 路由 # extra: [github] notifiers: @@ -28,15 +28,15 @@ quality_gates: perf_p99_ms_max: 300 selftest: - # 主宪章 §33 自检铁律 + # 自检规则 pre_tag_required: true pass_threshold: 0.80 strict_on_release: true marketplace: - enabled: false # 默认关 · 主宪章 §30 safe-by-default + enabled: false # 默认关 -# ============== SAFETY GATES · safe-by-default(主宪章 §22 / §35 + W5 sprint v2) ============== +# ============== SAFETY GATES · safe-by-default ============== # 危险操作 / 自动化 / 影响生产 的功能 必须显式开启, 否则 destructive-guard 拒绝运行。 # 详见 SECURITY.md 武器化代码使用边界 + 测试工具准入控制 节。 @@ -62,7 +62,7 @@ gateway: # 例: enabled_platforms: [telegram, feishu] pentest: - # 法律契约(default refuse · charter §35); rollout 阶段 yml gate 占位, V1.x 激活后接入真实路由 + # 法律契约(default refuse · charter ); rollout 阶段 yml gate 占位, 激活后接入真实路由 authorized: false # 法律授权确认 scope_in_targets: [] # IP/domain/URL 白名单(IN) scope_out_targets: [] # 强制黑名单(覆盖 IN) diff --git a/deploy/config/templates/matrix.yaml b/deploy/config/templates/matrix.yaml index 88be4cf9..132d2548 100644 --- a/deploy/config/templates/matrix.yaml +++ 
b/deploy/config/templates/matrix.yaml @@ -1,4 +1,4 @@ -# tagent init 单源真理矩阵(V1.12.0) +# tagent init 单源真理矩阵 # # 测试类型 × 平台 × LLM provider × BugTracker × 通知渠道 → 生成 .env + tagent.yml + STARTUP.md # 加新选项:补这里 + 看 wizard 自动列出。 @@ -118,7 +118,7 @@ llm_providers: OLLAMA_HOST: "http://localhost:11434" model_hint: qwen2.5:7b -# 4. BugTracker(6 选,主宪章 §37) +# 4. BugTracker bug_trackers: zentao: label: 禅道(默认) @@ -160,7 +160,7 @@ bug_trackers: BUG_TRACKER: webhook BUG_WEBHOOK_URL: "" -# 5. 通知渠道(6 选,可多选,主宪章 §36) +# 5. 通知渠道 notifiers: wechat: label: 企业微信群机器人 diff --git a/deploy/marketplace/INDEX.md b/deploy/marketplace/INDEX.md index d0b7d519..81823ea3 100644 --- a/deploy/marketplace/INDEX.md +++ b/deploy/marketplace/INDEX.md @@ -1,4 +1,4 @@ -# marketplace 索引(主宪章 §30) +# marketplace 索引 > 对标 Claude Code 官方 marketplace · 4 lane 资源库:skills / agents / mcp / hooks > 用户按需下载;**4 关安全门必过**(签名 / prompt 扫 / 沙箱试跑 / darwin 评分) @@ -51,20 +51,20 @@ tagent verify # 单独跑沙箱验证 tagent uninstall # 卸(归档不删) ``` -## 4 关安全门(主宪章 §30 铁律) +## 4 关安全门 1. **签名校验**:SHA256 + 可选 GPG/ed25519 2. **全 prompt 扫描**:`runtime/scheduler/injection_scan.py` 复用,扫 skill 文本 3. **沙箱试跑**:`runtime/backends/docker.py` 在 Docker 内跑 24h 观察 4. 
**darwin 评分**:`darwin-skill` 评 ≥75 才放行 -任一不过 → 拒装 + 落 `decisions/`(§18-12) +任一不过 → 拒装 + 落 `decisions/`() ## 注意 - **不复制 Anthropic / OpenAI 源码**(品牌+协议红线) - **仅镜像 metadata + 链接**到上游 -- 卸载只**归档**到 `marketplace/.archive/`(§22 不可逆禁止) +- 卸载只**归档**到 `marketplace/.archive/` ## 配置 diff --git a/deploy/marketplace/_safety_policy.yaml b/deploy/marketplace/_safety_policy.yaml index a2eab841..b03f3c79 100644 --- a/deploy/marketplace/_safety_policy.yaml +++ b/deploy/marketplace/_safety_policy.yaml @@ -1,4 +1,4 @@ -# Marketplace 4 关安全门(主宪章 §30) +# Marketplace 4 关安全门 # 4 关任一不过 → 拒装 + 落 decisions/ gates: @@ -52,7 +52,7 @@ trust_tiers: # 卸载策略 uninstall: - archive_only: true # 主宪章 §22 决策不可逆禁止 + archive_only: true # 不可逆操作操作 archive_dir: marketplace/.archive/ # 紧急 kill switch diff --git a/deploy/marketplace/registry.json b/deploy/marketplace/registry.json index a8fc486a..f0649d3a 100644 --- a/deploy/marketplace/registry.json +++ b/deploy/marketplace/registry.json @@ -1,5 +1,5 @@ { - "_comment": "Marketplace registry · 4 lane(skills/agents/mcp/hooks)· 主宪章 §30. 
初始为空,装时由 tagent install 写入.", + "_comment": "Marketplace registry · 4 lane(skills/agents/mcp/hooks). 初始为空,装时由 tagent install 写入.", "version": "1.0", "last_updated": "2026-05-12", "entries": [] diff --git a/deploy/profiles/INDEX.md b/deploy/profiles/INDEX.md index f35b38fb..7232cb78 100644 --- a/deploy/profiles/INDEX.md +++ b/deploy/profiles/INDEX.md @@ -1,4 +1,4 @@ -# profiles/ 索引(V1.10.0) +# profiles/ 索引 > 行业合规 profile 配置 · 测试时按行业自动加载额外规则。 @@ -23,5 +23,5 @@ ## 相关 - 上一级:[`../README.md`](../../README.md) -- 主宪章 §17(九大簇维度边界)+ §25(渗透 & 安全)+ §26(车载 & 自动驾驶) +- 九大簇维度边界 + 渗透 & 安全 + 车载 & 自动驾驶 - 加载实现:`runtime/config/settings.py` profile 字段 diff --git a/deploy/profiles/compliance/INDEX.md b/deploy/profiles/compliance/INDEX.md index ae51ccb0..8b23d47f 100644 --- a/deploy/profiles/compliance/INDEX.md +++ b/deploy/profiles/compliance/INDEX.md @@ -1,7 +1,7 @@ # profiles/compliance 索引 > 行业合规规则库插槽。每个 YAML 文件 = 一个合规框架的检查项集。 -> 真规则由领域专家提供;本目录仅含**空载示例**,V1.2.0 起步。 +> 真规则由领域专家提供;本目录仅含**空载示例**。 ## 已提供示例(空载) @@ -35,5 +35,5 @@ checks: ## 接入方式 -L4 级被测项(charter §21 深度准则 L4)必须通过 `mcp-compliance-checker.check_compliance(profile, run_id)` 验证。 -真规则文件由领域专家+test-lead 双签签字后入库(charter §10 五条铭文 + §15 AgentChat 协议)。 +L4 级被测项(charter 深度准则 L4)必须通过 `mcp-compliance-checker.check_compliance(profile, run_id)` 验证。 +真规则文件由领域专家+test-lead 双签签字后入库(charter 五条铭文 + AgentChat 协议)。 diff --git a/docs/INDEX.md b/docs/INDEX.md index ebfa20c0..c614991e 100644 --- a/docs/INDEX.md +++ b/docs/INDEX.md @@ -8,7 +8,7 @@ |--------------|------|--------| | `STYLE.md` | 全仓 Markdown 样式约定(标题/加粗/列表/表格/代码块/链接/emoji/命名/中英混排) | 写文档前必看一次 | | `SURVEY.md` | 12 题用户调研模板(NPS + skill 使用率) | 想发用户问卷时 | -| `theory/` | **教学层 KB 13 大类**(主宪章 §23 §31) · `01-tools` ~ `13-build-your-own` | learn mode 推荐路径 | +| `theory/` | **教学层 KB 13 大类** · `01-tools` ~ `13-build-your-own` | learn mode 推荐路径 | | `charter/` | 愿景宪章 7 子文件(vision-dimensions / coverage-matrix / agentchat-protocol / skills-bugtracker / install-deploy / test-architecture / runtime-license) | 项目设计原理深读 | |
`case-studies/` | 实施 case study(timeline + 决策 + metrics + 教训) | 想看真实项目演进决策 | | `assets/` | 演示素材 · `demo.recipe.md`(30 秒 demo 录制脚本)+ `terminalizer-config.yml` | 录演示视频时 | @@ -28,5 +28,5 @@ ## 相关 - 上一级:[`../README.md`](../README.md) -- 主宪章 §23(教学层准则)+ §31(KB 扩 13 大类) +- 教学层准则 + KB 扩 13 大类 - 样式约束:`.pre-commit-config.yaml` markdownlint hook(MD001/MD036) diff --git a/docs/MASTER_PLAN.md b/docs/MASTER_PLAN.md index 9f4d5cf8..e0318605 100644 --- a/docs/MASTER_PLAN.md +++ b/docs/MASTER_PLAN.md @@ -61,8 +61,8 @@ ### #7 Karpathy 诚实化 - **文件**: `README.md` + `README.zh-CN.md` — "8640 combinations" → "~12 common combinations tested in CI"; "95% aspirational" → "Coverage is broad but not exhaustive" -- **文件**: `00-项目导航.md` — 移除 9 处 "主宪章 §X" 引用 -- **文件**: `ROADMAP.md` — 移除 3 处 "主宪章" 引用 +- **文件**: `00-项目导航.md` — 移除 9 处 "主宪章 §X" 引用 +- **文件**: `ROADMAP.md` — 移除 3 处 "主宪章" 引用 - **文件**: `utils/generate_report.py` — `generate_test_report()` 143→30 行, 提取 6 helper - **文件**: `utils/mobile_driver.py` — `run_monkey()` 107→55 行, 提取 2 helper - **文件**: `runtime/router/llm_client.py` — `_stub_response()` 77 行 if/elif → dispatch table 8 条目 diff --git a/docs/STYLE.md b/docs/STYLE.md index 2bc183b5..e26f3edb 100644 --- a/docs/STYLE.md +++ b/docs/STYLE.md @@ -1,6 +1,6 @@ # 文档样式约定(STYLE.md) -> Test-Agent 全仓 Markdown 文档统一约定 · V1.10.0 起强制 · pre-commit markdownlint 卡 MD001 / MD036。 +> Test-Agent 全仓 Markdown 文档统一约定 · 强制 · pre-commit markdownlint 卡 MD001 / MD036。 --- @@ -21,7 +21,7 @@ |------|-----| | 关键术语首次出现 | "采用 **遍历性检验**:失败能否重来" | | 表格表头(可选) | 表头单元格内的术语 | -| 警告 / 铁律前缀 | "**铁律**:敏感文件不入 repo" | +| 警告 / 规则前缀 | "**规则**:敏感文件不入 repo" | **禁止**: - 整段加粗(用 blockquote `>` 代替) diff --git a/docs/assets/demo-script-v1.12.md b/docs/assets/demo-script-v1.12.md index ea1cbc4a..b0d65133 100644 --- a/docs/assets/demo-script-v1.12.md +++ b/docs/assets/demo-script-v1.12.md @@ -1,4 +1,4 @@ -# 30 秒 demo · 录制脚本(V1.12 配置自动组装) +# 30 秒 demo · 录制脚本 > 目标:让观众在 30 秒内看到"从 0 到测试报告"完整链路 · 用于推特 / 微信视频号 / 掘金 / Hacker News >
录制工具:[Terminalizer](https://terminalizer.com) / [asciinema](https://asciinema.org) / OBS 屏幕录制 @@ -37,7 +37,7 @@ cat STARTUP.md | head -30 # Step 4 · 健康检查(秒过) tagent doctor --agents -# Step 5 · 跑 demo(V1.13 加 · 全 stub LLM 0 成本) +# Step 5 · 跑 demo tagent demo # Step 6 · 看产物(树形) @@ -84,7 +84,7 @@ terminalizer render demo --output docs/assets/demo.mp4 --quality 80 |------|------|------|------| | Twitter / X | 30 秒 | mp4 | "5 sec AI testing setup with `tagent init`. 16 experts, 32 skills, 8640 config combinations. github.com/Wool-xing/Test-Agent" | | 微信视频号 / 抖音 | 30-60 秒 | mp4 1080×1920 竖屏 | "AI 测试 5 秒上手 · 用例 + 思维导图 + Bug 单 + 报告一键产出 · GitHub 搜 Test-Agent" | -| 掘金 / V2EX / 少数派 | gif | terminalizer | 配文章:介绍 V1.12 配置自动组装 + 矩阵 8640 组合 + 5 preset | +| 掘金 / V2EX / 少数派 | gif | terminalizer | 配文章:介绍配置自动组装 + 矩阵 8640 组合 + 5 preset | | Hacker News | 静态截图 + 链接 | png + url | 标题:"Test-Agent: AI testing framework with `tagent init` to scaffold 8640 configurations" | --- @@ -100,7 +100,7 @@ terminalizer render demo --output docs/assets/demo.mp4 --quality 80 --- -## 后续 V1.13 扩(若 demo 火) +## 后续扩展(若 demo 火) | 触发条件 | 加什么 | |----------|--------| @@ -112,6 +112,6 @@ terminalizer render demo --output docs/assets/demo.mp4 --quality 80 ## 相关 -- 项目宪章 §1 一键部署 · §38 配置自动组装 canon · §5 多格式 I/O +- 项目宪章 一键部署 · 配置自动组装 canon · 多格式 I/O - Terminalizer 配置:[`terminalizer-config.yml`](terminalizer-config.yml) -- 录制原 recipe:[`demo.recipe.md`](demo.recipe.md)(V1.7 起占位) +- 录制原 recipe:[`demo.recipe.md`](demo.recipe.md) diff --git a/docs/assets/terminalizer-config.yml b/docs/assets/terminalizer-config.yml index 781e71ad..87ef2d80 100644 --- a/docs/assets/terminalizer-config.yml +++ b/docs/assets/terminalizer-config.yml @@ -1,4 +1,4 @@ -# terminalizer 配置 · V1.14 30s Test-Agent demo +# terminalizer 配置 · 30s Test-Agent demo # 用法: # npm install -g terminalizer # terminalizer record demo --config docs/assets/terminalizer-config.yml @@ -7,7 +7,7 @@ command: bash scripts/_demo-commands.sh -cwd: . +cwd: . 
env: recording: true diff --git "a/docs/case-studies/2026-05-15-\350\257\232\345\256\236\345\214\226\344\270\216\351\230\262mock\351\227\255\347\216\257.md" "b/docs/case-studies/2026-05-15-\350\257\232\345\256\236\345\214\226\344\270\216\351\230\262mock\351\227\255\347\216\257.md" index 703d6147..de89b063 100644 --- "a/docs/case-studies/2026-05-15-\350\257\232\345\256\236\345\214\226\344\270\216\351\230\262mock\351\227\255\347\216\257.md" +++ "b/docs/case-studies/2026-05-15-\350\257\232\345\256\236\345\214\226\344\270\216\351\230\262mock\351\227\255\347\216\257.md" @@ -1,20 +1,20 @@ -# Case Study: 文档诚实化 + 防 mock 闭环 (V1.14.0+1) +# Case Study: 文档诚实化 + 防 mock 闭环 > **时间**: 2026-05-13 → 2026-05-15 -> **范围**: V1.14.0 → V1.14.0+1 +> **范围**: → +1 > **PR**: #63 / #64 / #65 / #66 / #67 / #68 (合 6 PR) > **文件改**: 16 个 .md frontmatter + 4 文档 + 5 runtime + 11 文件 / 339 行加 > **验证**: pytest 23/23 pass · L2 selftest --e2e ✓ PASS 8/9 ok · CI 11 必修 ×3 PR 全绿 ## 1. 背景 -V1.14.0 起步时, README / ROADMAP 与代码实质之间存在 3 类 gap: +起步时, README / ROADMAP 与代码实质之间存在 3 类 gap: | Gap 类型 | 旧表述 | 实际状态 | |---|---|---| | Agent 数字 | "16 expert agents" | 16 个 .md 文件, 实装状态不明 (无 frontmatter 标注) | | Skill 数字 | "33 business skills + 3 meta-skills" | 实测 32 业务 + 3 meta (33 是误数) | -| Rollout 范围 | "类别 3 垂直领域 2 Agent (V1.x rollout)" | rollout 实跨三类别共 6 个 (env-manager + mobile-tester + visual-tester + system-tester + pentest-tester + automotive-tester) | +| Rollout 范围 | "类别 3 垂直领域 2 Agent (rollout)" | rollout 实跨三类别共 6 个 (env-manager + mobile-tester + visual-tester + system-tester + pentest-tester + automotive-tester) | | Mock 输出 | ROADMAP "防 mock 承诺" 仅文字声明 | expert 部分硬拒已落, **skill 完全无防 mock** — rollout skill 路由走 fallback no-op stdout `[no-op] skill 'X' has no canonical script; documented step recorded` 返 ok=True 假成功 | 用户反复强调: **修复 ≠ 削话术, 终态 = install + tagent demo + 真 LLM-driven agent 跑通**。 @@ -28,8 +28,8 @@ V1.14.0 起步时, README / ROADMAP 与代码实质之间存在 3 类 gap: ``` production ← 真 LLM-driven runner (orchestrator/agents/*.py) 已实装 
script ← 真 script-backed (utils/*.py) 已实装 -rollout ← V1.x rollout 待实装 -vision ← V2.x 方法论参考 (仅 skill 有此状态) +rollout ← rollout 待实装 +vision ← 方法论参考 (仅 skill 有此状态) ``` PR #65 自纠错: 初版 4 个 agent 标注与 runtime/orchestrator/agents/ 实代码不一致 (e.g., env-manager 标 active 但无 agent runner), 多源核对 (runtime/ + ROADMAP + test-lead) 后校正。 @@ -42,8 +42,8 @@ PR #65 自纠错: 初版 4 个 agent 标注与 runtime/orchestrator/agents/ 实 |---|---|---| | `README.md` L44 | "16 expert agents" | "16 expert agents (5 production + 5 script + 6 rollout — see ROADMAP.md)" | | `README.md` L45 | "33 business skills + 3 meta-skills" | "32 business skills (7 production + 7 script + 16 rollout + 2 vision) + 3 meta-skills" | -| `agents/README.md` L47 | "类别 3:垂直领域 2 Agent (V1.x rollout)" | "类别 3:垂直领域扩展 2 Agent" + L54 显式列 6 rollout 全集 (跨三类别) | -| `ROADMAP.md` | 仅 6 expert rollout 节 | 加「当前活跃 14 skill」+「V1.x rollout 16 skill」+「V2.x vision 2 skill」三节 | +| `agents/README.md` L47 | "类别 3:垂直领域 2 Agent (rollout)" | "类别 3:垂直领域扩展 2 Agent" + L54 显式列 6 rollout 全集 (跨三类别) | +| `ROADMAP.md` | 仅 6 expert rollout 节 | 加「当前活跃 14 skill」+「rollout 16 skill」+「vision 2 skill」三节 | **关键诚实点**: 旧 "33 skill" 是真错 (实 32, 3 meta 单列), PR #66 一并修。 @@ -69,7 +69,7 @@ orchestrator.execute_node ← returncode=2 + stderr "未实 2. **`runtime/orchestrator/adapters/experts.py`**: **移除 hardcoded `EXPERT_IMPL_STATUS` dict** (单源迁移, 杜绝 PR #65 双源漂移教训复发); 加 `_get_impl_status(name, kind)` helper 直读 catalog; `execute_node` expert / skill 双分支统一防 mock — rollout / vision / unknown → `returncode=2` + stderr "未实装"。 3. **`runtime/router/router.py`**: `_validate_against_catalog` 改读 `catalog.lookup().impl_status` (不再 import hardcoded dict); expert / skill 双 kind 同检; rollout / vision / unknown 全标 issue + 降 confidence。 4. **`runtime/tests/test_impl_status_filter.py`** (新, 13 cases): registry 无 unknown / counts 校验 / router flag 4 类 + production 不误标 / orchestrator hard rc=2 (5 路径) + **反例 assert "documented step recorded" not in stdout** 验 rollout skill 不再 no-op 假成功。 -5. 
**`ROADMAP.md`**: rollout-6-expert table row 0 状态 `planned → done`; rollout-16-skill 前置改已完成; 防 mock 承诺节展开为双 layer 实装事实; 进度跟踪 +V1.14.0+1 (2026-05-15)。 +5. **`ROADMAP.md`**: rollout-6-expert table row 0 状态 `planned → done`; rollout-16-skill 前置改已完成; 防 mock 承诺节展开为双 layer 实装事实; 进度跟踪 ++1 (2026-05-15)。 #### 范围修订过程 @@ -82,7 +82,7 @@ X4 真正核心 = **skill 防 mock + 单源化**, 不是单纯 router 过滤。 ### Phase 4: stub 设计不一致根治 (PR #68) -X4 验时发现 pre-existing baseline 坏测试 `test_router_ends_with_report_generator`。根因: V1.14 把 `test-lead` 加到 stub 的 web-system path 末 (按主宪章 §40 "测试主管 — 协调 + 最终上线建议"), 但 mobile-app / desktop-app / ai-model / rest-api 4 path 未同步。 +X4 验时发现 pre-existing baseline 坏测试 `test_router_ends_with_report_generator`。根因: 把 `test-lead` 加到 stub 的 web-system path 末 , 但 mobile-app / desktop-app / ai-model / rest-api 4 path 未同步。 修法: 5 path 末统一 `test-lead` 决策 (与 `agents/README.md` L20-31 流程对齐): @@ -136,7 +136,7 @@ f6 假阳性 3 问全否后撤项。 ### 决策 4: stub 5 path 统一 test-lead (而非容错测试) -`test_router_ends_with_report_generator` 旧测试错, 不是 stub 错。改修 stub (V1.14 漏改 4 path 同步) + rename 测试, 与 `agents/README.md` 流程对齐。 +`test_router_ends_with_report_generator` 旧测试错, 不是 stub 错。改修 stub + rename 测试, 与 `agents/README.md` 流程对齐。 ## 5. 教训 / 复用 pattern @@ -151,13 +151,13 @@ f6 假阳性 3 问全否后撤项。 - 协作章程 (六道闸 + f1-f6): 项目内部文档 - utils env-var gate v2 范式 (5 utils 实战沉淀): 项目内部工程模式文档 - LLM-as-judge 弱模型 prompt 工程 (4 层 prompt + 顺序敏感 few-shot): 项目内部工程模式文档 -- 防 mock 闭环 4-step pattern: 本文 §3 闭环图, 待跨项目复用 +- 防 mock 闭环 4-step pattern: 本文 闭环图, 待跨项目复用 ## 7. 
未做 / 后续 | 项 | 状态 | |---|---| -| X4.5 (production skill no-op) | NOT-A-BUG (本 case study §4 决策 3) | +| X4.5 (production skill no-op) | NOT-A-BUG (本 case study 决策 3) | | TG 跨项目迁移 (协作宪章 v6 + utils gate v2) | 待启 | | Mac 真机 install.sh 实测 | 待启 (Linux Docker 已通 PR #60/#61/#62) | -| V1.15 sprint (env-manager LLM-driven minimum viable) | 防 mock 前置已落, 可直接进入 expert 实装 | +| sprint (env-manager LLM-driven minimum viable) | 防 mock 前置已落, 可直接进入 expert 实装 | diff --git a/docs/case-studies/INDEX.md b/docs/case-studies/INDEX.md index 28dba174..9f5c8c3e 100644 --- a/docs/case-studies/INDEX.md +++ b/docs/case-studies/INDEX.md @@ -6,15 +6,15 @@ | 文件 | 时间 | 范围 | 要点 | |------|------|------|------| -| [2026-05-15-诚实化与防mock闭环.md](2026-05-15-诚实化与防mock闭环.md) | 2026-05-13 → 2026-05-15 | V1.14.0 → V1.14.0+1 (6 PR) | 16 expert + 32 skill frontmatter labeling → README/ROADMAP 数字诚实化 → registry 单源 + router warn + orchestrator hard block 4-step 闭环 | +| [2026-05-15-诚实化与防mock闭环.md](2026-05-15-诚实化与防mock闭环.md) | 2026-05-13 → 2026-05-15 | → +1 (6 PR) | 16 expert + 32 skill frontmatter labeling → README/ROADMAP 数字诚实化 → registry 单源 + router warn + orchestrator hard block 4-step 闭环 | ## 复用导引 -- 看「**多源核对纪律**」→ 2026-05-15 §5 教训 1 -- 看「**单源化优于双源**」→ 2026-05-15 §5 教训 2 -- 看「**一文件一审 + f1-f6**」→ 2026-05-15 §5 教训 3 (协作章程) -- 看「**范围修订要敢承认错估**」→ 2026-05-15 §5 教训 4 -- 看「**防 mock 闭环 4-step pattern**」→ 2026-05-15 §3 闭环图 +- 看「**多源核对纪律**」→ 2026-05-15 教训 1 +- 看「**单源化优于双源**」→ 2026-05-15 教训 2 +- 看「**一文件一审 + f1-f6**」→ 2026-05-15 教训 3 (协作章程) +- 看「**范围修订要敢承认错估**」→ 2026-05-15 教训 4 +- 看「**防 mock 闭环 4-step pattern**」→ 2026-05-15 闭环图 ## 相关 diff --git a/docs/charter/01-vision-dimensions.md b/docs/charter/01-vision-dimensions.md index 9be1f7b3..e7cc7a00 100644 --- a/docs/charter/01-vision-dimensions.md +++ b/docs/charter/01-vision-dimensions.md @@ -6,7 +6,7 @@ ## 🏛️ 项目宪章(灵魂底色) -> 三公理 + 五条铭文 + 工程映射 + V1.0.0 锁死 + 双签解锁条件 — 已迁入主宪章 §10(memory `project_test_agent_workflow.md`),FULL_GUIDE 不再重复维护。 +> 三公理 + 五条铭文 + 工程映射 + 锁死 + 双签解锁条件 — 
已迁入(memory `project_test_agent_workflow.md`),FULL_GUIDE 不再重复维护。 --- @@ -33,14 +33,14 @@ ## 🌌 维度全图(九大簇 · Agent 看世界的方式) > 工程矩阵之下的认知地图——回答"测试 Agent 到底需要哪些维度的能力"。各簇能力的工程落点散布在「核心特性」「全链路覆盖矩阵」「关键模块清单」中。 -> **接入策略**:簇 1-2 为 V1.0.0 主交付;簇 3-5 部分进入工程矩阵;簇 6-9 多数列入 Phase 2-4 路线图,**承认存在但不假装能立刻交付**——符合第三公理。 +> **接入策略**:簇 1-2 为 主交付;簇 3-5 部分进入工程矩阵;簇 6-9 多数列入 Phase 2-4 路线图,**承认存在但不假装能立刻交付**——符合第三公理。 -### 簇 1 · 工程与架构层(V1.0.0 主体) +### 簇 1 · 工程与架构层 - 七阶段工作流:需求理解 → 用例生成 → 执行 → 观测 → 根因 → 反馈 → 治理 - 自动化工具栈、Agent 协作协议、用户交互界面 - 决策回放器、停机演练、可观测性递归 -### 簇 2 · 认知暗物质层(V1.0.0 部分 + Phase 3 补全) +### 簇 2 · 认知暗物质层 - 认知债务(被遗忘的 Why) - 跨系统嗅觉(上下游气味相投) - 沉默故障(不报警的恶化) @@ -53,7 +53,7 @@ - 制度性愚蠢抗体 - 生态位"暗杀"攻击建模 -### 簇 4 · 抽象与元层(V1.0.0 部分) +### 簇 4 · 抽象与元层 - 预兆感知(弱信号 + 拓扑同调) - 反目标函数(对测试本身的测试) - 语言游戏(语义歧义放大器) @@ -61,7 +61,7 @@ - 测试热寂与熵减祭司 - 本体论测试(数字孪生 vs 物理承诺) -### 簇 5 · 行业元逻辑层(V1.0.0 参照表 + Phase 2 选定 MVP) +### 簇 5 · 行业元逻辑层 - 金融=承诺守恒、医疗=伤害可逆、法律=边界例外 - 教育=认知脚手架、农业=优雅降级、艺术=避免审查官 - 自动驾驶/机器人=物理承诺 @@ -99,7 +99,7 @@ ## 🎭 关键模块清单(测试 Agent 的工具箱) > 每个模块对应一个 utils 或 skill 的工程落点;划分到对应簇便于追溯认知来源。 -> **Phase 标注**:✅ V1.0.0 已交付;⚪ Phase 2-4 路线图;❌ Phase 4-5 概念阶段。 +> **Phase 标注**:✅ 已交付;⚪ Phase 2-4 路线图;❌ Phase 4-5 概念阶段。 | 模块 | 职能 | 所属簇 | 工程落点 | 阶段 | |------|------|--------|----------|------| diff --git a/docs/charter/03-agentchat-protocol.md b/docs/charter/03-agentchat-protocol.md index 2a5d53e8..861f39e4 100644 --- a/docs/charter/03-agentchat-protocol.md +++ b/docs/charter/03-agentchat-protocol.md @@ -67,8 +67,8 @@ agent 在三种情况**必须停下反问用户**,不允许猜: - 反问全部落档到 `discussions/{date}_clarifications.md` - 同一会话内不重复问已澄清过的同一术语 -**不做的事(Via Negativa 显式标注)**:V1.0.0 **不构建反问知识库(KB)**——不做 embedding 向量库、不做半结构化匹配引擎、不做语义检索。所有反问纪要落 `discussions/` 后由 test-lead 在新任务前**人工查阅**类似场景。 -- **为什么不做**:(a) V1.0.0 时期数据量不足(< 100 条反问);(b) 反问的"是否还有效"依赖项目阶段,自动复用可能传递过期判断;(c) 投入 KB 工程 ≠ 提升决策质量 +**不做的事(Via Negativa 显式标注)**:**不构建反问知识库(KB)**——不做 embedding 向量库、不做半结构化匹配引擎、不做语义检索。所有反问纪要落 `discussions/` 后由 test-lead 在新任务前**人工查阅**类似场景。 +- **为什么不做**:(a) 时期数据量不足(< 100 条反问);(b) 
反问的"是否还有效"依赖项目阶段,自动复用可能传递过期判断;(c) 投入 KB 工程 ≠ 提升决策质量 - **现状更新(2026-05-16)**:discussions/ 累计反问 + 讨论纪要已超 200 条,进入 Phase 2 重新评估区间。详见 [06-test-architecture.md](06-test-architecture.md) Phase 2 触发条件 - **未来开案条件**:若需要开放反问 KB,须由 test-lead + 独立伦理责任人**双签**立项 @@ -130,6 +130,6 @@ agent 在三种情况**必须停下反问用户**,不允许猜: --- ``` -**铁律**:争议未落档 → 不允许 Bug 单关闭、不允许测试报告签发、不允许测试计划评审通过。 +**规则**:争议未落档 → 不允许 Bug 单关闭、不允许测试报告签发、不允许测试计划评审通过。 --- diff --git a/docs/charter/04-skills-bugtracker.md b/docs/charter/04-skills-bugtracker.md index 749422c4..40f5c6c2 100644 --- a/docs/charter/04-skills-bugtracker.md +++ b/docs/charter/04-skills-bugtracker.md @@ -50,7 +50,7 @@ darwin-skill 跑出的改进建议**不绕过协作协议**——重大改动( ### 6. 不做的事(Via Negativa 显式标注) -V1.0.0 darwin-skill **不消费**项目运行数据(`discussions/` / `decisions/` / `history/` / `skill-evolution/results.tsv` 之外的运行历史),仅对 skill 文本结构本身做静态 + 实测评分优化。 +darwin-skill **不消费**项目运行数据(`discussions/` / `decisions/` / `history/` / `skill-evolution/results.tsv` 之外的运行历史),仅对 skill 文本结构本身做静态 + 实测评分优化。 **为什么不做"运行数据反哺 skill"的自学习闭环**: 1. 
自学习难界定何时停止学习"坏样本"(如一段时期的高 flaky 反而被学进 skill 形成自我固化) @@ -69,7 +69,7 @@ V1.0.0 darwin-skill **不消费**项目运行数据(`discussions/` / `decision | 适配器 | 状态 | 配置字段 | severity 映射 | |--------|------|---------|--------------| -| **zentao**(默认) | ✅ V1.0.0 | `ZENTAO_URL / ZENTAO_USER / ZENTAO_TOKEN` | severity 1=P0 / 2=P1 / 3=P2 / 4=P3 | +| **zentao**(默认) | ✅ | `ZENTAO_URL / ZENTAO_USER / ZENTAO_TOKEN` | severity 1=P0 / 2=P1 / 3=P2 / 4=P3 | | **jira** | ⚪ Phase 2 | `JIRA_URL / JIRA_USER / JIRA_TOKEN / JIRA_PROJECT_KEY` | Highest=P0 / High=P1 / Medium=P2 / Low=P3 | | **github** | ⚪ Phase 2 | `GITHUB_TOKEN / GITHUB_REPO` | label `priority:p0..p3` | | **linear** | ⚪ Phase 2 | `LINEAR_API_KEY / LINEAR_TEAM_ID` | priority 1=P0 / 2=P1 / 3=P2 / 4=P3 | diff --git a/docs/charter/05-install-deploy.md b/docs/charter/05-install-deploy.md index 29632a77..9fc38790 100644 --- a/docs/charter/05-install-deploy.md +++ b/docs/charter/05-install-deploy.md @@ -236,7 +236,7 @@ cursor # Cursor ## 🔐 闭环约定(设计原则) -> 18 条全栈闭环约定(数据/cov/重试/severity/error_rate/基线/门禁/MCP/prod禁/Flaky/铭文/决策追溯/三筐/修改四关/工具兼容/纪要不可删/自进化棘轮/依赖补装)— 已迁入主宪章 §19,FULL_GUIDE 不再重复维护。 +> 18 条全栈闭环约定(数据/cov/重试/severity/error_rate/基线/门禁/MCP/prod禁/Flaky/铭文/决策追溯/三筐/修改四关/工具兼容/纪要不可删/自进化棘轮/依赖补装)— 已迁入,FULL_GUIDE 不再重复维护。 --- diff --git a/docs/charter/06-test-architecture.md b/docs/charter/06-test-architecture.md index 8741f1c1..7cb518e1 100644 --- a/docs/charter/06-test-architecture.md +++ b/docs/charter/06-test-architecture.md @@ -133,7 +133,7 @@ | Phase | 触发条件 | 标志性交付 | |------|---------|----------| -| **Phase 1**(已完成 V1.0.0-V1.36.0) | 概念宪章成 + 工程基线就绪 + expert rollout 收尾 + skill rollout 完成 | 16 expert (11p+5s) + 32 skill (23p+7s+0r+2v) + AgentChat + Bug 多适配 + 按需安装 + darwin-skill + MCP + 教学层 + Marketplace + 多 LLM config | +| **Phase 1**(已完成 -) | 概念宪章成 + 工程基线就绪 + expert rollout 收尾 + skill rollout 完成 | 16 expert (11p+5s) + 32 skill (23p+7s+0r+2v) + AgentChat + Bug 多适配 + 按需安装 + darwin-skill + MCP + 教学层 + Marketplace + 多 LLM config | | **Phase 2** | utils 单测覆盖 ≥ 
60% 且团队 ≥ 5 人 | 契约链路串通 + 门禁引擎 yaml 抽象 + 反问 KB 重新评估 + skill rollout 继续 | | **Phase 3** | Phase 2 全交付 + 接入 ≥ 2 行业 | 合成监控 + canary/feature flag + 统一 dashboard + 沉默故障 + 缺席者注入 | | **Phase 4** | 接入合规行业(金融/医疗/司法)| 证据链司法可采信打包 + 数字考古学家 + AI 测试深化 | @@ -156,10 +156,10 @@ | **证据链 / 司法可采信打包** | 散落 | Phase 4 | 决策日志 + 模型版本 + 数据集 → 标准送审包 | | **数字考古学家**(遗留系统初始假设回溯) | 缺 | Phase 4 | 知识图谱冷启动 + Why 数据库 | | **神圣性守护 + 禁忌矩阵** | 缺 | Phase 5 | 跨文化禁忌词/色/数/节日组合(本地化共建) | -| **darwin-skill 集成(自进化)** | ✅ V1.0.0 已并入 | Phase 1 | 上游 SKILL.md + workspace 落 results.tsv + 季度同步 | -| **Bug Tracker 多适配** | ✅ V1.0.0 已并入 | Phase 1 | 5 套适配器(zentao/jira/github/linear/webhook)+ 工厂模式 | -| **AgentChat 协作协议** | ✅ V1.0.0 已并入 | Phase 1 | discussions/ 纪要 + test-lead 中枢路由 + 反问 3 级预算 | -| **按需安装与依赖分层** | ✅ V1.0.0 已并入 | Phase 1 | 6 requirements 文件 + install.py + 运行时补装回路 | +| **darwin-skill 集成(自进化)** | ✅ 已并入 | Phase 1 | 上游 SKILL.md + workspace 落 results.tsv + 季度同步 | +| **Bug Tracker 多适配** | ✅ 已并入 | Phase 1 | 5 套适配器(zentao/jira/github/linear/webhook)+ 工厂模式 | +| **AgentChat 协作协议** | ✅ 已并入 | Phase 1 | discussions/ 纪要 + test-lead 中枢路由 + 反问 3 级预算 | +| **按需安装与依赖分层** | ✅ 已并入 | Phase 1 | 6 requirements 文件 + install.py + 运行时补装回路 | > **第三公理在此节兑现**:项目有意识地**少承诺**——文明级伦理议题(如缓慢暴力、末日哨兵、神圣性守护)我们承认其存在,但**不在工程路线图上假装能做**。如果未来接入特定行业(金融 / 医疗 / 司法)需要其中某项能力,由业务方按需单独立项,不绑进通用框架。 @@ -204,12 +204,12 @@ | # | 议题 | | | |---|------|---------|------| | | | | | -| Q2 | Agent 架构:单体 vs 专 | | V1.0.0 选专科 + test-lead 中枢 | -| Q3 | 五条铭文的技术实现机制(不可变区域、熔断条件)? | 🔄 | V1.0.0 铭文锁死,无削弱机制;Phase 4 接入合规行业后重新设计 | +| Q2 | Agent 架构:单体 vs 专 | | 选专科 + test-lead 中枢 | +| Q3 | 五条铭文的技术实现机制(不可变区域、熔断条件)? | 🔄 | 铭文锁死,无削弱机制;Phase 4 接入合规行业后重新设计 | | Q4 | 独立审计署的法律实体形态? | ⏳ | 触发条件:团队 ≥ 20 人 或 接入合规行业 | | Q5 | 末日哨兵权的触发授权链? | ⏳ | 需监管/学界共识,Phase 4 | | | | | | -| Q7 | 团队最小配置(工程/行业专家/伦理责任人)? | ⏳ | V1.0.0 单人可启动;剥离伦理责任人需 ≥ 20 人 | +| Q7 | 团队最小配置(工程/行业专家/伦理责任人)? | ⏳ | 单人可启动;剥离伦理责任人需 ≥ 20 人 | | Q8 | 与现有 AI 测试平台(Mabl / Applitools / Functionize)的差异化定位? 
| ⏳ | 候选定位:「承诺学科 + 伦理护栏 + 行业隐喻先行」 | --- diff --git a/docs/charter/07-runtime-license.md b/docs/charter/07-runtime-license.md index 229cae6b..cd0dd55e 100644 --- a/docs/charter/07-runtime-license.md +++ b/docs/charter/07-runtime-license.md @@ -4,9 +4,9 @@ 内容与原 FULL_GUIDE.md 对应段完全一致, 仅拆不动语义。 --> -## 🧠 V1.36.0 运行时层(`runtime/`) +## 🧠 运行时层(`runtime/`) -> 已有 16 专家 / 32 Skill / 79 utils**不动**(宪章铁律),`runtime/` 作可执行调度层 + 真 LLM-driven agent/skill runner。 +> 已有 16 专家 / 32 Skill / 79 utils**不动**(宪章规则),`runtime/` 作可执行调度层 + 真 LLM-driven agent/skill runner。 > 让"文档+脚本工具箱"升级为"可被 API/CLI/CI 直接调用的运行时"。 ### 模块拓扑 @@ -113,17 +113,17 @@ uvicorn runtime.api.main:app --port 8800 | 79 utils `.py` | **不动**。`orchestrator/adapters/scripts.py` subprocess 隔离调用 | | `utils/` 通知/Bug | 复用 `generate_report.py` / `zentao_bug_manager.py` | -任何专家/Skill/脚本**新增**或**修改**仍按宪章 §1 同步铁律走;`runtime/` 是新增 **调度** 层,不重复实现专家逻辑。 -V1.14+ 真 LLM-driven agent runner + V1.21+ SkillRunner 系统为 runtime 新增执行能力,详见 [ROADMAP.md](../../ROADMAP.md)。 +任何专家/Skill/脚本**新增**或**修改**仍按宪章 同步规则走;`runtime/` 是新增 **调度** 层,不重复实现专家逻辑。 ++ 真 LLM-driven agent runner + + SkillRunner 系统为 runtime 新增执行能力,详见 [ROADMAP.md](../../ROADMAP.md)。 --- ## 📜 LICENSE / CHANGELOG / CONTRIBUTING / SECURITY - **LICENSE**:MIT(详见 [`LICENSE`](../../LICENSE)) -- **CHANGELOG**:详见 [`../../CHANGELOG.md`](../../CHANGELOG.md)(V1.43.0 / Phase 3+4+5 落版 + 32/32 skill active 全 production) +- **CHANGELOG**:详见 [`../../CHANGELOG.md`](../../CHANGELOG.md) - **VERSION**:详见 [`VERSION`](../../VERSION) -- **CONTRIBUTING**:详见 [`CONTRIBUTING.md`](../../CONTRIBUTING.md)(含同步铁律 + RACI 矩阵) +- **CONTRIBUTING**:详见 [`CONTRIBUTING.md`](../../CONTRIBUTING.md)(含同步规则 + RACI 矩阵) - **SECURITY**:详见 [`SECURITY.md`](../../SECURITY.md)(漏洞报告流程 + GitHub Security Advisories 入口) - **CODE_OF_CONDUCT**:详见 [`CODE_OF_CONDUCT.md`](../../CODE_OF_CONDUCT.md)(基于 Contributor Covenant 2.1) @@ -133,17 +133,17 @@ V1.14+ 真 LLM-driven agent runner + V1.21+ SkillRunner 系统为 runtime 新增 ### 当前阶段(最后更新:2026-05-18) -- 
**Phase**:Phase 2 前期(V1.36.0 · expert rollout 收尾 + skill rollout 全 16/16 完成) +- **Phase**:Phase 2 前期(expert rollout 收尾 + skill rollout 全 16/16 完成) - **关键已交付**:16 expert (11p+5s) · 32 skill (23p+7s+0r+2v) · AgentChat · Bug 多适配 · 按需安装 · darwin-skill · MCP 6 件套 · Marketplace · 教学层 · 多 LLM config · 16 SkillRunner 全落地 -- **活跃 PR**:#124-#127 merged(V1.34-V1.36, 2026-05-18) +- **活跃 PR**:#124-#127 merged ### 历史关键决议 - 2026-05-11:宪章四章 + 三公理 + 五铭文起草完成 - 2026-05-11:FULL_GUIDE.md 确立永久宪章地位 -- 2026-05-11:darwin-skill 不消费运行数据(Via Negativa);反问 KB 不进 V1.0.0 -- 2026-05-12 ~ 2026-05-14:V1.1-V1.14 runtime 层 + 教学层 + Marketplace + 渗透/车载 + Hermes + GBrain + Karpathy + ECC -- 2026-05-15 ~ 2026-05-18:V1.15-V1.36 22 版迭代 — 16 expert 全 production + 23 SkillRunner 生产落地 + 多 LLM config + 深审修复 + GUI + Dashboard + Mobile + 65发现全修 +- 2026-05-11:darwin-skill 不消费运行数据(Via Negativa);反问 KB 不进 +- 2026-05-12 ~ 2026-05-14:-runtime 层 + 教学层 + Marketplace + 渗透/车载 + Hermes + GBrain + Karpathy + ECC +- 2026-05-15 ~ 2026-05-18:-22 版迭代 — 16 expert 全 production + 23 SkillRunner 生产落地 + 多 LLM config + 深审修复 + GUI + Dashboard + Mobile + 65发现全修 ### 下次会话进入项目时,按顺序检查 @@ -155,8 +155,8 @@ V1.14+ 真 LLM-driven agent runner + V1.21+ SkillRunner 系统为 runtime 新增 ### 来源与引用(认知史) - 第一至五轮(DeepSeek + Claude):测试 Agent 架构 + 九大簇 -- V1.0.0 工程基线:14 agent + 14 skill + 76 utils + CI/CD(历史基线) -- V1.1.0 ~ V1.36.0:runtime + 11 agent runner + 16 skill runner + 教学/市场/多LLM(详见 CHANGELOG + ROADMAP) +- 工程基线:14 agent + 14 skill + 76 utils + CI/CD(历史基线) +- ~ :runtime + 11 agent runner + 16 skill runner + 教学/市场/多LLM(详见 CHANGELOG + ROADMAP) - 永久宪章糅合(2026-05-11/14/16):FULL_GUIDE 工程主体 + 全局记忆哲学维度 + 持续回写 --- diff --git a/docs/charter/INDEX.md b/docs/charter/INDEX.md index 5445d495..a931553e 100644 --- a/docs/charter/INDEX.md +++ b/docs/charter/INDEX.md @@ -11,7 +11,7 @@ | 04 | [skills-bugtracker](04-skills-bugtracker.md) | Skills 自进化机制 + Bug Tracker 多适配器 | ~103 | | 05 | [install-deploy](05-install-deploy.md) | 按需安装 + 架构图 + 快速开始 + 工作流 + 技术栈 + 闭环 + 升级 + 协作 + 跨 
AI | ~311 | | 06 | [test-architecture](06-test-architecture.md) | 测试架构深度 + 关键反问 + 开放问题 + 术语表 | ~240 | -| 07 | [runtime-license](07-runtime-license.md) | V1.1.0 运行时层 + LICENSE / CHANGELOG / 项目当前状态 | ~162 | +| 07 | [runtime-license](07-runtime-license.md) | 运行时层 + LICENSE / CHANGELOG / 项目当前状态 | ~162 | ## 维护原则 diff --git "a/docs/getting-started/\344\272\244\344\273\230\347\211\251\346\270\205\345\215\225.md" "b/docs/getting-started/\344\272\244\344\273\230\347\211\251\346\270\205\345\215\225.md" index 93b9e9af..799d1e5b 100644 --- "a/docs/getting-started/\344\272\244\344\273\230\347\211\251\346\270\205\345\215\225.md" +++ "b/docs/getting-started/\344\272\244\344\273\230\347\211\251\346\270\205\345\215\225.md" @@ -233,7 +233,7 @@ workspace/测试报告/{项目名}/baselines/perf_baseline.json ← 性能基 --- -## V1.1.0 · 运行时层交付物 +## · 运行时层交付物 | 场景 | 路径 / 出口 | 来源 | |------|------------|------| diff --git "a/docs/getting-started/\344\275\277\347\224\250\346\211\213\345\206\214.md" "b/docs/getting-started/\344\275\277\347\224\250\346\211\213\345\206\214.md" index 3b7a8712..71c2b5e8 100644 --- "a/docs/getting-started/\344\275\277\347\224\250\346\211\213\345\206\214.md" +++ "b/docs/getting-started/\344\275\277\347\224\250\346\211\213\345\206\214.md" @@ -654,9 +654,9 @@ python -m utils.jmeter_result_parser \ --- -## V1.1.0 · 运行时 CLI/API 用法 +## · 运行时 CLI/API 用法 -> 运行时层 `runtime/` 是 V1.1.0 新增。已有 16 专家 / 32 Skill / 79 脚本不动,本层仅作可执行调度。 +> 运行时层 `runtime/` 是 新增。已有 16 专家 / 32 Skill / 79 脚本不动,本层仅作可执行调度。 ### CLI 命令速查 @@ -718,11 +718,11 @@ TAGENT_LLM_PROVIDER=ollama python -m runtime.cli.main run "..." # 本地 TAGENT_LLM_PROVIDER=stub python -m runtime.cli.main run "..." 
# 测试,不出网 ``` -### 与 V1.0.0 Claude Code 模式的关系 +### 与 Claude Code 模式的关系 | 模式 | 适合 | 入口 | |------|------|------| -| **V1.0.0 Claude Code 工作流** | 团队跟 Claude Code 互动测试,深度对话 | `claude` → `/smoke-test` | -| **V1.1.0 runtime 模式** | 自动化跑批,CI 集成,Web UI 接入 | `tagent run` 或 `POST /run/*` | +| **Claude Code 工作流** | 团队跟 Claude Code 互动测试,深度对话 | `claude` → `/smoke-test` | +| **runtime 模式** | 自动化跑批,CI 集成,Web UI 接入 | `tagent run` 或 `POST /run/*` | 两种模式共享同一份 `agents/` `skills/` `utils/`,无冗余。 diff --git "a/docs/getting-started/\351\203\250\347\275\262\350\257\264\346\230\216.md" "b/docs/getting-started/\351\203\250\347\275\262\350\257\264\346\230\216.md" index 030c31a8..a644a55d 100644 --- "a/docs/getting-started/\351\203\250\347\275\262\350\257\264\346\230\216.md" +++ "b/docs/getting-started/\351\203\250\347\275\262\350\257\264\346\230\216.md" @@ -273,7 +273,7 @@ TEMPLATE_DIR="${TEMPLATE_DIR:-$(pwd)}" PROJECT_ROOT="${1:-$(pwd)/test-project}" echo "==========================================" -echo " Test-Agent 工作流部署 V1.36.0" +echo " Test-Agent 工作流部署 " echo " 模板目录: $TEMPLATE_DIR" echo " 项目目录: $PROJECT_ROOT" echo "==========================================" @@ -636,9 +636,9 @@ python -c "from utils.api_retry_util import call_with_retry; print('utils OK')" --- -## V1.1.0 · 运行时层 `runtime/` 部署 +## · 运行时层 `runtime/` 部署 -> 运行时层是可选的(用户也可只用 16 专家+32 Skill+79 脚本的 V1.0.0 工作流模式)。 +> 运行时层是可选的(用户也可只用 16 专家+32 Skill+79 脚本的 工作流模式)。 > 想要 HTTP/CLI 一键跑、AI 路由、飞轮存储,启它。 ### 1. 
起本地依赖(Docker) diff --git "a/docs/getting-started/\351\205\215\347\275\256\346\270\205\345\215\225.md" "b/docs/getting-started/\351\205\215\347\275\256\346\270\205\345\215\225.md" index e1d60c14..bc11d16c 100644 --- "a/docs/getting-started/\351\205\215\347\275\256\346\270\205\345\215\225.md" +++ "b/docs/getting-started/\351\205\215\347\275\256\346\270\205\345\215\225.md" @@ -407,7 +407,7 @@ pip install -U -r requirements.txt --- -## V1.1.0 · 运行时层配置(`runtime/`) +## · 运行时层配置(`runtime/`) ### `.env` 增项 diff --git a/docs/theory/03-foundation/test-pyramid-2024.zh.md b/docs/theory/03-foundation/test-pyramid-2024.zh.md index 77d9c349..c0901d02 100644 --- a/docs/theory/03-foundation/test-pyramid-2024.zh.md +++ b/docs/theory/03-foundation/test-pyramid-2024.zh.md @@ -10,11 +10,11 @@ authority: - "Mike Cohn 2009《Succeeding with Agile》ch.16(原版金字塔)" - "Martin Fowler https://martinfowler.com/articles/practical-test-pyramid.html" - "Google Testing Blog 2024 doc on pyramid" - - ISO/IEC 25010 §4 Quality Model + - ISO/IEC 25010 Quality Model confidence: high last_reviewed: 2026-05-12 reviewer: agent-curator -when_to_use: 任何项目测试架构设计前;主宪章 §17 落点 +when_to_use: 任何项目测试架构设计前;落点 common_pitfall: - "70/20/10 一刀切——按变更频率+阻塞代价重新分布才合理" - "把视觉回归当成独立层——它属 E2E" @@ -55,4 +55,4 @@ reading_en: - 单元最便宜最快 → 多写 - E2E 最贵最脆 → 少写 - 契约层填补微服务断点 → 必有 -- 变异分数 ≠ 覆盖率(主宪章 §21 横切准则)→ 用例质量反验 +- 变异分数 ≠ 覆盖率→ 用例质量反验 diff --git a/docs/theory/04-strategy/shift-left.zh.md b/docs/theory/04-strategy/shift-left.zh.md index 6848d151..94ff8301 100644 --- a/docs/theory/04-strategy/shift-left.zh.md +++ b/docs/theory/04-strategy/shift-left.zh.md @@ -13,7 +13,7 @@ authority: confidence: high last_reviewed: 2026-05-12 reviewer: agent-curator -when_to_use: 任何项目;主宪章 §17 测试架构 7 层左移 +when_to_use: 任何项目;测试架构 7 层左移 common_pitfall: - "只测开发末端 → 修复成本爆炸" - "需求阶段不评审 → 测试用例无所依" diff --git a/docs/theory/05-methods/equivalence-partitioning.zh.md b/docs/theory/05-methods/equivalence-partitioning.zh.md index 2ddd0da1..76e06499 100644 --- 
a/docs/theory/05-methods/equivalence-partitioning.zh.md +++ b/docs/theory/05-methods/equivalence-partitioning.zh.md @@ -56,5 +56,5 @@ reading_en: - 与边界值配合,90% 输入缺陷可覆盖 ## 反模式 -- 漏无效类(只测好路径)= **测试不诚信**(主宪章 §21 取舍铁律) +- 漏无效类(只测好路径)= **测试不诚信** - 类切太细 = 用例数爆炸,失去抽象价值 diff --git a/docs/theory/06-protocols/http-https.zh.md b/docs/theory/06-protocols/http-https.zh.md index 5b540cc9..cdd589e4 100644 --- a/docs/theory/06-protocols/http-https.zh.md +++ b/docs/theory/06-protocols/http-https.zh.md @@ -56,7 +56,7 @@ Web/API 测试**必经协议**。Test-Agent `utils/api_retry_util.py` 提供 10/ | 性能 | TTFB / P95 / TPS(JMeter / k6) | | 安全 | OWASP API Top 10 / TLS 配置 | | 幂等性 | Idempotency-Key 重复提交 | -| 重试 | 指数退避(主宪章 §18-3 全栈 10/20/40s) | +| 重试 | 指数退避 | ## 为什么 Agent 默认调 HTTP? 被测物 = Web/REST/GraphQL/Webhook/SOAP → 全部跑在 HTTP 之上;Agent 用 `runtime/mcp/protocol_adapter` 的 HTTP adapter 做协议层抽象。 diff --git a/docs/theory/08-gates/flaky-vs-reruns.zh.md b/docs/theory/08-gates/flaky-vs-reruns.zh.md index b903d2de..7e05abeb 100644 --- a/docs/theory/08-gates/flaky-vs-reruns.zh.md +++ b/docs/theory/08-gates/flaky-vs-reruns.zh.md @@ -9,7 +9,7 @@ one_liner_en: No reruns at smoke (preserve flaky signal); reruns at regression ( authority: - "Google Testing Blog: Flaky Tests at Google and How We Mitigate Them" - "pytest-rerunfailures docs" - - ISTQB Advanced Test Manager §6 Risk-based Testing + - ISTQB Advanced Test Manager Risk-based Testing confidence: high last_reviewed: 2026-05-12 reviewer: agent-curator diff --git a/docs/theory/12-process/bug-lifecycle.zh.md b/docs/theory/12-process/bug-lifecycle.zh.md index 9f78b9b0..8ef2db16 100644 --- a/docs/theory/12-process/bug-lifecycle.zh.md +++ b/docs/theory/12-process/bug-lifecycle.zh.md @@ -26,7 +26,7 @@ example: | └→ Deferred reopen: Closed → Open(失败回归触发) - 严重度(技术影响):1=P0 / 2=P1 / 3=P2 / 4=P3(主宪章 §18-4) + 严重度(技术影响):1=P0 / 2=P1 / 3=P2 / 4=P3 优先级(业务紧急):同样四级,但**与严重度独立** related_to: [rca-5why-8d-fishbone, bug-tracker-adapters] --- @@ -40,7 +40,7 @@ Test-Agent 
**统一权威**:`utils/bug_severity_map.py` 跨 5 adapter(zentao/jir - **优先级**:修复紧急度(老板用 vs 客户偶遇) - **两者独立**:可能"P3 优先级 + P0 严重度"(数据丢失但只影响 1% 用户) -## RCA 标准方法(主宪章 §21 流程阶段 6) +## RCA 标准方法 - 5Why:连问 5 个为什么 - 8D:8 步纪律(团队+护栏+遏制+根因+永久解+预防) - Fishbone:鱼骨图分类(人/机/料/法/环/测) diff --git a/docs/theory/13-build-your-own/INDEX.md b/docs/theory/13-build-your-own/INDEX.md index 1a0ff686..b3ab4aa2 100644 --- a/docs/theory/13-build-your-own/INDEX.md +++ b/docs/theory/13-build-your-own/INDEX.md @@ -19,10 +19,10 @@ ## 横切准则 -- 每卡 `estimated_time_hours` 必填(防 §27 原则 4 时间陷阱) -- confidence 默认 `medium`(tutorial 质量参差,§23 KB) +- 每卡 `estimated_time_hours` 必填(防 原则 4 时间陷阱) +- confidence 默认 `medium`(tutorial 质量参差 - 引用 URL 必带 1 句摘要(防链接失效) -- `essence_only` 标:不自动提议入 Test-Agent(§29 policy) +- `essence_only` 标:不自动提议入 Test-Agent(policy) ## 不收录 diff --git a/docs/theory/13-build-your-own/byox-bot.zh.md b/docs/theory/13-build-your-own/byox-bot.zh.md index a552fd9e..37e10c4a 100644 --- a/docs/theory/13-build-your-own/byox-bot.zh.md +++ b/docs/theory/13-build-your-own/byox-bot.zh.md @@ -24,8 +24,8 @@ reading_en: ["https://core.telegram.org/bots/api"] # 对测试工作 -- **gateway 测试**(本项目 §22 runtime/gateway 8 平台):理解 webhook 校验 + 限流 + retry +- **gateway 测试**(本项目 runtime/gateway 8 平台):理解 webhook 校验 + 限流 + retry - **消息回调测试**:测平台超时 / 重试策略 -- **scheduler + bot**(§22):懂 webhook 才能测自动化日报推送 +- **scheduler + bot**():懂 webhook 才能测自动化日报推送 - **垃圾消息防御**:bot 必测 rate-limit + 签名校验 - **bot 模拟器**:用 from-scratch bot 当测试 mock diff --git a/docs/theory/13-build-your-own/byox-programming-language.zh.md b/docs/theory/13-build-your-own/byox-programming-language.zh.md index 2dc74447..3b57f167 100644 --- a/docs/theory/13-build-your-own/byox-programming-language.zh.md +++ b/docs/theory/13-build-your-own/byox-programming-language.zh.md @@ -27,4 +27,4 @@ reading_en: ["https://craftinginterpreters.com/"] - **DSL 测试**:测试配置语言 / 业务规则引擎 - **编译器 bug**:测试代码生成正确性(差分测试) - **解析器测试**:本项目 `runtime/router/schema.py` 用 Pydantic 解析 LLM JSON,理解 grammar 才能写好 schema -- 
**§28 ECC agent-introspection-debugging** 调试 LLM 输出 = 解析 LLM 类自然语言"DSL" +- **ECC agent-introspection-debugging** 调试 LLM 输出 = 解析 LLM 类自然语言"DSL" diff --git a/docs/theory/13-build-your-own/byox-search-engine.zh.md b/docs/theory/13-build-your-own/byox-search-engine.zh.md index c2faa1a0..2728f6e2 100644 --- a/docs/theory/13-build-your-own/byox-search-engine.zh.md +++ b/docs/theory/13-build-your-own/byox-search-engine.zh.md @@ -25,6 +25,6 @@ reading_en: ["https://nlp.stanford.edu/IR-book/"] # 对测试工作 - **RAG 测试**:理解检索召回率/精度 → 设计 Jaccard@k / nDCG eval -- **混合检索**(本项目 §24):懂 BM25 + vector 才能融合 fork +- **混合检索**(本项目 ):懂 BM25 + vector 才能融合 fork - **KB 测试**:本项目 docs/theory KB 检索质量评测 - **gbrain 精髓** § 1.3 混合检索 4 路落地的理论基础 diff --git a/docs/theory/13-build-your-own/byox-web-browser.zh.md b/docs/theory/13-build-your-own/byox-web-browser.zh.md index 0f6939c0..483567d3 100644 --- a/docs/theory/13-build-your-own/byox-web-browser.zh.md +++ b/docs/theory/13-build-your-own/byox-web-browser.zh.md @@ -27,4 +27,4 @@ reading_en: ["https://browser.engineering/"] - **视觉回归**:理解 layout/paint → 知道为什么字体抖动 / DPR / 动画导致 SSIM 不稳 - **Web Vitals**(LCP/FID/CLS/INP):懂渲染管线才能优化 - **selector 失效**(M1-10 web-demo bug):懂 DOM 才知道用 `role` / `text` 而非 CSS class -- **§28 ECC e2e-testing skill** 落地的底层 +- **ECC e2e-testing skill** 落地的底层 diff --git a/docs/theory/INDEX.md b/docs/theory/INDEX.md index d9f5e4fc..bdd2f308 100644 --- a/docs/theory/INDEX.md +++ b/docs/theory/INDEX.md @@ -1,7 +1,7 @@ -# Theory KB 索引(主宪章 §23 教学层准则) +# Theory KB 索引 > Test-Agent 部署后的**学习知识库**。用户进入学习模式时,Agent 依此 KB 提供权威解释。 -> 主宪章 §23 铁律:**LLM 不得编造 KB 外的引用**(防幻觉 L1 层)。 +> 规则:**LLM 不得编造 KB 外的引用**(防幻觉 L1 层)。 ## 双语支持 @@ -9,7 +9,7 @@ - `--lang zh-en` 双栏对照(学英文用) - UI i18n 独立(`runtime/web/src/locales/`) -## 12 大类(主宪章 §23) +## 12 大类 | # | 分类 | 目录 | 主题示例 | |---|------|------|---------| @@ -43,7 +43,7 @@ - `confidence`(high/medium/low/**llm-draft-unreviewed**) - `last_reviewed` + `reviewer` -## 反幻觉 3 层(主宪章 §23) +## 反幻觉 3 层 | 层 | 机制 | |----|------| diff 
--git a/docs/theory/_authority_sources.yaml b/docs/theory/_authority_sources.yaml index f9002ebe..4a7ffe50 100644 --- a/docs/theory/_authority_sources.yaml +++ b/docs/theory/_authority_sources.yaml @@ -1,4 +1,4 @@ -# 权威源白名单(Q4-D 全要)· 主宪章 §23 铁律 +# 权威源白名单(Q4-D 全要) # LLM 在 learn mode 引用必须出自此列表;否则标 confidence=llm-draft-unreviewed. international: diff --git a/docs/theory/_schema.yaml b/docs/theory/_schema.yaml index b61e699a..808e0e60 100644 --- a/docs/theory/_schema.yaml +++ b/docs/theory/_schema.yaml @@ -1,6 +1,6 @@ -# Theory KB card schema (charter §23). +# Theory KB card schema (charter). # 每张卡片(*.zh.md / *.en.md) frontmatter 必须满足此 schema. -# 主宪章 §23 铁律:LLM 在 learn mode 下只能引用 KB 中存在的 id;不在 KB 的概念输出"该领域未收录"。 +# 规则:LLM 在 learn mode 下只能引用 KB 中存在的 id;不在 KB 的概念输出"该领域未收录"。 required_fields: id: diff --git a/examples/INDEX.md b/examples/INDEX.md index 783ddb1b..423eb12f 100644 --- a/examples/INDEX.md +++ b/examples/INDEX.md @@ -21,7 +21,7 @@ - 想加新 demo(API / 移动 / 桌面 / IoT 等):新建子目录,写 `README.md` 说明启动方式 - demo 不应含真实凭据 / 真实客户数据 → 一律占位 `` 或 `.env.example` -## 私有边界(V1.10 起强制) +## 私有边界 - **禁止**:真实客户 PRD 样本入 `examples/` - **占位**:用 `_template_*` 前缀(如 `_template_login_prd.md`) @@ -30,4 +30,4 @@ ## 相关 - 上一级:[`../README.md`](../README.md) -- 主宪章 §0(开源约束)+ §29(精髓库隔离)+ §34(精髓库防误入,V1.10) +- 开源约束 + 精髓库隔离 + 精髓库防误入 diff --git a/examples/_smoke_prd.md b/examples/_smoke_prd.md index 67f61470..634740d5 100644 --- a/examples/_smoke_prd.md +++ b/examples/_smoke_prd.md @@ -1,6 +1,6 @@ # Smoke PRD · 登录模块(fixture) -> Test-Agent 自检 fixture · 主宪章 §33 · 改了请同步更新 `selftest` 期望产出。 +> Test-Agent 自检 fixture > **此文件不代表任何真实项目**,纯为 e2e 流程验证。 ## 1. 
背景 diff --git a/runtime/INDEX.md b/runtime/INDEX.md index a4817782..596e08e6 100644 --- a/runtime/INDEX.md +++ b/runtime/INDEX.md @@ -1,6 +1,6 @@ # runtime 索引 -> Test-Agent 运行时层(V1.1.0 新增)。 +> Test-Agent 运行时层。 > 顶层导航见根目录 `00-项目导航.md`;运行时完整章节见 `docs/charter/07-runtime-license.md`;架构设计见 [`ARCHITECTURE.md`](ARCHITECTURE.md)。 ## 定位 diff --git a/runtime/api/deps.py b/runtime/api/deps.py index 463041fe..081dc01e 100644 --- a/runtime/api/deps.py +++ b/runtime/api/deps.py @@ -36,7 +36,7 @@ def decide(self, artifact: TargetArtifact, *, vote_providers: list[str] | None = # ---------- run lifecycle ---------- def submit(self, artifact: TargetArtifact, *, persist: bool = True) -> tuple[str, RoutingDecision]: decision = self.decide(artifact) - # V1.14 主宪章 §40 — 把原始 artifact 文本注入每节点 inputs,让 AgentRunner 拿得到 + # 把原始 artifact 文本注入每节点 inputs,让 AgentRunner 拿得到 full_text = artifact.text or "" if not full_text and artifact.path: p = Path(artifact.path) diff --git a/runtime/api/main.py b/runtime/api/main.py index 0128bc09..8bbdd7e9 100644 --- a/runtime/api/main.py +++ b/runtime/api/main.py @@ -126,7 +126,7 @@ def catalog() -> CatalogResponse: @app.post("/run/text", response_model=RunCreated) def run_text(payload: RunCreateText, bg: BackgroundTasks, mode: str = "exec", lang: str = "zh") -> RunCreated: - # Charter §23 mode+lang per-request + # Charter mode+lang per-request from runtime.tutor.i18n import set_lang from runtime.tutor.verbosity import set_mode diff --git a/runtime/backends/INDEX.md b/runtime/backends/INDEX.md index c6a9ca43..b7707ef2 100644 --- a/runtime/backends/INDEX.md +++ b/runtime/backends/INDEX.md @@ -27,7 +27,7 @@ class BaseExecutionEnv(abc.ABC): async def close(self) -> None ``` -## 经济模型(主宪章 §22 落地) +## 经济模型 - $5 VPS = local/docker 后端跑得起 - Serverless hibernate = modal/daytona 闲置零成本 diff --git a/runtime/backends/__init__.py b/runtime/backends/__init__.py index aad95b2e..03664a97 100644 --- a/runtime/backends/__init__.py +++ b/runtime/backends/__init__.py @@ -1,4 +1,4 
@@ -"""7 execution backends · hermes §1.4. +"""7 execution backends · hermes. local / docker / ssh / singularity / modal / daytona / vercel_sandbox. Use `get_backend(name)` to obtain an adapter implementing BaseExecutionEnv. diff --git a/runtime/backends/base.py b/runtime/backends/base.py index c3837f3d..a843f35e 100644 --- a/runtime/backends/base.py +++ b/runtime/backends/base.py @@ -1,4 +1,4 @@ -"""BaseExecutionEnv abstract base (hermes §1.4). +"""BaseExecutionEnv abstract base (hermes ). All 7 backends implement this contract; new backend = new file + @register. """ diff --git a/runtime/backends/daytona.py b/runtime/backends/daytona.py index 9df8f4eb..b6807ee3 100644 --- a/runtime/backends/daytona.py +++ b/runtime/backends/daytona.py @@ -1,4 +1,4 @@ -"""Daytona dev sandbox backend (hermes §1.4 serverless hibernate).""" +"""Daytona dev sandbox backend (hermes serverless hibernate).""" from __future__ import annotations @@ -14,7 +14,7 @@ class DaytonaBackend(BaseExecutionEnv): """Wrap a Daytona workspace via its CLI (`daytona`). Requires the Daytona CLI on PATH + authenticated profile. - Hermes §1.4 经济模型: workspace hibernates when idle, wakes on demand. + Hermes 经济模型: workspace hibernates when idle, wakes on demand. """ def __init__(self, workspace: str, *, profile: str | None = None) -> None: diff --git a/runtime/backends/docker.py b/runtime/backends/docker.py index a9982b2c..285aa753 100644 --- a/runtime/backends/docker.py +++ b/runtime/backends/docker.py @@ -1,4 +1,4 @@ -"""Docker backend (hermes §1.4). Wraps `docker exec` for a named container.""" +"""Docker backend (hermes ). 
Wraps `docker exec` for a named container.""" from __future__ import annotations diff --git a/runtime/backends/local.py b/runtime/backends/local.py index b02ff6af..3f107200 100644 --- a/runtime/backends/local.py +++ b/runtime/backends/local.py @@ -1,4 +1,4 @@ -"""Local subprocess backend (hermes §1.4).""" +"""Local subprocess backend (hermes ).""" from __future__ import annotations diff --git a/runtime/backends/modal.py b/runtime/backends/modal.py index 674ef960..1506118d 100644 --- a/runtime/backends/modal.py +++ b/runtime/backends/modal.py @@ -1,4 +1,4 @@ -"""Modal serverless backend (hermes §1.4 经济模型 — hibernate when idle). +"""Modal serverless backend (hermes 经济模型 — hibernate when idle). Modal client SDK must be installed and authenticated: pip install modal @@ -20,7 +20,7 @@ class ModalBackend(BaseExecutionEnv): """Wrap a Modal Function/App; commands run inside a hibernated container. - Hermes §1.4 经济模型: ground-state nearly zero cost when idle. + Hermes 经济模型: ground-state nearly zero cost when idle. """ def __init__(self, app_name: str, *, image: str | None = None) -> None: diff --git a/runtime/backends/singularity.py b/runtime/backends/singularity.py index 15ac7338..acfa93cc 100644 --- a/runtime/backends/singularity.py +++ b/runtime/backends/singularity.py @@ -1,4 +1,4 @@ -"""Singularity / Apptainer backend (hermes §1.4; HPC-friendly).""" +"""Singularity / Apptainer backend (hermes ; HPC-friendly).""" from __future__ import annotations diff --git a/runtime/backends/ssh.py b/runtime/backends/ssh.py index e003b7bb..b9de6e93 100644 --- a/runtime/backends/ssh.py +++ b/runtime/backends/ssh.py @@ -1,4 +1,4 @@ -"""SSH backend (hermes §1.4). Uses asyncssh for persistent connection.""" +"""SSH backend (hermes ). 
Uses asyncssh for persistent connection.""" from __future__ import annotations diff --git a/runtime/backends/vercel_sandbox.py b/runtime/backends/vercel_sandbox.py index 06dafff9..32e4612c 100644 --- a/runtime/backends/vercel_sandbox.py +++ b/runtime/backends/vercel_sandbox.py @@ -1,4 +1,4 @@ -"""Vercel Sandbox backend (hermes §1.4 边缘运行).""" +"""Vercel Sandbox backend (hermes 边缘运行).""" from __future__ import annotations diff --git a/runtime/cli/config.py b/runtime/cli/config.py index 95969a0a..7acaa64d 100644 --- a/runtime/cli/config.py +++ b/runtime/cli/config.py @@ -1,11 +1,11 @@ -"""tagent config — LLM provider configuration (V1.22.0 · Step 2 multi-model onboarding). +"""tagent config — LLM provider configuration (Step 2 multi-model onboarding). 5 sub-commands: list — list 6 built-in + path-B compatible examples show — show current .env config (keys fully redacted) use — path A: switch to built-in provider, write TAGENT_LLM_PROVIDER + vendor key placeholder use-compat — path B: OpenAI-compatible fallback channel (any vendor, plug-and-play) - unset — remove specified key from .env (V1.25.0) + unset — remove specified key from .env env file priority: CWD/.env -> repo-root/.env. Always backup to .env.bak before writing. """ diff --git a/runtime/config/safety.py b/runtime/config/safety.py index ca7d4510..6122371a 100644 --- a/runtime/config/safety.py +++ b/runtime/config/safety.py @@ -1,4 +1,4 @@ -"""Safe-by-default destructive guard · gbrain §1.9 派生. +"""Safe-by-default destructive guard · gbrain 派生. 危险/自动化/生产影响 操作必须 tagent.yml 显式开启. 
""" @@ -74,7 +74,7 @@ def get_setting(key_path: str, default: Any = None) -> Any: return _resolve(key_path.split("."), default=default) -# Common gates (charter §24) +# Common gates (charter) def gate_scheduler_tick() -> None: assert_allowed("scheduler.tick", "scheduler.enabled") diff --git a/runtime/docker-compose.app.yml b/runtime/docker-compose.app.yml index 84407234..5d09a1b3 100644 --- a/runtime/docker-compose.app.yml +++ b/runtime/docker-compose.app.yml @@ -2,7 +2,7 @@ # Usage: docker compose -f docker-compose.yml -f docker-compose.app.yml up services: app: - build: . + build: . container_name: tagent-app depends_on: postgres: diff --git a/runtime/essence_watcher/INDEX.md b/runtime/essence_watcher/INDEX.md index 4a79423e..38143aad 100644 --- a/runtime/essence_watcher/INDEX.md +++ b/runtime/essence_watcher/INDEX.md @@ -1,7 +1,7 @@ # essence_watcher 索引 > 自动追踪 upstream reference 引用的所有 upstream repo 更新。 -> 主宪章 §29 教学层加固之 essence 自动汲取。 +> 教学层加固之 essence 自动汲取。 ## 文件清单 @@ -26,7 +26,7 @@ d. 标 confidence: llm-draft-unreviewed 5. 应用 policy.yaml: - skill-related delta → 提议入 skills/ - - rule-related delta → 提议入主宪章 § 待审 + - rule-related delta → 提议入待审 - 其他 → 仅入 upstream 不动 Test-Agent ``` @@ -47,9 +47,9 @@ essence_watcher: # 哪些 delta 自动提议入 Test-Agent auto_propose: - skill_definitions # 新 skill 名字 / 描述 / 元数据 → 提议 skills/ - - charter_rules # 主宪章规则更新 → 提议 主宪章 - - safety_patterns # 防护模式 → 提议 §24 safe-by-default - - test_methodology # 测试方法论新增 → 提议 §17/§21 + - charter_rules # 规则更新 → 提议 + - safety_patterns # 防护模式 → 提议 safe-by-default + - test_methodology # 测试方法论新增 → 提议 # 仅入精髓库,不动 Test-Agent essence_only: diff --git a/runtime/essence_watcher/__init__.py b/runtime/essence_watcher/__init__.py index 6f4f84cf..69d5e8a3 100644 --- a/runtime/essence_watcher/__init__.py +++ b/runtime/essence_watcher/__init__.py @@ -4,11 +4,11 @@ 1. 解析 upstream INDEX 提取 repo url 2. gh API 查最新 commit hash + 与上次记录 diff 3. 若有新 commit → 拉 README + 关键 files - 4. 
LLM 萃取 delta 5. 写 upstream update 文件 标 confidence: llm-draft-unreviewed 6. 应用 policy 决定是否提议入 Test-Agent -接入 scheduler(主宪章 §22 §24 safe-by-default): +接入 scheduler: - tagent.yml essence_watcher.enabled: true 才允许跑 - 默认每周一次 """ diff --git a/runtime/essence_watcher/apply_policy.example.yaml b/runtime/essence_watcher/apply_policy.example.yaml index 3a577e89..1b0cc63c 100644 --- a/runtime/essence_watcher/apply_policy.example.yaml +++ b/runtime/essence_watcher/apply_policy.example.yaml @@ -1,4 +1,4 @@ -# essence_watcher 选择性应用 policy(主宪章 §29) +# essence_watcher 选择性应用 policy # # 实际部署:cp 到 /_apply_policy.yaml 启用 # 默认所有 delta 仅入精髓库,不动 Test-Agent. @@ -6,7 +6,7 @@ # 自动提议入 Test-Agent(待审) auto_propose: - skill_definitions # 新 skill name/description/metadata - - charter_rules # 主宪章规则更新建议 + - charter_rules # 规则更新建议 - safety_patterns # 防护模式 - test_methodology # 测试方法论新增 - tool_integrations # 工具集成模式 diff --git a/runtime/essence_watcher/runner.py b/runtime/essence_watcher/runner.py index 95b49659..0bee777b 100644 --- a/runtime/essence_watcher/runner.py +++ b/runtime/essence_watcher/runner.py @@ -1,7 +1,7 @@ """Essence watcher main runner. CLI: python -m runtime.essence_watcher.runner -Cron: 接入 runtime/scheduler 由 cron 触发(主宪章 §22) +Cron: 接入 runtime/scheduler 由 cron 触发 """ from __future__ import annotations @@ -19,7 +19,7 @@ def run() -> dict: """Main entry. Returns summary of changes detected + reports written.""" - # Safe-by-default gate (charter §24) + # Safe-by-default gate (charter ) if not is_allowed("essence_watcher.enabled"): raise SafeByDefaultBlocked(op="essence_watcher.run", key_path="essence_watcher.enabled") diff --git a/runtime/exporters/INDEX.md b/runtime/exporters/INDEX.md index 9682fb01..9f1ac17c 100644 --- a/runtime/exporters/INDEX.md +++ b/runtime/exporters/INDEX.md @@ -1,4 +1,4 @@ -# runtime/exporters 索引(V1.9.0) +# runtime/exporters 索引 > 用例多格式导出 · 用户自选 · 默认 Excel(已有)+ 3 新格式. 
@@ -38,7 +38,7 @@ class TestCaseTree: `testcase-designer` 专家 / `/testcase-design` skill 产此 IR,再 dispatch 到具体 exporter. -## CLI(V1.9 加) +## CLI ```bash tagent export --format xmind --out workspace/测试用例/login.xmind @@ -56,7 +56,7 @@ tagent export --format all --out-dir workspace/测试用例/ - `plantuml`(文本驱动 mindmap) - `mermaid-mindmap`(Markdown 嵌入,GitHub 渲染) -## 与主宪章关系 +## 设计原则 -- §5 多格式 I/O(扩输出端 3 种思维导图格式) -- §27 简洁优先:**只加用户用得到的 3 格式**(P0+P1),P2 留位 +- 多格式 I/O(扩输出端 3 种思维导图格式) +- 简洁优先:**只加用户用得到的 3 格式**(P0+P1),P2 留位 diff --git a/runtime/exporters/__init__.py b/runtime/exporters/__init__.py index 8a29e679..cd977d4b 100644 --- a/runtime/exporters/__init__.py +++ b/runtime/exporters/__init__.py @@ -1,4 +1,4 @@ -"""Multi-format test-case exporters · V1.9.0. +"""Multi-format test-case exporters. By default: Excel 4 Sheet(`utils/excel_generator.py`,已有). New formats: xmind / markmap / opml / freemind / plantuml(按用户选). diff --git a/runtime/gateway/__init__.py b/runtime/gateway/__init__.py index 707ec638..81a66cdb 100644 --- a/runtime/gateway/__init__.py +++ b/runtime/gateway/__init__.py @@ -1,4 +1,4 @@ -"""Multi-platform messaging gateway · hermes §1.5. +"""Multi-platform messaging gateway · hermes. Single gateway process serves N platforms. Cross-platform conversation continuity. """ diff --git a/runtime/gateway/base.py b/runtime/gateway/base.py index 4f77b5d6..388120e8 100644 --- a/runtime/gateway/base.py +++ b/runtime/gateway/base.py @@ -1,4 +1,4 @@ -"""Platform abstraction · hermes §1.5.""" +"""Platform abstraction · hermes.""" from __future__ import annotations diff --git a/runtime/gateway/session.py b/runtime/gateway/session.py index 27756d7d..5a4becf4 100644 --- a/runtime/gateway/session.py +++ b/runtime/gateway/session.py @@ -1,4 +1,4 @@ -"""Cross-platform session continuity (hermes §1.5). +"""Cross-platform session continuity (hermes). 
Stores conversation handles keyed by (user, app_session); each platform may attach its native chat_id so a user moving Telegram → Slack still finds the same context. diff --git a/runtime/healthcheck/INDEX.md b/runtime/healthcheck/INDEX.md index 15f3b8a5..6742b9d5 100644 --- a/runtime/healthcheck/INDEX.md +++ b/runtime/healthcheck/INDEX.md @@ -1,6 +1,6 @@ -# runtime/healthcheck/ 索引(V1.10.0) +# runtime/healthcheck/ 索引 -> 4 层自检 · 主宪章 §33 自检铁律 · pre-tag 卡 release。 +> 4 层自检 · pre-tag 卡 release。 ## 4 层结构 @@ -42,6 +42,6 @@ ## 相关 -- 主宪章 §33(自检铁律,V1.10.0 加) +- 自检规则 - `.pre-commit-config.yaml` 中 `forbid-private-source` / `forbid-essence-library` / `file-count-check` 协同 - 日志归档:`discussions/selftest__.log` diff --git a/runtime/healthcheck/__init__.py b/runtime/healthcheck/__init__.py index 974a473e..5a243f80 100644 --- a/runtime/healthcheck/__init__.py +++ b/runtime/healthcheck/__init__.py @@ -1,4 +1,4 @@ -"""Healthcheck · L1 frontmatter lint + L3 LLM smoke(V1.10.0 · 主宪章 §33). +"""Healthcheck · L1 frontmatter lint + L3 LLM smoke. L1(本模块,无 LLM):agent/skill frontmatter 必填字段 + 注册表存在性 L2(CI mock):workflow `ci.yml` selftest job diff --git a/runtime/healthcheck/llm_smoke.py b/runtime/healthcheck/llm_smoke.py index 2b05b09f..ebc8ae74 100644 --- a/runtime/healthcheck/llm_smoke.py +++ b/runtime/healthcheck/llm_smoke.py @@ -1,7 +1,7 @@ """L3 lightweight LLM smoke · 单次最小往返,验真-LLM 通 + 报告延迟/token/估算成本. `tagent doctor --llm-smoke` 5 秒验证,远轻于 --probe (16 agent 全跑 ~$0.3-0.8). -用 aux 通道 provider 隔离主 prompt cache (hermes §1.3 借鉴). +用 aux 通道 provider 隔离主 prompt cache (hermes 借鉴). 
""" from __future__ import annotations diff --git a/runtime/init/INDEX.md b/runtime/init/INDEX.md index 985cbf99..c3c7820c 100644 --- a/runtime/init/INDEX.md +++ b/runtime/init/INDEX.md @@ -1,6 +1,6 @@ -# runtime/init/ 索引(V1.12.0) +# runtime/init/ 索引 -> `tagent init` 配置自动组装 · 5 分钟从 0 到可跑 · 主宪章 §1 一键部署。 +> `tagent init` 配置自动组装 · 5 分钟从 0 到可跑 ## 模块 @@ -40,8 +40,8 @@ tagent init --overwrite 不改 wizard / renderer 代码,改 matrix.yaml 即可: - 新 LLM provider → `llm_providers:` 加节 -- 新 BugTracker → `bug_trackers:` 加节(主宪章 §37 6 adapter 之外加) -- 新通知渠道 → `notifiers:` 加节(主宪章 §36 6 渠道之外加) +- 新 BugTracker → `bug_trackers:` 加节 +- 新通知渠道 → `notifiers:` 加节 - 新测试类型 → `test_types:` 加节 + 同步 `agents/` 加平台 expert(如需) ## 矩阵规模 @@ -50,6 +50,6 @@ tagent init --overwrite ## 相关 -- 主宪章 §1(同步铁律)+ §5(多格式 I/O)+ §7(一键部署)+ §36(多端)+ §37(BugTracker) +- (同步规则)+ (多格式 I/O)+ (一键部署)+ (多端)+ (BugTracker) - 模板:[`config/templates/`](../../deploy/config/templates/INDEX.md) - 集成 CLI:`runtime/cli/main.py` `init` 子命令 diff --git a/runtime/init/__init__.py b/runtime/init/__init__.py index f613d5e8..f96a4a5d 100644 --- a/runtime/init/__init__.py +++ b/runtime/init/__init__.py @@ -1,4 +1,4 @@ -"""tagent init · 配置自动组装(V1.12.0). +"""tagent init · 配置自动组装. 
读 `config/templates/matrix.yaml` 矩阵 + base.*.tpl 模板,产 `.env` + `tagent.yml` + `STARTUP.md`。 矩阵 8 测试类型 × 6 平台 × 5 LLM × 6 BugTracker × 6 通知 = 8640 组合,wizard 自动列出。 diff --git a/runtime/init/wizard.py b/runtime/init/wizard.py index 1f1ea485..230a07f9 100644 --- a/runtime/init/wizard.py +++ b/runtime/init/wizard.py @@ -87,13 +87,13 @@ def run_wizard(matrix: Matrix | None = None) -> InitAnswers: ) bug_tracker = _pick_one( - "4) BugTracker?(主宪章 §37,6 选,默认禅道)", + "4) BugTracker?", [(k, v.label) for k, v in m.bug_trackers.items()], default_key="zentao", ) notifiers = _pick_many( - "5) 通知渠道?(主宪章 §36,可多选,逗号分隔)", + "5) 通知渠道?", [(k, v.label) for k, v in m.notifiers.items()], default_keys=["wechat"], ) diff --git a/runtime/learning_loop/INDEX.md b/runtime/learning_loop/INDEX.md index cf2a5808..7b6301e7 100644 --- a/runtime/learning_loop/INDEX.md +++ b/runtime/learning_loop/INDEX.md @@ -1,6 +1,6 @@ # learning_loop 索引 -> 主宪章 §14 darwin-skill 是 skill 文本本身的棘轮;本模块是**外层协调器**: +> darwin-skill 是 skill 文本本身的棘轮;本模块是**外层协调器**: > session 检索 + 用户画像 + curator 触发 + skill 自创建提示。 ## 不变量(与 hermes 同源) diff --git a/runtime/learning_loop/__init__.py b/runtime/learning_loop/__init__.py index e96f1419..6c4126e5 100644 --- a/runtime/learning_loop/__init__.py +++ b/runtime/learning_loop/__init__.py @@ -1,5 +1,5 @@ -"""Closed learning loop · hermes §1.1 inspired. +"""Closed learning loop · hermes inspired. -Coordinates with darwin-skill (charter §14) as the inner棘轮; +Coordinates with darwin-skill (charter ) as the inner棘轮; this module is the outer协调器 (curator + session search + user model). """ diff --git a/runtime/learning_loop/curator.py b/runtime/learning_loop/curator.py index b90c96f1..e1c28944 100644 --- a/runtime/learning_loop/curator.py +++ b/runtime/learning_loop/curator.py @@ -1,11 +1,11 @@ -"""Curator coordinator · hermes §1.1. +"""Curator coordinator · hermes. 
Background skill maintenance: - inactivity-triggered (no daemon) - only touches agent-created skills - never auto-deletes — archives only - pinned skills bypass - - uses aux client (charter §22; runtime/subagent/aux_client) + - uses aux client (charter ; runtime/subagent/aux_client) """ from __future__ import annotations diff --git a/runtime/learning_loop/session_search.py b/runtime/learning_loop/session_search.py index 62226669..670f7904 100644 --- a/runtime/learning_loop/session_search.py +++ b/runtime/learning_loop/session_search.py @@ -1,4 +1,4 @@ -"""FTS5 session search · hermes §1.1. +"""FTS5 session search · hermes. SQLite FTS5 over historical sessions. LLM summary attached at retrieval time. """ diff --git a/runtime/learning_loop/user_model.py b/runtime/learning_loop/user_model.py index 819de763..a46ad51c 100644 --- a/runtime/learning_loop/user_model.py +++ b/runtime/learning_loop/user_model.py @@ -1,4 +1,4 @@ -"""Dialectic user modeling · hermes §1.1 (inspired by Honcho). +"""Dialectic user modeling · hermes (inspired by Honcho). Cross-session profile of user preferences / vocabulary / working style. Stored as JSON facts under `workspace/learning/user_models/{user_id}.json`. 
diff --git a/runtime/marketplace/INDEX.md b/runtime/marketplace/INDEX.md index d4c8cec6..0ece0f82 100644 --- a/runtime/marketplace/INDEX.md +++ b/runtime/marketplace/INDEX.md @@ -34,9 +34,9 @@ 任一关失败 → 全部回滚 + decisions/ 落原因。 -## 与主宪章融合 +## 规则 -- §22 决策不可逆禁止:卸载只归档(`marketplace/.archive/`) -- §24 safe-by-default:`tagent.yml marketplace.enabled` 默认 false -- §27 Karpathy 原则 3 Surgical:卸载只动安装时建的文件,不动相邻 -- §29 Essence watcher:可关联 marketplace 远程 registry 自动同步 +- 决策不可逆禁止:卸载只归档(`marketplace/.archive/`) +- safe-by-default:`tagent.yml marketplace.enabled` 默认 false +- Karpathy 原则 3 Surgical:卸载只动安装时建的文件,不动相邻 +- Essence watcher:可关联 marketplace 远程 registry 自动同步 diff --git a/runtime/marketplace/__init__.py b/runtime/marketplace/__init__.py index f83f8fcf..3e0bc442 100644 --- a/runtime/marketplace/__init__.py +++ b/runtime/marketplace/__init__.py @@ -1,4 +1,4 @@ -"""Marketplace · 主宪章 §30. +"""Marketplace. 4 lane(skills/agents/mcp/hooks)+ 4 关安全门(sig/scan/sandbox/darwin). Safe-by-default:`tagent.yml marketplace.enabled: true` 才允许跑. diff --git a/runtime/marketplace/catalog.py b/runtime/marketplace/catalog.py index ab87c0c4..2048cb9f 100644 --- a/runtime/marketplace/catalog.py +++ b/runtime/marketplace/catalog.py @@ -1,4 +1,4 @@ -"""Marketplace local + remote catalog · §30. +"""Marketplace local + remote catalog. 读 marketplace/registry.json,可选拉远程 mirror,合并查询. """ diff --git a/runtime/marketplace/installer.py b/runtime/marketplace/installer.py index dfafc003..6bfa6456 100644 --- a/runtime/marketplace/installer.py +++ b/runtime/marketplace/installer.py @@ -1,7 +1,7 @@ -"""Install / Uninstall / Archive · §30. +"""Install / Uninstall / Archive. 
安装流程:catalog 查 → verifier 跑 4 关 → 落地到 marketplace/{lane}/{name}/ -卸载只归档不删(§22) +卸载只归档不删() """ from __future__ import annotations @@ -79,7 +79,7 @@ def install(entry: Entry, content_path: Path) -> dict: def uninstall(name: str) -> dict: - """Uninstall by archiving (§22 不可逆禁止).""" + """Uninstall by archiving.""" if not is_allowed("marketplace.enabled"): raise SafeByDefaultBlocked(op="marketplace.uninstall", key_path="marketplace.enabled") diff --git a/runtime/marketplace/verifier.py b/runtime/marketplace/verifier.py index eada6cb3..edcf75fd 100644 --- a/runtime/marketplace/verifier.py +++ b/runtime/marketplace/verifier.py @@ -1,4 +1,4 @@ -"""4 关安全门 · 主宪章 §30. +"""4 关安全门 关 1: signature_check (sha256 + ed25519 可选) 关 2: injection_scan (prompt 注入扫,复用 scheduler 模块) diff --git a/runtime/mcp/INDEX.md b/runtime/mcp/INDEX.md index b53ed339..ab6dcd74 100644 --- a/runtime/mcp/INDEX.md +++ b/runtime/mcp/INDEX.md @@ -1,6 +1,6 @@ # mcp 索引 -> 主宪章 §16 预留 6 件套,V1.2.0(M2)实现。 +> 预留 6 件套 > 当前 `config/.mcp.json` 仅启用 filesystem;本目录服务通过 `config/.mcp.json` 启用。 ## 模块清单 @@ -40,8 +40,8 @@ python -m runtime.mcp.test_orchestrator.server --http 8801 # http mode `base.py` 提供: - `make_server(name, version)`:统一 Server 实例化 -- `tool_decision_logged(name)`:工具装饰器,自动落 `decisions/{date}_mcp_{tool}.json`(宪章 §18-12) -- `with_run_id(handler)`:run_id 全链路注入(§21 横切可复现性) +- `tool_decision_logged(name)`:工具装饰器,自动落 `decisions/{date}_mcp_{tool}.json`(宪章 ) +- `with_run_id(handler)`:run_id 全链路注入(横切可复现性) ## MCP 客户端 (P2 #12) diff --git a/runtime/mcp/__init__.py b/runtime/mcp/__init__.py index 727f62e7..76a7c0c1 100644 --- a/runtime/mcp/__init__.py +++ b/runtime/mcp/__init__.py @@ -1,6 +1,6 @@ """MCP servers for Test-Agent. 
-6 servers per project charter §16: +6 servers per project charter : - test-orchestrator: 主调度,包装 runtime/orchestrator - protocol-adapter: HTTP/gRPC/WS/MQTT/Kafka 协议层 - evidence-vault: 证据/录屏/日志(MinIO + Postgres) @@ -9,7 +9,7 @@ - compliance-checker: 行业合规规则库(SOC2/PCI/HIPAA/IEC 62304 等) All servers respect: - - 主宪章 §9: 已有不动 → 仅包装,不修改 16 专家/32 skill/67 脚本 - - 主宪章 §21 横切: 失败可复现(seed+snapshot+录屏),不入回归库否 - - 主宪章 §18-12: 决策可追溯 → 工具调用落 decisions/ + - 已有不动 → 仅包装,不修改 16 专家/32 skill/67 脚本 + - 横切: 失败可复现(seed+snapshot+录屏),不入回归库否 + - 12: 决策可追溯 → 工具调用落 decisions/ """ diff --git a/runtime/mcp/base.py b/runtime/mcp/base.py index 28c22e82..de34a447 100644 --- a/runtime/mcp/base.py +++ b/runtime/mcp/base.py @@ -1,9 +1,9 @@ """Shared MCP server scaffolding. Honors charter: - - §18-12 决策可追溯:工具调用自动落 `decisions/{date}_mcp_{tool}_{run_id}.json` - - §21 横切可复现性:run_id 注入 + seed 记录 + 失败 snapshot - - §1 同步铁律:服务列表必须与 `config/.mcp.json` 一致 + - 决策可追溯:工具调用自动落 `decisions/{date}_mcp_{tool}_{run_id}.json` + - 横切可复现性:run_id 注入 + seed 记录 + 失败 snapshot + - 同步规则:服务列表必须与 `config/.mcp.json` 一致 """ from __future__ import annotations @@ -46,7 +46,7 @@ def _decisions_dir() -> Path: def log_decision(tool: str, payload: dict, run_id: str | None = None) -> Path: - """Persist a decision record per charter §18-12. + """Persist a decision record per charter. Returns the written file path. """ @@ -68,7 +68,7 @@ def log_decision(tool: str, payload: dict, run_id: str | None = None) -> Path: def tool_decision_logged(tool_name: str) -> Callable: """Wrap an async MCP tool handler with decision logging. - Charter §18-12 决策可追溯: every call (success or failure) writes a record. + Charter 决策可追溯: every call (success or failure) writes a record. Logging failures must not mask the original handler exception/result. 
""" diff --git a/runtime/mcp/compliance_checker/__init__.py b/runtime/mcp/compliance_checker/__init__.py index 7d0c7940..0e951419 100644 --- a/runtime/mcp/compliance_checker/__init__.py +++ b/runtime/mcp/compliance_checker/__init__.py @@ -1,5 +1,5 @@ """mcp-compliance-checker: 行业合规规则库 (空载 + 规则插槽). -V1.2.0 仅起步骨架,真规则库由领域专家供。 -L4 级被测项触发 (charter §21 横切准则: 合规审计周期). +仅起步骨架,真规则库由领域专家供。 +L4 级被测项触发 (charter 横切准则: 合规审计周期). """ diff --git a/runtime/mcp/compliance_checker/server.py b/runtime/mcp/compliance_checker/server.py index e7722f33..0b4add66 100644 --- a/runtime/mcp/compliance_checker/server.py +++ b/runtime/mcp/compliance_checker/server.py @@ -33,7 +33,7 @@ async def tool_list_profiles() -> dict: def _load_profile(name: str) -> dict | None: - # Charter §4 security: 防 path traversal — 仅允许字母数字+连字符,且 resolve 后必须落在 profiles_dir 下 + # Charter security: 防 path traversal — 仅允许字母数字+连字符,且 resolve 后必须落在 profiles_dir 下 import re if not re.fullmatch(r"[A-Za-z0-9_\-\.]+", name) or ".." in name: @@ -135,7 +135,7 @@ def build_server(): ), Tool( name="check_compliance", - description="Match a profile's evidence_required against a run's actual evidence. L4 被测项必触发(charter §21).", + description="Match a profile's evidence_required against a run's actual evidence. L4 被测项必触发(charter ).", inputSchema={ "type": "object", "properties": { diff --git a/runtime/mcp/defect_tracker/__init__.py b/runtime/mcp/defect_tracker/__init__.py index ede82576..87127626 100644 --- a/runtime/mcp/defect_tracker/__init__.py +++ b/runtime/mcp/defect_tracker/__init__.py @@ -1,4 +1,4 @@ """mcp-defect-tracker: 工单桥 (5 adapter zentao/jira/github/linear/webhook). 
-Per charter §12: 接入新 adapter 必须实现 BugTrackerBase 契约。 +Per charter : 接入新 adapter 必须实现 BugTrackerBase 契约。 """ diff --git a/runtime/mcp/defect_tracker/base.py b/runtime/mcp/defect_tracker/base.py index 4a84a93c..555d2343 100644 --- a/runtime/mcp/defect_tracker/base.py +++ b/runtime/mcp/defect_tracker/base.py @@ -1,6 +1,6 @@ """BugTrackerBase contract (mirrors utils/bug_manager.py 5 adapter). -Charter §12: 实现该 5 方法才能注册。 +Charter : 实现该 5 方法才能注册。 severity 映射统一权威 (`utils/bug_severity_map.py`): 1=P0 / 2=P1 / 3=P2 / 4=P3. """ diff --git a/runtime/mcp/defect_tracker/server.py b/runtime/mcp/defect_tracker/server.py index 38330da6..ec1797a8 100644 --- a/runtime/mcp/defect_tracker/server.py +++ b/runtime/mcp/defect_tracker/server.py @@ -3,7 +3,7 @@ Tools wrap the 5 BugTrackerBase methods; delegate to existing `utils/bug_manager.py` if available, else fall back to flywheel `defects` table. -Per charter §12 + §18-4: severity 1=P0 / 2=P1 / 3=P2 / 4=P3 一致。 +Per charter + : severity 1=P0 / 2=P1 / 3=P2 / 4=P3 一致。 """ from __future__ import annotations @@ -47,7 +47,7 @@ def _flywheel_get(bug_id: str) -> dict | None: d = s.get(Defect, bid) if d is None: return None - # Charter §18-4: 1=P0 / 2=P1 / 3=P2 / 4=P3 (one-based; enum value "P0".."P3" is zero-based string) + # Charter : 1=P0 / 2=P1 / 3=P2 / 4=P3 (one-based; enum value "P0".."P3" is zero-based string) sev_int = int(d.severity.value[1]) + 1 if d.severity.value.startswith("P") else 0 return { "bug_id": str(d.id), @@ -164,7 +164,7 @@ def build_server(): TOOLS = [ Tool( name="create_bug", - description="Create a defect. severity 1=P0 / 2=P1 / 3=P2 / 4=P3 (charter §18-4).", + description="Create a defect. 
severity 1=P0 / 2=P1 / 3=P2 / 4=P3 (charter ).", inputSchema={ "type": "object", "properties": { diff --git a/runtime/mcp/evidence_vault/server.py b/runtime/mcp/evidence_vault/server.py index 8cbba050..e048363c 100644 --- a/runtime/mcp/evidence_vault/server.py +++ b/runtime/mcp/evidence_vault/server.py @@ -76,7 +76,7 @@ def _validate_evidence_path(path_str: str) -> Path: def _persist_evidence(run_id: str, kind: str, data: bytes, key: str) -> dict: """DB insert first, then MinIO upload; if upload fails, rollback DB row. - Charter §18 闭环约定: 防止 MinIO 与 Postgres 不一致 (orphaned file or dangling row). + Charter 闭环约定: 防止 MinIO 与 Postgres 不一致 (orphaned file or dangling row). """ import hashlib diff --git a/runtime/mcp/protocol_adapter/adapters.py b/runtime/mcp/protocol_adapter/adapters.py index ba7690a9..09885d01 100644 --- a/runtime/mcp/protocol_adapter/adapters.py +++ b/runtime/mcp/protocol_adapter/adapters.py @@ -146,7 +146,7 @@ async def close(self) -> None: class MQTTAdapter(ProtocolAdapter): """MQTT v3.1.1 via paho-mqtt sync client wrapped in asyncio threadpool. - Charter §21 横切准则: paho-mqtt's on_message callback runs on the network + Charter 横切准则: paho-mqtt's on_message callback runs on the network thread. We guard the shared buffer with a lock so async recv() and the callback don't race. """ diff --git a/runtime/mcp/protocol_adapter/base.py b/runtime/mcp/protocol_adapter/base.py index bcb375cd..b935cca9 100644 --- a/runtime/mcp/protocol_adapter/base.py +++ b/runtime/mcp/protocol_adapter/base.py @@ -1,7 +1,7 @@ """ProtocolAdapter abstract base. All concrete adapters must implement this contract to register. 
-Honors charter §21: +Honors charter : - 协议调用即测,不裸跑 - 失败必带 seed+snapshot(可复现性横切准则) """ diff --git a/runtime/mcp/test_orchestrator/server.py b/runtime/mcp/test_orchestrator/server.py index 34cd68e4..c6eb52e3 100644 --- a/runtime/mcp/test_orchestrator/server.py +++ b/runtime/mcp/test_orchestrator/server.py @@ -23,7 +23,7 @@ _kernel: Kernel | None = None # Bounded LRU cache for in-memory run results. -# Charter §21 横切预算: 防 server 长时跑无限增长. +# Charter 横切预算: 防 server 长时跑无限增长. # Production should rely on Postgres `runs` table; this is the fast path. _MAX_RUN_RESULTS = 1024 _run_results: OrderedDict[str, dict] = None # type: ignore[assignment] diff --git a/runtime/orchestrator/adapters/experts.py b/runtime/orchestrator/adapters/experts.py index b2d149cb..2617c02c 100644 --- a/runtime/orchestrator/adapters/experts.py +++ b/runtime/orchestrator/adapters/experts.py @@ -7,10 +7,10 @@ declarative description and execute its CANONICAL SCRIPT mapping (below). - A handful of experts have a strong default script. The rest fall back to recording the expert step + producing an empty result placeholder which the - report-generator then summarises (matching V1.0.0 manual workflow). + report-generator then summarises (matching manual workflow). - Scripts with required CLI args(e.g. 
generate_report.py --data)get default inputs auto-injected via SCRIPT_DEFAULT_ARGS;referenced fixtures auto-materialized - by _ensure_fixture (V1.11 修 V1.10 n7 selftest bug)。 + by _ensure_fixture 。 """ from __future__ import annotations @@ -41,9 +41,9 @@ "visual-tester": None, "system-tester": None, "ai-tester": "ai_validator.py", - "pentest-tester": None, # V1.19 production (V1.x rollout 收尾) - "automotive-tester": None, # V1.20 production (V1.x rollout 收尾) - # V1.34 bridge: standalone scripts wired into orchestrator + "pentest-tester": None, # production (rollout 收尾) + "automotive-tester": None, # production (rollout 收尾) + # bridge: standalone scripts wired into orchestrator "mutation-test": "mutation_runner.py", "chaos-test": "chaos_helper.py", "fuzz-test": "fuzzer.py", @@ -51,15 +51,15 @@ "suite-minimize": "suite_minimizer.py", } -# V1.14 防 mock 单源 (ROADMAP V1.15 Day 0 承诺): +# 防 mock 单源 (ROADMAP Day 0 承诺): # 实装状态读 registry catalog (agents/skills *.md frontmatter # EXPERT_IMPL_STATUS / SKILL_IMPL_STATUS),避免 hardcoded dict 与 .md 双源漂移。 # # 合法值 (registry._VALID_IMPL_STATUS 同步): # - production: 真 LLM-driven runner (orchestrator/agents/*.py) 已实装 # - script: 真 script-backed (utils/*.py) 已实装 -# - rollout: V1.x rollout 待实装 → execute_node 拒绝路由,不输出 mock -# - vision: V2.x 方法论参考 → 同 rollout 处理 +# - rollout: rollout 待实装 → execute_node 拒绝路由,不输出 mock +# - vision: 方法论参考 → 同 rollout 处理 # - unknown: frontmatter 缺失/非法值 → 同 rollout 处理 (fail closed) @@ -86,7 +86,7 @@ def _get_impl_status(name: str, kind: str) -> str: "visual-test": None, "system-test": None, "ai-test": "ai_validator.py", - # V1.34 bridge: standalone scripts wired into orchestrator + # bridge: standalone scripts wired into orchestrator "mutation-testing": "mutation_runner.py", "chaos-engineering": "chaos_helper.py", "api-fuzzing": "fuzzer.py", @@ -221,7 +221,7 @@ def reset_upstream_cache() -> None: def execute_node(name: str, kind: str, *, inputs: dict | None = None, timeout: int = 1800) -> StepOutcome: inputs = 
inputs or {} - # V1.14 防 mock (ROADMAP V1.15 Day 0 承诺): 拒绝路由未实装 expert/skill,不输出 mock 数据 + # 防 mock (ROADMAP Day 0 承诺): 拒绝路由未实装 expert/skill,不输出 mock 数据 # 单源 = agents/skills .md frontmatter (registry catalog) if kind in ("expert", "skill"): status = _get_impl_status(name, kind) @@ -233,7 +233,7 @@ def execute_node(name: str, kind: str, *, inputs: dict | None = None, timeout: i returncode=2, # 明确非 0,标记 "未实装" 而非 no-op 兜底 stdout="", stderr=( - f"[V1.x {status}] {kind} '{name}' 未实装 (ROADMAP.md);" + f"[{status}] {kind} '{name}' 未实装 (ROADMAP.md);" f" router/test-lead 应跳过此 {kind},不输出 mock 数据" ), duration_ms=0, @@ -252,7 +252,7 @@ def execute_node(name: str, kind: str, *, inputs: dict | None = None, timeout: i duration_ms=0, ) - # V1.14 真 agent runner 优先(主宪章 §40,5 核心 expert 落地) + # 真 agent runner 优先 if kind == "expert": try: from runtime.config.settings import get_settings @@ -297,7 +297,7 @@ def execute_node(name: str, kind: str, *, inputs: dict | None = None, timeout: i except Exception as e: # noqa: BLE001 logger.warning("agent runner {} unavailable, fallback to script map: {}", name, e) - # V1.21 真 skill runner 优先 (ROADMAP skill rollout 起点) + # 真 skill runner 优先 (ROADMAP skill rollout 起点) # 与 expert runner 接口同, 仅 registry 独立 SKILL_RUNNERS if kind == "skill": try: @@ -343,7 +343,7 @@ def execute_node(name: str, kind: str, *, inputs: dict | None = None, timeout: i except Exception as e: # noqa: BLE001 logger.warning("skill runner {} unavailable, fallback to script map: {}", name, e) - # Fallback: SCRIPT_MAP(主宪章 §9 已有实现保留) + # Fallback: SCRIPT_MAP script = _resolve_script(name, kind) if script is None: return StepOutcome( @@ -388,7 +388,7 @@ def execute_node(name: str, kind: str, *, inputs: dict | None = None, timeout: i for k, v in defaults.items(): if k not in inputs: # only materialize fixture for auto-injected defaults _ensure_fixture(str(v)) - # V1.14:`artifact_text` 给 AgentRunner 用,不当 CLI arg(多行文本会炸 argparse) + # :`artifact_text` 给 AgentRunner 用,不当 CLI arg(多行文本会炸 
argparse) _CLI_EXCLUDE = {"artifact_text", "lang", "mode"} args = [f"--{k}={v}" for k, v in merged.items() if k not in _CLI_EXCLUDE] res: ScriptResult = run_script(script, args=args, timeout=timeout) diff --git a/runtime/orchestrator/adapters/scripts.py b/runtime/orchestrator/adapters/scripts.py index a5fb5e32..60798342 100644 --- a/runtime/orchestrator/adapters/scripts.py +++ b/runtime/orchestrator/adapters/scripts.py @@ -43,7 +43,7 @@ def run_script(script_filename: str, args: list[str] | None = None, *, timeout: scripts_dir: Path = s.resolve(s.scripts_dir) script_path = scripts_dir / script_filename if not script_path.is_file(): - # utils-reorg (V1.x): scripts moved into subdirs (utils/reporting/, utils/data/, ...). + # utils-reorg : scripts moved into subdirs (utils/reporting/, utils/data/, ...). # Recursively look up by basename when not at top-level. matches = [p for p in scripts_dir.rglob(script_filename) if p.is_file()] if len(matches) == 1: @@ -84,7 +84,7 @@ def _do_run() -> subprocess.CompletedProcess[str]: def list_available_scripts() -> list[str]: s = get_settings() scripts_dir: Path = s.resolve(s.scripts_dir) - # utils-reorg (V1.x): scripts in subdirs (utils/reporting/, utils/data/, ...). + # utils-reorg : scripts in subdirs (utils/reporting/, utils/data/, ...). # Return basenames so callers can run_script("excel_generator.py") regardless of subdir. 
seen: set[str] = set() for p in scripts_dir.rglob("*.py"): diff --git a/runtime/orchestrator/agents/INDEX.md b/runtime/orchestrator/agents/INDEX.md index 6c2caf49..e97d4086 100644 --- a/runtime/orchestrator/agents/INDEX.md +++ b/runtime/orchestrator/agents/INDEX.md @@ -1,8 +1,8 @@ -# runtime/orchestrator/agents/ 索引(V1.36.0) +# runtime/orchestrator/agents/ 索引 -> 真 LLM-driven expert runner · 16 核心 expert 全落地 · 主宪章 §40 真 agent 落地 canon。 +> 真 LLM-driven expert runner · 16 核心 expert 全落地 -## 已实现 16 runner(V1.32) +## 已实现 16 runner | Runner | 角色源 | 上游 | 产物 | |--------|--------|------|------| @@ -12,11 +12,11 @@ | `bug-manager` | agents/08-Bug管理.md | test-executor | `bug_drafts.json`(BugTracker-ready) | | `test-lead` | agents/01-测试主管.md | 全链路 | `final_verdict_*.json`(上线决策) | -## 0 未实现(V1.32 rollout 完成) +## 0 未实现 - test-lead 自身已实现(用全链路上游),其他 11 个:env-manager / data-preparer(scripted)/ testcase-designer(scripted)/ report-generator(scripted)/ mobile-tester / desktop-tester(scripted)/ visual-tester / system-tester / ai-tester(scripted)/ 渗透 / 车载 - **5 个有 script 真跑**(testcase-designer / data-preparer / report-generator / desktop-tester / ai-tester)→ SCRIPT_MAP 兜 -- **6 个 no-op**(env-manager / mobile-tester / visual-tester / system-tester / 渗透 / 车载)→ 待 V1.15+ +- **6 个 no-op**(env-manager / mobile-tester / visual-tester / system-tester / 渗透 / 车载)→ 待 + ## 协议 @@ -40,12 +40,12 @@ 3. 加 import 到 `__init__.py`(触发 @register) 4. 
跑 `tagent selftest --e2e` 验编排 -## 主宪章 +## -- §33 自检铁律(L1+L2+L3+L4) -- §40 真 agent 落地 canon(V1.32 加) -- §9 已有实现不动 — 5 个 SCRIPT_MAP 兜底 expert 不动 -- §10 第 5 铭文:test-lead 决策 `requires_human_signoff: true` +- 自检规则(L1+L2+L3+L4) +- 真 agent 落地 canon +- 已有实现不动 — 5 个 SCRIPT_MAP 兜底 expert 不动 +- 第 5 铭文:test-lead 决策 `requires_human_signoff: true` ## 相关 diff --git a/runtime/orchestrator/agents/__init__.py b/runtime/orchestrator/agents/__init__.py index b1927b2a..3bbfe7cc 100644 --- a/runtime/orchestrator/agents/__init__.py +++ b/runtime/orchestrator/agents/__init__.py @@ -1,24 +1,24 @@ -"""Real LLM-driven agent runners(V1.36.0 · 主宪章 §33 + §40). +"""Real LLM-driven agent runners. 每个 runner 把 agents/*.md 的角色描述变成可执行的 LLM 调用: - 读上游产物 → 拼 prompt → 调 LLM → 解析输出 → 落产物 → 给下游 -11 核心 runner(V1.x rollout 收尾,所有 LLM-driven expert 已实装): -- requirements-analyst (V1.14) -- automation-engineer (V1.14) -- test-executor (V1.14) -- bug-manager (V1.14) -- test-lead (V1.14) -- env-manager (V1.15.0, ROADMAP rollout #1 落地 — minimum viable) -- mobile-tester (V1.16.0, ROADMAP rollout #2 落地 — minimum viable) -- visual-tester (V1.17.0, ROADMAP rollout #3 落地 — minimum viable) -- system-tester (V1.18.0, ROADMAP rollout #4 落地 — minimum viable) -- pentest-tester (V1.19.0, ROADMAP rollout #5 落地 — minimum viable; +11 核心 runner(rollout 收尾,所有 LLM-driven expert 已实装): +- requirements-analyst +- automation-engineer +- test-executor +- bug-manager +- test-lead +- env-manager +- mobile-tester +- visual-tester +- system-tester +- pentest-tester (ROADMAP rollout #5 落地 — minimum viable; 仅输出测试计划文本, 不调外部攻击工具;真执行守护已在 utils 层 env gate) -- automotive-tester (V1.20.0, ROADMAP rollout #6 落地 — minimum viable; - V1.x rollout 收尾;ASIL 评估 + HIL 测试 + ADAS 场景 + OTA + 合规矩阵) +- automotive-tester (ROADMAP rollout #6 落地 — minimum viable; + rollout 收尾;ASIL 评估 + HIL 测试 + ADAS 场景 + OTA + 合规矩阵) -剩余 5 个 expert 走 SCRIPT_MAP script-backed (主宪章 §9 已有实现: +剩余 5 个 expert 走 SCRIPT_MAP script-backed (已有实现: testcase-designer / data-preparer / 
report-generator / desktop-tester / ai-tester)。 """ diff --git a/runtime/orchestrator/agents/automotive_tester.py b/runtime/orchestrator/agents/automotive_tester.py index c6a6190a..ddbce565 100644 --- a/runtime/orchestrator/agents/automotive_tester.py +++ b/runtime/orchestrator/agents/automotive_tester.py @@ -1,11 +1,11 @@ """automotive-tester · LLM 读 PRD + CAN-bus/ISO-26262 上下文 → ASIL 评估 + HIL 测试用例 + ADAS 场景 + OTA 升级测试 + 协议特定配置. -V1.20.0 minimum viable (ROADMAP rollout #6 落地, V1.x rollout 收尾): +minimum viable (ROADMAP rollout #6 落地, rollout 收尾): - 仅生成 ASIL 评估 + test_cases + bus_test_plan + adas_scenarios + ota_plan + compliance_matrix 结构化 JSON - 不实装 16-车载测试.md 全部职责 (Vector CANoe 真跑 / HIL 台架真接 / VTD/CarMaker/CARLA - 仿真真跑 / OTA A/B 分区真切 / SocketCAN 真嗅探 等留 V1.x 深化) + 仿真真跑 / OTA A/B 分区真切 / SocketCAN 真嗅探 等留 深化) - 覆盖 ECU + ADAS 域控 + IVI + V2X 4 大子系统 - 覆盖 CAN / CAN-FD / LIN / FlexRay / Automotive Ethernet / DoIP/UDS / SOME-IP / V2X 8 协议 diff --git a/runtime/orchestrator/agents/base.py b/runtime/orchestrator/agents/base.py index 7d2a1d47..86b09772 100644 --- a/runtime/orchestrator/agents/base.py +++ b/runtime/orchestrator/agents/base.py @@ -80,7 +80,7 @@ def summary(self, output: dict[str, Any]) -> str: # noqa: ARG002 def run(self, ctx: RunnerContext) -> RunnerResult: """ 执行 LLM-driven agent。ok/degraded 语义: - - stub/mock 模式: ok=True + degraded=True (mock 兜底,主宪章 §33 selftest 允许) + - stub/mock 模式: ok=True + degraded=True (mock 兜底, selftest 允许) - 真 LLM 成功 + JSON 解析 OK: ok=True + degraded=False (真输出) - 真 LLM 成功但 JSON 解析错: ok=False + degraded=True (LLM 回了但不合规) - exec 模式 LLM 失败 fallback: ok=False + degraded=True (不再假绿) @@ -94,7 +94,7 @@ def run(self, ctx: RunnerContext) -> RunnerResult: if ctx.settings_provider == "stub" or ctx.mode == "mock": # stub/mock 模式: 输出 mock,标 degraded output = self.mock_output(ctx) - raw = "[stub] mock output(主宪章 §33 selftest 兜底)" + raw = "[stub] mock output (selftest 兜底)" ok = True degraded = True else: @@ -176,7 +176,7 @@ def get_runner(name: str) 
-> AgentRunner | None: return cls() if cls else None -# Skill runner registry (V1.21.0 — skill LLM-driven rollout 基础设施). +# Skill runner registry. # SkillRunner 接口与 AgentRunner 100% 一致 (system_prompt / user_prompt / # mock_output / summary / output_file / run), 仅 registry 独立, 避免 expert/skill # 同名冲突,且让 catalog / router / orchestrator 按 kind 路由清晰。 diff --git a/runtime/orchestrator/agents/bug_manager.py b/runtime/orchestrator/agents/bug_manager.py index 83e6c2f7..0acfe4f0 100644 --- a/runtime/orchestrator/agents/bug_manager.py +++ b/runtime/orchestrator/agents/bug_manager.py @@ -1,4 +1,4 @@ -"""bug-manager · 分类失败 → 产 BugTracker-ready Bug 列表(主宪章 §37).""" +"""bug-manager · 分类失败 → 产 BugTracker-ready Bug 列表.""" from __future__ import annotations @@ -13,7 +13,7 @@ class BugManager(AgentRunner): def system_prompt(self) -> str: return ( "你是 Test-Agent 项目内 bug-manager 专家(agents/08-Bug管理.md)。\n" - "职责:把 test-executor 的失败列表转 BugTracker-ready Bug(默认 zentao,可换 Jira/GitHub Issues 等,主宪章 §37)。\n" + "职责:把 test-executor 的失败列表转 BugTracker-ready Bug。\n" "原则:\n" "1) severity 权威映射:1=P0(阻塞)/ 2=P1(高)/ 3=P2(中)/ 4=P3(低)\n" "2) STAR 格式:Situation / Task / Action / Result\n" @@ -69,7 +69,7 @@ def mock_output(self, ctx: RunnerContext) -> dict[str, Any]: "severity": 1, # P0 阻塞 — 不应作为发版决策依据 "pri": 1, "steps": [ - "检查上游 expert 实装状态 (ROADMAP.md V1.15-V1.20 rollout)", + "检查上游 expert 实装状态 (ROADMAP.md rollout)", "确认 LLM provider 不在 stub mode (settings.llm_provider)", "若 expert 处于 rollout,等待对应版本完成实装", ], diff --git a/runtime/orchestrator/agents/env_manager.py b/runtime/orchestrator/agents/env_manager.py index d8855b4c..9d970578 100644 --- a/runtime/orchestrator/agents/env_manager.py +++ b/runtime/orchestrator/agents/env_manager.py @@ -1,8 +1,8 @@ """env-manager · LLM 读 PRD + 上游需求摘要 → 环境检查清单 + 准备步骤. 
-V1.15.0 minimum viable (ROADMAP rollout #1 落地): +minimum viable (ROADMAP rollout #1 落地): - 仅生成 env checklist + prep steps 结构化 markdown/JSON -- 不实装 04-环境管理.md 全 5 节 (Docker / 异常退避 / 清理等留 V1.x 深化) +- 不实装 04-环境管理.md 全 5 节 (Docker / 异常退避 / 清理等留 深化) - 输出消费者: data-preparer / automation-engineer / test-executor """ diff --git a/runtime/orchestrator/agents/mobile_tester.py b/runtime/orchestrator/agents/mobile_tester.py index d257843b..20ec7f18 100644 --- a/runtime/orchestrator/agents/mobile_tester.py +++ b/runtime/orchestrator/agents/mobile_tester.py @@ -1,9 +1,9 @@ """mobile-tester · LLM 读 PRD + 上游摘要 → 移动测试用例 + ADB/Xcode 命令清单. -V1.16.0 minimum viable (ROADMAP rollout #2 落地): +minimum viable (ROADMAP rollout #2 落地): - 仅生成 mobile test cases + device commands + test_environment 结构化 JSON - 不实装 10-移动测试.md 全部职责 (Appium driver 真跑 / 云真机集成 / 弱网 / 权限弹窗 - 等留 V1.x 深化) + 等留 深化) - 覆盖 Android / iOS 原生 + 微信/支付宝/抖音 小程序 """ diff --git a/runtime/orchestrator/agents/pentest_tester.py b/runtime/orchestrator/agents/pentest_tester.py index f4f3ac1c..5d74258a 100644 --- a/runtime/orchestrator/agents/pentest_tester.py +++ b/runtime/orchestrator/agents/pentest_tester.py @@ -1,12 +1,12 @@ """pentest-tester · LLM 读 PRD + 安全上下文 → 渗透测试计划 + 工具调用清单. 
-V1.19.0 minimum viable (ROADMAP rollout #5 落地): +minimum viable (ROADMAP rollout #5 落地): - 仅生成 pentest plan (target_scope + recon/vuln/exploit phases + reporting 结构) 结构化 JSON - 不实装 15-渗透测试.md 全部职责 (sqlmap / Metasploit / Hydra 真跑 / Static-Dynamic - Correlation 实装 / PoC 沙箱执行 等留 V1.x 深化 — 真执行守护已在 utils 层 + Correlation 实装 / PoC 沙箱执行 等留 深化 — 真执行守护已在 utils 层 `api_security_scanner.py` / `ai_adversarial.py` 用 TAGENT_PENTEST_AUTHORIZED env gate) - 覆盖 5 攻击域 (Injection / XSS / SSRF / Auth / Authz) + API + 移动 + AI 应用安全 -- 主宪章 §24 safe-by-default + Shannon 哲学 (仅 working PoC 才入报告) +- safe-by-default + Shannon 哲学 (仅 working PoC 才入报告) 授权边界:本 runner 只输出**测试计划文本**,不调外部攻击工具,不执行不可逆操作。 法律责任 (CFAA / 网络安全法 / NIS2) 在操作者侧,见 SECURITY.md L84 "武器化代码使用边界"。 diff --git a/runtime/orchestrator/agents/system_tester.py b/runtime/orchestrator/agents/system_tester.py index 800b3caf..5474a238 100644 --- a/runtime/orchestrator/agents/system_tester.py +++ b/runtime/orchestrator/agents/system_tester.py @@ -1,10 +1,10 @@ """system-tester · LLM 读 PRD + IoT/串口/MQTT/MQ/Tracing 上下文 → 系统集成测试用例 + 设备命令清单 + 协议特定配置. 
-V1.18.0 minimum viable (ROADMAP rollout #4 落地): +minimum viable (ROADMAP rollout #4 落地): - 仅生成 test_cases + device_commands + protocol_specific 结构化 JSON - 不实装 13-系统集成测试.md 全部职责 (paramiko/pyserial/paho-mqtt 真跑 / FFmpeg - 解码 / Jaeger 查询执行 / Kafka consumer 真起 等留 V1.x 深化) + 解码 / Jaeger 查询执行 / Kafka consumer 真起 等留 深化) - 覆盖 IoT (SSH/串口/MQTT/Modbus) + 音视频 (FFmpeg) + 链路追踪 (Jaeger/OpenTelemetry) + 消息队列 (Kafka/RabbitMQ) + 跨服务集成 """ diff --git a/runtime/orchestrator/agents/test_executor.py b/runtime/orchestrator/agents/test_executor.py index 81d870f5..307a676c 100644 --- a/runtime/orchestrator/agents/test_executor.py +++ b/runtime/orchestrator/agents/test_executor.py @@ -17,7 +17,7 @@ def system_prompt(self) -> str: "原则:\n" "1) 四阶段执行:冒烟(P0) → 回归(P0+P1) → 全量 → 性能\n" "2) 失败 4 类:product_bug / test_code_bug / env_issue / flaky\n" - "3) Flaky 检测:连续 3 跑 2 过即标 flaky 隔离(主宪章 §21)\n" + "3) Flaky 检测:连续 3 跑 2 过即标 flaky 隔离\n" "4) 不真跑 sandbox,产**执行计划 JSON**(由 utils 真执行)\n" "输出严格 JSON,不 markdown 包裹。" ) diff --git a/runtime/orchestrator/agents/test_lead.py b/runtime/orchestrator/agents/test_lead.py index 190796b8..f99af013 100644 --- a/runtime/orchestrator/agents/test_lead.py +++ b/runtime/orchestrator/agents/test_lead.py @@ -17,8 +17,8 @@ def system_prompt(self) -> str: "原则:\n" "1) 看 requirements / scripts / execution_plan / bug_drafts 完整链路\n" "2) 决策标准:P0 Bug=0 + 回归通过率 ≥ 90% + 性能门禁过 = go;否则 conditional / no-go\n" - "3) 业务语言(主宪章 §10 五铭文 #5):管理层 / 开发都能秒懂\n" - "4) 标 skin-in-the-game:本决策**人类签字**,Agent 仅给建议(主宪章 §10 第 5 铭文)\n" + "3) 业务语言:管理层 / 开发都能秒懂\n" + "4) 标 skin-in-the-game:本决策**人类签字**,Agent 仅给建议\n" "5) 列出已知遗留 + 兜底方案\n" "输出严格 JSON,不 markdown 包裹。" ) @@ -34,14 +34,14 @@ def user_prompt(self, ctx: RunnerContext) -> str: if degraded_upstream: degraded_block = ( f"\n## ⚠ 上游 degraded 警示 (强制约束)\n" - f"以下上游 expert 输出降级 (mock 兜底 / LLM 失败 fallback / 未实装 V1.x rollout):\n" + f"以下上游 expert 输出降级 (mock 兜底 / LLM 失败 fallback / 未实装 rollout):\n" f"{degraded_upstream}\n\n" f"**强制要求**:\n" f"1. 
`verdict` **绝不能输出 'go'** — 因为本次测试数据不完整\n" f"2. `verdict` 应输出 `conditional`(部分数据可信) 或 `no-go`(P0 缺失维度过多)\n" f"3. `known_risks` **必须列出每个 degraded expert 名**及对应未覆盖维度\n" f"4. `rationale` 必须包含「测试数据不完整,基于 {len(degraded_upstream)} 个降级 expert 无法做发版决策」\n" - f"5. `fallback_plan` 必须包含「等 V1.x rollout 完成后重跑」\n" + f"5. `fallback_plan` 必须包含「等 rollout 完成后重跑」\n" ) return ( @@ -68,7 +68,7 @@ def mock_output(self, ctx: RunnerContext) -> dict[str, Any]: bug = ctx.upstream.get("bug-manager", {}).get("summary", {}) p0 = bug.get("p0", 0) if isinstance(bug, dict) else 0 - # V1.14 防 mock 闭环: 检查上游是否有 degraded 信号 + # 防 mock 闭环: 检查上游是否有 degraded 信号 # (mock 兜底 / LLM 失败 fallback / JSON 解析错 / rollout expert 被路由) degraded_upstream = [ name for name, meta in ctx.upstream_meta.items() @@ -88,7 +88,7 @@ def mock_output(self, ctx: RunnerContext) -> dict[str, Any]: ) rationale = ( f"防 mock 闭环触发: 上游 expert {degraded_upstream} 输出 degraded " - f"(mock 兜底 / LLM 失败 / 未实装 V1.x rollout)。" + f"(mock 兜底 / LLM 失败 / 未实装 rollout)。" f"不能基于不完整数据输出 GO,降级 conditional 等人审。" ) known_risks = [ diff --git a/runtime/orchestrator/agents/visual_tester.py b/runtime/orchestrator/agents/visual_tester.py index bd9b64b9..a6b7e0ef 100644 --- a/runtime/orchestrator/agents/visual_tester.py +++ b/runtime/orchestrator/agents/visual_tester.py @@ -1,9 +1,9 @@ """visual-tester · LLM 读 PRD + UI 描述 → 视觉测试点 + 视觉对比脚本片段. 
-V1.17.0 minimum viable (ROADMAP rollout #3 落地): +minimum viable (ROADMAP rollout #3 落地): - 仅生成 visual test points + comparison scripts + tolerance + baseline_strategy 结构化 JSON - 不实装 12-视觉游戏测试.md 全部职责 (Airtest 真跑 / OCR 调用 / SSIM 像素对比执行 - 等留 V1.x 深化) + 等留 深化) - 覆盖 Web Canvas/WebGL + 手游/PC 游戏 + OCR + 视觉回归 """ diff --git a/runtime/orchestrator/direct.py b/runtime/orchestrator/direct.py index 78f2c9e0..063b7d67 100644 --- a/runtime/orchestrator/direct.py +++ b/runtime/orchestrator/direct.py @@ -113,7 +113,7 @@ def run_decision_direct(decision_dict: dict[str, Any], run_id: str, max_workers: configure_logging() init_tracing() log = bind_run(run_id) - reset_upstream_cache() # V1.14 主宪章 §40 + reset_upstream_cache() # decision = RoutingDecision.model_validate(decision_dict) ordered: list[DAGNode] = decision.topological() log.info("direct flow start: run_id={} nodes={}", run_id, len(ordered)) @@ -237,7 +237,7 @@ def run_decision_direct(decision_dict: dict[str, Any], run_id: str, max_workers: # L2-C: rollout 节点 + on_failure=skip 节点 rollout_skipped = [ nid for nid, r in results.items() - if not r.get("ok") and "[V1.x rollout]" in (r.get("stderr_tail") or "") + if not r.get("ok") and "[rollout]" in (r.get("stderr_tail") or "") ] + skipped summary = { diff --git a/runtime/orchestrator/flows.py b/runtime/orchestrator/flows.py index d4d0963a..8878c0c6 100644 --- a/runtime/orchestrator/flows.py +++ b/runtime/orchestrator/flows.py @@ -39,7 +39,7 @@ def run_decision_flow(decision_dict: dict[str, Any], run_id: str, on_progress: A configure_logging() init_tracing() log = bind_run(run_id) - reset_upstream_cache() # V1.14 主宪章 §40 — 每 run 清 runner 间产物缓存 + reset_upstream_cache() # 每 run 清 runner 间产物缓存 decision = RoutingDecision.model_validate(decision_dict) ordered: list[DAGNode] = decision.topological() log.info("flow start: run_id={} nodes={}", run_id, len(ordered)) @@ -91,7 +91,7 @@ def run_decision_flow(decision_dict: dict[str, Any], run_id: str, on_progress: A # L2-C: 识别 rollout 节点 + 
on_failure=skip 节点 rollout_skipped = [ nid for nid, r in results.items() - if not r.get("ok") and "[V1.x rollout]" in (r.get("stderr_tail") or "") + if not r.get("ok") and "[rollout]" in (r.get("stderr_tail") or "") ] + skipped summary = { diff --git a/runtime/orchestrator/skills/__init__.py b/runtime/orchestrator/skills/__init__.py index 90b120b2..350899d6 100644 --- a/runtime/orchestrator/skills/__init__.py +++ b/runtime/orchestrator/skills/__init__.py @@ -1,10 +1,10 @@ -"""Real LLM-driven skill runners (V1.x · ALL 18/18 rollout + 2 ex-vision complete). +"""Real LLM-driven skill runners (· ALL 18/18 rollout + 2 ex-vision complete). 18 production runners across 4 domains: - General: mobile-test, visual-test, system-test, eval-harness - Pentest: pentest-coordinator, pentest-recon, pentest-vuln, pentest-exploit, pentest-api, pentest-web, pentest-report - Automotive: automotive-test, automotive-can-bus-test, automotive-adas-scenario, automotive-ota-update-test, automotive-hil-loop-test -- Meta (V1.x ex-vision): agent-introspection-debugging, build-your-own-x-explorer +- Meta (ex-vision): agent-introspection-debugging, build-your-own-x-explorer """ from runtime.orchestrator.agents.base import ( # noqa: F401 diff --git a/runtime/orchestrator/skills/agent_introspection_debugging.py b/runtime/orchestrator/skills/agent_introspection_debugging.py index 862abed9..a2e57e9e 100644 --- a/runtime/orchestrator/skills/agent_introspection_debugging.py +++ b/runtime/orchestrator/skills/agent_introspection_debugging.py @@ -1,4 +1,4 @@ -"""agent-introspection-debugging skill · 五维自省分析 (V1.x). +"""agent-introspection-debugging skill · 五维自省分析 . 
职责: 对 agent 行为做五维自省 (决策回放/工具调用/token/上下文/状态机) → 结构化报告。 """ diff --git a/runtime/orchestrator/skills/automotive_adas_scenario.py b/runtime/orchestrator/skills/automotive_adas_scenario.py index f44cb901..a4595667 100644 --- a/runtime/orchestrator/skills/automotive_adas_scenario.py +++ b/runtime/orchestrator/skills/automotive_adas_scenario.py @@ -1,4 +1,4 @@ -"""automotive-adas-scenario · ADAS 场景库测试编排 (V1.31.0).""" +"""automotive-adas-scenario · ADAS 场景库测试编排 .""" from __future__ import annotations from pathlib import Path diff --git a/runtime/orchestrator/skills/automotive_can_bus_test.py b/runtime/orchestrator/skills/automotive_can_bus_test.py index ce62fb97..221b5818 100644 --- a/runtime/orchestrator/skills/automotive_can_bus_test.py +++ b/runtime/orchestrator/skills/automotive_can_bus_test.py @@ -1,4 +1,4 @@ -"""automotive-can-bus-test · CAN/CAN-FD/LIN/FlexRay/SOME-IP 协议测试编排 (V1.31.0).""" +"""automotive-can-bus-test · CAN/CAN-FD/LIN/FlexRay/SOME-IP 协议测试编排 .""" from __future__ import annotations from pathlib import Path diff --git a/runtime/orchestrator/skills/automotive_hil_loop_test.py b/runtime/orchestrator/skills/automotive_hil_loop_test.py index d54f2100..bd17ace6 100644 --- a/runtime/orchestrator/skills/automotive_hil_loop_test.py +++ b/runtime/orchestrator/skills/automotive_hil_loop_test.py @@ -1,4 +1,4 @@ -"""automotive-hil-loop-test · HIL/SIL/MIL/PIL 环路编排 (V1.31.0).""" +"""automotive-hil-loop-test · HIL/SIL/MIL/PIL 环路编排 .""" from __future__ import annotations from pathlib import Path diff --git a/runtime/orchestrator/skills/automotive_ota_update_test.py b/runtime/orchestrator/skills/automotive_ota_update_test.py index 26f07a0a..716cc705 100644 --- a/runtime/orchestrator/skills/automotive_ota_update_test.py +++ b/runtime/orchestrator/skills/automotive_ota_update_test.py @@ -1,4 +1,4 @@ -"""automotive-ota-update-test · OTA 升级测试编排 (V1.31.0).""" +"""automotive-ota-update-test · OTA 升级测试编排 .""" from __future__ import annotations from pathlib import Path diff 
--git a/runtime/orchestrator/skills/automotive_test.py b/runtime/orchestrator/skills/automotive_test.py index d6ee2841..0dc01a2f 100644 --- a/runtime/orchestrator/skills/automotive_test.py +++ b/runtime/orchestrator/skills/automotive_test.py @@ -1,4 +1,4 @@ -"""automotive-test skill · 整车测试主编排 (V1.31.0 batch). +"""automotive-test skill · 整车测试主编排. 10 阶段: HARA+ASIL → 静态 MISRA → 单元 MC/DC → SIL/PIL → HIL → CAN → ADAS → OTA → 合规 → 报告 """ diff --git a/runtime/orchestrator/skills/build_your_own_x_explorer.py b/runtime/orchestrator/skills/build_your_own_x_explorer.py index e7daaefb..4aa475ec 100644 --- a/runtime/orchestrator/skills/build_your_own_x_explorer.py +++ b/runtime/orchestrator/skills/build_your_own_x_explorer.py @@ -1,7 +1,7 @@ -"""build-your-own-x-explorer skill · 教学层 byox deep-dive 推荐 (V1.32.0). +"""build-your-own-x-explorer skill · 教学层 byox deep-dive 推荐. 职责: 据用户当前测试场景 + 时间预算, 从 13 类 byox KB 推 deep-dive 路径。 -铁律: 1) 必问时间预算 2) 不强推 3) 不复制全文。 +规则: 1) 必问时间预算 2) 不强推 3) 不复制全文。 """ from __future__ import annotations @@ -19,7 +19,7 @@ def system_prompt(self) -> str: "(database/network-stack/web-server/git/search-engine/shell/regex-engine/" "programming-language/web-browser/bot/...) KB 推 deep-dive 学习路径, " "每条带 estimated_hours + why。\n" - "铁律: 1) 必问时间预算 (无预算→拒推) 2) 不强推 (用户测试主线优先) 3) 不复制 tutorial 全文。\n" + "规则: 1) 必问时间预算 (无预算→拒推) 2) 不强推 (用户测试主线优先) 3) 不复制 tutorial 全文。\n" "输出严格 JSON。" ) diff --git a/runtime/orchestrator/skills/eval_harness.py b/runtime/orchestrator/skills/eval_harness.py index 8cade3d3..4ec67aec 100644 --- a/runtime/orchestrator/skills/eval_harness.py +++ b/runtime/orchestrator/skills/eval_harness.py @@ -1,6 +1,6 @@ """eval-harness skill · LLM 读上游 ai-tester 产物 → 4 维度 LLM/AI 评测编排. 
-V1.27.0 minimum viable (ROADMAP skill rollout #5 落地): +minimum viable (ROADMAP skill rollout #5 落地): - LLM 读 PRD + 上游 ai-tester expert 产物 → 5 阶段评测计划 (评测配置 / pass@k / 稳定性 / 延迟 / 报告归档) + 质量门禁 + 安全护栏 diff --git a/runtime/orchestrator/skills/mobile_test.py b/runtime/orchestrator/skills/mobile_test.py index 18ac7157..500a7450 100644 --- a/runtime/orchestrator/skills/mobile_test.py +++ b/runtime/orchestrator/skills/mobile_test.py @@ -1,6 +1,6 @@ """mobile-test skill · LLM 读上游 mobile-tester 产物 → 6 阶段移动端执行编排. -V1.23.0 minimum viable (ROADMAP skill rollout #2 落地): +minimum viable (ROADMAP skill rollout #2 落地): - LLM 读 PRD + 上游 mobile-tester expert 产物 → 6 阶段执行计划 (设备就绪 / Appium / 用例批次 / 性能采集 / Monkey / 报告归档) + 质量门禁 + 跨平台并行策略 diff --git a/runtime/orchestrator/skills/pentest_api.py b/runtime/orchestrator/skills/pentest_api.py index 1e3d85f5..ebf9b80b 100644 --- a/runtime/orchestrator/skills/pentest_api.py +++ b/runtime/orchestrator/skills/pentest_api.py @@ -1,6 +1,6 @@ """pentest-api skill · OWASP API Top 10 2023 编排. 
-V1.30.0 (pentest batch 2): +(pentest batch 2): - API1-API10 全覆盖: BOLA/Auth/MassAssignment/RateLimit/SSRF/CORS/ShadowAPI - OpenAPI 驱动 fuzz + GraphQL introspection + gRPC/WebSocket """ @@ -15,7 +15,7 @@ @register_skill("pentest-api") class PentestApi(AgentRunner): def system_prompt(self) -> str: - return "你是 pentest-api skill。职责:OWASP API Top 10 2023 全覆盖编排。\n铁律:1)授权前置 2)scope 内 3)prod 拒绝\n输出严格 JSON。" + return "你是 pentest-api skill。职责:OWASP API Top 10 2023 全覆盖编排。\n规则:1)授权前置 2)scope 内 3)prod 拒绝\n输出严格 JSON。" def user_prompt(self, ctx: RunnerContext) -> str: return f"## PRD\n```\n{ctx.artifact_text[:3000]}\n```\n\n## schema\n{{\n \"project_name\":\"string\",\"run_id\":\"string\",\n \"api_categories\":[{{\"id\":\"API1\",\"name\":\"BOLA\",\"checks\":[\"string\"]}}],\n \"openapi_driven\":true,\"graphql_enabled\":false,\n \"outputs\":{{\"api_vulns\":\"workspace/渗透/api_vulns.json\"}},\n \"risks\":[\"string\"],\"confidence\":\"string\"\n}}" def mock_output(self, ctx) -> dict: # noqa: ARG002 diff --git a/runtime/orchestrator/skills/pentest_coordinator.py b/runtime/orchestrator/skills/pentest_coordinator.py index 92d54336..e6ef6e8c 100644 --- a/runtime/orchestrator/skills/pentest_coordinator.py +++ b/runtime/orchestrator/skills/pentest_coordinator.py @@ -1,11 +1,11 @@ """pentest-coordinator skill · LLM 读 PRD + 授权上下文 → 5 阶段渗透流程编排. 
-V1.21.0 minimum viable (ROADMAP skill rollout #1 落地, 解锁 rollout skill 流水线): +minimum viable (ROADMAP skill rollout #1 落地, 解锁 rollout skill 流水线): - LLM 读 PRD + tagent.yml 授权摘要 + 上游 pentest-tester expert 产物 → 5 阶段并发计划 (recon / vuln / exploit / post-exploit / report) + 子 skill 调用顺序 + 授权前置检查 evidence - 不实装 skills/pentest-coordinator.md 全部职责 (subagent pool 真起 / Allure 报告生成 / decisions/ 真写入 等留后续深化) -- shannon 哲学 (仅 working PoC 入报告) + 主宪章 §22 决策不可逆禁止 + §24 safe-by-default +- shannon 哲学 (仅 working PoC 入报告) + 不可逆操作 + safe-by-default 授权边界:本 skill 只输出**编排计划文本**,不调子 skill,不执行不可逆操作。 真执行守护在 utils 层 (`api_security_scanner.py` / `ai_adversarial.py` env gate)。 @@ -27,7 +27,7 @@ def system_prompt(self) -> str: "你是 Test-Agent 项目内 pentest-coordinator skill(skills/pentest-coordinator.md)。\n" "职责:基于 PRD + 授权上下文 + 上游 pentest-tester expert 产物,编排渗透测试 5 阶段流程。\n" "原则:\n" - "1) 前置检查铁律 (主宪章 §24):tagent.yml pentest.authorized=true + pentest.scope=[list]\n" + "1) 前置检查规则 :tagent.yml pentest.authorized=true + pentest.scope=[list]\n" " 必须显式;否则编排拒绝运行 + evidence 记录拒绝理由\n" "2) target 必须在 scope 内;不在 → 拒绝\n" "3) prod 环境 → 拒绝;只允许 staging / sandbox / dev\n" @@ -78,7 +78,7 @@ def user_prompt(self, ctx: RunnerContext) -> str: ' "outputs": {\n' ' "report_path": "workspace/渗透报告/pentest_{target}_{date}.md",\n' ' "evidence_dir": "workspace/测试报告/{项目名}/evidence/{run_id}/",\n' - ' "bug_tickets_format": "CVSS → P0-P3 (主宪章 §18-4)",\n' + ' "bug_tickets_format": "CVSS → P0-P3 ",\n' ' "allure_report": "workspace/Allure/pentest/{run_id}/"\n' " },\n" ' "refuse_conditions": [\n' @@ -153,7 +153,7 @@ def mock_output(self, ctx: RunnerContext) -> dict[str, Any]: # noqa: ARG002 "outputs": { "report_path": "workspace/渗透报告/pentest_staging.example.com_20260516.md", "evidence_dir": "workspace/测试报告/{项目名}/evidence/selftest-20260516-000001/", - "bug_tickets_format": "CVSS 9-10=P0 / 7-8.9=P1 / 4-6.9=P2 / <4=P3 (主宪章 §18-4)", + "bug_tickets_format": "CVSS 9-10=P0 / 7-8.9=P1 / 4-6.9=P2 / <4=P3 ", "allure_report": 
"workspace/Allure/pentest/selftest-20260516-000001/", }, "refuse_conditions": [ @@ -166,7 +166,7 @@ def mock_output(self, ctx: RunnerContext) -> dict[str, Any]: # noqa: ARG002 "子 skill 失败级联致整体流程中断 (建议每阶段独立 checkpoint)", "subagent pool 耗尽致并发降级为串行 (建议 max=5 + 队列回退)", "授权 yaml 被运行时篡改 (建议启动期 SHA-256 锁定 + 运行期不重读)", - "aux_client 串话致敏感数据进主 session (建议 §22 严格隔离 + telemetry 告警)", + "aux_client 串话致敏感数据进主 session (建议 严格隔离 + telemetry 告警)", ], "confidence": "medium", "_mode": "mock(stub provider)", diff --git a/runtime/orchestrator/skills/pentest_exploit.py b/runtime/orchestrator/skills/pentest_exploit.py index cf666ec6..371da4d0 100644 --- a/runtime/orchestrator/skills/pentest_exploit.py +++ b/runtime/orchestrator/skills/pentest_exploit.py @@ -1,8 +1,8 @@ """pentest-exploit skill · LLM 读 vuln_candidates → 沙箱 PoC 验证编排. -V1.30.0 minimum viable (pentest batch 2): +minimum viable (pentest batch 2): - 读 pentest-vuln 产物 vuln_candidates.json → 5 攻击域并发 PoC 尝试 -- 沙箱铁律: Docker/VM 内跑, host 禁直接执行 +- 沙箱规则: Docker/VM 内跑, host 禁直接执行 - destructive-guard: 拦截 rm/DROP/truncate/fdisk - 仅成功+可复现 PoC 标 verified, 其余丢弃 """ @@ -19,7 +19,7 @@ class PentestExploit(AgentRunner): def system_prompt(self) -> str: return ( "你是 pentest-exploit skill。职责:读 vuln_candidates,编排沙箱 PoC 验证。\n" - "铁律:1)Docker/VM 沙箱内跑 2)destructive ops 拦截 3)仅 verified + reproducible 入报告\n" + "规则:1)Docker/VM 沙箱内跑 2)destructive ops 拦截 3)仅 verified + reproducible 入报告\n" "输出严格 JSON。" ) def user_prompt(self, ctx: RunnerContext) -> str: diff --git a/runtime/orchestrator/skills/pentest_recon.py b/runtime/orchestrator/skills/pentest_recon.py index 92daca54..a887869b 100644 --- a/runtime/orchestrator/skills/pentest_recon.py +++ b/runtime/orchestrator/skills/pentest_recon.py @@ -1,6 +1,6 @@ """pentest-recon skill · LLM 读上游 pentest-tester 产物 → 侦察信息收集编排. 
-V1.28.0 minimum viable (ROADMAP pentest skill rollout #6 落地): +minimum viable (ROADMAP pentest skill rollout #6 落地): - LLM 读 PRD + 授权上下文 + 上游 pentest-tester 产物 → 主动/被动侦察计划 - 子域枚举 / 端口扫描 / 服务指纹 / Web 技术栈 / 路径发现 / 历史 URL / 泄露凭据 - 授权前置: pentest.recon_active=true 才允许主动扫, 否则仅 passive @@ -23,7 +23,7 @@ def system_prompt(self) -> str: "你是 Test-Agent 项目内 pentest-recon skill(skills/pentest-recon.md)。\n" "职责:基于 PRD + 授权上下文 + 上游 pentest-tester 产物,编排渗透侦察信息收集计划。\n" "原则:\n" - "1) 授权前置铁律: tagent.yml pentest.recon_active=true 允许主动扫;否则仅 passive\n" + "1) 授权前置规则: tagent.yml pentest.recon_active=true 允许主动扫;否则仅 passive\n" "2) scope 防护: target 在 pentest.scope 内;跨网段/跨公司域 → 拒绝\n" "3) prod 环境 → 拒绝;仅 staging/sandbox/dev\n" "4) 被动优先:子域枚举(cert.sh/DNSdumpster) > 主动扫描\n" diff --git a/runtime/orchestrator/skills/pentest_report.py b/runtime/orchestrator/skills/pentest_report.py index 1556aedb..0e0ed8ef 100644 --- a/runtime/orchestrator/skills/pentest_report.py +++ b/runtime/orchestrator/skills/pentest_report.py @@ -1,6 +1,6 @@ """pentest-report skill · 渗透报告生成编排 (shannon 哲学). 
-V1.30.0 (pentest batch 2): +(pentest batch 2): - 仅 verified PoC 入报告; unverified 丢弃 - CVSS 3.1 + CWE + 修复建议 + PII scrub - 标准 7 章报告模板 @@ -16,7 +16,7 @@ @register_skill("pentest-report") class PentestReport(AgentRunner): def system_prompt(self) -> str: - return "你是 pentest-report skill。职责:仅 working PoC 入报告 (shannon 哲学)。\n铁律:1)仅 verified 2)PII scrub 3)CVSS 3.1 必算 4)修复含代码\n输出严格 JSON。" + return "你是 pentest-report skill。职责:仅 working PoC 入报告 (shannon 哲学)。\n规则:1)仅 verified 2)PII scrub 3)CVSS 3.1 必算 4)修复含代码\n输出严格 JSON。" def user_prompt(self, ctx: RunnerContext) -> str: return f"## PRD\n```\n{ctx.artifact_text[:3000]}\n```\n\n## schema\n{{\n \"project_name\":\"string\",\"run_id\":\"string\",\n \"sections\":[\"executive_summary\",\"scope_auth\",\"methodology\",\"findings\",\"remediation\",\"appendix\"],\n \"findings\":[{{\"cwe\":\"string\",\"cvss\":0.0,\"severity\":\"P0|P1|P2|P3\",\"poc_verified\":true,\"remediation\":\"string\"}}],\n \"pii_scrub\":true,\n \"outputs\":{{\"report_md\":\"workspace/渗透/pentest_report.md\",\"report_json\":\"workspace/渗透/pentest_report.json\"}},\n \"risks\":[\"string\"],\"confidence\":\"string\"\n}}" def mock_output(self, ctx) -> dict: # noqa: ARG002 diff --git a/runtime/orchestrator/skills/pentest_vuln.py b/runtime/orchestrator/skills/pentest_vuln.py index 9e5fbae2..e607c8c8 100644 --- a/runtime/orchestrator/skills/pentest_vuln.py +++ b/runtime/orchestrator/skills/pentest_vuln.py @@ -5,7 +5,7 @@ Disclaimer: All secrets/keys/passwords in this file are demo examples for pentest check rules — not real credentials. Tests require explicit opt-in. 
-V1.29.0 minimum viable (ROADMAP pentest skill rollout #3 落地): +minimum viable (ROADMAP pentest skill rollout #3 落地): - LLM 读 PRD + 授权 + 上游 pentest-tester + pentest-recon 产物 → 漏洞发现计划 - 5 攻击域并发: Injection / XSS / SSRF / Auth / Authz - 白盒: SAST 源码扫描 + CPG (code property graph) source→sink trace @@ -29,7 +29,7 @@ def system_prompt(self) -> str: "你是 Test-Agent 项目内 pentest-vuln skill(skills/pentest-vuln.md)。\n" "职责:基于 PRD + 授权 + 上游侦察产物,编排 5 攻击域并发漏洞发现计划。\n" "原则:\n" - "1) 授权铁律: tagent.yml pentest 段 must have authorized=true + scope list\n" + "1) 授权规则: tagent.yml pentest 段 must have authorized=true + scope list\n" "2) 5 攻击域并发不串行: Injection / XSS / SSRF / Auth / Authz\n" "3) 白盒优先 (如有源码): SAST + CPG source→sink trace, 非硬编码 allowlist\n" "4) 黑盒模式: 用 recon 结果当输入, nuclei templates + Burp + sqlmap 等\n" diff --git a/runtime/orchestrator/skills/pentest_web.py b/runtime/orchestrator/skills/pentest_web.py index 89c90cf2..5f51112a 100644 --- a/runtime/orchestrator/skills/pentest_web.py +++ b/runtime/orchestrator/skills/pentest_web.py @@ -1,6 +1,6 @@ """pentest-web skill · OWASP Top 10 + ASVS 编排. 
-V1.30.0 (pentest batch 2): +(pentest batch 2): - A01-A10 全覆盖 + 2FA/TOTP/SSO 自动登录 + Burp/sqlmap 混合 """ @@ -14,7 +14,7 @@ @register_skill("pentest-web") class PentestWeb(AgentRunner): def system_prompt(self) -> str: - return "你是 pentest-web skill。职责:OWASP Top 10:2021 + ASVS 全覆盖编排。\n铁律:1)授权前置 2)scope 内 3)2FA/TOTP 自动登录支持\n输出严格 JSON。" + return "你是 pentest-web skill。职责:OWASP Top 10:2021 + ASVS 全覆盖编排。\n规则:1)授权前置 2)scope 内 3)2FA/TOTP 自动登录支持\n输出严格 JSON。" def user_prompt(self, ctx: RunnerContext) -> str: return f"## PRD\n```\n{ctx.artifact_text[:3000]}\n```\n\n## schema\n{{\n \"project_name\":\"string\",\"run_id\":\"string\",\n \"owasp_categories\":[{{\"id\":\"A01\",\"name\":\"BrokenAccessControl\",\"tools\":[\"string\"]}}],\n \"auth_auto\":{{\"totp_enabled\":false,\"sso_enabled\":false}},\n \"outputs\":{{\"web_vulns\":\"workspace/渗透/web_vulns.json\"}},\n \"risks\":[\"string\"],\"confidence\":\"string\"\n}}" def mock_output(self, ctx) -> dict: # noqa: ARG002 diff --git a/runtime/orchestrator/skills/system_test.py b/runtime/orchestrator/skills/system_test.py index 93a641a6..d1594a0b 100644 --- a/runtime/orchestrator/skills/system_test.py +++ b/runtime/orchestrator/skills/system_test.py @@ -1,6 +1,6 @@ """system-test skill · LLM 读上游 system-tester 产物 → 6 阶段系统集成测试编排. -V1.26.0 minimum viable (ROADMAP skill rollout #4 落地): +minimum viable (ROADMAP skill rollout #4 落地): - LLM 读 PRD + 上游 system-tester expert 产物 → 6 阶段执行计划 (环境检查 / IoT 测试 / 音视频校验 / 链路追踪 / 消息队列 / 报告归档) + 质量门禁 + 子场景路由策略 diff --git a/runtime/orchestrator/skills/visual_test.py b/runtime/orchestrator/skills/visual_test.py index 1fb7df20..431e6958 100644 --- a/runtime/orchestrator/skills/visual_test.py +++ b/runtime/orchestrator/skills/visual_test.py @@ -1,6 +1,6 @@ """visual-test skill · LLM 读上游 visual-tester 产物 → 5 阶段视觉测试执行编排. 
-V1.24.0 minimum viable (ROADMAP skill rollout #3 落地): +minimum viable (ROADMAP skill rollout #3 落地): - LLM 读 PRD + 上游 visual-tester expert 产物 → 5 阶段执行计划 (环境检查 / 模板图准备 / 视觉冒烟 / 视觉回归 / 报告归档) + 质量门禁 + 多分辨率策略 diff --git a/runtime/registry/registry.py b/runtime/registry/registry.py index 53483c58..39b93e7c 100644 --- a/runtime/registry/registry.py +++ b/runtime/registry/registry.py @@ -41,7 +41,7 @@ class CatalogEntry: tools: list[str] = field(default_factory=list) source_path: str = "" raw_body: str = "" - # V1.14 防 mock (ROADMAP V1.15 Day 0 承诺): + # 防 mock (ROADMAP Day 0 承诺): # 从 frontmatter EXPERT_IMPL_STATUS / SKILL_IMPL_STATUS 解析,执行层据此拒绝路由未实装项。 # 合法值: production / script / rollout / vision / unknown(frontmatter 缺失或值非法时)。 impl_status: str = "unknown" @@ -114,7 +114,7 @@ def _entry_from_file(path: Path, kind: EntryKind) -> CatalogEntry | None: tools = [t.strip() for t in tools_raw.split(",") if t.strip()] else: tools = [] - # V1.14 防 mock: 按 kind 选对应 frontmatter key,缺失或非法值降级 unknown + # 防 mock: 按 kind 选对应 frontmatter key,缺失或非法值降级 unknown status_key = "EXPERT_IMPL_STATUS" if kind == "expert" else "SKILL_IMPL_STATUS" status_raw = meta.get(status_key, "") impl_status = str(status_raw).strip().lower() if status_raw else "" diff --git a/runtime/router/retrieval.py b/runtime/router/retrieval.py index 13fdcedb..c3201e93 100644 --- a/runtime/router/retrieval.py +++ b/runtime/router/retrieval.py @@ -6,7 +6,7 @@ Async-safety: - Called from sync `route()`. If already inside a running event loop (e.g. FastAPI request handler), running `asyncio.run` or `run_coroutine_threadsafe` on the - same loop deadlocks. We detect that and degrade to no-op (charter §21 横切 + same loop deadlocks. We detect that and degrade to no-op (charter 横切 可复现性: never block, never silently misbehave). 
""" diff --git a/runtime/router/router.py b/runtime/router/router.py index 6dd51bcc..06e52ff1 100644 --- a/runtime/router/router.py +++ b/runtime/router/router.py @@ -26,7 +26,7 @@ class RouterError(RuntimeError): def _validate_against_catalog(decision: RoutingDecision, catalog: Catalog) -> list[str]: issues: list[str] = [] - # V1.14 防 mock (ROADMAP V1.15 Day 0 承诺): 检查 expert / skill 实装状态 + # 防 mock (ROADMAP Day 0 承诺): 检查 expert / skill 实装状态 # 单源: catalog entry.impl_status (agents/skills .md frontmatter) # rollout / vision / unknown 状态 router 仍可路由,但 issues 列表标 warning + downgrade confidence # → orchestrator execute_node 跑到时会硬拒并报明确错误 (returncode=2),不输出 mock 数据 @@ -39,7 +39,7 @@ def _validate_against_catalog(decision: RoutingDecision, catalog: Catalog) -> li continue if entry.impl_status in ("rollout", "vision"): issues.append( - f"{n.kind} '{n.name}' 处于 V1.x {entry.impl_status} (id={n.id}); " + f"{n.kind} '{n.name}' 处于 {entry.impl_status} (id={n.id}); " f"test-lead 决策应降级 conditional 或 no-go" ) elif entry.impl_status == "unknown": diff --git a/runtime/router/schema.py b/runtime/router/schema.py index d75f9f3a..e93093f5 100644 --- a/runtime/router/schema.py +++ b/runtime/router/schema.py @@ -19,7 +19,7 @@ class DAGNode(BaseModel): on_failure: Literal["retry", "skip", "abort"] = "retry" timeout_seconds: int = Field(default=1800, ge=1, description="node timeout in seconds") - # Charter §23 教学层字段(可选;LLM 在 learn mode 应填,exec mode 仅 one_liner) + # Charter 教学层字段(可选;LLM 在 learn mode 应填,exec mode 仅 one_liner) one_liner_zh: str = Field(default="", description="≤30 字 why,执行模式输出此字段") one_liner_en: str = Field(default="", description="≤120 chars why for English") why: str = Field(default="", description="long-form rationale (learn mode)") diff --git a/runtime/scheduler/INDEX.md b/runtime/scheduler/INDEX.md index ff731513..da24dd7f 100644 --- a/runtime/scheduler/INDEX.md +++ b/runtime/scheduler/INDEX.md @@ -10,7 +10,7 @@ | `scheduler.py` | tick() 每 60s,跨平台文件锁(fcntl/msvcrt) | | 
`injection_scan.py` | 运行时 prompt 注入扫描(防 skill 加载注入) | -## 规则(主宪章 §22 落地) +## 规则 - **运行时全 prompt 扫描**(非仅 create-time):skill 在 runtime 加载,恶意 skill 可携带注入 - **文件锁防重入**:`workspace/cron/.tick.lock` 双栈实现 diff --git a/runtime/scheduler/injection_scan.py b/runtime/scheduler/injection_scan.py index 24256c23..9d7b8c7b 100644 --- a/runtime/scheduler/injection_scan.py +++ b/runtime/scheduler/injection_scan.py @@ -1,6 +1,6 @@ -"""Runtime prompt injection scan (hermes §1.2 critical). +"""Runtime prompt injection scan (hermes critical). -Charter §22 rule: 非交互自动批准模式下,assembled prompt(含动态加载的 skill 内容) +Charter rule: 非交互自动批准模式下,assembled prompt(含动态加载的 skill 内容) 必须全扫,不止 create-time。 """ @@ -33,7 +33,7 @@ def __init__(self, reason: str, snippet: str) -> None: def scan(text: str) -> None: """Raise PromptInjectionBlocked when any pattern hits. - Charter §22 rule: scan FULL assembled prompt (system + user + tools + skill contents). + Charter rule: scan FULL assembled prompt (system + user + tools + skill contents). """ for pat in SUSPICIOUS: m = pat.search(text) diff --git a/runtime/scheduler/jobs.py b/runtime/scheduler/jobs.py index 832efad0..1f8e6621 100644 --- a/runtime/scheduler/jobs.py +++ b/runtime/scheduler/jobs.py @@ -1,4 +1,4 @@ -"""Cron job storage (hermes-inspired §1.2). +"""Cron job storage (hermes-inspired ). Jobs persist in `workspace/cron/jobs.json`. Each entry: - id: uuid diff --git a/runtime/scheduler/scheduler.py b/runtime/scheduler/scheduler.py index f57d7d9c..7e7382cc 100644 --- a/runtime/scheduler/scheduler.py +++ b/runtime/scheduler/scheduler.py @@ -1,4 +1,4 @@ -"""Scheduler tick loop (hermes §1.2). +"""Scheduler tick loop (hermes ). 
- 60s tick from a background thread - Cross-platform file lock (fcntl/msvcrt) prevents double-run @@ -111,7 +111,7 @@ def run_job(job: dict, *, runner: Callable[[str], dict] | None = None) -> dict: out_file = out_dir / f"{ts}.md" try: - # Charter §22 rule: runtime full-prompt injection scan + # Charter rule: runtime full-prompt injection scan scan(prompt) except PromptInjectionBlocked as e: out_file.write_text(f"# Job {job_id} blocked\n\nreason: {e.reason}\nsnippet: {e.snippet}\n", encoding="utf-8") diff --git a/runtime/storage/objects.py b/runtime/storage/objects.py index b4ed3c69..32fa3e6d 100644 --- a/runtime/storage/objects.py +++ b/runtime/storage/objects.py @@ -15,7 +15,7 @@ class ObjectStore: """Thin facade. Imports MinIO lazily so tests without infra still pass. - Charter §21 横切准则: lazy init is thread-safe (防止并发 caller 重复建桶). + Charter 横切准则: lazy init is thread-safe (防止并发 caller 重复建桶). """ _lock = threading.Lock() diff --git a/runtime/subagent/INDEX.md b/runtime/subagent/INDEX.md index 032907f2..455259d8 100644 --- a/runtime/subagent/INDEX.md +++ b/runtime/subagent/INDEX.md @@ -1,6 +1,6 @@ # subagent 索引 -## 规则(主宪章 §22) +## 规则 - **隔离 client**:子代理用 `auxiliary` LLM client,永不污染主 session prompt cache - **ThreadPool 动态调整**:默认 32 workers,可按并发 evals 数 resize_tool_pool diff --git a/runtime/subagent/aux_client.py b/runtime/subagent/aux_client.py index cb45a94a..0b8ba12a 100644 --- a/runtime/subagent/aux_client.py +++ b/runtime/subagent/aux_client.py @@ -1,4 +1,4 @@ -"""Auxiliary LLM client (hermes §1.3 'never touches main session prompt cache'). +"""Auxiliary LLM client (hermes 'never touches main session prompt cache'). Subagents and curator share NOTHING with the main routing path beyond raw model API. Different env vars (TAGENT_AUX_*) so users can pin a cheaper/faster aux model. 
diff --git a/runtime/subagent/pool.py b/runtime/subagent/pool.py index daa581a4..411b8d5e 100644 --- a/runtime/subagent/pool.py +++ b/runtime/subagent/pool.py @@ -1,4 +1,4 @@ -"""Global ThreadPool for subagent tasks (hermes §1.3 pattern).""" +"""Global ThreadPool for subagent tasks (hermes pattern).""" from __future__ import annotations @@ -26,7 +26,7 @@ def resize_pool(max_workers: int) -> None: """Replace the pool with a new one sized to `max_workers`. Safe to call before tasks are submitted; existing tasks finish on the old pool. - Charter §21 横切预算:避免大并发饥饿。 + Charter 横切预算:避免大并发饥饿。 """ global _executor with _lock: diff --git a/runtime/subagent/spawn.py b/runtime/subagent/spawn.py index 74fe4236..6e1294f6 100644 --- a/runtime/subagent/spawn.py +++ b/runtime/subagent/spawn.py @@ -29,7 +29,7 @@ def spawn(task: Callable[..., Any], *args, **kwargs) -> concurrent.futures.Futur def fanout(tasks: list[Callable[..., Any]], *, timeout: float = 600.0) -> list[SubagentResult]: """Run multiple subagent tasks in parallel; collect results in submission order. - Charter §21 横切准则: + Charter 横切准则: - 失败隔离:任一子任务 crash 不影响其他 - 测试预算:总 timeout 上限 """ diff --git a/runtime/tests/test_agent_runners.py b/runtime/tests/test_agent_runners.py index de6b91de..6daf617a 100644 --- a/runtime/tests/test_agent_runners.py +++ b/runtime/tests/test_agent_runners.py @@ -1,4 +1,4 @@ -"""11 个 LLM-driven AgentRunner 专项 unit test (V1.16-followup, V1.x rollout 收尾). +"""11 个 LLM-driven AgentRunner 专项 unit test. 
覆盖 3 维度 × 11 runner = 33 case (参数化): - registration: @register("name") + __init__.py import 双链路 → get_runner(name) 非空 @@ -8,7 +8,7 @@ - summary 非空: 一行业务摘要存在 (防 summary 改空 regression, report-generator 下游消费) -模板规则锁定 (V1.17+ 新 AgentRunner 必填): +模板规则锁定 : - 加 1 runner → 更新 ALL_RUNNERS 加 1 行 (name, required_keys) - 不加 → 参数化漏覆盖, pytest 不报错但 silent gap @@ -47,27 +47,27 @@ ["verdict", "rationale", "metrics", "known_risks", "fallback_plan", "summary_zh", "requires_human_signoff", "signoff_owner"], ), ( - "env-manager", # V1.15.0 + "env-manager", # ["project_name", "target_env", "env_checks", "prep_steps", "dependencies", "risks", "confidence"], ), ( - "mobile-tester", # V1.16.0 + "mobile-tester", # ["project_name", "target_platform", "test_cases", "device_commands", "test_environment", "mobile_specific", "risks", "confidence"], ), ( - "visual-tester", # V1.17.0 + "visual-tester", # ["project_name", "visual_target_type", "visual_test_points", "comparison_scripts", "tolerance", "baseline_strategy", "risks", "confidence"], ), ( - "system-tester", # V1.18.0 + "system-tester", # ["project_name", "system_target_type", "test_cases", "device_commands", "protocol_specific", "test_environment", "risks", "confidence"], ), ( - "pentest-tester", # V1.19.0 + "pentest-tester", # ["project_name", "test_mode", "target_scope", "recon_phase", "vuln_assessment_phase", "exploit_plan", "reporting", "risks", "confidence"], ), ( - "automotive-tester", # V1.20.0 (V1.x rollout 收尾) + "automotive-tester", # (rollout 收尾) ["project_name", "vehicle_subsystem", "asil_assessment", "test_cases", "bus_test_plan", "adas_scenarios", "ota_plan", "compliance_matrix", "test_environment", "risks", "confidence"], ), ] diff --git a/runtime/tests/test_cli_config.py b/runtime/tests/test_cli_config.py index b4353075..a8dce7a1 100644 --- a/runtime/tests/test_cli_config.py +++ b/runtime/tests/test_cli_config.py @@ -1,4 +1,4 @@ -"""tagent config CLI 测试 (V1.22.0 · 4 子命令 list/show/use/use-compat). 
+"""tagent config CLI 测试 (4 子命令 list/show/use/use-compat). 本文件中所有 API key/secret 均为虚构测试数据,不是真实凭据。 All API keys and secrets in this file are fake test fixtures — not real credentials. diff --git a/runtime/tests/test_impl_status_filter.py b/runtime/tests/test_impl_status_filter.py index bed84850..55b0c270 100644 --- a/runtime/tests/test_impl_status_filter.py +++ b/runtime/tests/test_impl_status_filter.py @@ -1,6 +1,6 @@ """X4 防 mock 闭环测试:registry parse → router warn → orchestrator hard block。 -覆盖 ROADMAP V1.15 Day 0 承诺:rollout / vision / unknown 状态的 expert / skill, +覆盖 ROADMAP Day 0 承诺:rollout / vision / unknown 状态的 expert / skill, router 路由仍可生成 DAG 但 _validate_against_catalog 标 issue + 降 confidence, orchestrator execute_node 跑到时 returncode=2 + stderr "未实装",绝不输出 mock 数据。 @@ -27,22 +27,22 @@ def test_registry_impl_status_no_unknown(): def test_registry_expert_status_counts(): - """Expert 16 = 11 production + 5 script + 0 rollout (V1.20.0 automotive-tester 落地后,V1.x rollout 收尾)。""" + """Expert 16 = 11 production + 5 script + 0 rollout 。""" cat = get_catalog() counts = Counter(e.impl_status for e in cat.experts.values()) assert counts.get("production", 0) == 11, f"expert production 应 11,实 {counts.get('production')}" assert counts.get("script", 0) == 5, f"expert script 应 5,实 {counts.get('script')}" - assert counts.get("rollout", 0) == 0, f"expert rollout 应 0 (V1.x rollout 收尾),实 {counts.get('rollout')}" + assert counts.get("rollout", 0) == 0, f"expert rollout 应 0 (rollout 收尾),实 {counts.get('rollout')}" def test_registry_skill_status_counts(): - """Skill 32 = 25 production + 7 script + 0 rollout + 0 vision (V1.x 全 skill rollout 完成 + 2 ex-vision 实装)。""" + """Skill 32 = 25 production + 7 script + 0 rollout + 0 vision (全 skill rollout 完成 + 2 ex-vision 实装)。""" cat = get_catalog() counts = Counter(e.impl_status for e in cat.skills.values()) assert counts.get("production", 0) == 25, f"skill production 应 25,实 {counts.get('production')}" assert counts.get("script", 0) == 7 assert 
counts.get("rollout", 0) == 0, f"skill rollout 应 0,实 {counts.get('rollout')}" - assert counts.get("vision", 0) == 0, f"skill vision 应 0 (V1.x 后),实 {counts.get('vision')}" + assert counts.get("vision", 0) == 0, f"skill vision 应 0 (后),实 {counts.get('vision')}" # ---------- router 层:_validate_against_catalog warn ---------- @@ -61,7 +61,7 @@ def _mk_decision(*dag_specs: tuple[str, str, str]) -> RoutingDecision: def test_router_flags_rollout_expert(): - # V1.20 V1.x rollout 收尾,所有 expert production/script。 + # rollout 收尾,所有 expert production/script。 # rollout 分支覆盖通过 skill 层 (test_router_flags_rollout_skill,16 skill 仍 rollout)。 # unknown 分支覆盖通过 test_router_flags_unknown_entity。 # 此 test 保留为占位,改测 unknown expert (走相同 hard-block 分支)。 @@ -72,7 +72,7 @@ def test_router_flags_rollout_expert(): def test_router_does_not_falsely_flag_production_skill(): - """V1.36.0 全 rollout 完成 — production skill 不应被 flag 为 rollout/vision。""" + """全 rollout 完成 — production skill 不应被 flag 为 rollout/vision。""" cat = get_catalog() dec = _mk_decision(("n1", "skill", "visual-test")) issues = router._validate_against_catalog(dec, cat) @@ -80,7 +80,7 @@ def test_router_does_not_falsely_flag_production_skill(): def test_router_flags_vision_skill(): - # V1.x 2 ex-vision skill (agent-introspection-debugging / build-your-own-x-explorer) 已实装为 production。 + # 2 ex-vision skill (agent-introspection-debugging / build-your-own-x-explorer) 已实装为 production。 # vision 分支与 rollout 共用 router._validate_against_catalog 同一 if (rollout, vision) 路径, # 现 catalog 无 vision skill,此 test 改测 unknown skill (走相同 hard-block warn 分支),保留覆盖语义。 cat = get_catalog() @@ -114,7 +114,7 @@ def test_router_passes_production_clean(): def test_execute_node_rejects_rollout_expert(): - """V1.20 V1.x rollout 收尾,无 rollout expert。 + """rollout 收尾,无 rollout expert。 rollout 分支覆盖通过 test_execute_node_rejects_rollout_skill (16 skill 仍 rollout)。 expert hard-block 路径覆盖通过 test_execute_node_rejects_unknown_expert (同分支)。 此 test 保留 + 改用 unknown expert 触发同 
returncode=2 hard-block。 @@ -125,14 +125,14 @@ def test_execute_node_rejects_rollout_expert(): def test_execute_node_allows_production_skill(): - """V1.36.0 全 rollout 完成 — production skill 应正常执行 (rc=0),不被硬拒。""" + """全 rollout 完成 — production skill 应正常执行 (rc=0),不被硬拒。""" r = execute_node("automotive-can-bus-test", "skill") assert r.returncode == 0, f"production skill 被误拒: rc={r.returncode} stderr={r.stderr}" assert r.stdout, "production skill 应产出结果" def test_execute_node_rejects_vision_skill(): - # V1.x 2 ex-vision skill 已实装,catalog 无 vision skill。 + # 2 ex-vision skill 已实装,catalog 无 vision skill。 # vision hard-block 分支与 rollout 共用 execute_node 同一拒绝路径, # 此 test 改测 unknown skill (走 returncode=2 同分支),保留覆盖语义。 r = execute_node("phantom-vision-skill", "skill") diff --git a/runtime/tests/test_router.py b/runtime/tests/test_router.py index d1e2daaa..f19b5bda 100644 --- a/runtime/tests/test_router.py +++ b/runtime/tests/test_router.py @@ -33,7 +33,7 @@ def test_router_picks_platform_expert(text, expected_type, expected_expert): def test_router_pentest_includes_coordinator_skill(): - """pentest path 头节点 = pentest-coordinator (kind=skill, V1.21 SkillRunner 首接入).""" + """pentest path 头节点 = pentest-coordinator (kind=skill art = TargetArtifact(kind="text", text="pentest SQL injection penetration test") decision = route(art, client=LLMClient(provider="stub", fallback="stub")) ordered = decision.topological() @@ -49,7 +49,7 @@ def test_router_starts_with_requirements_analyst(): def test_router_ends_with_test_lead_decision(): - """DAG 末节点 = test-lead 决策(主宪章 §40 + agents/README.md 流程 + """DAG 末节点 = test-lead 决策(+ agents/README.md 流程 "bug-manager → report-generator → test-lead 决策")。report-generator 倒数第二。""" art = TargetArtifact(kind="text", text="generic web system") decision = route(art, client=LLMClient(provider="stub", fallback="stub")) diff --git a/runtime/tests/test_router_real.py b/runtime/tests/test_router_real.py index 838fb3ef..3e77ca23 100644 --- 
a/runtime/tests/test_router_real.py +++ b/runtime/tests/test_router_real.py @@ -1,9 +1,9 @@ """Real-model router accuracy test (M2-7). -Charter §21: +Charter : - 横切准则: 失败必带 seed + snapshot (固定 random seed) - 横切准则: 测试预算上限 (timeout per call) - - 决策可追溯 §18-12: 每次失败入 decisions/ + - 决策可追溯 : 每次失败入 decisions/ Run conditions: - Requires TAGENT_LLM_PROVIDER ∈ {claude, openai, gemini, qwen, deepseek, ollama} @@ -30,7 +30,7 @@ from runtime.router.router import route, route_with_vote from runtime.router.schema import TargetArtifact -RANDOM_SEED = 42 # §21 可复现性: 固定 seed +RANDOM_SEED = 42 # 可复现性: 固定 seed random.seed(RANDOM_SEED) # 20 test samples: 4 types × 5 phrasings each @@ -63,7 +63,7 @@ def _decisions_log(record: dict) -> Path: - """Charter §18-12 决策可追溯: log each routing decision.""" + """Charter 决策可追溯: log each routing decision.""" s = get_settings() d = s.resolve(s.workspace_dir) / "测试报告" / "decisions" d.mkdir(parents=True, exist_ok=True) diff --git a/runtime/tests/test_skill_runners.py b/runtime/tests/test_skill_runners.py index 77449d22..06e27ba4 100644 --- a/runtime/tests/test_skill_runners.py +++ b/runtime/tests/test_skill_runners.py @@ -1,6 +1,6 @@ -"""LLM-driven SkillRunner 专项 unit test (V1.21.0 — skill rollout 起点). +"""LLM-driven SkillRunner 专项 unit test. 
-照 test_agent_runners.py pattern (V1.16-followup 锁规则) 同构: +照 test_agent_runners.py pattern 同构: 覆盖 3 维度 × N skill_runner = 3N case (参数化): - registration: @register_skill("name") + skills/__init__.py import 双链路 → get_skill_runner(name) 非空 (防 __init__.py 漏 import 致 silent fallback no-op) @@ -9,7 +9,7 @@ - summary 非空: 一行业务摘要存在 (防 summary 改空 regression, report-generator 下游消费) -模板规则锁定 (V1.21+ 新 SkillRunner 必填): +模板规则锁定 : - 加 1 skill_runner → 更新 ALL_SKILL_RUNNERS 加 1 行 (name, required_keys) - 不加 → 参数化漏覆盖, pytest 不报错但 silent gap """ @@ -25,7 +25,7 @@ # 不含下划线开头字段 (e.g., _mode 是 stub 标志, 非业务字段) ALL_SKILL_RUNNERS: list[tuple[str, list[str]]] = [ ( - "pentest-coordinator", # V1.21.0 (skill rollout 起点) + "pentest-coordinator", # (skill rollout 起点) [ "project_name", "run_id", @@ -55,55 +55,55 @@ ["project_name","run_id","sections","findings","pii_scrub","outputs","risks","confidence"], ), ( - "pentest-recon", # V1.31.0 + "pentest-recon", # ["project_name","run_id","target","authorization","outputs","risks","confidence"], ), ( - "pentest-vuln", # V1.31.0 + "pentest-vuln", # ["project_name","run_id","source_available","mode","domains","outputs","risks","confidence"], ), ( - "mobile-test", # V1.23.0 + "mobile-test", # ["project_name","run_id","target_platform","phases","outputs","risks","confidence"], ), ( - "visual-test", # V1.23.0 + "visual-test", # ["project_name","run_id","visual_target_type","phases","outputs","risks","confidence"], ), ( - "system-test", # V1.31.0 + "system-test", # ["project_name","run_id","sub_scenarios","phases","outputs","risks","confidence"], ), ( - "eval-harness", # V1.27.0 + "eval-harness", # ["project_name","run_id","eval_target","model_version","baseline_version","safety_checks","outputs","risks","confidence"], ), ( - "automotive-test", # V1.31.0 + "automotive-test", # ["project_name","run_id","vehicle_subsystem","asil_level","phases","sub_skills","outputs","risks","confidence"], ), ( - "automotive-can-bus-test", # V1.31.0 + "automotive-can-bus-test", 
# ["project_name","run_id","protocols","checks","outputs","risks","confidence"], ), ( - "automotive-adas-scenario", # V1.31.0 + "automotive-adas-scenario", # ["project_name","run_id","categories","odd_levels","simulation","outputs","risks","confidence"], ), ( - "automotive-ota-update-test", # V1.31.0 + "automotive-ota-update-test", # ["project_name","run_id","checks","compliance","outputs","risks","confidence"], ), ( - "automotive-hil-loop-test", # V1.31.0 + "automotive-hil-loop-test", # ["project_name","run_id","loops","asil_required","fault_injection","platform","outputs","risks","confidence"], ), ( - "agent-introspection-debugging", # V1.x + "agent-introspection-debugging", # ["project_name","run_id","target_run_id","dimensions","findings","recommendations","outputs","confidence"], ), ( - "build-your-own-x-explorer", # V1.x + "build-your-own-x-explorer", # ["project_name","run_id","user_scenario","detected_concepts","recommendations","warnings","outputs","confidence"], ), ] diff --git a/runtime/tutor/INDEX.md b/runtime/tutor/INDEX.md index 21537502..689bd4ca 100644 --- a/runtime/tutor/INDEX.md +++ b/runtime/tutor/INDEX.md @@ -1,4 +1,4 @@ -# tutor 索引(主宪章 §23 教学层) +# tutor 索引 ## 文件清单 diff --git a/runtime/tutor/__init__.py b/runtime/tutor/__init__.py index 44174a97..3cad7f3e 100644 --- a/runtime/tutor/__init__.py +++ b/runtime/tutor/__init__.py @@ -1,4 +1,4 @@ -"""Tutor 教学层 · 主宪章 §23. +"""Tutor 教学层 执行 / 学习 双模式 · 反幻觉 3 层 · 双语切换. """ diff --git a/runtime/tutor/eval_replay.py b/runtime/tutor/eval_replay.py index e339ccc0..80d81ecf 100644 --- a/runtime/tutor/eval_replay.py +++ b/runtime/tutor/eval_replay.py @@ -1,4 +1,4 @@ -"""Eval capture + replay · gbrain §1.6 派生. +"""Eval capture + replay · gbrain 派生. opt-in via TAGENT_EVAL_CAPTURE=1. PII-scrubbed routing queries land in `workspace/learning/eval_candidates.jsonl`. 
Replay computes 3 metrics: @@ -30,7 +30,7 @@ def _capture_path() -> Path: return d / "eval_candidates.jsonl" -# PII scrub — single source of truth (gbrain §1.9) +# PII scrub — single source of truth (gbrain ) PII_PATTERNS = [ (re.compile(r"\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}\b"), ""), (re.compile(r"\b1[3-9]\d{9}\b"), ""), diff --git a/runtime/tutor/explainer.py b/runtime/tutor/explainer.py index c5a5f19d..ddf17709 100644 --- a/runtime/tutor/explainer.py +++ b/runtime/tutor/explainer.py @@ -1,10 +1,10 @@ -"""Explainer · 主宪章 §23 反幻觉 L2 自检. +"""Explainer Decorates DAG nodes / tool calls with教学注释: exec mode → one_liner only (≤30 字) learn mode → why + theory_ref + alternatives + reading -Charter §23 L2 self-check: verify_refs() re-asks LLM to confirm cited card ids +Charter L2 self-check: verify_refs() re-asks LLM to confirm cited card ids are real KB entries; non-existent → strip + downgrade confidence. """ @@ -61,7 +61,7 @@ def render(self, lang: str | None = None) -> str: def filter_refs(refs: list[str]) -> tuple[list[str], list[str]]: - """Charter §23 L1: split into (in_kb, not_in_kb).""" + """Charter L1: split into (in_kb, not_in_kb).""" kb = get_kb() in_kb, not_in_kb = [], [] for r in refs: diff --git a/runtime/tutor/feedback.py b/runtime/tutor/feedback.py index 67b97ae4..fe09d39c 100644 --- a/runtime/tutor/feedback.py +++ b/runtime/tutor/feedback.py @@ -1,4 +1,4 @@ -"""User feedback · 主宪章 §23 反幻觉 L3. +"""User feedback Users flag wrong explanations → workspace/learning/feedback/{date}.jsonl curator periodically reviews & downgrades card confidence. diff --git a/runtime/tutor/graph.py b/runtime/tutor/graph.py index 31e62b6e..9583f65a 100644 --- a/runtime/tutor/graph.py +++ b/runtime/tutor/graph.py @@ -1,4 +1,4 @@ -"""KB self-wiring graph · gbrain §1.2 派生. +"""KB self-wiring graph · gbrain 派生. 
零 LLM 调用:从卡片 frontmatter 的 `related_to` + `superseded_by` + body 内的 `[[wikilink]]` 抽取 typed link,建反向索引。 diff --git a/runtime/tutor/i18n.py b/runtime/tutor/i18n.py index ec665037..c4966ce3 100644 --- a/runtime/tutor/i18n.py +++ b/runtime/tutor/i18n.py @@ -1,4 +1,4 @@ -"""Language switching · 主宪章 §23. +"""Language switching zh / en / zh-en(double-column comparison) """ diff --git a/runtime/tutor/theory_kb.py b/runtime/tutor/theory_kb.py index 75dd3823..ef38a559 100644 --- a/runtime/tutor/theory_kb.py +++ b/runtime/tutor/theory_kb.py @@ -1,4 +1,4 @@ -"""Theory KB loader · 主宪章 §23 反幻觉 L1. +"""Theory KB loader Scans `docs/theory/**/*.{zh,en}.md`, parses frontmatter, exposes lookup API. LLM in learn mode 只能引用 KB 中存在的 id;非 KB 输出"该领域未收录". diff --git a/runtime/tutor/verbosity.py b/runtime/tutor/verbosity.py index 4e6cf669..8d22afe5 100644 --- a/runtime/tutor/verbosity.py +++ b/runtime/tutor/verbosity.py @@ -1,4 +1,4 @@ -"""Verbosity mode · 主宪章 §23. +"""Verbosity mode exec = 默认,每节点仅 one_liner(≤30 字);可 --silent 关 learn = 全套教学:why + theory_ref + alternatives + reading + L3 反馈 diff --git a/runtime/web/INDEX.md b/runtime/web/INDEX.md index 0f8bb4ef..45873f19 100644 --- a/runtime/web/INDEX.md +++ b/runtime/web/INDEX.md @@ -1,7 +1,7 @@ -# runtime/web 索引 (V1.0.0) +# runtime/web 索引 > Web UI for `runtime/api`. 
4 页:Upload / Run Status / Report / Catalog。 -> 被测项级别 §21 **L2**(用户可见,必含功能+边界+异常+兼容+可访问性测试)。 +> 被测项级别 **L2**(用户可见,必含功能+边界+异常+兼容+可访问性测试)。 ## 技术栈 @@ -49,7 +49,7 @@ npm run test:a11y # axe-core 可访问性扫 | Catalog | `GET /catalog` | | Health | `GET /health` | -## §21 必测项(L2 级) +## 必测项(L2 级) - 功能正常路径:上传→看 DAG→看报告 - 边界:超大文件/空文本/超长 run_id diff --git a/scripts/INDEX.md b/scripts/INDEX.md index d2694443..9c901b03 100644 --- a/scripts/INDEX.md +++ b/scripts/INDEX.md @@ -1,4 +1,4 @@ -# scripts/ 索引(V1.10.0) +# scripts/ 索引 > 运维 / 分析 / 数据导出脚本 · 不属于运行时 · 不进入 tagent CLI。 @@ -24,4 +24,4 @@ ## 相关 - 上一级:[`../README.md`](../README.md) -- 主宪章 §0(安全:真实数据隔离)+ §19-12(决策可追溯) +- (安全:真实数据隔离)+ (决策可追溯) diff --git a/scripts/analyze-usage.py b/scripts/analyze-usage.py index 18c3fcc3..b4a8bcb1 100644 --- a/scripts/analyze-usage.py +++ b/scripts/analyze-usage.py @@ -40,12 +40,12 @@ def analyze_users(users: list[dict]) -> dict: def cut_decision(skill_usage: list[dict]) -> dict: - """W3 砍/留 决策(主宪章 §21 + §27 简洁优先). + """W3 砍/留 决策. 重度(≥10% 用户): keep + 文档加强 中度(3-10%): keep + 不主推 长尾(<3%): deprecated 月观察 - 0%: archive(主宪章 §22 归档不删) + 0%: archive """ keep_core: list[str] = [] keep_mid: list[str] = [] @@ -78,7 +78,7 @@ def render_md(user_stats: dict, cuts: dict, output: Path) -> None: f"- 行业 top:{user_stats['industries']}", f"- 团队规模:{user_stats['team_sizes']}", f"- 来源渠道:{user_stats['channels']}", - f"\n## Skill 决策(主宪章 §21 §27)\n", + f"\n## Skill 决策\n", ] for k, v in cuts.items(): lines.append(f"\n### {k}({len(v)} 项)") @@ -88,7 +88,7 @@ def render_md(user_stats: dict, cuts: dict, output: Path) -> None: "1. **keep_core**:文档加强 + demo gif + 教学视频\n" "2. **keep_mid**:不主推,留\n" "3. **deprecate**:30 天观察期;再无人用 → 转 archive\n" - "4. **archive**:`marketplace/.archive/` 归档(主宪章 §22 不可逆禁止)\n" + "4. 
**archive**:`marketplace/.archive/` 归档\n" ) output.write_text("\n".join(lines), encoding="utf-8") diff --git a/utils/README.md b/utils/README.md index 8a4d0ba5..951b6af9 100644 --- a/utils/README.md +++ b/utils/README.md @@ -20,7 +20,7 @@ | `jmeter_csv_exporter.py` | JMeter 参数化 CSV 生成 | `generate_jmeter_dataset(count, output)` | | `jmeter_result_parser.py` | JTL 解析 + 性能门禁 + 基线对比 | `parse_jtl(jtl)` / `check_performance_gates` / `compare_with_baseline` | | `regression_scope.py` | git diff 影响范围分析(YAML 配置) | `analyze_change_impact(base_branch)` | -| `zentao_bug_manager.py` | BugTracker 默认 adapter:禅道 SDK + token 续期(其他 adapter 同契约 BugTrackerBase,主宪章 §12) | `ZentaoBugManager.create_bug` / `batch_submit_from_failures` | +| `zentao_bug_manager.py` | BugTracker 默认 adapter:禅道 SDK + token 续期 | `ZentaoBugManager.create_bug` / `batch_submit_from_failures` | | `ci_quality_gate.py` | CI 门禁统一(junit + cov) | `parse_junit` / `check_smoke` / `check_regression` / `check_coverage` | | `quality_gate_engine.py` | YAML 驱动门禁引擎(替代硬编码阈值) | `QualityGateEngine` / `check_smoke/regression/coverage/performance/release` | | `bug_tracker_base.py` | BugTracker 抽象基类 + 工厂模式(5 适配器注册) | `BugTrackerBase` / `create_bug_manager` / `TRACKER_REGISTRY` | diff --git a/utils/a11y_i18n/a11y_scanner.py b/utils/a11y_i18n/a11y_scanner.py index c5d78ca0..ec9f0b83 100644 --- a/utils/a11y_i18n/a11y_scanner.py +++ b/utils/a11y_i18n/a11y_scanner.py @@ -1,5 +1,5 @@ # SPDX-License-Identifier: MIT -# DEPRECATED: use a11y_scanner_v2 instead. This file will be removed in V1.2. +# DEPRECATED: use a11y_scanner_v2 instead. This file will be removed in """ 无障碍 / Accessibility 测试(WCAG 2.1) 被引用方:UX / 易用性 / 合规 diff --git a/utils/data/data_factory.py b/utils/data/data_factory.py index 3006d8ce..ff640039 100644 --- a/utils/data/data_factory.py +++ b/utils/data/data_factory.py @@ -1,5 +1,5 @@ # SPDX-License-Identifier: MIT -# DEPRECATED: use data_factory_v2 instead. This file will be removed in V1.2. 
+# DEPRECATED: use data_factory_v2 instead. This file will be removed in """ 测试数据工厂 - Faker + Factory Boy 生成标准化测试数据 被引用方:05-数据准备 agent / data-preparation skill / conftest.py diff --git a/utils/data/db_test_helper.py b/utils/data/db_test_helper.py index e885ecba..ff090a01 100644 --- a/utils/data/db_test_helper.py +++ b/utils/data/db_test_helper.py @@ -1,5 +1,5 @@ # SPDX-License-Identifier: MIT -# DEPRECATED: use db_test_helper_v2 instead. This file will be removed in V1.2. +# DEPRECATED: use db_test_helper_v2 instead. This file will be removed in """ 数据库测试工具:CRUD / 事务 ACID / 迁移 / 备份恢复 / 慢查询 / 死锁 被引用方:05-数据准备 + 安全/可靠性测试 diff --git a/utils/design/suite_minimizer.py b/utils/design/suite_minimizer.py index c401bc69..cfe31d5b 100644 --- a/utils/design/suite_minimizer.py +++ b/utils/design/suite_minimizer.py @@ -1,5 +1,5 @@ # SPDX-License-Identifier: MIT -# DEPRECATED: use suite_minimizer_v2 instead. This file will be removed in V1.2. +# DEPRECATED: use suite_minimizer_v2 instead. This file will be removed in """ 测试套件减重(Suite Minimization)- 检测重复用例 / 冗余覆盖 被引用方:testcase-designer / 测试质量 diff --git a/utils/performance/chaos_helper.py b/utils/performance/chaos_helper.py index f8619321..4217cafb 100644 --- a/utils/performance/chaos_helper.py +++ b/utils/performance/chaos_helper.py @@ -1,5 +1,5 @@ # SPDX-License-Identifier: MIT -# DEPRECATED: use chaos_helper_v2 instead. This file will be removed in V1.2. +# DEPRECATED: use chaos_helper_v2 instead. 
This file will be removed in """ 混沌工程:故障注入(CPU/内存/磁盘/网络/进程杀死) 被引用方:16-可靠性稳定性 agent / chaos-test skill diff --git a/utils/platforms/mobile_driver.py b/utils/platforms/mobile_driver.py index 390ea1a9..989296ae 100644 --- a/utils/platforms/mobile_driver.py +++ b/utils/platforms/mobile_driver.py @@ -168,7 +168,7 @@ def _parse_gfxinfo_fps(output: str) -> Optional[float]: """ 粗略统计 gfxinfo framestats 帧数(非精确 FPS)。 PROFILEDATA 段下每行 CSV 是一帧;真精确 FPS 需 timestamp 列差。 - TODO(V2.x): 解析 timestamp 列,计算 (frame_count - 1) / (timestamp[-1] - timestamp[0]) 真 FPS + TODO: 解析 timestamp 列,计算 (frame_count - 1) / (timestamp[-1] - timestamp[0]) 真 FPS """ frame_count = 0 in_data = False diff --git a/utils/reporting/generate_report.py b/utils/reporting/generate_report.py index 632c0776..b66eb714 100644 --- a/utils/reporting/generate_report.py +++ b/utils/reporting/generate_report.py @@ -68,13 +68,13 @@ def _write_docx_degraded_warning(doc, data: Dict) -> None: warning_p = doc.add_paragraph() warning_run = warning_p.add_run( f"本次报告基于不完整测试数据生成。共 {len(degraded_upstream)} 个 expert " - f"输出 degraded(mock 兜底 / LLM 失败 / 未实装 V1.x rollout):" + f"输出 degraded(mock 兜底 / LLM 失败 / 未实装 rollout):" ) warning_run.font.color.rgb = RGBColor(255, 140, 0) warning_run.bold = True for name in degraded_upstream: item = doc.add_paragraph(style="List Bullet") - item.add_run(f"expert '{name}' — 详见 ROADMAP.md V1.x rollout 节奏") + item.add_run(f"expert '{name}' — 详见 ROADMAP.md rollout 节奏") impact_p = doc.add_paragraph() impact_run = impact_p.add_run("→ 上线决策建议: conditional 或 no-go(由 test-lead 判定);不应基于此报告直接发版。") impact_run.bold = True diff --git a/utils/security/ai_adversarial.py b/utils/security/ai_adversarial.py index c406dc3d..8378a08f 100644 --- a/utils/security/ai_adversarial.py +++ b/utils/security/ai_adversarial.py @@ -263,7 +263,7 @@ def test_llm_jailbreak(endpoint: str, - 默认 prompts=JAILBREAK_PROMPTS (DAN / system 泄漏 / 指令覆盖) 风险 HIGH, 需 confirm_offensive=True 显式确认(kwarg opt-in 防误调)。 - 拒绝检测 (替代旧 binary keyword 判定, 见协作章程 
§1.3 f5/f6): + 拒绝检测 (替代旧 binary keyword 判定, 见协作章程 f5/f6): - 每条输出经 _compute_refusal_confidence 算 0.0-1.0 置信度 - min_confidence (默认 0.1, 范围 0.0-1.0) 为阈值, 高于阈值算"拒绝" - 阈值越高假阳性越少 (但漏报更多), 调用方自由权衡 diff --git a/utils/security/api_security_scanner.py b/utils/security/api_security_scanner.py index 0abed394..53fab3d9 100644 --- a/utils/security/api_security_scanner.py +++ b/utils/security/api_security_scanner.py @@ -1,5 +1,5 @@ # SPDX-License-Identifier: MIT -# DEPRECATED: use api_security_scanner_v2 instead. This file will be removed in V1.2. +# DEPRECATED: use api_security_scanner_v2 instead. This file will be removed in """ API 安全测试 - OWASP API Top 10 覆盖 - API1: BOLA(越权访问对象) diff --git a/utils/testing/bdd_runner.py b/utils/testing/bdd_runner.py index a40da0fc..c9c4e646 100644 --- a/utils/testing/bdd_runner.py +++ b/utils/testing/bdd_runner.py @@ -1,5 +1,5 @@ # SPDX-License-Identifier: MIT -# DEPRECATED: use bdd_runner_v2 instead. This file will be removed in V1.2. +# DEPRECATED: use bdd_runner_v2 instead. This file will be removed in """ BDD(Behavior-Driven Development)/ 验收测试 - pytest-bdd 包装 被引用方:03-用例设计 + 06-自动化脚本(验收测试场景) diff --git a/utils/testing/state_machine_tester.py b/utils/testing/state_machine_tester.py index 2c9b9ab9..d19ef58e 100644 --- a/utils/testing/state_machine_tester.py +++ b/utils/testing/state_machine_tester.py @@ -1,5 +1,5 @@ # SPDX-License-Identifier: MIT -# DEPRECATED: use state_machine_tester_v2 instead. This file will be removed in V1.2. +# DEPRECATED: use state_machine_tester_v2 instead. 
This file will be removed in """ 状态迁移测试(State Transition Testing) 被引用方:03-用例设计 agent / testcase-design skill diff --git a/workspace/_demo/STARTUP.md b/workspace/_demo/STARTUP.md index 39185870..1fdb538b 100644 --- a/workspace/_demo/STARTUP.md +++ b/workspace/_demo/STARTUP.md @@ -57,7 +57,7 @@ tagent run "https://example.com" --mode learn |------|------| | `LLM 调用 raise` | 检查 API key + 网络;切 `TAGENT_LLM_PROVIDER=ollama` 离线兜底 | | `BugTracker 提交失败` | 占位没填或网络 / 权限错;不阻塞,但日报会少 | -| `通知没发出` | 至少配 1 个渠道(主宪章 §36);未配自动跳过 | +| `通知没发出` | 至少配 1 个渠道;未配自动跳过 | | `selftest n7 失败` | 装 python-docx:`pip install python-docx` | ## 7. 下一步 diff --git a/workspace/_demo/tagent.yml b/workspace/_demo/tagent.yml index d30dde7b..7c6cc84c 100644 --- a/workspace/_demo/tagent.yml +++ b/workspace/_demo/tagent.yml @@ -21,7 +21,7 @@ skills: bug_tracker: primary: webhook - # 多 tracker 并存(主宪章 §37):写成 [zentao, github],按 Bug label 路由 + # 多 tracker 并存:写成 [zentao, github],按 Bug label 路由 # extra: [github] notifiers: @@ -34,15 +34,15 @@ quality_gates: perf_p99_ms_max: 300 selftest: - # 主宪章 §33 自检铁律 + # 自检规则 pre_tag_required: true pass_threshold: 0.80 strict_on_release: true marketplace: - enabled: false # 默认关 · 主宪章 §30 safe-by-default + enabled: false # 默认关 -# ============== SAFETY GATES · safe-by-default(主宪章 §22 / §35 + W5 sprint v2) ============== +# ============== SAFETY GATES · safe-by-default ============== # 危险操作 / 自动化 / 影响生产 的功能 必须显式开启, 否则 destructive-guard 拒绝运行。 # 详见 SECURITY.md 武器化代码使用边界 + 测试工具准入控制 节。 @@ -68,7 +68,7 @@ gateway: # 例: enabled_platforms: [telegram, feishu] pentest: - # 法律契约(default refuse · charter §35); rollout 阶段 yml gate 占位, V1.x 激活后接入真实路由 + # 法律契约(default refuse · charter ); rollout 阶段 yml gate 占位, 激活后接入真实路由 authorized: false # 法律授权确认 scope_in_targets: [] # IP/domain/URL 白名单(IN) scope_out_targets: [] # 强制黑名单(覆盖 IN)