Add AGENTS.md (#374)

ultmaster · web-flow · commit feebaec24ce1 · 2025-12-06T13:16:26.000+08:00
diff --git a/.github/workflows/badge-examples.yml b/.github/workflows/badge-examples.yml
@@ -10,6 +10,7 @@ on:
       - Examples - Tinker
       - Examples - Azure
       - Examples - Claude Code
+      - Examples - RAG
     types: [completed]
 
   workflow_dispatch:
@@ -37,5 +38,6 @@ jobs:
               { workflow: 'examples-tinker.yml', label: 'examples-tinker.stable', variants: ['stable'] },
               { workflow: 'examples-azure.yml', label: 'examples-azure.stable', variants: ['stable'] },
               { workflow: 'examples-claude-code.yml', label: 'examples-claude-code.stable', variants: ['stable'] },
+              { workflow: 'examples-rag.yml', label: 'examples-rag.stable', variants: ['stable'] },
             ];
             await badgeAggregation({ github, context, core, dependencies });
diff --git a/.github/workflows/badge-latest.yml b/.github/workflows/badge-latest.yml
@@ -7,6 +7,8 @@ on:
       - Examples - Spider
       - Examples - APO
       - Examples - Unsloth
+      - Examples - RAG
+      - Examples - Claude Code
       - GPU Test
     types: [completed]
 
@@ -32,6 +34,8 @@ jobs:
               { workflow: 'examples-spider.yml', label: 'spider.latest', variants: ['latest'] },
               { workflow: 'examples-apo.yml', label: 'apo.latest', variants: ['latest'] },
               { workflow: 'examples-unsloth.yml', label: 'unsloth.latest', variants: ['latest'] },
+              { workflow: 'examples-claude-code.yml', label: 'claude-code.latest', variants: ['latest'] },
+              { workflow: 'examples-rag.yml', label: 'rag.latest', variants: ['latest'] },
               { workflow: 'tests-full.yml', label: 'tests-full.latest', variants: ['latest'] },
             ];
             await badgeAggregation({ github, context, core, dependencies });
diff --git a/.github/workflows/badge-rag.yml b/.github/workflows/badge-rag.yml
@@ -0,0 +1,29 @@
+name: Badge - RAG
+
+on:
+  workflow_run:
+    workflows:
+      - Examples - RAG
+    types: [completed]
+
+  workflow_dispatch:
+
+permissions:
+  actions: read
+  contents: read
+
+jobs:
+  badge:
+    if: ${{ github.event_name == 'workflow_dispatch' || (github.event_name == 'workflow_run' && github.event.workflow_run.head_branch == 'main') }}
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+      - uses: actions/github-script@v8
+        with:
+          github-token: ${{ secrets.GITHUB_TOKEN }}
+          script: |
+            const badgeAggregation = require('./scripts/badge_aggregation.js');
+            const dependencies = [
+              { workflow: 'examples-rag.yml', label: 'rag', variants: ['legacy', 'stable'] },
+            ];
+            await badgeAggregation({ github, context, core, dependencies });
diff --git a/AGENTS.md b/AGENTS.md
@@ -0,0 +1,38 @@
+# Repository Guidelines
+
+## Architecture Overview
+Agent Lightning runs through a continuous loop: runners and tracers emit spans, `LightningStore` (`agentlightning/store/`) keeps them synchronized, and algorithms in `agentlightning/algorithm/` consume those traces to improve behavior.
+
+## Project Structure & Module Organization
+- `agentlightning/`: adapters, execution stack, training loop, tracer, reward logic, and the `agl` CLI.
+- `docs/` & `examples/`: narrative and procedural docs (assets in `docs/assets/`, navigation in `mkdocs.yml`) plus runnable workflows whose READMEs point to their companion how-to guides. `docs/how-to` covers task-focused instructions, while `docs/tutorials` explains concepts and subsystems.
+- `dashboard/`, `scripts/`, `tests/`: UI bundles, release/dataset/CI automation, and mirrored coverage of the runtime tree. Record download steps rather than committing binaries.
+
+## Build, Test, and Development Commands
+- `uv sync --group dev` — provision tooling once per environment.
+- `uv run --no-sync pytest -v` — execute the full suite; add a path or `-k expr` to narrow the run.
+- `uv run --no-sync pyright` — enforce static typing parity with CI.
+- `uv run --no-sync pre-commit run --all-files --show-diff-on-failure` and `uv run --no-sync mkdocs build --strict` — keep formatting tidy and documentation valid.
+- Always commit the refreshed `uv.lock` when dependencies shift, and mention optional groups (VERL, APO, GPU) in PR notes.
+
+## Coding Style & Naming Conventions
+- Target `requires-python >= 3.10`, four-space indentation, 120-character lines (though docstrings may run longer), and formatter-owned diffs (Black + isort, `black` profile). Use `snake_case` for modules, functions, and variables; `PascalCase` for classes and React components; lowercase hyphenation for CLI flags, branch names, and TypeScript filenames.
+- Maintain exhaustive type hints (pyright enforces them) and prefer shared dataclasses or Pydantic models from `agentlightning.types`.
+- Author Google-style docstrings for new modules or public methods—succinct descriptions, no redundant type info, no redundant `Key features/components` bullet points, and `[][]` syntax for cross-references.
+- Writing logs is encouraged, especially for long functions with multiple steps and try-except blocks that catch all exceptions. Use `logging.getLogger(__name__)` to get loggers. Distinguish between DEBUG, INFO, WARNING, and ERROR logs.
+
+## Testing Guidelines
+- Mirror runtime directories under `tests/` and match filenames for quick traceability.
+- Parametrize pytest cases and apply markers (`openai`, `gpu`, `agentops`, `mongo`, `llmproxy`) so optional suites can be skipped via selectors like `-m "not mongo"` yet still exercised in CI.
+- Lean on fixtures, favor real stores/spans/agents over mocks, and drive coverage across the majority of branches.
+- If an imported module is missing from the environment, check whether `uv sync` has been run with the right groups. Do not make stubs for external dependencies unless necessary.
+
+## Example Contributions
+- Ship each example with a README that includes smoke-test instructions so maintainers can validate quickly. The README must contain an "Included Files" section summarizing every file and its role.
+- Keep runnable example modules self-contained with a module-level docstring describing CLI usage. Document important or educational classes/functions with targeted docstrings and inline comments where clarity matters.
+- Add a CI workflow per example named `examples-<name>.yml` in `.github/workflows/`. Register it in `badge-<name>.yml`, `badge-examples.yml`, and `badge-latest.yml` when applicable so badges stay accurate.
+
+## Commit & Pull Request Guidelines
+- Branch from a fresh `main` using `feature/<slug>`, `fix/<slug>`, `docs/<slug>`, or `chore/<slug>`.
+- Write imperative, scoped commits, reference issues with `Fixes #123`, and rerun pre-commit plus the relevant pytest/doc builds before pushing.
+- Use PR descriptions to summarize intent, list verification commands, call out dependency or docs-navigation updates, and link new docs/examples via `mkdocs.yml` or `examples/README.md`. Include logs for dashboard changes.
diff --git a/CLAUDE.md b/CLAUDE.md
@@ -0,0 +1 @@
+AGENTS.md
diff --git a/docs/community/contributing.md b/docs/community/contributing.md
@@ -46,6 +46,7 @@ Bonus points for examples that:
 - Ship CI or self-test coverage so we know they still work as the core evolves.  **Otherwise, we would have to mark the example as unmaintained because we won't be able to test the examples manually before each release.**
 - Include a [`docs/how-to/`]({{ src("docs/how-to/") }}) guide (or a detailed README if no how-to exists) without duplicating content in multiple places.
 - Favor simple, dependency-light code over heavy abstractions.
+- Ship a README that documents smoke-test instructions and includes an "Included Files" section summarizing every file and its role; keep the runnable module self-contained with a module-level docstring explaining CLI usage, plus targeted docstrings or inline comments for educational functions/classes.
 
 !!! warning "Please discuss first"
 
@@ -126,13 +127,13 @@ After `uv sync`, run commands via `uv run ...` (add `--no-sync` once the environ
 Formatting and linting are enforced through [pre-commit](https://pre-commit.com/). Install once, then run before each push:
 
 ```bash
-uv run pre-commit install
-uv run pre-commit run --all-files --show-diff-on-failure --color=always
+uv run --no-sync pre-commit install
+uv run --no-sync pre-commit run --all-files --show-diff-on-failure --color=always
 ```
 
 Once installed, the hooks run automatically on every `git commit`. Running the pre-commit hooks locally keeps CI green and diffs manageable.
 
-### 3. Branch From a Fresh `main`
+### 3. Branch from a Fresh `main` and Start Coding
 
 Start all work from the latest upstream state:
 
@@ -165,28 +166,36 @@ Use lowercase with hyphens, e.g., `feature/async-runner-hooks`.
 
     Remember to register new docs in [`mkdocs.yml`]({{ src("mkdocs.yml") }}), add examples to [examples/README]({{ src("examples/README.md") }}), and update the [Examples Catalog](../how-to/examples-catalog.md).
 
+Before you start coding, bring the shared coding conventions with you:
+
+- Target `requires-python >= 3.10`, four-space indentation, ~120-character lines (docstrings may run longer), and formatter-owned diffs (Black + isort with the `black` profile).
+- Use `snake_case` for modules, functions, and variables; `PascalCase` for classes and React components; lowercase hyphenation for CLI flags, branch names, and TypeScript filenames.
+- Maintain exhaustive type hints (pyright enforces them) and write succinct Google-style docstrings (with `[][]` cross-references).
+- Prefer dataclasses or Pydantic models from `agentlightning.types`.
+- Log via `logging.getLogger(__name__)` with targeted DEBUG/INFO/WARNING/ERROR calls—especially for long multi-step functions or broad `try/except` blocks.
+
 ### 4. Test and Validate
 
-Most contributions require automated checks. Prefix commands with `uv run` so they use the project environment.
+Most contributions require automated checks. Once `uv sync` has installed the locked dependencies, prefix commands with `uv run --no-sync ...` so they share the same environment as CI.
 
 **Full test suite**
 
 ```bash
-uv run pytest -v
+uv run --no-sync pytest -v
 ```
 
 **Targeted tests**
 
 ```bash
-uv run pytest tests/path/to/test_file.py -k test_name
+uv run --no-sync pytest tests/path/to/test_file.py -k test_name
 ```
 
 **Optional/gated tests:** GPU-specific suites or API-dependent tests run automatically when the required hardware or environment variables (such as `OPENAI_API_KEY`) are present.
 
 **Static analysis:**
 
 ```bash
-uv run pyright
+uv run --no-sync pyright
 ```
 
 If you have touched code under `examples/`, you should run the example-specific smoke tests. Each directory includes a README with example-specific smoke tests—run those too.
@@ -196,16 +205,16 @@ If you have touched code under `examples/`, you should run the example-specific
     Keep API references under [docs/reference]({{ src("docs/reference/") }}) up to date. Doc-only changes should still build cleanly:
 
     ```bash
-    uv run mkdocs serve --strict   # live reload
-    uv run mkdocs build --strict   # CI-equivalent
+    uv run --no-sync mkdocs serve --strict   # live reload
+    uv run --no-sync mkdocs build --strict   # CI-equivalent
     ```
 
     `--strict` elevates warnings to errors so you catch issues before CI.
 
 Before opening a PR, double-check the basics:
 
 - Run `uv lock` if you changed dependencies.
-- Run `uv run pre-commit run --all-files` (hooks installed via `pre-commit install` run automatically on `git commit`, but rerun them if you amended history).
+- Run `uv run --no-sync pre-commit run --all-files --show-diff-on-failure` (hooks installed via `pre-commit install` run automatically on `git commit`, but rerun them if you amended history).
 - Execute the relevant commands from the test list above.
 - Validate each affected example via its README instructions.
 
diff --git a/examples/README.md b/examples/README.md
@@ -9,8 +9,8 @@ This catalog highlights the examples shipped with Agent-lightning.
 | [calc_x](./calc_x) | VERL-powered math reasoning agent training that uses AutoGen with an MCP calculator tool. | [![calc_x workflow status](https://github.com/microsoft/agent-lightning/actions/workflows/badge-calc-x.yml/badge.svg)](https://github.com/microsoft/agent-lightning/actions/workflows/examples-calc-x.yml) |
 | [claude_code](./claude_code) | Claude Code SWE-bench harness that records Agent-lightning traces across Anthropic, vLLM, and OpenAI-compatible backends. | [![claude_code workflow status](https://github.com/microsoft/agent-lightning/actions/workflows/badge-claude-code.yml/badge.svg)](https://github.com/microsoft/agent-lightning/actions/workflows/examples-claude-code.yml) |
 | [minimal](./minimal) | Bite-sized programs that demonstrate how individual Agent-lightning building blocks behave in isolation. | [![minimal workflow status](https://github.com/microsoft/agent-lightning/actions/workflows/badge-unit.yml/badge.svg)](https://github.com/microsoft/agent-lightning/actions/workflows/badge-unit.yml) |
-| [rag](./rag) | Retrieval-Augmented Generation pipeline targeting the MuSiQue dataset with Wikipedia retrieval. | **Unmaintained** — last verified with Agent-lightning v0.1.1 |
-| [search_r1](./search_r1) | Framework-free Search-R1 reinforcement learning training workflow with a retrieval backend. | **Unmaintained** — last verified with Agent-lightning v0.1.2 |
+| [rag](./rag) | Retrieval-Augmented Generation pipeline targeting the MuSiQue dataset with Wikipedia retrieval. | [![rag workflow status](https://github.com/microsoft/agent-lightning/actions/workflows/badge-rag.yml/badge.svg)](https://github.com/microsoft/agent-lightning/actions/workflows/examples-rag.yml) |
+| [search_r1](./search_r1) | Framework-free Search-R1 reinforcement learning training workflow with a retrieval backend. | **Last verified with Agent-lightning v0.1.2** |
 | [spider](./spider) | Text-to-SQL reinforcement learning training on the Spider dataset using LangGraph. | [![spider workflow status](https://github.com/microsoft/agent-lightning/actions/workflows/badge-spider.yml/badge.svg)](https://github.com/microsoft/agent-lightning/actions/workflows/examples-spider.yml) |
 | [tinker](./tinker) | Reinforcement learning with Tinker as the backend training service. | [![tinker workflow status](https://github.com/microsoft/agent-lightning/actions/workflows/badge-tinker.yml/badge.svg)](https://github.com/microsoft/agent-lightning/actions/workflows/examples-tinker.yml) |
 | [unsloth](./unsloth) | Supervised fine-tuning example powered by Unsloth with 4-bit quantization and LoRA. | [![unsloth workflow status](https://github.com/microsoft/agent-lightning/actions/workflows/badge-unsloth.yml/badge.svg)](https://github.com/microsoft/agent-lightning/actions/workflows/examples-unsloth.yml) |
diff --git a/examples/rag/README.md b/examples/rag/README.md
@@ -1,5 +1,7 @@
 # RAG Agent Example
 
+[![rag workflow status](https://github.com/microsoft/agent-lightning/actions/workflows/badge-rag.yml/badge.svg)](https://github.com/microsoft/agent-lightning/actions/workflows/examples-rag.yml)
+
 This example demonstrates training a Retrieval-Augmented Generation (RAG) agent using Agent-Lightning with retrieval capabilities. The agent answers multi-hop questions from a tiny MuSiQue dataset by retrieving and reasoning over Wikipedia passages.
 
 ## Overview