diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
new file mode 100644
index 00000000..8560e8a6
--- /dev/null
+++ b/.github/workflows/ci.yml
@@ -0,0 +1,198 @@
+# Lint and type-check, then run the test suites and upload coverage.
+name: Python CI
+
+# A newer run for the same workflow and ref cancels the one in progress.
+concurrency:
+  group: ${{ github.workflow }}-${{ github.ref }}
+  cancel-in-progress: true
+
+on:
+  push:
+    branches: [main]
+    paths-ignore:
+      - "docs/**"
+      - "*.md"
+  pull_request:
+    branches: [main]
+    paths-ignore:
+      - "docs/**"
+      - "*.md"
+  workflow_dispatch:
+
+# No job writes to the repository, so keep GITHUB_TOKEN read-only.
+permissions:
+  contents: read
+
+jobs:
+  lint-and-type-check:
+    name: Lint & Type Check
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+        with:
+          fetch-depth: 0 # Fetch all history for all tags and branches
+
+      - name: Set up Python 3.12
+        uses: actions/setup-python@v5
+        with:
+          python-version: "3.12"
+
+      - name: Install uv
+        uses: astral-sh/setup-uv@v6
+        with:
+          enable-cache: true
+
+      - name: Install the project
+        run: uv sync --locked --all-extras --dev
+
+      - name: Install tau2 for testing
+        run: uv pip install git+https://github.com/sierra-research/tau2-bench.git@main
+
+      # --exit-zero makes flake8 report findings without failing the job.
+      - name: Lint with flake8
+        run: uv run flake8 eval_protocol tests examples scripts --count --exit-zero --max-complexity=10 --max-line-length=88 --statistics
+
+      - name: Type check with mypy
+        run: uv run mypy eval_protocol
+
+  test-core:
+    name: Core Tests (Python ${{ matrix.python-version }})
+    runs-on: ubuntu-latest
+    needs: lint-and-type-check
+    strategy:
+      fail-fast: false
+      matrix:
+        python-version: ["3.10", "3.11", "3.12"]
+
+    steps:
+      - uses: actions/checkout@v4
+        with:
+          fetch-depth: 0 # Fetch all history for all tags and branches
+
+      - name: Set up Python ${{ matrix.python-version }}
+        uses: actions/setup-python@v5
+        with:
+          python-version: ${{ matrix.python-version }}
+
+      - name: Install uv
+        uses: astral-sh/setup-uv@v6
+        with:
+          enable-cache: true
+
+      - name: Install the project
+        run: uv sync --locked --all-extras --dev
+
+      - name: Install tau2 for testing
+        run: uv pip install git+https://github.com/sierra-research/tau2-bench.git@main
+
+      - name: Run Core Tests with pytest-xdist
+        env:
+          E2B_API_KEY: ${{ secrets.E2B_API_KEY }}
+          FIREWORKS_API_KEY: ${{ secrets.FIREWORKS_API_KEY }}
+          FIREWORKS_ACCOUNT_ID: ${{ secrets.FIREWORKS_ACCOUNT_ID }}
+          PYTHONWARNINGS: "ignore::DeprecationWarning,ignore::RuntimeWarning"
+        run: |
+          # Run most tests in parallel, but explicitly ignore tests that manage their own servers
+          uv run pytest \
+            -n auto \
+            --ignore=tests/test_batch_evaluation.py \
+            --cov=eval_protocol --cov-append --cov-report=xml --cov-report=term-missing -v --durations=10
+
+      - name: Store coverage file
+        uses: actions/upload-artifact@v4
+        with:
+          name: coverage-core-${{ matrix.python-version }}
+          path: coverage.xml
+          retention-days: 1
+
+  test-batch-evaluation:
+    name: Batch Evaluation Tests
+    runs-on: ubuntu-latest
+    needs: lint-and-type-check
+    steps:
+      - uses: actions/checkout@v4
+        with:
+          fetch-depth: 0 # Fetch all history for all tags and branches
+
+      - name: Set up Python 3.12
+        uses: actions/setup-python@v5
+        with:
+          python-version: "3.12"
+
+      - name: Install uv
+        uses: astral-sh/setup-uv@v6
+        with:
+          enable-cache: true
+
+      - name: Install the project
+        run: uv sync --locked --all-extras --dev
+
+      - name: Install tau2 for testing
+        run: uv pip install git+https://github.com/sierra-research/tau2-bench.git@main
+
+      - name: Run Batch Evaluation Tests
+        env:
+          E2B_API_KEY: ${{ secrets.E2B_API_KEY }}
+          FIREWORKS_API_KEY: ${{ secrets.FIREWORKS_API_KEY }}
+          FIREWORKS_ACCOUNT_ID: ${{ secrets.FIREWORKS_ACCOUNT_ID }}
+          PYTHONWARNINGS: "ignore::DeprecationWarning,ignore::RuntimeWarning"
+        run: |
+          # Run only this specific test file, WITHOUT xdist
+          uv run pytest tests/test_batch_evaluation.py --cov=eval_protocol --cov-append --cov-report=xml -v --durations=10
+      - name: Store coverage file
+        uses: actions/upload-artifact@v4
+        with:
+          name: coverage-batch-eval
+          path: coverage.xml
+          retention-days: 1
+
+  test-mcp-e2e:
+    name: MCP End-to-End Tests
+    runs-on: ubuntu-latest
+    needs: lint-and-type-check
+    steps:
+      - uses: actions/checkout@v4
+        with:
+          fetch-depth: 0 # Fetch all history for all tags and branches
+      - name: Set up Python 3.12
+        uses: actions/setup-python@v5
+        with:
+          python-version: "3.12"
+      - name: Install uv
+        uses: astral-sh/setup-uv@v6
+        with:
+          enable-cache: true
+
+      - name: Install the project
+        run: uv sync --locked --all-extras --dev
+
+      - name: Install tau2 for testing
+        run: uv pip install git+https://github.com/sierra-research/tau2-bench.git@main
+
+      # NOTE(review): no step in this job runs tests, so nothing here produces
+      # the coverage.xml uploaded below. Add the MCP end-to-end test step
+      # before relying on this artifact.
+      - name: Store coverage file
+        uses: actions/upload-artifact@v4
+        with:
+          name: coverage-mcp-e2e
+          path: coverage.xml
+          retention-days: 1
+
+  upload-coverage:
+    name: Upload Coverage
+    runs-on: ubuntu-latest
+    needs: [test-core, test-batch-evaluation, test-mcp-e2e]
+    steps:
+      - name: Download all coverage artifacts
+        uses: actions/download-artifact@v4
+        with:
+          path: coverage-artifacts
+      # Codecov searches the downloaded artifacts directory for reports.
+      - name: Upload coverage to Codecov
+        uses: codecov/codecov-action@v5
+        with:
+          token: ${{ secrets.CODECOV_TOKEN }}
+          directory: ./coverage-artifacts/
+          fail_ci_if_error: false
+          verbose: true
diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml
new file mode 100644
index 00000000..87742f12
--- /dev/null
+++ b/.github/workflows/release.yml
@@ -0,0 +1,73 @@
+# Build the package, then publish it to PyPI and a GitHub Release on version tags.
+name: Python Package Release
+
+on:
+  push:
+    tags:
+      - 'v[0-9]+.[0-9]+.[0-9]+*' # Trigger on version tags like v1.2.3, v1.2.3-alpha
+
+jobs:
+  build-and-publish:
+    runs-on: ubuntu-latest
+    permissions:
+      contents: write # Needed to create GitHub releases
+      id-token: write # Needed for PyPI trusted publishing
+
+    steps:
+      - uses: actions/checkout@v4
+
+      - name: Set up Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: '3.10'
+
+      - name: Install dependencies
+        run: |
+          python -m pip install --upgrade pip
+          pip install build twine
+
+      - name: Build package
+        run: python -m build
+
+      # softprops/action-gh-release replaces the archived actions/create-release
+      # and also exposes an upload_url output under the same step id.
+      - name: Create GitHub Release
+        id: create_release
+        uses: softprops/action-gh-release@v2
+        env:
+          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+        with:
+          tag_name: ${{ github.ref_name }}
+          name: Release ${{ github.ref_name }}
+          body: |
+            Changes in this release:
+            - TODO: Add release notes here or link to CHANGELOG.md
+          draft: false
+          prerelease: ${{ contains(github.ref_name, '-') }} # Mark as prerelease if tag contains '-' (e.g., v1.0.0-alpha)
+
+      - name: Publish package to PyPI
+        uses: pypa/gh-action-pypi-publish@release/v1
+        # with:
+        #   user: __token__
+        #   password: ${{ secrets.PYPI_API_TOKEN }} # Requires a PYPI_API_TOKEN secret in repository
+
+        # If using trusted publishing (recommended), the above `with` block for user/password is not needed.
+        # Ensure PyPI project settings are configured for trusted publishing from this GitHub repository and workflow.
+
+      - name: Upload release assets (package files) to GitHub Release
+        uses: softprops/action-gh-release@v2
+        env:
+          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+        with:
+          files: ./dist/*
+
+      # Consider adding another asset upload for the .whl file if desired
+      # - name: Upload Wheel to GitHub Release
+      #   uses: actions/upload-release-asset@v1
+      #   env:
+      #     GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+      #   with:
+      #     upload_url: ${{ steps.create_release.outputs.upload_url }}
+      #     asset_path: ./dist/*.whl
+      #     asset_name: ${{ github.event.repository.name }}-${{ github.ref_name }}.whl
+      #     asset_content_type: application/wheel