Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
187 changes: 187 additions & 0 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,187 @@
# Continuous-integration workflow for the Python package.
name: Python CI

# One group per workflow + ref: starting a new run in the same group cancels
# the run that is still in progress.
concurrency:
  cancel-in-progress: true
  group: ${{ github.workflow }}-${{ github.ref }}

on:
  # Allow manual runs.
  workflow_dispatch:

  # Pull requests targeting main, unless they only touch the ignored paths.
  pull_request:
    branches:
      - main
    paths-ignore:
      - 'docs/**'
      - '*.md'

  # Pushes to main, with the same ignored paths.
  push:
    branches:
      - main
    paths-ignore:
      - 'docs/**'
      - '*.md'

jobs:
# Static checks (flake8 + mypy) on Python 3.12.
lint-and-type-check:
  name: Lint & Type Check
  runs-on: ubuntu-latest
  steps:
    # fetch-depth 0: fetch all history for all tags and branches.
    - uses: actions/checkout@v4
      with:
        fetch-depth: 0

    - name: Set up Python 3.12
      uses: actions/setup-python@v5
      with:
        python-version: '3.12'

    - name: Install uv
      uses: astral-sh/setup-uv@v6
      with:
        enable-cache: true

    # Install from the lockfile, including every extra and the dev group.
    - name: Install the project
      run: uv sync --locked --all-extras --dev

    # tau2 is installed straight from the main branch of its Git repository.
    - name: Install tau2 for testing
      run: uv pip install git+https://github.com/sierra-research/tau2-bench.git@main

    # --exit-zero makes flake8 exit 0 even when it reports violations, so this
    # step prints findings and statistics without failing the job.
    - name: Lint with flake8
      run: >-
        uv run flake8 eval_protocol tests examples scripts
        --count --exit-zero --max-complexity=10 --max-line-length=88 --statistics

    - name: Type check with mypy
      run: uv run mypy eval_protocol

# Core test suite, run once per Python version in the matrix after the
# lint job has succeeded.
test-core:
  name: Core Tests (Python ${{ matrix.python-version }})
  needs: lint-and-type-check
  runs-on: ubuntu-latest
  strategy:
    # Let the remaining matrix entries finish when one of them fails.
    fail-fast: false
    matrix:
      python-version:
        - '3.10'
        - '3.11'
        - '3.12'

  steps:
    # fetch-depth 0: fetch all history for all tags and branches.
    - uses: actions/checkout@v4
      with:
        fetch-depth: 0

    - name: Set up Python ${{ matrix.python-version }}
      uses: actions/setup-python@v5
      with:
        python-version: ${{ matrix.python-version }}

    - name: Install uv
      uses: astral-sh/setup-uv@v6
      with:
        enable-cache: true

    - name: Install the project
      run: uv sync --locked --all-extras --dev

    - name: Install tau2 for testing
      run: uv pip install git+https://github.com/sierra-research/tau2-bench.git@main

    # Everything except tests/test_batch_evaluation.py, which has its own job.
    - name: Run Core Tests with pytest-xdist
      env:
        PYTHONWARNINGS: 'ignore::DeprecationWarning,ignore::RuntimeWarning'
        E2B_API_KEY: ${{ secrets.E2B_API_KEY }}
        FIREWORKS_API_KEY: ${{ secrets.FIREWORKS_API_KEY }}
        FIREWORKS_ACCOUNT_ID: ${{ secrets.FIREWORKS_ACCOUNT_ID }}
      run: |
        # Run most tests in parallel, but explicitly ignore tests that manage their own servers
        uv run pytest \
        -n auto \
        --ignore=tests/test_batch_evaluation.py \
        --cov=eval_protocol --cov-append --cov-report=xml --cov-report=term-missing -v --durations=10

    # The artifact name carries the Python version, so each matrix entry
    # uploads under a distinct name.
    - name: Store coverage file
      uses: actions/upload-artifact@v4
      with:
        name: coverage-core-${{ matrix.python-version }}
        path: coverage.xml
        retention-days: 1

# Runs tests/test_batch_evaluation.py on its own, serially, on Python 3.12.
test-batch-evaluation:
  name: Batch Evaluation Tests
  needs: lint-and-type-check
  runs-on: ubuntu-latest
  steps:
    # fetch-depth 0: fetch all history for all tags and branches.
    - uses: actions/checkout@v4
      with:
        fetch-depth: 0

    - name: Set up Python 3.12
      uses: actions/setup-python@v5
      with:
        python-version: '3.12'

    - name: Install uv
      uses: astral-sh/setup-uv@v6
      with:
        enable-cache: true

    - name: Install the project
      run: uv sync --locked --all-extras --dev

    - name: Install tau2 for testing
      run: uv pip install git+https://github.com/sierra-research/tau2-bench.git@main

    - name: Run Batch Evaluation Tests
      env:
        PYTHONWARNINGS: 'ignore::DeprecationWarning,ignore::RuntimeWarning'
        E2B_API_KEY: ${{ secrets.E2B_API_KEY }}
        FIREWORKS_API_KEY: ${{ secrets.FIREWORKS_API_KEY }}
        FIREWORKS_ACCOUNT_ID: ${{ secrets.FIREWORKS_ACCOUNT_ID }}
      run: |
        # Run only this specific test file, WITHOUT xdist
        uv run pytest tests/test_batch_evaluation.py --cov=eval_protocol --cov-append --cov-report=xml -v --durations=10

    - name: Store coverage file
      uses: actions/upload-artifact@v4
      with:
        name: coverage-batch-eval
        path: coverage.xml
        retention-days: 1

# MCP end-to-end job on Python 3.12.
# NOTE(review): no step in this job runs pytest or otherwise writes
# coverage.xml, so the final upload step has nothing to pick up. Confirm
# whether the MCP test step is missing from this job.
test-mcp-e2e:
  name: MCP End-to-End Tests
  needs: lint-and-type-check
  runs-on: ubuntu-latest
  steps:
    # fetch-depth 0: fetch all history for all tags and branches.
    - uses: actions/checkout@v4
      with:
        fetch-depth: 0

    - name: Set up Python 3.12
      uses: actions/setup-python@v5
      with:
        python-version: '3.12'

    - name: Install uv
      uses: astral-sh/setup-uv@v6
      with:
        enable-cache: true

    - name: Install the project
      run: uv sync --locked --all-extras --dev

    - name: Install tau2 for testing
      run: uv pip install git+https://github.com/sierra-research/tau2-bench.git@main

    - name: Store coverage file
      uses: actions/upload-artifact@v4
      with:
        name: coverage-mcp-e2e
        path: coverage.xml
        retention-days: 1

# Collects the coverage artifacts produced by the three test jobs and sends
# them to Codecov. Runs only after all of those jobs have completed
# successfully (see `needs`).
upload-coverage:
  name: Upload Coverage
  runs-on: ubuntu-latest
  needs: [test-core, test-batch-evaluation, test-mcp-e2e]
  steps:
    # Check out the sources first so Codecov can match the paths recorded in
    # the reports against the repository tree. This must come before the
    # download, because checkout cleans the workspace.
    - uses: actions/checkout@v4

    # With no `name` given, every artifact of this run is downloaded, each
    # into its own sub-directory of `path`.
    - name: Download all coverage artifacts
      uses: actions/download-artifact@v4
      with:
        path: coverage-artifacts

    # v5 replaces v3, which runs on the deprecated Node 16 action runtime.
    # The inputs used here are unchanged between the two versions.
    - name: Upload coverage to Codecov
      uses: codecov/codecov-action@v5
      with:
        token: ${{ secrets.CODECOV_TOKEN }}
        directory: ./coverage-artifacts/
        # A failed upload is reported but does not fail the workflow.
        fail_ci_if_error: false
        verbose: true
70 changes: 70 additions & 0 deletions .github/workflows/release.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
# Release workflow: triggered by pushing a version tag.
name: Python Package Release

on:
  push:
    tags:
      # Version tags such as v1.2.3 or v1.2.3-alpha.
      - "v[0-9]+.[0-9]+.[0-9]+*"

jobs:
# Builds the distribution files, creates the GitHub release for the pushed
# tag with those files attached, and publishes the package to PyPI.
build-and-publish:
  runs-on: ubuntu-latest
  permissions:
    contents: write  # Needed to create GitHub releases
    id-token: write  # Needed for PyPI trusted publishing

  steps:
    - uses: actions/checkout@v4

    - name: Set up Python
      uses: actions/setup-python@v5
      with:
        python-version: '3.10'

    - name: Install dependencies
      run: |
        python -m pip install --upgrade pip
        pip install build twine

    # Writes the built distributions into ./dist.
    - name: Build package
      run: python -m build

    # A single step creates the release and uploads the assets. It replaces
    # the archived actions/create-release@v1 plus a separate asset-upload
    # step. The `create_release` id is kept so that existing
    # `steps.create_release.outputs.upload_url` references keep resolving.
    # The step runs before the PyPI publish so that `./dist/*` matches only
    # the files produced by the build step.
    - name: Create GitHub Release
      id: create_release
      uses: softprops/action-gh-release@v2
      with:
        tag_name: ${{ github.ref_name }}
        name: Release ${{ github.ref_name }}
        body: |
          Changes in this release:
          - TODO: Add release notes here or link to CHANGELOG.md
        draft: false
        # Mark as prerelease if tag contains '-' (e.g., v1.0.0-alpha)
        prerelease: ${{ contains(github.ref_name, '-') }}
        # Attach every built file (the wheel is included by the glob).
        files: ./dist/*
        # Fail instead of publishing a release without assets.
        fail_on_unmatched_files: true

    - name: Publish package to PyPI
      uses: pypa/gh-action-pypi-publish@release/v1
      # with:
      #   user: __token__
      #   password: ${{ secrets.PYPI_API_TOKEN }} # Requires a PYPI_API_TOKEN secret in repository

      # If using trusted publishing (recommended), the above `with` block for user/password is not needed.
      # Ensure PyPI project settings are configured for trusted publishing from this GitHub repository and workflow.