Skip to content

Commit bacce91

Browse files
authored
Merge pull request #2 from eval-protocol/restore-ci-workflows
Restore CI workflow files
2 parents 2a399f4 + 62461f2 commit bacce91

File tree

2 files changed

+257
-0
lines changed

2 files changed

+257
-0
lines changed

.github/workflows/ci.yml

Lines changed: 187 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,187 @@
# Continuous-integration workflow: lint, type-check, test and report coverage.
name: Python CI

# One active run per workflow + ref; a newer run cancels the one in progress.
concurrency:
  group: ${{ github.workflow }}-${{ github.ref }}
  cancel-in-progress: true

on:
  # Pushes to main, unless they only touch docs or top-level Markdown files.
  push:
    branches:
      - main
    paths-ignore:
      - 'docs/**'
      - '*.md'
  # Pull requests targeting main, with the same path exclusions.
  pull_request:
    branches:
      - main
    paths-ignore:
      - 'docs/**'
      - '*.md'
  # Manual runs from the Actions tab.
  workflow_dispatch:

jobs:
  # Gate job: every test job below declares `needs: lint-and-type-check`.
  lint-and-type-check:
    name: Lint & Type Check
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0  # Fetch all history for all tags and branches

      - name: Set up Python 3.12
        uses: actions/setup-python@v5
        with:
          python-version: "3.12"

      - name: Install uv
        uses: astral-sh/setup-uv@v6
        with:
          enable-cache: true

      - name: Install the project
        run: uv sync --locked --all-extras --dev

      - name: Install tau2 for testing
        run: uv pip install git+https://github.com/sierra-research/tau2-bench.git@main

      # --exit-zero makes flake8 advisory: findings are reported in the log
      # but never fail this step. Only mypy can fail the gate.
      - name: Lint with flake8
        run: uv run flake8 eval_protocol tests examples scripts --count --exit-zero --max-complexity=10 --max-line-length=88 --statistics

      - name: Type check with mypy
        run: uv run mypy eval_protocol

  # Main test suite, run in parallel (pytest-xdist) on each supported Python.
  test-core:
    name: Core Tests (Python ${{ matrix.python-version }})
    runs-on: ubuntu-latest
    needs: lint-and-type-check
    strategy:
      fail-fast: false  # let the other Python versions finish if one fails
      matrix:
        python-version: ["3.10", "3.11", "3.12"]

    steps:
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0  # Fetch all history for all tags and branches

      - name: Set up Python ${{ matrix.python-version }}
        uses: actions/setup-python@v5
        with:
          python-version: ${{ matrix.python-version }}

      - name: Install uv
        uses: astral-sh/setup-uv@v6
        with:
          enable-cache: true

      - name: Install the project
        run: uv sync --locked --all-extras --dev

      - name: Install tau2 for testing
        run: uv pip install git+https://github.com/sierra-research/tau2-bench.git@main

      - name: Run Core Tests with pytest-xdist
        env:
          E2B_API_KEY: ${{ secrets.E2B_API_KEY }}
          FIREWORKS_API_KEY: ${{ secrets.FIREWORKS_API_KEY }}
          FIREWORKS_ACCOUNT_ID: ${{ secrets.FIREWORKS_ACCOUNT_ID }}
          PYTHONWARNINGS: "ignore::DeprecationWarning,ignore::RuntimeWarning"
        run: |
          # Run most tests in parallel, but explicitly ignore tests that manage their own servers
          uv run pytest \
            -n auto \
            --ignore=tests/test_batch_evaluation.py \
            --cov=eval_protocol --cov-append --cov-report=xml --cov-report=term-missing -v --durations=10

      # Artifact name carries the Python version so matrix legs do not collide.
      - name: Store coverage file
        uses: actions/upload-artifact@v4
        with:
          name: coverage-core-${{ matrix.python-version }}
          path: coverage.xml
          retention-days: 1

  # The batch-evaluation test file is excluded from test-core and run here
  # on its own, serially (no `-n`).
  test-batch-evaluation:
    name: Batch Evaluation Tests
    runs-on: ubuntu-latest
    needs: lint-and-type-check
    steps:
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0  # Fetch all history for all tags and branches

      - name: Set up Python 3.12
        uses: actions/setup-python@v5
        with:
          python-version: "3.12"

      - name: Install uv
        uses: astral-sh/setup-uv@v6
        with:
          enable-cache: true

      - name: Install the project
        run: uv sync --locked --all-extras --dev

      - name: Install tau2 for testing
        run: uv pip install git+https://github.com/sierra-research/tau2-bench.git@main

      - name: Run Batch Evaluation Tests
        env:
          E2B_API_KEY: ${{ secrets.E2B_API_KEY }}
          FIREWORKS_API_KEY: ${{ secrets.FIREWORKS_API_KEY }}
          FIREWORKS_ACCOUNT_ID: ${{ secrets.FIREWORKS_ACCOUNT_ID }}
          PYTHONWARNINGS: "ignore::DeprecationWarning,ignore::RuntimeWarning"
        run: |
          # Run only this specific test file, WITHOUT xdist
          uv run pytest tests/test_batch_evaluation.py --cov=eval_protocol --cov-append --cov-report=xml -v --durations=10

      - name: Store coverage file
        uses: actions/upload-artifact@v4
        with:
          name: coverage-batch-eval
          path: coverage.xml
          retention-days: 1

  test-mcp-e2e:
    name: MCP End-to-End Tests
    runs-on: ubuntu-latest
    needs: lint-and-type-check
    steps:
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0  # Fetch all history for all tags and branches

      - name: Set up Python 3.12
        uses: actions/setup-python@v5
        with:
          python-version: "3.12"

      - name: Install uv
        uses: astral-sh/setup-uv@v6
        with:
          enable-cache: true

      - name: Install the project
        run: uv sync --locked --all-extras --dev

      - name: Install tau2 for testing
        run: uv pip install git+https://github.com/sierra-research/tau2-bench.git@main

      # NOTE(review): no step in this job runs any tests or writes
      # coverage.xml, so the upload below has nothing to pick up
      # (upload-artifact only warns on a missing path by default).
      # Add the MCP end-to-end pytest step before this one, or drop the
      # upload until those tests exist.
      - name: Store coverage file
        uses: actions/upload-artifact@v4
        with:
          name: coverage-mcp-e2e
          path: coverage.xml
          retention-days: 1

  # Collects the coverage.xml artifacts from the test jobs and sends them to
  # Codecov in a single upload.
  upload-coverage:
    name: Upload Coverage
    runs-on: ubuntu-latest
    needs: [test-core, test-batch-evaluation, test-mcp-e2e]
    steps:
      # Codecov's README asks for a checkout so that paths in the reports can
      # be matched against the repository's files.
      - uses: actions/checkout@v4

      # With no `name` given, every artifact of the run is downloaded into its
      # own sub-directory of coverage-artifacts/.
      - name: Download all coverage artifacts
        uses: actions/download-artifact@v4
        with:
          path: coverage-artifacts

      # v5 replaces the outdated v3 uploader; the inputs used here are
      # unchanged between the two majors.
      - name: Upload coverage to Codecov
        uses: codecov/codecov-action@v5
        with:
          token: ${{ secrets.CODECOV_TOKEN }}
          directory: ./coverage-artifacts/
          fail_ci_if_error: false  # a Codecov outage must not fail CI
          verbose: true

.github/workflows/release.yml

Lines changed: 70 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,70 @@
# Release workflow: runs only when a version tag is pushed.
name: Python Package Release

on:
  push:
    tags:
      # Trigger on version tags like v1.2.3, v1.2.3-alpha
      - "v[0-9]+.[0-9]+.[0-9]+*"

jobs:
  # Builds the distribution, publishes a GitHub Release with the built files
  # attached, then uploads the same files to PyPI.
  build-and-publish:
    runs-on: ubuntu-latest
    permissions:
      contents: write  # Needed to create GitHub releases
      id-token: write  # Needed for PyPI trusted publishing

    steps:
      - uses: actions/checkout@v4

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: "3.10"

      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          pip install build twine

      - name: Build package
        run: python -m build

      # One step creates the release and attaches the package files.
      # It replaces the archived actions/create-release@v1, which errors when
      # a release for the tag already exists and so blocked re-running this
      # job after a failed PyPI upload. action-gh-release updates an existing
      # release instead. The step id is kept so
      # `steps.create_release.outputs.upload_url` stays available.
      - name: Create GitHub Release and upload package files
        id: create_release
        uses: softprops/action-gh-release@v2
        with:
          tag_name: ${{ github.ref_name }}
          name: Release ${{ github.ref_name }}
          body: |
            Changes in this release:
            - TODO: Add release notes here or link to CHANGELOG.md
          draft: false
          # Mark as prerelease if tag contains '-' (e.g., v1.0.0-alpha)
          prerelease: ${{ contains(github.ref_name, '-') }}
          # Everything `python -m build` wrote to dist/, so no separate
          # wheel upload step is needed.
          files: ./dist/*

      - name: Publish package to PyPI
        uses: pypa/gh-action-pypi-publish@release/v1
        # with:
        #   user: __token__
        #   password: ${{ secrets.PYPI_API_TOKEN }}  # Requires a PYPI_API_TOKEN secret in repository

        # If using trusted publishing (recommended), the above `with` block for user/password is not needed.
        # Ensure PyPI project settings are configured for trusted publishing from this GitHub repository and workflow.

0 commit comments

Comments
 (0)