From 23d6a46122d773240d3427c6826771c53221409c Mon Sep 17 00:00:00 2001
From: Charles Li <licharles@google.com>
Date: Tue, 14 Apr 2026 10:00:04 -0700
Subject: [PATCH 1/2] Move AotJaxprIdenticalTest to sheduled_only due to long
 time consuming

Add unit tests best practice section in document
---
 .github/workflows/run_pathways_tests.yml |  8 ++++++--
 docs/guides/model_bringup.md             | 10 ++++++++++
 2 files changed, 16 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/run_pathways_tests.yml b/.github/workflows/run_pathways_tests.yml
index 60b2dd3d93..aa1a0f7c0f 100644
--- a/.github/workflows/run_pathways_tests.yml
+++ b/.github/workflows/run_pathways_tests.yml
@@ -92,17 +92,21 @@ jobs:
         run : gcloud storage cp gs://maxtext-test-assets/* tests/assets
       - name: Run Tests
         run: |
+          # TODO(b/454659463): Enable test_default_hlo_match after volume mount is supported.
+          PYTEST_MATCHER="not AotHloIdenticalTest and not CompileThenLoad"
           if [ "${{ inputs.is_scheduled_run }}" = "true" ]; then
             FINAL_PYTEST_MARKER="${{ inputs.pytest_marker }}"
           else
             FINAL_PYTEST_MARKER="${{ inputs.pytest_marker }} and not scheduled_only"
+            # TODO(b/496201097): AotJaxprIdenticalTest costs long time due to JAX API get_topology_desc
+            PYTEST_MATCHER="${PYTEST_MATCHER} and not AotJaxprIdenticalTest"
           fi
           export MAXTEXT_REPO_ROOT=$(pwd)
           export MAXTEXT_ASSETS_ROOT=$(pwd)/src/maxtext/assets
           export MAXTEXT_TEST_ASSETS_ROOT=$(pwd)/tests/assets
           export MAXTEXT_PKG_DIR=$(pwd)/src/maxtext
-          # TODO(b/454659463): Enable test_default_hlo_match after volume mount is supported.
-          .venv/bin/python3 -m pytest ${{ inputs.pytest_addopts }} -v -m "${FINAL_PYTEST_MARKER}" -k "not AotHloIdenticalTest and not CompileThenLoad" --durations=0
+
+          .venv/bin/python3 -m pytest ${{ inputs.pytest_addopts }} -v -m "${FINAL_PYTEST_MARKER}" -k "${PYTEST_MATCHER}" --durations=0
         env:
           PYTHONPATH: "${{ github.workspace }}/src"
     services:
diff --git a/docs/guides/model_bringup.md b/docs/guides/model_bringup.md
index fb231a49e6..ba48b739f0 100644
--- a/docs/guides/model_bringup.md
+++ b/docs/guides/model_bringup.md
@@ -76,6 +76,16 @@ Core Strategy:
 - **Forward Pass**: Run the input through both layers. Remember to set the PyTorch model to evaluation mode (`model_pt.eval()`) to disable dropout etc.
 - **Compare Outputs**: Convert the PyTorch output to a JAX array (or NumPy array) and use `numpy.testing.assert_allclose` to check if the outputs are numerically close within a specified tolerance (atol, rtol).
 
+Unit Tests Best Practices (performance perspective):
+
+As unit tests are part of CI/CD workflow, it is critical to keep the test running fast without impacting necessary code coverage. It is recommended to follow below best practices:
+
+- **Prioritize Synthetic Data (`dataset_type=synthetic`)**: Use a synthetic dataset whenever possible. This is significantly faster than using other dataset types that require loading from network storage. Only use a specific dataset type when the test's focus strictly requires it.
+- **Minimize Training Steps**: Keep the number of training steps to the minimum necessary to cover the intended code logic.
+- **Select Smallest Viable Model**: Utilize the smallest model size in unit tests that still provides adequate code coverage.
+- **Prioritize CPU-Only Testing**: To reduce TPU consumption for train compile-related tests, favor using `cpu_only` with a `tpu_backend` over `tpu_only` configurations.
+- **Move Non-Critical Tests to Scheduled-Only**: As a last resort, if the test cannot be sped up using the methods above and is not critical enough to block a Pull Request (PR), move it to run only on a schedule (`scheduled_only`). This is a compromise to improve overall CI performance.
+
 ## 5. End-to-end correctness
 
 This verification process can vary in duration. If you're working with a small model, you're fortunate as it allows for rapid iteration on your development machine. To verify a model's correctness, we could leverage two strategies below - comparing logits and evaluation.

From 792ac1946adc79f895fbd43c4ee27a8abbd04dac Mon Sep 17 00:00:00 2001
From: Charles Li <licharles@google.com>
Date: Tue, 14 Apr 2026 11:16:30 -0700
Subject: [PATCH 2/2] Move test_diloco_two_slices to cpu_only

---
 tests/unit/diloco_test.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/tests/unit/diloco_test.py b/tests/unit/diloco_test.py
index 7a60b9acbd..4c0eac48d0 100644
--- a/tests/unit/diloco_test.py
+++ b/tests/unit/diloco_test.py
@@ -287,7 +287,8 @@ def test_diloco_qwen3_moe_two_slices(self):
         )
     )
 
-  @pytest.mark.tpu_only
+  @pytest.mark.cpu_only
+  @pytest.mark.tpu_backend
   def test_diloco_two_slices(self):
     temp_dir = gettempdir()
     compiled_trainstep_file = os.path.join(temp_dir, "test_compiled_diloco.pickle")