From 43d8ee147b9624c3cee3c97ef48c7a6b561b9d60 Mon Sep 17 00:00:00 2001 From: Tian Gao Date: Thu, 4 Jun 2026 11:01:13 -0700 Subject: [PATCH 1/4] [SPARK-57267][INFRA] Make branch-4.1 scheduled build workflows self-contained ### What changes were proposed in this pull request? This PR makes the remaining `branch-4.1` scheduled build workflow files self-contained, following the same approach as SPARK-57115 (which did this for `build_java17.yml`): - Rewrites `build_java21.yml`, `build_non_ansi.yml`, `build_maven.yml`, `build_maven_java21.yml`, `build_python_3.14.yml`, and `build_python_pypy3.10.yml` so they are triggered by `workflow_dispatch` only (the dormant `schedule:` triggers are removed; scheduled runs on a non-default branch never fire anyway), drop the inherited `branch: master` (so they build `branch-4.1`), and use generic, non-branch-tagged names. - Adds `build_python_3.11.yml`, the equivalent of `build_branch41_python.yml`. The per-build configurations are relocated from the active `build_branch41_*.yml` files on `master` so the coverage matches what `branch-4.1` schedules today. Notes: - `build_and_test.yml` defaults `branch` to `branch-4.1`, so those callers omit `branch`. `maven_test.yml` defaults `branch` to `master` and checks out `inputs.branch`, so `build_maven.yml` / `build_maven_java21.yml` now pass an explicit `branch: branch-4.1`. - A pre-existing invalid trailing comma in the non-ANSI `envs` JSON was fixed during relocation. ### Why are the changes needed? This is the `branch-4.1` side of decoupling our scheduled CI workflows (cf. the `branch-4.x` effort). Scheduled workflows only fire from the default branch, so `branch-4.1` CI should consist of self-contained, dispatchable workflow files on `branch-4.1` that a single scheduler on `master` can trigger. This PR prepares those targets; a follow-up on `master` will add the scheduler and remove the `build_branch41_*.yml` files. ### Does this PR introduce _any_ user-facing change? No. CI only. 
### How was this patch tested? These workflows can be triggered manually via `workflow_dispatch` once merged. ### Was this patch authored or co-authored using generative AI tooling? Generated-by: Claude Code (Claude Opus 4.8) Co-authored-by: Isaac --- .github/workflows/build_java21.yml | 5 +-- .github/workflows/build_maven.yml | 6 +-- .github/workflows/build_maven_java21.yml | 5 +-- .github/workflows/build_non_ansi.yml | 10 ++--- .github/workflows/build_python_3.11.yml | 44 +++++++++++++++++++++ .github/workflows/build_python_3.14.yml | 5 +-- .github/workflows/build_python_pypy3.10.yml | 5 +-- 7 files changed, 55 insertions(+), 25 deletions(-) create mode 100644 .github/workflows/build_python_3.11.yml diff --git a/.github/workflows/build_java21.yml b/.github/workflows/build_java21.yml index c9a6ed270546c..62da995475fd9 100644 --- a/.github/workflows/build_java21.yml +++ b/.github/workflows/build_java21.yml @@ -17,11 +17,9 @@ # under the License. # -name: "Build / Java21 (master, Scala 2.13, Hadoop 3, JDK 21)" +name: "Build / Java21 (Scala 2.13, Hadoop 3, JDK 21)" on: - schedule: - - cron: '0 4 * * *' workflow_dispatch: jobs: @@ -33,7 +31,6 @@ jobs: if: github.repository == 'apache/spark' with: java: 21 - branch: master hadoop: hadoop3 envs: >- { diff --git a/.github/workflows/build_maven.yml b/.github/workflows/build_maven.yml index e047390add6f9..dfae820dfef8c 100644 --- a/.github/workflows/build_maven.yml +++ b/.github/workflows/build_maven.yml @@ -17,11 +17,9 @@ # under the License. 
# -name: "Build / Maven (master, Scala 2.13, Hadoop 3, JDK 17)" +name: "Build / Maven (Scala 2.13, Hadoop 3, JDK 17)" on: - schedule: - - cron: '0 13 * * *' workflow_dispatch: jobs: @@ -31,3 +29,5 @@ jobs: name: Run uses: ./.github/workflows/maven_test.yml if: github.repository == 'apache/spark' + with: + branch: branch-4.1 diff --git a/.github/workflows/build_maven_java21.yml b/.github/workflows/build_maven_java21.yml index 9fbc7b84383f0..300946a4d4a74 100644 --- a/.github/workflows/build_maven_java21.yml +++ b/.github/workflows/build_maven_java21.yml @@ -17,11 +17,9 @@ # under the License. # -name: "Build / Maven (master, Scala 2.13, Hadoop 3, JDK 21)" +name: "Build / Maven (Scala 2.13, Hadoop 3, JDK 21)" on: - schedule: - - cron: '0 14 * * *' workflow_dispatch: jobs: @@ -32,4 +30,5 @@ jobs: uses: ./.github/workflows/maven_test.yml if: github.repository == 'apache/spark' with: + branch: branch-4.1 java: 21 diff --git a/.github/workflows/build_non_ansi.yml b/.github/workflows/build_non_ansi.yml index debdaf4f8709d..520801deb35b2 100644 --- a/.github/workflows/build_non_ansi.yml +++ b/.github/workflows/build_non_ansi.yml @@ -17,11 +17,9 @@ # under the License. 
# -name: "Build / Non-ANSI (master, Hadoop 3, JDK 17, Scala 2.13)" +name: "Build / Non-ANSI (Hadoop 3, JDK 17, Scala 2.13)" on: - schedule: - - cron: '0 1 * * *' workflow_dispatch: jobs: @@ -33,21 +31,19 @@ jobs: if: github.repository == 'apache/spark' with: java: 17 - branch: master hadoop: hadoop3 envs: >- { "PYSPARK_IMAGE_TO_TEST": "python-311", "PYTHON_TO_TEST": "python3.11", - "SPARK_ANSI_SQL_MODE": "false", - "SPARK_TEST_SPARK_BLOOM_FILTER_SUITE_ENABLED": "true" + "SPARK_ANSI_SQL_MODE": "false" } jobs: >- { "build": "true", + "build-core-utils": "false", "docs": "true", "pyspark": "true", - "pyspark-pandas": "true", "sparkr": "true", "tpcds-1g": "true", "docker-integration-tests": "true", diff --git a/.github/workflows/build_python_3.11.yml b/.github/workflows/build_python_3.11.yml new file mode 100644 index 0000000000000..c3891cd72353a --- /dev/null +++ b/.github/workflows/build_python_3.11.yml @@ -0,0 +1,44 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+# + +name: "Build / Python-only (Python 3.11)" + +on: + workflow_dispatch: + +jobs: + run-build: + permissions: + packages: write + name: Run + uses: ./.github/workflows/build_and_test.yml + if: github.repository == 'apache/spark' + with: + java: 17 + hadoop: hadoop3 + envs: >- + { + "PYSPARK_IMAGE_TO_TEST": "python-311", + "PYTHON_TO_TEST": "python3.11" + } + jobs: >- + { + "pyspark": "true", + "pyspark-pandas": "true" + } diff --git a/.github/workflows/build_python_3.14.yml b/.github/workflows/build_python_3.14.yml index 45ea43f1d491c..90c368aa0a4c2 100644 --- a/.github/workflows/build_python_3.14.yml +++ b/.github/workflows/build_python_3.14.yml @@ -17,11 +17,9 @@ # under the License. # -name: "Build / Python-only (master, Python 3.14)" +name: "Build / Python-only (Python 3.14)" on: - schedule: - - cron: '0 21 * * *' workflow_dispatch: jobs: @@ -33,7 +31,6 @@ jobs: if: github.repository == 'apache/spark' with: java: 17 - branch: master hadoop: hadoop3 envs: >- { diff --git a/.github/workflows/build_python_pypy3.10.yml b/.github/workflows/build_python_pypy3.10.yml index 0bd2ef03ce77c..3b892240648b6 100644 --- a/.github/workflows/build_python_pypy3.10.yml +++ b/.github/workflows/build_python_pypy3.10.yml @@ -17,11 +17,9 @@ # under the License. # -name: "Build / Python-only (master, PyPy 3.10)" +name: "Build / Python-only (PyPy 3.10)" on: - schedule: - - cron: '0 15 * * *' workflow_dispatch: jobs: @@ -33,7 +31,6 @@ jobs: if: github.repository == 'apache/spark' with: java: 17 - branch: master hadoop: hadoop3 envs: >- { From 7898bf664d1cf319c27be8d2c8e2b62a0af26a72 Mon Sep 17 00:00:00 2001 From: Tian Gao Date: Thu, 4 Jun 2026 11:13:24 -0700 Subject: [PATCH 2/4] [SPARK-57267][INFRA] Default maven_test.yml branch to branch-4.1 Bump the `branch` input default in `maven_test.yml` from `master` to `branch-4.1`, matching `build_and_test.yml` (which was already bumped when the branch was cut). 
The maven default was missed, so maven callers that omit `branch` were checking out `apache/spark@master`. With the default fixed, `build_maven.yml` and `build_maven_java21.yml` no longer need an explicit `branch: branch-4.1` (now identical to their `branch-4.x` counterparts). This also corrects `build_maven_java21_macos26.yml` and `build_maven_java21_arm.yml`, which omit `branch` and were likewise defaulting to master. The `build_branch40_maven*.yml` files pass `branch: branch-4.0` explicitly and are unaffected. Co-authored-by: Isaac --- .github/workflows/build_maven.yml | 2 -- .github/workflows/build_maven_java21.yml | 1 - .github/workflows/maven_test.yml | 2 +- 3 files changed, 1 insertion(+), 4 deletions(-) diff --git a/.github/workflows/build_maven.yml b/.github/workflows/build_maven.yml index dfae820dfef8c..4dedac6cee8b9 100644 --- a/.github/workflows/build_maven.yml +++ b/.github/workflows/build_maven.yml @@ -29,5 +29,3 @@ jobs: name: Run uses: ./.github/workflows/maven_test.yml if: github.repository == 'apache/spark' - with: - branch: branch-4.1 diff --git a/.github/workflows/build_maven_java21.yml b/.github/workflows/build_maven_java21.yml index 300946a4d4a74..2a20b8ef1f405 100644 --- a/.github/workflows/build_maven_java21.yml +++ b/.github/workflows/build_maven_java21.yml @@ -30,5 +30,4 @@ jobs: uses: ./.github/workflows/maven_test.yml if: github.repository == 'apache/spark' with: - branch: branch-4.1 java: 21 diff --git a/.github/workflows/maven_test.yml b/.github/workflows/maven_test.yml index 580593f1cfe5e..155c2bceb5887 100644 --- a/.github/workflows/maven_test.yml +++ b/.github/workflows/maven_test.yml @@ -30,7 +30,7 @@ on: description: Branch to run the build against required: false type: string - default: master + default: branch-4.1 hadoop: description: Hadoop version to run with. HADOOP_PROFILE environment variable should accept it. 
required: false From fa663f597da177f813d68e79eeb96ad0d0c01bb0 Mon Sep 17 00:00:00 2001 From: Tian Gao Date: Thu, 4 Jun 2026 11:22:48 -0700 Subject: [PATCH 3/4] [SPARK-57267][INFRA] Default python_hosted_runner_test.yml branch to branch-4.1 Same fix as `maven_test.yml`: bump the `branch` input default in `python_hosted_runner_test.yml` from `master` to `branch-4.1`. Its callers `build_python_3.11_macos26.yml` and `build_python_3.11_arm.yml` omit `branch`, so they were checking out `apache/spark@master` (the input is used as the checkout `ref`). `release.yml` keeps `default: master` for its `workflow_dispatch` input, where the operator selects the branch to release. Co-authored-by: Isaac --- .github/workflows/python_hosted_runner_test.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/python_hosted_runner_test.yml b/.github/workflows/python_hosted_runner_test.yml index 659171b901d3f..d5976223e5216 100644 --- a/.github/workflows/python_hosted_runner_test.yml +++ b/.github/workflows/python_hosted_runner_test.yml @@ -34,7 +34,7 @@ on: description: Branch to run the build against required: false type: string - default: master + default: branch-4.1 hadoop: description: Hadoop version to run with. HADOOP_PROFILE environment variable should accept it. required: false From 5a1032873bd5e49f69e9e035c4bc33dd2b94db36 Mon Sep 17 00:00:00 2001 From: Tian Gao Date: Thu, 4 Jun 2026 11:25:39 -0700 Subject: [PATCH 4/4] Revert "[SPARK-57267][INFRA] Default python_hosted_runner_test.yml branch to branch-4.1" This reverts commit fa663f597da177f813d68e79eeb96ad0d0c01bb0. 
--- .github/workflows/python_hosted_runner_test.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/python_hosted_runner_test.yml b/.github/workflows/python_hosted_runner_test.yml index d5976223e5216..659171b901d3f 100644 --- a/.github/workflows/python_hosted_runner_test.yml +++ b/.github/workflows/python_hosted_runner_test.yml @@ -34,7 +34,7 @@ on: description: Branch to run the build against required: false type: string - default: branch-4.1 + default: master hadoop: description: Hadoop version to run with. HADOOP_PROFILE environment variable should accept it. required: false