diff --git a/AGENTS.md b/AGENTS.md
index 62732904dd3a..fd20f33248d9 100644
--- a/AGENTS.md
+++ b/AGENTS.md
@@ -92,7 +92,17 @@ Each annotation contains the test class, test name, and failure message.
## Pull Request Workflow
-PR title requires a JIRA ticket ID (e.g., `[SPARK-xxxx][SQL] Title`). Ask the user to create a new ticket or provide an existing one if not given. Before writing the PR description, read `.github/PULL_REQUEST_TEMPLATE` and fill in every section from that file.
+PR title format is `[SPARK-xxxx][Component] Title`. Infer the PR title from the changes. If no ticket ID is given, create one using `dev/create_spark_jira.py`, using the PR title (without the JIRA ID and component tag) as the ticket title.
+
+    python3 dev/create_spark_jira.py "<title>" -c <component> { -t <type> | -p <parent> }
+
+- **Component** (`-c`): e.g. "SQL", "Spark Core", "PySpark", "Connect". Run `python3 dev/create_spark_jira.py --list-components` for the full list.
+- **Issue type** (`-t`): "Bug", "Improvement", "New Feature", "Test", "Documentation", or "Dependency upgrade".
+- **Parent** (`-p`): if the user mentions a parent JIRA ticket (e.g., "this is a subtask of SPARK-12345"), pass it instead of `-t`. The issue type is automatically "Sub-task".
+
+The script sets the latest unreleased version as the default affected version. Ask the user to review and adjust versions and other fields on the JIRA ticket after creation.
+
+Before writing the PR description, read `.github/PULL_REQUEST_TEMPLATE` and fill in every section from that file.
DO NOT push to the upstream repo. Always push to the personal fork. Open PRs against `master` on the upstream repo.
diff --git a/dev/create_jira_and_branch.py b/dev/create_jira_and_branch.py
new file mode 100755
index 000000000000..10d1f55aa206
--- /dev/null
+++ b/dev/create_jira_and_branch.py
@@ -0,0 +1,113 @@
+#!/usr/bin/env python3
+
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+import argparse
+import subprocess
+import sys
+import traceback
+
+from spark_jira_utils import create_jira_issue, fail, get_jira_client
+
+
+def run_cmd(cmd):
+    """Echo `cmd`, execute it, and return its standard output decoded as UTF-8.
+
+    `cmd` may be an argv list, which is passed through unchanged, or a plain
+    string, which is split on single spaces to build the argv list.
+    """
+    print(cmd)
+    argv = cmd if isinstance(cmd, list) else cmd.split(" ")
+    return subprocess.check_output(argv).decode("utf-8")
+
+
+def create_and_checkout_branch(jira_id):
+    """Create a git branch named `jira_id` and switch to it.
+
+    Reports the failure through `fail` when the git command exits non-zero.
+    """
+    try:
+        run_cmd("git checkout -b %s" % jira_id)
+    except subprocess.CalledProcessError as e:
+        fail("Failed to create branch %s: %s" % (jira_id, e))
+    else:
+        print("Created and checked out branch: %s" % jira_id)
+
+
+def create_commit(jira_id, title):
+    """Run `git commit -a` with the message "[<jira_id>] <title>".
+
+    Reports the failure through `fail` when the git command exits non-zero.
+    """
+    commit_cmd = ["git", "commit", "-a", "-m", "[%s] %s" % (jira_id, title)]
+    try:
+        run_cmd(commit_cmd)
+    except subprocess.CalledProcessError as e:
+        fail("Failed to create commit: %s" % e)
+    else:
+        print("Created a commit with message: [%s] %s" % (jira_id, title))
+
+
+def choose_components(asf_jira):
+    """Interactively pick one non-archived SPARK component and return its name.
+
+    Prints a numbered menu of the components reported by `asf_jira`, then keeps
+    prompting until the user enters a number that falls inside the menu.
+    """
+    active = [
+        comp
+        for comp in asf_jira.project_components("SPARK")
+        if not comp.raw.get("archived", False)
+    ]
+    for number, comp in enumerate(active, start=1):
+        print("%d. %s" % (number, comp.name))
+
+    while True:
+        answer = input("Please choose a component by number: ")
+        try:
+            position = int(answer) - 1
+        except ValueError:
+            print("Invalid input. Please enter a number.")
+            continue
+        if position < 0 or position >= len(active):
+            print("Invalid number. Please try again.")
+            continue
+        return active[position].name
+
+
+def main():
+    """Create a Spark JIRA issue, then a matching git branch and first commit.
+
+    Parses the command line, creates the issue through `create_jira_issue`,
+    checks out a new branch named after the returned issue key, and runs
+    `git commit -a` with the message "[<key>] <title>".  When no component is
+    given with -c/--component, the user is prompted to choose one.
+    """
+    parser = argparse.ArgumentParser(description="Create a Spark JIRA issue.")
+    parser.add_argument("title", nargs="?", help="Title of the JIRA issue")
+    parser.add_argument("-p", "--parent", help="Parent JIRA ID for subtasks")
+    parser.add_argument(
+        "-t",
+        "--type",
+        help="Issue type to create when no parent is specified (e.g. Bug). Defaults to Improvement.",
+    )
+    parser.add_argument("-v", "--version", help="Version to use for the issue")
+    parser.add_argument("-c", "--component", help="Component for the issue")
+    args = parser.parse_args()
+
+    # Validate the title before contacting JIRA, so a missing title fails fast
+    # instead of after a network round-trip and a "title: None" progress message.
+    if not args.title:
+        parser.error("the following arguments are required: title")
+
+    asf_jira = get_jira_client()
+
+    if args.parent:
+        # Fetching the parent both confirms that it exists and lets the user
+        # see its summary before the subtask is created.
+        parent_issue = asf_jira.issue(args.parent)
+        print("Parent issue title: %s" % parent_issue.fields.summary)
+        print("Creating a subtask of %s with title: %s" % (args.parent, args.title))
+    else:
+        print("Creating JIRA issue with title: %s" % args.title)
+
+    if not args.component:
+        args.component = choose_components(asf_jira)
+
+    jira_id = create_jira_issue(asf_jira, args.title, args.component,
+                                parent=args.parent, issue_type=args.type,
+                                version=args.version)
+    print("Created JIRA issue: %s" % jira_id)
+
+    create_and_checkout_branch(jira_id)
+
+    create_commit(jira_id, args.title)
+
+
+# Script entry point: run main() and turn any uncaught exception into a
+# printed traceback followed by sys.exit(-1).
+if __name__ == "__main__":
+ try:
+ main()
+ except Exception:
+ # SystemExit is not a subclass of Exception, so exits raised inside main()
+ # (for example by argparse) are not intercepted here.
+ traceback.print_exc()
+ sys.exit(-1)
diff --git a/dev/create_spark_jira.py b/dev/create_spark_jira.py
index 9259bf3adc81..18ad69ad40f4 100755
--- a/dev/create_spark_jira.py
+++ b/dev/create_spark_jira.py
@@ -18,158 +18,52 @@
#
import argparse
-import os
-import re
-import subprocess
import sys
import traceback
-try:
- import jira.client
-
- JIRA_IMPORTED = True
-except ImportError:
- JIRA_IMPORTED = False
-
-# ASF JIRA access token
-JIRA_ACCESS_TOKEN = os.environ.get("JIRA_ACCESS_TOKEN")
-JIRA_API_BASE = "https://issues.apache.org/jira"
-
-
-def fail(msg):
- print(msg)
- sys.exit(-1)
-
-
-def run_cmd(cmd):
- print(cmd)
- if isinstance(cmd, list):
- return subprocess.check_output(cmd).decode("utf-8")
- else:
- return subprocess.check_output(cmd.split(" ")).decode("utf-8")
-
-
-def create_jira_issue(title, parent_jira_id=None, issue_type=None, version=None, component=None):
- asf_jira = jira.client.JIRA(
- {"server": JIRA_API_BASE}, token_auth=JIRA_ACCESS_TOKEN, timeout=(3.05, 30)
- )
-
- if version:
- affected_version = version
- else:
- versions = asf_jira.project_versions("SPARK")
- # Consider only x.y.z, unreleased, unarchived versions
- versions = [
- x
- for x in versions
- if not x.raw["released"]
- and not x.raw["archived"]
- and re.match(r"\d+\.\d+\.\d+", x.name)
- ]
- versions = sorted(versions, key=lambda x: x.name, reverse=True)
- affected_version = versions[0].name
-
- issue_dict = {
- "project": {"key": "SPARK"},
- "summary": title,
- "description": "",
- "versions": [{"name": affected_version}],
- }
-
- if component:
- issue_dict["components"] = [{"name": component}]
-
- if parent_jira_id:
- issue_dict["issuetype"] = {"name": "Sub-task"}
- issue_dict["parent"] = {"key": parent_jira_id}
- else:
- issue_dict["issuetype"] = {"name": issue_type if issue_type else "Improvement"}
-
- try:
- new_issue = asf_jira.create_issue(fields=issue_dict)
- return new_issue.key
- except Exception as e:
- fail("Failed to create JIRA issue: %s" % e)
-
-
-def create_and_checkout_branch(jira_id):
- try:
- run_cmd("git checkout -b %s" % jira_id)
- print("Created and checked out branch: %s" % jira_id)
- except subprocess.CalledProcessError as e:
- fail("Failed to create branch %s: %s" % (jira_id, e))
-
-
-def create_commit(jira_id, title):
- try:
- run_cmd(["git", "commit", "-a", "-m", "[%s] %s" % (jira_id, title)])
- print("Created a commit with message: [%s] %s" % (jira_id, title))
- except subprocess.CalledProcessError as e:
- fail("Failed to create commit: %s" % e)
-
-
-def choose_components():
- asf_jira = jira.client.JIRA(
- {"server": JIRA_API_BASE}, token_auth=JIRA_ACCESS_TOKEN, timeout=(3.05, 30)
- )
- components = asf_jira.project_components("SPARK")
- components = [c for c in components if not c.raw.get("archived", False)]
- for i, c in enumerate(components):
- print("%d. %s" % (i + 1, c.name))
-
- while True:
- try:
- choice = input("Please choose a component by number: ")
- idx = int(choice) - 1
- if 0 <= idx < len(components):
- return components[idx].name
- else:
- print("Invalid number. Please try again.")
- except ValueError:
- print("Invalid input. Please enter a number.")
+from spark_jira_utils import create_jira_issue, get_jira_client, list_components
def main():
- if not JIRA_IMPORTED:
- fail("Could not find jira-python library. Run 'sudo pip3 install jira' to install.")
-
- if not JIRA_ACCESS_TOKEN:
- fail("The env-var JIRA_ACCESS_TOKEN is not set.")
-
parser = argparse.ArgumentParser(description="Create a Spark JIRA issue.")
parser.add_argument("title", nargs="?", help="Title of the JIRA issue")
- parser.add_argument("-p", "--parent", help="Parent JIRA ID for subtasks")
+ parser.add_argument(
+ "-p",
+ "--parent",
+ help="Parent JIRA ID to create a subtask (e.g. SPARK-12345).",
+ )
parser.add_argument(
"-t",
"--type",
- help="Issue type to create when no parent is specified (e.g. Bug). Defaults to Improvement.",
+ help="Issue type (e.g. Bug, Improvement)",
)
- parser.add_argument("-v", "--version", help="Version to use for the issue")
parser.add_argument("-c", "--component", help="Component for the issue")
+ parser.add_argument(
+ "--list-components", action="store_true", help="List available components and exit"
+ )
args = parser.parse_args()
- if args.parent:
- asf_jira = jira.client.JIRA(
- {"server": JIRA_API_BASE}, token_auth=JIRA_ACCESS_TOKEN, timeout=(3.05, 30)
- )
- parent_issue = asf_jira.issue(args.parent)
- print("Parent issue title: %s" % parent_issue.fields.summary)
- print("Creating a subtask of %s with title: %s" % (args.parent, args.title))
- else:
- print("Creating JIRA issue with title: %s" % args.title)
+ if args.list_components:
+ asf_jira = get_jira_client()
+ list_components(asf_jira)
+ return
if not args.title:
parser.error("the following arguments are required: title")
if not args.component:
- args.component = choose_components()
+ parser.error("-c/--component is required")
- jira_id = create_jira_issue(args.title, args.parent, args.type, args.version, args.component)
- print("Created JIRA issue: %s" % jira_id)
+ if args.parent and args.type:
+ parser.error("--parent and --type cannot be used together")
- create_and_checkout_branch(jira_id)
+ if not args.parent and not args.type:
+ parser.error("-t/--type is required when not creating a subtask")
- create_commit(jira_id, args.title)
+ asf_jira = get_jira_client()
+ jira_id = create_jira_issue(asf_jira, args.title, args.component,
+ parent=args.parent, issue_type=args.type)
+ print(jira_id)
if __name__ == "__main__":
diff --git a/dev/spark_jira_utils.py b/dev/spark_jira_utils.py
new file mode 100644
index 000000000000..b7e5126df00c
--- /dev/null
+++ b/dev/spark_jira_utils.py
@@ -0,0 +1,102 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+import os
+import re
+import sys
+
+try:
+ import jira.client
+
+ JIRA_IMPORTED = True
+except ImportError:
+ JIRA_IMPORTED = False
+
+# ASF JIRA access token
+JIRA_ACCESS_TOKEN = os.environ.get("JIRA_ACCESS_TOKEN")
+JIRA_API_BASE = "https://issues.apache.org/jira"
+
+
+def fail(msg):
+    """Print `msg` and abort by raising SystemExit with exit code -1."""
+    print(msg)
+    raise SystemExit(-1)
+
+
+def get_jira_client():
+    """Build a client for the ASF JIRA server, or exit with a helpful message.
+
+    Both preconditions (the jira library being importable and the
+    JIRA_ACCESS_TOKEN environment variable being set) are checked up front, and
+    every unmet one is listed in a single message passed to `fail`.
+    """
+    preconditions = (
+        (JIRA_IMPORTED, "jira-python library not installed, run 'pip install jira'"),
+        (JIRA_ACCESS_TOKEN, "JIRA_ACCESS_TOKEN env-var not set"),
+    )
+    problems = [message for satisfied, message in preconditions if not satisfied]
+    if problems:
+        fail("Cannot create JIRA ticket automatically (%s). "
+             "Please create the ticket manually at %s"
+             % ("; ".join(problems), JIRA_API_BASE))
+    server_options = {"server": JIRA_API_BASE}
+    return jira.client.JIRA(server_options, token_auth=JIRA_ACCESS_TOKEN, timeout=(3.05, 30))
+
+
+def detect_affected_version(asf_jira):
+    """Return the latest unreleased x.y.z version, or exit.
+
+    Only SPARK versions that are neither released nor archived and whose name
+    starts with a numeric x.y.z triple are considered.  Candidates are ranked
+    by the numeric value of that triple rather than by raw string order, so
+    that e.g. "4.10.0" ranks above "4.9.0".  If nothing qualifies, `fail` is
+    called with a message asking for the ticket to be created manually.
+    """
+    triple = re.compile(r"(\d+)\.(\d+)\.(\d+)")
+    candidates = []
+    for version in asf_jira.project_versions("SPARK"):
+        if version.raw["released"] or version.raw["archived"]:
+            continue
+        matched = triple.match(version.name)
+        if matched:
+            # Rank key: numeric (x, y, z) first, the full name as a tie-breaker.
+            numeric = tuple(int(part) for part in matched.groups())
+            candidates.append((numeric, version.name))
+    if not candidates:
+        fail("Cannot detect affected version. "
+             "Please create the ticket manually at %s" % JIRA_API_BASE)
+    return max(candidates)[1]
+
+
+def list_components(asf_jira):
+    """Print the name of every non-archived SPARK component.
+
+    Names are written one per line in sorted order.
+    """
+    names = [
+        comp.name
+        for comp in asf_jira.project_components("SPARK")
+        if not comp.raw.get("archived", False)
+    ]
+    for name in sorted(names):
+        print(name)
+
+
+def create_jira_issue(asf_jira, title, component, parent=None, issue_type=None, version=None):
+    """Create a SPARK JIRA issue and return its key (e.g. SPARK-12345).
+
+    The affected version is `version` when given, otherwise the one returned by
+    `detect_affected_version`.  With `parent` set, the issue is created as a
+    "Sub-task" of that parent and `issue_type` is ignored; without it, the
+    issue type is `issue_type`, falling back to "Improvement".  Any exception
+    raised while creating the issue is reported through `fail`.
+    """
+    affected_version = version or detect_affected_version(asf_jira)
+
+    fields = {
+        "project": {"key": "SPARK"},
+        "summary": title,
+        "description": "",
+        "versions": [{"name": affected_version}],
+        "components": [{"name": component}],
+    }
+
+    if parent:
+        fields.update(issuetype={"name": "Sub-task"}, parent={"key": parent})
+    else:
+        fields["issuetype"] = {"name": issue_type or "Improvement"}
+
+    try:
+        return asf_jira.create_issue(fields=fields).key
+    except Exception as e:
+        fail("Failed to create JIRA issue: %s" % e)