Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 11 additions & 1 deletion AGENTS.md
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,17 @@ Each annotation contains the test class, test name, and failure message.

## Pull Request Workflow

PR title requires a JIRA ticket ID (e.g., `[SPARK-xxxx][SQL] Title`). Ask the user to create a new ticket or provide an existing one if not given. Before writing the PR description, read `.github/PULL_REQUEST_TEMPLATE` and fill in every section from that file.
PR title format is `[SPARK-xxxx][Component] Title`. Infer the PR title from the changes. If no ticket ID is given, create one using `dev/create_spark_jira.py`, using the PR title (without the JIRA ID and component tag) as the ticket title.

python3 dev/create_spark_jira.py "<title>" -c <component> { -t <type> | -p <parent-jira-id> }

- **Component** (`-c`): e.g. "SQL", "Spark Core", "PySpark", "Connect". Run `python3 dev/create_spark_jira.py --list-components` for the full list.
- **Issue type** (`-t`): "Bug", "Improvement", "New Feature", "Test", "Documentation", or "Dependency upgrade".
- **Parent** (`-p`): if the user mentions a parent JIRA ticket (e.g., "this is a subtask of SPARK-12345"), pass it instead of `-t`. The issue type is automatically "Sub-task".

The script sets the latest unreleased version as the default affected version. Ask the user to review and adjust versions and other fields on the JIRA ticket after creation.

Before writing the PR description, read `.github/PULL_REQUEST_TEMPLATE` and fill in every section from that file.

DO NOT push to the upstream repo. Always push to the personal fork. Open PRs against `master` on the upstream repo.

Expand Down
113 changes: 113 additions & 0 deletions dev/create_jira_and_branch.py
Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@dongjoon-hyun the old script is kept with a more accurate name, as it does more than jira creation. dev/spark_jira_utils.py is created to share code between the two scripts.

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

PR description also updated.

Original file line number Diff line number Diff line change
@@ -0,0 +1,113 @@
#!/usr/bin/env python3

#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

import argparse
import subprocess
import sys
import traceback

from spark_jira_utils import create_jira_issue, fail, get_jira_client


def run_cmd(cmd):
    """Echo a command, run it, and return its standard output as text.

    Args:
        cmd: Either a list of arguments (used as-is) or a single command
            string, which is tokenized with shell-like rules.

    Returns:
        The command's standard output decoded as UTF-8.

    Raises:
        subprocess.CalledProcessError: If the command exits with a non-zero
            status.
    """
    import shlex  # local import keeps this block self-contained

    print(cmd)
    # A plain ``cmd.split(" ")`` produces empty arguments for consecutive
    # spaces and tears quoted arguments apart; shlex.split handles both.
    argv = cmd if isinstance(cmd, list) else shlex.split(cmd)
    return subprocess.check_output(argv).decode("utf-8")


def create_and_checkout_branch(jira_id):
    """Create a git branch named ``jira_id`` and check it out.

    Runs ``git checkout -b <jira_id>`` through ``run_cmd``. A failing git
    command (``subprocess.CalledProcessError``) is reported through ``fail``.
    """
    checkout_cmd = "git checkout -b %s" % jira_id
    try:
        run_cmd(checkout_cmd)
    except subprocess.CalledProcessError as err:
        fail("Failed to create branch %s: %s" % (jira_id, err))
    else:
        print("Created and checked out branch: %s" % jira_id)


def create_commit(jira_id, title):
    """Run ``git commit -a`` with the message ``[<jira_id>] <title>``.

    The command goes through ``run_cmd`` in list form, so the message is
    passed to git as a single argument. A failing git command
    (``subprocess.CalledProcessError``) is reported through ``fail``.
    """
    commit_args = ["git", "commit", "-a", "-m", "[%s] %s" % (jira_id, title)]
    try:
        run_cmd(commit_args)
    except subprocess.CalledProcessError as err:
        fail("Failed to create commit: %s" % err)
    else:
        print("Created a commit with message: [%s] %s" % (jira_id, title))


def choose_components(asf_jira):
    """Interactively pick one non-archived component of the SPARK project.

    Prints a numbered list of the components returned by
    ``asf_jira.project_components("SPARK")`` (skipping archived ones) and
    keeps prompting until the user enters a number from that list.

    Args:
        asf_jira: JIRA client used to look up the project's components.

    Returns:
        The name of the chosen component.
    """
    active = [
        comp
        for comp in asf_jira.project_components("SPARK")
        if not comp.raw.get("archived", False)
    ]
    for number, comp in enumerate(active, start=1):
        print("%d. %s" % (number, comp.name))

    while True:
        try:
            # The menu is 1-based; convert the answer to a 0-based index.
            position = int(input("Please choose a component by number: ")) - 1
        except ValueError:
            print("Invalid input. Please enter a number.")
            continue
        if 0 <= position < len(active):
            return active[position].name
        print("Invalid number. Please try again.")


def main():
    """Create a Spark JIRA issue, then a local branch and commit named after it.

    Parses the command line, creates the issue through ``create_jira_issue``
    (as a subtask when ``--parent`` is given), checks out a new branch named
    after the returned JIRA ID, and commits with ``[<jira_id>] <title>`` as
    the message. If no component is passed, the user is prompted to choose
    one interactively.
    """
    parser = argparse.ArgumentParser(description="Create a Spark JIRA issue.")
    parser.add_argument("title", nargs="?", help="Title of the JIRA issue")
    parser.add_argument("-p", "--parent", help="Parent JIRA ID for subtasks")
    parser.add_argument(
        "-t",
        "--type",
        help="Issue type to create when no parent is specified (e.g. Bug). Defaults to Improvement.",
    )
    parser.add_argument("-v", "--version", help="Version to use for the issue")
    parser.add_argument("-c", "--component", help="Component for the issue")
    args = parser.parse_args()

    # Validate the arguments before creating the JIRA client, so a missing
    # title fails immediately instead of after a parent lookup and a
    # misleading "Creating ... with title: None" message.
    if not args.title:
        parser.error("the following arguments are required: title")

    asf_jira = get_jira_client()

    if args.parent:
        parent_issue = asf_jira.issue(args.parent)
        print("Parent issue title: %s" % parent_issue.fields.summary)
        print("Creating a subtask of %s with title: %s" % (args.parent, args.title))
    else:
        print("Creating JIRA issue with title: %s" % args.title)

    if not args.component:
        args.component = choose_components(asf_jira)

    jira_id = create_jira_issue(
        asf_jira,
        args.title,
        args.component,
        parent=args.parent,
        issue_type=args.type,
        version=args.version,
    )
    print("Created JIRA issue: %s" % jira_id)

    create_and_checkout_branch(jira_id)

    create_commit(jira_id, args.title)


if __name__ == "__main__":
    # Script entry point: any unexpected exception is shown with its full
    # traceback and turned into a non-zero exit status.
    try:
        main()
    except Exception:
        traceback.print_exc()
        raise SystemExit(-1)
152 changes: 23 additions & 129 deletions dev/create_spark_jira.py
Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I can add back the old script if people still need it, but under a different name, as create_spark_jira.py should only create a ticket.

Original file line number Diff line number Diff line change
Expand Up @@ -18,158 +18,52 @@
#

import argparse
import os
import re
import subprocess
import sys
import traceback

try:
import jira.client

JIRA_IMPORTED = True
except ImportError:
JIRA_IMPORTED = False

# ASF JIRA access token
JIRA_ACCESS_TOKEN = os.environ.get("JIRA_ACCESS_TOKEN")
JIRA_API_BASE = "https://issues.apache.org/jira"


def fail(msg):
    """Print ``msg`` and terminate the process with exit status -1."""
    print(msg)
    raise SystemExit(-1)


def run_cmd(cmd):
    """Echo a command, run it, and return its standard output as text.

    Args:
        cmd: Either a list of arguments (used as-is) or a single command
            string, which is tokenized with shell-like rules.

    Returns:
        The command's standard output decoded as UTF-8.

    Raises:
        subprocess.CalledProcessError: If the command exits with a non-zero
            status.
    """
    import shlex  # local import keeps this block self-contained

    print(cmd)
    # A plain ``cmd.split(" ")`` produces empty arguments for consecutive
    # spaces and tears quoted arguments apart; shlex.split handles both.
    argv = cmd if isinstance(cmd, list) else shlex.split(cmd)
    return subprocess.check_output(argv).decode("utf-8")


def create_jira_issue(title, parent_jira_id=None, issue_type=None, version=None, component=None):
    """Create an issue in the SPARK JIRA project and return its key.

    Args:
        title: Summary of the new issue.
        parent_jira_id: If given, the issue is created as a "Sub-task" of
            this parent and ``issue_type`` is ignored.
        issue_type: Issue type name used when there is no parent; defaults
            to "Improvement".
        version: Affected version. When omitted, the highest unreleased,
            unarchived ``x.y.z`` version of the project is used.
        component: Optional component name to attach to the issue.

    Returns:
        The key of the newly created issue.

    If the issue cannot be created, or no candidate affected version exists,
    the error is reported through ``fail``.
    """
    asf_jira = jira.client.JIRA(
        {"server": JIRA_API_BASE}, token_auth=JIRA_ACCESS_TOKEN, timeout=(3.05, 30)
    )

    if version:
        affected_version = version
    else:
        # Consider only x.y.z, unreleased, unarchived versions.
        version_pattern = re.compile(r"(\d+)\.(\d+)\.(\d+)")
        candidates = []
        for v in asf_jira.project_versions("SPARK"):
            if v.raw["released"] or v.raw["archived"]:
                continue
            matched = version_pattern.match(v.name)
            if matched:
                # Compare numerically: as plain strings "4.10.0" would sort
                # below "4.2.0" and the wrong "latest" version would win.
                numeric_key = tuple(int(part) for part in matched.groups())
                candidates.append((numeric_key, v.name))
        if not candidates:
            fail("Could not find an unreleased SPARK version to use as the affected version.")
        affected_version = max(candidates)[1]

    issue_dict = {
        "project": {"key": "SPARK"},
        "summary": title,
        "description": "",
        "versions": [{"name": affected_version}],
    }

    if component:
        issue_dict["components"] = [{"name": component}]

    if parent_jira_id:
        issue_dict["issuetype"] = {"name": "Sub-task"}
        issue_dict["parent"] = {"key": parent_jira_id}
    else:
        issue_dict["issuetype"] = {"name": issue_type if issue_type else "Improvement"}

    try:
        new_issue = asf_jira.create_issue(fields=issue_dict)
        return new_issue.key
    except Exception as e:
        fail("Failed to create JIRA issue: %s" % e)


def create_and_checkout_branch(jira_id):
    """Create a git branch named ``jira_id`` and check it out.

    Runs ``git checkout -b <jira_id>`` through ``run_cmd``. A failing git
    command (``subprocess.CalledProcessError``) is reported through ``fail``.
    """
    checkout_cmd = "git checkout -b %s" % jira_id
    try:
        run_cmd(checkout_cmd)
    except subprocess.CalledProcessError as err:
        fail("Failed to create branch %s: %s" % (jira_id, err))
    else:
        print("Created and checked out branch: %s" % jira_id)


def create_commit(jira_id, title):
    """Run ``git commit -a`` with the message ``[<jira_id>] <title>``.

    The command goes through ``run_cmd`` in list form, so the message is
    passed to git as a single argument. A failing git command
    (``subprocess.CalledProcessError``) is reported through ``fail``.
    """
    commit_args = ["git", "commit", "-a", "-m", "[%s] %s" % (jira_id, title)]
    try:
        run_cmd(commit_args)
    except subprocess.CalledProcessError as err:
        fail("Failed to create commit: %s" % err)
    else:
        print("Created a commit with message: [%s] %s" % (jira_id, title))


def choose_components():
    """Interactively pick one non-archived component of the SPARK project.

    Opens a JIRA client with the module's server URL and access token,
    prints a numbered list of the project's components (skipping archived
    ones), and keeps prompting until the user enters a number from that list.

    Returns:
        The name of the chosen component.
    """
    client = jira.client.JIRA(
        {"server": JIRA_API_BASE}, token_auth=JIRA_ACCESS_TOKEN, timeout=(3.05, 30)
    )
    active = [
        comp
        for comp in client.project_components("SPARK")
        if not comp.raw.get("archived", False)
    ]
    for number, comp in enumerate(active, start=1):
        print("%d. %s" % (number, comp.name))

    while True:
        try:
            # The menu is 1-based; convert the answer to a 0-based index.
            position = int(input("Please choose a component by number: ")) - 1
        except ValueError:
            print("Invalid input. Please enter a number.")
            continue
        if 0 <= position < len(active):
            return active[position].name
        print("Invalid number. Please try again.")
from spark_jira_utils import create_jira_issue, get_jira_client, list_components


def main():
if not JIRA_IMPORTED:
fail("Could not find jira-python library. Run 'sudo pip3 install jira' to install.")

if not JIRA_ACCESS_TOKEN:
fail("The env-var JIRA_ACCESS_TOKEN is not set.")

parser = argparse.ArgumentParser(description="Create a Spark JIRA issue.")
parser.add_argument("title", nargs="?", help="Title of the JIRA issue")
parser.add_argument("-p", "--parent", help="Parent JIRA ID for subtasks")
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I always use this parent JIRA ID feature. Please restore it, @cloud-fan.

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

fixed

parser.add_argument(
"-p",
"--parent",
help="Parent JIRA ID to create a subtask (e.g. SPARK-12345).",
)
parser.add_argument(
"-t",
"--type",
help="Issue type to create when no parent is specified (e.g. Bug). Defaults to Improvement.",
help="Issue type (e.g. Bug, Improvement)",
)
parser.add_argument("-v", "--version", help="Version to use for the issue")
parser.add_argument("-c", "--component", help="Component for the issue")
parser.add_argument(
"--list-components", action="store_true", help="List available components and exit"
)
args = parser.parse_args()

if args.parent:
asf_jira = jira.client.JIRA(
{"server": JIRA_API_BASE}, token_auth=JIRA_ACCESS_TOKEN, timeout=(3.05, 30)
)
parent_issue = asf_jira.issue(args.parent)
print("Parent issue title: %s" % parent_issue.fields.summary)
print("Creating a subtask of %s with title: %s" % (args.parent, args.title))
else:
print("Creating JIRA issue with title: %s" % args.title)
if args.list_components:
asf_jira = get_jira_client()
list_components(asf_jira)
return

if not args.title:
parser.error("the following arguments are required: title")

if not args.component:
args.component = choose_components()
parser.error("-c/--component is required")

jira_id = create_jira_issue(args.title, args.parent, args.type, args.version, args.component)
print("Created JIRA issue: %s" % jira_id)
if args.parent and args.type:
parser.error("--parent and --type cannot be used together")

create_and_checkout_branch(jira_id)
if not args.parent and not args.type:
parser.error("-t/--type is required when not creating a subtask")

create_commit(jira_id, args.title)
asf_jira = get_jira_client()
jira_id = create_jira_issue(asf_jira, args.title, args.component,
parent=args.parent, issue_type=args.type)
print(jira_id)


if __name__ == "__main__":
Expand Down
Loading