Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
34 changes: 34 additions & 0 deletions dev/sparktestsupport/modules.py
Original file line number Diff line number Diff line change
Expand Up @@ -1680,6 +1680,40 @@ def __hash__(self):
test_tags=["org.apache.spark.tags.DockerTest"],
)


# dev_tools is a pseudo-module that contains all the dev-related files that
# don't affect the CI build and tests (except for CI, which is forced to
# run anyway).
# This module exists so that modifying the files it covers won't trigger any
# tests to run.
dev_tools = Module(
name="dev-tools",
dependencies=[],
source_file_regexes=[
".*README.md",
".*AGENTS.md",
r".*\.gitignore",
"CONTRIBUTING.md",
".asf.yaml",
"SECURITY.md",
"NOTICE-binary",
"LICENSE-binary",
"ui-test/package.json",
"ui-test/package-lock.json",
"scalastyle-config.xml",
"dev/checkstyle.xml",
"dev/checkstyle-suppressions.xml",
"dev/spark-test-image/lint/Dockerfile",
"dev/lint-python",
"dev/lint-scala",
"dev/reformat-python",
"dev/structured_logging_style.py",
"dev/merge_spark_pr.py",
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We actually have unit tests in this script. Shouldn't we run them?

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't believe we currently run the unit tests (doctests) in this file in our CI. Maybe we should, but this PR has no impact on the current situation.

"dev/create_spark_jira.py",
"dev/create-release/",
],
)

# The root module is a dummy module which is used to run all of the tests.
# No other modules should directly depend on this module.
root = Module(
Expand Down
17 changes: 1 addition & 16 deletions dev/sparktestsupport/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,30 +33,15 @@ def determine_modules_for_files(filenames):
"""
Given a list of filenames, return the set of modules that contain those files.
If a file is not associated with a more specific submodule, then this method will consider that
file to belong to the 'root' module. `.github` directory is counted only in GitHub Actions,
and `README.md`, `AGENTS.md`, `CONTRIBUTING.md` are always ignored.
file to belong to the 'root' module. `.github` directory is counted only in GitHub Actions.

>>> sorted(x.name for x in determine_modules_for_files(["python/pyspark/a.py", "sql/core/foo"]))
['pyspark-core', 'pyspark-errors', 'sql']
>>> [x.name for x in determine_modules_for_files(["file_not_matched_by_any_subproject"])]
['root']
>>> [x.name for x in determine_modules_for_files(["sql/README.md"])]
[]
>>> [x.name for x in determine_modules_for_files(["AGENTS.md"])]
[]
>>> [x.name for x in determine_modules_for_files(["CONTRIBUTING.md"])]
[]
"""
changed_modules = set()
for filename in filenames:
if filename.endswith(("README.md", "AGENTS.md", "CONTRIBUTING.md")):
continue
if filename in (
"scalastyle-config.xml",
"dev/checkstyle.xml",
"dev/checkstyle-suppressions.xml",
):
continue
if ("GITHUB_ACTIONS" not in os.environ) and filename.startswith(".github"):
continue
matched_at_least_one_module = False
Expand Down