diff --git a/dev/sparktestsupport/modules.py b/dev/sparktestsupport/modules.py
index d914f04b50271..3979795b85a40 100644
--- a/dev/sparktestsupport/modules.py
+++ b/dev/sparktestsupport/modules.py
@@ -1680,6 +1680,40 @@ def __hash__(self):
     test_tags=["org.apache.spark.tags.DockerTest"],
 )
 
+
+# dev_tools is a pseudo module that groups dev-related files which do not
+# affect the CI build or tests (apart from CI itself, which is forced to run
+# anyway), so that modifying them does not trigger any tests to run.
+# The entries below are regular expressions: literal dots are escaped so
+# that e.g. ".asf.yaml" cannot match an unrelated name such as "xasf-yaml".
+dev_tools = Module(
+    name="dev-tools",
+    dependencies=[],
+    source_file_regexes=[
+        r".*README\.md",
+        r".*AGENTS\.md",
+        r".*\.gitignore",
+        r".*CONTRIBUTING\.md",
+        r"\.asf\.yaml",
+        r"SECURITY\.md",
+        "NOTICE-binary",
+        "LICENSE-binary",
+        r"ui-test/package\.json",
+        r"ui-test/package-lock\.json",
+        r"scalastyle-config\.xml",
+        r"dev/checkstyle\.xml",
+        r"dev/checkstyle-suppressions\.xml",
+        "dev/spark-test-image/lint/Dockerfile",
+        "dev/lint-python",
+        "dev/lint-scala",
+        "dev/reformat-python",
+        r"dev/structured_logging_style\.py",
+        r"dev/merge_spark_pr\.py",
+        r"dev/create_spark_jira\.py",
+        "dev/create-release/",
+    ],
+)
+
 # The root module is a dummy module which is used to run all of the tests.
 # No other modules should directly depend on this module.
 root = Module(
diff --git a/dev/sparktestsupport/utils.py b/dev/sparktestsupport/utils.py
index b969b96a16c52..fe795303e63f3 100755
--- a/dev/sparktestsupport/utils.py
+++ b/dev/sparktestsupport/utils.py
@@ -33,30 +33,15 @@ def determine_modules_for_files(filenames):
     """
     Given a list of filenames, return the set of modules that contain those files.
     If a file is not associated with a more specific submodule, then this method will consider that
-    file to belong to the 'root' module. `.github` directory is counted only in GitHub Actions,
-    and `README.md`, `AGENTS.md`, `CONTRIBUTING.md` are always ignored.
+    file to belong to the 'root' module. `.github` directory is counted only in GitHub Actions.
 
     >>> sorted(x.name for x in determine_modules_for_files(["python/pyspark/a.py", "sql/core/foo"]))
     ['pyspark-core', 'pyspark-errors', 'sql']
     >>> [x.name for x in determine_modules_for_files(["file_not_matched_by_any_subproject"])]
     ['root']
-    >>> [x.name for x in determine_modules_for_files(["sql/README.md"])]
-    []
-    >>> [x.name for x in determine_modules_for_files(["AGENTS.md"])]
-    []
-    >>> [x.name for x in determine_modules_for_files(["CONTRIBUTING.md"])]
-    []
     """
     changed_modules = set()
     for filename in filenames:
-        if filename.endswith(("README.md", "AGENTS.md", "CONTRIBUTING.md")):
-            continue
-        if filename in (
-            "scalastyle-config.xml",
-            "dev/checkstyle.xml",
-            "dev/checkstyle-suppressions.xml",
-        ):
-            continue
         if ("GITHUB_ACTIONS" not in os.environ) and filename.startswith(".github"):
             continue
         matched_at_least_one_module = False