diff --git a/.bazelrc b/.bazelrc index e9768ddf..842fbd1f 100644 --- a/.bazelrc +++ b/.bazelrc @@ -13,3 +13,13 @@ build --protocopt=--experimental_allow_proto3_optional # parameter 'user_link_flags' is deprecated and will be removed soon. # It may be temporarily re-enabled by setting --incompatible_require_linker_input_cc_api=false build --incompatible_require_linker_input_cc_api=false + +# Force use of protoc from com_google_protobuf +build --proto_compiler=@com_google_protobuf//:protoc + +# Add include path for Protobuf headers +build --cxxopt="-Iexternal/com_google_protobuf/src" +build --host_cxxopt="-Iexternal/com_google_protobuf/src" + +# Disable Bzlmod to avoid conflicts with WORKSPACE +common --noenable_bzlmod diff --git a/.bazelversion b/.bazelversion index f22d756d..1985849f 100644 --- a/.bazelversion +++ b/.bazelversion @@ -1 +1 @@ -6.5.0 +7.7.0 diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml deleted file mode 100644 index a48e8684..00000000 --- a/.github/workflows/build.yml +++ /dev/null @@ -1,54 +0,0 @@ -name: Build - -on: - push: - branches: - - master - pull_request: - branches: - - master - workflow_dispatch: - -jobs: - build: - runs-on: ubuntu-latest - strategy: - matrix: - python-version: ["3.9", "3.10", "3.11"] - - steps: - - name: Checkout - uses: actions/checkout@v4 - - - name: Build data-validation - id: build-data-validation - uses: ./.github/reusable-build - with: - python-version: ${{ matrix.python-version }} - upload-artifact: true - - upload_to_pypi: - name: Upload to PyPI - runs-on: ubuntu-latest - if: (github.event_name == 'release' && startsWith(github.ref, 'refs/tags')) || (github.event_name == 'workflow_dispatch') - needs: [build] - environment: - name: pypi - url: https://pypi.org/p/tensorflow-data-validation/ - permissions: - id-token: write - steps: - - name: Retrieve wheels - uses: actions/download-artifact@v4.1.8 - with: - merge-multiple: true - path: wheels - - - name: List the build artifacts - run: | - 
ls -lAs wheels/ - - - name: Upload to PyPI - uses: pypa/gh-action-pypi-publish@release/v1.9 - with: - packages_dir: wheels/ diff --git a/.github/workflows/conda-build.yml b/.github/workflows/conda-build.yml index e671a709..26044004 100644 --- a/.github/workflows/conda-build.yml +++ b/.github/workflows/conda-build.yml @@ -18,7 +18,7 @@ jobs: fail-fast: false matrix: os: [ubuntu-latest, macos-latest] - python-version: ["3.9", "3.10", "3.11"] + python-version: ["3.10", "3.11", "3.12", "3.13"] steps: - name: Checkout @@ -49,7 +49,6 @@ jobs: fi chmod +x /tmp/bazelisk sudo mv /tmp/bazelisk /usr/local/bin/bazel - echo "USE_BAZEL_VERSION=6.5.0" >> $GITHUB_ENV bazel --version # Needed for Xcode 26.x in some CI environments; without this patch, @@ -64,7 +63,7 @@ jobs: - name: Install build tooling shell: bash -l {0} run: | - python -m pip install numpy~=1.22.0 + python -m pip install "numpy>=1.22.0" python -m pip install --upgrade pip build wheel "setuptools<69.3" - name: Build the package @@ -72,8 +71,58 @@ jobs: run: | python -m build --wheel --no-isolation + - name: Install wheel and dependencies + shell: bash -l {0} + run: | + python -m pip install "apache-beam[gcp]>=2.53,<3" "pandas>=1.0,<3" "numpy>=1.22.0" + python -m pip install "tensorflow-metadata>=1.17.1" + python -m pip install "tfx-bsl>=1.17.1" --no-deps + python -m pip install "tensorflow>=2.21,<2.22" + python -m pip install "absl-py>=0.9,<2.0.0" "joblib>=1.2.0" "pyarrow>=14" "pyfarmhash>=0.2.2,<0.4" "six>=1.12,<2" "dill" "scipy" "scikit-learn" + python -m pip install "protobuf==6.31.1" --force-reinstall + python -m pip install dist/*.whl --no-deps + python -m pip install pytest + + - name: Run tests + shell: bash -l {0} + run: | + mv tensorflow_data_validation tensorflow_data_validation_bak + SITE_DIR=$(python -c "import site; print(site.getsitepackages()[0])") + cp -r tensorflow_data_validation_bak/statistics/generators/testdata "$SITE_DIR/tensorflow_data_validation/statistics/generators/" + mkdir -p /tmp/test_dir 
+ cd /tmp/test_dir + python -m pytest --pyargs tensorflow_data_validation + cd - + mv tensorflow_data_validation_bak tensorflow_data_validation + - name: Upload wheel artifact uses: actions/upload-artifact@v4.4.0 with: name: data-validation-wheel-${{ matrix.os }}-py${{ matrix.python-version }} path: dist/*.whl + + upload_to_pypi: + name: Upload to PyPI + runs-on: ubuntu-latest + if: (github.event_name == 'release' && startsWith(github.ref, 'refs/tags')) || (github.event_name == 'workflow_dispatch') + needs: [build] + environment: + name: pypi + url: https://pypi.org/p/tensorflow-data-validation/ + permissions: + id-token: write + steps: + - name: Retrieve wheels + uses: actions/download-artifact@v4.1.8 + with: + merge-multiple: true + path: wheels + + - name: List the build artifacts + run: | + ls -lAs wheels/ + + - name: Upload to PyPI + uses: pypa/gh-action-pypi-publish@release/v1.9 + with: + packages_dir: wheels/ diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml deleted file mode 100644 index b8a65fd3..00000000 --- a/.github/workflows/test.yml +++ /dev/null @@ -1,40 +0,0 @@ -name: Test - -on: - push: - branches: - - master - pull_request: - branches: - - master - workflow_dispatch: - -jobs: - test: - runs-on: ubuntu-latest - strategy: - matrix: - python-version: ["3.9", "3.10", "3.11"] - - steps: - - name: Checkout - uses: actions/checkout@v4 - - - name: Build data-validation - id: build-data-validation - uses: ./.github/reusable-build - with: - python-version: ${{ matrix.python-version }} - - - name: Install built wheel - shell: bash - run: | - PYTHON_VERSION_TAG="cp$(echo ${{ matrix.python-version }} | sed 's/\.//')" - WHEEL_FILE=$(ls dist/*${PYTHON_VERSION_TAG}*.whl) - pip install "${WHEEL_FILE}[test]" - - - name: Run Test - run: | - rm -rf bazel-* - # run tests - pytest -vv diff --git a/RELEASE.md b/RELEASE.md index f9e2c3ec..1135d914 100644 --- a/RELEASE.md +++ b/RELEASE.md @@ -4,8 +4,17 @@ ## Major Features and Improvements +* Upgraded 
to support TensorFlow 2.21.0. +* Added support for Python 3.12 and 3.13. +* Dropped support for Python 3.9. + ## Bug Fixes and Other Changes +* Aligned Protobuf dependency to `>=6.0.0,<7.0.0`. +* Updated PyArrow dependency to `>=14`. +* Fixed C++ test build issues by defining missing `ASSERT_OK` and `EXPECT_OK` macros, replacing `LOG(FATAL)` with `abort()`, and fixing invalid Protobuf includes. +* Fixed Python test failures by updating `assertRaisesRegex` to expect `RuntimeError` wrapping `ValueError` in Beam pipelines. + ## Known Issues ## Breaking Changes diff --git a/WORKSPACE b/WORKSPACE index 19d64954..e1a5408a 100644 --- a/WORKSPACE +++ b/WORKSPACE @@ -2,6 +2,24 @@ workspace(name = "tensorflow_data_validation") load("@bazel_tools//tools/build_defs/repo:http.bzl", "http_archive") +_PROTOBUF_COMMIT = "6.31.1" + +http_archive( + name = "com_google_protobuf", + sha256 = "6e09bbc950ba60c3a7b30280210cd285af8d7d8ed5e0a6ed101c72aff22e8d88", + strip_prefix = "protobuf-%s" % _PROTOBUF_COMMIT, + urls = [ + "https://github.com/protocolbuffers/protobuf/archive/refs/tags/v%s.zip" % _PROTOBUF_COMMIT, + ], + patch_cmds = [ + "touch BUILD", + ], +) + +load("@com_google_protobuf//:protobuf_deps.bzl", "protobuf_deps") + +protobuf_deps() + http_archive( name = "zlib", build_file = "@com_google_protobuf//:third_party/zlib.BUILD", @@ -21,24 +39,24 @@ http_archive( # Generic Bazel Support # ################################################################################ -http_archive( - name = "rules_proto", - sha256 = "6fb6767d1bef535310547e03247f7518b03487740c11b6c6adb7952033fe1295", - strip_prefix = "rules_proto-6.0.2", - url = "https://github.com/bazelbuild/rules_proto/releases/download/6.0.2/rules_proto-6.0.2.tar.gz", -) - -load("@rules_proto//proto:repositories.bzl", "rules_proto_dependencies") - -rules_proto_dependencies() - -load("@rules_proto//proto:setup.bzl", "rules_proto_setup") - -rules_proto_setup() - -load("@rules_proto//proto:toolchains.bzl", 
"rules_proto_toolchains") - -rules_proto_toolchains() +# http_archive( +# name = "rules_proto", +# sha256 = "6fb6767d1bef535310547e03247f7518b03487740c11b6c6adb7952033fe1295", +# strip_prefix = "rules_proto-6.0.2", +# url = "https://github.com/bazelbuild/rules_proto/releases/download/6.0.2/rules_proto-6.0.2.tar.gz", +# ) +# +# load("@rules_proto//proto:repositories.bzl", "rules_proto_dependencies") +# +# rules_proto_dependencies() +# +# load("@rules_proto//proto:setup.bzl", "rules_proto_setup") +# +# rules_proto_setup() +# +# load("@rules_proto//proto:toolchains.bzl", "rules_proto_toolchains") +# +# rules_proto_toolchains() # Install version 0.9.0 of rules_foreign_cc, as default version causes an # invalid escape sequence error to be raised, which can't be avoided with @@ -61,27 +79,29 @@ rules_foreign_cc_dependencies() http_archive( name = "bazel_skylib", - sha256 = "97e70364e9249702246c0e9444bccdc4b847bed1eb03c5a3ece4f83dfe6abc44", + sha256 = "3b5b49006181f5f8ff626ef8ddceaa95e9bb8ad294f7b5d7b11ea9f7ddaf8c59", urls = [ - "https://mirror.bazel.build/github.com/bazelbuild/bazel-skylib/releases/download/1.0.2/bazel-skylib-1.0.2.tar.gz", - "https://github.com/bazelbuild/bazel-skylib/releases/download/1.0.2/bazel-skylib-1.0.2.tar.gz", + "https://mirror.bazel.build/github.com/bazelbuild/bazel-skylib/releases/download/1.9.0/bazel-skylib-1.9.0.tar.gz", + "https://github.com/bazelbuild/bazel-skylib/releases/download/1.9.0/bazel-skylib-1.9.0.tar.gz", ], ) -_PROTOBUF_COMMIT = "4.25.6" # 4.25.6 - http_archive( - name = "com_google_protobuf", - sha256 = "ff6e9c3db65f985461d200c96c771328b6186ee0b10bc7cb2bbc87cf02ebd864", - strip_prefix = "protobuf-%s" % _PROTOBUF_COMMIT, - urls = [ - "https://github.com/protocolbuffers/protobuf/archive/v4.25.6.zip", - ], + name = "rules_python", + sha256 = "c68bdc4fbec25de5b5493b8819cfc877c4ea299c0dcb15c244c5a00208cde311", + strip_prefix = "rules_python-0.31.0", + url = 
"https://github.com/bazelbuild/rules_python/releases/download/0.31.0/rules_python-0.31.0.tar.gz", +) + +load("@rules_python//python:repositories.bzl", "py_repositories") +py_repositories() + +local_repository( + name = "compatibility_proxy", + path = "third_party/dummy_compatibility_proxy", ) -load("@com_google_protobuf//:protobuf_deps.bzl", "protobuf_deps") -protobuf_deps() # Use the last commit on the relevant release branch to update. # LINT.IfChange(arrow_archive_version) @@ -101,9 +121,28 @@ http_archive( http_archive( name = "com_google_absl", - urls = ["https://github.com/abseil/abseil-cpp/archive/refs/tags/20230802.1.tar.gz"], - strip_prefix = "abseil-cpp-20230802.1", - sha256 = "987ce98f02eefbaf930d6e38ab16aa05737234d7afbab2d5c4ea7adbe50c28ed", + urls = ["https://github.com/abseil/abseil-cpp/archive/refs/tags/20250127.2.tar.gz"], + strip_prefix = "abseil-cpp-20250127.2", + sha256 = "f5a67394128fb4d9a18124820026014591942d9c882d9055d4d2412b13bf1c91", + patch_cmds = [ + "sed -i.bak '/@rules_cc\\/\\/cc\\/compiler:emscripten/d' absl/debugging/BUILD.bazel", + "sed -i.bak '/@rules_cc\\/\\/cc\\/compiler:emscripten/d' absl/base/BUILD.bazel", + "sed -i.bak '/@rules_cc\\/\\/cc\\/compiler:emscripten/d' absl/random/internal/BUILD.bazel", + "sed -i.bak '/@rules_cc\\/\\/cc\\/compiler:emscripten/d' absl/synchronization/BUILD.bazel", + ], +) + +http_archive( + name = "abseil-cpp", + urls = ["https://github.com/abseil/abseil-cpp/archive/refs/tags/20250127.2.tar.gz"], + strip_prefix = "abseil-cpp-20250127.2", + sha256 = "f5a67394128fb4d9a18124820026014591942d9c882d9055d4d2412b13bf1c91", + patch_cmds = [ + "sed -i.bak '/@rules_cc\\/\\/cc\\/compiler:emscripten/d' absl/debugging/BUILD.bazel", + "sed -i.bak '/@rules_cc\\/\\/cc\\/compiler:emscripten/d' absl/base/BUILD.bazel", + "sed -i.bak '/@rules_cc\\/\\/cc\\/compiler:emscripten/d' absl/random/internal/BUILD.bazel", + "sed -i.bak '/@rules_cc\\/\\/cc\\/compiler:emscripten/d' absl/synchronization/BUILD.bazel", + ], ) @@ -128,14 
+167,15 @@ http_archive( # TODO(b/177694034): Follow the new format for tensorflow import after TF 2.5. #here -TENSORFLOW_COMMIT = "3c92ac03cab816044f7b18a86eb86aa01a294d95" # 2.17.1 +# Corresponds to tag v2.21.0 +TENSORFLOW_COMMIT = "a481b10260dfdf833a1b16007eead49c1d7febf3" http_archive( name = "org_tensorflow_no_deps", patches = [ "//third_party:tensorflow_expose_example_proto.patch", ], - sha256 = "317dd95c4830a408b14f3e802698eb68d70d81c7c7cfcd3d28b0ba023fe84a68", + sha256 = "ef3568bb4865d6c1b2564fb5689c19b6b9a5311572cd1f2ff9198636a8520921", strip_prefix = "tensorflow-%s" % TENSORFLOW_COMMIT, urls = [ "https://github.com/tensorflow/tensorflow/archive/%s.tar.gz" % TENSORFLOW_COMMIT, @@ -152,6 +192,13 @@ http_archive( urls = ["https://github.com/pybind/pybind11/archive/%s.zip" % PYBIND11_COMMIT], ) +http_archive( + name = "com_google_googletest", + urls = ["https://github.com/google/googletest/archive/refs/tags/v1.14.0.tar.gz"], + strip_prefix = "googletest-1.14.0", + sha256 = "8ad598c73ad796e0d8280b082cebd82a630d73e73cd3c70057938a6501bba5d7", +) + load("//third_party:python_configure.bzl", "local_python_configure") local_python_configure(name = "local_config_python") @@ -236,7 +283,7 @@ http_archive( # Specify the minimum required bazel version. load("@bazel_skylib//lib:versions.bzl", "versions") -versions.check("6.5.0") +versions.check("7.7.0") # Please add all new TensorFlow Data Validation dependencies in workspace.bzl. load("//tensorflow_data_validation:workspace.bzl", "tf_data_validation_workspace") diff --git a/pyproject.toml b/pyproject.toml index 0db16c19..b12fd43e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -17,7 +17,7 @@ requires = [ "setuptools", "wheel", # Required for using org_tensorflow bazel repository. - "numpy~=1.22.0", + "numpy>=1.22.0", ] [tool.ruff] diff --git a/setup.py b/setup.py index 8e3a410c..a6645298 100644 --- a/setup.py +++ b/setup.py @@ -64,6 +64,8 @@ class _BazelBuildCommand(setuptools.Command): file. 
""" + user_options = [] + def initialize_options(self): pass @@ -80,6 +82,14 @@ def finalize_options(self): self._additional_build_options = ["--macos_minimum_os=10.14"] def run(self): + bazelversion_path = os.path.join( + os.path.dirname(os.path.realpath(__file__)), ".bazelversion" + ) + if os.path.exists(bazelversion_path): + with open(bazelversion_path) as f: + bazel_version = f.read().strip() + os.environ["USE_BAZEL_VERSION"] = bazel_version + subprocess.check_call( [self._bazel_cmd, "run", "-c", "opt"] + self._additional_build_options @@ -179,9 +189,10 @@ def select_constraint(default, nightly=None, git_master=None): "Operating System :: POSIX :: Linux", "Programming Language :: Python", "Programming Language :: Python :: 3", - "Programming Language :: Python :: 3.9", "Programming Language :: Python :: 3.10", "Programming Language :: Python :: 3.11", + "Programming Language :: Python :: 3.12", + "Programming Language :: Python :: 3.13", "Programming Language :: Python :: 3 :: Only", "Topic :: Scientific/Engineering", "Topic :: Scientific/Engineering :: Artificial Intelligence", @@ -195,19 +206,17 @@ def select_constraint(default, nightly=None, git_master=None): # six, and protobuf) with TF. install_requires=[ "absl-py>=0.9,<2.0.0", - 'apache-beam[gcp]>=2.53,<3;python_version>="3.11"', - 'apache-beam[gcp]>=2.50,<2.51;python_version<"3.11"', + "apache-beam[gcp]>=2.53,<3", # TODO(b/139941423): Consider using multi-processing provided by # Beam's DirectRunner. "joblib>=1.2.0", # Dependency for multi-processing. 
"numpy>=1.22.0", - "pandas>=1.0,<2", - 'protobuf>=4.25.2,<6.0.0;python_version>="3.11"', - 'protobuf>=4.21.6,<6.0.0;python_version<"3.11"', - "pyarrow>=10,<11", + "pandas>=1.0,<3", + "protobuf>=6.0.0,<7.0.0", + "pyarrow>=14", "pyfarmhash>=0.2.2,<0.4", "six>=1.12,<2", - "tensorflow>=2.17,<2.18", + "tensorflow>=2.21,<2.22", "tensorflow-metadata" + select_constraint( default=">=1.17.1,<1.18", @@ -233,7 +242,7 @@ def select_constraint(default, nightly=None, git_master=None): ], "all": _make_all_extra_requirements(), }, - python_requires=">=3.9,<4", + python_requires=">=3.10,<4", packages=find_packages(), include_package_data=True, package_data={"": ["*.lib", "*.pyd", "*.so"]}, diff --git a/tensorflow_data_validation/BUILD b/tensorflow_data_validation/BUILD index 198c42b7..d7f16487 100644 --- a/tensorflow_data_validation/BUILD +++ b/tensorflow_data_validation/BUILD @@ -38,3 +38,9 @@ sh_binary( ], }), ) + +py_library( + name = "tensorflow_data_validation", + srcs = glob(["*.py"], exclude=["*_test.py"]), + visibility = ["//visibility:public"], +) diff --git a/tensorflow_data_validation/anomalies/BUILD b/tensorflow_data_validation/anomalies/BUILD index 7d97f36e..775c163f 100644 --- a/tensorflow_data_validation/anomalies/BUILD +++ b/tensorflow_data_validation/anomalies/BUILD @@ -357,7 +357,6 @@ cc_library( deps = [ ":map_util", ":path", - "//third_party/tensorflow/tsl/platform:protobuf", "@com_github_tensorflow_metadata//tensorflow_metadata/proto/v0:metadata_v0_proto_cc_pb2", "@com_google_absl//absl/strings", "@com_google_absl//absl/types:optional", diff --git a/tensorflow_data_validation/anomalies/feature_statistics_validator_test.cc b/tensorflow_data_validation/anomalies/feature_statistics_validator_test.cc index 1212b9df..296e77fd 100644 --- a/tensorflow_data_validation/anomalies/feature_statistics_validator_test.cc +++ b/tensorflow_data_validation/anomalies/feature_statistics_validator_test.cc @@ -26,6 +26,10 @@ limitations under the License. 
#include "tensorflow_metadata/proto/v0/schema.pb.h" #include "tensorflow_data_validation/anomalies/proto/validation_metadata.pb.h" +#define ASSERT_OK(expr) ASSERT_TRUE((expr).ok()) +#define EXPECT_OK(expr) EXPECT_TRUE((expr).ok()) +#define CHECK_OK(expr) ASSERT_TRUE((expr).ok()) + namespace tensorflow { namespace data_validation { namespace { diff --git a/tensorflow_data_validation/anomalies/feature_util_test.cc b/tensorflow_data_validation/anomalies/feature_util_test.cc index 093d7ab3..ed0522c3 100644 --- a/tensorflow_data_validation/anomalies/feature_util_test.cc +++ b/tensorflow_data_validation/anomalies/feature_util_test.cc @@ -15,6 +15,8 @@ limitations under the License. #include "tensorflow_data_validation/anomalies/feature_util.h" +#include +#include #include #include #include @@ -62,8 +64,8 @@ Feature GetFeatureProtoOrDie( return feature_proto; } } - LOG(FATAL) << "Name " << field_name << " not found in " - << schema_proto.DebugString(); + fprintf(stderr, "Name %s not found in %s\n", field_name.c_str(), schema_proto.DebugString().c_str()); + abort(); } TEST(FeatureUtilTest, ClearDomain) { diff --git a/tensorflow_data_validation/anomalies/metrics_test.cc b/tensorflow_data_validation/anomalies/metrics_test.cc index 5dea858d..4f15500d 100644 --- a/tensorflow_data_validation/anomalies/metrics_test.cc +++ b/tensorflow_data_validation/anomalies/metrics_test.cc @@ -25,6 +25,8 @@ limitations under the License. #include "tensorflow_metadata/proto/v0/schema.pb.h" #include "tensorflow_metadata/proto/v0/statistics.pb.h" +#define ASSERT_OK(expr) ASSERT_TRUE((expr).ok()) + namespace tensorflow { namespace data_validation { diff --git a/tensorflow_data_validation/anomalies/path_test.cc b/tensorflow_data_validation/anomalies/path_test.cc index 2b324d42..95a015e1 100644 --- a/tensorflow_data_validation/anomalies/path_test.cc +++ b/tensorflow_data_validation/anomalies/path_test.cc @@ -19,6 +19,8 @@ limitations under the License. 
#include "tensorflow_data_validation/anomalies/test_util.h" #include "tensorflow_metadata/proto/v0/path.pb.h" +#define ASSERT_OK(expr) ASSERT_TRUE((expr).ok()) + namespace tensorflow { namespace data_validation { namespace { diff --git a/tensorflow_data_validation/anomalies/schema_anomalies_test.cc b/tensorflow_data_validation/anomalies/schema_anomalies_test.cc index 41a9404a..0c2f4816 100644 --- a/tensorflow_data_validation/anomalies/schema_anomalies_test.cc +++ b/tensorflow_data_validation/anomalies/schema_anomalies_test.cc @@ -25,6 +25,8 @@ limitations under the License. #include "tensorflow_metadata/proto/v0/anomalies.pb.h" #include "tensorflow_metadata/proto/v0/statistics.pb.h" +#define ASSERT_OK(expr) ASSERT_TRUE((expr).ok()) + namespace tensorflow { namespace data_validation { namespace { diff --git a/tensorflow_data_validation/anomalies/schema_test.cc b/tensorflow_data_validation/anomalies/schema_test.cc index 520c6964..97eff0cb 100644 --- a/tensorflow_data_validation/anomalies/schema_test.cc +++ b/tensorflow_data_validation/anomalies/schema_test.cc @@ -30,6 +30,9 @@ limitations under the License. #include "tensorflow_metadata/proto/v0/schema.pb.h" #include "tensorflow_metadata/proto/v0/statistics.pb.h" +#define ASSERT_OK(expr) ASSERT_TRUE((expr).ok()) +#define EXPECT_OK(expr) EXPECT_TRUE((expr).ok()) + namespace tensorflow { namespace data_validation { namespace { diff --git a/tensorflow_data_validation/anomalies/test_util.cc b/tensorflow_data_validation/anomalies/test_util.cc index 522d36c5..f2e5e424 100644 --- a/tensorflow_data_validation/anomalies/test_util.cc +++ b/tensorflow_data_validation/anomalies/test_util.cc @@ -25,7 +25,7 @@ limitations under the License. 
#include "absl/strings/str_cat.h" #include "tensorflow_data_validation/anomalies/map_util.h" #include "tensorflow_data_validation/anomalies/path.h" -#include "tensorflow/tsl/platform/protobuf.h" +#include #include "tensorflow_metadata/proto/v0/anomalies.pb.h" #include "tensorflow_metadata/proto/v0/schema.pb.h" @@ -39,7 +39,7 @@ ProtoStringMatcher::ProtoStringMatcher(const string& expected) ProtoStringMatcher::ProtoStringMatcher(const google::protobuf::Message& expected) : expected_([&]() -> std::string { std::string result; - tsl::protobuf::TextFormat::PrintToString(expected, &result); + google::protobuf::TextFormat::PrintToString(expected, &result); return result; }()) {} diff --git a/tensorflow_data_validation/anomalies/test_util.h b/tensorflow_data_validation/anomalies/test_util.h index 7a2f6c3b..265410b4 100644 --- a/tensorflow_data_validation/anomalies/test_util.h +++ b/tensorflow_data_validation/anomalies/test_util.h @@ -26,6 +26,8 @@ limitations under the License. #include #include #include +#include +#include #include "google/protobuf/text_format.h" #include @@ -71,7 +73,9 @@ class ProtoStringMatcher { template T CreateProto(const string& textual_proto) { T proto; - CHECK(TextFormat::ParseFromString(textual_proto, &proto)); + if (!TextFormat::ParseFromString(textual_proto, &proto)) { + abort(); + } return proto; } @@ -100,8 +104,10 @@ inline ::testing::PolymorphicMatcher EqualsProto( template T ParseTextProtoOrDie(const string& input) { T result; - CHECK(TextFormat::ParseFromString(input, &result)) - << "Failed to parse: " << input; + if (!TextFormat::ParseFromString(input, &result)) { + fprintf(stderr, "Failed to parse: %s\n", input.c_str()); + abort(); + } return result; } diff --git a/tensorflow_data_validation/build_macros.bzl b/tensorflow_data_validation/build_macros.bzl index e0dd8c55..3980868e 100644 --- a/tensorflow_data_validation/build_macros.bzl +++ b/tensorflow_data_validation/build_macros.bzl @@ -78,6 +78,7 @@ def tfdv_pybind_extension( # 
"-Wl,-rename_section,__TEXT,text_env,__TEXT,__text", "-Wl,-w", "-Wl,-exported_symbols_list,$(location %s)" % exported_symbols_file, + "-Wl,-undefined,dynamic_lookup", ], "//conditions:default": [ "-Wl,--version-script", diff --git a/tensorflow_data_validation/skew/feature_skew_detector_test.py b/tensorflow_data_validation/skew/feature_skew_detector_test.py index cd75c767..4e8fa054 100644 --- a/tensorflow_data_validation/skew/feature_skew_detector_test.py +++ b/tensorflow_data_validation/skew/feature_skew_detector_test.py @@ -928,7 +928,7 @@ def _confusion_result( }, ) def test_confusion_analysis_errors(self, input_example, expected_error_regex): - with self.assertRaisesRegex(ValueError, expected_error_regex): + with self.assertRaisesRegex(RuntimeError, expected_error_regex): # Use the direct runner here to get exception propagation. with beam.Pipeline() as p: baseline_examples = p | "Create Base" >> beam.Create([input_example]) diff --git a/tensorflow_data_validation/statistics/generators/lift_stats_generator_test.py b/tensorflow_data_validation/statistics/generators/lift_stats_generator_test.py index 54008c23..49c92076 100644 --- a/tensorflow_data_validation/statistics/generators/lift_stats_generator_test.py +++ b/tensorflow_data_validation/statistics/generators/lift_stats_generator_test.py @@ -1345,7 +1345,7 @@ def test_lift_weighted_missing_weight(self): ) examples = [(None, e) for e in examples] with self.assertRaisesRegex( - ValueError, + RuntimeError, r'Weight column "weight" must have exactly one ' "value in each example.*", ): with beam.Pipeline() as p: @@ -1386,7 +1386,7 @@ def test_lift_weighted_weight_is_none(self): ) examples = [(None, e) for e in examples] with self.assertRaisesRegex( - ValueError, r'Weight column "weight" cannot be null.*' + RuntimeError, r'Weight column "weight" cannot be null.*' ): with beam.Pipeline() as p: _ = p | beam.Create(examples) | generator.ptransform diff --git a/tensorflow_data_validation/tools/BUILD 
b/tensorflow_data_validation/tools/BUILD index 4ba4127e..e5deff31 100644 --- a/tensorflow_data_validation/tools/BUILD +++ b/tensorflow_data_validation/tools/BUILD @@ -1,6 +1,6 @@ # Opensource tools, not part of the pip package. -load("//third_party/bazel_rules/rules_python/python:py_binary.bzl", "py_binary") +load("@rules_python//python:defs.bzl", "py_binary") licenses(["notice"]) @@ -10,9 +10,6 @@ py_binary( name = "build_docs", srcs = ["build_docs.py"], deps = [ - "# Implicit absl dependency:app", - "# Implicit apache_beam dependency.", - "# Implicit tensorflow_docs dependency./api_generator", "//tensorflow_data_validation", ], ) diff --git a/tensorflow_data_validation/workspace.bzl b/tensorflow_data_validation/workspace.bzl index 9f67ea52..15537aff 100644 --- a/tensorflow_data_validation/workspace.bzl +++ b/tensorflow_data_validation/workspace.bzl @@ -8,12 +8,12 @@ def tf_data_validation_workspace(): git_repository( name = "com_github_tensorflow_metadata", - branch = "master", - remote = "https://github.com/tensorflow/metadata.git", + branch = "align-tf-2.21", + remote = "https://github.com/vkarampudi/metadata.git", ) git_repository( name = "com_github_tfx_bsl", - branch = "master", - remote = "https://github.com/tensorflow/tfx-bsl", + branch = "testing", + remote = "https://github.com/vkarampudi/tfx-bsl.git", ) diff --git a/third_party/dummy_compatibility_proxy/BUILD b/third_party/dummy_compatibility_proxy/BUILD new file mode 100644 index 00000000..e69de29b diff --git a/third_party/dummy_compatibility_proxy/WORKSPACE b/third_party/dummy_compatibility_proxy/WORKSPACE new file mode 100644 index 00000000..e69de29b diff --git a/third_party/dummy_compatibility_proxy/proxy.bzl b/third_party/dummy_compatibility_proxy/proxy.bzl new file mode 100644 index 00000000..28f40240 --- /dev/null +++ b/third_party/dummy_compatibility_proxy/proxy.bzl @@ -0,0 +1,4 @@ +JavaInfo = provider() +java_common = struct() +java_common_internal_compile = struct() +java_info_internal_merge = 
struct()