From 70230ee18d7e717f737591bc9523047c28d44632 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Cumplido?= Date: Fri, 5 Jun 2026 12:26:38 +0200 Subject: [PATCH] Initial exploration for migration from minio to seaweedfs --- .github/workflows/cpp.yml | 9 +- .github/workflows/cpp_extra.yml | 4 +- .github/workflows/python.yml | 4 +- .github/workflows/r.yml | 6 +- .github/workflows/r_extra.yml | 4 +- ci/docker/alpine-linux-3.22-cpp.dockerfile | 4 +- ci/docker/alpine-linux-3.22-r.dockerfile | 2 +- ci/docker/conda-cpp.dockerfile | 4 +- ci/docker/debian-13-cpp.dockerfile | 4 +- ci/docker/debian-experimental-cpp.dockerfile | 4 +- ci/docker/fedora-42-cpp.dockerfile | 4 +- ci/docker/fedora-42-r-clang.dockerfile | 2 +- ci/docker/linux-apt-r.dockerfile | 4 +- ci/docker/linux-r.dockerfile | 2 +- ci/docker/ubuntu-22.04-cpp-minimal.dockerfile | 4 +- ci/docker/ubuntu-22.04-cpp.dockerfile | 4 +- ci/docker/ubuntu-24.04-cpp-minimal.dockerfile | 4 +- ci/docker/ubuntu-24.04-cpp.dockerfile | 4 +- ci/scripts/cpp_test.sh | 2 +- ci/scripts/install_seaweedfs.sh | 92 +++++++++++++++++++ ci/scripts/r_install_system_dependencies.sh | 4 +- dev/tasks/python-wheels/github.osx.yml | 4 +- .../continuous_integration/docker.rst | 2 +- python/pyarrow/tests/conftest.py | 11 ++- python/pyarrow/tests/test_dataset.py | 6 +- python/pyarrow/tests/test_fs.py | 4 +- python/pyarrow/tests/util.py | 1 + 27 files changed, 147 insertions(+), 52 deletions(-) create mode 100755 ci/scripts/install_seaweedfs.sh diff --git a/.github/workflows/cpp.yml b/.github/workflows/cpp.yml index 90c06c7be0e4..c19140ed85b4 100644 --- a/.github/workflows/cpp.yml +++ b/.github/workflows/cpp.yml @@ -33,7 +33,7 @@ on: - 'ci/scripts/cpp_*' - 'ci/scripts/install_azurite.sh' - 'ci/scripts/install_gcs_testbench.sh' - - 'ci/scripts/install_minio.sh' + - 'ci/scripts/install_seaweedfs.sh' - 'ci/scripts/msys2_*' - 'ci/scripts/util_*' - 'cpp/**' @@ -50,7 +50,7 @@ on: - 'ci/scripts/cpp_*' - 'ci/scripts/install_azurite.sh' - 
'ci/scripts/install_gcs_testbench.sh' - - 'ci/scripts/install_minio.sh' + - 'ci/scripts/install_seaweedfs.sh' - 'ci/scripts/msys2_*' - 'ci/scripts/util_*' - 'cpp/**' @@ -235,10 +235,10 @@ jobs: # Remove once the runner ships a newer Homebrew. brew update brew bundle --file=cpp/Brewfile - - name: Install MinIO + - name: Install SeaweedFS run: | $(brew --prefix bash)/bin/bash \ - ci/scripts/install_minio.sh latest ${ARROW_HOME} + ci/scripts/install_seaweedfs.sh 4.31 ${ARROW_HOME} - name: Set up Python uses: actions/setup-python@v6 with: @@ -381,6 +381,7 @@ jobs: # https://github.com/apache/arrow/issues/48593 ci/scripts/download_tz_database.sh - name: Download MinIO + # TODO: Install seaweedfs # Match the version pinned in ci/scripts/install_minio.sh. shell: msys2 {0} run: | diff --git a/.github/workflows/cpp_extra.yml b/.github/workflows/cpp_extra.yml index 73b06f9deec5..d15b381bf245 100644 --- a/.github/workflows/cpp_extra.yml +++ b/.github/workflows/cpp_extra.yml @@ -34,7 +34,7 @@ on: - 'ci/scripts/cpp_*' - 'ci/scripts/install_azurite.sh' - 'ci/scripts/install_gcs_testbench.sh' - - 'ci/scripts/install_minio.sh' + - 'ci/scripts/install_seaweedfs.sh' - 'ci/scripts/msys2_*' - 'ci/scripts/util_*' - 'cpp/**' @@ -57,7 +57,7 @@ on: - 'ci/scripts/cpp_*' - 'ci/scripts/install_azurite.sh' - 'ci/scripts/install_gcs_testbench.sh' - - 'ci/scripts/install_minio.sh' + - 'ci/scripts/install_seaweedfs.sh' - 'ci/scripts/msys2_*' - 'ci/scripts/util_*' - 'cpp/**' diff --git a/.github/workflows/python.yml b/.github/workflows/python.yml index 59a180f9cb58..725333e52914 100644 --- a/.github/workflows/python.yml +++ b/.github/workflows/python.yml @@ -205,10 +205,10 @@ jobs: python -m pip install \ -r python/requirements-build.txt \ -r python/requirements-test.txt - - name: Install MinIO + - name: Install SeaweedFS run: | $(brew --prefix bash)/bin/bash \ - ci/scripts/install_minio.sh latest /usr/local + ci/scripts/install_seaweedfs.sh 4.31 /usr/local - name: Setup ccache shell: bash run: 
ci/scripts/ccache_setup.sh diff --git a/.github/workflows/r.yml b/.github/workflows/r.yml index f4c5d8a5bd2d..ecff6d0f1db0 100644 --- a/.github/workflows/r.yml +++ b/.github/workflows/r.yml @@ -31,7 +31,7 @@ on: - "ci/etc/rprofile" - "ci/scripts/PKGBUILD" - "ci/scripts/cpp_*.sh" - - "ci/scripts/install_minio.sh" + - "ci/scripts/install_seaweedfs.sh" - "ci/scripts/r_*.sh" - "cpp/**" - "compose.yaml" @@ -44,7 +44,7 @@ on: - "ci/etc/rprofile" - "ci/scripts/PKGBUILD" - "ci/scripts/cpp_*.sh" - - "ci/scripts/install_minio.sh" + - "ci/scripts/install_seaweedfs.sh" - "ci/scripts/r_*.sh" - "cpp/**" - "compose.yaml" @@ -279,7 +279,7 @@ jobs: shell: bash run: | mkdir -p "$HOME/.local/bin" - ci/scripts/install_minio.sh latest "$HOME/.local" + ci/scripts/install_seaweedfs.sh 4.31 "$HOME/.local" echo "$HOME/.local/bin" >> $GITHUB_PATH - name: Download Timezone Database shell: bash diff --git a/.github/workflows/r_extra.yml b/.github/workflows/r_extra.yml index eac55f72bf69..d775fe1b9d1e 100644 --- a/.github/workflows/r_extra.yml +++ b/.github/workflows/r_extra.yml @@ -31,7 +31,7 @@ on: - 'ci/etc/rprofile' - 'ci/scripts/PKGBUILD' - 'ci/scripts/cpp_*.sh' - - 'ci/scripts/install_minio.sh' + - 'ci/scripts/install_seaweedfs.sh' - 'ci/scripts/r_*.sh' - 'cpp/**' - 'compose.yaml' @@ -49,7 +49,7 @@ on: - 'ci/etc/rprofile' - 'ci/scripts/PKGBUILD' - 'ci/scripts/cpp_*.sh' - - 'ci/scripts/install_minio.sh' + - 'ci/scripts/install_seaweedfs.sh' - 'ci/scripts/r_*.sh' - 'cpp/**' - 'compose.yaml' diff --git a/ci/docker/alpine-linux-3.22-cpp.dockerfile b/ci/docker/alpine-linux-3.22-cpp.dockerfile index c3a2a58ef959..7e3778f0ad5f 100644 --- a/ci/docker/alpine-linux-3.22-cpp.dockerfile +++ b/ci/docker/alpine-linux-3.22-cpp.dockerfile @@ -70,8 +70,8 @@ RUN apk add \ ln -s /usr/share/zoneinfo/Etc/UTC /etc/localtime && \ echo "Etc/UTC" > /etc/timezone -COPY ci/scripts/install_minio.sh /arrow/ci/scripts/ -RUN /arrow/ci/scripts/install_minio.sh latest /usr/local +COPY ci/scripts/install_seaweedfs.sh 
/arrow/ci/scripts/ +RUN /arrow/ci/scripts/install_seaweedfs.sh 4.31 /usr/local COPY ci/scripts/install_gcs_testbench.sh /arrow/ci/scripts/ RUN /arrow/ci/scripts/install_gcs_testbench.sh default diff --git a/ci/docker/alpine-linux-3.22-r.dockerfile b/ci/docker/alpine-linux-3.22-r.dockerfile index 887cb6445e7d..470369bb01c9 100644 --- a/ci/docker/alpine-linux-3.22-r.dockerfile +++ b/ci/docker/alpine-linux-3.22-r.dockerfile @@ -67,7 +67,7 @@ RUN echo "MAKEFLAGS=-j$(R -s -e 'cat(parallel::detectCores())')" >> /usr/lib/R/e COPY ci/scripts/r_docker_configure.sh /arrow/ci/scripts/ COPY ci/etc/rprofile /arrow/ci/etc/ COPY ci/scripts/r_install_system_dependencies.sh /arrow/ci/scripts/ -COPY ci/scripts/install_minio.sh /arrow/ci/scripts/ +COPY ci/scripts/install_seaweedfs.sh /arrow/ci/scripts/ COPY ci/scripts/install_gcs_testbench.sh /arrow/ci/scripts/ RUN /arrow/ci/scripts/r_docker_configure.sh diff --git a/ci/docker/conda-cpp.dockerfile b/ci/docker/conda-cpp.dockerfile index a387fb266990..65346d0b0344 100644 --- a/ci/docker/conda-cpp.dockerfile +++ b/ci/docker/conda-cpp.dockerfile @@ -19,8 +19,8 @@ ARG repo ARG arch FROM ${repo}:${arch}-conda -COPY ci/scripts/install_minio.sh /arrow/ci/scripts -RUN /arrow/ci/scripts/install_minio.sh latest /opt/conda +COPY ci/scripts/install_seaweedfs.sh /arrow/ci/scripts +RUN /arrow/ci/scripts/install_seaweedfs.sh 4.31 /opt/conda # Unless overridden use Python 3.10 # Google GCS fails building with Python 3.11 at the moment. 
diff --git a/ci/docker/debian-13-cpp.dockerfile b/ci/docker/debian-13-cpp.dockerfile index 951ee98e6a21..674d80a03439 100644 --- a/ci/docker/debian-13-cpp.dockerfile +++ b/ci/docker/debian-13-cpp.dockerfile @@ -95,8 +95,8 @@ RUN apt-get update -y -q && \ apt-get clean && \ rm -rf /var/lib/apt/lists/* -COPY ci/scripts/install_minio.sh /arrow/ci/scripts/ -RUN /arrow/ci/scripts/install_minio.sh latest /usr/local +COPY ci/scripts/install_seaweedfs.sh /arrow/ci/scripts/ +RUN /arrow/ci/scripts/install_seaweedfs.sh 4.31 /usr/local COPY ci/scripts/install_gcs_testbench.sh /arrow/ci/scripts/ RUN /arrow/ci/scripts/install_gcs_testbench.sh default diff --git a/ci/docker/debian-experimental-cpp.dockerfile b/ci/docker/debian-experimental-cpp.dockerfile index 7e00378ea6b6..704a7492b509 100644 --- a/ci/docker/debian-experimental-cpp.dockerfile +++ b/ci/docker/debian-experimental-cpp.dockerfile @@ -98,8 +98,8 @@ RUN if [ -n "${gcc}" ]; then \ apt-get clean && \ rm -rf /var/lib/apt/lists/* -COPY ci/scripts/install_minio.sh /arrow/ci/scripts/ -RUN /arrow/ci/scripts/install_minio.sh latest /usr/local +COPY ci/scripts/install_seaweedfs.sh /arrow/ci/scripts/ +RUN /arrow/ci/scripts/install_seaweedfs.sh 4.31 /usr/local COPY ci/scripts/install_gcs_testbench.sh /arrow/ci/scripts/ RUN /arrow/ci/scripts/install_gcs_testbench.sh default diff --git a/ci/docker/fedora-42-cpp.dockerfile b/ci/docker/fedora-42-cpp.dockerfile index 9a8533688fe0..90459457f09a 100644 --- a/ci/docker/fedora-42-cpp.dockerfile +++ b/ci/docker/fedora-42-cpp.dockerfile @@ -68,8 +68,8 @@ RUN dnf update -y && \ which \ zlib-devel -COPY ci/scripts/install_minio.sh /arrow/ci/scripts/ -RUN /arrow/ci/scripts/install_minio.sh latest /usr/local +COPY ci/scripts/install_seaweedfs.sh /arrow/ci/scripts/ +RUN /arrow/ci/scripts/install_seaweedfs.sh 4.31 /usr/local COPY ci/scripts/install_gcs_testbench.sh /arrow/ci/scripts/ RUN /arrow/ci/scripts/install_gcs_testbench.sh default diff --git a/ci/docker/fedora-42-r-clang.dockerfile 
b/ci/docker/fedora-42-r-clang.dockerfile index 9bc970e06097..9d24a0661dc9 100644 --- a/ci/docker/fedora-42-r-clang.dockerfile +++ b/ci/docker/fedora-42-r-clang.dockerfile @@ -205,7 +205,7 @@ RUN mkdir -p /root/.R && \ COPY ci/scripts/r_docker_configure.sh /arrow/ci/scripts/ COPY ci/etc/rprofile /arrow/ci/etc/ COPY ci/scripts/r_install_system_dependencies.sh /arrow/ci/scripts/ -COPY ci/scripts/install_minio.sh /arrow/ci/scripts/ +COPY ci/scripts/install_seaweedfs.sh /arrow/ci/scripts/ COPY ci/scripts/install_gcs_testbench.sh /arrow/ci/scripts/ RUN /arrow/ci/scripts/r_docker_configure.sh diff --git a/ci/docker/linux-apt-r.dockerfile b/ci/docker/linux-apt-r.dockerfile index 83a7b8b9baad..58396bb3cf00 100644 --- a/ci/docker/linux-apt-r.dockerfile +++ b/ci/docker/linux-apt-r.dockerfile @@ -93,8 +93,8 @@ RUN python3 -m venv ${ARROW_PYTHON_VENV} && \ pip install -U pip setuptools wheel && \ pip install -r arrow/python/requirements-build.txt -COPY ci/scripts/install_minio.sh /arrow/ci/scripts/ -RUN /arrow/ci/scripts/install_minio.sh latest /usr/local +COPY ci/scripts/install_seaweedfs.sh /arrow/ci/scripts/ +RUN /arrow/ci/scripts/install_seaweedfs.sh 4.31 /usr/local COPY ci/scripts/install_gcs_testbench.sh /arrow/ci/scripts/ RUN /arrow/ci/scripts/install_gcs_testbench.sh default diff --git a/ci/docker/linux-r.dockerfile b/ci/docker/linux-r.dockerfile index da378eac4302..213aeb6b6032 100644 --- a/ci/docker/linux-r.dockerfile +++ b/ci/docker/linux-r.dockerfile @@ -43,7 +43,7 @@ ENV PATH "${RPREFIX}/bin:${PATH}" COPY ci/scripts/r_docker_configure.sh /arrow/ci/scripts/ COPY ci/etc/rprofile /arrow/ci/etc/ COPY ci/scripts/r_install_system_dependencies.sh /arrow/ci/scripts/ -COPY ci/scripts/install_minio.sh /arrow/ci/scripts/ +COPY ci/scripts/install_seaweedfs.sh /arrow/ci/scripts/ COPY ci/scripts/install_gcs_testbench.sh /arrow/ci/scripts/ RUN /arrow/ci/scripts/r_docker_configure.sh diff --git a/ci/docker/ubuntu-22.04-cpp-minimal.dockerfile 
b/ci/docker/ubuntu-22.04-cpp-minimal.dockerfile index 80e97c440a45..ef20a5e62d55 100644 --- a/ci/docker/ubuntu-22.04-cpp-minimal.dockerfile +++ b/ci/docker/ubuntu-22.04-cpp-minimal.dockerfile @@ -74,8 +74,8 @@ ARG cmake COPY ci/scripts/install_cmake.sh /arrow/ci/scripts/ RUN /arrow/ci/scripts/install_cmake.sh ${cmake} /usr/local/ -COPY ci/scripts/install_minio.sh /arrow/ci/scripts/ -RUN /arrow/ci/scripts/install_minio.sh latest /usr/local +COPY ci/scripts/install_seaweedfs.sh /arrow/ci/scripts/ +RUN /arrow/ci/scripts/install_seaweedfs.sh 4.31 /usr/local COPY ci/scripts/install_gcs_testbench.sh /arrow/ci/scripts/ RUN /arrow/ci/scripts/install_gcs_testbench.sh default diff --git a/ci/docker/ubuntu-22.04-cpp.dockerfile b/ci/docker/ubuntu-22.04-cpp.dockerfile index 3572f156ba8c..7a65e1a2e9af 100644 --- a/ci/docker/ubuntu-22.04-cpp.dockerfile +++ b/ci/docker/ubuntu-22.04-cpp.dockerfile @@ -169,8 +169,8 @@ ARG cmake COPY ci/scripts/install_cmake.sh /arrow/ci/scripts/ RUN /arrow/ci/scripts/install_cmake.sh ${cmake} /usr/local/ -COPY ci/scripts/install_minio.sh /arrow/ci/scripts/ -RUN /arrow/ci/scripts/install_minio.sh latest /usr/local +COPY ci/scripts/install_seaweedfs.sh /arrow/ci/scripts/ +RUN /arrow/ci/scripts/install_seaweedfs.sh 4.31 /usr/local COPY ci/scripts/install_gcs_testbench.sh /arrow/ci/scripts/ RUN /arrow/ci/scripts/install_gcs_testbench.sh default diff --git a/ci/docker/ubuntu-24.04-cpp-minimal.dockerfile b/ci/docker/ubuntu-24.04-cpp-minimal.dockerfile index 904cfa99dfef..e7002d3bf65a 100644 --- a/ci/docker/ubuntu-24.04-cpp-minimal.dockerfile +++ b/ci/docker/ubuntu-24.04-cpp-minimal.dockerfile @@ -71,8 +71,8 @@ RUN latest_system_llvm=14 && \ apt-get clean && \ rm -rf /var/lib/apt/lists* -COPY ci/scripts/install_minio.sh /arrow/ci/scripts/ -RUN /arrow/ci/scripts/install_minio.sh latest /usr/local +COPY ci/scripts/install_seaweedfs.sh /arrow/ci/scripts/ +RUN /arrow/ci/scripts/install_seaweedfs.sh 4.31 /usr/local COPY ci/scripts/install_gcs_testbench.sh 
/arrow/ci/scripts/ RUN /arrow/ci/scripts/install_gcs_testbench.sh default diff --git a/ci/docker/ubuntu-24.04-cpp.dockerfile b/ci/docker/ubuntu-24.04-cpp.dockerfile index 3798e0817326..410b33c04fa7 100644 --- a/ci/docker/ubuntu-24.04-cpp.dockerfile +++ b/ci/docker/ubuntu-24.04-cpp.dockerfile @@ -155,8 +155,8 @@ RUN if [ "${gcc}" = "" ]; then \ update-alternatives --set c++ /usr/bin/g++; \ fi -COPY ci/scripts/install_minio.sh /arrow/ci/scripts/ -RUN /arrow/ci/scripts/install_minio.sh latest /usr/local +COPY ci/scripts/install_seaweedfs.sh /arrow/ci/scripts/ +RUN /arrow/ci/scripts/install_seaweedfs.sh 4.31 /usr/local COPY ci/scripts/install_gcs_testbench.sh /arrow/ci/scripts/ RUN /arrow/ci/scripts/install_gcs_testbench.sh default diff --git a/ci/scripts/cpp_test.sh b/ci/scripts/cpp_test.sh index 2f88cdc819b2..c5e59cf964d3 100755 --- a/ci/scripts/cpp_test.sh +++ b/ci/scripts/cpp_test.sh @@ -50,7 +50,7 @@ fi if ! type storage-testbench >/dev/null 2>&1; then exclude_tests+=("arrow-gcsfs-test") fi -if ! type minio >/dev/null 2>&1; then +if ! type weed >/dev/null 2>&1; then exclude_tests+=("arrow-s3fs-test") fi case "$(uname)" in diff --git a/ci/scripts/install_seaweedfs.sh b/ci/scripts/install_seaweedfs.sh new file mode 100755 index 000000000000..4d5994096dc6 --- /dev/null +++ b/ci/scripts/install_seaweedfs.sh @@ -0,0 +1,92 @@ +#!/usr/bin/env bash +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +set -eu + +if [ "$#" -ne 2 ]; then + echo "Usage: $0 <version> <prefix>" + exit 1 +fi + +# The version is the SeaweedFS release tag without the leading "v" (e.g. 4.31). +version=$1 +prefix=$2 + +declare -A archs +archs=([x86_64]=amd64 + [arm64]=arm64 + [aarch64]=arm64) + +arch=$(uname -m) +if [ -z "${archs[$arch]}" ]; then + echo "Unsupported architecture: ${arch}" + exit 0 +fi +arch=${archs[$arch]} + +# SeaweedFS ships statically linked Go binaries, so there is no +# glibc/musl distinction: a single Linux archive works everywhere. +platform=$(uname) +extension=tar.gz +case ${platform} in + Linux) + platform=linux + ;; + Darwin) + platform=darwin + ;; + MSYS_NT*|MINGW64_NT*) + platform=windows + extension=zip + ;; + *) + echo "Unsupported platform: ${platform}" + exit 0 + ;; +esac + +download() +{ + local output=$1 + local url=$2 + + mkdir -p "$(dirname "${output}")" + if type wget > /dev/null 2>&1; then + wget -nv --output-document "${output}" "${url}" + else + curl --fail --location --output "${output}" "${url}" + fi +} + +if [[ ! -x ${prefix}/bin/weed ]]; then + url="https://github.com/seaweedfs/seaweedfs/releases/download/${version}/${platform}_${arch}.${extension}" + echo "Fetching ${url}..." 
+ tmp=$(mktemp -d) + archive="${tmp}/seaweedfs.${extension}" + download "${archive}" "${url}" + if [ "${extension}" = "zip" ]; then + unzip -q "${archive}" -d "${tmp}" + else + tar -xzf "${archive}" -C "${tmp}" + fi + mkdir -p "${prefix}/bin" + mv "${tmp}/weed" "${prefix}/bin/weed" + chmod +x "${prefix}/bin/weed" + rm -rf "${tmp}" +fi diff --git a/ci/scripts/r_install_system_dependencies.sh b/ci/scripts/r_install_system_dependencies.sh index 237e0e9408b3..80c7a9472a2c 100755 --- a/ci/scripts/r_install_system_dependencies.sh +++ b/ci/scripts/r_install_system_dependencies.sh @@ -53,8 +53,8 @@ esac if [ "$ARROW_S3" == "ON" ] || [ "$ARROW_GCS" == "ON" ] || [ "$ARROW_R_DEV" == "TRUE" ]; then # The Dockerfile should have put this file here - if [ "$ARROW_S3" == "ON" ] && [ -f "${ARROW_SOURCE_HOME}/ci/scripts/install_minio.sh" ] && [ "`which wget`" ]; then - "${ARROW_SOURCE_HOME}/ci/scripts/install_minio.sh" latest /usr/local + if [ "$ARROW_S3" == "ON" ] && [ -f "${ARROW_SOURCE_HOME}/ci/scripts/install_seaweedfs.sh" ] && [ "`which wget`" ]; then + "${ARROW_SOURCE_HOME}/ci/scripts/install_seaweedfs.sh" 4.31 /usr/local fi if [ "$ARROW_GCS" == "ON" ] && [ -f "${ARROW_SOURCE_HOME}/ci/scripts/install_gcs_testbench.sh" ]; then diff --git a/dev/tasks/python-wheels/github.osx.yml b/dev/tasks/python-wheels/github.osx.yml index 3de8c3c021bf..765a1f863ec9 100644 --- a/dev/tasks/python-wheels/github.osx.yml +++ b/dev/tasks/python-wheels/github.osx.yml @@ -123,10 +123,10 @@ jobs: PIPX_BASE_PYTHON: {{ '${{ steps.gcs-python-install.outputs.python-path }}' }} run: arrow/ci/scripts/install_gcs_testbench.sh default - - name: Install MinIO + - name: Install SeaweedFS run: | $(brew --prefix bash)/bin/bash \ - arrow/ci/scripts/install_minio.sh latest /usr/local + arrow/ci/scripts/install_seaweedfs.sh 4.31 /usr/local - name: Test Wheel env: diff --git a/docs/source/developers/continuous_integration/docker.rst b/docs/source/developers/continuous_integration/docker.rst index 
5b02bfe5be7f..0557037e0b51 100644 --- a/docs/source/developers/continuous_integration/docker.rst +++ b/docs/source/developers/continuous_integration/docker.rst @@ -222,7 +222,7 @@ responsible for. Like: - ``docs_build.sh``: build the Sphinx documentation. - ``integration_dask.sh``: execute the dask integration tests. - ``integration_pandas.sh``: execute the pandas integration tests. -- ``install_minio.sh``: install minio server for multiple platforms. +- ``install_seaweedfs.sh``: install SeaweedFS server for multiple platforms. - ``install_conda.sh``: install miniconda for multiple platforms. - ``install_gcs_testbench.sh``: install the GCS testbench for multiple platforms. diff --git a/python/pyarrow/tests/conftest.py b/python/pyarrow/tests/conftest.py index b3c90707a80d..b80618ddcadf 100644 --- a/python/pyarrow/tests/conftest.py +++ b/python/pyarrow/tests/conftest.py @@ -200,7 +200,7 @@ def wrapper(*args, **kwargs): def s3_server(s3_connection, tmpdir_factory): @retry(attempts=5, delay=1, backoff=2) def minio_server_health_check(address): - resp = urllib.request.urlopen(f"http://{address}/minio/health/live") + resp = urllib.request.urlopen(f"http://{address}/status") assert resp.getcode() == 200 tmpdir = tmpdir_factory.getbasetemp() @@ -209,17 +209,18 @@ def minio_server_health_check(address): address = f'{host}:{port}' env = os.environ.copy() env.update({ + 'WEED_FILER_OPTIONS_RECURSIVE_DELETE': 'true', 'MINIO_ACCESS_KEY': access_key, 'MINIO_SECRET_KEY': secret_key }) - - args = ['minio', '--compat', 'server', '--quiet', '--address', - address, tmpdir] + # '--quiet', + args = ['weed', 'server', f'-ip={host}', '-s3', f'-s3.port={port}', + f'-dir={tmpdir}', '-volume.max=100', '-volume.minFreeSpacePercent=0'] proc = None try: proc = subprocess.Popen(args, env=env) except OSError: - pytest.skip('`minio` command cannot be located') + pytest.skip('`weed` command cannot be located') else: # Wait for the server to startup before yielding 
minio_server_health_check(address) diff --git a/python/pyarrow/tests/test_dataset.py b/python/pyarrow/tests/test_dataset.py index 3afe3281cbc3..2509e02986b9 100644 --- a/python/pyarrow/tests/test_dataset.py +++ b/python/pyarrow/tests/test_dataset.py @@ -5126,7 +5126,7 @@ def test_write_dataset_s3(s3_example_simple): assert result.equals(table) -_minio_put_only_policy = """{ +_put_only_policy = """{ "Version": "2012-10-17", "Statement": [ { @@ -5157,7 +5157,7 @@ def test_write_dataset_s3_put_only(s3_server): # write dataset with s3 filesystem host, port, _, _ = s3_server['connection'] - _configure_s3_limited_user(s3_server, _minio_put_only_policy, + _configure_s3_limited_user(s3_server, _put_only_policy, 'test_dataset_limited_user', 'limited123') fs = S3FileSystem( access_key='test_dataset_limited_user', @@ -5206,7 +5206,7 @@ def test_write_dataset_s3_put_only(s3_server): existing_data_behavior='overwrite_or_ignore' ) - # Error enforced by minio / S3 service + # Error enforced by S3 service (seaweedfs) fs = S3FileSystem( access_key='limited', secret_key='limited123', diff --git a/python/pyarrow/tests/test_fs.py b/python/pyarrow/tests/test_fs.py index 5bf1950c0654..ea599ae612cb 100644 --- a/python/pyarrow/tests/test_fs.py +++ b/python/pyarrow/tests/test_fs.py @@ -276,7 +276,7 @@ def subtree_s3fs(request, s3fs): ) -_minio_limited_policy = """{ +_limited_policy = """{ "Version": "2012-10-17", "Statement": [ { @@ -526,7 +526,7 @@ def skip_azure(fs, reason): @pytest.mark.s3 def test_s3fs_limited_permissions_create_bucket(s3_server): from pyarrow.fs import S3FileSystem - _configure_s3_limited_user(s3_server, _minio_limited_policy, + _configure_s3_limited_user(s3_server, _limited_policy, 'test_fs_limited_user', 'limited123') host, port, _, _ = s3_server['connection'] diff --git a/python/pyarrow/tests/util.py b/python/pyarrow/tests/util.py index cf48ac807be1..8dda67e3e231 100644 --- a/python/pyarrow/tests/util.py +++ b/python/pyarrow/tests/util.py @@ -373,6 +373,7 @@ def 
_run_mc_command(mcdir, *args): raise ChildProcessError("Could not run mc") +# TODO: Migrate this helper to SeaweedFS; it still uses the MinIO `mc` command. def _configure_s3_limited_user(s3_server, policy, username, password): """ Attempts to use the mc command to configure the minio server