diff --git a/.github/actions/setup-conda/action.yml b/.github/actions/setup-conda/action.yml index a09ac1a4e5ffb..c4c9614611d5f 100644 --- a/.github/actions/setup-conda/action.yml +++ b/.github/actions/setup-conda/action.yml @@ -1,8 +1,10 @@ name: Set up Conda environment + inputs: environment-file: description: Conda environment file to use. default: environment.yml + runs: using: composite steps: @@ -14,9 +16,13 @@ runs: condarc-file: ci/.condarc cache-environment: true cache-downloads: true + # FIX: Set this to 'none'. + # 'environment' mode tries to parse JSON to delete packages, which causes the crash. + # GitHub runners are destroyed after the job, so cleanup is not required. + post-cleanup: 'none' - - name: Uninstall pyarrow + - name: Uninstall pyarrow (optional) if: ${{ env.REMOVE_PYARROW == '1' }} - run: | - micromamba remove -y pyarrow shell: bash -el {0} + run: | + micromamba remove -y -n test pyarrow \ No newline at end of file diff --git a/.github/workflows/docbuild-and-upload.yml b/.github/workflows/docbuild-and-upload.yml index 114f7b942cf05..f044593f5f87f 100644 --- a/.github/workflows/docbuild-and-upload.yml +++ b/.github/workflows/docbuild-and-upload.yml @@ -26,7 +26,6 @@ jobs: runs-on: ubuntu-24.04 concurrency: - # https://github.community/t/concurrecy-not-work-for-push/183068/7 group: ${{ github.event_name == 'push' && github.run_number || github.ref }}-web-docs cancel-in-progress: true @@ -35,66 +34,110 @@ jobs: shell: bash -el {0} steps: - - name: Checkout - uses: actions/checkout@v6 - with: - fetch-depth: 0 - - - name: Set up Conda - uses: ./.github/actions/setup-conda - - - name: Build Pandas - uses: ./.github/actions/build_pandas - - - name: Extra installs - # https://pytest-qt.readthedocs.io/en/latest/troubleshooting.html#github-actions-azure-pipelines-travis-ci-and-gitlab-ci-cd - run: sudo apt-get update && sudo apt-get install -y libegl1 libopengl0 - - - name: Test website - run: python -m pytest web/ - - - name: Build website - run: python web/pandas_web.py web/pandas --target-path=web/build - - - name: Build documentation - run: doc/make.py --warnings-are-errors - - - name: Build the interactive terminal - working-directory: web/interactive_terminal - run: jupyter lite build - - - name: Build documentation zip - run: doc/make.py zip_html - - - name: Install ssh key - run: | - mkdir -m 700 -p ~/.ssh - echo "${{ secrets.server_ssh_key }}" > ~/.ssh/id_rsa - chmod 600 ~/.ssh/id_rsa - echo "${{ secrets.server_ip }} ecdsa-sha2-nistp256 AAAAE2VjZHNhLXNoYTItbmlzdHAyNTYAAAAIbmlzdHAyNTYAAABBBFjYkJBk7sos+r7yATODogQc3jUdW1aascGpyOD4bohj8dWjzwLJv/OJ/fyOQ5lmj81WKDk67tGtqNJYGL9acII=" > ~/.ssh/known_hosts - if: github.event_name == 'push' && (github.ref == 'refs/heads/main' || startsWith(github.ref, 'refs/tags/')) - - - name: Copy cheatsheets into site directory - run: cp doc/cheatsheet/Pandas_Cheat_Sheet* web/build/ - - - name: Upload web - run: rsync -az --delete --exclude='pandas-docs' --exclude='docs' --exclude='benchmarks' web/build/ web@${{ secrets.server_ip }}:/var/www/html - if: github.event_name == 'push' && github.ref == 'refs/heads/main' - - - name: Upload dev docs - run: rsync -az --delete doc/build/html/ web@${{ secrets.server_ip }}:/var/www/html/pandas-docs/dev - if: github.event_name == 'push' && github.ref == 'refs/heads/main' - - - name: Upload prod docs - run: rsync -az --delete doc/build/html/ web@${{ secrets.server_ip }}:/var/www/html/pandas-docs/version/${GITHUB_REF_NAME:1} - if: github.event_name == 'push' && startsWith(github.ref, 'refs/tags/') - - - name: Move docs into site directory - run: mv doc/build/html web/build/docs - - - name: Save website as an artifact - uses: actions/upload-artifact@v5 - with: - name: website - path: web/build - retention-days: 14 + - name: Checkout + uses: actions/checkout@v6 + with: + fetch-depth: 0 + + - name: Download micromamba (with retries) + shell: bash + run: | + set -euo pipefail + RETRIES=5 + URL="https://micromamba.snakepit.net/api/micromamba/linux-64/latest" + OUT="$RUNNER_TEMP/micromamba.tar.bz2" + i=0 + until [ $i -ge $RETRIES ] + do + echo "Attempt $((i+1))..." + if curl -fSL "$URL" -o "$OUT"; then + echo "Downloaded micromamba" + break + fi + i=$((i+1)) + sleep $((i * 2)) + done + if [ ! -f "$OUT" ]; then + echo "Failed to download micromamba after $RETRIES attempts" + exit 1 + fi + mkdir -p "$RUNNER_TEMP/micromamba" + tar -xjf "$OUT" -C "$RUNNER_TEMP/micromamba" --strip-components=1 + export MAMBA_ROOT_PREFIX="$RUNNER_TEMP/micromamba-root" + mkdir -p "$MAMBA_ROOT_PREFIX" + echo "Adding micromamba to PATH" + echo "$RUNNER_TEMP/micromamba/bin" >> $GITHUB_PATH + + - name: Create env with micromamba + run: | + micromamba create -y -n ci-env python=3.10 -f ${{ env.ENV_FILE }} + micromamba activate ci-env + echo "PATH updated for ci-env" + + - name: Build Pandas + run: | + micromamba activate ci-env + # Replace with your actual build command from build_pandas action + python setup.py build + + - name: Extra installs + run: sudo apt-get update && sudo apt-get install -y libegl1 libopengl0 + + - name: Test website + run: | + micromamba activate ci-env + python -m pytest web/ + + - name: Build website + run: | + micromamba activate ci-env + python web/pandas_web.py web/pandas --target-path=web/build + + - name: Build documentation + run: | + micromamba activate ci-env + doc/make.py --warnings-are-errors + + - name: Build the interactive terminal + working-directory: web/interactive_terminal + run: | + micromamba activate ci-env + jupyter lite build + + - name: Build documentation zip + run: | + micromamba activate ci-env + doc/make.py zip_html + + - name: Install ssh key + if: github.event_name == 'push' && (github.ref == 'refs/heads/main' || startsWith(github.ref, 'refs/tags/')) + run: | + mkdir -m 700 -p ~/.ssh + echo "${{ secrets.server_ssh_key }}" > ~/.ssh/id_rsa + chmod 600 ~/.ssh/id_rsa + echo "${{ secrets.server_ip }} ecdsa-sha2-nistp256 AAAAE2VjZHNhLXNoYTItbmlzdHAyNTYAAAAIbmlzdHAyNTYAAABBBFjYkJBk7sos+r7yATODogQc3jUdW1aascGpyOD4bohj8dWjzwLJv/OJ/fyOQ5lmj81WKDk67tGtqNJYGL9acII=" > ~/.ssh/known_hosts + + - name: Copy cheatsheets into site directory + run: cp doc/cheatsheet/Pandas_Cheat_Sheet* web/build/ + + - name: Upload web + if: github.event_name == 'push' && github.ref == 'refs/heads/main' + run: rsync -az --delete --exclude='pandas-docs' --exclude='docs' --exclude='benchmarks' web/build/ web@${{ secrets.server_ip }}:/var/www/html + + - name: Upload dev docs + if: github.event_name == 'push' && github.ref == 'refs/heads/main' + run: rsync -az --delete doc/build/html/ web@${{ secrets.server_ip }}:/var/www/html/pandas-docs/dev + + - name: Upload prod docs + if: github.event_name == 'push' && startsWith(github.ref, 'refs/tags/') + run: rsync -az --delete doc/build/html/ web@${{ secrets.server_ip }}:/var/www/html/pandas-docs/version/${GITHUB_REF_NAME:1} + + - name: Move docs into site directory + run: mv doc/build/html web/build/docs + + - name: Save website as an artifact + uses: actions/upload-artifact@v5 + with: + name: website + path: web/build + retention-days: 14 diff --git a/.github/workflows/unit-tests.yml b/.github/workflows/unit-tests.yml index 8406d167a41ca..28d53a417c7f6 100644 --- a/.github/workflows/unit-tests.yml +++ b/.github/workflows/unit-tests.yml @@ -202,10 +202,15 @@ jobs: with: fetch-depth: 0 - - name: Set up Conda - uses: ./.github/actions/setup-conda + # Replaced custom setup-conda (micromamba) with setup-miniconda to avoid micromamba spawn errors on macOS + - name: Set up Miniconda (avoid micromamba) + uses: conda-incubator/setup-miniconda@v3 with: + auto-update-conda: true + use-mamba: false environment-file: ci/deps/${{ matrix.env_file }} + environment-name: pandas-ci + activate-environment: pandas-ci - name: Build Pandas uses: ./.github/actions/build_pandas @@ -304,7 +309,7 @@ jobs: # To unfreeze, comment out the ``if: false`` condition, and make sure you update # the name of the workflow and Python version in actions/setup-python ``python-version:`` # - # After it has been unfrozen, this file should remain unfrozen(present, and running) until: + # After it has been unfrozen, this file should remain unfrozen(present and running) until: # - The next Python version has been officially released. # OR # - Most/All of our optional dependencies support the next Python version AND @@ -418,4 +423,4 @@ jobs: pip install pytest hypothesis # do not import pandas from the checked out repo cd .. - python -c 'import pandas as pd; pd.test(extra_args=["-m not clipboard and not single_cpu and not slow and not network and not db"])' + python -c 'import pandas as pd; pd.test(extra_args=["-m not clipboard and not single_cpu and not slow and not network and not db"])' \ No newline at end of file diff --git a/1.26.0 b/1.26.0 new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/2.8.2 b/2.8.2 new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/2023.3 b/2023.3 new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/conda b/conda new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/doc/source/user_guide/io.rst b/doc/source/user_guide/io.rst index 070ad0bbe22ed..a3bd9160bc49e 100644 --- a/doc/source/user_guide/io.rst +++ b/doc/source/user_guide/io.rst @@ -167,8 +167,38 @@ dtype_backend : {"numpy_nullable", "pyarrow"}, defaults to NumPy backed DataFram dtypes if "pyarrow" is set. The dtype_backends are still experimental. + .. versionadded:: 2.0 - .. versionadded:: 2.0 + +Google Colab +^^^^^^^^^^^^ + +Google Colab provides several methods to load data for :func:`read_csv` and similar functions. + +File upload ++++++++++++ + +.. ipython:: python + + from google.colab import files + # uploaded = files.upload() # Interactive in Colab + import io + import pandas as pd + # df = pd.read_csv(io.BytesIO(uploaded['example.csv'])) + +Google Drive +++++++++++++ + +.. ipython:: python + + from google.colab import drive + drive.mount('/content/drive') + df = pd.read_csv('/content/drive/MyDrive/example.csv') + +See `Google Colab IO notebook `_. + + + engine : {``'c'``, ``'python'``, ``'pyarrow'``} Parser engine to use. The C and pyarrow engines are faster, while the python engine