Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
34 changes: 28 additions & 6 deletions .github/workflows/publish.yml
Original file line number Diff line number Diff line change
Expand Up @@ -74,11 +74,16 @@ jobs:
'name = "rapids-singlecell"',
f'name = "rapids-singlecell-cu{cuda}"',
)
# Rename matching extra to "rapids", remove the other
# Rename matching extra to "rapids", remove the other CUDA extra
text = text.replace(f'rapids-cu{cuda} =', 'rapids =')
# Remove the other CUDA extra line entirely
lines = text.splitlines(keepends=True)
text = "".join(l for l in lines if f'rapids-cu{other}' not in l)
# Remove the other CUDA extra (handles multi-line TOML arrays)
import re
text = re.sub(
rf'^rapids-cu{other}\s*=\s*\[.*?\]\s*\n',
'',
text,
flags=re.MULTILINE | re.DOTALL,
)

# Set CUDA architectures (replace "native" with CI target archs)
text = text.replace(
Expand Down Expand Up @@ -112,14 +117,31 @@ jobs:
CIBW_ENVIRONMENT_PASS_LINUX: SETUPTOOLS_SCM_PRETEND_VERSION
CIBW_ENVIRONMENT: >
CUDA_PATH=/usr/local/cuda
LD_LIBRARY_PATH=/usr/local/cuda/lib64:$LD_LIBRARY_PATH
PATH=/usr/local/cuda/bin:$PATH
CIBW_BEFORE_BUILD: >
python -m pip install -U pip
scikit-build-core cmake ninja nanobind
librmm-cu${{ matrix.cuda_major }} &&
RMM_ROOT=$(python -c "import librmm;print(librmm.__path__[0])") &&
LOG_ROOT=$(python -c "import rapids_logger;print(rapids_logger.__path__[0])") &&
echo "[rsc-build] librmm=$RMM_ROOT" &&
echo "[rsc-build] rapids_logger=$LOG_ROOT" &&
ln -sf "$RMM_ROOT/lib64/librmm.so" /usr/local/lib/librmm.so &&
ln -sf "$LOG_ROOT/lib64/librapids_logger.so" /usr/local/lib/librapids_logger.so &&
ldconfig &&
python -c "import librmm;print(librmm.__path__[0])" > /tmp/.librmm_dir &&
echo "[rsc-build] marker=$(cat /tmp/.librmm_dir)"
CIBW_TEST_SKIP: "*"
CIBW_TEST_COMMAND: ""
CIBW_REPAIR_WHEEL_COMMAND: "auditwheel repair --exclude libcublas.so.${{ matrix.cuda_major }} --exclude libcublasLt.so.${{ matrix.cuda_major }} --exclude libcudart.so.${{ matrix.cuda_major }} -w {dest_dir} {wheel}"
CIBW_REPAIR_WHEEL_COMMAND: >
auditwheel repair
--exclude libcublas.so.${{ matrix.cuda_major }}
--exclude libcublasLt.so.${{ matrix.cuda_major }}
--exclude libcudart.so.${{ matrix.cuda_major }}
--exclude librmm.so
--exclude librapids_logger.so
-w {dest_dir} {wheel}
&& pipx run abi3audit --strict --report {wheel}
CIBW_BUILD_VERBOSITY: "1"

- uses: actions/upload-artifact@v4
Expand Down
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ __pycache__/
/data/
test-data/
.vscode/
.codex

# Distribution / packaging
/dist/
Expand Down Expand Up @@ -50,3 +51,4 @@ CLAUDE.md

# tmp_scripts
tmp_scripts/
benchmarks/
80 changes: 79 additions & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,79 @@ if (RSC_BUILD_EXTENSIONS)
find_package(Python REQUIRED COMPONENTS Interpreter Development.Module ${SKBUILD_SABI_COMPONENT})
find_package(nanobind CONFIG REQUIRED)
find_package(CUDAToolkit REQUIRED)

# Find librmm cmake config.
# Searches all plausible locations: env vars, Python prefix, base_prefix
# (survives build isolation), and CI marker files. Works with conda, pip,
# pixi, uv, hatch, and cibuildwheel.
# NOTE: all probing below runs at configure time via execute_process /
# $ENV{} reads — build-time environment changes are not picked up.
set(_env_roots "")
# Explicit override: LIBRMM_DIR points directly at the librmm package root;
# the sibling rapids_logger package is probed alongside it because rmm's
# cmake config requires rapids_logger to be findable as well.
if(DEFINED ENV{LIBRMM_DIR})
file(GLOB _librmm_hints "$ENV{LIBRMM_DIR}/lib*/cmake"
"$ENV{LIBRMM_DIR}/../rapids_logger/lib*/cmake")
list(APPEND CMAKE_PREFIX_PATH ${_librmm_hints})
endif()
# Environment managers (conda, venv, pixi) — roots searched later below
foreach(_var CONDA_PREFIX VIRTUAL_ENV PIXI_PROJECT_ROOT)
if(DEFINED ENV{${_var}})
list(APPEND _env_roots "$ENV{${_var}}")
endif()
endforeach()
# Python prefix, base_prefix, and real executable's env root
foreach(_attr prefix base_prefix)
execute_process(
COMMAND "${Python_EXECUTABLE}" -c "import sys; print(sys.${_attr})"
OUTPUT_VARIABLE _pp OUTPUT_STRIP_TRAILING_WHITESPACE ERROR_QUIET)
if(_pp)
list(APPEND _env_roots "${_pp}")
endif()
endforeach()
# Resolve symlinks to find the real Python env (works through build isolation)
execute_process(
COMMAND "${Python_EXECUTABLE}" -c
"import sys,pathlib; print(pathlib.Path(sys.executable).resolve().parents[1])"
OUTPUT_VARIABLE _real_prefix OUTPUT_STRIP_TRAILING_WHITESPACE ERROR_QUIET)
if(_real_prefix)
list(APPEND _env_roots "${_real_prefix}")
endif()
# Direct site-packages search — the most reliable way to find pip-installed
# librmm regardless of venv nesting depth
# (site.getsitepackages() already emits a ';'-joined string, which CMake
# treats as a list)
execute_process(
COMMAND "${Python_EXECUTABLE}" -c
"import site; print(';'.join(site.getsitepackages()))"
OUTPUT_VARIABLE _site_paths OUTPUT_STRIP_TRAILING_WHITESPACE ERROR_QUIET)
if(_site_paths)
foreach(_sp ${_site_paths})
file(GLOB _sp_hints "${_sp}/librmm/lib*/cmake"
"${_sp}/rapids_logger/lib*/cmake")
foreach(_h ${_sp_hints})
get_filename_component(_dir "${_h}" DIRECTORY)
list(APPEND CMAKE_PREFIX_PATH "${_dir}")
endforeach()
endforeach()
endif()
# CI/cibuildwheel: CIBW_BEFORE_BUILD writes the librmm path to a marker file
# (see .github/workflows/publish.yml). The fixed /tmp path is Linux-only,
# which matches the manylinux containers cibuildwheel runs in.
if(EXISTS "/tmp/.librmm_dir")
file(READ "/tmp/.librmm_dir" _rmm_marker)
string(STRIP "${_rmm_marker}" _rmm_marker)
file(GLOB _marker_hints "${_rmm_marker}/lib*/cmake"
"${_rmm_marker}/../rapids_logger/lib*/cmake")
list(APPEND CMAKE_PREFIX_PATH ${_marker_hints})
endif()
# Search each root for rmm + rapids_logger cmake configs; globs cover both
# conda-style (<root>/lib/cmake/rmm) and pip-wheel layouts (inside
# site-packages/librmm and site-packages/rapids_logger).
list(REMOVE_DUPLICATES _env_roots)
foreach(_root ${_env_roots})
file(GLOB _hints "${_root}/lib/cmake/rmm"
"${_root}/lib/python*/site-packages/librmm/lib*/cmake/rmm"
"${_root}/lib/python*/site-packages/rapids_logger/lib*/cmake/rapids_logger")
foreach(_h ${_hints})
get_filename_component(_dir "${_h}" DIRECTORY)
list(APPEND CMAKE_PREFIX_PATH "${_dir}")
endforeach()
endforeach()
message(STATUS "rmm search roots: ${_env_roots}")
message(STATUS "rmm CMAKE_PREFIX_PATH: ${CMAKE_PREFIX_PATH}")
# REQUIRED: configuration fails here if no rmm config was found in any of
# the paths collected above, so rmm_FOUND is guaranteed TRUE past this line.
find_package(rmm CONFIG REQUIRED)
message(STATUS "Building for CUDA architectures: ${CMAKE_CUDA_ARCHITECTURES}")
else()
message(STATUS "RSC_BUILD_EXTENSIONS=OFF -> skipping compiled extensions for docs")
Expand Down Expand Up @@ -84,7 +157,8 @@ if (RSC_BUILD_EXTENSIONS)
add_nb_cuda_module(_edistance_cuda src/rapids_singlecell/_cuda/edistance/edistance.cu)
add_nb_cuda_module(_hvg_cuda src/rapids_singlecell/_cuda/hvg/hvg.cu)
add_nb_cuda_module(_kde_cuda src/rapids_singlecell/_cuda/kde/kde.cu)
add_nb_cuda_module(_wilcoxon_cuda src/rapids_singlecell/_cuda/wilcoxon/wilcoxon.cu)
add_nb_cuda_module(_wilcoxon_ovr_cuda src/rapids_singlecell/_cuda/wilcoxon/wilcoxon_ovr.cu)
add_nb_cuda_module(_wilcoxon_ovo_cuda src/rapids_singlecell/_cuda/wilcoxon/wilcoxon_ovo.cu)
# Harmony CUDA modules
add_nb_cuda_module(_harmony_scatter_cuda src/rapids_singlecell/_cuda/harmony/scatter/scatter.cu)
add_nb_cuda_module(_harmony_outer_cuda src/rapids_singlecell/_cuda/harmony/outer/outer.cu)
Expand All @@ -100,4 +174,8 @@ if (RSC_BUILD_EXTENSIONS)
target_link_libraries(_harmony_correction_batched_cuda PRIVATE CUDA::cublas)
# Wilcoxon binned histogram CUDA module
add_nb_cuda_module(_wilcoxon_binned_cuda src/rapids_singlecell/_cuda/wilcoxon_binned/wilcoxon_binned.cu)
# Link the Wilcoxon OVR/OVO modules against librmm (RAPIDS Memory Manager).
# find_package(rmm CONFIG REQUIRED) above guarantees rmm_FOUND when
# extensions are being built; the guard is purely defensive.
if(rmm_FOUND)
target_link_libraries(_wilcoxon_ovr_cuda PRIVATE rmm::rmm)
target_link_libraries(_wilcoxon_ovo_cuda PRIVATE rmm::rmm)
endif()
endif()
22 changes: 20 additions & 2 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,10 @@ requires = [
"scikit-build-core>=0.10",
"nanobind>=2.0.0",
"setuptools-scm>=8",
# librmm headers are needed at build time for the Wilcoxon CUDA kernels.
# The headers are identical across cu12/cu13; the runtime .so is loaded
# from the installed RAPIDS package.
"librmm-cu12>=25.10",
]
build-backend = "scikit_build_core.build"

Expand Down Expand Up @@ -32,8 +36,22 @@ dependencies = [
]

[project.optional-dependencies]
rapids-cu13 = [ "cupy-cuda13x", "cudf-cu13>=25.10", "cuml-cu13>=25.10", "cugraph-cu13>=25.10", "cuvs-cu13>=25.10" ]
rapids-cu12 = [ "cupy-cuda12x", "cudf-cu12>=25.10", "cuml-cu12>=25.10", "cugraph-cu12>=25.10", "cuvs-cu12>=25.10" ]
rapids-cu13 = [
"cupy-cuda13x",
"librmm-cu13>=25.10",
"cudf-cu13>=25.10",
"cuml-cu13>=25.10",
"cugraph-cu13>=25.10",
"cuvs-cu13>=25.10",
]
rapids-cu12 = [
"cupy-cuda12x",
"librmm-cu12>=25.10",
"cudf-cu12>=25.10",
"cuml-cu12>=25.10",
"cugraph-cu12>=25.10",
"cuvs-cu12>=25.10",
]

doc = [
"sphinx>=4.5.0",
Expand Down
13 changes: 12 additions & 1 deletion src/rapids_singlecell/_cuda/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,16 @@

import importlib

# Best-effort preload of librmm's shared library (and its dependencies) so
# the dynamic linker can resolve them when the nanobind extensions that
# link against rmm are imported.  Mirrors the pattern used by other RAPIDS
# packages (cuml, cuvs).  A missing librmm package (ImportError) or a
# failed dlopen (OSError) is tolerated: the affected extensions simply
# fail to import later, on their own.
try:
    librmm = importlib.import_module("librmm")
    librmm.load_library()
except (ImportError, OSError):
    pass

__all__ = [
"_aggr_cuda",
"_aucell_cuda",
Expand Down Expand Up @@ -44,7 +54,8 @@
"_sparse2dense_cuda",
"_spca_cuda",
"_wilcoxon_binned_cuda",
"_wilcoxon_cuda",
"_wilcoxon_ovo_cuda",
"_wilcoxon_ovr_cuda",
]


Expand Down
2 changes: 1 addition & 1 deletion src/rapids_singlecell/_cuda/cooc/kernels_cooc.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -353,7 +353,7 @@ __global__ void occur_count_kernel_csr_catpairs_tiled(
// Binary search for threshold bin
int lo = 0, hi = l_val;
while (lo < hi) {
int mid = (lo + hi) >> 1;
int mid = lo + ((hi - lo) >> 1);
if (dist_sq <= thresholds[mid])
hi = mid;
else
Expand Down
7 changes: 7 additions & 0 deletions src/rapids_singlecell/_cuda/nb_types.h
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,13 @@ using gpu_array = nb::ndarray<T, Device>;
template <typename T, typename Device, typename Contig>
using gpu_array_contig = nb::ndarray<T, Device, Contig>;

// Host (NumPy) array aliases
template <typename T>
using host_array = nb::ndarray<T, nb::numpy, nb::ndim<1>>;

template <typename T>
using host_array_2d = nb::ndarray<T, nb::numpy, nb::ndim<2>>;

// Register bindings for both regular CUDA and managed-memory arrays.
// Usage:
// template <typename Device>
Expand Down
Loading
Loading