From 7aca4b38439e789e059150f205251848239fa8a5 Mon Sep 17 00:00:00 2001 From: ilan-gold Date: Tue, 7 Apr 2026 16:39:30 +0200 Subject: [PATCH 01/23] feat: add `illico` --- pyproject.toml | 2 + src/testing/scanpy/_pytest/marks.py | 1 + tests/test_rank_genes_groups.py | 62 +++++++++++++++++++++++++++++ 3 files changed, 65 insertions(+) diff --git a/pyproject.toml b/pyproject.toml index 49bafdf047..8e5f47f18f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -99,6 +99,7 @@ scanorama = [ "scanorama" ] scrublet = [ "scikit-image>=0.23.1" ] # highly_variable_genes method 'seurat_v3' skmisc = [ "scikit-misc>=0.5.1" ] +illico = [ "illico" ] [dependency-groups] dev = [ @@ -108,6 +109,7 @@ dev = [ test = [ "scanpy[dask-ml]", "scanpy[dask]", + "scanpy[illico]", "scanpy[leiden]", "scanpy[plotting]", "scanpy[scrublet]", diff --git a/src/testing/scanpy/_pytest/marks.py b/src/testing/scanpy/_pytest/marks.py index 8b25f0457d..ebf5707c4d 100644 --- a/src/testing/scanpy/_pytest/marks.py +++ b/src/testing/scanpy/_pytest/marks.py @@ -43,6 +43,7 @@ def _generate_next_value_( skimage = "scikit-image" skmisc = "scikit-misc" zarr = auto() + illico = auto() # external bbknn = auto() harmony = "harmonyTS" diff --git a/tests/test_rank_genes_groups.py b/tests/test_rank_genes_groups.py index ba38ffc94d..1c351cff97 100644 --- a/tests/test_rank_genes_groups.py +++ b/tests/test_rank_genes_groups.py @@ -19,6 +19,7 @@ from scanpy.tools._rank_genes_groups import _RankGenes from testing.scanpy._helpers import random_mask from testing.scanpy._helpers.data import pbmc68k_reduced +from testing.scanpy._pytest.marks import needs from testing.scanpy._pytest.params import ARRAY_TYPES, ARRAY_TYPES_MEM if TYPE_CHECKING: @@ -311,3 +312,64 @@ def test_mask_not_equal(): with_mask = pbmc.uns["rank_genes_groups"]["names"] assert not np.array_equal(no_mask, with_mask) + + +@pytest.mark.parametrize("corr_method", ["benjamini-hochberg", "bonferroni"]) +@pytest.mark.parametrize("test", ["ovo", "ovr"]) 
+@pytest.mark.parametrize("exp_post_agg", [True, False]) +# Beause illico does not add 1e-9 to its values before log? +@pytest.mark.filterwarnings("ignore:invalid value encountered:RuntimeWarning") +@needs.illico +def test_illico(test, corr_method, exp_post_agg): + from illico.asymptotic_wilcoxon import asymptotic_wilcoxon + + pbmc = pbmc68k_reduced() + reference = pbmc.obs["bulk_labels"].iloc[0] if test == "ovo" else None + + asy_results = asymptotic_wilcoxon( + adata=pbmc.copy(), + group_keys="bulk_labels", + is_log1p=True, # Scanpy assumes log1p + exp_post_agg=exp_post_agg, # Post-aggregation exponentiation is needed to match Scanpy's fold change output + reference=reference, + use_continuity=False, # False because scanpy does not apply continuity correction + tie_correct=False, # False because scanpy takes a lot of time to adjust + n_threads=1, + batch_size=16, + alternative="two-sided", # Scanpy only implments two-sided test + use_rust=False, + return_as_scanpy=True, + corr_method=corr_method, + ) + + sc.tl.rank_genes_groups( + pbmc, + groupby="bulk_labels", + method="wilcoxon", + reference=reference if test == "ovo" else "rest", + n_genes=pbmc.n_vars, + tie_correct=False, + corr_method=corr_method, + exp_post_agg=exp_post_agg, + ) + scanpy_results = pbmc.uns["rank_genes_groups"] + assert set(asy_results.keys()) == set(scanpy_results.keys()), ( + "Output keys do not match Scanpy's output format." 
+ ) + + for k, ref in scanpy_results.items(): + if k in ["params", "names"]: + # We can skip names ordering check as if incorrect, other values will mismatch + continue + res = np.array(asy_results[k].tolist()) + ref_arr = np.array(ref.tolist()) + mask = np.isfinite(ref_arr) * np.isfinite( + res + ) # Mask to ignore inf values in the comparison + np.testing.assert_allclose( + ref_arr[mask], + res[mask], + rtol=0, + atol=1e-2, + err_msg=f"Mismatch in '{k}' values between asymptotic_wilcoxon and Scanpy outputs.", + ) From 72318fb59ed8bbcdb99cb8cdbc704d4757daa4dc Mon Sep 17 00:00:00 2001 From: ilan-gold Date: Tue, 7 Apr 2026 16:51:59 +0200 Subject: [PATCH 02/23] fix: bump numba --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 8e5f47f18f..384b002226 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -59,7 +59,7 @@ dependencies = [ "matplotlib>=3.9", "natsort", "networkx>=2.8.8", - "numba>=0.60", + "numba>=0.63.1", "numpy>=2", "packaging>=25", "pandas>=2.2.2", From b9c8257b6836bf6e4e973e5bcbb1ddcdd1d83c33 Mon Sep 17 00:00:00 2001 From: ilan-gold Date: Tue, 7 Apr 2026 16:58:53 +0200 Subject: [PATCH 03/23] chore: probably not either --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 384b002226..12c3665653 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -66,7 +66,7 @@ dependencies = [ "patsy", "pynndescent>=0.5.13", "scikit-learn>=1.4.2", - "scipy>=1.13", + "scipy>=1.16.3", "seaborn>=0.13.2", "session-info2", "statsmodels>=0.14.5", From 5394d2b10e5e0eca038432031d8ebdbb25edc195 Mon Sep 17 00:00:00 2001 From: ilan-gold Date: Tue, 7 Apr 2026 16:59:44 +0200 Subject: [PATCH 04/23] chore: now pandas --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 12c3665653..d01a72380f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -62,7 +62,7 @@ dependencies = [ 
"numba>=0.63.1", "numpy>=2", "packaging>=25", - "pandas>=2.2.2", + "pandas>=2.3.3", "patsy", "pynndescent>=0.5.13", "scikit-learn>=1.4.2", From 8928dfda6fb87ed1f149c9f49af544a2ae89e6c0 Mon Sep 17 00:00:00 2001 From: ilan-gold Date: Tue, 7 Apr 2026 17:00:59 +0200 Subject: [PATCH 05/23] fix: anndata --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index d01a72380f..da3292f44c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -51,7 +51,7 @@ classifiers = [ ] dynamic = [ "version" ] dependencies = [ - "anndata>=0.10.8", + "anndata>=0.11", "certifi", "fast-array-utils[accel,sparse]>=1.4", "h5py>=3.11", From 897a6464a6b3b4df416fd812bfcfe5287199a1e9 Mon Sep 17 00:00:00 2001 From: ilan-gold Date: Tue, 7 Apr 2026 17:06:01 +0200 Subject: [PATCH 06/23] fix: just stable then --- hatch.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hatch.toml b/hatch.toml index b7e7a5985b..8f2112c943 100644 --- a/hatch.toml +++ b/hatch.toml @@ -45,4 +45,4 @@ overrides.matrix.deps.dependency-groups = [ ] [[envs.hatch-test.matrix]] -deps = [ "stable", "pre", "low-vers", "few-extras" ] +deps = [ "stable" ] From af1f523e3f6f7e3f042e04cdd1e2a54b03426a98 Mon Sep 17 00:00:00 2001 From: ilan-gold Date: Mon, 13 Apr 2026 11:38:05 +0200 Subject: [PATCH 07/23] fix: pin rc --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index da3292f44c..336d9d2ad7 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -99,7 +99,7 @@ scanorama = [ "scanorama" ] scrublet = [ "scikit-image>=0.23.1" ] # highly_variable_genes method 'seurat_v3' skmisc = [ "scikit-misc>=0.5.1" ] -illico = [ "illico" ] +illico = [ "illico==0.5.0rc1" ] [dependency-groups] dev = [ From 40d5946992cabd11e5bdf59989e9b8b6144d5a7b Mon Sep 17 00:00:00 2001 From: ilan-gold Date: Mon, 13 Apr 2026 15:12:56 +0200 Subject: [PATCH 08/23] fix: agg name --- tests/test_rank_genes_groups.py | 2 +- 1 file 
changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_rank_genes_groups.py b/tests/test_rank_genes_groups.py index 1c351cff97..9ac7390623 100644 --- a/tests/test_rank_genes_groups.py +++ b/tests/test_rank_genes_groups.py @@ -316,7 +316,7 @@ def test_mask_not_equal(): @pytest.mark.parametrize("corr_method", ["benjamini-hochberg", "bonferroni"]) @pytest.mark.parametrize("test", ["ovo", "ovr"]) -@pytest.mark.parametrize("exp_post_agg", [True, False]) +@pytest.mark.parametrize("exp_post_agg", [True, False], ids=["post_exp", "pre_exp"]) # Beause illico does not add 1e-9 to its values before log? @pytest.mark.filterwarnings("ignore:invalid value encountered:RuntimeWarning") @needs.illico From 74b6d87f7e694a4b964528c53aa2c8881ea5caf7 Mon Sep 17 00:00:00 2001 From: ilan-gold Date: Mon, 13 Apr 2026 15:40:25 +0200 Subject: [PATCH 09/23] fix: only consider scores and pvals --- tests/test_rank_genes_groups.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/test_rank_genes_groups.py b/tests/test_rank_genes_groups.py index 9ac7390623..583ceb4731 100644 --- a/tests/test_rank_genes_groups.py +++ b/tests/test_rank_genes_groups.py @@ -358,6 +358,8 @@ def test_illico(test, corr_method, exp_post_agg): ) for k, ref in scanpy_results.items(): + if k in {"logfoldchanges"}: + continue if k in ["params", "names"]: # We can skip names ordering check as if incorrect, other values will mismatch continue From 83524459b30498636b76de224337848126a4dff1 Mon Sep 17 00:00:00 2001 From: ilan-gold Date: Mon, 13 Apr 2026 18:04:23 +0200 Subject: [PATCH 10/23] chore: p values and z scores only --- src/scanpy/_settings/presets.py | 4 +- src/scanpy/tools/_rank_genes_groups.py | 50 ++++++++++++++++-- tests/test_rank_genes_groups.py | 70 +++++++++++++------------- 3 files changed, 83 insertions(+), 41 deletions(-) diff --git a/src/scanpy/_settings/presets.py b/src/scanpy/_settings/presets.py index 9a6e0488e6..e348a3eef8 100644 --- a/src/scanpy/_settings/presets.py +++ 
b/src/scanpy/_settings/presets.py @@ -25,7 +25,9 @@ ] -type DETest = Literal["logreg", "t-test", "wilcoxon", "t-test_overestim_var"] +type DETest = Literal[ + "logreg", "t-test", "wilcoxon", "wilcoxon_illico", "t-test_overestim_var" +] type HVGFlavor = Literal["seurat", "cell_ranger", "seurat_v3", "seurat_v3_paper"] type LeidenFlavor = Literal["leidenalg", "igraph"] diff --git a/src/scanpy/tools/_rank_genes_groups.py b/src/scanpy/tools/_rank_genes_groups.py index aec02c4b2a..421e5af1a2 100644 --- a/src/scanpy/tools/_rank_genes_groups.py +++ b/src/scanpy/tools/_rank_genes_groups.py @@ -7,6 +7,7 @@ import numba import numpy as np import pandas as pd +from anndata import AnnData from fast_array_utils.numba import njit from fast_array_utils.stats import mean_var from scipy import sparse @@ -27,7 +28,6 @@ from collections.abc import Generator, Iterable from typing import Literal - from anndata import AnnData from numpy.typing import NDArray @@ -423,7 +423,7 @@ def logreg( if len(self.groups_order) <= 2: break - def compute_statistics( # noqa: PLR0912 + def compute_statistics( # noqa: PLR0912, PLR0915 self, method: DETest, *, @@ -437,8 +437,49 @@ def compute_statistics( # noqa: PLR0912 if method in {"t-test", "t-test_overestim_var"}: self._basic_stats(exponentiate_values=False) generate_test_results = self.t_test(method) - elif method == "wilcoxon": - generate_test_results = self.wilcoxon(tie_correct=tie_correct) + elif "wilcoxon" in method: + if "illico" in method: + from illico import asymptotic_wilcoxon + + illico_df = asymptotic_wilcoxon( + AnnData( + X=self.X, + var=pd.DataFrame(index=self.var_names), + obs=pd.DataFrame( + index=pd.RangeIndex(self.X.shape[0]).astype("str"), + data={ + "group": pd.Categorical( + np.array(range(self.groups_masks_obs.shape[0]))[ + self.groups_masks_obs.T.argmax(axis=1) + ], + categories=np.array( + range(self.groups_masks_obs.shape[0]) + ), + ) + }, + ), + ), + reference=self.ireference, + group_keys="group", + return_as_scanpy=False, 
+ is_log1p=True, + tie_correct=tie_correct, + use_continuity=False, + alternative="two-sided", + ) + generate_test_results = ( + ( + group_idx, + group["z_score"].to_numpy(), + group["p_value"].to_numpy(), + ) + for group_idx, (_, group) in enumerate( + illico_df.groupby(level="pert") + ) + if group_idx != self.ireference + ) + else: + generate_test_results = self.wilcoxon(tie_correct=tie_correct) # If we're not exponentiating after the mean aggregation, then do it now. self._basic_stats(exponentiate_values=not exp_post_agg) elif method == "logreg": @@ -447,7 +488,6 @@ def compute_statistics( # noqa: PLR0912 self.stats = None n_genes = self.X.shape[1] - for group_index, scores, pvals in generate_test_results: group_name = str(self.groups_order[group_index]) diff --git a/tests/test_rank_genes_groups.py b/tests/test_rank_genes_groups.py index 583ceb4731..47cd8719ab 100644 --- a/tests/test_rank_genes_groups.py +++ b/tests/test_rank_genes_groups.py @@ -317,29 +317,29 @@ def test_mask_not_equal(): @pytest.mark.parametrize("corr_method", ["benjamini-hochberg", "bonferroni"]) @pytest.mark.parametrize("test", ["ovo", "ovr"]) @pytest.mark.parametrize("exp_post_agg", [True, False], ids=["post_exp", "pre_exp"]) +@pytest.mark.parametrize( + "tie_correct", [True, False], ids=["tie_correct", "no_tie_correct"] +) # Beause illico does not add 1e-9 to its values before log? 
@pytest.mark.filterwarnings("ignore:invalid value encountered:RuntimeWarning") @needs.illico -def test_illico(test, corr_method, exp_post_agg): - from illico.asymptotic_wilcoxon import asymptotic_wilcoxon +def test_illico(test, corr_method, exp_post_agg, tie_correct, subtests): pbmc = pbmc68k_reduced() - reference = pbmc.obs["bulk_labels"].iloc[0] if test == "ovo" else None + pbmc.raw.X.sum_duplicates() + pbmc.raw.X.sort_indices() + pbmc_illico = pbmc.copy() - asy_results = asymptotic_wilcoxon( - adata=pbmc.copy(), - group_keys="bulk_labels", - is_log1p=True, # Scanpy assumes log1p - exp_post_agg=exp_post_agg, # Post-aggregation exponentiation is needed to match Scanpy's fold change output - reference=reference, - use_continuity=False, # False because scanpy does not apply continuity correction - tie_correct=False, # False because scanpy takes a lot of time to adjust - n_threads=1, - batch_size=16, - alternative="two-sided", # Scanpy only implments two-sided test - use_rust=False, - return_as_scanpy=True, + reference = pbmc.obs["bulk_labels"].iloc[0] if test == "ovo" else None + sc.tl.rank_genes_groups( + pbmc_illico, + groupby="bulk_labels", + method="wilcoxon_illico", + reference=reference if test == "ovo" else "rest", + n_genes=pbmc.n_vars, + tie_correct=tie_correct, corr_method=corr_method, + exp_post_agg=exp_post_agg, ) sc.tl.rank_genes_groups( @@ -348,30 +348,30 @@ def test_illico(test, corr_method, exp_post_agg): method="wilcoxon", reference=reference if test == "ovo" else "rest", n_genes=pbmc.n_vars, - tie_correct=False, + tie_correct=tie_correct, corr_method=corr_method, exp_post_agg=exp_post_agg, ) scanpy_results = pbmc.uns["rank_genes_groups"] - assert set(asy_results.keys()) == set(scanpy_results.keys()), ( + illico_results = pbmc_illico.uns["rank_genes_groups"] + assert set(illico_results.keys()) == set(scanpy_results.keys()), ( "Output keys do not match Scanpy's output format." 
) for k, ref in scanpy_results.items(): - if k in {"logfoldchanges"}: - continue - if k in ["params", "names"]: - # We can skip names ordering check as if incorrect, other values will mismatch - continue - res = np.array(asy_results[k].tolist()) - ref_arr = np.array(ref.tolist()) - mask = np.isfinite(ref_arr) * np.isfinite( - res - ) # Mask to ignore inf values in the comparison - np.testing.assert_allclose( - ref_arr[mask], - res[mask], - rtol=0, - atol=1e-2, - err_msg=f"Mismatch in '{k}' values between asymptotic_wilcoxon and Scanpy outputs.", - ) + with subtests.test(k): + if k in ["params", "names"]: + # We can skip names ordering check as if incorrect, other values will mismatch + continue + res = np.array(illico_results[k].tolist()) + ref_arr = np.array(ref.tolist()) + mask = np.isfinite(ref_arr) * np.isfinite( + res + ) # Mask to ignore inf values in the comparison + np.testing.assert_allclose( + ref_arr[mask], + res[mask], + rtol=0, + atol=1e-2, + err_msg=f"Mismatch in '{k}' values between asymptotic_wilcoxon and Scanpy outputs.", + ) From 1cad4317f8a5b29503d30616d64799b842ee529a Mon Sep 17 00:00:00 2001 From: ilan-gold Date: Mon, 13 Apr 2026 18:59:14 +0200 Subject: [PATCH 11/23] fix: point an low-vers safe version --- hatch.toml | 5 ++++- pyproject.toml | 8 ++++---- 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/hatch.toml b/hatch.toml index 8f2112c943..f100ebfd00 100644 --- a/hatch.toml +++ b/hatch.toml @@ -45,4 +45,7 @@ overrides.matrix.deps.dependency-groups = [ ] [[envs.hatch-test.matrix]] -deps = [ "stable" ] +deps = [ "stable", "pre", "low-vers", "few-extras" ] + +[metadata] +allow-direct-references = true diff --git a/pyproject.toml b/pyproject.toml index 336d9d2ad7..8f75de2629 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -59,14 +59,14 @@ dependencies = [ "matplotlib>=3.9", "natsort", "networkx>=2.8.8", - "numba>=0.63.1", + "numba>=0.60", "numpy>=2", "packaging>=25", - "pandas>=2.3.3", + "pandas>=2.2.2", "patsy", 
"pynndescent>=0.5.13", "scikit-learn>=1.4.2", - "scipy>=1.16.3", + "scipy>=1.13", "seaborn>=0.13.2", "session-info2", "statsmodels>=0.14.5", @@ -99,7 +99,7 @@ scanorama = [ "scanorama" ] scrublet = [ "scikit-image>=0.23.1" ] # highly_variable_genes method 'seurat_v3' skmisc = [ "scikit-misc>=0.5.1" ] -illico = [ "illico==0.5.0rc1" ] +illico = [ "illico @ git+https://github.com/ilan-gold/illico.git@ig/illico_low_bounds" ] [dependency-groups] dev = [ From c83f82b41f0b10452e840b76a79fad3e86705ffa Mon Sep 17 00:00:00 2001 From: ilan-gold Date: Tue, 14 Apr 2026 14:53:33 +0200 Subject: [PATCH 12/23] fix: make a copy of vectors for writing --- src/scanpy/tools/_rank_genes_groups.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/scanpy/tools/_rank_genes_groups.py b/src/scanpy/tools/_rank_genes_groups.py index 421e5af1a2..3ff3c77dcb 100644 --- a/src/scanpy/tools/_rank_genes_groups.py +++ b/src/scanpy/tools/_rank_genes_groups.py @@ -470,8 +470,8 @@ def compute_statistics( # noqa: PLR0912, PLR0915 generate_test_results = ( ( group_idx, - group["z_score"].to_numpy(), - group["p_value"].to_numpy(), + group["z_score"].to_numpy(copy=True), + group["p_value"].to_numpy(copy=True), ) for group_idx, (_, group) in enumerate( illico_df.groupby(level="pert") From 3ca29573b76d857f1bdfff2174fe9c4efc37a617 Mon Sep 17 00:00:00 2001 From: ilan-gold Date: Tue, 14 Apr 2026 15:05:38 +0200 Subject: [PATCH 13/23] fix: remove warning filter + `use_rust` --- src/scanpy/tools/_rank_genes_groups.py | 1 + tests/test_rank_genes_groups.py | 1 - 2 files changed, 1 insertion(+), 1 deletion(-) diff --git a/src/scanpy/tools/_rank_genes_groups.py b/src/scanpy/tools/_rank_genes_groups.py index 3ff3c77dcb..f42e8b624e 100644 --- a/src/scanpy/tools/_rank_genes_groups.py +++ b/src/scanpy/tools/_rank_genes_groups.py @@ -466,6 +466,7 @@ def compute_statistics( # noqa: PLR0912, PLR0915 tie_correct=tie_correct, use_continuity=False, alternative="two-sided", + use_rust=False, ) 
generate_test_results = ( ( diff --git a/tests/test_rank_genes_groups.py b/tests/test_rank_genes_groups.py index 47cd8719ab..c2299ad842 100644 --- a/tests/test_rank_genes_groups.py +++ b/tests/test_rank_genes_groups.py @@ -320,7 +320,6 @@ def test_mask_not_equal(): @pytest.mark.parametrize( "tie_correct", [True, False], ids=["tie_correct", "no_tie_correct"] ) -# Beause illico does not add 1e-9 to its values before log? @pytest.mark.filterwarnings("ignore:invalid value encountered:RuntimeWarning") @needs.illico def test_illico(test, corr_method, exp_post_agg, tie_correct, subtests): From fa454d74c65626f954585aa6b5b01db72c9f0876 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 15 Apr 2026 10:08:27 +0000 Subject: [PATCH 14/23] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- tests/test_rank_genes_groups.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/test_rank_genes_groups.py b/tests/test_rank_genes_groups.py index fc6f3eefb0..42bf40deca 100644 --- a/tests/test_rank_genes_groups.py +++ b/tests/test_rank_genes_groups.py @@ -375,6 +375,7 @@ def test_illico(test, corr_method, exp_post_agg, tie_correct, subtests): err_msg=f"Mismatch in '{k}' values between asymptotic_wilcoxon and Scanpy outputs.", ) + @pytest.mark.parametrize( ("exp_post_agg", "expected_logfc"), [ From ee037e64bee7a4216c4f115c5ebc15b91307998d Mon Sep 17 00:00:00 2001 From: ilan-gold Date: Wed, 15 Apr 2026 19:33:24 +0200 Subject: [PATCH 15/23] fix: clarify usage of categories --- src/scanpy/tools/_rank_genes_groups.py | 29 ++++++++++++++++---------- 1 file changed, 18 insertions(+), 11 deletions(-) diff --git a/src/scanpy/tools/_rank_genes_groups.py b/src/scanpy/tools/_rank_genes_groups.py index f42e8b624e..ee91136e6a 100644 --- a/src/scanpy/tools/_rank_genes_groups.py +++ b/src/scanpy/tools/_rank_genes_groups.py @@ -448,18 +448,18 @@ def compute_statistics( # noqa: 
PLR0912, PLR0915 obs=pd.DataFrame( index=pd.RangeIndex(self.X.shape[0]).astype("str"), data={ - "group": pd.Categorical( - np.array(range(self.groups_masks_obs.shape[0]))[ - self.groups_masks_obs.T.argmax(axis=1) - ], - categories=np.array( + "group": pd.Categorical.from_codes( + codes=np.array( range(self.groups_masks_obs.shape[0]) - ), + )[self.groups_masks_obs.T.argmax(axis=1)], + categories=self.groups_order, ) }, ), ), - reference=self.ireference, + reference=self.groups_order[self.ireference] + if self.ireference is not None + else None, group_keys="group", return_as_scanpy=False, is_log1p=True, @@ -470,14 +470,21 @@ def compute_statistics( # noqa: PLR0912, PLR0915 ) generate_test_results = ( ( - group_idx, + self.groups_order.tolist().index(group_cat), group["z_score"].to_numpy(copy=True), group["p_value"].to_numpy(copy=True), ) - for group_idx, (_, group) in enumerate( - illico_df.groupby(level="pert") + for (_, group) in illico_df.groupby(level="pert") + if ( + group_cat := np.unique( + group.index.get_level_values("pert").to_numpy(copy=True) + ).item() + ) + != ( + None + if self.ireference is None + else self.groups_order[self.ireference] ) - if group_idx != self.ireference ) else: generate_test_results = self.wilcoxon(tie_correct=tie_correct) From 6557bcfa50750a4af299c836af9f9e2409439bd4 Mon Sep 17 00:00:00 2001 From: ilan-gold Date: Wed, 15 Apr 2026 19:42:36 +0200 Subject: [PATCH 16/23] fix: order --- src/scanpy/tools/_rank_genes_groups.py | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/src/scanpy/tools/_rank_genes_groups.py b/src/scanpy/tools/_rank_genes_groups.py index ee91136e6a..b41a3d2442 100644 --- a/src/scanpy/tools/_rank_genes_groups.py +++ b/src/scanpy/tools/_rank_genes_groups.py @@ -468,9 +468,8 @@ def compute_statistics( # noqa: PLR0912, PLR0915 alternative="two-sided", use_rust=False, ) - generate_test_results = ( - ( - self.groups_order.tolist().index(group_cat), + generate_test_results_map = { + group_cat: 
( group["z_score"].to_numpy(copy=True), group["p_value"].to_numpy(copy=True), ) @@ -485,6 +484,14 @@ def compute_statistics( # noqa: PLR0912, PLR0915 if self.ireference is None else self.groups_order[self.ireference] ) + } + generate_test_results = ( + ( + self.groups_order.tolist().index(group_cat), + *generate_test_results_map[group_cat], + ) + for group_cat in self.groups_order + if group_cat in generate_test_results_map ) else: generate_test_results = self.wilcoxon(tie_correct=tie_correct) From 480a57addf96b94e9f49dbd0b465615a5ff0057d Mon Sep 17 00:00:00 2001 From: ilan-gold Date: Wed, 15 Apr 2026 19:55:03 +0200 Subject: [PATCH 17/23] fix: decrease absolute tolerance --- tests/test_rank_genes_groups.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_rank_genes_groups.py b/tests/test_rank_genes_groups.py index 42bf40deca..dfeca75773 100644 --- a/tests/test_rank_genes_groups.py +++ b/tests/test_rank_genes_groups.py @@ -371,7 +371,7 @@ def test_illico(test, corr_method, exp_post_agg, tie_correct, subtests): ref_arr[mask], res[mask], rtol=0, - atol=1e-2, + atol=1e-6, err_msg=f"Mismatch in '{k}' values between asymptotic_wilcoxon and Scanpy outputs.", ) From c755a14c40c4980dcb744efa7c77254a187feca1 Mon Sep 17 00:00:00 2001 From: ilan-gold Date: Thu, 16 Apr 2026 12:04:55 +0200 Subject: [PATCH 18/23] fix: `rest` instead of `None` --- tests/test_rank_genes_groups.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_rank_genes_groups.py b/tests/test_rank_genes_groups.py index dfeca75773..5684c6fe1d 100644 --- a/tests/test_rank_genes_groups.py +++ b/tests/test_rank_genes_groups.py @@ -329,7 +329,7 @@ def test_illico(test, corr_method, exp_post_agg, tie_correct, subtests): pbmc.raw.X.sort_indices() pbmc_illico = pbmc.copy() - reference = pbmc.obs["bulk_labels"].iloc[0] if test == "ovo" else None + reference = pbmc.obs["bulk_labels"].iloc[0] if test == "ovo" else "rest" sc.tl.rank_genes_groups( pbmc_illico, 
groupby="bulk_labels", From ac76f90fd8c5eba5cbae8acf7960781d16c58062 Mon Sep 17 00:00:00 2001 From: ilan-gold Date: Thu, 16 Apr 2026 12:21:10 +0200 Subject: [PATCH 19/23] fix: respect `groups` arg --- src/scanpy/tools/_rank_genes_groups.py | 10 ++-------- tests/test_rank_genes_groups.py | 15 +++++++++++++-- 2 files changed, 15 insertions(+), 10 deletions(-) diff --git a/src/scanpy/tools/_rank_genes_groups.py b/src/scanpy/tools/_rank_genes_groups.py index b41a3d2442..d1adc068dc 100644 --- a/src/scanpy/tools/_rank_genes_groups.py +++ b/src/scanpy/tools/_rank_genes_groups.py @@ -140,6 +140,7 @@ def __init__( self.expm1_func = lambda x: np.expm1(x * np.log(base)) else: self.expm1_func = np.expm1 + self.group_col = adata.obs[groupby].array self.groups_order, self.groups_masks_obs = _utils.select_groups( adata, groups, groupby @@ -447,14 +448,7 @@ def compute_statistics( # noqa: PLR0912, PLR0915 var=pd.DataFrame(index=self.var_names), obs=pd.DataFrame( index=pd.RangeIndex(self.X.shape[0]).astype("str"), - data={ - "group": pd.Categorical.from_codes( - codes=np.array( - range(self.groups_masks_obs.shape[0]) - )[self.groups_masks_obs.T.argmax(axis=1)], - categories=self.groups_order, - ) - }, + data={"group": self.group_col}, ), ), reference=self.groups_order[self.ireference] diff --git a/tests/test_rank_genes_groups.py b/tests/test_rank_genes_groups.py index 5684c6fe1d..6a7ac3c2ef 100644 --- a/tests/test_rank_genes_groups.py +++ b/tests/test_rank_genes_groups.py @@ -23,7 +23,7 @@ from testing.scanpy._pytest.params import ARRAY_TYPES, ARRAY_TYPES_MEM if TYPE_CHECKING: - from collections.abc import Callable + from collections.abc import Callable, Sequence from typing import Any, Literal from numpy.lib.npyio import NpzFile @@ -320,9 +320,18 @@ def test_mask_not_equal(): @pytest.mark.parametrize( "tie_correct", [True, False], ids=["tie_correct", "no_tie_correct"] ) +@pytest.mark.parametrize("groups", [["CD14+ Monocyte", "Dendritic"], "all"]) 
@pytest.mark.filterwarnings("ignore:invalid value encountered:RuntimeWarning") @needs.illico -def test_illico(test, corr_method, exp_post_agg, tie_correct, subtests): +def test_illico( + test: Literal["ovo", "ovr"], + corr_method: Literal["benjamini-hochberg", "bonferroni"], + subtests: pytest.Subtests, + groups: Literal["all"] | Sequence[str], + *, + exp_post_agg: bool, + tie_correct: bool, +): pbmc = pbmc68k_reduced() pbmc.raw.X.sum_duplicates() @@ -339,6 +348,7 @@ def test_illico(test, corr_method, exp_post_agg, tie_correct, subtests): tie_correct=tie_correct, corr_method=corr_method, exp_post_agg=exp_post_agg, + groups=groups, ) sc.tl.rank_genes_groups( @@ -350,6 +360,7 @@ def test_illico(test, corr_method, exp_post_agg, tie_correct, subtests): tie_correct=tie_correct, corr_method=corr_method, exp_post_agg=exp_post_agg, + groups=groups, ) scanpy_results = pbmc.uns["rank_genes_groups"] illico_results = pbmc_illico.uns["rank_genes_groups"] From 259d8f3fc515393032007ae9bf323c3aadc8927d Mon Sep 17 00:00:00 2001 From: ilan-gold Date: Thu, 16 Apr 2026 12:22:47 +0200 Subject: [PATCH 20/23] chore: add note --- src/scanpy/tools/_rank_genes_groups.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/scanpy/tools/_rank_genes_groups.py b/src/scanpy/tools/_rank_genes_groups.py index d1adc068dc..2fe6ad4a88 100644 --- a/src/scanpy/tools/_rank_genes_groups.py +++ b/src/scanpy/tools/_rank_genes_groups.py @@ -448,6 +448,9 @@ def compute_statistics( # noqa: PLR0912, PLR0915 var=pd.DataFrame(index=self.var_names), obs=pd.DataFrame( index=pd.RangeIndex(self.X.shape[0]).astype("str"), + # This self.group_col means illico will run tests against *all* data + # instead of what's in self.groups_order as controlled by the `groups` arg. 
+ # TODO: Only run the subset once illico supports a `groups` argument data={"group": self.group_col}, ), ), From 3dac20200a2e3f1d4589c45ca5f7dcfb95619c77 Mon Sep 17 00:00:00 2001 From: ilan-gold Date: Fri, 17 Apr 2026 12:01:30 +0200 Subject: [PATCH 21/23] fix; dont make list multiple times --- src/scanpy/tools/_rank_genes_groups.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/scanpy/tools/_rank_genes_groups.py b/src/scanpy/tools/_rank_genes_groups.py index d30a5cdce7..841aa41bc7 100644 --- a/src/scanpy/tools/_rank_genes_groups.py +++ b/src/scanpy/tools/_rank_genes_groups.py @@ -468,6 +468,7 @@ def compute_statistics( # noqa: PLR0912, PLR0915 alternative="two-sided", use_rust=False, ) + # Generate a lookup of category -> result excluding the reference if it is present. generate_test_results_map = { group_cat: ( group["z_score"].to_numpy(copy=True), @@ -485,9 +486,11 @@ def compute_statistics( # noqa: PLR0912, PLR0915 else self.groups_order[self.ireference] ) } + # Create the iterator that is expected by the other method-branches. 
+ groups_order_list = self.groups_order.tolist() generate_test_results = ( ( - self.groups_order.tolist().index(group_cat), + groups_order_list.index(group_cat), *generate_test_results_map[group_cat], ) for group_cat in self.groups_order From 023aa0f708df8661625a0ade6613daddb4f7fb81 Mon Sep 17 00:00:00 2001 From: Ilan Gold Date: Tue, 21 Apr 2026 14:02:22 +0200 Subject: [PATCH 22/23] illico bound --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index a9ded1fe38..e75d22a0b4 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -97,7 +97,7 @@ scanorama = [ "scanorama" ] scrublet = [ "scikit-image>=0.23.1" ] # highly_variable_genes method 'seurat_v3' skmisc = [ "scikit-misc>=0.5.1" ] -illico = [ "illico @ git+https://github.com/ilan-gold/illico.git@ig/illico_low_bounds" ] +illico = [ "illico>=0.5.0rc2" ] scanpy2 = [ "igraph>=0.10.8", "scikit-misc>=0.5.1" ] [dependency-groups] From 9fe02a3da0aa5ccd8dfc69c3bb819532fb5da352 Mon Sep 17 00:00:00 2001 From: Ilan Gold Date: Tue, 21 Apr 2026 14:02:51 +0200 Subject: [PATCH 23/23] fix: re-disallow direct references --- hatch.toml | 3 --- 1 file changed, 3 deletions(-) diff --git a/hatch.toml b/hatch.toml index 44bb08494c..9ca489ac55 100644 --- a/hatch.toml +++ b/hatch.toml @@ -46,6 +46,3 @@ overrides.matrix.deps.dependency-groups = [ [[envs.hatch-test.matrix]] deps = [ "stable", "pre", "low-vers", "few-extras" ] - -[metadata] -allow-direct-references = true