From c5591c8d5bcfb3bcafcda7aaf7ea56466ca818f2 Mon Sep 17 00:00:00 2001 From: ilan-gold Date: Tue, 7 Apr 2026 16:00:55 +0200 Subject: [PATCH 01/11] feat: allow exponentiation post agg for log-fold-change --- src/scanpy/_settings/presets.py | 7 ++++-- src/scanpy/tools/_rank_genes_groups.py | 31 +++++++++++++++++++------- 2 files changed, 28 insertions(+), 10 deletions(-) diff --git a/src/scanpy/_settings/presets.py b/src/scanpy/_settings/presets.py index 697c55e765..9a6e0488e6 100644 --- a/src/scanpy/_settings/presets.py +++ b/src/scanpy/_settings/presets.py @@ -63,6 +63,7 @@ class PcaPreset(NamedTuple): class RankGenesGroupsPreset(NamedTuple): method: DETest mask_var: str | None + exp_post_agg: bool class ScalePreset(NamedTuple): @@ -167,9 +168,11 @@ def pca() -> Mapping[Preset, PcaPreset]: def rank_genes_groups() -> Mapping[Preset, RankGenesGroupsPreset]: """Correlation method for :func:`~scanpy.tl.rank_genes_groups`.""" return { - Preset.ScanpyV1: RankGenesGroupsPreset(method="t-test", mask_var=None), + Preset.ScanpyV1: RankGenesGroupsPreset( + method="t-test", mask_var=None, exp_post_agg=True + ), Preset.ScanpyV2Preview: RankGenesGroupsPreset( - method="wilcoxon", mask_var=None + method="wilcoxon", mask_var=None, exp_post_agg=False ), } diff --git a/src/scanpy/tools/_rank_genes_groups.py b/src/scanpy/tools/_rank_genes_groups.py index 02d2e3ebad..aec02c4b2a 100644 --- a/src/scanpy/tools/_rank_genes_groups.py +++ b/src/scanpy/tools/_rank_genes_groups.py @@ -201,7 +201,7 @@ def __init__( self.grouping_mask = adata.obs[groupby].isin(self.groups_order) self.grouping = adata.obs.loc[self.grouping_mask, groupby] - def _basic_stats(self) -> None: + def _basic_stats(self, *, exponentiate_values: bool = False) -> None: """Set self.{means,vars,pts}{,_rest} depending on X.""" n_genes = self.X.shape[1] n_groups = self.groups_masks_obs.shape[0] @@ -217,6 +217,8 @@ def _basic_stats(self) -> None: else: mask_rest = self.groups_masks_obs[self.ireference] x_rest = self.X[mask_rest] + if exponentiate_values: + x_rest = self.expm1_func(x_rest) self.means[self.ireference], self.vars[self.ireference] = mean_var( x_rest, axis=0, correction=1 ) @@ -230,6 +232,8 @@ def _basic_stats(self) -> None: for group_index, mask_obs in enumerate(self.groups_masks_obs): x_mask = self.X[mask_obs] + if exponentiate_values: + x_mask = self.expm1_func(x_mask) if self.comp_pts: self.pts[group_index] = get_nonzeros(x_mask) / x_mask.shape[0] @@ -244,6 +248,8 @@ def _basic_stats(self) -> None: if self.ireference is None: mask_rest = ~mask_obs x_rest = self.X[mask_rest] + if exponentiate_values: + x_rest = self.expm1_func(x_rest) ( self.means_rest[group_index], self.vars_rest[group_index], @@ -259,8 +265,6 @@ def t_test( ) -> Generator[tuple[int, NDArray[np.floating], NDArray[np.floating]], None, None]: from scipy import stats - self._basic_stats() - for group_index, (mask_obs, mean_group, var_group) in enumerate( zip(self.groups_masks_obs, self.means, self.vars, strict=True) ): @@ -312,8 +316,6 @@ def wilcoxon( ) -> Generator[tuple[int, NDArray[np.floating], NDArray[np.floating]], None, None]: from scipy import stats - self._basic_stats() - n_genes = self.X.shape[1] # First loop: Loop over all genes if self.ireference is not None: @@ -429,12 +431,16 @@ def compute_statistics( # noqa: PLR0912 n_genes_user: int | None = None, rankby_abs: bool = False, tie_correct: bool = False, + exp_post_agg: bool = True, **kwds, ) -> None: if method in {"t-test", "t-test_overestim_var"}: + self._basic_stats(exponentiate_values=False) generate_test_results = self.t_test(method) elif method == "wilcoxon": generate_test_results = self.wilcoxon(tie_correct=tie_correct) + # If we're not exponentiating after the mean aggregation, then do it now. + self._basic_stats(exponentiate_values=not exp_post_agg) elif method == "logreg": generate_test_results = self.logreg(**kwds) @@ -481,9 +487,12 @@ def compute_statistics( # noqa: PLR0912 mean_rest = self.means_rest[group_index] else: mean_rest = self.means[self.ireference] - foldchanges = (self.expm1_func(mean_group) + 1e-9) / ( - self.expm1_func(mean_rest) + 1e-9 - ) # add small value to remove 0's + if exp_post_agg: + foldchanges = (self.expm1_func(mean_group) + 1e-9) / ( + self.expm1_func(mean_rest) + 1e-9 + ) # add small value to remove 0's + else: + foldchanges = (mean_group + 1e-9) / (mean_rest + 1e-9) self.stats[group_name, "logfoldchanges"] = np.log2( foldchanges[global_indices] ) @@ -511,6 +520,7 @@ def rank_genes_groups( # noqa: PLR0912, PLR0913, PLR0915 corr_method: _CorrMethod = "benjamini-hochberg", tie_correct: bool = False, layer: str | None = None, + exp_post_agg: bool = Default(preset=("rank_genes_groups", "exp_post_agg")), **kwds, ) -> AnnData | None: """Rank genes for characterizing groups. @@ -574,6 +584,8 @@ def rank_genes_groups( # noqa: PLR0912, PLR0913, PLR0915 The key in `adata.uns` information is saved to. copy Whether to copy `adata` or modify it inplace. + exp_post_agg + Whether to do log(mean(exp(values))) (`False`) or log(exp(mean(values))) (`True`) kwds Are passed to test methods. Currently this affects only parameters that are passed to :class:`sklearn.linear_model.LogisticRegression`. @@ -626,6 +638,8 @@ def rank_genes_groups( # noqa: PLR0912, PLR0913, PLR0915 if isinstance(mask_var, Default): mask_var = settings.preset.rank_genes_groups.mask_var + if isinstance(exp_post_agg, Default): + exp_post_agg = settings.preset.rank_genes_groups.exp_post_agg if method is None or isinstance(method, Default): method = settings.preset.rank_genes_groups.method @@ -714,6 +728,7 @@ def rank_genes_groups( # noqa: PLR0912, PLR0913, PLR0915 n_genes_user=n_genes_user, rankby_abs=rankby_abs, tie_correct=tie_correct, + exp_post_agg=exp_post_agg, **kwds, ) From d0f0db0193f89fc072a96532f75d9e6598eca34a Mon Sep 17 00:00:00 2001 From: ilan-gold Date: Tue, 14 Apr 2026 16:28:02 +0200 Subject: [PATCH 02/11] chore: add explicit test --- tests/test_rank_genes_groups.py | 39 +++++++++++++++++++++++++++++++++ 1 file changed, 39 insertions(+) diff --git a/tests/test_rank_genes_groups.py b/tests/test_rank_genes_groups.py index ba38ffc94d..6f43e1e1de 100644 --- a/tests/test_rank_genes_groups.py +++ b/tests/test_rank_genes_groups.py @@ -311,3 +311,42 @@ def test_mask_not_equal(): with_mask = pbmc.uns["rank_genes_groups"]["names"] assert not np.array_equal(no_mask, with_mask) + + +@pytest.mark.parametrize( + ("exp_post_agg", "expected_logfc"), + [ + # exp after agg: log2(expm1(mean_log_a) / expm1(mean_log_b)) + # = log2(expm1(ln(9) * 5 / 10) / expm1(ln9)) = log2(2 / 8) = -2.0 + (True, -2.0), + # exp before agg: log2(mean(expm1(linear_a)) / mean(expm1(linear_b))) + # = log2(mean([0] * 5 + [8] * 5) / mean([8] * 10)) = log2(4 / 8) = -1.0 + (False, -1.0), + ], +) +def test_exp_post_agg( + expected_logfc: float, + *, + exp_post_agg: bool, +): + # group_a: 5 cells with log-space value 0, 5 cells with log(9) + # group_b: 10 cells all with log(9) (used as reference) + n_genes = 5 + group_a = np.zeros((10, n_genes)) + group_a[5:] = np.log(9) + group_b = np.full((10, n_genes), np.log(9)) + adata = AnnData( + X=np.concatenate([group_a, group_b]), + obs={"bulk_labels": ["a"] * 10 + ["b"] * 10}, + ) + + rank_genes_groups( + adata, + groupby="bulk_labels", + groups=["a"], + reference="b", + method="wilcoxon", + exp_post_agg=exp_post_agg, + ) + logfcs = adata.uns["rank_genes_groups"]["logfoldchanges"]["a"] + np.testing.assert_equal(logfcs, expected_logfc) From 53a2f74912147543d5531343dd570ecd0d36d466 Mon Sep 17 00:00:00 2001 From: ilan-gold Date: Tue, 14 Apr 2026 16:33:17 +0200 Subject: [PATCH 03/11] chore: relnote --- docs/release-notes/4037.feat.md | 1 + 1 file changed, 1 insertion(+) create mode 100644 docs/release-notes/4037.feat.md diff --git a/docs/release-notes/4037.feat.md b/docs/release-notes/4037.feat.md new file mode 100644 index 0000000000..a84539ecec --- /dev/null +++ b/docs/release-notes/4037.feat.md @@ -0,0 +1 @@ +Add `exp_post_agg` argument to {func}`scanpy.tl.rank_genes_groups` for customizing how log-fold-change is calculated {user}`ilan-gold` From 7373f55c7689963607fd741bf81a49d43d001ece Mon Sep 17 00:00:00 2001 From: "Philipp A." Date: Thu, 16 Apr 2026 14:47:01 +0200 Subject: [PATCH 04/11] move comment to correct location --- src/scanpy/tools/_rank_genes_groups.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/scanpy/tools/_rank_genes_groups.py b/src/scanpy/tools/_rank_genes_groups.py index aec02c4b2a..6e23bdeb80 100644 --- a/src/scanpy/tools/_rank_genes_groups.py +++ b/src/scanpy/tools/_rank_genes_groups.py @@ -487,10 +487,11 @@ def compute_statistics( # noqa: PLR0912 mean_rest = self.means_rest[group_index] else: mean_rest = self.means[self.ireference] + # add small value to avoid zeros if exp_post_agg: foldchanges = (self.expm1_func(mean_group) + 1e-9) / ( self.expm1_func(mean_rest) + 1e-9 - ) # add small value to remove 0's + ) else: foldchanges = (mean_group + 1e-9) / (mean_rest + 1e-9) self.stats[group_name, "logfoldchanges"] = np.log2( From 739fffbf7afe94fc9022ec99eba4ca3f7821f993 Mon Sep 17 00:00:00 2001 From: "Philipp A." Date: Thu, 16 Apr 2026 15:07:23 +0200 Subject: [PATCH 05/11] add explanation --- src/scanpy/_settings/presets.py | 2 +- src/scanpy/tools/_rank_genes_groups.py | 7 +++++-- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/src/scanpy/_settings/presets.py b/src/scanpy/_settings/presets.py index 9a6e0488e6..5e0ef02cd7 100644 --- a/src/scanpy/_settings/presets.py +++ b/src/scanpy/_settings/presets.py @@ -48,7 +48,7 @@ def __repr__(self) -> str: func, param = self.preset params = getattr(sc.settings.preset, func) value = getattr(params, param) - return f"{value!r} ({sc.settings.preset=} – changes in 2.0)" + return f"{value!r} (sc.settings.preset={str(sc.settings.preset)!r} – changes in 2.0)" class HVGPreset(NamedTuple): diff --git a/src/scanpy/tools/_rank_genes_groups.py b/src/scanpy/tools/_rank_genes_groups.py index 6e23bdeb80..d469ee9e3c 100644 --- a/src/scanpy/tools/_rank_genes_groups.py +++ b/src/scanpy/tools/_rank_genes_groups.py @@ -524,7 +524,7 @@ def rank_genes_groups( # noqa: PLR0912, PLR0913, PLR0915 exp_post_agg: bool = Default(preset=("rank_genes_groups", "exp_post_agg")), **kwds, ) -> AnnData | None: - """Rank genes for characterizing groups. + r"""Rank genes for characterizing groups. Expects logarithmized data. @@ -586,7 +586,10 @@ def rank_genes_groups( # noqa: PLR0912, PLR0913, PLR0915 copy Whether to copy `adata` or modify it inplace. exp_post_agg - Whether to do log(mean(exp(values))) (`False`) or log(exp(mean(values))) (`True`) + Whether to do :math:`\log(\operatorname{mean}(e^x))` (`False`) + or :math:`\log(e^{\operatorname{mean}(x)})` (`True`). + The former is accurate, while the latter is a faster approximation + that underestimates accurate result in the presence of many outliers. kwds Are passed to test methods. Currently this affects only parameters that are passed to :class:`sklearn.linear_model.LogisticRegression`. From db85c7c981e677f7b01e52a5f89846679508e56c Mon Sep 17 00:00:00 2001 From: "Philipp A." Date: Thu, 16 Apr 2026 15:08:33 +0200 Subject: [PATCH 06/11] typo --- src/scanpy/tools/_rank_genes_groups.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/scanpy/tools/_rank_genes_groups.py b/src/scanpy/tools/_rank_genes_groups.py index d469ee9e3c..f6af51e174 100644 --- a/src/scanpy/tools/_rank_genes_groups.py +++ b/src/scanpy/tools/_rank_genes_groups.py @@ -589,7 +589,7 @@ def rank_genes_groups( # noqa: PLR0912, PLR0913, PLR0915 Whether to do :math:`\log(\operatorname{mean}(e^x))` (`False`) or :math:`\log(e^{\operatorname{mean}(x)})` (`True`). The former is accurate, while the latter is a faster approximation - that underestimates accurate result in the presence of many outliers. + that underestimates this accurate result in the presence of many outliers. kwds Are passed to test methods. Currently this affects only parameters that are passed to :class:`sklearn.linear_model.LogisticRegression`. From 92a295392accbc580e3589d572c4b7fc65a78b5f Mon Sep 17 00:00:00 2001 From: "Philipp A." Date: Thu, 16 Apr 2026 15:12:16 +0200 Subject: [PATCH 07/11] fix type --- src/scanpy/tools/_rank_genes_groups.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/scanpy/tools/_rank_genes_groups.py b/src/scanpy/tools/_rank_genes_groups.py index f6af51e174..770efd29a3 100644 --- a/src/scanpy/tools/_rank_genes_groups.py +++ b/src/scanpy/tools/_rank_genes_groups.py @@ -521,7 +521,9 @@ def rank_genes_groups( # noqa: PLR0912, PLR0913, PLR0915 corr_method: _CorrMethod = "benjamini-hochberg", tie_correct: bool = False, layer: str | None = None, - exp_post_agg: bool = Default(preset=("rank_genes_groups", "exp_post_agg")), + exp_post_agg: bool | Default = Default( + preset=("rank_genes_groups", "exp_post_agg") + ), **kwds, ) -> AnnData | None: r"""Rank genes for characterizing groups. From f66ca68fa5f0e312a307569062ca7b6f66b3db80 Mon Sep 17 00:00:00 2001 From: "Philipp A." Date: Thu, 16 Apr 2026 15:28:57 +0200 Subject: [PATCH 08/11] note --- src/scanpy/tools/_rank_genes_groups.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/scanpy/tools/_rank_genes_groups.py b/src/scanpy/tools/_rank_genes_groups.py index 770efd29a3..7c39429479 100644 --- a/src/scanpy/tools/_rank_genes_groups.py +++ b/src/scanpy/tools/_rank_genes_groups.py @@ -614,7 +614,7 @@ def rank_genes_groups( # noqa: PLR0912, PLR0913, PLR0915 Structured array to be indexed by group id storing the log2 fold change for each gene for each group. Ordered according to scores. Only provided if method is 't-test' like. - Note: this is an approximation calculated from mean-log values. + Note: if `exp_post_agg=True`, this is an approximation calculated from mean-log values. `adata.uns['rank_genes_groups' | key_added]['pvals']` : structured :class:`numpy.ndarray` (dtype `float`) p-values. `adata.uns['rank_genes_groups' | key_added]['pvals_adj']` : structured :class:`numpy.ndarray` (dtype `float`) From 9a620535f73b01477000c08868bf309f77a9a640 Mon Sep 17 00:00:00 2001 From: "Philipp A." Date: Thu, 16 Apr 2026 15:33:29 +0200 Subject: [PATCH 09/11] ternary --- src/scanpy/tools/_rank_genes_groups.py | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/src/scanpy/tools/_rank_genes_groups.py b/src/scanpy/tools/_rank_genes_groups.py index 7c39429479..5ec7c61549 100644 --- a/src/scanpy/tools/_rank_genes_groups.py +++ b/src/scanpy/tools/_rank_genes_groups.py @@ -487,13 +487,12 @@ def compute_statistics( # noqa: PLR0912 mean_rest = self.means_rest[group_index] else: mean_rest = self.means[self.ireference] - # add small value to avoid zeros - if exp_post_agg: - foldchanges = (self.expm1_func(mean_group) + 1e-9) / ( - self.expm1_func(mean_rest) + 1e-9 - ) - else: - foldchanges = (mean_group + 1e-9) / (mean_rest + 1e-9) + foldchanges = ( + (self.expm1_func(mean_group) + 1e-9) + / (self.expm1_func(mean_rest) + 1e-9) + if exp_post_agg + else (mean_group + 1e-9) / (mean_rest + 1e-9) + ) # add small value to avoid zeros self.stats[group_name, "logfoldchanges"] = np.log2( foldchanges[global_indices] ) From 0b319b8897cd76b67dea4ec294ec7610caf66dc1 Mon Sep 17 00:00:00 2001 From: ilan-gold Date: Fri, 17 Apr 2026 11:46:53 +0200 Subject: [PATCH 10/11] fix: LFC unfiorm --- src/scanpy/tools/_rank_genes_groups.py | 3 +++ tests/test_rank_genes_groups.py | 4 +++- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/src/scanpy/tools/_rank_genes_groups.py b/src/scanpy/tools/_rank_genes_groups.py index 5ec7c61549..2e8807c422 100644 --- a/src/scanpy/tools/_rank_genes_groups.py +++ b/src/scanpy/tools/_rank_genes_groups.py @@ -437,6 +437,9 @@ def compute_statistics( # noqa: PLR0912 if method in {"t-test", "t-test_overestim_var"}: self._basic_stats(exponentiate_values=False) generate_test_results = self.t_test(method) + if not exp_post_agg: + # If we are not exponentiating after the mean aggregation, we need to recalculate the stats. + self._basic_stats(exponentiate_values=True) elif method == "wilcoxon": generate_test_results = self.wilcoxon(tie_correct=tie_correct) # If we're not exponentiating after the mean aggregation, then do it now. diff --git a/tests/test_rank_genes_groups.py b/tests/test_rank_genes_groups.py index 6f43e1e1de..dd6297d509 100644 --- a/tests/test_rank_genes_groups.py +++ b/tests/test_rank_genes_groups.py @@ -324,8 +324,10 @@ def test_mask_not_equal(): (False, -1.0), ], ) +@pytest.mark.parametrize("method", ["wilcoxon", "t-test", "t-test_overestim_var"]) def test_exp_post_agg( expected_logfc: float, + method: Literal["wilcoxon", "t-test", "t-test_overestim_var"], *, exp_post_agg: bool, ): @@ -345,7 +347,7 @@ def test_exp_post_agg( groupby="bulk_labels", groups=["a"], reference="b", - method="wilcoxon", + method=method, exp_post_agg=exp_post_agg, ) logfcs = adata.uns["rank_genes_groups"]["logfoldchanges"]["a"] From 6aca9ca1a63dc3c2c2d253c1d9579db43e8c9bb5 Mon Sep 17 00:00:00 2001 From: ilan-gold Date: Wed, 22 Apr 2026 11:29:53 +0200 Subject: [PATCH 11/11] chore: `mean_in_log_space` instead of `exp_post_agg` --- docs/release-notes/4037.feat.md | 2 +- src/scanpy/_settings/presets.py | 6 +++--- src/scanpy/tools/_rank_genes_groups.py | 22 +++++++++++----------- tests/test_rank_genes_groups.py | 8 ++++---- 4 files changed, 19 insertions(+), 19 deletions(-) diff --git a/docs/release-notes/4037.feat.md b/docs/release-notes/4037.feat.md index a84539ecec..2fbcfeb684 100644 --- a/docs/release-notes/4037.feat.md +++ b/docs/release-notes/4037.feat.md @@ -1 +1 @@ -Add `exp_post_agg` argument to {func}`scanpy.tl.rank_genes_groups` for customizing how log-fold-change is calculated {user}`ilan-gold` +Add `mean_in_log_space` argument to {func}`scanpy.tl.rank_genes_groups` for customizing how log-fold-change is calculated {user}`ilan-gold` diff --git a/src/scanpy/_settings/presets.py b/src/scanpy/_settings/presets.py index 1c82cfec4b..f7a243d61a 100644 --- a/src/scanpy/_settings/presets.py +++ b/src/scanpy/_settings/presets.py @@ -81,7 +81,7 @@ class PcaPreset(NamedTuple): class RankGenesGroupsPreset(NamedTuple): method: DETest mask_var: str | None - exp_post_agg: bool + mean_in_log_space: bool class ScalePreset(NamedTuple): @@ -187,10 +187,10 @@ def rank_genes_groups() -> Mapping[Preset, RankGenesGroupsPreset]: """Correlation method for :func:`~scanpy.tl.rank_genes_groups`.""" return { Preset.ScanpyV1: RankGenesGroupsPreset( - method="t-test", mask_var=None, exp_post_agg=True + method="t-test", mask_var=None, mean_in_log_space=True ), Preset.ScanpyV2Preview: RankGenesGroupsPreset( - method="wilcoxon", mask_var=None, exp_post_agg=False + method="wilcoxon", mask_var=None, mean_in_log_space=False ), } diff --git a/src/scanpy/tools/_rank_genes_groups.py b/src/scanpy/tools/_rank_genes_groups.py index 2e8807c422..d8e78708dd 100644 --- a/src/scanpy/tools/_rank_genes_groups.py +++ b/src/scanpy/tools/_rank_genes_groups.py @@ -431,19 +431,19 @@ def compute_statistics( # noqa: PLR0912 n_genes_user: int | None = None, rankby_abs: bool = False, tie_correct: bool = False, - exp_post_agg: bool = True, + mean_in_log_space: bool = True, **kwds, ) -> None: if method in {"t-test", "t-test_overestim_var"}: self._basic_stats(exponentiate_values=False) generate_test_results = self.t_test(method) - if not exp_post_agg: + if not mean_in_log_space: # If we are not exponentiating after the mean aggregation, we need to recalculate the stats. self._basic_stats(exponentiate_values=True) elif method == "wilcoxon": generate_test_results = self.wilcoxon(tie_correct=tie_correct) # If we're not exponentiating after the mean aggregation, then do it now. - self._basic_stats(exponentiate_values=not exp_post_agg) + self._basic_stats(exponentiate_values=not mean_in_log_space) elif method == "logreg": generate_test_results = self.logreg(**kwds) @@ -493,7 +493,7 @@ def compute_statistics( # noqa: PLR0912 foldchanges = ( (self.expm1_func(mean_group) + 1e-9) / (self.expm1_func(mean_rest) + 1e-9) - if exp_post_agg + if mean_in_log_space else (mean_group + 1e-9) / (mean_rest + 1e-9) ) # add small value to avoid zeros self.stats[group_name, "logfoldchanges"] = np.log2( @@ -523,8 +523,8 @@ def rank_genes_groups( # noqa: PLR0912, PLR0913, PLR0915 corr_method: _CorrMethod = "benjamini-hochberg", tie_correct: bool = False, layer: str | None = None, - exp_post_agg: bool | Default = Default( - preset=("rank_genes_groups", "exp_post_agg") + mean_in_log_space: bool | Default = Default( + preset=("rank_genes_groups", "mean_in_log_space") ), **kwds, ) -> AnnData | None: @@ -589,7 +589,7 @@ def rank_genes_groups( # noqa: PLR0912, PLR0913, PLR0915 The key in `adata.uns` information is saved to. copy Whether to copy `adata` or modify it inplace. - exp_post_agg + mean_in_log_space Whether to do :math:`\log(\operatorname{mean}(e^x))` (`False`) or :math:`\log(e^{\operatorname{mean}(x)})` (`True`). The former is accurate, while the latter is a faster approximation @@ -616,7 +616,7 @@ def rank_genes_groups( # noqa: PLR0912, PLR0913, PLR0915 Structured array to be indexed by group id storing the log2 fold change for each gene for each group. Ordered according to scores. Only provided if method is 't-test' like. - Note: if `exp_post_agg=True`, this is an approximation calculated from mean-log values. + Note: if `mean_in_log_space=True`, this is an approximation calculated from mean-log values. `adata.uns['rank_genes_groups' | key_added]['pvals']` : structured :class:`numpy.ndarray` (dtype `float`) p-values. `adata.uns['rank_genes_groups' | key_added]['pvals_adj']` : structured :class:`numpy.ndarray` (dtype `float`) @@ -646,8 +646,8 @@ def rank_genes_groups( # noqa: PLR0912, PLR0913, PLR0915 if isinstance(mask_var, Default): mask_var = settings.preset.rank_genes_groups.mask_var - if isinstance(exp_post_agg, Default): - exp_post_agg = settings.preset.rank_genes_groups.exp_post_agg + if isinstance(mean_in_log_space, Default): + mean_in_log_space = settings.preset.rank_genes_groups.mean_in_log_space if method is None or isinstance(method, Default): method = settings.preset.rank_genes_groups.method @@ -736,7 +736,7 @@ def rank_genes_groups( # noqa: PLR0912, PLR0913, PLR0915 n_genes_user=n_genes_user, rankby_abs=rankby_abs, tie_correct=tie_correct, - exp_post_agg=exp_post_agg, + mean_in_log_space=mean_in_log_space, **kwds, ) diff --git a/tests/test_rank_genes_groups.py b/tests/test_rank_genes_groups.py index dd6297d509..a60f914e2d 100644 --- a/tests/test_rank_genes_groups.py +++ b/tests/test_rank_genes_groups.py @@ -314,7 +314,7 @@ def test_mask_not_equal(): @pytest.mark.parametrize( - ("exp_post_agg", "expected_logfc"), + ("mean_in_log_space", "expected_logfc"), [ # exp after agg: log2(expm1(mean_log_a) / expm1(mean_log_b)) # = log2(expm1(ln(9) * 5 / 10) / expm1(ln9)) = log2(2 / 8) = -2.0 @@ -325,11 +325,11 @@ def test_mask_not_equal(): ], ) @pytest.mark.parametrize("method", ["wilcoxon", "t-test", "t-test_overestim_var"]) -def test_exp_post_agg( +def test_mean_in_log_space( expected_logfc: float, method: Literal["wilcoxon", "t-test", "t-test_overestim_var"], *, - exp_post_agg: bool, + mean_in_log_space: bool, ): # group_a: 5 cells with log-space value 0, 5 cells with log(9) # group_b: 10 cells all with log(9) (used as reference) @@ -348,7 +348,7 @@ def test_exp_post_agg( groups=["a"], reference="b", method=method, - exp_post_agg=exp_post_agg, + mean_in_log_space=mean_in_log_space, ) logfcs = adata.uns["rank_genes_groups"]["logfoldchanges"]["a"] np.testing.assert_equal(logfcs, expected_logfc)