From 4a3348d5a53d6ef6b60e229adbd3af9f318fa83f Mon Sep 17 00:00:00 2001
From: Aman Srivastava <amansri345@gmail.com>
Date: Sun, 8 Mar 2026 00:01:17 +0530
Subject: [PATCH 1/2] adding RATE metric for CATE evaluators

---
 causalml/metrics/__init__.py |   1 +
 causalml/metrics/rate.py     | 275 +++++++++++++++++++++++++++++++++++
 tests/test_rate.py           | 136 +++++++++++++++++
 3 files changed, 412 insertions(+)
 create mode 100644 causalml/metrics/rate.py
 create mode 100644 tests/test_rate.py

diff --git a/causalml/metrics/__init__.py b/causalml/metrics/__init__.py
index 0bc9d187..e382bc43 100644
--- a/causalml/metrics/__init__.py
+++ b/causalml/metrics/__init__.py
@@ -25,6 +25,7 @@
     get_tmleqini,
 )  # noqa
 from .visualize import auuc_score, qini_score  # noqa
+from .rate import get_toc, rate_score, plot_toc  # noqa
 from .sensitivity import Sensitivity, SensitivityPlaceboTreatment  # noqa
 from .sensitivity import (
     SensitivityRandomCause,
diff --git a/causalml/metrics/rate.py b/causalml/metrics/rate.py
new file mode 100644
index 00000000..adc027db
--- /dev/null
+++ b/causalml/metrics/rate.py
@@ -0,0 +1,275 @@
+from typing import Optional
+
+import numpy as np
+import pandas as pd
+from matplotlib import pyplot as plt
+import logging
+
+plt.style.use("fivethirtyeight")
+RANDOM_COL = "Random"
+
+logger = logging.getLogger("causalml")
+
+
+def get_toc(
+    df,
+    outcome_col="y",
+    treatment_col="w",
+    treatment_effect_col="tau",
+    normalize=False,
+    random_seed=42,
+):
+    """Get the Targeting Operator Characteristic (TOC) of model estimates in population.
+
+    TOC(q) is the difference between the ATE among the top-q fraction of units ranked
+    by the prioritization score and the overall ATE. A positive TOC at low q indicates
+    the model successfully identifies units with above-average treatment benefit.
+
+    If the true treatment effect is provided (e.g. in synthetic data), it's used directly
+    to calculate TOC. Otherwise, it's estimated as the difference between the mean outcomes
+    of the treatment and control groups in each quantile band.
+
+    For details, see Yadlowsky et al. (2021), `Evaluating Treatment Prioritization Rules
+    via Rank-Weighted Average Treatment Effects`.
+
+    For the former, `treatment_effect_col` should be provided. For the latter, both
+    `outcome_col` and `treatment_col` should be provided.
+
+    Args:
+        df (pandas.DataFrame): a data frame with model estimates and actual data as columns
+        outcome_col (str, optional): the column name for the actual outcome
+        treatment_col (str, optional): the column name for the treatment indicator (0 or 1)
+        treatment_effect_col (str, optional): the column name for the true treatment effect
+        normalize (bool, optional): whether to normalize the y-axis to 1 or not
+        random_seed (int, optional): deprecated
+
+    Returns:
+        (pandas.DataFrame): TOC values of model estimates in population, indexed by quantile q
+    """
+    assert (
+        (outcome_col in df.columns and df[outcome_col].notnull().all())
+        and (treatment_col in df.columns and df[treatment_col].notnull().all())
+        or (
+            treatment_effect_col in df.columns
+            and df[treatment_effect_col].notnull().all()
+        )
+    ), "{outcome_col} and {treatment_col}, or {treatment_effect_col} should be present without null.".format(
+        outcome_col=outcome_col,
+        treatment_col=treatment_col,
+        treatment_effect_col=treatment_effect_col,
+    )
+
+    df = df.copy()
+
+    model_names = [
+        x
+        for x in df.columns
+        if x not in [outcome_col, treatment_col, treatment_effect_col]
+    ]
+
+    use_oracle = (
+        treatment_effect_col in df.columns and df[treatment_effect_col].notnull().all()
+    )
+
+    if use_oracle:
+        overall_ate = df[treatment_effect_col].mean()
+    else:
+        treated = df[treatment_col] == 1
+        overall_ate = (
+            df.loc[treated, outcome_col].mean() - df.loc[~treated, outcome_col].mean()
+        )
+
+    n_total = len(df)
+    quantiles = np.linspace(0, 1, n_total + 1)[1:]
+
+    toc = []
+    for col in model_names:
+        sorted_df = df.sort_values(col, ascending=False).reset_index(drop=True)
+        toc_values = []
+
+        for q in quantiles:
+            top_k = max(1, int(np.ceil(q * n_total)))
+            top_subset = sorted_df.iloc[:top_k]
+
+            if use_oracle:
+                subset_ate = top_subset[treatment_effect_col].mean()
+            else:
+                t_mask = top_subset[treatment_col] == 1
+                c_mask = top_subset[treatment_col] == 0
+                if t_mask.sum() == 0 or c_mask.sum() == 0:
+                    subset_ate = overall_ate
+                else:
+                    subset_ate = (
+                        top_subset.loc[t_mask, outcome_col].mean()
+                        - top_subset.loc[c_mask, outcome_col].mean()
+                    )
+
+            toc_values.append(subset_ate - overall_ate)
+
+        toc.append(pd.Series(toc_values, index=quantiles))
+
+    toc = pd.concat(toc, join="inner", axis=1)
+    toc.loc[0] = np.zeros((toc.shape[1],))
+    toc = toc.sort_index().interpolate()
+    toc.columns = model_names
+    toc.index.name = "q"
+
+    if normalize:
+        toc = toc.div(np.abs(toc.iloc[-1, :]), axis=1)
+
+    return toc
+
+
+def rate_score(
+    df,
+    outcome_col="y",
+    treatment_col="w",
+    treatment_effect_col="tau",
+    weighting="autoc",
+    normalize=False,
+    random_seed=42,
+):
+    """Calculate the Rank-weighted Average Treatment Effect (RATE) score.
+
+    RATE is the weighted area under the Targeting Operator Characteristic (TOC) curve:
+
+        RATE = integral_0^1 alpha(q) * TOC(q) dq
+
+    Two standard weighting schemes are supported (Yadlowsky et al., 2021):
+
+    - ``"autoc"``: alpha(q) = 1/q. Places more weight on the highest-priority units.
+      Most powerful when treatment effects are concentrated in a small subgroup.
+
+    - ``"qini"``: alpha(q) = q. Uniform weighting across units; reduces to the Qini
+      coefficient. More powerful when treatment effects are diffuse across the population.
+
+    A positive RATE indicates the prioritization rule effectively identifies units with
+    above-average treatment benefit. A RATE near zero suggests little heterogeneity or
+    a poor prioritization rule.
+
+    For details, see Yadlowsky et al. (2021), `Evaluating Treatment Prioritization Rules
+    via Rank-Weighted Average Treatment Effects`. https://arxiv.org/abs/2111.07966
+
+    For the former, `treatment_effect_col` should be provided. For the latter, both
+    `outcome_col` and `treatment_col` should be provided.
+
+    Args:
+        df (pandas.DataFrame): a data frame with model estimates and actual data as columns
+        outcome_col (str, optional): the column name for the actual outcome
+        treatment_col (str, optional): the column name for the treatment indicator (0 or 1)
+        treatment_effect_col (str, optional): the column name for the true treatment effect
+        weighting (str, optional): the weighting scheme for the RATE integral.
+            One of ``"autoc"`` (default) or ``"qini"``.
+        normalize (bool, optional): whether to normalize the TOC curve before scoring
+        random_seed (int, optional): deprecated
+
+    Returns:
+        (pandas.Series): RATE scores of model estimates
+    """
+    assert weighting in (
+        "autoc",
+        "qini",
+    ), "{} weighting is not implemented. Select one of {}".format(
+        weighting, ("autoc", "qini")
+    )
+
+    toc = get_toc(
+        df,
+        outcome_col=outcome_col,
+        treatment_col=treatment_col,
+        treatment_effect_col=treatment_effect_col,
+        normalize=normalize,
+    )
+
+    quantiles = toc.index.values  # shape (n,), 0 included
+
+    # Use midpoints to avoid division by zero for autoc at q=0
+    q_mid = (quantiles[:-1] + quantiles[1:]) / 2
+    toc_mid = (toc.iloc[:-1].values + toc.iloc[1:].values) / 2
+
+    if weighting == "autoc":
+        weights = 1.0 / q_mid
+    else:
+        weights = q_mid
+
+    # Normalize weights so they sum to 1 over the integration domain
+    weights = weights / weights.sum()
+
+    rate = pd.Series(
+        np.average(toc_mid, axis=0, weights=weights),
+        index=toc.columns,
+    )
+    rate.name = "RATE ({})".format(weighting)
+    return rate
+
+
+def plot_toc(
+    df,
+    outcome_col="y",
+    treatment_col="w",
+    treatment_effect_col="tau",
+    normalize=False,
+    random_seed=42,
+    n=100,
+    figsize=(8, 8),
+    ax: Optional[plt.Axes] = None,
+) -> plt.Axes:
+    """Plot the Targeting Operator Characteristic (TOC) curve of model estimates.
+
+    The TOC(q) shows the excess ATE when treating only the top-q fraction of units
+    prioritized by a model score, relative to the overall ATE. A positive and steeply
+    decreasing curve indicates the model effectively ranks high-benefit units first.
+
+    If the true treatment effect is provided (e.g. in synthetic data), it's used directly.
+    Otherwise, it's estimated from observed outcomes and treatment assignments.
+
+    For details, see Yadlowsky et al. (2021), `Evaluating Treatment Prioritization Rules
+    via Rank-Weighted Average Treatment Effects`. https://arxiv.org/abs/2111.07966
+
+    For the former, `treatment_effect_col` should be provided. For the latter, both
+    `outcome_col` and `treatment_col` should be provided.
+
+    Args:
+        df (pandas.DataFrame): a data frame with model estimates and actual data as columns
+        outcome_col (str, optional): the column name for the actual outcome
+        treatment_col (str, optional): the column name for the treatment indicator (0 or 1)
+        treatment_effect_col (str, optional): the column name for the true treatment effect
+        normalize (bool, optional): whether to normalize the y-axis to 1 or not
+        random_seed (int, optional): deprecated
+        n (int, optional): the number of samples to be used for plotting
+        figsize (tuple, optional): the size of the figure to plot
+        ax (plt.Axes, optional): an existing axes object to draw on
+
+    Returns:
+        (plt.Axes): the matplotlib Axes with the TOC plot
+    """
+    toc = get_toc(
+        df,
+        outcome_col=outcome_col,
+        treatment_col=treatment_col,
+        treatment_effect_col=treatment_effect_col,
+        normalize=normalize,
+    )
+
+    if (n is not None) and (n < toc.shape[0]):
+        toc = toc.iloc[np.linspace(0, len(toc) - 1, n, endpoint=True).astype(int)]
+
+    if ax is None:
+        _, ax = plt.subplots(figsize=figsize)
+
+    ax = toc.plot(ax=ax)
+
+    # Random baseline (TOC = 0 everywhere)
+    ax.plot(
+        [toc.index[0], toc.index[-1]],
+        [0, 0],
+        label=RANDOM_COL,
+        color="k",
+        linestyle="--",
+    )
+    ax.legend()
+    ax.set_xlabel("Fraction treated (q)")
+    ax.set_ylabel("TOC(q)")
+    ax.set_title("Targeting Operator Characteristic (TOC)")
+
+    return ax
diff --git a/tests/test_rate.py b/tests/test_rate.py
new file mode 100644
index 00000000..bf507c11
--- /dev/null
+++ b/tests/test_rate.py
@@ -0,0 +1,136 @@
+from matplotlib import pyplot as plt
+import numpy as np
+import pandas as pd
+import pytest
+
+from causalml.metrics.rate import get_toc, rate_score, plot_toc
+
+
+@pytest.fixture
+def synthetic_df():
+    rng = np.random.default_rng(42)
+    n = 500
+    tau = rng.normal(1.0, 0.5, n)
+    return pd.DataFrame(
+        {
+            "y": rng.normal(0, 1, n),
+            "w": rng.integers(0, 2, n),
+            "tau": tau,
+            "perfect_model": tau,
+            "random_model": rng.normal(0, 1, n),
+        }
+    )
+
+
+@pytest.fixture
+def rct_df():
+    rng = np.random.default_rng(0)
+    n = 2000
+    tau = rng.uniform(0, 2, n)
+    w = rng.integers(0, 2, n)
+    y = tau * w + rng.normal(0, 1, n)
+    return pd.DataFrame(
+        {
+            "y": y,
+            "w": w,
+            "score": tau + rng.normal(0, 0.1, n),
+        }
+    )
+
+
+def test_get_toc_errors_on_nan():
+    df = pd.DataFrame(
+        [[0, np.nan, 0.5], [1, np.nan, 0.1], [1, 1, 0.4], [0, 1, 0.3], [1, 1, 0.2]],
+        columns=["w", "y", "pred"],
+    )
+
+    with pytest.raises(Exception):
+        get_toc(df)
+
+
+def test_get_toc_returns_dataframe(synthetic_df):
+    toc = get_toc(synthetic_df, treatment_effect_col="tau")
+    assert isinstance(toc, pd.DataFrame)
+
+
+def test_get_toc_index_bounds(synthetic_df):
+    toc = get_toc(synthetic_df, treatment_effect_col="tau")
+    assert toc.index[0] == 0.0
+    assert toc.index[-1] == 1.0
+
+
+def test_get_toc_starts_at_zero(synthetic_df):
+    toc = get_toc(synthetic_df, treatment_effect_col="tau")
+    assert np.allclose(toc.iloc[0].values, 0.0)
+
+
+def test_get_toc_model_cols_present(synthetic_df):
+    toc = get_toc(synthetic_df, treatment_effect_col="tau")
+    assert "perfect_model" in toc.columns
+    assert "random_model" in toc.columns
+
+
+def test_get_toc_perfect_model_positive_at_low_q(synthetic_df):
+    toc = get_toc(synthetic_df, treatment_effect_col="tau")
+    assert toc.loc[toc.index <= 0.15, "perfect_model"].mean() > 0
+
+
+def test_get_toc_observed_outcomes(rct_df):
+    toc = get_toc(rct_df, outcome_col="y", treatment_col="w")
+    assert isinstance(toc, pd.DataFrame)
+    assert "score" in toc.columns
+
+
+def test_rate_score_returns_series(synthetic_df):
+    result = rate_score(synthetic_df, treatment_effect_col="tau")
+    assert isinstance(result, pd.Series)
+
+
+def test_rate_score_autoc_perfect_model_positive(synthetic_df):
+    scores = rate_score(synthetic_df, treatment_effect_col="tau", weighting="autoc")
+    assert scores["perfect_model"] > 0
+
+
+def test_rate_score_qini_perfect_model_positive(synthetic_df):
+    scores = rate_score(synthetic_df, treatment_effect_col="tau", weighting="qini")
+    assert scores["perfect_model"] > 0
+
+
+def test_rate_score_perfect_beats_random(synthetic_df):
+    scores = rate_score(synthetic_df, treatment_effect_col="tau", weighting="autoc")
+    assert scores["perfect_model"] > scores["random_model"]
+
+
+def test_rate_score_invalid_weighting_raises(synthetic_df):
+    with pytest.raises(AssertionError):
+        rate_score(synthetic_df, treatment_effect_col="tau", weighting="invalid")
+
+
+def test_rate_score_constant_tau_near_zero():
+    rng = np.random.default_rng(99)
+    n = 2000
+    df = pd.DataFrame(
+        {
+            "tau": np.ones(n),
+            "random_score": rng.normal(0, 1, n),
+        }
+    )
+    scores = rate_score(df, treatment_effect_col="tau", weighting="autoc")
+    assert abs(scores["random_score"]) < 0.2
+
+
+def test_rate_score_all_finite(synthetic_df):
+    scores = rate_score(synthetic_df, treatment_effect_col="tau")
+    assert np.isfinite(scores.values).all()
+
+
+def test_rate_score_observed_outcomes(rct_df):
+    scores = rate_score(rct_df, outcome_col="y", treatment_col="w")
+    assert isinstance(scores, pd.Series)
+    assert np.isfinite(scores.values).all()
+
+
+def test_plot_toc(synthetic_df, monkeypatch):
+    monkeypatch.setattr(plt, "show", lambda: None)
+    ax = plot_toc(synthetic_df, treatment_effect_col="tau")
+    assert isinstance(ax, plt.Axes)

From 30eaebdd0a557914d388e44dc3cb98a3fcee7526 Mon Sep 17 00:00:00 2001
From: Aman Srivastava <amansri345@gmail.com>
Date: Sat, 14 Mar 2026 13:59:15 +0530
Subject: [PATCH 2/2] addressing suggested changes

---
 causalml/metrics/rate.py | 92 ++++++++++++++++++++++++++--------------
 tests/test_rate.py       | 33 ++++++++++++--
 2 files changed, 88 insertions(+), 37 deletions(-)

diff --git a/causalml/metrics/rate.py b/causalml/metrics/rate.py
index adc027db..8af2e91f 100644
--- a/causalml/metrics/rate.py
+++ b/causalml/metrics/rate.py
@@ -17,7 +17,6 @@ def get_toc(
     treatment_col="w",
     treatment_effect_col="tau",
     normalize=False,
-    random_seed=42,
 ):
     """Get the Targeting Operator Characteristic (TOC) of model estimates in population.
 
@@ -25,12 +24,19 @@ def get_toc(
     by the prioritization score and the overall ATE. A positive TOC at low q indicates
     the model successfully identifies units with above-average treatment benefit.
 
+    By definition, TOC(0) = 0 and TOC(1) = 0 (the subset ATE equals the overall ATE
+    when the entire population is selected).
+
     If the true treatment effect is provided (e.g. in synthetic data), it's used directly
     to calculate TOC. Otherwise, it's estimated as the difference between the mean outcomes
     of the treatment and control groups in each quantile band.
 
+    Note: when using observed outcomes, if a quantile band contains only treated or only
+    control units, the code falls back to TOC(q) = 0 for that band (i.e., subset ATE is
+    set to the overall ATE). This is a conservative approximation and is logged as a warning.
+
     For details, see Yadlowsky et al. (2021), `Evaluating Treatment Prioritization Rules
-    via Rank-Weighted Average Treatment Effects`.
+    via Rank-Weighted Average Treatment Effects`. https://arxiv.org/abs/2111.07966
 
     For the former, `treatment_effect_col` should be provided. For the latter, both
     `outcome_col` and `treatment_col` should be provided.
@@ -40,8 +46,9 @@ def get_toc(
         outcome_col (str, optional): the column name for the actual outcome
         treatment_col (str, optional): the column name for the treatment indicator (0 or 1)
         treatment_effect_col (str, optional): the column name for the true treatment effect
-        normalize (bool, optional): whether to normalize the y-axis to 1 or not
-        random_seed (int, optional): deprecated
+        normalize (bool, optional): whether to normalize the TOC curve by its maximum
+            absolute value. Uses max(|TOC|) as the reference to avoid division by zero
+            at q=1 where TOC is always zero by definition.
 
     Returns:
         (pandas.DataFrame): TOC values of model estimates in population, indexed by quantile q
@@ -80,33 +87,46 @@ def get_toc(
         )
 
     n_total = len(df)
-    quantiles = np.linspace(0, 1, n_total + 1)[1:]
 
     toc = []
     for col in model_names:
         sorted_df = df.sort_values(col, ascending=False).reset_index(drop=True)
-        toc_values = []
-
-        for q in quantiles:
-            top_k = max(1, int(np.ceil(q * n_total)))
-            top_subset = sorted_df.iloc[:top_k]
 
-            if use_oracle:
-                subset_ate = top_subset[treatment_effect_col].mean()
-            else:
-                t_mask = top_subset[treatment_col] == 1
-                c_mask = top_subset[treatment_col] == 0
-                if t_mask.sum() == 0 or c_mask.sum() == 0:
-                    subset_ate = overall_ate
-                else:
-                    subset_ate = (
-                        top_subset.loc[t_mask, outcome_col].mean()
-                        - top_subset.loc[c_mask, outcome_col].mean()
-                    )
-
-            toc_values.append(subset_ate - overall_ate)
-
-        toc.append(pd.Series(toc_values, index=quantiles))
+        if use_oracle:
+            # O(n) via cumulative sum
+            cumsum_tau = sorted_df[treatment_effect_col].cumsum().values
+            counts = np.arange(1, n_total + 1)
+            subset_ates = cumsum_tau / counts
+        else:
+            cumsum_tr = sorted_df[treatment_col].cumsum().values
+            cumsum_ct = np.arange(1, n_total + 1) - cumsum_tr
+            cumsum_y_tr = (
+                (sorted_df[outcome_col] * sorted_df[treatment_col]).cumsum().values
+            )
+            cumsum_y_ct = (
+                (sorted_df[outcome_col] * (1 - sorted_df[treatment_col]))
+                .cumsum()
+                .values
+            )
+
+            # Guard against division by zero when a band is all-treated or all-control;
+            # fall back to overall_ate (TOC = 0) for those positions.
+            with np.errstate(invalid="ignore", divide="ignore"):
+                subset_ates = np.where(
+                    (cumsum_tr == 0) | (cumsum_ct == 0),
+                    overall_ate,
+                    cumsum_y_tr / cumsum_tr - cumsum_y_ct / cumsum_ct,
+                )
+
+            if np.any((cumsum_tr == 0) | (cumsum_ct == 0)):
+                logger.warning(
+                    "Some quantile bands contain only treated or only control units "
+                    "for column '%s'. TOC is set to 0 for those positions.",
+                    col,
+                )
+
+        toc_values = subset_ates - overall_ate
+        toc.append(pd.Series(toc_values, index=np.linspace(0, 1, n_total + 1)[1:]))
 
     toc = pd.concat(toc, join="inner", axis=1)
     toc.loc[0] = np.zeros((toc.shape[1],))
@@ -115,7 +135,11 @@ def get_toc(
     toc.index.name = "q"
 
     if normalize:
-        toc = toc.div(np.abs(toc.iloc[-1, :]), axis=1)
+        # Normalize by max absolute value rather than the value at q=1, which is
+        # always zero by definition and would cause division by zero.
+        max_abs = toc.abs().max()
+        max_abs = max_abs.replace(0, 1)  # guard for flat TOC curves
+        toc = toc.div(max_abs, axis=1)
 
     return toc
 
@@ -127,7 +151,6 @@ def rate_score(
     treatment_effect_col="tau",
     weighting="autoc",
     normalize=False,
-    random_seed=42,
 ):
     """Calculate the Rank-weighted Average Treatment Effect (RATE) score.
 
@@ -147,6 +170,11 @@ def rate_score(
     above-average treatment benefit. A RATE near zero suggests little heterogeneity or
     a poor prioritization rule.
 
+    Note: the integral is approximated via a weighted mean over the discrete quantile grid
+    using midpoint values. Weights are normalized to sum to 1 (i.e. ``weights / weights.sum()``),
+    so the absolute scale matches the TOC values but may differ slightly from the paper's
+    continuous integral definition. Model rankings are preserved.
+
     For details, see Yadlowsky et al. (2021), `Evaluating Treatment Prioritization Rules
     via Rank-Weighted Average Treatment Effects`. https://arxiv.org/abs/2111.07966
 
@@ -161,7 +189,6 @@ def rate_score(
         weighting (str, optional): the weighting scheme for the RATE integral.
             One of ``"autoc"`` (default) or ``"qini"``.
         normalize (bool, optional): whether to normalize the TOC curve before scoring
-        random_seed (int, optional): deprecated
 
     Returns:
         (pandas.Series): RATE scores of model estimates
@@ -181,7 +208,7 @@ def rate_score(
         normalize=normalize,
     )
 
-    quantiles = toc.index.values  # shape (n,), 0 included
+    quantiles = toc.index.values  # includes 0 and 1
 
     # Use midpoints to avoid division by zero for autoc at q=0
     q_mid = (quantiles[:-1] + quantiles[1:]) / 2
@@ -209,7 +236,6 @@ def plot_toc(
     treatment_col="w",
     treatment_effect_col="tau",
     normalize=False,
-    random_seed=42,
     n=100,
     figsize=(8, 8),
     ax: Optional[plt.Axes] = None,
@@ -234,8 +260,8 @@ def plot_toc(
         outcome_col (str, optional): the column name for the actual outcome
         treatment_col (str, optional): the column name for the treatment indicator (0 or 1)
         treatment_effect_col (str, optional): the column name for the true treatment effect
-        normalize (bool, optional): whether to normalize the y-axis to 1 or not
-        random_seed (int, optional): deprecated
+        normalize (bool, optional): whether to normalize the TOC curve by its maximum
+            absolute value before plotting
         n (int, optional): the number of samples to be used for plotting
         figsize (tuple, optional): the size of the figure to plot
         ax (plt.Axes, optional): an existing axes object to draw on
diff --git a/tests/test_rate.py b/tests/test_rate.py
index bf507c11..30585be0 100644
--- a/tests/test_rate.py
+++ b/tests/test_rate.py
@@ -5,10 +5,16 @@
 
 from causalml.metrics.rate import get_toc, rate_score, plot_toc
 
+# Use project-wide constant instead of hardcoded seed
+try:
+    from tests.const import RANDOM_SEED
+except ImportError:
+    RANDOM_SEED = 42
+
 
 @pytest.fixture
 def synthetic_df():
-    rng = np.random.default_rng(42)
+    rng = np.random.default_rng(RANDOM_SEED)
     n = 500
     tau = rng.normal(1.0, 0.5, n)
     return pd.DataFrame(
@@ -24,7 +30,7 @@ def synthetic_df():
 
 @pytest.fixture
 def rct_df():
-    rng = np.random.default_rng(0)
+    rng = np.random.default_rng(RANDOM_SEED)
     n = 2000
     tau = rng.uniform(0, 2, n)
     w = rng.integers(0, 2, n)
@@ -44,7 +50,7 @@ def test_get_toc_errors_on_nan():
         columns=["w", "y", "pred"],
     )
 
-    with pytest.raises(Exception):
+    with pytest.raises(AssertionError):
         get_toc(df)
 
 
@@ -64,6 +70,12 @@ def test_get_toc_starts_at_zero(synthetic_df):
     assert np.allclose(toc.iloc[0].values, 0.0)
 
 
+def test_get_toc_ends_at_zero(synthetic_df):
+    # TOC(1) = 0 by definition: subset ATE over full population equals overall ATE
+    toc = get_toc(synthetic_df, treatment_effect_col="tau")
+    assert np.allclose(toc.iloc[-1].values, 0.0, atol=1e-10)
+
+
 def test_get_toc_model_cols_present(synthetic_df):
     toc = get_toc(synthetic_df, treatment_effect_col="tau")
     assert "perfect_model" in toc.columns
@@ -75,6 +87,19 @@ def test_get_toc_perfect_model_positive_at_low_q(synthetic_df):
     assert toc.loc[toc.index <= 0.15, "perfect_model"].mean() > 0
 
 
+def test_get_toc_normalize_no_inf_or_nan(synthetic_df):
+    # normalize=True previously divided by TOC(1) which is always zero —
+    # now uses max(|TOC|) to avoid inf/NaN
+    toc = get_toc(synthetic_df, treatment_effect_col="tau", normalize=True)
+    assert not toc.isnull().any().any()
+    assert np.isfinite(toc.values).all()
+
+
+def test_get_toc_normalize_max_abs_is_one(synthetic_df):
+    toc = get_toc(synthetic_df, treatment_effect_col="tau", normalize=True)
+    assert np.allclose(toc.abs().max(), 1.0)
+
+
 def test_get_toc_observed_outcomes(rct_df):
     toc = get_toc(rct_df, outcome_col="y", treatment_col="w")
     assert isinstance(toc, pd.DataFrame)
@@ -107,7 +132,7 @@ def test_rate_score_invalid_weighting_raises(synthetic_df):
 
 
 def test_rate_score_constant_tau_near_zero():
-    rng = np.random.default_rng(99)
+    rng = np.random.default_rng(RANDOM_SEED)
     n = 2000
     df = pd.DataFrame(
         {