From 4f6c186947dc0f4cf1acea7b82753de357bfbe32 Mon Sep 17 00:00:00 2001 From: Aman Srivastava Date: Sat, 7 Mar 2026 18:51:39 +0530 Subject: [PATCH 1/5] add store_bootstraps to fit() and return_ci to predict() in BaseTLearner --- causalml/inference/meta/tlearner.py | 45 +++++++++++++++++++++++++++-- tests/test_meta_learners.py | 28 ++++++++++++++++++ 2 files changed, 70 insertions(+), 3 deletions(-) diff --git a/causalml/inference/meta/tlearner.py b/causalml/inference/meta/tlearner.py index 04ca796f..4c1030a8 100644 --- a/causalml/inference/meta/tlearner.py +++ b/causalml/inference/meta/tlearner.py @@ -69,7 +69,7 @@ def __repr__(self): ) @ignore_warnings(category=ConvergenceWarning) - def fit(self, X, treatment, y, p=None): + def fit(self, X, treatment, y, p=None, store_bootstraps=False, n_bootstraps=1000, bootstrap_size=10000): """Fit the inference model Args: @@ -94,9 +94,33 @@ def fit(self, X, treatment, y, p=None): self.models_c[group].fit(X_filt[w == 0], y_filt[w == 0]) self.models_t[group].fit(X_filt[w == 1], y_filt[w == 1]) - + if store_bootstraps: + logger.info("Storing bootstrap ensemble ({} iterations)".format(n_bootstraps)) + self.bootstrap_models_ = [] + for i in tqdm(range(n_bootstraps)): + idxs = np.random.choice(np.arange(X.shape[0]), size=bootstrap_size) + X_b, treatment_b, y_b = X[idxs], treatment[idxs], y[idxs] + models_c_b = {group: deepcopy(self.model_c) for group in self.t_groups} + models_t_b = {group: deepcopy(self.model_t) for group in self.t_groups} + for group in self.t_groups: + mask = (treatment_b == group) | (treatment_b == self.control_name) + treatment_filt = treatment_b[mask] + X_filt = X_b[mask] + y_filt = y_b[mask] + w = (treatment_filt == group).astype(int) + if w.sum() == 0 or (w == 0).sum() == 0: + models_c_b[group] = self.models_c[group] + models_t_b[group] = self.models_t[group] + continue + models_c_b[group].fit(X_filt[w == 0], y_filt[w == 0]) + models_t_b[group].fit(X_filt[w == 1], y_filt[w == 1]) + self.bootstrap_models_.append((models_c_b, models_t_b)) + else: + self.bootstrap_models_ = None + def predict( - self, X, treatment=None, y=None, p=None, return_components=False, verbose=True + self, X, treatment=None, y=None, p=None, return_components=False, verbose=True, + return_ci=False, ci_quantile=0.05, ): """Predict treatment effects. @@ -136,6 +160,21 @@ def predict( for i, group in enumerate(self.t_groups): te[:, i] = yhat_ts[group] - yhat_cs[group] + if return_ci: + if not self.bootstrap_models_: + raise ValueError( + "No bootstrap ensemble found. Call fit(..., store_bootstraps=True) first." + ) + te_bootstraps = np.zeros((X.shape[0], self.t_groups.shape[0], len(self.bootstrap_models_))) + for b, (models_c_b, models_t_b) in enumerate(self.bootstrap_models_): + for i, group in enumerate(self.t_groups): + te_bootstraps[:, i, b] = ( + models_t_b[group].predict(X) - models_c_b[group].predict(X) + ) + te_lower = np.percentile(te_bootstraps, ci_quantile / 2 * 100, axis=2) + te_upper = np.percentile(te_bootstraps, (1 - ci_quantile / 2) * 100, axis=2) + return te, te_lower, te_upper + if not return_components: return te else: diff --git a/tests/test_meta_learners.py b/tests/test_meta_learners.py index d5a60216..d7f4e2de 100644 --- a/tests/test_meta_learners.py +++ b/tests/test_meta_learners.py @@ -1220,3 +1220,31 @@ def test_BaseDRClassifier(generate_classification_data): te_separate = learner_separate.fit_predict(X=X, treatment=treatment, y=y) assert te_separate.shape == te.shape + + +def test_BaseTLearner_predict_return_ci(generate_regression_data): + y, X, treatment, tau, b, e = generate_regression_data() + + learner = BaseTRegressor(learner=LinearRegression(), control_name=0) + + # Test 1: store_bootstraps=True then predict with return_ci=True + learner.fit(X, treatment, y, store_bootstraps=True, n_bootstraps=50, bootstrap_size=500) + tau_pred, lb, ub = learner.predict(X, return_ci=True, ci_quantile=0.05) + + assert tau_pred.shape == (X.shape[0], len(learner.t_groups)) + assert lb.shape == tau_pred.shape + assert ub.shape == tau_pred.shape + assert (lb <= tau_pred).all() and (tau_pred <= ub).all() + + # Test 2: without store_bootstraps, return_ci=True should raise ValueError + learner2 = BaseTRegressor(learner=LinearRegression(), control_name=0) + learner2.fit(X, treatment, y) + try: + learner2.predict(X, return_ci=True) + assert False, "Expected ValueError was not raised" + except ValueError: + pass + + # Test 3: old API unchanged, no return_ci should return plain array + tau_plain = learner.predict(X) + assert tau_plain.shape == (X.shape[0], len(learner.t_groups)) From 39c4f502b4212a84195e5a3d1dee82987f43d0f5 Mon Sep 17 00:00:00 2001 From: Aman Srivastava Date: Sat, 7 Mar 2026 18:53:35 +0530 Subject: [PATCH 2/5] style: black formatting on tlearner.py --- causalml/inference/meta/tlearner.py | 38 ++++++++++++++++++++++------- 1 file changed, 29 insertions(+), 9 deletions(-) diff --git a/causalml/inference/meta/tlearner.py b/causalml/inference/meta/tlearner.py index 4c1030a8..8e9d291d 100644 --- a/causalml/inference/meta/tlearner.py +++ b/causalml/inference/meta/tlearner.py @@ -69,7 +69,16 @@ def __repr__(self): ) @ignore_warnings(category=ConvergenceWarning) - def fit(self, X, treatment, y, p=None, store_bootstraps=False, n_bootstraps=1000, bootstrap_size=10000): + def fit( + self, + X, + treatment, + y, + p=None, + store_bootstraps=False, + n_bootstraps=1000, + bootstrap_size=10000, + ): """Fit the inference model Args: @@ -95,7 +104,9 @@ def fit(self, X, treatment, y, p=None, store_bootstraps=False, n_bootstraps=1000 self.models_c[group].fit(X_filt[w == 0], y_filt[w == 0]) self.models_t[group].fit(X_filt[w == 1], y_filt[w == 1]) if store_bootstraps: - logger.info("Storing bootstrap ensemble ({} iterations)".format(n_bootstraps)) + logger.info( + "Storing bootstrap ensemble ({} iterations)".format(n_bootstraps) + ) self.bootstrap_models_ = [] for i in tqdm(range(n_bootstraps)): idxs = np.random.choice(np.arange(X.shape[0]), size=bootstrap_size) @@ -117,10 +128,17 @@ def fit(self, X, treatment, y, p=None, store_bootstraps=False, n_bootstraps=1000 self.bootstrap_models_.append((models_c_b, models_t_b)) else: self.bootstrap_models_ = None - + def predict( - self, X, treatment=None, y=None, p=None, return_components=False, verbose=True, - return_ci=False, ci_quantile=0.05, + self, + X, + treatment=None, + y=None, + p=None, + return_components=False, + verbose=True, + return_ci=False, + ci_quantile=0.05, ): """Predict treatment effects. @@ -165,12 +183,14 @@ def predict( raise ValueError( "No bootstrap ensemble found. Call fit(..., store_bootstraps=True) first." ) - te_bootstraps = np.zeros((X.shape[0], self.t_groups.shape[0], len(self.bootstrap_models_))) + te_bootstraps = np.zeros( + (X.shape[0], self.t_groups.shape[0], len(self.bootstrap_models_)) + ) for b, (models_c_b, models_t_b) in enumerate(self.bootstrap_models_): for i, group in enumerate(self.t_groups): - te_bootstraps[:, i, b] = ( - models_t_b[group].predict(X) - models_c_b[group].predict(X) - ) + te_bootstraps[:, i, b] = models_t_b[group].predict(X) - models_c_b[ + group + ].predict(X) te_lower = np.percentile(te_bootstraps, ci_quantile / 2 * 100, axis=2) te_upper = np.percentile(te_bootstraps, (1 - ci_quantile / 2) * 100, axis=2) return te, te_lower, te_upper From c78d9a1730e15d2211ff272ac46e4af5dd7e3a6d Mon Sep 17 00:00:00 2001 From: Aman Srivastava Date: Tue, 10 Mar 2026 20:20:25 +0530 Subject: [PATCH 3/5] formatting on test_meta_learners.py --- tests/test_meta_learners.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tests/test_meta_learners.py b/tests/test_meta_learners.py index d7f4e2de..21da3824 100644 --- a/tests/test_meta_learners.py +++ b/tests/test_meta_learners.py @@ -1228,7 +1228,9 @@ def test_BaseTLearner_predict_return_ci(generate_regression_data): learner = BaseTRegressor(learner=LinearRegression(), control_name=0) # Test 1: store_bootstraps=True then predict with return_ci=True - learner.fit(X, treatment, y, store_bootstraps=True, n_bootstraps=50, bootstrap_size=500) + learner.fit( + X, treatment, y, store_bootstraps=True, n_bootstraps=50, bootstrap_size=500 + ) tau_pred, lb, ub = learner.predict(X, return_ci=True, ci_quantile=0.05) assert tau_pred.shape == (X.shape[0], len(learner.t_groups)) From cc000d0d61e87b92e7afc0ffff2ff1644f5d560b Mon Sep 17 00:00:00 2001 From: Aman Srivastava Date: Sat, 14 Mar 2026 19:05:36 +0530 Subject: [PATCH 4/5] address review feedback --- causalml/inference/meta/tlearner.py | 36 ++++++++++++++++++----- tests/test_meta_learners.py | 45 ++++++++++++++++++++++------- 2 files changed, 63 insertions(+), 18 deletions(-) diff --git a/causalml/inference/meta/tlearner.py b/causalml/inference/meta/tlearner.py index 8e9d291d..b8cd742e 100644 --- a/causalml/inference/meta/tlearner.py +++ b/causalml/inference/meta/tlearner.py @@ -62,6 +62,7 @@ def __init__( self.ate_alpha = ate_alpha self.control_name = control_name + self.bootstrap_models_ = None def __repr__(self): return "{}(model_c={}, model_t={})".format( @@ -78,6 +79,7 @@ def fit( store_bootstraps=False, n_bootstraps=1000, bootstrap_size=10000, + random_state=None, ): """Fit the inference model @@ -85,6 +87,13 @@ def fit( X (np.matrix or np.array or pd.Dataframe): a feature matrix treatment (np.array or pd.Series): a treatment vector y (np.array or pd.Series): an outcome vector + p: unused, kept for API consistency + store_bootstraps (bool, optional): if True, trains a bootstrap ensemble + during fit and stores it in self.bootstrap_models_ for post-fit CI + estimation via predict(return_ci=True). Default: False. + n_bootstraps (int, optional): number of bootstrap iterations. Default: 1000. + bootstrap_size (int, optional): number of samples per bootstrap. Default: 10000. + random_state (int, optional): random seed for reproducible bootstrap sampling. """ X, treatment, y = convert_pd_to_np(X, treatment, y) check_treatment_vector(treatment, self.control_name) @@ -103,13 +112,15 @@ def fit( self.models_c[group].fit(X_filt[w == 0], y_filt[w == 0]) self.models_t[group].fit(X_filt[w == 1], y_filt[w == 1]) + if store_bootstraps: + rng = np.random.RandomState(random_state) logger.info( "Storing bootstrap ensemble ({} iterations)".format(n_bootstraps) ) self.bootstrap_models_ = [] for i in tqdm(range(n_bootstraps)): - idxs = np.random.choice(np.arange(X.shape[0]), size=bootstrap_size) + idxs = rng.choice(np.arange(X.shape[0]), size=bootstrap_size) X_b, treatment_b, y_b = X[idxs], treatment[idxs], y[idxs] models_c_b = {group: deepcopy(self.model_c) for group in self.t_groups} models_t_b = {group: deepcopy(self.model_t) for group in self.t_groups} @@ -138,7 +149,6 @@ def predict( return_components=False, verbose=True, return_ci=False, - ci_quantile=0.05, ): """Predict treatment effects. @@ -146,11 +156,21 @@ def predict( X (np.matrix or np.array or pd.Dataframe): a feature matrix treatment (np.array or pd.Series, optional): a treatment vector y (np.array or pd.Series, optional): an outcome vector - return_components (bool, optional): whether to return outcome for treatment and control seperately + return_components (bool, optional): whether to return outcome for + treatment and control separately verbose (bool, optional): whether to output progress logs + return_ci (bool, optional): whether to return confidence intervals + using the stored bootstrap ensemble. Requires fit() to have been + called with store_bootstraps=True. CI width is controlled by + self.ate_alpha set at init time. Returns: - (numpy.ndarray): Predictions of treatment effects. + (numpy.ndarray): Predictions of treatment effects. If return_ci=True, + returns (te, te_lower, te_upper) each of shape [n_samples, n_treatment]. + return_ci=True and return_components=True cannot be used together. """ + if return_ci and return_components: + raise ValueError("return_ci and return_components cannot both be True.") + X, treatment, y = convert_pd_to_np(X, treatment, y) yhat_cs = {} yhat_ts = {} @@ -179,7 +199,7 @@ def predict( te[:, i] = yhat_ts[group] - yhat_cs[group] if return_ci: - if not self.bootstrap_models_: + if self.bootstrap_models_ is None: raise ValueError( "No bootstrap ensemble found. Call fit(..., store_bootstraps=True) first." ) @@ -191,8 +211,10 @@ def predict( te_bootstraps[:, i, b] = models_t_b[group].predict(X) - models_c_b[ group ].predict(X) - te_lower = np.percentile(te_bootstraps, ci_quantile / 2 * 100, axis=2) - te_upper = np.percentile(te_bootstraps, (1 - ci_quantile / 2) * 100, axis=2) + te_lower = np.percentile(te_bootstraps, (self.ate_alpha / 2) * 100, axis=2) + te_upper = np.percentile( + te_bootstraps, (1 - self.ate_alpha / 2) * 100, axis=2 + ) return te, te_lower, te_upper if not return_components: diff --git a/tests/test_meta_learners.py b/tests/test_meta_learners.py index 21da3824..491d1df3 100644 --- a/tests/test_meta_learners.py +++ b/tests/test_meta_learners.py @@ -1,5 +1,6 @@ import numpy as np import pandas as pd +import pytest from sklearn.linear_model import LinearRegression from sklearn.linear_model import LogisticRegression @@ -1225,28 +1226,50 @@ def test_BaseDRClassifier(generate_classification_data): def test_BaseTLearner_predict_return_ci(generate_regression_data): y, X, treatment, tau, b, e = generate_regression_data() - learner = BaseTRegressor(learner=LinearRegression(), control_name=0) + learner = BaseTRegressor(learner=LinearRegression(), control_name=CONTROL_NAME) # Test 1: store_bootstraps=True then predict with return_ci=True learner.fit( - X, treatment, y, store_bootstraps=True, n_bootstraps=50, bootstrap_size=500 + X, + treatment, + y, + store_bootstraps=True, + n_bootstraps=50, + bootstrap_size=500, + random_state=RANDOM_SEED, ) - tau_pred, lb, ub = learner.predict(X, return_ci=True, ci_quantile=0.05) + tau_pred, lb, ub = learner.predict(X, return_ci=True) assert tau_pred.shape == (X.shape[0], len(learner.t_groups)) assert lb.shape == tau_pred.shape assert ub.shape == tau_pred.shape - assert (lb <= tau_pred).all() and (tau_pred <= ub).all() + assert (lb <= ub).all() - # Test 2: without store_bootstraps, return_ci=True should raise ValueError - learner2 = BaseTRegressor(learner=LinearRegression(), control_name=0) + # Test 2: ValueError without store_bootstraps + learner2 = BaseTRegressor(learner=LinearRegression(), control_name=CONTROL_NAME) learner2.fit(X, treatment, y) - try: + with pytest.raises(ValueError): learner2.predict(X, return_ci=True) - assert False, "Expected ValueError was not raised" - except ValueError: - pass - # Test 3: old API unchanged, no return_ci should return plain array + # Test 3: ValueError when return_ci and return_components both True + with pytest.raises(ValueError): + learner.predict(X, return_ci=True, return_components=True) + + # Test 4: old API unchanged tau_plain = learner.predict(X) assert tau_plain.shape == (X.shape[0], len(learner.t_groups)) + + # Test 5: reproducibility via random_state + learner3 = BaseTRegressor(learner=LinearRegression(), control_name=CONTROL_NAME) + learner3.fit( + X, + treatment, + y, + store_bootstraps=True, + n_bootstraps=50, + bootstrap_size=500, + random_state=RANDOM_SEED, + ) + tau2, lb2, ub2 = learner3.predict(X, return_ci=True) + np.testing.assert_array_equal(lb, lb2) + np.testing.assert_array_equal(ub, ub2) From f258cdb22d10482d2db57eb08103a0afb716738e Mon Sep 17 00:00:00 2001 From: Aman Srivastava Date: Sat, 21 Mar 2026 03:28:26 +0530 Subject: [PATCH 5/5] use control_name=0 to match generate_regression_data --- tests/test_meta_learners.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/test_meta_learners.py b/tests/test_meta_learners.py index 491d1df3..79546981 100644 --- a/tests/test_meta_learners.py +++ b/tests/test_meta_learners.py @@ -1226,7 +1226,7 @@ def test_BaseDRClassifier(generate_classification_data): def test_BaseTLearner_predict_return_ci(generate_regression_data): y, X, treatment, tau, b, e = generate_regression_data() - learner = BaseTRegressor(learner=LinearRegression(), control_name=CONTROL_NAME) + learner = BaseTRegressor(learner=LinearRegression(), control_name=0) # Test 1: store_bootstraps=True then predict with return_ci=True learner.fit( @@ -1246,7 +1246,7 @@ def test_BaseTLearner_predict_return_ci(generate_regression_data): assert (lb <= ub).all() # Test 2: ValueError without store_bootstraps - learner2 = BaseTRegressor(learner=LinearRegression(), control_name=CONTROL_NAME) + learner2 = BaseTRegressor(learner=LinearRegression(), control_name=0) learner2.fit(X, treatment, y) with pytest.raises(ValueError): learner2.predict(X, return_ci=True) @@ -1260,7 +1260,7 @@ def test_BaseTLearner_predict_return_ci(generate_regression_data): assert tau_plain.shape == (X.shape[0], len(learner.t_groups)) # Test 5: reproducibility via random_state - learner3 = BaseTRegressor(learner=LinearRegression(), control_name=CONTROL_NAME) + learner3 = BaseTRegressor(learner=LinearRegression(), control_name=0) learner3.fit( X, treatment,