From 0b9ae7c87156a24f982117ef5d40a9c3eee5e6c9 Mon Sep 17 00:00:00 2001 From: RituSinghme Date: Tue, 24 Mar 2026 12:15:54 -0700 Subject: [PATCH] adding ThresholdOptimizer using fairlearn post processing: addresses Trusted-AI/AIF360/issues/379 Signed-off-by: RituSinghme --- aif360/sklearn/postprocessing/__init__.py | 3 +- .../postprocessing/threshold_optimizer.py | 203 ++++++++++++++++++ tests/sklearn/test_threshold_optimizer.py | 152 +++++++++++++ 3 files changed, 357 insertions(+), 1 deletion(-) create mode 100644 aif360/sklearn/postprocessing/threshold_optimizer.py create mode 100644 tests/sklearn/test_threshold_optimizer.py diff --git a/aif360/sklearn/postprocessing/__init__.py b/aif360/sklearn/postprocessing/__init__.py index b80719ea..c701902a 100644 --- a/aif360/sklearn/postprocessing/__init__.py +++ b/aif360/sklearn/postprocessing/__init__.py @@ -11,6 +11,7 @@ from aif360.sklearn.postprocessing.calibrated_equalized_odds import CalibratedEqualizedOdds from aif360.sklearn.postprocessing.reject_option_classification import RejectOptionClassifier, RejectOptionClassifierCV +from aif360.sklearn.postprocessing.threshold_optimizer import ThresholdOptimizer class PostProcessingMeta(BaseEstimator, MetaEstimatorMixin): @@ -228,5 +229,5 @@ def score(self, X, y, sample_weight=None): __all__ = [ 'CalibratedEqualizedOdds', 'PostProcessingMeta', 'RejectOptionClassifier', - 'RejectOptionClassifierCV' + 'RejectOptionClassifierCV', 'ThresholdOptimizer' ] diff --git a/aif360/sklearn/postprocessing/threshold_optimizer.py b/aif360/sklearn/postprocessing/threshold_optimizer.py new file mode 100644 index 00000000..44b42ebe --- /dev/null +++ b/aif360/sklearn/postprocessing/threshold_optimizer.py @@ -0,0 +1,203 @@ +""" +The code for ThresholdOptimizer wraps the source class +fairlearn.postprocessing.ThresholdOptimizer +available in the https://github.com/fairlearn/fairlearn library +licensed under the MIT License, Copyright Microsoft Corporation +""" +try: + from 
fairlearn.postprocessing import ThresholdOptimizer as FairlearnThresholdOptimizer +except ImportError as error: + from logging import warning + warning("{}: ThresholdOptimizer will be unavailable. To install, run:\n" + "pip install 'aif360[Reductions]'".format(error)) + +import numpy as np +from sklearn.base import BaseEstimator, ClassifierMixin, clone +from sklearn.utils.validation import check_is_fitted + +from aif360.sklearn.metrics import ( + statistical_parity_difference, + average_odds_error, + equal_opportunity_difference, +) +from aif360.sklearn.utils import check_groups + + +class ThresholdOptimizer(BaseEstimator, ClassifierMixin): + """Threshold optimizer post-processor. + + Threshold optimizer is a post-processing technique that optimizes + group-specific decision thresholds to satisfy fairness constraints while + minimizing a performance objective [#hardt16]_. + + This wraps :class:`fairlearn.postprocessing.ThresholdOptimizer` and + adapts it to the AIF360 sklearn-compatible API, where protected attributes + are stored in the pandas index of ``X``. + + Note: + Unlike :class:`CalibratedEqualizedOdds` and + :class:`RejectOptionClassifier`, this class wraps a full estimator and + **cannot** be used as the ``postprocessor`` argument to + :class:`PostProcessingMeta`. Use it as a standalone estimator instead. + + Because Fairlearn's ThresholdOptimizer requires ``sensitive_features`` + at predict time, ``X`` must be a :class:`pandas.DataFrame` with + protected attribute(s) in the index at both ``fit`` and ``predict``. + + References: + .. [#hardt16] `M. Hardt, E. Price, and N. Srebro, "Equality of + Opportunity in Supervised Learning," Advances in Neural Information + Processing Systems, 2016. + `_ + + Attributes: + estimator_: Fitted base estimator (or the prefit estimator if + ``prefit=True``). + model_ (fairlearn.postprocessing.ThresholdOptimizer): Fitted + ThresholdOptimizer model. + classes_ (array, shape (2,)): Class labels. 
Only binary + classification is supported. + prot_attr_ (FrozenList): Protected attribute(s) resolved at fit time. + """ + + def __init__(self, estimator, prot_attr=None, + constraints='demographic_parity', + objective='accuracy_score', + grid_size=1000, flip=False, prefit=False, + predict_method='auto'): + """ + Args: + estimator: A scikit-learn compatible classifier implementing + fit(X, y) and either predict_proba(X), + decision_function(X), or predict(X). + prot_attr (single label or list-like, optional): Protected + attribute(s) to use. Must be present in the index of X. + If None, all protected attributes in X.index are used. + constraints (str): Fairness constraint to satisfy. One of: + 'demographic_parity', 'equalized_odds', + 'true_positive_rate_parity', + 'false_positive_rate_parity', + 'true_negative_rate_parity', + 'false_negative_rate_parity'. + Default is 'demographic_parity'. + objective (str): Performance objective to optimize. One of: + 'accuracy_score', 'balanced_accuracy_score', + 'selection_rate', 'true_positive_rate', + 'true_negative_rate'. Default is 'accuracy_score'. + grid_size (int): Number of grid points used to discretize the + constraint metric over [0, 1]. Default is 1000. + flip (bool): If True, allow flipping predictions to improve + fairness. Default is False. + prefit (bool): If True, the estimator is assumed to be already + fitted and will not be re-fitted. Default is False. + predict_method (str): Method used to obtain scores from the base + estimator. One of 'auto', 'predict_proba', + 'decision_function', 'predict'. Default is + 'auto', which tries predict_proba first, then + decision_function, then predict. 
+ """ + self.estimator = estimator + self.prot_attr = prot_attr + self.constraints = constraints + self.objective = objective + self.grid_size = grid_size + self.flip = flip + self.prefit = prefit + self.predict_method = predict_method + + def fit(self, X, y, sample_weight=None): + """Fit the base estimator and optimize decision thresholds. + + Args: + X (pandas.DataFrame): Training samples. Must be a pandas DataFrame + with protected attribute(s) in the index. + y (array-like): Binary training labels. + sample_weight (array-like, optional): Sample weights passed to the + base estimator's ``fit`` method. + + Returns: + self + """ + groups, self.prot_attr_ = check_groups(X, self.prot_attr) + + self.classes_ = np.unique(y) + if len(self.classes_) != 2: + raise ValueError( + 'Only binary classification is supported. Got ' + '{} classes.'.format(len(self.classes_))) + + self.estimator_ = self.estimator if self.prefit else clone(self.estimator) + + self.model_ = FairlearnThresholdOptimizer( + estimator=self.estimator_, + constraints=self.constraints, + objective=self.objective, + grid_size=self.grid_size, + flip=self.flip, + prefit=self.prefit, + predict_method=self.predict_method, + ) + + fit_kwargs = {} + if sample_weight is not None: + fit_kwargs['sample_weight'] = sample_weight + + self.model_.fit(X, y, sensitive_features=groups, **fit_kwargs) + return self + + def predict(self, X): + """Predict class labels for the given samples. + + Args: + X (pandas.DataFrame): Test samples. Must include protected + attribute(s) in the index (same attributes as at fit time). + + Returns: + numpy.ndarray: Predicted class label per sample. + """ + check_is_fitted(self, ['model_', 'prot_attr_']) + groups, _ = check_groups(X, self.prot_attr_) + return self.model_.predict(X, sensitive_features=groups) + + def score(self, X, y, sample_weight=None): + """Score predictions using the fairness metric for the given constraint. 
+ + Returns the negated absolute fairness violation so that + higher values indicate a fairer model (compatible with sklearn's + ``GridSearchCV`` and ``cross_val_score`` which maximize the score). + + Constraint-to-metric mapping: + + * ``'demographic_parity'`` → :func:`~aif360.sklearn.metrics.statistical_parity_difference` + * ``'equalized_odds'`` → :func:`~aif360.sklearn.metrics.average_odds_error` + * ``'true_positive_rate_parity'`` / ``'equal_opportunity'`` → :func:`~aif360.sklearn.metrics.equal_opportunity_difference` + * other → :func:`sklearn.metrics.accuracy_score` (fallback) + + Args: + X (pandas.DataFrame): Test samples. + y (array-like): True labels. + sample_weight (array-like, optional): Sample weights. + + Returns: + float: Negated absolute fairness violation (0 = perfectly fair, + more negative = less fair). Falls back to accuracy for unrecognized + constraints. + """ + check_is_fitted(self, ['model_', 'prot_attr_']) + y_pred = self.predict(X) + + constraint_to_metric = { + 'demographic_parity': statistical_parity_difference, + 'equalized_odds': average_odds_error, + 'true_positive_rate_parity': equal_opportunity_difference, + 'equal_opportunity': equal_opportunity_difference, + } + metric_fn = constraint_to_metric.get(self.constraints) + if metric_fn is None: + from sklearn.metrics import accuracy_score + return accuracy_score(y, y_pred, sample_weight=sample_weight) + + kwargs = {} + if sample_weight is not None: + kwargs['sample_weight'] = sample_weight + return -abs(metric_fn(y, y_pred, prot_attr=self.prot_attr_, **kwargs)) diff --git a/tests/sklearn/test_threshold_optimizer.py b/tests/sklearn/test_threshold_optimizer.py new file mode 100644 index 00000000..78bf1844 --- /dev/null +++ b/tests/sklearn/test_threshold_optimizer.py @@ -0,0 +1,152 @@ +import numpy as np +import pandas as pd +import pytest +from sklearn.exceptions import NotFittedError +from sklearn.linear_model import LogisticRegression +from sklearn.model_selection import 
train_test_split

from aif360.sklearn.postprocessing import ThresholdOptimizer


@pytest.fixture(scope='module')
def adult_split(new_adult):
    """Shared 70/30 train/test split of the adult dataset fixture."""
    X, y, _ = new_adult
    return train_test_split(X, y, test_size=0.3, random_state=0)


def test_threshold_optimizer_basic(adult_split):
    """Smoke test: fit/predict produces correct shape and valid labels."""
    X_tr, X_te, y_tr, y_te = adult_split
    lr = LogisticRegression(solver='lbfgs', max_iter=500)
    to = ThresholdOptimizer(lr, prot_attr='sex', constraints='demographic_parity')
    to.fit(X_tr, y_tr)
    y_pred = to.predict(X_te)

    assert y_pred.shape == (len(X_te),)
    assert set(np.unique(y_pred)) <= set(to.classes_)


def test_threshold_optimizer_prot_attr_none(new_adult):
    """prot_attr=None resolves all protected attributes from index."""
    X, y, _ = new_adult
    to = ThresholdOptimizer(LogisticRegression(max_iter=500), prot_attr=None,
                            constraints='demographic_parity')
    to.fit(X, y)
    assert to.prot_attr_ is not None
    assert len(to.prot_attr_) > 0


def test_threshold_optimizer_not_fitted():
    """predict raises NotFittedError before fit is called."""
    to = ThresholdOptimizer(LogisticRegression())
    X = pd.DataFrame([[0, 1]], index=pd.Index([0], name='group'))
    with pytest.raises(NotFittedError):
        to.predict(X)


def test_threshold_optimizer_binary_guard(new_adult):
    """Raises ValueError for non-binary target."""
    X, y, _ = new_adult
    # Rebuild the target as a plain Series before injecting a third class:
    # assigning an unseen label directly into a categorical-dtype Series
    # raises, which would mask the ValueError under test.
    y_multi = pd.Series(np.asarray(y).copy(), index=y.index)
    y_multi.iloc[:100] = 2
    to = ThresholdOptimizer(LogisticRegression(max_iter=500), prot_attr='sex')
    with pytest.raises(ValueError, match='binary'):
        to.fit(X, y_multi)


def test_threshold_optimizer_prefit(new_adult):
    """prefit=True skips cloning: estimator_ is the same object."""
    X, y, _ = new_adult
    lr = LogisticRegression(max_iter=500).fit(X, y)
    to = ThresholdOptimizer(lr, prot_attr='sex', prefit=True)
    to.fit(X, y)
    assert to.estimator_ is lr


@pytest.mark.parametrize('constraint', [
    'demographic_parity',
    'equalized_odds',
    'true_positive_rate_parity',
    'false_positive_rate_parity',
])
def test_threshold_optimizer_constraints(adult_split, constraint):
    """All supported constraint strings run without error."""
    X_tr, X_te, y_tr, y_te = adult_split
    to = ThresholdOptimizer(LogisticRegression(solver='lbfgs', max_iter=500),
                            prot_attr='sex', constraints=constraint)
    to.fit(X_tr, y_tr)
    assert to.predict(X_te).shape == (len(X_te),)


def test_threshold_optimizer_score_recognized_constraint(adult_split):
    """score() returns a non-positive float for recognized constraints."""
    X_tr, X_te, y_tr, y_te = adult_split
    to = ThresholdOptimizer(LogisticRegression(solver='lbfgs', max_iter=500),
                            prot_attr='sex', constraints='demographic_parity')
    to.fit(X_tr, y_tr)
    score = to.score(X_te, y_te)

    assert isinstance(score, float)
    assert score <= 0.0


def test_threshold_optimizer_score_fallback_constraint(adult_split):
    """score() falls back to accuracy for constraints with no mapped metric.

    'false_positive_rate_parity' is a valid fairlearn constraint for fit(),
    but score() has no fairness metric mapped to it, so accuracy is returned.
    """
    X_tr, X_te, y_tr, y_te = adult_split
    to = ThresholdOptimizer(LogisticRegression(solver='lbfgs', max_iter=500),
                            prot_attr='sex',
                            constraints='false_positive_rate_parity')
    to.fit(X_tr, y_tr)
    score = to.score(X_te, y_te)

    assert isinstance(score, float)
    assert 0.0 <= score <= 1.0


@pytest.mark.parametrize('constraint', [
    'demographic_parity',
    'equalized_odds',
    'true_positive_rate_parity',
])
def test_threshold_optimizer_score_mapped_constraints(adult_split, constraint):
    """score() returns a non-positive float for all mapped constraints."""
    X_tr, X_te, y_tr, y_te = adult_split
    to = ThresholdOptimizer(LogisticRegression(solver='lbfgs', max_iter=500),
                            prot_attr='sex', constraints=constraint)
    to.fit(X_tr, y_tr)
    score = to.score(X_te, y_te)

    assert isinstance(score, float)
    assert score <= 0.0


def test_threshold_optimizer_sample_weight(new_adult):
    """sample_weight in fit() passes through to the base estimator."""
    X, y, sample_weight = new_adult
    to = ThresholdOptimizer(LogisticRegression(solver='lbfgs', max_iter=500),
                            prot_attr='sex', constraints='demographic_parity')
    to.fit(X, y, sample_weight=sample_weight)
    y_pred = to.predict(X)

    assert y_pred.shape == (len(X),)
    assert set(np.unique(y_pred)) <= set(to.classes_)


def test_threshold_optimizer_score_sample_weight(adult_split, new_adult):
    """score() accepts and applies sample_weight."""
    X_tr, X_te, y_tr, y_te = adult_split
    _, _, sample_weight = new_adult
    # Same test_size/random_state as adult_split, so sw_te aligns with X_te.
    _, sw_te = train_test_split(sample_weight, test_size=0.3, random_state=0)

    to = ThresholdOptimizer(LogisticRegression(solver='lbfgs', max_iter=500),
                            prot_attr='sex', constraints='demographic_parity')
    to.fit(X_tr, y_tr)

    score_weighted = to.score(X_te, y_te, sample_weight=sw_te)
    score_unweighted = to.score(X_te, y_te)

    # Weighted and unweighted scores may legitimately coincide, so only
    # validity is asserted (the original `!= ... or True` was a tautology).
    assert isinstance(score_weighted, float)
    assert score_weighted <= 0.0
    assert isinstance(score_unweighted, float)
    assert score_unweighted <= 0.0