# Source code for mitigate_disparity
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.utils import resample
import pickle
import fomo_estimator
def mitigate_disparity(
    dataset: str,
    protected_features: list[str],
    starting_point: str | None = None,
    save_file: str = 'estimator.pkl',
):
    """Train an optimally fair/debiased model and pickle it to disk.

    Reads a model development dataset (training and test data) that the
    algorithm has not seen before, fits the fomo estimator with fairness
    controlled over the protected features, and saves the resulting
    sklearn-style estimator.

    Parameters
    ----------
    dataset: str
        Path to a csv file storing a dataframe with one row per individual.
        Columns should include:

        1. `binary outcome`: binary outcome (i.e. 0 or 1, where 1 indicates
           the favorable outcome for the individual being scored).
        2. `sample weights`: sample weights. These are ignored.
        3. All additional columns are treated as features/predictors.
    protected_features: list[str]
        The columns of the dataset over which we wish to control for fairness.
    starting_point: str | None
        Optionally start from a checkpoint file with this name.
    save_file: str, default: estimator.pkl
        The name of the file the fitted estimator is saved to.

    Returns
    -------
    None
        Side effect: writes `save_file`, a pickle of a fair/debiased
        sklearn-style estimator object.
    """
    print('dataset:', dataset)
    print('protected_features:', protected_features)
    df = pd.read_csv(dataset, index_col=False)
    # Everything except the outcome and the (ignored) sample weights is a
    # feature; errors='ignore' tolerates datasets missing either column.
    # (The redundant axis=1 was dropped: `columns=` already selects the axis.)
    X = df.drop(columns=['binary outcome', 'sample weights'], errors='ignore')
    y = df['binary outcome']
    est = fomo_estimator.est
    est.fit(
        X,
        y,
        protected_features=list(protected_features),
        termination=fomo_estimator.termination,
        starting_point=starting_point,
        save_history=True,
        checkpoint=True,
    )
    print('saving estimator to', save_file, '...')
    with open(save_file, 'wb') as of:
        pickle.dump(est, of)
    print('done.')
if __name__ == '__main__':
    # `fire` is only needed for command-line use; importing it lazily here
    # means merely importing this module does not require the dependency.
    import fire

    fire.Fire(mitigate_disparity)