Source code for mitigate_disparity

import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.utils import resample
import pickle
import fomo_estimator

[docs]def mitigate_disparity( dataset: str, protected_features: list[str], starting_point: str|None = None, save_file: str = 'estimator.pkl' ): """ “mitigate_disparity.py” takes in a model development dataset (training and test datasets) that your algorithm has not seen before and generates a new, optimally fair/debiased model that can be used to make new predictions. Parameters ---------- dataset: str A csv file storing a dataframe with one row per individual. Columns should include: 1. `binary outcome`: Binary outcome (i.e. 0 or 1, where 1 indicates the favorable outcome for the individual being scored) 2. `sample weights`: Sample weights. These are ignored. 3. All additional columns are treated as features/predictors. protected_features: list[str] The columns of the dataset over which we wish to control for fairness. starting_point : str | None Optionally start from a checkpoint file with this name. save_file: str, default: estimator.pkl The name of the saved estimator. Returns ------- estimator.pkl: file containing sklearn-style Estimator Saves a fair/debiased model object, taking the form of a sklearn-style python object. """ print('dataset:',dataset) print('protected_features:',protected_features) df = pd.read_csv(dataset, index_col=False) X = df.drop(columns=['binary outcome', 'sample weights'], axis=1, errors='ignore' ) y = df['binary outcome'] est = fomo_estimator.est est.fit( X, y, protected_features=list(protected_features), termination=fomo_estimator.termination, starting_point=starting_point, save_history=True, checkpoint=True ) print('saving estimator to',save_file,'...') with open(save_file, 'wb') as of: pickle.dump(est, of) print('done.')
# Python Fire builds the command-line interface from the function signature.
import fire

# Only expose the CLI when this file is run as a script, not when imported.
if __name__ == '__main__':
    fire.Fire(mitigate_disparity)