Feat C++ API
A feature engineering automation tool
evaluation.cc
Go to the documentation of this file.
1 /* FEAT
2 copyright 2017 William La Cava
3 license: GNU/GPL v3
4 */
5 
6 #include "evaluation.h"
7 
8 // code to evaluate GP programs.
9 namespace FT{
10 
11  using namespace Opt;
12 
13  namespace Eval{
14 
15  Evaluation::Evaluation(string scorer): S(scorer)
16  {
17  this->S.set_scorer(scorer);
18  }
19 
21 
22  void Evaluation::validation(vector<Individual>& individuals,
23  const Data& d,
24  const Parameters& params,
25  bool offspring
26  )
27  {
28  unsigned start =0;
29  if (offspring)
30  start = individuals.size()/2;
31 
32  // loop through individuals
33  /* #pragma omp parallel for */
34  for (unsigned i = start; i<individuals.size(); ++i)
35  {
36  Individual& ind = individuals.at(i);
37 
38  // if there is no validation data,
39  // set fitness_v to fitness and return
40  if (d.X.cols() == 0)
41  {
42  ind.fitness_v = ind.fitness;
43  continue;
44  }
45 
46  bool pass = true;
47 
48  logger.log("Validating ind " + to_string(i)
49  + ", id: " + to_string(ind.id), 3);
50 
51  shared_ptr<CLabels> yhat = ind.predict(d);
52  // assign aggregate fitness
53  logger.log("Assigning fitness to ind " + to_string(i)
54  + ", eqn: " + ind.get_eqn(), 3);
55 
56  if (!pass)
57  {
58 
59  ind.fitness_v = MAX_FLT;
60  }
61  else
62  {
63  // assign fitness to individual
64  VectorXf loss;
65  ind.fitness_v = this->S.score(d.y, yhat, loss,
66  params.class_weights);
67  }
68  }
69  }
70  // fitness of population
71  void Evaluation::fitness(vector<Individual>& individuals,
72  const Data& d,
73  const Parameters& params,
74  bool offspring)
75  {
87  unsigned start =0;
88  if (offspring) start = individuals.size()/2;
89 
90  /* for (unsigned i = start; i<individuals.size(); ++i) */
91  /* { */
92  /* cout << "ind " << i << " size: " */
93  /* << individuals.at(i).size() << endl; */
94  /* /1* cout << "ind " << i << " eqn: " *1/ */
95  /* /1* << individuals.at(i).get_eqn() << endl; *1/ */
96  /* /1* cout << "ind " << i << " program str: " *1/ */
97  /* /1* << individuals.at(i).program_str() << endl; *1/ */
98  /* } */
99 
100  // loop through individuals
101  #pragma omp parallel for
102  for (unsigned i = start; i<individuals.size(); ++i)
103  {
104  Individual& ind = individuals.at(i);
105 
106  if (params.backprop)
107  {
108  #pragma omp critical
109  {
110  AutoBackProp backprop(params.scorer_, params.bp.iters,
111  params.bp.learning_rate);
112  logger.log("Running backprop on " + ind.get_eqn(), 3);
113  backprop.run(ind, d, params);
114  }
115  }
116  bool pass = true;
117 
118  logger.log("Running ind " + to_string(i)
119  + ", id: " + to_string(ind.id), 3);
120 
121  shared_ptr<CLabels> yhat = ind.fit(d,params,pass);
122  // assign F and aggregate fitness
123  logger.log("Assigning fitness to ind " + to_string(i)
124  + ", eqn: " + ind.get_eqn(), 3);
125 
126  if (!pass)
127  {
128 
129  ind.fitness = MAX_FLT;
130  ind.error = MAX_FLT*VectorXf::Ones(d.y.size());
131  }
132  else
133  {
134  // assign weights to individual
135  assign_fit(ind,yhat,d,params,false);
136 
137 
138  if (params.hillclimb)
139  {
140  HillClimb hc(params.scorer_, params.hc.iters,
141  params.hc.step);
142  bool updated = false;
143  shared_ptr<CLabels> yhat2 = hc.run(ind, d, params,
144  updated);
145  // update the fitness of this individual
146  if (updated)
147  {
148  assign_fit(ind, yhat2, d, params);
149  }
150 
151  }
152  }
153  }
154  }
155 
156  // assign fitness to program
158  const shared_ptr<CLabels>& yhat, const Data& d,
159  const Parameters& params, bool val)
160  {
175  VectorXf loss;
176  float f = S.score(d.y, yhat, loss, params.class_weights);
177  //TODO: add if condition for this
178  float fairness = marginal_fairness(loss, d, f);
179 
180  if (fairness <0 )
181  {
182  cout << "fairness is " << fairness << "...\n";
183  }
184  if (val)
185  {
186  ind.fitness_v = f;
187  ind.fairness_v = fairness;
188  }
189  else
190  {
191  ind.fitness = f;
192  ind.fairness = fairness;
193  ind.error = loss;
194  }
195 
196  logger.log("ind " + std::to_string(ind.id) + " fitness: "
197  + std::to_string(ind.fitness),3);
198  }
199 
200  float Evaluation::marginal_fairness(VectorXf& loss, const Data& d,
201  float base_score, bool use_alpha)
202  {
203  // averages the deviation of the loss function from average loss
204  // over k
205  float avg_score = 0;
206  float count = 0;
207  float alpha = 1;
208 
209  ArrayXb x_idx;
210 
211  for (const auto& pl : d.protect_levels)
212  {
213  for (const auto& lvl : pl.second)
214  {
215  x_idx = (d.X.row(pl.first).array() == lvl);
216  float len_g = x_idx.count();
217  if (use_alpha)
218  alpha = len_g/d.X.cols();
219  /* cout << "alpha = " << len_g << "/"
220  * << d.X.cols() << endl; */
221  float Beta = fabs(base_score -
222  x_idx.select(loss,0).sum()/len_g);
223  /* cout << "Beta = |" << base_score << " - " */
224  /* << x_idx.select(loss,0).sum() << "/" */
225  /* << len_g << "|" << endl; */
226  avg_score += alpha * Beta;
227  ++count;
228  }
229 
230  }
231  avg_score /= count;
232  if (std::isinf(avg_score)
233  || std::isnan(avg_score)
234  || avg_score < 0)
235  return MAX_FLT;
236 
237  return avg_score;
238 
239  }
240  }
241 }
data holding X, y, and Z data
Definition: data.h:42
VectorXf & y
Definition: data.h:46
map< int, vector< float > > protect_levels
Definition: data.h:61
MatrixXf & X
Definition: data.h:45
void assign_fit(Individual &ind, const shared_ptr< CLabels > &yhat, const Data &d, const Parameters &params, bool val=false)
assign fitness to an individual.
Definition: evaluation.cc:157
Evaluation(string scorer="")
Definition: evaluation.cc:15
float marginal_fairness(VectorXf &loss, const Data &d, float base_score, bool use_alpha=false)
Definition: evaluation.cc:200
void validation(vector< Individual > &individuals, const Data &d, const Parameters &params, bool offspring=false)
validation of population.
Definition: evaluation.cc:22
void fitness(vector< Individual > &individuals, const Data &d, const Parameters &params, bool offspring=false)
fitness of population.
Definition: evaluation.cc:71
float score(const VectorXf &y_true, const shared_ptr< CLabels > &yhat, VectorXf &loss, const vector< float > &w)
Definition: scorer.cc:41
void set_scorer(string scorer)
Definition: scorer.cc:36
void run(Individual &ind, const Data &d, const Parameters &params)
adapt weights
shared_ptr< CLabels > run(Individual &ind, Data d, const Parameters &params, bool &updated)
adapt weights
Definition: hillclimb.cc:27
individual programs in the population
Definition: individual.h:31
float fairness
aggregate fairness score
Definition: individual.h:40
string get_eqn()
return symbolic representation of program
Definition: individual.cc:748
unsigned id
tracking id
Definition: individual.h:53
float fitness
aggregate fitness score
Definition: individual.h:38
shared_ptr< CLabels > predict(const Data &d)
Definition: individual.cc:271
shared_ptr< CLabels > fit(const Data &d, const Parameters &params, bool &pass)
fits an ML model to the data after transformation
Definition: individual.cc:234
VectorXf error
training error
Definition: individual.h:36
float fairness_v
aggregate validation fairness score
Definition: individual.h:41
float fitness_v
aggregate validation fitness score
Definition: individual.h:39
string log(string m, int v, string sep="\n") const
print message with verbosity control.
Definition: logger.cc:54
Eigen::Array< bool, Eigen::Dynamic, 1 > ArrayXb
Definition: data.h:21
ArrayXb isinf(const ArrayXf &x)
returns true for elements of x that are infinite
Definition: utils.cc:217
ArrayXb isnan(const ArrayXf &x)
returns true for elements of x that are NaN
Definition: utils.cc:226
static Logger & logger
Definition: logger.h:46
std::string to_string(const T &value)
template function to convert objects to string for logging
Definition: utils.h:422
main Feat namespace
Definition: data.cc:13
int i
Definition: params.cc:552
static float MAX_FLT
Definition: init.h:47
float learning_rate
Definition: params.h:86
holds the hyperparameters for Feat.
Definition: params.h:25
bool backprop
turns on backpropagation
Definition: params.h:65
vector< float > class_weights
weights for each class
Definition: params.h:60
bool hillclimb
turns on parameter hill climbing
Definition: params.h:66
HC hc
stochastic hill climbing parameters
Definition: params.h:102
BP bp
backprop parameters
Definition: params.h:92
string scorer_
actual loss function used, determined by scorer
Definition: params.h:63