Feat C++ API
A feature engineering automation tool
hillclimb.cc
Go to the documentation of this file.
1 /* FEAT
2 copyright 2017 William La Cava
3 license: GNU/GPL v3
4 */
5 
6 #include "hillclimb.h"
7 #include "../eval/metrics.h"
8 #include "../model/ml.h"
9 
10 namespace FT {
11 
12  namespace Opt{
13 
14  HillClimb::HillClimb(string scorer, int iters, float step)
15  {
17  score_hash["log"] = &Eval::log_loss;
18  score_hash["multi_log"] = &Eval::multi_log_loss;
19  this->cost_func = score_hash.at(scorer);
20  /* this->X = X; */
21  /* this->labels = labels; */
22  this->iters = iters;
23  this->step = step;
24 
25  }
26 
27  shared_ptr<CLabels> HillClimb::run(Individual& ind, Data d,
28  const Parameters& params, bool& updated)
29  {
30  updated = false; // keep track of whether we update this individual
31  float min_loss = ind.fitness;
32  shared_ptr<CLabels> min_yhat;
33 
34  for (int x = 0; x < this->iters; x++)
35  {
36  /* cout << "iteration " << x << "\n"; */
37  Individual tmp;
38  ind.clone(tmp);
39  bool anychanges = false;
40  for (auto& p : tmp.program)
41  {
42  if (p->isNodeDx())
43  {
44  anychanges = true;
45  auto pd = dynamic_cast<NodeDx*>(p.get());
46  vector<float> W0 = pd->W;
47  for (int i = 0; i < pd->W.size(); ++i)
48  { // perturb W[i] with gaussian noise
49  pd->W.at(i) += r.gasdev()*pd->W.at(i)*this->step;
50  /* cout << "changed " << W0.at(i) << " to " << pd->W.at(i) << "\n"; */
51  }
52  }
53  }
54  if (!anychanges) // then there are no weighted nodes, so break
55  break;
56  // evaluate perturbed program
57  logger.log("Generating output for " + tmp.get_eqn(), 3);
58 
59  bool pass = true;
60 
61  shared_ptr<CLabels> yhat = tmp.fit(d, params, pass);
62  if (!pass)
63  continue;
64 
65  float new_loss = this->cost_func(d.y,yhat, params.class_weights).mean();
66  /* cout << "old loss: " << min_loss << ", new_loss: " << new_loss << "\n"; */
67  if (new_loss < min_loss)
68  {
69  updated = true;
70  /* cout << "updating \n"; */
71  ind.program = tmp.program;
72  ind.fitness = new_loss;
73  min_loss = new_loss;
74  /* ind.set_p(ind.ml->get_weights(),params.feedback); */
75  min_yhat = yhat;
76  }
77  }
78  return min_yhat;
79  }
80  }
81 
82 }
Data container holding X, y, and Z
Definition: data.h:42
VectorXf & y
Definition: data.h:46
std::map< string, callback > score_hash
Definition: hillclimb.h:40
callback cost_func
Definition: hillclimb.h:49
shared_ptr< CLabels > run(Individual &ind, Data d, const Parameters &params, bool &updated)
adapt weights
Definition: hillclimb.cc:27
HillClimb(string scorer, int iters=1, float step=0.1)
Definition: hillclimb.cc:14
individual programs in the population
Definition: individual.h:31
string get_eqn()
return symbolic representation of program
Definition: individual.cc:748
void clone(Individual &cpy, bool sameid=true) const
clone this individual
Definition: individual.cc:82
float fitness
aggregate fitness score
Definition: individual.h:38
NodeVector program
executable data structure
Definition: individual.h:33
shared_ptr< CLabels > fit(const Data &d, const Parameters &params, bool &pass)
fits an ML model to the data after transformation
Definition: individual.cc:234
std::vector< float > W
Definition: n_Dx.h:16
string log(string m, int v, string sep="\n") const
print message with verbosity control.
Definition: logger.cc:54
float gasdev()
Definition: rnd.cc:156
VectorXf multi_log_loss(const VectorXf &y, const ArrayXXf &confidences, const vector< float > &class_weights)
multinomial log loss
Definition: metrics.cc:191
VectorXf log_loss(const VectorXf &y, const VectorXf &yhat, const vector< float > &class_weights)
Definition: metrics.cc:88
VectorXf squared_difference(const VectorXf &y, const VectorXf &yhat)
Definition: metrics.cc:18
static Logger & logger
Definition: logger.h:46
static Rnd & r
Definition: rnd.h:135
main Feat namespace
Definition: data.cc:13
int i
Definition: params.cc:552
holds the hyperparameters for Feat.
Definition: params.h:25
vector< float > class_weights
weights for each class
Definition: params.h:60